/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 59 by nigel, Sat Feb 24 21:39:54 2007 UTC revision 75 by nigel, Sat Feb 24 21:40:37 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39  #include <ctype.h>  #include <ctype.h>
40  #include <stdio.h>  #include <stdio.h>
41  #include <string.h>  #include <string.h>
42  #include <stdlib.h>  #include <stdlib.h>
43  #include <time.h>  #include <time.h>
44  #include <locale.h>  #include <locale.h>
45    #include <errno.h>
46    
47  /* Use the internal info for displaying the results of pcre_study(). */  /* We need the internal info for displaying the results of pcre_study(). Also
48    for getting the opcodes for showing compiled code. */
49    
50    #define PCRE_SPY        /* For Win32 build, import data, not export */
51  #include "internal.h"  #include "internal.h"
52    
53  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
# Line 29  Makefile. */ Line 66  Makefile. */
66  #endif  #endif
67  #endif  #endif
68    
69  #define LOOPREPEAT 20000  #define LOOPREPEAT 500000
70    
71    #define BUFFER_SIZE 30000
72    #define PBUFFER_SIZE BUFFER_SIZE
73    #define DBUFFER_SIZE BUFFER_SIZE
74    
75    
76  static FILE *outfile;  static FILE *outfile;
77  static int log_store = 0;  static int log_store = 0;
78    static int callout_count;
79    static int callout_extra;
80    static int callout_fail_count;
81    static int callout_fail_id;
82    static int first_callout;
83    static int show_malloc;
84    static int use_utf8;
85  static size_t gotten_store;  static size_t gotten_store;
86    
87    static uschar *pbuffer = NULL;
88    
89    
90  static int utf8_table1[] = {  static const int utf8_table1[] = {
91    0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};    0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
92    
93  static int utf8_table2[] = {  static const int utf8_table2[] = {
94    0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};    0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
95    
96  static int utf8_table3[] = {  static const int utf8_table3[] = {
97    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
98    
99    
100    
101    /*************************************************
102    *         Print compiled regex                   *
103    *************************************************/
104    
105    /* The code for doing this is held in a separate file that is also included in
106    pcre.c when it is compiled with the debug switch. It defines a function called
107    print_internals(), which uses a table of opcode lengths defined by the macro
108    OP_LENGTHS, whose name must be OP_lengths. It also uses a table that translates
109    Unicode property names to numbers; this is kept in a separate file. */
110    
111    static uschar OP_lengths[] = { OP_LENGTHS };
112    
113    #include "ucp.h"
114    #include "ucptypetable.c"
115    #include "printint.c"
116    
117    
118    
119    /*************************************************
120    *          Read number from string               *
121    *************************************************/
122    
123    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
124    around with conditional compilation, just do the job by hand. It is only used
125    for unpicking the -o argument, so just keep it simple.
126    
127    Arguments:
128      str           string to be converted
129      endptr        where to put the end pointer
130    
131    Returns:        the converted value, as an int
132    */
133    
134    static int
135    get_value(unsigned char *str, unsigned char **endptr)
136    {
137    int result = 0;
138    while(*str != 0 && isspace(*str)) str++;
139    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
140    *endptr = str;
141    return(result);
142    }
143    
144    
145    
146  /*************************************************  /*************************************************
147  *       Convert character value to UTF-8         *  *       Convert character value to UTF-8         *
148  *************************************************/  *************************************************/
# Line 100  Returns: > 0 => the number of bytes c Line 195  Returns: > 0 => the number of bytes c
195             -6 to 0 => malformed UTF-8 character at offset = (-return)             -6 to 0 => malformed UTF-8 character at offset = (-return)
196  */  */
197    
198  int  static int
199  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *buffer, int *vptr)
200  {  {
201  int c = *buffer++;  int c = *buffer++;
# Line 143  return i+1; Line 238  return i+1;
238    
239    
240    
241    /*************************************************
242    *             Print character string             *
243    *************************************************/
244    
245    /* Character string printing function. Must handle UTF-8 strings in utf8
246    mode. Yields number of characters printed. If handed a NULL file, just counts
247    chars without printing. */
248    
249    static int pchars(unsigned char *p, int length, FILE *f)
250    {
251    int c;
252    int yield = 0;
253    
254  /* Debugging function to print the internal form of the regex. This is the same  while (length-- > 0)
255  code as contained in pcre.c under the DEBUG macro. */    {
256      if (use_utf8)
257        {
258        int rc = utf82ord(p, &c);
259    
260  static const char *OP_names[] = {      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
261    "End", "\\A", "\\B", "\\b", "\\D", "\\d",        {
262    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",        length -= rc - 1;
263    "Opt", "^", "$", "Any", "chars", "not",        p += rc;
264    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",        if (c < 256 && isprint(c))
265    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",          {
266    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",          if (f != NULL) fprintf(f, "%c", c);
267    "*", "*?", "+", "+?", "?", "??", "{", "{",          yield++;
268    "class", "Ref", "Recurse",          }
269    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",        else
270    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",          {
271    "Brazero", "Braminzero", "Branumber", "Bra"          int n;
272  };          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
273            yield += n;
274            }
275          continue;
276          }
277        }
278    
279       /* Not UTF-8, or malformed UTF-8  */
280    
281  static void print_internals(pcre *re)    if (isprint(c = *(p++)))
282  {      {
283  unsigned char *code = ((real_pcre *)re)->code;      if (f != NULL) fprintf(f, "%c", c);
284        yield++;
285        }
286      else
287        {
288        if (f != NULL) fprintf(f, "\\x%02x", c);
289        yield += 4;
290        }
291      }
292    
293  fprintf(outfile, "------------------------------------------------------------------\n");  return yield;
294    }
295    
 for(;;)  
   {  
   int c;  
   int charlength;  
296    
   fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  
297    
298    if (*code >= OP_BRA)  /*************************************************
299    *              Callout function                  *
300    *************************************************/
301    
302    /* Called from PCRE as a result of the (?C) item. We print out where we are in
303    the match. Yield zero unless more callouts than the fail count, or the callout
304    data is not zero. */
305    
306    static int callout(pcre_callout_block *cb)
307    {
308    FILE *f = (first_callout | callout_extra)? outfile : NULL;
309    int i, pre_start, post_start, subject_length;
310    
311    if (callout_extra)
312      {
313      fprintf(f, "Callout %d: last capture = %d\n",
314        cb->callout_number, cb->capture_last);
315    
316      for (i = 0; i < cb->capture_top * 2; i += 2)
317      {      {
318      if (*code - OP_BRA > EXTRACT_BASIC_MAX)      if (cb->offset_vector[i] < 0)
319        fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);        fprintf(f, "%2d: <unset>\n", i/2);
320      else      else
321        fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);        {
322      code += 2;        fprintf(f, "%2d: ", i/2);
323          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
324            cb->offset_vector[i+1] - cb->offset_vector[i], f);
325          fprintf(f, "\n");
326          }
327      }      }
328      }
329    
330    else switch(*code)  /* Re-print the subject in canonical form, the first time or if giving full
331      {  details. On subsequent calls in the same match, we use pchars just to find the
332      case OP_END:  printed lengths of the substrings. */
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     case OP_COND:  
     case OP_BRANUMBER:  
     case OP_REVERSE:  
     case OP_CREF:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", (code[1] << 8) | code[2]);  
     code += 3;  
     goto CLASS_REF_REPEAT;  
333    
334      case OP_CLASS:  if (f != NULL) fprintf(f, "--->");
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
335    
336        CLASS_REF_REPEAT:  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
337    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
338      cb->current_position - cb->start_match, f);
339    
340        switch(*code)  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
341    
342          case OP_CRRANGE:  (void)pchars((unsigned char *)(cb->subject + cb->current_position),
343          case OP_CRMINRANGE:    cb->subject_length - cb->current_position, f);
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
344    
345          default:  if (f != NULL) fprintf(f, "\n");
         code--;  
         }  
       }  
     break;  
346    
347      /* Anything else is just a one-node item */  /* Always print appropriate indicators, with callout number if not already
348    shown. For automatic callouts, show the pattern offset. */
349    
350      default:  if (cb->callout_number == 255)
351      fprintf(outfile, "    %s", OP_names[*code]);    {
352      break;    fprintf(outfile, "%+3d ", cb->pattern_position);
353      }    if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
354      }
355    else
356      {
357      if (callout_extra) fprintf(outfile, "    ");
358        else fprintf(outfile, "%3d ", cb->callout_number);
359      }
360    
361    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
362    fprintf(outfile, "^");
363    
364    code++;  if (post_start > 0)
365    fprintf(outfile, "\n");    {
366      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
367      fprintf(outfile, "^");
368    }    }
 }  
369    
370    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
371      fprintf(outfile, " ");
372    
373    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
374      pbuffer + cb->pattern_position);
375    
376  /* Character string printing function. A "normal" and a UTF-8 version. */  fprintf(outfile, "\n");
377    first_callout = 0;
378    
379  static void pchars(unsigned char *p, int length, int utf8)  if (cb->callout_data != NULL)
 {  
 int c;  
 while (length-- > 0)  
380    {    {
381    if (utf8)    int callout_data = *((int *)(cb->callout_data));
382      if (callout_data != 0)
383      {      {
384      int rc = utf82ord(p, &c);      fprintf(outfile, "Callout data = %d\n", callout_data);
385      if (rc > 0)      return callout_data;
       {  
       length -= rc - 1;  
       p += rc;  
       if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x{%02x}", c);  
       continue;  
       }  
386      }      }
   
    /* Not UTF-8, or malformed UTF-8  */  
   
   if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  
     else fprintf(outfile, "\\x%02x", c);  
387    }    }
388    
389    return (cb->callout_number != callout_fail_id)? 0 :
390           (++callout_count >= callout_fail_count)? 1 : 0;
391  }  }
392    
393    
394    /*************************************************
395    *            Local malloc functions              *
396    *************************************************/
397    
398  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
399  compiled re. */  compiled re. */
400    
401  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
402  {  {
403    void *block = malloc(size);
404  gotten_store = size;  gotten_store = size;
405  if (log_store)  if (show_malloc)
406    fprintf(outfile, "Memory allocation (code space): %d\n",    fprintf(outfile, "malloc       %3d %p\n", size, block);
407      (int)((int)size - offsetof(real_pcre, code[0])));  return block;
 return malloc(size);  
408  }  }
409    
410    static void new_free(void *block)
411    {
412    if (show_malloc)
413      fprintf(outfile, "free             %p\n", block);
414    free(block);
415    }
416    
417    
418    /* For recursion malloc/free, to test stacking calls */
419    
420    static void *stack_malloc(size_t size)
421    {
422    void *block = malloc(size);
423    if (show_malloc)
424      fprintf(outfile, "stack_malloc %3d %p\n", size, block);
425    return block;
426    }
427    
428    static void stack_free(void *block)
429    {
430    if (show_malloc)
431      fprintf(outfile, "stack_free       %p\n", block);
432    free(block);
433    }
434    
435    
436    /*************************************************
437    *          Call pcre_fullinfo()                  *
438    *************************************************/
439    
440  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function */
441    
# Line 420  if ((rc = pcre_fullinfo(re, study, optio Line 448  if ((rc = pcre_fullinfo(re, study, optio
448    
449    
450    
451    /*************************************************
452    *         Byte flipping function                 *
453    *************************************************/
454    
455    static long int
456    byteflip(long int value, int n)
457    {
458    if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
459    return ((value & 0x000000ff) << 24) |
460           ((value & 0x0000ff00) <<  8) |
461           ((value & 0x00ff0000) >>  8) |
462           ((value & 0xff000000) >> 24);
463    }
464    
465    
466    
467    
468    /*************************************************
469    *                Main Program                    *
470    *************************************************/
471    
472  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
473  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 442  int posix = 0; Line 490  int posix = 0;
490  #endif  #endif
491  int debug = 0;  int debug = 0;
492  int done = 0;  int done = 0;
 unsigned char buffer[30000];  
 unsigned char dbuffer[1024];  
493    
494  /* Static so that new_malloc can use it. */  unsigned char *buffer;
495    unsigned char *dbuffer;
496    
497    /* Get buffers from malloc() so that Electric Fence will check their misuse
498    when I am debugging. */
499    
500    buffer = (unsigned char *)malloc(BUFFER_SIZE);
501    dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
502    pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
503    
504    /* The outfile variable is static so that new_malloc can use it. The _setmode()
505    stuff is some magic that I don't understand, but which apparently does good
506    things in Windows. It's related to line terminations.  */
507    
508    #if defined(_WIN32) || defined(WIN32)
509    _setmode( _fileno( stdout ), 0x8000 );
510    #endif  /* defined(_WIN32) || defined(WIN32) */
511    
512  outfile = stdout;  outfile = stdout;
513    
# Line 453  outfile = stdout; Line 515  outfile = stdout;
515    
516  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
517    {    {
518    char *endptr;    unsigned char *endptr;
519    
520    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
521      showstore = 1;      showstore = 1;
# Line 461  while (argc > 1 && argv[op][0] == '-') Line 523  while (argc > 1 && argv[op][0] == '-')
523    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
524    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
525    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
526        ((size_offsets = (int)strtoul(argv[op+1], &endptr, 10)), *endptr == 0))        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
527            *endptr == 0))
528      {      {
529      op++;      op++;
530      argc--;      argc--;
# Line 469  while (argc > 1 && argv[op][0] == '-') Line 532  while (argc > 1 && argv[op][0] == '-')
532  #if !defined NOPOSIX  #if !defined NOPOSIX
533    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
534  #endif  #endif
535      else if (strcmp(argv[op], "-C") == 0)
536        {
537        int rc;
538        printf("PCRE version %s\n", pcre_version());
539        printf("Compiled with\n");
540        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
541        printf("  %sUTF-8 support\n", rc? "" : "No ");
542        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
543        printf("  %sUnicode properties support\n", rc? "" : "No ");
544        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
545        printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
546        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
547        printf("  Internal link size = %d\n", rc);
548        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
549        printf("  POSIX malloc threshold = %d\n", rc);
550        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
551        printf("  Default match limit = %d\n", rc);
552        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
553        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
554        exit(0);
555        }
556    else    else
557      {      {
558      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
559      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
560        printf("  -C     show PCRE compile-time options and exit\n");
561      printf("  -d     debug: show compiled code; implies -i\n"      printf("  -d     debug: show compiled code; implies -i\n"
562             "  -i     show information about compiled pattern\n"             "  -i     show information about compiled pattern\n"
563               "  -m     output memory used information\n"
564             "  -o <n> set size of offsets vector to <n>\n");             "  -o <n> set size of offsets vector to <n>\n");
565  #if !defined NOPOSIX  #if !defined NOPOSIX
566      printf("  -p     use POSIX interface\n");      printf("  -p     use POSIX interface\n");
567  #endif  #endif
568      printf("  -s     output store information\n"      printf("  -s     output store (memory) used information\n"
569             "  -t     time compilation and execution\n");             "  -t     time compilation and execution\n");
570      return 1;      return 1;
571      }      }
# Line 490  while (argc > 1 && argv[op][0] == '-') Line 576  while (argc > 1 && argv[op][0] == '-')
576  /* Get the store for the offsets vector, and remember what it was */  /* Get the store for the offsets vector, and remember what it was */
577    
578  size_offsets_max = size_offsets;  size_offsets_max = size_offsets;
579  offsets = malloc(size_offsets_max * sizeof(int));  offsets = (int *)malloc(size_offsets_max * sizeof(int));
580  if (offsets == NULL)  if (offsets == NULL)
581    {    {
582    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
# Line 502  if (offsets == NULL) Line 588  if (offsets == NULL)
588    
589  if (argc > 1)  if (argc > 1)
590    {    {
591    infile = fopen(argv[op], "r");    infile = fopen(argv[op], "rb");
592    if (infile == NULL)    if (infile == NULL)
593      {      {
594      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
# Line 512  if (argc > 1) Line 598  if (argc > 1)
598    
599  if (argc > 2)  if (argc > 2)
600    {    {
601    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], "wb");
602    if (outfile == NULL)    if (outfile == NULL)
603      {      {
604      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
# Line 523  if (argc > 2) Line 609  if (argc > 2)
609  /* Set alternative malloc function */  /* Set alternative malloc function */
610    
611  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
612    pcre_free = new_free;
613    pcre_stack_malloc = stack_malloc;
614    pcre_stack_free = stack_free;
615    
616  /* Heading line, then prompt for first regex if stdin */  /* Heading line, then prompt for first regex if stdin */
617    
# Line 542  while (!done) Line 631  while (!done)
631    
632    const char *error;    const char *error;
633    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
634      unsigned char *to_file = NULL;
635    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
636      unsigned long int true_size, true_study_size = 0;
637      size_t size, regex_gotten_store;
638    int do_study = 0;    int do_study = 0;
639    int do_debug = debug;    int do_debug = debug;
640    int do_G = 0;    int do_G = 0;
641    int do_g = 0;    int do_g = 0;
642    int do_showinfo = showinfo;    int do_showinfo = showinfo;
643    int do_showrest = 0;    int do_showrest = 0;
644    int utf8 = 0;    int do_flip = 0;
645    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
646    
647      use_utf8 = 0;
648    
649    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
650    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
651    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
652      fflush(outfile);
653    
654    p = buffer;    p = buffer;
655    while (isspace(*p)) p++;    while (isspace(*p)) p++;
656    if (*p == 0) continue;    if (*p == 0) continue;
657    
658    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
659    complete, read more. */  
660      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
661        {
662        unsigned long int magic;
663        uschar sbuf[8];
664        FILE *f;
665    
666        p++;
667        pp = p + (int)strlen((char *)p);
668        while (isspace(pp[-1])) pp--;
669        *pp = 0;
670    
671        f = fopen((char *)p, "rb");
672        if (f == NULL)
673          {
674          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
675          continue;
676          }
677    
678        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
679    
680        true_size =
681          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
682        true_study_size =
683          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
684    
685        re = (real_pcre *)new_malloc(true_size);
686        regex_gotten_store = gotten_store;
687    
688        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
689    
690        magic = ((real_pcre *)re)->magic_number;
691        if (magic != MAGIC_NUMBER)
692          {
693          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
694            {
695            do_flip = 1;
696            }
697          else
698            {
699            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
700            fclose(f);
701            continue;
702            }
703          }
704    
705        fprintf(outfile, "Compiled regex%s loaded from %s\n",
706          do_flip? " (byte-inverted)" : "", p);
707    
708        /* Need to know if UTF-8 for printing data strings */
709    
710        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
711        use_utf8 = (options & PCRE_UTF8) != 0;
712    
713        /* Now see if there is any following study data */
714    
715        if (true_study_size != 0)
716          {
717          pcre_study_data *psd;
718    
719          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
720          extra->flags = PCRE_EXTRA_STUDY_DATA;
721    
722          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
723          extra->study_data = psd;
724    
725          if (fread(psd, 1, true_study_size, f) != true_study_size)
726            {
727            FAIL_READ:
728            fprintf(outfile, "Failed to read data from %s\n", p);
729            if (extra != NULL) new_free(extra);
730            if (re != NULL) new_free(re);
731            fclose(f);
732            continue;
733            }
734          fprintf(outfile, "Study data loaded from %s\n", p);
735          do_study = 1;     /* To get the data output if requested */
736          }
737        else fprintf(outfile, "No study data\n");
738    
739        fclose(f);
740        goto SHOW_INFO;
741        }
742    
743      /* In-line pattern (the usual case). Get the delimiter and seek the end of
744      the pattern; if it isn't complete, read more. */
745    
746    delimiter = *p++;    delimiter = *p++;
747    
# Line 583  while (!done) Line 763  while (!done)
763        }        }
764      if (*pp != 0) break;      if (*pp != 0) break;
765    
766      len = sizeof(buffer) - (pp - buffer);      len = BUFFER_SIZE - (pp - buffer);
767      if (len < 256)      if (len < 256)
768        {        {
769        fprintf(outfile, "** Expression too long - missing delimiter?\n");        fprintf(outfile, "** Expression too long - missing delimiter?\n");
# Line 606  while (!done) Line 786  while (!done)
786    
787    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
788    
789    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
790      for callouts. */
791    
792    *pp++ = 0;    *pp++ = 0;
793      strcpy((char *)pbuffer, (char *)p);
794    
795    /* Look for options after final delimiter */    /* Look for options after final delimiter */
796    
# Line 628  while (!done) Line 810  while (!done)
810    
811        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
812        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
813          case 'C': options |= PCRE_AUTO_CALLOUT; break;
814        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
815        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
816          case 'F': do_flip = 1; break;
817        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
818        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
819        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
820          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
821    
822  #if !defined NOPOSIX  #if !defined NOPOSIX
823        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
# Line 641  while (!done) Line 826  while (!done)
826        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
827        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
828        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
829        case '8': options |= PCRE_UTF8; utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
830          case '?': options |= PCRE_NO_UTF8_CHECK; break;
831    
832        case 'L':        case 'L':
833        ppp = pp;        ppp = pp;
# Line 656  while (!done) Line 842  while (!done)
842        pp = ppp;        pp = ppp;
843        break;        break;
844    
845          case '>':
846          to_file = pp;
847          while (*pp != 0) pp++;
848          while (isspace(pp[-1])) pp--;
849          *pp = 0;
850          break;
851    
852        case '\n': case ' ': break;        case '\n': case ' ': break;
853    
854        default:        default:
855        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
856        goto SKIP_DATA;        goto SKIP_DATA;
# Line 672  while (!done) Line 866  while (!done)
866      {      {
867      int rc;      int rc;
868      int cflags = 0;      int cflags = 0;
869    
870      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
871      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
872      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
# Line 681  while (!done) Line 876  while (!done)
876    
877      if (rc != 0)      if (rc != 0)
878        {        {
879        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
880        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
881        goto SKIP_DATA;        goto SKIP_DATA;
882        }        }
# Line 705  while (!done) Line 900  while (!done)
900          }          }
901        time_taken = clock() - start_time;        time_taken = clock() - start_time;
902        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
903          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
904          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
905        }        }
906    
907      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 722  while (!done) Line 917  while (!done)
917          {          {
918          for (;;)          for (;;)
919            {            {
920            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
921              {              {
922              done = 1;              done = 1;
923              goto CONTINUE;              goto CONTINUE;
# Line 740  while (!done) Line 935  while (!done)
935      info-returning functions. The old one has a limited interface and      info-returning functions. The old one has a limited interface and
936      returns only limited data. Check that it agrees with the newer one. */      returns only limited data. Check that it agrees with the newer one. */
937    
938        if (log_store)
939          fprintf(outfile, "Memory allocation (code space): %d\n",
940            (int)(gotten_store -
941                  sizeof(real_pcre) -
942                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
943    
944        /* Extract the size for possible writing before possibly flipping it,
945        and remember the store that was got. */
946    
947        true_size = ((real_pcre *)re)->size;
948        regex_gotten_store = gotten_store;
949    
950        /* If /S was present, study the regexp to generate additional info to
951        help with the matching. */
952    
953        if (do_study)
954          {
955          if (timeit)
956            {
957            register int i;
958            clock_t time_taken;
959            clock_t start_time = clock();
960            for (i = 0; i < LOOPREPEAT; i++)
961              extra = pcre_study(re, study_options, &error);
962            time_taken = clock() - start_time;
963            if (extra != NULL) free(extra);
964            fprintf(outfile, "  Study time %.3f milliseconds\n",
965              (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
966                (double)CLOCKS_PER_SEC);
967            }
968          extra = pcre_study(re, study_options, &error);
969          if (error != NULL)
970            fprintf(outfile, "Failed to study: %s\n", error);
971          else if (extra != NULL)
972            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
973          }
974    
975        /* If the 'F' option was present, we flip the bytes of all the integer
976        fields in the regex data block and the study block. This is to make it
977        possible to test PCRE's handling of byte-flipped patterns, e.g. those
978        compiled on a different architecture. */
979    
980        if (do_flip)
981          {
982          real_pcre *rre = (real_pcre *)re;
983          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
984          rre->size = byteflip(rre->size, sizeof(rre->size));
985          rre->options = byteflip(rre->options, sizeof(rre->options));
986          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
987          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
988          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
989          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
990          rre->name_table_offset = byteflip(rre->name_table_offset,
991            sizeof(rre->name_table_offset));
992          rre->name_entry_size = byteflip(rre->name_entry_size,
993            sizeof(rre->name_entry_size));
994          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
995    
996          if (extra != NULL)
997            {
998            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
999            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1000            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1001            }
1002          }
1003    
1004        /* Extract information from the compiled data if required */
1005    
1006        SHOW_INFO:
1007    
1008      if (do_showinfo)      if (do_showinfo)
1009        {        {
1010        unsigned long int get_options;        unsigned long int get_options, all_options;
1011        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1012        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char;
1013        size_t size;        int nameentrysize, namecount;
1014          const uschar *nametable;
1015    
1016        if (do_debug) print_internals(re);        if (do_debug)
1017            {
1018            fprintf(outfile, "------------------------------------------------------------------\n");
1019            print_internals(re, outfile);
1020            }
1021    
1022        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1023        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1024        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1025        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1026        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1027        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1028          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1029          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1030          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1031    
1032        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1033        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
# Line 774  while (!done) Line 1047  while (!done)
1047              get_options, old_options);              get_options, old_options);
1048          }          }
1049    
1050        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1051          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1052          size, gotten_store);          size, regex_gotten_store);
1053    
1054        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1055        if (backrefmax > 0)        if (backrefmax > 0)
1056          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
1057    
1058          if (namecount > 0)
1059            {
1060            fprintf(outfile, "Named capturing subpatterns:\n");
1061            while (namecount-- > 0)
1062              {
1063              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1064                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1065                GET2(nametable, 0));
1066              nametable += nameentrysize;
1067              }
1068            }
1069    
1070          /* The NOPARTIAL bit is a private bit in the options, so we have
1071          to fish it out via our back door */
1072    
1073          all_options = ((real_pcre *)re)->options;
1074          if (do_flip)
1075            {
1076            all_options = byteflip(all_options, sizeof(all_options));
1077            }
1078    
1079          if ((all_options & PCRE_NOPARTIAL) != 0)
1080            fprintf(outfile, "Partial matching not supported\n");
1081    
1082        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1083          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",
1084            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1085            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1086            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 791  while (!done) Line 1089  while (!done)
1089            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1090            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1091            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1092            ((get_options & PCRE_UTF8) != 0)? " utf8" : "");            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1093              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1094    
1095        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1096          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Case state changes\n");
# Line 806  while (!done) Line 1105  while (!done)
1105          }          }
1106        else        else
1107          {          {
1108          if (isprint(first_char))          int ch = first_char & 255;
1109            fprintf(outfile, "First char = \'%c\'\n", first_char);          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1110              "" : " (caseless)";
1111            if (isprint(ch))
1112              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1113          else          else
1114            fprintf(outfile, "First char = %d\n", first_char);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
1115          }          }
1116    
1117        if (need_char < 0)        if (need_char < 0)
# Line 818  while (!done) Line 1120  while (!done)
1120          }          }
1121        else        else
1122          {          {
1123          if (isprint(need_char))          int ch = need_char & 255;
1124            fprintf(outfile, "Need char = \'%c\'\n", need_char);          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1125              "" : " (caseless)";
1126            if (isprint(ch))
1127              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1128          else          else
1129            fprintf(outfile, "Need char = %d\n", need_char);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1130          }          }
       }  
1131    
1132      /* If /S was present, study the regexp to generate additional info to        /* Don't output study size; at present it is in any case a fixed
1133      help with the matching. */        value, but it varies, depending on the computer architecture, and
1134          so messes up the test suite. (And with the /F option, it might be
1135          flipped.) */
1136    
1137      if (do_study)        if (do_study)
       {  
       if (timeit)  
1138          {          {
1139          register int i;          if (extra == NULL)
1140          clock_t time_taken;            fprintf(outfile, "Study returned NULL\n");
1141          clock_t start_time = clock();          else
1142          for (i = 0; i < LOOPREPEAT; i++)            {
1143            extra = pcre_study(re, study_options, &error);            uschar *start_bits = NULL;
1144          time_taken = clock() - start_time;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1145          if (extra != NULL) free(extra);  
1146          fprintf(outfile, "  Study time %.3f milliseconds\n",            if (start_bits == NULL)
1147            ((double)time_taken * 1000.0)/              fprintf(outfile, "No starting byte set\n");
1148            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            else
1149                {
1150                int i;
1151                int c = 24;
1152                fprintf(outfile, "Starting byte set: ");
1153                for (i = 0; i < 256; i++)
1154                  {
1155                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1156                    {
1157                    if (c > 75)
1158                      {
1159                      fprintf(outfile, "\n  ");
1160                      c = 2;
1161                      }
1162                    if (isprint(i) && i != ' ')
1163                      {
1164                      fprintf(outfile, "%c ", i);
1165                      c += 2;
1166                      }
1167                    else
1168                      {
1169                      fprintf(outfile, "\\x%02x ", i);
1170                      c += 5;
1171                      }
1172                    }
1173                  }
1174                fprintf(outfile, "\n");
1175                }
1176              }
1177          }          }
1178          }
1179    
1180        extra = pcre_study(re, study_options, &error);      /* If the '>' option was present, we write out the regex to a file, and
1181        if (error != NULL)      that is all. The first 8 bytes of the file are the regex length and then
1182          fprintf(outfile, "Failed to study: %s\n", error);      the study length, in big-endian order. */
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
1183    
1184        else if (do_showinfo)      if (to_file != NULL)
1185          {
1186          FILE *f = fopen((char *)to_file, "wb");
1187          if (f == NULL)
1188            {
1189            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1190            }
1191          else
1192          {          {
1193          uschar *start_bits = NULL;          uschar sbuf[8];
1194          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          sbuf[0] = (true_size >> 24)  & 255;
1195          if (start_bits == NULL)          sbuf[1] = (true_size >> 16)  & 255;
1196            fprintf(outfile, "No starting character set\n");          sbuf[2] = (true_size >>  8)  & 255;
1197            sbuf[3] = (true_size)  & 255;
1198    
1199            sbuf[4] = (true_study_size >> 24)  & 255;
1200            sbuf[5] = (true_study_size >> 16)  & 255;
1201            sbuf[6] = (true_study_size >>  8)  & 255;
1202            sbuf[7] = (true_study_size)  & 255;
1203    
1204            if (fwrite(sbuf, 1, 8, f) < 8 ||
1205                fwrite(re, 1, true_size, f) < true_size)
1206              {
1207              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1208              }
1209          else          else
1210            {            {
1211            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1212            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1213              {              {
1214              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1215                    true_study_size)
1216                {                {
1217                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1218                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1219                }                }
1220                else fprintf(outfile, "Study data written to %s\n", to_file);
1221              }              }
           fprintf(outfile, "\n");  
1222            }            }
1223            fclose(f);
1224          }          }
1225          continue;  /* With next regex */
1226        }        }
1227      }      }        /* End of non-POSIX compile */
1228    
1229    /* Read data lines and test them */    /* Read data lines and test them */
1230    
# Line 896  while (!done) Line 1234  while (!done)
1234      unsigned char *bptr = dbuffer;      unsigned char *bptr = dbuffer;
1235      int *use_offsets = offsets;      int *use_offsets = offsets;
1236      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1237        int callout_data = 0;
1238        int callout_data_set = 0;
1239      int count, c;      int count, c;
1240      int copystrings = 0;      int copystrings = 0;
1241        int find_match_limit = 0;
1242      int getstrings = 0;      int getstrings = 0;
1243      int getlist = 0;      int getlist = 0;
1244      int gmatched = 0;      int gmatched = 0;
# Line 906  while (!done) Line 1247  while (!done)
1247    
1248      options = 0;      options = 0;
1249    
1250        pcre_callout = callout;
1251        first_callout = 1;
1252        callout_extra = 0;
1253        callout_count = 0;
1254        callout_fail_count = 999999;
1255        callout_fail_id = -1;
1256        show_malloc = 0;
1257    
1258      if (infile == stdin) printf("data> ");      if (infile == stdin) printf("data> ");
1259      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1260        {        {
1261        done = 1;        done = 1;
1262        goto CONTINUE;        goto CONTINUE;
# Line 927  while (!done) Line 1276  while (!done)
1276        {        {
1277        int i = 0;        int i = 0;
1278        int n = 0;        int n = 0;
1279    
1280        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1281          {          {
1282          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 957  while (!done) Line 1307  while (!done)
1307              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1308            if (*pt == '}')            if (*pt == '}')
1309              {              {
1310              unsigned char buffer[8];              unsigned char buff8[8];
1311              int ii, utn;              int ii, utn;
1312              utn = ord2utf8(c, buffer);              utn = ord2utf8(c, buff8);
1313              for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1314              c = buffer[ii];   /* Last byte */              c = buff8[ii];   /* Last byte */
1315              p = pt + 1;              p = pt + 1;
1316              break;              break;
1317              }              }
# Line 978  while (!done) Line 1328  while (!done)
1328            }            }
1329          break;          break;
1330    
1331          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1332          p--;          p--;
1333          continue;          continue;
1334    
1335            case '>':
1336            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1337            continue;
1338    
1339          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1340          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1341          continue;          continue;
# Line 991  while (!done) Line 1345  while (!done)
1345          continue;          continue;
1346    
1347          case 'C':          case 'C':
1348          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1349          copystrings |= 1 << n;            {
1350              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1351              copystrings |= 1 << n;
1352              }
1353            else if (isalnum(*p))
1354              {
1355              uschar name[256];
1356              uschar *npp = name;
1357              while (isalnum(*p)) *npp++ = *p++;
1358              *npp = 0;
1359              n = pcre_get_stringnumber(re, (char *)name);
1360              if (n < 0)
1361                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1362              else copystrings |= 1 << n;
1363              }
1364            else if (*p == '+')
1365              {
1366              callout_extra = 1;
1367              p++;
1368              }
1369            else if (*p == '-')
1370              {
1371              pcre_callout = NULL;
1372              p++;
1373              }
1374            else if (*p == '!')
1375              {
1376              callout_fail_id = 0;
1377              p++;
1378              while(isdigit(*p))
1379                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1380              callout_fail_count = 0;
1381              if (*p == '!')
1382                {
1383                p++;
1384                while(isdigit(*p))
1385                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1386                }
1387              }
1388            else if (*p == '*')
1389              {
1390              int sign = 1;
1391              callout_data = 0;
1392              if (*(++p) == '-') { sign = -1; p++; }
1393              while(isdigit(*p))
1394                callout_data = callout_data * 10 + *p++ - '0';
1395              callout_data *= sign;
1396              callout_data_set = 1;
1397              }
1398          continue;          continue;
1399    
1400          case 'G':          case 'G':
1401          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1402          getstrings |= 1 << n;            {
1403              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1404              getstrings |= 1 << n;
1405              }
1406            else if (isalnum(*p))
1407              {
1408              uschar name[256];
1409              uschar *npp = name;
1410              while (isalnum(*p)) *npp++ = *p++;
1411              *npp = 0;
1412              n = pcre_get_stringnumber(re, (char *)name);
1413              if (n < 0)
1414                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1415              else getstrings |= 1 << n;
1416              }
1417          continue;          continue;
1418    
1419          case 'L':          case 'L':
1420          getlist = 1;          getlist = 1;
1421          continue;          continue;
1422    
1423            case 'M':
1424            find_match_limit = 1;
1425            continue;
1426    
1427          case 'N':          case 'N':
1428          options |= PCRE_NOTEMPTY;          options |= PCRE_NOTEMPTY;
1429          continue;          continue;
# Line 1014  while (!done) Line 1434  while (!done)
1434            {            {
1435            size_offsets_max = n;            size_offsets_max = n;
1436            free(offsets);            free(offsets);
1437            use_offsets = offsets = malloc(size_offsets_max * sizeof(int));            use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1438            if (offsets == NULL)            if (offsets == NULL)
1439              {              {
1440              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
# Line 1023  while (!done) Line 1443  while (!done)
1443              }              }
1444            }            }
1445          use_size_offsets = n;          use_size_offsets = n;
1446          if (n == 0) use_offsets = NULL;          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1447            continue;
1448    
1449            case 'P':
1450            options |= PCRE_PARTIAL;
1451            continue;
1452    
1453            case 'S':
1454            show_malloc = 1;
1455          continue;          continue;
1456    
1457          case 'Z':          case 'Z':
1458          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1459          continue;          continue;
1460    
1461            case '?':
1462            options |= PCRE_NO_UTF8_CHECK;
1463            continue;
1464          }          }
1465        *q++ = c;        *q++ = c;
1466        }        }
# Line 1036  while (!done) Line 1468  while (!done)
1468      len = q - dbuffer;      len = q - dbuffer;
1469    
1470      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1471      support timing. */      support timing or playing with the match limit or callout data. */
1472    
1473  #if !defined NOPOSIX  #if !defined NOPOSIX
1474      if (posix || do_posix)      if (posix || do_posix)
1475        {        {
1476        int rc;        int rc;
1477        int eflags = 0;        int eflags = 0;
1478        regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);        regmatch_t *pmatch = NULL;
1479          if (use_size_offsets > 0)
1480            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1481        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1482        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1483    
# Line 1051  while (!done) Line 1485  while (!done)
1485    
1486        if (rc != 0)        if (rc != 0)
1487          {          {
1488          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1489          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1490          }          }
1491        else        else
1492          {          {
1493          size_t i;          size_t i;
1494          for (i = 0; i < use_size_offsets; i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1495            {            {
1496            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1497              {              {
1498              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1499              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1500                pmatch[i].rm_eo - pmatch[i].rm_so, utf8);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1501              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1502              if (i == 0 && do_showrest)              if (i == 0 && do_showrest)
1503                {                {
1504                fprintf(outfile, " 0+ ");                fprintf(outfile, " 0+ ");
1505                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1506                    outfile);
1507                fprintf(outfile, "\n");                fprintf(outfile, "\n");
1508                }                }
1509              }              }
# Line 1094  while (!done) Line 1529  while (!done)
1529              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1530          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1531          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1532            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1533            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
1534          }          }
1535    
1536        count = pcre_exec(re, extra, (char *)bptr, len,        /* If find_match_limit is set, we want to do repeated matches with
1537          start_offset, options | g_notempty, use_offsets, use_size_offsets);        varying limits in order to find the minimum value. */
1538    
1539          if (find_match_limit)
1540            {
1541            int min = 0;
1542            int mid = 64;
1543            int max = -1;
1544    
1545            if (extra == NULL)
1546              {
1547              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1548              extra->flags = 0;
1549              }
1550            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1551    
1552            for (;;)
1553              {
1554              extra->match_limit = mid;
1555              count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1556                options | g_notempty, use_offsets, use_size_offsets);
1557              if (count == PCRE_ERROR_MATCHLIMIT)
1558                {
1559                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1560                min = mid;
1561                mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1562                }
1563              else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1564                                     count == PCRE_ERROR_PARTIAL)
1565                {
1566                if (mid == min + 1)
1567                  {
1568                  fprintf(outfile, "Minimum match limit = %d\n", mid);
1569                  break;
1570                  }
1571                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1572                max = mid;
1573                mid = (min + mid)/2;
1574                }
1575              else break;    /* Some other error */
1576              }
1577    
1578            extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1579            }
1580    
1581          /* If callout_data is set, use the interface with additional data */
1582    
1583          else if (callout_data_set)
1584            {
1585            if (extra == NULL)
1586              {
1587              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1588              extra->flags = 0;
1589              }
1590            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1591            extra->callout_data = &callout_data;
1592            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1593              options | g_notempty, use_offsets, use_size_offsets);
1594            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1595            }
1596    
1597          /* The normal case is just to do the match once, with the default
1598          value of match_limit. */
1599    
1600          else
1601            {
1602            count = pcre_exec(re, extra, (char *)bptr, len,
1603              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1604            }
1605    
1606        if (count == 0)        if (count == 0)
1607          {          {
# Line 1119  while (!done) Line 1621  while (!done)
1621            else            else
1622              {              {
1623              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1624              pchars(bptr + use_offsets[i], use_offsets[i+1] - use_offsets[i], utf8);              (void)pchars(bptr + use_offsets[i],
1625                  use_offsets[i+1] - use_offsets[i], outfile);
1626              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1627              if (i == 0)              if (i == 0)
1628                {                {
1629                if (do_showrest)                if (do_showrest)
1630                  {                  {
1631                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
1632                  pchars(bptr + use_offsets[i+1], len - use_offsets[i+1], utf8);                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1633                      outfile);
1634                  fprintf(outfile, "\n");                  fprintf(outfile, "\n");
1635                  }                  }
1636                }                }
# Line 1184  while (!done) Line 1688  while (!done)
1688            }            }
1689          }          }
1690    
1691          /* There was a partial match */
1692    
1693          else if (count == PCRE_ERROR_PARTIAL)
1694            {
1695            fprintf(outfile, "Partial match\n");
1696            break;  /* Out of the /g loop */
1697            }
1698    
1699        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
1700        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end.
1701        We want to advance the start offset, and continue. Fudge the offset        We want to advance the start offset, and continue. In the case of UTF-8
1702        values to achieve this. We won't be at the end of the string - that        matching, the advance must be one character, not one byte. Fudge the
1703        was checked before setting g_notempty. */        offset values to achieve this. We won't be at the end of the string -
1704          that was checked before setting g_notempty. */
1705    
1706        else        else
1707          {          {
1708          if (g_notempty != 0)          if (g_notempty != 0)
1709            {            {
1710              int onechar = 1;
1711            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
1712            use_offsets[1] = start_offset + 1;            if (use_utf8)
1713                {
1714                while (start_offset + onechar < len)
1715                  {
1716                  int tb = bptr[start_offset+onechar];
1717                  if (tb <= 127) break;
1718                  tb &= 0xc0;
1719                  if (tb != 0 && tb != 0xc0) onechar++;
1720                  }
1721                }
1722              use_offsets[1] = start_offset + onechar;
1723            }            }
1724          else          else
1725            {            {
1726            if (gmatched == 0)   /* Error if no previous matches */            if (count == PCRE_ERROR_NOMATCH)
1727              {              {
1728              if (count == -1) fprintf(outfile, "No match\n");              if (gmatched == 0) fprintf(outfile, "No match\n");
               else fprintf(outfile, "Error %d\n", count);  
1729              }              }
1730              else fprintf(outfile, "Error %d\n", count);
1731            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
1732            }            }
1733          }          }
# Line 1255  while (!done) Line 1779  while (!done)
1779      }      }
1780    }    }
1781    
1782  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
1783  return 0;  return 0;
1784  }  }
1785    

Legend:
Removed from v.59  
changed lines
  Added in v.75

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12