/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 59 by nigel, Sat Feb 24 21:39:54 2007 UTC revision 63 by nigel, Sat Feb 24 21:40:03 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather untidy in places. */
8    
9  #include <ctype.h>  #include <ctype.h>
10  #include <stdio.h>  #include <stdio.h>
11  #include <string.h>  #include <string.h>
# Line 9  Line 13 
13  #include <time.h>  #include <time.h>
14  #include <locale.h>  #include <locale.h>
15    
16  /* Use the internal info for displaying the results of pcre_study(). */  /* We need the internal info for displaying the results of pcre_study(). Also
17    for getting the opcodes for showing compiled code. */
18    
19    #define PCRE_SPY        /* For Win32 build, import data, not export */
20  #include "internal.h"  #include "internal.h"
21    
22  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
# Line 29  Makefile. */ Line 35  Makefile. */
35  #endif  #endif
36  #endif  #endif
37    
38  #define LOOPREPEAT 20000  #define LOOPREPEAT 50000
39    
40    
41  static FILE *outfile;  static FILE *outfile;
42  static int log_store = 0;  static int log_store = 0;
43    static int callout_count;
44    static int callout_extra;
45    static int callout_fail_count;
46    static int callout_fail_id;
47    static int first_callout;
48    static int utf8;
49  static size_t gotten_store;  static size_t gotten_store;
50    
51    
# Line 48  static int utf8_table3[] = { Line 60  static int utf8_table3[] = {
60    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
61    
62    
63    
64    /*************************************************
65    *         Print compiled regex                   *
66    *************************************************/
67    
68    /* The code for doing this is held in a separate file that is also included in
69    pcre.c when it is compiled with the debug switch. It defines a function called
70    print_internals(), which uses a table of opcode lengths. The table must be
71    named OP_lengths; its contents come from the macro OP_LENGTHS. */
72    
73    static uschar OP_lengths[] = { OP_LENGTHS };
74    
75    #include "printint.c"
76    
77    
78    
79    /*************************************************
80    *          Read number from string               *
81    *************************************************/
82    
83    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
84    around with conditional compilation, just do the job by hand. It is only used
85    for unpicking the -o argument, so just keep it simple.
86    
87    Arguments:
88      str           string to be converted
89      endptr        where to put the end pointer
90    
91    Returns:        the converted value (an int)
92    */
93    
94    static int
95    get_value(unsigned char *str, unsigned char **endptr)
96    {
97    int result = 0;
98    while(*str != 0 && isspace(*str)) str++;
99    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
100    *endptr = str;
101    return(result);
102    }
103    
104    
105    
106  /*************************************************  /*************************************************
107  *       Convert character value to UTF-8         *  *       Convert character value to UTF-8         *
108  *************************************************/  *************************************************/
# Line 143  return i+1; Line 198  return i+1;
198    
199    
200    
201    /*************************************************
202    *             Print character string             *
203    *************************************************/
204    
205    /* Character string printing function. Must handle UTF-8 strings in utf8
206    mode. Yields number of characters printed. If handed a NULL file, just counts
207    chars without printing. */
208    
209    static int pchars(unsigned char *p, int length, FILE *f)
 /* Debugging function to print the internal form of the regex. This is the same  
 code as contained in pcre.c under the DEBUG macro. */  
   
 static const char *OP_names[] = {  
   "End", "\\A", "\\B", "\\b", "\\D", "\\d",  
   "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  
   "Opt", "^", "$", "Any", "chars", "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref", "Recurse",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Branumber", "Bra"  
 };  
   
   
 static void print_internals(pcre *re)  
210  {  {
211  unsigned char *code = ((real_pcre *)re)->code;  int c;
212    int yield = 0;
 fprintf(outfile, "------------------------------------------------------------------\n");  
213    
214  for(;;)  while (length-- > 0)
215    {    {
216    int c;    if (utf8)
   int charlength;  
   
   fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  
   
   if (*code >= OP_BRA)  
     {  
     if (*code - OP_BRA > EXTRACT_BASIC_MAX)  
       fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);  
     else  
       fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
   
   else switch(*code)  
217      {      {
218      case OP_END:      int rc = utf82ord(p, &c);
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     case OP_COND:  
     case OP_BRANUMBER:  
     case OP_REVERSE:  
     case OP_CREF:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", (code[1] << 8) | code[2]);  
     code += 3;  
     goto CLASS_REF_REPEAT;  
219    
220      case OP_CLASS:      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
221        {        {
222        int i, min, max;        length -= rc - 1;
223        code++;        p += rc;
224        fprintf(outfile, "    [");        if (c < 256 && isprint(c))
225            {
226        for (i = 0; i < 256; i++)          if (f != NULL) fprintf(f, "%c", c);
227          {          yield++;
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
228          }          }
229        fprintf(outfile, "]");        else
       code += 32;  
   
       CLASS_REF_REPEAT:  
   
       switch(*code)  
230          {          {
231          case OP_CRSTAR:          int n;
232          case OP_CRMINSTAR:          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
233          case OP_CRPLUS:          yield += n;
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
   
         case OP_CRRANGE:  
         case OP_CRMINRANGE:  
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
   
         default:  
         code--;  
234          }          }
235          continue;
236        }        }
237      break;      }
238    
239      /* Anything else is just a one-node item */     /* Not UTF-8, or malformed UTF-8  */
240    
241      default:    if (isprint(c = *(p++)))
242      fprintf(outfile, "    %s", OP_names[*code]);      {
243      break;      if (f != NULL) fprintf(f, "%c", c);
244        yield++;
245        }
246      else
247        {
248        if (f != NULL) fprintf(f, "\\x%02x", c);
249        yield += 4;
250      }      }
   
   code++;  
   fprintf(outfile, "\n");  
251    }    }
252    
253    return yield;
254  }  }
255    
256    
257    
258  /* Character string printing function. A "normal" and a UTF-8 version. */  /*************************************************
259    *              Callout function                  *
260    *************************************************/
261    
262  static void pchars(unsigned char *p, int length, int utf8)  /* Called from PCRE as a result of the (?C) item. We print out where we are in
263    the match. Yield zero unless more callouts than the fail count, or the callout
264    data is not zero. */
265    
266    static int callout(pcre_callout_block *cb)
267  {  {
268  int c;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
269  while (length-- > 0)  int i, pre_start, post_start;
270    
271    if (callout_extra)
272    {    {
273    if (utf8)    int i;
274      fprintf(f, "Callout %d: last capture = %d\n",
275        cb->callout_number, cb->capture_last);
276    
277      for (i = 0; i < cb->capture_top * 2; i += 2)
278      {      {
279      int rc = utf82ord(p, &c);      if (cb->offset_vector[i] < 0)
280      if (rc > 0)        fprintf(f, "%2d: <unset>\n", i/2);
281        else
282        {        {
283        length -= rc - 1;        fprintf(f, "%2d: ", i/2);
284        p += rc;        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
285        if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
286          else fprintf(outfile, "\\x{%02x}", c);        fprintf(f, "\n");
       continue;  
287        }        }
288      }      }
289      }
290    
291     /* Not UTF-8, or malformed UTF-8  */  /* Re-print the subject in canonical form, the first time or if giving full
292    details. On subsequent calls in the same match, we use pchars just to find the
293    printed lengths of the substrings. */
294    
295    if (f != NULL) fprintf(f, "--->");
296    
297    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
298    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
299      cb->current_position - cb->start_match, f);
300    
301    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
302      cb->subject_length - cb->current_position, f);
303    
304    if (f != NULL) fprintf(f, "\n");
305    
306    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  /* Always print appropriate indicators, with callout number if not already
307      else fprintf(outfile, "\\x%02x", c);  shown */
308    
309    if (callout_extra) fprintf(outfile, "    ");
310      else fprintf(outfile, "%3d ", cb->callout_number);
311    
312    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
313    fprintf(outfile, "^");
314    
315    if (post_start > 0)
316      {
317      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
318      fprintf(outfile, "^");
319    }    }
320    
321    fprintf(outfile, "\n");
322    
323    first_callout = 0;
324    
325    if ((int)(cb->callout_data) != 0)
326      {
327      fprintf(outfile, "Callout data = %d\n", (int)(cb->callout_data));
328      return (int)(cb->callout_data);
329      }
330    
331    return (cb->callout_number != callout_fail_id)? 0 :
332           (++callout_count >= callout_fail_count)? 1 : 0;
333  }  }
334    
335    
336    /*************************************************
337    *            Local malloc function               *
338    *************************************************/
339    
340  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
341  compiled re. */  compiled re. */
# Line 400  compiled re. */ Line 343  compiled re. */
343  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
344  {  {
345  gotten_store = size;  gotten_store = size;
 if (log_store)  
   fprintf(outfile, "Memory allocation (code space): %d\n",  
     (int)((int)size - offsetof(real_pcre, code[0])));  
346  return malloc(size);  return malloc(size);
347  }  }
348    
349    
350    
351    /*************************************************
352    *          Call pcre_fullinfo()                  *
353    *************************************************/
354    
355  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function */
356    
# Line 420  if ((rc = pcre_fullinfo(re, study, optio Line 363  if ((rc = pcre_fullinfo(re, study, optio
363    
364    
365    
366    /*************************************************
367    *                Main Program                    *
368    *************************************************/
369    
370  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
371  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 453  outfile = stdout; Line 399  outfile = stdout;
399    
400  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
401    {    {
402    char *endptr;    unsigned char *endptr;
403    
404    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
405      showstore = 1;      showstore = 1;
# Line 461  while (argc > 1 && argv[op][0] == '-') Line 407  while (argc > 1 && argv[op][0] == '-')
407    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
408    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
409    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
410        ((size_offsets = (int)strtoul(argv[op+1], &endptr, 10)), *endptr == 0))        ((size_offsets = get_value(argv[op+1], &endptr)), *endptr == 0))
411      {      {
412      op++;      op++;
413      argc--;      argc--;
# Line 469  while (argc > 1 && argv[op][0] == '-') Line 415  while (argc > 1 && argv[op][0] == '-')
415  #if !defined NOPOSIX  #if !defined NOPOSIX
416    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
417  #endif  #endif
418      else if (strcmp(argv[op], "-C") == 0)
419        {
420        int rc;
421        printf("PCRE version %s\n", pcre_version());
422        printf("Compiled with\n");
423        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
424        printf("  %sUTF-8 support\n", rc? "" : "No ");
425        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
426        printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
427        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
428        printf("  Internal link size = %d\n", rc);
429        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
430        printf("  POSIX malloc threshold = %d\n", rc);
431        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
432        printf("  Default match limit = %d\n", rc);
433        exit(0);
434        }
435    else    else
436      {      {
437      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
438      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
439        printf("  -C     show PCRE compile-time options and exit\n");
440      printf("  -d     debug: show compiled code; implies -i\n"      printf("  -d     debug: show compiled code; implies -i\n"
441             "  -i     show information about compiled pattern\n"             "  -i     show information about compiled pattern\n"
442             "  -o <n> set size of offsets vector to <n>\n");             "  -o <n> set size of offsets vector to <n>\n");
# Line 549  while (!done) Line 513  while (!done)
513    int do_g = 0;    int do_g = 0;
514    int do_showinfo = showinfo;    int do_showinfo = showinfo;
515    int do_showrest = 0;    int do_showrest = 0;
   int utf8 = 0;  
516    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
517    
518      utf8 = 0;
519    
520    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
521    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
522    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
523      fflush(outfile);
524    
525    p = buffer;    p = buffer;
526    while (isspace(*p)) p++;    while (isspace(*p)) p++;
# Line 633  while (!done) Line 599  while (!done)
599        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
600        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
601        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
602          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
603    
604  #if !defined NOPOSIX  #if !defined NOPOSIX
605        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
# Line 705  while (!done) Line 672  while (!done)
672          }          }
673        time_taken = clock() - start_time;        time_taken = clock() - start_time;
674        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
675          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
676          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
677        }        }
678    
679      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 740  while (!done) Line 707  while (!done)
707      info-returning functions. The old one has a limited interface and      info-returning functions. The old one has a limited interface and
708      returns only limited data. Check that it agrees with the newer one. */      returns only limited data. Check that it agrees with the newer one. */
709    
710        if (log_store)
711          fprintf(outfile, "Memory allocation (code space): %d\n",
712            (int)(gotten_store -
713                  sizeof(real_pcre) -
714                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
715    
716      if (do_showinfo)      if (do_showinfo)
717        {        {
718        unsigned long int get_options;        unsigned long int get_options;
719        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
720        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char;
721          int nameentrysize, namecount;
722          const uschar *nametable;
723        size_t size;        size_t size;
724    
725        if (do_debug) print_internals(re);        if (do_debug)
726            {
727            fprintf(outfile, "------------------------------------------------------------------\n");
728            print_internals(re, outfile);
729            }
730    
731        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
732        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
733        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
734        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
735        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
736        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
737          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
738          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
739          new_info(re, NULL, PCRE_INFO_NAMETABLE, &nametable);
740    
741        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
742        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
# Line 781  while (!done) Line 763  while (!done)
763        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
764        if (backrefmax > 0)        if (backrefmax > 0)
765          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
766    
767          if (namecount > 0)
768            {
769            fprintf(outfile, "Named capturing subpatterns:\n");
770            while (namecount-- > 0)
771              {
772              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
773                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
774                GET2(nametable, 0));
775              nametable += nameentrysize;
776              }
777            }
778    
779        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
780          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
781            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
# Line 806  while (!done) Line 801  while (!done)
801          }          }
802        else        else
803          {          {
804          if (isprint(first_char))          int ch = first_char & 255;
805            fprintf(outfile, "First char = \'%c\'\n", first_char);          char *caseless = ((first_char & REQ_CASELESS) == 0)?
806              "" : " (caseless)";
807            if (isprint(ch))
808              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
809          else          else
810            fprintf(outfile, "First char = %d\n", first_char);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
811          }          }
812    
813        if (need_char < 0)        if (need_char < 0)
# Line 818  while (!done) Line 816  while (!done)
816          }          }
817        else        else
818          {          {
819          if (isprint(need_char))          int ch = need_char & 255;
820            fprintf(outfile, "Need char = \'%c\'\n", need_char);          char *caseless = ((need_char & REQ_CASELESS) == 0)?
821              "" : " (caseless)";
822            if (isprint(ch))
823              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
824          else          else
825            fprintf(outfile, "Need char = %d\n", need_char);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
826          }          }
827        }        }
828    
# Line 840  while (!done) Line 841  while (!done)
841          time_taken = clock() - start_time;          time_taken = clock() - start_time;
842          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
843          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
844            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
845            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
846          }          }
847    
848        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 852  while (!done) Line 853  while (!done)
853    
854        else if (do_showinfo)        else if (do_showinfo)
855          {          {
856            size_t size;
857          uschar *start_bits = NULL;          uschar *start_bits = NULL;
858            new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);
859          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
860            fprintf(outfile, "Study size = %d\n", size);
861          if (start_bits == NULL)          if (start_bits == NULL)
862            fprintf(outfile, "No starting character set\n");            fprintf(outfile, "No starting character set\n");
863          else          else
# Line 896  while (!done) Line 900  while (!done)
900      unsigned char *bptr = dbuffer;      unsigned char *bptr = dbuffer;
901      int *use_offsets = offsets;      int *use_offsets = offsets;
902      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
903        int callout_data = 0;
904        int callout_data_set = 0;
905      int count, c;      int count, c;
906      int copystrings = 0;      int copystrings = 0;
907        int find_match_limit = 0;
908      int getstrings = 0;      int getstrings = 0;
909      int getlist = 0;      int getlist = 0;
910      int gmatched = 0;      int gmatched = 0;
# Line 906  while (!done) Line 913  while (!done)
913    
914      options = 0;      options = 0;
915    
916        pcre_callout = callout;
917        first_callout = 1;
918        callout_extra = 0;
919        callout_count = 0;
920        callout_fail_count = 999999;
921        callout_fail_id = -1;
922    
923      if (infile == stdin) printf("data> ");      if (infile == stdin) printf("data> ");
924      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
925        {        {
# Line 927  while (!done) Line 941  while (!done)
941        {        {
942        int i = 0;        int i = 0;
943        int n = 0;        int n = 0;
944    
945        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
946          {          {
947          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 991  while (!done) Line 1006  while (!done)
1006          continue;          continue;
1007    
1008          case 'C':          case 'C':
1009          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1010          copystrings |= 1 << n;            {
1011              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1012              copystrings |= 1 << n;
1013              }
1014            else if (isalnum(*p))
1015              {
1016              uschar name[256];
1017              uschar *pp = name;
1018              while (isalnum(*p)) *pp++ = *p++;
1019              *pp = 0;
1020              n = pcre_get_stringnumber(re, name);
1021              if (n < 0)
1022                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1023              else copystrings |= 1 << n;
1024              }
1025            else if (*p == '+')
1026              {
1027              callout_extra = 1;
1028              p++;
1029              }
1030            else if (*p == '-')
1031              {
1032              pcre_callout = NULL;
1033              p++;
1034              }
1035            else if (*p == '!')
1036              {
1037              callout_fail_id = 0;
1038              p++;
1039              while(isdigit(*p))
1040                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1041              callout_fail_count = 0;
1042              if (*p == '!')
1043                {
1044                p++;
1045                while(isdigit(*p))
1046                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1047                }
1048              }
1049            else if (*p == '*')
1050              {
1051              int sign = 1;
1052              callout_data = 0;
1053              if (*(++p) == '-') { sign = -1; p++; }
1054              while(isdigit(*p))
1055                callout_data = callout_data * 10 + *p++ - '0';
1056              callout_data *= sign;
1057              callout_data_set = 1;
1058              }
1059          continue;          continue;
1060    
1061          case 'G':          case 'G':
1062          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1063          getstrings |= 1 << n;            {
1064              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1065              getstrings |= 1 << n;
1066              }
1067            else if (isalnum(*p))
1068              {
1069              uschar name[256];
1070              uschar *pp = name;
1071              while (isalnum(*p)) *pp++ = *p++;
1072              *pp = 0;
1073              n = pcre_get_stringnumber(re, name);
1074              if (n < 0)
1075                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1076              else getstrings |= 1 << n;
1077              }
1078          continue;          continue;
1079    
1080          case 'L':          case 'L':
1081          getlist = 1;          getlist = 1;
1082          continue;          continue;
1083    
1084            case 'M':
1085            find_match_limit = 1;
1086            continue;
1087    
1088          case 'N':          case 'N':
1089          options |= PCRE_NOTEMPTY;          options |= PCRE_NOTEMPTY;
1090          continue;          continue;
# Line 1023  while (!done) Line 1104  while (!done)
1104              }              }
1105            }            }
1106          use_size_offsets = n;          use_size_offsets = n;
1107          if (n == 0) use_offsets = NULL;          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1108          continue;          continue;
1109    
1110          case 'Z':          case 'Z':
# Line 1036  while (!done) Line 1117  while (!done)
1117      len = q - dbuffer;      len = q - dbuffer;
1118    
1119      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1120      support timing. */      support timing or playing with the match limit or callout data. */
1121    
1122  #if !defined NOPOSIX  #if !defined NOPOSIX
1123      if (posix || do_posix)      if (posix || do_posix)
1124        {        {
1125        int rc;        int rc;
1126        int eflags = 0;        int eflags = 0;
1127        regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);        regmatch_t *pmatch = NULL;
1128          if (use_size_offsets > 0)
1129            pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
1130        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1131        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1132    
# Line 1057  while (!done) Line 1140  while (!done)
1140        else        else
1141          {          {
1142          size_t i;          size_t i;
1143          for (i = 0; i < use_size_offsets; i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1144            {            {
1145            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1146              {              {
1147              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1148              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1149                pmatch[i].rm_eo - pmatch[i].rm_so, utf8);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1150              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1151              if (i == 0 && do_showrest)              if (i == 0 && do_showrest)
1152                {                {
1153                fprintf(outfile, " 0+ ");                fprintf(outfile, " 0+ ");
1154                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1155                    outfile);
1156                fprintf(outfile, "\n");                fprintf(outfile, "\n");
1157                }                }
1158              }              }
# Line 1094  while (!done) Line 1178  while (!done)
1178              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1179          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1180          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1181            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1182            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
1183          }          }
1184    
1185        count = pcre_exec(re, extra, (char *)bptr, len,        /* If find_match_limit is set, we want to do repeated matches with
1186          varying limits in order to find the minimum value. */
1187    
1188          if (find_match_limit)
1189            {
1190            int min = 0;
1191            int mid = 64;
1192            int max = -1;
1193    
1194            if (extra == NULL)
1195              {
1196              extra = malloc(sizeof(pcre_extra));
1197              extra->flags = 0;
1198              }
1199            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1200    
1201            for (;;)
1202              {
1203              extra->match_limit = mid;
1204              count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1205                options | g_notempty, use_offsets, use_size_offsets);
1206              if (count == PCRE_ERROR_MATCHLIMIT)
1207                {
1208                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1209                min = mid;
1210                mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1211                }
1212              else if (count >= 0 || count == PCRE_ERROR_NOMATCH)
1213                {
1214                if (mid == min + 1)
1215                  {
1216                  fprintf(outfile, "Minimum match limit = %d\n", mid);
1217                  break;
1218                  }
1219                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1220                max = mid;
1221                mid = (min + mid)/2;
1222                }
1223              else break;    /* Some other error */
1224              }
1225    
1226            extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1227            }
1228    
1229          /* If callout_data is set, use the interface with additional data */
1230    
1231          else if (callout_data_set)
1232            {
1233            if (extra == NULL)
1234              {
1235              extra = malloc(sizeof(pcre_extra));
1236              extra->flags = 0;
1237              }
1238            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1239            extra->callout_data = (void *)callout_data;
1240            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1241              options | g_notempty, use_offsets, use_size_offsets);
1242            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1243            }
1244    
1245          /* The normal case is just to do the match once, with the default
1246          value of match_limit. */
1247    
1248          else count = pcre_exec(re, extra, (char *)bptr, len,
1249          start_offset, options | g_notempty, use_offsets, use_size_offsets);          start_offset, options | g_notempty, use_offsets, use_size_offsets);
1250    
1251        if (count == 0)        if (count == 0)
# Line 1119  while (!done) Line 1266  while (!done)
1266            else            else
1267              {              {
1268              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1269              pchars(bptr + use_offsets[i], use_offsets[i+1] - use_offsets[i], utf8);              (void)pchars(bptr + use_offsets[i],
1270                  use_offsets[i+1] - use_offsets[i], outfile);
1271              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1272              if (i == 0)              if (i == 0)
1273                {                {
1274                if (do_showrest)                if (do_showrest)
1275                  {                  {
1276                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
1277                  pchars(bptr + use_offsets[i+1], len - use_offsets[i+1], utf8);                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1278                      outfile);
1279                  fprintf(outfile, "\n");                  fprintf(outfile, "\n");
1280                  }                  }
1281                }                }

Legend:
Removed from v.59  
changed lines
  Added in v.63

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12