/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 19 by nigel, Sat Feb 24 21:38:33 2007 UTC | revision 41 by nigel, Sat Feb 24 21:39:17 2007 UTC
# Line 7  Line 7 
7  #include <string.h>  #include <string.h>
8  #include <stdlib.h>  #include <stdlib.h>
9  #include <time.h>  #include <time.h>
10    #include <locale.h>
11    
12  /* Use the internal info for displaying the results of pcre_study(). */  /* Use the internal info for displaying the results of pcre_study(). */
13    
14  #include "internal.h"  #include "internal.h"
15    
16    /* It is possible to compile this test program without including support for
17    testing the POSIX interface, though this is not available via the standard
18    Makefile. */
19    
20    #if !defined NOPOSIX
21  #include "pcreposix.h"  #include "pcreposix.h"
22    #endif
23    
24  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
25  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 21  Line 29 
29  #endif  #endif
30  #endif  #endif
31    
32    #define LOOPREPEAT 20000
33    
34    
35  static FILE *outfile;  static FILE *outfile;
36  static int log_store = 0;  static int log_store = 0;
# Line 32  code as contained in pcre.c under the DE Line 42  code as contained in pcre.c under the DE
42    
43  static const char *OP_names[] = {  static const char *OP_names[] = {
44    "End", "\\A", "\\B", "\\b", "\\D", "\\d",    "End", "\\A", "\\B", "\\b", "\\D", "\\d",
45    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
46    "not",    "Opt", "^", "$", "Any", "chars", "not",
47    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
48    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
49    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
50    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
51    "class", "negclass", "Ref",    "class", "Ref",
52    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
53      "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
54    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
55  };  };
56    
# Line 48  static void print_internals(pcre *re) Line 59  static void print_internals(pcre *re)
59  {  {
60  unsigned char *code = ((real_pcre *)re)->code;  unsigned char *code = ((real_pcre *)re)->code;
61    
62  printf("------------------------------------------------------------------\n");  fprintf(outfile, "------------------------------------------------------------------\n");
63    
64  for(;;)  for(;;)
65    {    {
66    int c;    int c;
67    int charlength;    int charlength;
68    
69    printf("%3d ", code - ((real_pcre *)re)->code);    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));
70    
71    if (*code >= OP_BRA)    if (*code >= OP_BRA)
72      {      {
73      printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);      fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
74      code += 2;      code += 2;
75      }      }
76    
77    else switch(*code)    else switch(*code)
78      {      {
79      case OP_END:      case OP_END:
80      printf("    %s\n", OP_names[*code]);      fprintf(outfile, "    %s\n", OP_names[*code]);
81      printf("------------------------------------------------------------------\n");      fprintf(outfile, "------------------------------------------------------------------\n");
82      return;      return;
83    
84        case OP_OPT:
85        fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);
86        code++;
87        break;
88    
89        case OP_COND:
90        fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);
91        code += 2;
92        break;
93    
94        case OP_CREF:
95        fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);
96        code++;
97        break;
98    
99      case OP_CHARS:      case OP_CHARS:
100      charlength = *(++code);      charlength = *(++code);
101      printf("%3d ", charlength);      fprintf(outfile, "%3d ", charlength);
102      while (charlength-- > 0)      while (charlength-- > 0)
103        if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);        if (isprint(c = *(++code))) fprintf(outfile, "%c", c);
104            else fprintf(outfile, "\\x%02x", c);
105      break;      break;
106    
107      case OP_KETRMAX:      case OP_KETRMAX:
# Line 83  for(;;) Line 110  for(;;)
110      case OP_KET:      case OP_KET:
111      case OP_ASSERT:      case OP_ASSERT:
112      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
113        case OP_ASSERTBACK:
114        case OP_ASSERTBACK_NOT:
115      case OP_ONCE:      case OP_ONCE:
116      printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);      fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
117        code += 2;
118        break;
119    
120        case OP_REVERSE:
121        fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
122      code += 2;      code += 2;
123      break;      break;
124    
# Line 101  for(;;) Line 135  for(;;)
135      case OP_TYPEQUERY:      case OP_TYPEQUERY:
136      case OP_TYPEMINQUERY:      case OP_TYPEMINQUERY:
137      if (*code >= OP_TYPESTAR)      if (*code >= OP_TYPESTAR)
138        printf("    %s", OP_names[code[1]]);        fprintf(outfile, "    %s", OP_names[code[1]]);
139      else if (isprint(c = code[1])) printf("    %c", c);      else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);
140        else printf("    \\x%02x", c);        else fprintf(outfile, "    \\x%02x", c);
141      printf("%s", OP_names[*code++]);      fprintf(outfile, "%s", OP_names[*code++]);
142      break;      break;
143    
144      case OP_EXACT:      case OP_EXACT:
145      case OP_UPTO:      case OP_UPTO:
146      case OP_MINUPTO:      case OP_MINUPTO:
147      if (isprint(c = code[3])) printf("    %c{", c);      if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);
148        else printf("    \\x%02x{", c);        else fprintf(outfile, "    \\x%02x{", c);
149      if (*code != OP_EXACT) printf(",");      if (*code != OP_EXACT) fprintf(outfile, ",");
150      printf("%d}", (code[1] << 8) + code[2]);      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
151      if (*code == OP_MINUPTO) printf("?");      if (*code == OP_MINUPTO) fprintf(outfile, "?");
152      code += 3;      code += 3;
153      break;      break;
154    
155      case OP_TYPEEXACT:      case OP_TYPEEXACT:
156      case OP_TYPEUPTO:      case OP_TYPEUPTO:
157      case OP_TYPEMINUPTO:      case OP_TYPEMINUPTO:
158      printf("    %s{", OP_names[code[3]]);      fprintf(outfile, "    %s{", OP_names[code[3]]);
159      if (*code != OP_TYPEEXACT) printf("0,");      if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");
160      printf("%d}", (code[1] << 8) + code[2]);      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
161      if (*code == OP_TYPEMINUPTO) printf("?");      if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");
162      code += 3;      code += 3;
163      break;      break;
164    
165      case OP_NOT:      case OP_NOT:
166      if (isprint(c = *(++code))) printf("    [^%c]", c);      if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);
167        else printf("    [^\\x%02x]", c);        else fprintf(outfile, "    [^\\x%02x]", c);
168      break;      break;
169    
170      case OP_NOTSTAR:      case OP_NOTSTAR:
# Line 139  for(;;) Line 173  for(;;)
173      case OP_NOTMINPLUS:      case OP_NOTMINPLUS:
174      case OP_NOTQUERY:      case OP_NOTQUERY:
175      case OP_NOTMINQUERY:      case OP_NOTMINQUERY:
176      if (isprint(c = code[1])) printf("    [^%c]", c);      if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);
177        else printf("    [^\\x%02x]", c);        else fprintf(outfile, "    [^\\x%02x]", c);
178      printf("%s", OP_names[*code++]);      fprintf(outfile, "%s", OP_names[*code++]);
179      break;      break;
180    
181      case OP_NOTEXACT:      case OP_NOTEXACT:
182      case OP_NOTUPTO:      case OP_NOTUPTO:
183      case OP_NOTMINUPTO:      case OP_NOTMINUPTO:
184      if (isprint(c = code[3])) printf("    [^%c]{", c);      if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);
185        else printf("    [^\\x%02x]{", c);        else fprintf(outfile, "    [^\\x%02x]{", c);
186      if (*code != OP_NOTEXACT) printf(",");      if (*code != OP_NOTEXACT) fprintf(outfile, ",");
187      printf("%d}", (code[1] << 8) + code[2]);      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
188      if (*code == OP_NOTMINUPTO) printf("?");      if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");
189      code += 3;      code += 3;
190      break;      break;
191    
192      case OP_REF:      case OP_REF:
193      printf("    \\%d", *(++code));      fprintf(outfile, "    \\%d", *(++code));
194      code++;      code++;
195      goto CLASS_REF_REPEAT;      goto CLASS_REF_REPEAT;
196    
197      case OP_CLASS:      case OP_CLASS:
     case OP_NEGCLASS:  
198        {        {
199        int i, min, max;        int i, min, max;
200        if (*code++ == OP_CLASS) printf("    [");        code++;
201          else printf("   ^[");        fprintf(outfile, "    [");
202    
203        for (i = 0; i < 256; i++)        for (i = 0; i < 256; i++)
204          {          {
# Line 174  for(;;) Line 207  for(;;)
207            int j;            int j;
208            for (j = i+1; j < 256; j++)            for (j = i+1; j < 256; j++)
209              if ((code[j/8] & (1 << (j&7))) == 0) break;              if ((code[j/8] & (1 << (j&7))) == 0) break;
210            if (i == '-' || i == ']') printf("\\");            if (i == '-' || i == ']') fprintf(outfile, "\\");
211            if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);            if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);
212            if (--j > i)            if (--j > i)
213              {              {
214              printf("-");              fprintf(outfile, "-");
215              if (j == '-' || j == ']') printf("\\");              if (j == '-' || j == ']') fprintf(outfile, "\\");
216              if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);              if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);
217              }              }
218            i = j;            i = j;
219            }            }
220          }          }
221        printf("]");        fprintf(outfile, "]");
222        code += 32;        code += 32;
223    
224        CLASS_REF_REPEAT:        CLASS_REF_REPEAT:
# Line 198  for(;;) Line 231  for(;;)
231          case OP_CRMINPLUS:          case OP_CRMINPLUS:
232          case OP_CRQUERY:          case OP_CRQUERY:
233          case OP_CRMINQUERY:          case OP_CRMINQUERY:
234          printf("%s", OP_names[*code]);          fprintf(outfile, "%s", OP_names[*code]);
235          break;          break;
236    
237          case OP_CRRANGE:          case OP_CRRANGE:
238          case OP_CRMINRANGE:          case OP_CRMINRANGE:
239          min = (code[1] << 8) + code[2];          min = (code[1] << 8) + code[2];
240          max = (code[3] << 8) + code[4];          max = (code[3] << 8) + code[4];
241          if (max == 0) printf("{%d,}", min);          if (max == 0) fprintf(outfile, "{%d,}", min);
242          else printf("{%d,%d}", min, max);          else fprintf(outfile, "{%d,%d}", min, max);
243          if (*code == OP_CRMINRANGE) printf("?");          if (*code == OP_CRMINRANGE) fprintf(outfile, "?");
244          code += 4;          code += 4;
245          break;          break;
246    
# Line 220  for(;;) Line 253  for(;;)
253      /* Anything else is just a one-node item */      /* Anything else is just a one-node item */
254    
255      default:      default:
256      printf("    %s", OP_names[*code]);      fprintf(outfile, "    %s", OP_names[*code]);
257      break;      break;
258      }      }
259    
260    code++;    code++;
261    printf("\n");    fprintf(outfile, "\n");
262    }    }
263  }  }
264    
# Line 248  compiled re. */ Line 281  compiled re. */
281    
282  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
283  {  {
284  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  if (log_store)
285      fprintf(outfile, "Memory allocation (code space): %d\n",
286        (int)((int)size - offsetof(real_pcre, code[0])));
287  return malloc(size);  return malloc(size);
288  }  }
289    
# Line 266  int study_options = 0; Line 301  int study_options = 0;
301  int op = 1;  int op = 1;
302  int timeit = 0;  int timeit = 0;
303  int showinfo = 0;  int showinfo = 0;
304    int showstore = 0;
305  int posix = 0;  int posix = 0;
306  int debug = 0;  int debug = 0;
307  int done = 0;  int done = 0;
# Line 280  outfile = stdout; Line 316  outfile = stdout;
316    
317  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
318    {    {
319    if (strcmp(argv[op], "-s") == 0) log_store = 1;    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
320        showstore = 1;
321    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
322    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
323    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
# Line 288  while (argc > 1 && argv[op][0] == '-') Line 325  while (argc > 1 && argv[op][0] == '-')
325    else    else
326      {      {
327      printf("*** Unknown option %s\n", argv[op]);      printf("*** Unknown option %s\n", argv[op]);
328        printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");
329        printf("  -d   debug: show compiled code; implies -i\n"
330               "  -i   show information about compiled pattern\n"
331               "  -p   use POSIX interface\n"
332               "  -s   output store information\n"
333               "  -t   time compilation and execution\n");
334      return 1;      return 1;
335      }      }
336    op++;    op++;
# Line 320  if (argc > 2) Line 363  if (argc > 2)
363    
364  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
365    
366  /* Heading line, then prompt for first re if stdin */  /* Heading line, then prompt for first regex if stdin */
367    
 fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");  
368  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  fprintf(outfile, "PCRE version %s\n\n", pcre_version());
369    
370  /* Main loop */  /* Main loop */
# Line 331  while (!done) Line 373  while (!done)
373    {    {
374    pcre *re = NULL;    pcre *re = NULL;
375    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
376    
377    #if !defined NOPOSIX  /* There are still compilers that require no indent */
378    regex_t preg;    regex_t preg;
379    #endif
380    
381    const char *error;    const char *error;
382    unsigned char *p, *pp;    unsigned char *p, *pp, *ppp;
383      unsigned const char *tables = NULL;
384    int do_study = 0;    int do_study = 0;
385    int do_debug = 0;    int do_debug = debug;
386      int do_G = 0;
387      int do_g = 0;
388      int do_showinfo = showinfo;
389      int do_showrest = 0;
390    int do_posix = 0;    int do_posix = 0;
391    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
392    
393    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
394    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
395    if (infile != stdin) fprintf(outfile, (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
396    
397    p = buffer;    p = buffer;
398    while (isspace(*p)) p++;    while (isspace(*p)) p++;
# Line 352  while (!done) Line 403  while (!done)
403    
404    delimiter = *p++;    delimiter = *p++;
405    
406    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
407      {      {
408      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
409      goto SKIP_DATA;      goto SKIP_DATA;
410      }      }
411    
# Line 362  while (!done) Line 413  while (!done)
413    
414    for(;;)    for(;;)
415      {      {
416      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
417          {
418          if (*pp == '\\' && pp[1] != 0) pp++;
419            else if (*pp == delimiter) break;
420          pp++;
421          }
422      if (*pp != 0) break;      if (*pp != 0) break;
423    
424      len = sizeof(buffer) - (pp - buffer);      len = sizeof(buffer) - (pp - buffer);
# Line 379  while (!done) Line 435  while (!done)
435        done = 1;        done = 1;
436        goto CONTINUE;        goto CONTINUE;
437        }        }
438      if (infile != stdin) fprintf(outfile, (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
439      }      }
440    
441      /* If the first character after the delimiter is backslash, make
442      the pattern end with backslash. This is purely to provide a way
443      of testing for the error message when a pattern ends with backslash. */
444    
445      if (pp[1] == '\\') *pp++ = '\\';
446    
447    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter */
448    
449    *pp++ = 0;    *pp++ = 0;
# Line 390  while (!done) Line 452  while (!done)
452    
453    options = 0;    options = 0;
454    study_options = 0;    study_options = 0;
455      log_store = showstore;  /* default from command line */
456    
457    while (*pp != 0)    while (*pp != 0)
458      {      {
459      switch (*pp++)      switch (*pp++)
460        {        {
461          case 'g': do_g = 1; break;
462        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
463        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
464        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
465        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
466    
467          case '+': do_showrest = 1; break;
468        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
469        case 'D': do_debug = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
470        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
471          case 'G': do_G = 1; break;
472          case 'I': do_showinfo = 1; break;
473          case 'M': log_store = 1; break;
474    
475    #if !defined NOPOSIX
476        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
477    #endif
478    
479        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
       case 'I': study_options |= PCRE_CASELESS; break;  
480        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
481        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
482    
483          case 'L':
484          ppp = pp;
485          while (*ppp != '\n' && *ppp != ' ') ppp++;
486          *ppp = 0;
487          if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
488            {
489            fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
490            goto SKIP_DATA;
491            }
492          tables = pcre_maketables();
493          pp = ppp;
494          break;
495    
496        case '\n': case ' ': break;        case '\n': case ' ': break;
497        default:        default:
498        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
# Line 414  while (!done) Line 501  while (!done)
501      }      }
502    
503    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
504    timing, showing, or debugging options. */    timing, showing, or debugging options, nor the ability to pass over
505      local character tables. */
506    
507    #if !defined NOPOSIX
508    if (posix || do_posix)    if (posix || do_posix)
509      {      {
510      int rc;      int rc;
# Line 438  while (!done) Line 527  while (!done)
527    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
528    
529    else    else
530    #endif  /* !defined NOPOSIX */
531    
532      {      {
533      if (timeit)      if (timeit)
534        {        {
535        register int i;        register int i;
536        clock_t time_taken;        clock_t time_taken;
537        clock_t start_time = clock();        clock_t start_time = clock();
538        for (i = 0; i < 4000; i++)        for (i = 0; i < LOOPREPEAT; i++)
539          {          {
540          re = pcre_compile((char *)p, options, &error, &erroroffset);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
541          if (re != NULL) free(re);          if (re != NULL) free(re);
542          }          }
543        time_taken = clock() - start_time;        time_taken = clock() - start_time;
544        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
545          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          ((double)time_taken * 1000.0) /
546            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
547        }        }
548    
549      re = pcre_compile((char *)p, options, &error, &erroroffset);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
550    
551      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
552      if non-interactive. */      if non-interactive. */
# Line 478  while (!done) Line 570  while (!done)
570            }            }
571          fprintf(outfile, "\n");          fprintf(outfile, "\n");
572          }          }
573        continue;        goto CONTINUE;
574        }        }
575    
576      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required */
577    
578      if (showinfo || do_debug)      if (do_showinfo)
579        {        {
580        int first_char, count;        int first_char, count;
581    
582        if (debug || do_debug) print_internals(re);        if (do_debug) print_internals(re);
583    
584        count = pcre_info(re, &options, &first_char);        count = pcre_info(re, &options, &first_char);
585        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
# Line 505  while (!done) Line 597  while (!done)
597              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
598              ((options & PCRE_EXTRA) != 0)? " extra" : "",              ((options & PCRE_EXTRA) != 0)? " extra" : "",
599              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");
600    
601            if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
602              fprintf(outfile, "Case state changes\n");
603    
604          if (first_char == -1)          if (first_char == -1)
605            {            {
606            fprintf(outfile, "First char at start or follows \\n\n");            fprintf(outfile, "First char at start or follows \\n\n");
# Line 520  while (!done) Line 616  while (!done)
616            else            else
617              fprintf(outfile, "First char = %d\n", first_char);              fprintf(outfile, "First char = %d\n", first_char);
618            }            }
619    
620            if (((((real_pcre *)re)->options) & PCRE_REQCHSET) != 0)
621              {
622              int req_char = ((real_pcre *)re)->req_char;
623              if (isprint(req_char))
624                fprintf(outfile, "Req char = \'%c\'\n", req_char);
625              else
626                fprintf(outfile, "Req char = %d\n", req_char);
627              }
628            else fprintf(outfile, "No req char\n");
629          }          }
630        }        }
631    
# Line 533  while (!done) Line 639  while (!done)
639          register int i;          register int i;
640          clock_t time_taken;          clock_t time_taken;
641          clock_t start_time = clock();          clock_t start_time = clock();
642          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
643            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
644          time_taken = clock() - start_time;          time_taken = clock() - start_time;
645          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
646          fprintf(outfile, "  Study time %.2f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
647            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            ((double)time_taken * 1000.0)/
648              ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
649          }          }
650    
651        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 550  while (!done) Line 657  while (!done)
657        /* This looks at internal information. A bit kludgy to do it this        /* This looks at internal information. A bit kludgy to do it this
658        way, but it is useful for testing. */        way, but it is useful for testing. */
659    
660        else if (showinfo || do_debug)        else if (do_showinfo)
661          {          {
662          real_pcre_extra *xx = (real_pcre_extra *)extra;          real_pcre_extra *xx = (real_pcre_extra *)extra;
663          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          if ((xx->options & PCRE_STUDY_MAPPED) == 0)
# Line 592  while (!done) Line 699  while (!done)
699    for (;;)    for (;;)
700      {      {
701      unsigned char *q;      unsigned char *q;
702        unsigned char *bptr = dbuffer;
703      int count, c;      int count, c;
704      int offsets[30];      int copystrings = 0;
705        int getstrings = 0;
706        int getlist = 0;
707        int gmatched = 0;
708        int start_offset = 0;
709        int g_notempty = 0;
710        int offsets[45];
711      int size_offsets = sizeof(offsets)/sizeof(int);      int size_offsets = sizeof(offsets)/sizeof(int);
712    
713      options = 0;      options = 0;
714    
715      if (infile == stdin) printf("  data> ");      if (infile == stdin) printf("data> ");
716      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
717        {        {
718        done = 1;        done = 1;
719        goto CONTINUE;        goto CONTINUE;
720        }        }
721      if (infile != stdin) fprintf(outfile, (char *)buffer);      if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
722    
723      len = (int)strlen((char *)buffer);      len = (int)strlen((char *)buffer);
724      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
# Line 658  while (!done) Line 772  while (!done)
772          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
773          continue;          continue;
774    
775          case 'E':          case 'C':
776          options |= PCRE_DOLLAR_ENDONLY;          while(isdigit(*p)) n = n * 10 + *p++ - '0';
777            copystrings |= 1 << n;
778          continue;          continue;
779    
780          case 'I':          case 'G':
781          options |= PCRE_CASELESS;          while(isdigit(*p)) n = n * 10 + *p++ - '0';
782            getstrings |= 1 << n;
783          continue;          continue;
784    
785          case 'M':          case 'L':
786          options |= PCRE_MULTILINE;          getlist = 1;
787          continue;          continue;
788    
789          case 'S':          case 'N':
790          options |= PCRE_DOTALL;          options |= PCRE_NOTEMPTY;
791          continue;          continue;
792    
793          case 'O':          case 'O':
# Line 691  while (!done) Line 807  while (!done)
807      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
808      support timing. */      support timing. */
809    
810    #if !defined NOPOSIX
811      if (posix || do_posix)      if (posix || do_posix)
812        {        {
813        int rc;        int rc;
814        int eflags = 0;        int eflags = 0;
815        regmatch_t pmatch[30];        regmatch_t pmatch[sizeof(offsets)/sizeof(int)];
816        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
817        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
818    
819        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);
         pmatch, eflags);  
820    
821        if (rc != 0)        if (rc != 0)
822          {          {
# Line 710  while (!done) Line 826  while (!done)
826        else        else
827          {          {
828          size_t i;          size_t i;
829          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < size_offsets; i++)
830            {            {
831            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
832              {              {
833              fprintf(outfile, "%2d: ", i);              fprintf(outfile, "%2d: ", (int)i);
834              pchars(dbuffer + pmatch[i].rm_so,              pchars(dbuffer + pmatch[i].rm_so,
835                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so);
836              fprintf(outfile, "\n");              fprintf(outfile, "\n");
837                if (i == 0 && do_showrest)
838                  {
839                  fprintf(outfile, " 0+ ");
840                  pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);
841                  fprintf(outfile, "\n");
842                  }
843              }              }
844            }            }
845          }          }
846        }        }
847    
848      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
849    
850      else      else
851    #endif  /* !defined NOPOSIX */
852    
853        for (;; gmatched++)    /* Loop for /g or /G */
854        {        {
855        if (timeit)        if (timeit)
856          {          {
857          register int i;          register int i;
858          clock_t time_taken;          clock_t time_taken;
859          clock_t start_time = clock();          clock_t start_time = clock();
860          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
861            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,            count = pcre_exec(re, extra, (char *)bptr, len,
862              size_offsets);              start_offset, options | g_notempty, offsets, size_offsets);
863          time_taken = clock() - start_time;          time_taken = clock() - start_time;
864          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
865            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            ((double)time_taken * 1000.0)/
866              ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
867          }          }
868    
869        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        count = pcre_exec(re, extra, (char *)bptr, len,
870          size_offsets);          start_offset, options | g_notempty, offsets, size_offsets);
871    
872        if (count == 0)        if (count == 0)
873          {          {
874          fprintf(outfile, "Matched, but too many substrings\n");          fprintf(outfile, "Matched, but too many substrings\n");
875          count = size_offsets/2;          count = size_offsets/3;
876          }          }
877    
878          /* Matched */
879    
880        if (count >= 0)        if (count >= 0)
881          {          {
882          int i;          int i;
883          count *= 2;          for (i = 0; i < count * 2; i += 2)
         for (i = 0; i < count; i += 2)  
884            {            {
885            if (offsets[i] < 0)            if (offsets[i] < 0)
886              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
887            else            else
888              {              {
889              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
890              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);
891              fprintf(outfile, "\n");              fprintf(outfile, "\n");
892                if (i == 0)
893                  {
894                  if (do_showrest)
895                    {
896                    fprintf(outfile, " 0+ ");
897                    pchars(bptr + offsets[i+1], len - offsets[i+1]);
898                    fprintf(outfile, "\n");
899                    }
900                  }
901                }
902              }
903    
904            for (i = 0; i < 32; i++)
905              {
906              if ((copystrings & (1 << i)) != 0)
907                {
908                char copybuffer[16];
909                int rc = pcre_copy_substring((char *)bptr, offsets, count,
910                  i, copybuffer, sizeof(copybuffer));
911                if (rc < 0)
912                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
913                else
914                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
915                }
916              }
917    
918            for (i = 0; i < 32; i++)
919              {
920              if ((getstrings & (1 << i)) != 0)
921                {
922                const char *substring;
923                int rc = pcre_get_substring((char *)bptr, offsets, count,
924                  i, &substring);
925                if (rc < 0)
926                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
927                else
928                  {
929                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
930                  free((void *)substring);
931                  }
932                }
933              }
934    
935            if (getlist)
936              {
937              const char **stringlist;
938              int rc = pcre_get_substring_list((char *)bptr, offsets, count,
939                &stringlist);
940              if (rc < 0)
941                fprintf(outfile, "get substring list failed %d\n", rc);
942              else
943                {
944                for (i = 0; i < count; i++)
945                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
946                if (stringlist[i] != NULL)
947                  fprintf(outfile, "string list not terminated by NULL\n");
948                free((void *)stringlist);
949              }              }
950            }            }
951          }          }
952    
953          /* Failed to match. If this is a /g or /G loop and we previously set
954          PCRE_NOTEMPTY after a null match, this is not necessarily the end.
955          We want to advance the start offset, and continue. Fudge the offset
956          values to achieve this. We won't be at the end of the string - that
957          was checked before setting PCRE_NOTEMPTY. */
958    
959        else        else
960          {          {
961          if (count == -1) fprintf(outfile, "No match\n");          if (g_notempty != 0)
962            else fprintf(outfile, "Error %d\n", count);            {
963              offsets[0] = start_offset;
964              offsets[1] = start_offset + 1;
965              }
966            else
967              {
968              if (gmatched == 0)   /* Error if no previous matches */
969                {
970                if (count == -1) fprintf(outfile, "No match\n");
971                  else fprintf(outfile, "Error %d\n", count);
972                }
973              break;  /* Out of the /g loop */
974              }
975          }          }
976        }  
977      }        /* If not /g or /G we are done */
978    
979          if (!do_g && !do_G) break;
980    
981          /* If we have matched an empty string, first check to see if we are at
982          the end of the subject. If so, the /g loop is over. Otherwise, mimic
983          what Perl's /g option does. This turns out to be rather cunning. First
984          we set PCRE_NOTEMPTY and try the match again at the same point. If this
985          fails (picked up above) we advance to the next character. */
986    
987          g_notempty = 0;
988          if (offsets[0] == offsets[1])
989            {
990            if (offsets[0] == len) break;
991            g_notempty = PCRE_NOTEMPTY;
992            }
993    
994          /* For /g, update the start offset, leaving the rest alone */
995    
996          if (do_g) start_offset = offsets[1];
997    
998          /* For /G, update the pointer and length */
999    
1000          else
1001            {
1002            bptr += offsets[1];
1003            len -= offsets[1];
1004            }
1005          }  /* End of loop for /g and /G */
1006        }    /* End of loop for data lines */
1007    
1008    CONTINUE:    CONTINUE:
1009    
1010    #if !defined NOPOSIX
1011    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1012    #endif
1013    
1014    if (re != NULL) free(re);    if (re != NULL) free(re);
1015    if (extra != NULL) free(extra);    if (extra != NULL) free(extra);
1016      if (tables != NULL)
1017        {
1018        free((void *)tables);
1019        setlocale(LC_CTYPE, "C");
1020        }
1021    }    }
1022    
1023  fprintf(outfile, "\n");  fprintf(outfile, "\n");

Legend:
Removed from v.19  
changed lines
  Added in v.41

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12