/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

Left column: revision 3 by nigel, Sat Feb 24 21:38:01 2007 UTC | Right column: revision 41 by nigel, Sat Feb 24 21:39:17 2007 UTC
# Line 7  Line 7 
7  #include <string.h>  #include <string.h>
8  #include <stdlib.h>  #include <stdlib.h>
9  #include <time.h>  #include <time.h>
10    #include <locale.h>
11    
12  /* Use the internal info for displaying the results of pcre_study(). */  /* Use the internal info for displaying the results of pcre_study(). */
13    
14  #include "internal.h"  #include "internal.h"
15    
16    /* It is possible to compile this test program without including support for
17    testing the POSIX interface, though this is not available via the standard
18    Makefile. */
19    
20    #if !defined NOPOSIX
21  #include "pcreposix.h"  #include "pcreposix.h"
22    #endif
23    
24  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
25  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 21  Line 29 
29  #endif  #endif
30  #endif  #endif
31    
32    #define LOOPREPEAT 20000
33    
34    
35  static FILE *outfile;  static FILE *outfile;
36  static int log_store = 0;  static int log_store = 0;
# Line 30  static int log_store = 0; Line 40  static int log_store = 0;
40  /* Debugging function to print the internal form of the regex. This is the same  /* Debugging function to print the internal form of the regex. This is the same
41  code as contained in pcre.c under the DEBUG macro. */  code as contained in pcre.c under the DEBUG macro. */
42    
43  static char *OP_names[] = { "End", "\\A", "\\B", "\\b", "\\D", "\\d",  static const char *OP_names[] = {
44    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",    "End", "\\A", "\\B", "\\b", "\\D", "\\d",
45    "not",    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
46      "Opt", "^", "$", "Any", "chars", "not",
47    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
48    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
49    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
50    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
51    "class", "Ref",    "class", "Ref",
52    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
53      "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
54    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
55  };  };
56    
# Line 47  static void print_internals(pcre *re) Line 59  static void print_internals(pcre *re)
59  {  {
60  unsigned char *code = ((real_pcre *)re)->code;  unsigned char *code = ((real_pcre *)re)->code;
61    
62  printf("------------------------------------------------------------------\n");  fprintf(outfile, "------------------------------------------------------------------\n");
63    
64  for(;;)  for(;;)
65    {    {
66    int c;    int c;
67    int charlength;    int charlength;
68    
69    printf("%3d ", code - ((real_pcre *)re)->code);    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));
70    
71    if (*code >= OP_BRA)    if (*code >= OP_BRA)
72      {      {
73      printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);      fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
74      code += 2;      code += 2;
75      }      }
76    
77    else switch(*code)    else switch(*code)
78      {      {
79      case OP_END:      case OP_END:
80      printf("    %s\n", OP_names[*code]);      fprintf(outfile, "    %s\n", OP_names[*code]);
81      printf("------------------------------------------------------------------\n");      fprintf(outfile, "------------------------------------------------------------------\n");
82      return;      return;
83    
84        case OP_OPT:
85        fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);
86        code++;
87        break;
88    
89        case OP_COND:
90        fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);
91        code += 2;
92        break;
93    
94        case OP_CREF:
95        fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);
96        code++;
97        break;
98    
99      case OP_CHARS:      case OP_CHARS:
100      charlength = *(++code);      charlength = *(++code);
101      printf("%3d ", charlength);      fprintf(outfile, "%3d ", charlength);
102      while (charlength-- > 0)      while (charlength-- > 0)
103        if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);        if (isprint(c = *(++code))) fprintf(outfile, "%c", c);
104            else fprintf(outfile, "\\x%02x", c);
105      break;      break;
106    
107      case OP_KETRMAX:      case OP_KETRMAX:
# Line 82  for(;;) Line 110  for(;;)
110      case OP_KET:      case OP_KET:
111      case OP_ASSERT:      case OP_ASSERT:
112      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
113        case OP_ASSERTBACK:
114        case OP_ASSERTBACK_NOT:
115      case OP_ONCE:      case OP_ONCE:
116      printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);      fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
117        code += 2;
118        break;
119    
120        case OP_REVERSE:
121        fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
122      code += 2;      code += 2;
123      break;      break;
124    
# Line 100  for(;;) Line 135  for(;;)
135      case OP_TYPEQUERY:      case OP_TYPEQUERY:
136      case OP_TYPEMINQUERY:      case OP_TYPEMINQUERY:
137      if (*code >= OP_TYPESTAR)      if (*code >= OP_TYPESTAR)
138        printf("    %s", OP_names[code[1]]);        fprintf(outfile, "    %s", OP_names[code[1]]);
139      else if (isprint(c = code[1])) printf("    %c", c);      else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);
140        else printf("    \\x%02x", c);        else fprintf(outfile, "    \\x%02x", c);
141      printf("%s", OP_names[*code++]);      fprintf(outfile, "%s", OP_names[*code++]);
142      break;      break;
143    
144      case OP_EXACT:      case OP_EXACT:
145      case OP_UPTO:      case OP_UPTO:
146      case OP_MINUPTO:      case OP_MINUPTO:
147      if (isprint(c = code[3])) printf("    %c{", c);      if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);
148        else printf("    \\x%02x{", c);        else fprintf(outfile, "    \\x%02x{", c);
149      if (*code != OP_EXACT) printf(",");      if (*code != OP_EXACT) fprintf(outfile, ",");
150      printf("%d}", (code[1] << 8) + code[2]);      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
151      if (*code == OP_MINUPTO) printf("?");      if (*code == OP_MINUPTO) fprintf(outfile, "?");
152      code += 3;      code += 3;
153      break;      break;
154    
155      case OP_TYPEEXACT:      case OP_TYPEEXACT:
156      case OP_TYPEUPTO:      case OP_TYPEUPTO:
157      case OP_TYPEMINUPTO:      case OP_TYPEMINUPTO:
158      printf("    %s{", OP_names[code[3]]);      fprintf(outfile, "    %s{", OP_names[code[3]]);
159      if (*code != OP_TYPEEXACT) printf(",");      if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");
160      printf("%d}", (code[1] << 8) + code[2]);      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
161      if (*code == OP_TYPEMINUPTO) printf("?");      if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");
162      code += 3;      code += 3;
163      break;      break;
164    
165      case OP_NOT:      case OP_NOT:
166      if (isprint(c = *(++code))) printf("    [^%c]", c);      if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);
167        else printf("    [^\\x%02x]", c);        else fprintf(outfile, "    [^\\x%02x]", c);
168      break;      break;
169    
170      case OP_NOTSTAR:      case OP_NOTSTAR:
# Line 138  for(;;) Line 173  for(;;)
173      case OP_NOTMINPLUS:      case OP_NOTMINPLUS:
174      case OP_NOTQUERY:      case OP_NOTQUERY:
175      case OP_NOTMINQUERY:      case OP_NOTMINQUERY:
176      if (isprint(c = code[1])) printf("    [^%c]", c);      if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);
177        else printf("    [^\\x%02x]", c);        else fprintf(outfile, "    [^\\x%02x]", c);
178      printf("%s", OP_names[*code++]);      fprintf(outfile, "%s", OP_names[*code++]);
179      break;      break;
180    
181      case OP_NOTEXACT:      case OP_NOTEXACT:
182      case OP_NOTUPTO:      case OP_NOTUPTO:
183      case OP_NOTMINUPTO:      case OP_NOTMINUPTO:
184      if (isprint(c = code[3])) printf("    [^%c]{", c);      if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);
185        else printf("    [^\\x%02x]{", c);        else fprintf(outfile, "    [^\\x%02x]{", c);
186      if (*code != OP_NOTEXACT) printf(",");      if (*code != OP_NOTEXACT) fprintf(outfile, ",");
187      printf("%d}", (code[1] << 8) + code[2]);      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
188      if (*code == OP_NOTMINUPTO) printf("?");      if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");
189      code += 3;      code += 3;
190      break;      break;
191    
192      case OP_REF:      case OP_REF:
193      printf("    \\%d", *(++code));      fprintf(outfile, "    \\%d", *(++code));
194      break;      code++;
195        goto CLASS_REF_REPEAT;
196    
197      case OP_CLASS:      case OP_CLASS:
198        {        {
199        int i, min, max;        int i, min, max;
   
200        code++;        code++;
201        printf("    [");        fprintf(outfile, "    [");
202    
203        for (i = 0; i < 256; i++)        for (i = 0; i < 256; i++)
204          {          {
# Line 172  for(;;) Line 207  for(;;)
207            int j;            int j;
208            for (j = i+1; j < 256; j++)            for (j = i+1; j < 256; j++)
209              if ((code[j/8] & (1 << (j&7))) == 0) break;              if ((code[j/8] & (1 << (j&7))) == 0) break;
210            if (i == '-' || i == ']') printf("\\");            if (i == '-' || i == ']') fprintf(outfile, "\\");
211            if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);            if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);
212            if (--j > i)            if (--j > i)
213              {              {
214              printf("-");              fprintf(outfile, "-");
215              if (j == '-' || j == ']') printf("\\");              if (j == '-' || j == ']') fprintf(outfile, "\\");
216              if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);              if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);
217              }              }
218            i = j;            i = j;
219            }            }
220          }          }
221        printf("]");        fprintf(outfile, "]");
222        code += 32;        code += 32;
223    
224          CLASS_REF_REPEAT:
225    
226        switch(*code)        switch(*code)
227          {          {
228          case OP_CRSTAR:          case OP_CRSTAR:
# Line 194  for(;;) Line 231  for(;;)
231          case OP_CRMINPLUS:          case OP_CRMINPLUS:
232          case OP_CRQUERY:          case OP_CRQUERY:
233          case OP_CRMINQUERY:          case OP_CRMINQUERY:
234          printf("%s", OP_names[*code]);          fprintf(outfile, "%s", OP_names[*code]);
235          break;          break;
236    
237          case OP_CRRANGE:          case OP_CRRANGE:
238          case OP_CRMINRANGE:          case OP_CRMINRANGE:
239          min = (code[1] << 8) + code[2];          min = (code[1] << 8) + code[2];
240          max = (code[3] << 8) + code[4];          max = (code[3] << 8) + code[4];
241          if (max == 0) printf("{%d,}", min);          if (max == 0) fprintf(outfile, "{%d,}", min);
242          else printf("{%d,%d}", min, max);          else fprintf(outfile, "{%d,%d}", min, max);
243          if (*code == OP_CRMINRANGE) printf("?");          if (*code == OP_CRMINRANGE) fprintf(outfile, "?");
244          code += 4;          code += 4;
245          break;          break;
246    
# Line 216  for(;;) Line 253  for(;;)
253      /* Anything else is just a one-node item */      /* Anything else is just a one-node item */
254    
255      default:      default:
256      printf("    %s", OP_names[*code]);      fprintf(outfile, "    %s", OP_names[*code]);
257      break;      break;
258      }      }
259    
260    code++;    code++;
261    printf("\n");    fprintf(outfile, "\n");
262    }    }
263  }  }
264    
# Line 244  compiled re. */ Line 281  compiled re. */
281    
282  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
283  {  {
284  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  if (log_store)
285      fprintf(outfile, "Memory allocation (code space): %d\n",
286        (int)((int)size - offsetof(real_pcre, code[0])));
287  return malloc(size);  return malloc(size);
288  }  }
289    
# Line 262  int study_options = 0; Line 301  int study_options = 0;
301  int op = 1;  int op = 1;
302  int timeit = 0;  int timeit = 0;
303  int showinfo = 0;  int showinfo = 0;
304    int showstore = 0;
305  int posix = 0;  int posix = 0;
306  int debug = 0;  int debug = 0;
307    int done = 0;
308  unsigned char buffer[30000];  unsigned char buffer[30000];
309  unsigned char dbuffer[1024];  unsigned char dbuffer[1024];
310    
# Line 275  outfile = stdout; Line 316  outfile = stdout;
316    
317  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
318    {    {
319    if (strcmp(argv[op], "-s") == 0) log_store = 1;    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
320        showstore = 1;
321    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
322    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
323    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
# Line 283  while (argc > 1 && argv[op][0] == '-') Line 325  while (argc > 1 && argv[op][0] == '-')
325    else    else
326      {      {
327      printf("*** Unknown option %s\n", argv[op]);      printf("*** Unknown option %s\n", argv[op]);
328        printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");
329        printf("  -d   debug: show compiled code; implies -i\n"
330               "  -i   show information about compiled pattern\n"
331               "  -p   use POSIX interface\n"
332               "  -s   output store information\n"
333               "  -t   time compilation and execution\n");
334      return 1;      return 1;
335      }      }
336    op++;    op++;
# Line 315  if (argc > 2) Line 363  if (argc > 2)
363    
364  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
365    
366  /* Heading line, then prompt for first re if stdin */  /* Heading line, then prompt for first regex if stdin */
367    
 fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");  
368  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  fprintf(outfile, "PCRE version %s\n\n", pcre_version());
369    
370  /* Main loop */  /* Main loop */
371    
372  for (;;)  while (!done)
373    {    {
374    pcre *re = NULL;    pcre *re = NULL;
375    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
376    
377    #if !defined NOPOSIX  /* There are still compilers that require no indent */
378    regex_t preg;    regex_t preg;
379    char *error;  #endif
380    unsigned char *p, *pp;  
381      const char *error;
382      unsigned char *p, *pp, *ppp;
383      unsigned const char *tables = NULL;
384    int do_study = 0;    int do_study = 0;
385    int do_debug = 0;    int do_debug = debug;
386      int do_G = 0;
387      int do_g = 0;
388      int do_showinfo = showinfo;
389      int do_showrest = 0;
390    int do_posix = 0;    int do_posix = 0;
391    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
392    
393    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
394    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
395    if (infile != stdin) fprintf(outfile, (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
396    
397    p = buffer;    p = buffer;
398    while (isspace(*p)) p++;    while (isspace(*p)) p++;
# Line 347  for (;;) Line 403  for (;;)
403    
404    delimiter = *p++;    delimiter = *p++;
405    
406    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
407      {      {
408      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
409      goto SKIP_DATA;      goto SKIP_DATA;
410      }      }
411    
# Line 357  for (;;) Line 413  for (;;)
413    
414    for(;;)    for(;;)
415      {      {
416      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
417          {
418          if (*pp == '\\' && pp[1] != 0) pp++;
419            else if (*pp == delimiter) break;
420          pp++;
421          }
422      if (*pp != 0) break;      if (*pp != 0) break;
423    
424      len = sizeof(buffer) - (pp - buffer);      len = sizeof(buffer) - (pp - buffer);
# Line 371  for (;;) Line 432  for (;;)
432      if (fgets((char *)pp, len, infile) == NULL)      if (fgets((char *)pp, len, infile) == NULL)
433        {        {
434        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
435        goto END_OFF;        done = 1;
436          goto CONTINUE;
437        }        }
438      if (infile != stdin) fprintf(outfile, (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
439      }      }
440    
441      /* If the first character after the delimiter is backslash, make
442      the pattern end with backslash. This is purely to provide a way
443      of testing for the error message when a pattern ends with backslash. */
444    
445      if (pp[1] == '\\') *pp++ = '\\';
446    
447    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter */
448    
449    *pp++ = 0;    *pp++ = 0;
# Line 384  for (;;) Line 452  for (;;)
452    
453    options = 0;    options = 0;
454    study_options = 0;    study_options = 0;
455      log_store = showstore;  /* default from command line */
456    
457    while (*pp != 0)    while (*pp != 0)
458      {      {
459      switch (*pp++)      switch (*pp++)
460        {        {
461          case 'g': do_g = 1; break;
462        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
463        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
464        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
465        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
466    
467          case '+': do_showrest = 1; break;
468        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
469        case 'D': do_debug = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
470        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
471          case 'G': do_G = 1; break;
472          case 'I': do_showinfo = 1; break;
473          case 'M': log_store = 1; break;
474    
475    #if !defined NOPOSIX
476        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
477    #endif
478    
479        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
480        case 'I': study_options |= PCRE_CASELESS; break;        case 'U': options |= PCRE_UNGREEDY; break;
481        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
482    
483          case 'L':
484          ppp = pp;
485          while (*ppp != '\n' && *ppp != ' ') ppp++;
486          *ppp = 0;
487          if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
488            {
489            fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
490            goto SKIP_DATA;
491            }
492          tables = pcre_maketables();
493          pp = ppp;
494          break;
495    
496        case '\n': case ' ': break;        case '\n': case ' ': break;
497        default:        default:
498        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
# Line 406  for (;;) Line 500  for (;;)
500        }        }
501      }      }
502    
503    /* Handle compiing via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
504    timing, showing, or debugging options. */    timing, showing, or debugging options, nor the ability to pass over
505      local character tables. */
506    
507    #if !defined NOPOSIX
508    if (posix || do_posix)    if (posix || do_posix)
509      {      {
510      int rc;      int rc;
# Line 431  for (;;) Line 527  for (;;)
527    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
528    
529    else    else
530    #endif  /* !defined NOPOSIX */
531    
532      {      {
533      if (timeit)      if (timeit)
534        {        {
535        register int i;        register int i;
536        clock_t time_taken;        clock_t time_taken;
537        clock_t start_time = clock();        clock_t start_time = clock();
538        for (i = 0; i < 4000; i++)        for (i = 0; i < LOOPREPEAT; i++)
539          {          {
540          re = pcre_compile((char *)p, options, &error, &erroroffset);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
541          if (re != NULL) free(re);          if (re != NULL) free(re);
542          }          }
543        time_taken = clock() - start_time;        time_taken = clock() - start_time;
544        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
545          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          ((double)time_taken * 1000.0) /
546            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
547        }        }
548    
549      re = pcre_compile((char *)p, options, &error, &erroroffset);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
550    
551      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
552      if non-interactive. */      if non-interactive. */
# Line 461  for (;;) Line 560  for (;;)
560          for (;;)          for (;;)
561            {            {
562            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
563              goto END_OFF;              {
564                done = 1;
565                goto CONTINUE;
566                }
567            len = (int)strlen((char *)buffer);            len = (int)strlen((char *)buffer);
568            while (len > 0 && isspace(buffer[len-1])) len--;            while (len > 0 && isspace(buffer[len-1])) len--;
569            if (len == 0) break;            if (len == 0) break;
570            }            }
571          fprintf(outfile, "\n");          fprintf(outfile, "\n");
572          }          }
573        continue;        goto CONTINUE;
574        }        }
575    
576      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required */
577    
578      if (showinfo || do_debug)      if (do_showinfo)
579        {        {
580        int first_char, count;        int first_char, count;
581    
582        if (debug || do_debug) print_internals(re);        if (do_debug) print_internals(re);
583    
584        count = pcre_info(re, &options, &first_char);        count = pcre_info(re, &options, &first_char);
585        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
# Line 486  for (;;) Line 588  for (;;)
588          {          {
589          fprintf(outfile, "Identifying subpattern count = %d\n", count);          fprintf(outfile, "Identifying subpattern count = %d\n", count);
590          if (options == 0) fprintf(outfile, "No options\n");          if (options == 0) fprintf(outfile, "No options\n");
591            else fprintf(outfile, "Options:%s%s%s%s%s%s%s\n",            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",
592              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
593              ((options & PCRE_CASELESS) != 0)? " caseless" : "",              ((options & PCRE_CASELESS) != 0)? " caseless" : "",
594              ((options & PCRE_EXTENDED) != 0)? " extended" : "",              ((options & PCRE_EXTENDED) != 0)? " extended" : "",
595              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
596              ((options & PCRE_DOTALL) != 0)? " dotall" : "",              ((options & PCRE_DOTALL) != 0)? " dotall" : "",
597              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
598              ((options & PCRE_EXTRA) != 0)? " extra" : "");              ((options & PCRE_EXTRA) != 0)? " extra" : "",
599                ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");
600    
601            if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
602              fprintf(outfile, "Case state changes\n");
603    
604          if (first_char == -1)          if (first_char == -1)
605            {            {
606            fprintf(outfile, "First char at start or follows \\n\n");            fprintf(outfile, "First char at start or follows \\n\n");
# Line 509  for (;;) Line 616  for (;;)
616            else            else
617              fprintf(outfile, "First char = %d\n", first_char);              fprintf(outfile, "First char = %d\n", first_char);
618            }            }
619    
620            if (((((real_pcre *)re)->options) & PCRE_REQCHSET) != 0)
621              {
622              int req_char = ((real_pcre *)re)->req_char;
623              if (isprint(req_char))
624                fprintf(outfile, "Req char = \'%c\'\n", req_char);
625              else
626                fprintf(outfile, "Req char = %d\n", req_char);
627              }
628            else fprintf(outfile, "No req char\n");
629          }          }
630        }        }
631    
# Line 522  for (;;) Line 639  for (;;)
639          register int i;          register int i;
640          clock_t time_taken;          clock_t time_taken;
641          clock_t start_time = clock();          clock_t start_time = clock();
642          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
643            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
644          time_taken = clock() - start_time;          time_taken = clock() - start_time;
645          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
646          fprintf(outfile, "  Study time %.2f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
647            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            ((double)time_taken * 1000.0)/
648              ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
649          }          }
650    
651        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 539  for (;;) Line 657  for (;;)
657        /* This looks at internal information. A bit kludgy to do it this        /* This looks at internal information. A bit kludgy to do it this
658        way, but it is useful for testing. */        way, but it is useful for testing. */
659    
660        else if (showinfo || do_debug)        else if (do_showinfo)
661          {          {
662          real_pcre_extra *xx = (real_pcre_extra *)extra;          real_pcre_extra *xx = (real_pcre_extra *)extra;
663          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          if ((xx->options & PCRE_STUDY_MAPPED) == 0)
# Line 580  for (;;) Line 698  for (;;)
698    
699    for (;;)    for (;;)
700      {      {
701      unsigned char *pp;      unsigned char *q;
702        unsigned char *bptr = dbuffer;
703      int count, c;      int count, c;
704      int offsets[30];      int copystrings = 0;
705        int getstrings = 0;
706        int getlist = 0;
707        int gmatched = 0;
708        int start_offset = 0;
709        int g_notempty = 0;
710        int offsets[45];
711      int size_offsets = sizeof(offsets)/sizeof(int);      int size_offsets = sizeof(offsets)/sizeof(int);
712    
713      options = 0;      options = 0;
714    
715      if (infile == stdin) printf("  data> ");      if (infile == stdin) printf("data> ");
716      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) goto END_OFF;      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
717      if (infile != stdin) fprintf(outfile, (char *)buffer);        {
718          done = 1;
719          goto CONTINUE;
720          }
721        if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
722    
723      len = (int)strlen((char *)buffer);      len = (int)strlen((char *)buffer);
724      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
# Line 599  for (;;) Line 728  for (;;)
728      p = buffer;      p = buffer;
729      while (isspace(*p)) p++;      while (isspace(*p)) p++;
730    
731      pp = dbuffer;      q = dbuffer;
732      while ((c = *p++) != 0)      while ((c = *p++) != 0)
733        {        {
734        int i = 0;        int i = 0;
# Line 643  for (;;) Line 772  for (;;)
772          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
773          continue;          continue;
774    
775          case 'E':          case 'C':
776          options |= PCRE_DOLLAR_ENDONLY;          while(isdigit(*p)) n = n * 10 + *p++ - '0';
777            copystrings |= 1 << n;
778          continue;          continue;
779    
780          case 'I':          case 'G':
781          options |= PCRE_CASELESS;          while(isdigit(*p)) n = n * 10 + *p++ - '0';
782            getstrings |= 1 << n;
783          continue;          continue;
784    
785          case 'M':          case 'L':
786          options |= PCRE_MULTILINE;          getlist = 1;
787          continue;          continue;
788    
789          case 'S':          case 'N':
790          options |= PCRE_DOTALL;          options |= PCRE_NOTEMPTY;
791          continue;          continue;
792    
793          case 'O':          case 'O':
794          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
795          if (n <= sizeof(offsets)/sizeof(int)) size_offsets = n;          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;
796          continue;          continue;
797    
798          case 'Z':          case 'Z':
799          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
800          continue;          continue;
801          }          }
802        *pp++ = c;        *q++ = c;
803        }        }
804      *pp = 0;      *q = 0;
805      len = pp - dbuffer;      len = q - dbuffer;
806    
807      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
808      support timing. */      support timing. */
809    
810    #if !defined NOPOSIX
811      if (posix || do_posix)      if (posix || do_posix)
812        {        {
813        int rc;        int rc;
814        int eflags = 0;        int eflags = 0;
815        regmatch_t pmatch[30];        regmatch_t pmatch[sizeof(offsets)/sizeof(int)];
816        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
817        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
818    
819        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);
         pmatch, eflags);  
820    
821        if (rc != 0)        if (rc != 0)
822          {          {
# Line 694  for (;;) Line 825  for (;;)
825          }          }
826        else        else
827          {          {
828          int i;          size_t i;
829          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < size_offsets; i++)
830            {            {
831            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
832              {              {
833              fprintf(outfile, "%2d: ", i);              fprintf(outfile, "%2d: ", (int)i);
834              pchars(dbuffer + pmatch[i].rm_so,              pchars(dbuffer + pmatch[i].rm_so,
835                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so);
836              fprintf(outfile, "\n");              fprintf(outfile, "\n");
837                if (i == 0 && do_showrest)
838                  {
839                  fprintf(outfile, " 0+ ");
840                  pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);
841                  fprintf(outfile, "\n");
842                  }
843              }              }
844            }            }
845          }          }
846        }        }
847    
848      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
849    
850      else      else
851    #endif  /* !defined NOPOSIX */
852    
853        for (;; gmatched++)    /* Loop for /g or /G */
854        {        {
855        if (timeit)        if (timeit)
856          {          {
857          register int i;          register int i;
858          clock_t time_taken;          clock_t time_taken;
859          clock_t start_time = clock();          clock_t start_time = clock();
860          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
861            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,            count = pcre_exec(re, extra, (char *)bptr, len,
862              size_offsets);              start_offset, options | g_notempty, offsets, size_offsets);
863          time_taken = clock() - start_time;          time_taken = clock() - start_time;
864          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
865            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            ((double)time_taken * 1000.0)/
866              ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
867          }          }
868    
869        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        count = pcre_exec(re, extra, (char *)bptr, len,
870          size_offsets);          start_offset, options | g_notempty, offsets, size_offsets);
871    
872        if (count == 0)        if (count == 0)
873          {          {
874          fprintf(outfile, "Matched, but too many substrings\n");          fprintf(outfile, "Matched, but too many substrings\n");
875          count = size_offsets/2;          count = size_offsets/3;
876          }          }
877    
878          /* Matched */
879    
880        if (count >= 0)        if (count >= 0)
881          {          {
882          int i;          int i;
883          count *= 2;          for (i = 0; i < count * 2; i += 2)
         for (i = 0; i < count; i += 2)  
884            {            {
885            if (offsets[i] < 0)            if (offsets[i] < 0)
886              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
887            else            else
888              {              {
889              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
890              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);
891              fprintf(outfile, "\n");              fprintf(outfile, "\n");
892                if (i == 0)
893                  {
894                  if (do_showrest)
895                    {
896                    fprintf(outfile, " 0+ ");
897                    pchars(bptr + offsets[i+1], len - offsets[i+1]);
898                    fprintf(outfile, "\n");
899                    }
900                  }
901                }
902              }
903    
904            for (i = 0; i < 32; i++)
905              {
906              if ((copystrings & (1 << i)) != 0)
907                {
908                char copybuffer[16];
909                int rc = pcre_copy_substring((char *)bptr, offsets, count,
910                  i, copybuffer, sizeof(copybuffer));
911                if (rc < 0)
912                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
913                else
914                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
915                }
916              }
917    
918            for (i = 0; i < 32; i++)
919              {
920              if ((getstrings & (1 << i)) != 0)
921                {
922                const char *substring;
923                int rc = pcre_get_substring((char *)bptr, offsets, count,
924                  i, &substring);
925                if (rc < 0)
926                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
927                else
928                  {
929                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
930                  free((void *)substring);
931                  }
932                }
933              }
934    
935            if (getlist)
936              {
937              const char **stringlist;
938              int rc = pcre_get_substring_list((char *)bptr, offsets, count,
939                &stringlist);
940              if (rc < 0)
941                fprintf(outfile, "get substring list failed %d\n", rc);
942              else
943                {
944                for (i = 0; i < count; i++)
945                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
946                if (stringlist[i] != NULL)
947                  fprintf(outfile, "string list not terminated by NULL\n");
948                free((void *)stringlist);
949                }
950              }
951            }
952    
953          /* Failed to match. If this is a /g or /G loop and we previously set
954          PCRE_NOTEMPTY after a null match, this is not necessarily the end.
955          We want to advance the start offset, and continue. Fudge the offset
956          values to achieve this. We won't be at the end of the string - that
957          was checked before setting PCRE_NOTEMPTY. */
958    
959          else
960            {
961            if (g_notempty != 0)
962              {
963              offsets[0] = start_offset;
964              offsets[1] = start_offset + 1;
965              }
966            else
967              {
968              if (gmatched == 0)   /* Error if no previous matches */
969                {
970                if (count == -1) fprintf(outfile, "No match\n");
971                  else fprintf(outfile, "Error %d\n", count);
972              }              }
973              break;  /* Out of the /g loop */
974            }            }
975          }          }
976    
977          /* If not /g or /G we are done */
978    
979          if (!do_g && !do_G) break;
980    
981          /* If we have matched an empty string, first check to see if we are at
982          the end of the subject. If so, the /g loop is over. Otherwise, mimic
983          what Perl's /g options does. This turns out to be rather cunning. First
984          we set PCRE_NOTEMPTY and try the match again at the same point. If this
985          fails (picked up above) we advance to the next character. */
986    
987          g_notempty = 0;
988          if (offsets[0] == offsets[1])
989            {
990            if (offsets[0] == len) break;
991            g_notempty = PCRE_NOTEMPTY;
992            }
993    
994          /* For /g, update the start offset, leaving the rest alone */
995    
996          if (do_g) start_offset = offsets[1];
997    
998          /* For /G, update the pointer and length */
999    
1000        else        else
1001          {          {
1002          if (count == -1) fprintf(outfile, "No match\n");          bptr += offsets[1];
1003            else fprintf(outfile, "Error %d\n", count);          len -= offsets[1];
1004          }          }
1005        }        }  /* End of loop for /g and /G */
1006      }      }    /* End of loop for data lines */
1007    
1008      CONTINUE:
1009    
1010    #if !defined NOPOSIX
1011    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1012    #endif
1013    
1014    if (re != NULL) free(re);    if (re != NULL) free(re);
1015    if (extra != NULL) free(extra);    if (extra != NULL) free(extra);
1016      if (tables != NULL)
1017        {
1018        free((void *)tables);
1019        setlocale(LC_CTYPE, "C");
1020        }
1021    }    }
1022    
 END_OFF:  
1023  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1024  return 0;  return 0;
1025  }  }

Legend:
Removed from v.3  
changed lines
  Added in v.41

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12