/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 3 by nigel, Sat Feb 24 21:38:01 2007 UTC | revision 43 by nigel, Sat Feb 24 21:39:21 2007 UTC
# Line 7  Line 7 
7  #include <string.h>  #include <string.h>
8  #include <stdlib.h>  #include <stdlib.h>
9  #include <time.h>  #include <time.h>
10    #include <locale.h>
11    
12  /* Use the internal info for displaying the results of pcre_study(). */  /* Use the internal info for displaying the results of pcre_study(). */
13    
14  #include "internal.h"  #include "internal.h"
15    
16    /* It is possible to compile this test program without including support for
17    testing the POSIX interface, though this is not available via the standard
18    Makefile. */
19    
20    #if !defined NOPOSIX
21  #include "pcreposix.h"  #include "pcreposix.h"
22    #endif
23    
24  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
25  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 21  Line 29 
29  #endif  #endif
30  #endif  #endif
31    
32    #define LOOPREPEAT 20000
33    
34    
35  static FILE *outfile;  static FILE *outfile;
36  static int log_store = 0;  static int log_store = 0;
37    static size_t gotten_store;
38    
39    
40    
41  /* Debugging function to print the internal form of the regex. This is the same  /* Debugging function to print the internal form of the regex. This is the same
42  code as contained in pcre.c under the DEBUG macro. */  code as contained in pcre.c under the DEBUG macro. */
43    
44  static char *OP_names[] = { "End", "\\A", "\\B", "\\b", "\\D", "\\d",  static const char *OP_names[] = {
45    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",    "End", "\\A", "\\B", "\\b", "\\D", "\\d",
46    "not",    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
47      "Opt", "^", "$", "Any", "chars", "not",
48    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
49    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
50    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
51    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
52    "class", "Ref",    "class", "Ref", "Recurse",
53    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
54      "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
55    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
56  };  };
57    
# Line 47  static void print_internals(pcre *re) Line 60  static void print_internals(pcre *re)
60  {  {
61  unsigned char *code = ((real_pcre *)re)->code;  unsigned char *code = ((real_pcre *)re)->code;
62    
63  printf("------------------------------------------------------------------\n");  fprintf(outfile, "------------------------------------------------------------------\n");
64    
65  for(;;)  for(;;)
66    {    {
67    int c;    int c;
68    int charlength;    int charlength;
69    
70    printf("%3d ", code - ((real_pcre *)re)->code);    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));
71    
72    if (*code >= OP_BRA)    if (*code >= OP_BRA)
73      {      {
74      printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);      fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
75      code += 2;      code += 2;
76      }      }
77    
78    else switch(*code)    else switch(*code)
79      {      {
80      case OP_END:      case OP_END:
81      printf("    %s\n", OP_names[*code]);      fprintf(outfile, "    %s\n", OP_names[*code]);
82      printf("------------------------------------------------------------------\n");      fprintf(outfile, "------------------------------------------------------------------\n");
83      return;      return;
84    
85        case OP_OPT:
86        fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);
87        code++;
88        break;
89    
90        case OP_COND:
91        fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);
92        code += 2;
93        break;
94    
95        case OP_CREF:
96        fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);
97        code++;
98        break;
99    
100      case OP_CHARS:      case OP_CHARS:
101      charlength = *(++code);      charlength = *(++code);
102      printf("%3d ", charlength);      fprintf(outfile, "%3d ", charlength);
103      while (charlength-- > 0)      while (charlength-- > 0)
104        if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);        if (isprint(c = *(++code))) fprintf(outfile, "%c", c);
105            else fprintf(outfile, "\\x%02x", c);
106      break;      break;
107    
108      case OP_KETRMAX:      case OP_KETRMAX:
# Line 82  for(;;) Line 111  for(;;)
111      case OP_KET:      case OP_KET:
112      case OP_ASSERT:      case OP_ASSERT:
113      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
114        case OP_ASSERTBACK:
115        case OP_ASSERTBACK_NOT:
116      case OP_ONCE:      case OP_ONCE:
117      printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);      fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
118        code += 2;
119        break;
120    
121        case OP_REVERSE:
122        fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
123      code += 2;      code += 2;
124      break;      break;
125    
# Line 100  for(;;) Line 136  for(;;)
136      case OP_TYPEQUERY:      case OP_TYPEQUERY:
137      case OP_TYPEMINQUERY:      case OP_TYPEMINQUERY:
138      if (*code >= OP_TYPESTAR)      if (*code >= OP_TYPESTAR)
139        printf("    %s", OP_names[code[1]]);        fprintf(outfile, "    %s", OP_names[code[1]]);
140      else if (isprint(c = code[1])) printf("    %c", c);      else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);
141        else printf("    \\x%02x", c);        else fprintf(outfile, "    \\x%02x", c);
142      printf("%s", OP_names[*code++]);      fprintf(outfile, "%s", OP_names[*code++]);
143      break;      break;
144    
145      case OP_EXACT:      case OP_EXACT:
146      case OP_UPTO:      case OP_UPTO:
147      case OP_MINUPTO:      case OP_MINUPTO:
148      if (isprint(c = code[3])) printf("    %c{", c);      if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);
149        else printf("    \\x%02x{", c);        else fprintf(outfile, "    \\x%02x{", c);
150      if (*code != OP_EXACT) printf(",");      if (*code != OP_EXACT) fprintf(outfile, ",");
151      printf("%d}", (code[1] << 8) + code[2]);      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
152      if (*code == OP_MINUPTO) printf("?");      if (*code == OP_MINUPTO) fprintf(outfile, "?");
153      code += 3;      code += 3;
154      break;      break;
155    
156      case OP_TYPEEXACT:      case OP_TYPEEXACT:
157      case OP_TYPEUPTO:      case OP_TYPEUPTO:
158      case OP_TYPEMINUPTO:      case OP_TYPEMINUPTO:
159      printf("    %s{", OP_names[code[3]]);      fprintf(outfile, "    %s{", OP_names[code[3]]);
160      if (*code != OP_TYPEEXACT) printf(",");      if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");
161      printf("%d}", (code[1] << 8) + code[2]);      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
162      if (*code == OP_TYPEMINUPTO) printf("?");      if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");
163      code += 3;      code += 3;
164      break;      break;
165    
166      case OP_NOT:      case OP_NOT:
167      if (isprint(c = *(++code))) printf("    [^%c]", c);      if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);
168        else printf("    [^\\x%02x]", c);        else fprintf(outfile, "    [^\\x%02x]", c);
169      break;      break;
170    
171      case OP_NOTSTAR:      case OP_NOTSTAR:
# Line 138  for(;;) Line 174  for(;;)
174      case OP_NOTMINPLUS:      case OP_NOTMINPLUS:
175      case OP_NOTQUERY:      case OP_NOTQUERY:
176      case OP_NOTMINQUERY:      case OP_NOTMINQUERY:
177      if (isprint(c = code[1])) printf("    [^%c]", c);      if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);
178        else printf("    [^\\x%02x]", c);        else fprintf(outfile, "    [^\\x%02x]", c);
179      printf("%s", OP_names[*code++]);      fprintf(outfile, "%s", OP_names[*code++]);
180      break;      break;
181    
182      case OP_NOTEXACT:      case OP_NOTEXACT:
183      case OP_NOTUPTO:      case OP_NOTUPTO:
184      case OP_NOTMINUPTO:      case OP_NOTMINUPTO:
185      if (isprint(c = code[3])) printf("    [^%c]{", c);      if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);
186        else printf("    [^\\x%02x]{", c);        else fprintf(outfile, "    [^\\x%02x]{", c);
187      if (*code != OP_NOTEXACT) printf(",");      if (*code != OP_NOTEXACT) fprintf(outfile, ",");
188      printf("%d}", (code[1] << 8) + code[2]);      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
189      if (*code == OP_NOTMINUPTO) printf("?");      if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");
190      code += 3;      code += 3;
191      break;      break;
192    
193      case OP_REF:      case OP_REF:
194      printf("    \\%d", *(++code));      fprintf(outfile, "    \\%d", *(++code));
195      break;      code++;
196        goto CLASS_REF_REPEAT;
197    
198      case OP_CLASS:      case OP_CLASS:
199        {        {
200        int i, min, max;        int i, min, max;
   
201        code++;        code++;
202        printf("    [");        fprintf(outfile, "    [");
203    
204        for (i = 0; i < 256; i++)        for (i = 0; i < 256; i++)
205          {          {
# Line 172  for(;;) Line 208  for(;;)
208            int j;            int j;
209            for (j = i+1; j < 256; j++)            for (j = i+1; j < 256; j++)
210              if ((code[j/8] & (1 << (j&7))) == 0) break;              if ((code[j/8] & (1 << (j&7))) == 0) break;
211            if (i == '-' || i == ']') printf("\\");            if (i == '-' || i == ']') fprintf(outfile, "\\");
212            if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);            if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);
213            if (--j > i)            if (--j > i)
214              {              {
215              printf("-");              fprintf(outfile, "-");
216              if (j == '-' || j == ']') printf("\\");              if (j == '-' || j == ']') fprintf(outfile, "\\");
217              if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);              if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);
218              }              }
219            i = j;            i = j;
220            }            }
221          }          }
222        printf("]");        fprintf(outfile, "]");
223        code += 32;        code += 32;
224    
225          CLASS_REF_REPEAT:
226    
227        switch(*code)        switch(*code)
228          {          {
229          case OP_CRSTAR:          case OP_CRSTAR:
# Line 194  for(;;) Line 232  for(;;)
232          case OP_CRMINPLUS:          case OP_CRMINPLUS:
233          case OP_CRQUERY:          case OP_CRQUERY:
234          case OP_CRMINQUERY:          case OP_CRMINQUERY:
235          printf("%s", OP_names[*code]);          fprintf(outfile, "%s", OP_names[*code]);
236          break;          break;
237    
238          case OP_CRRANGE:          case OP_CRRANGE:
239          case OP_CRMINRANGE:          case OP_CRMINRANGE:
240          min = (code[1] << 8) + code[2];          min = (code[1] << 8) + code[2];
241          max = (code[3] << 8) + code[4];          max = (code[3] << 8) + code[4];
242          if (max == 0) printf("{%d,}", min);          if (max == 0) fprintf(outfile, "{%d,}", min);
243          else printf("{%d,%d}", min, max);          else fprintf(outfile, "{%d,%d}", min, max);
244          if (*code == OP_CRMINRANGE) printf("?");          if (*code == OP_CRMINRANGE) fprintf(outfile, "?");
245          code += 4;          code += 4;
246          break;          break;
247    
# Line 216  for(;;) Line 254  for(;;)
254      /* Anything else is just a one-node item */      /* Anything else is just a one-node item */
255    
256      default:      default:
257      printf("    %s", OP_names[*code]);      fprintf(outfile, "    %s", OP_names[*code]);
258      break;      break;
259      }      }
260    
261    code++;    code++;
262    printf("\n");    fprintf(outfile, "\n");
263    }    }
264  }  }
265    
# Line 244  compiled re. */ Line 282  compiled re. */
282    
283  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
284  {  {
285  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  gotten_store = size;
286    if (log_store)
287      fprintf(outfile, "Memory allocation (code space): %d\n",
288        (int)((int)size - offsetof(real_pcre, code[0])));
289  return malloc(size);  return malloc(size);
290  }  }
291    
292    
293    
294    
295    /* Get one piece of information from the pcre_fullinfo() function */
296    
297    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
298    {
299    int rc;
300    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
301      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
302    }
303    
304    
305    
306    
307  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
308  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
309  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 262  int study_options = 0; Line 316  int study_options = 0;
316  int op = 1;  int op = 1;
317  int timeit = 0;  int timeit = 0;
318  int showinfo = 0;  int showinfo = 0;
319    int showstore = 0;
320  int posix = 0;  int posix = 0;
321  int debug = 0;  int debug = 0;
322    int done = 0;
323  unsigned char buffer[30000];  unsigned char buffer[30000];
324  unsigned char dbuffer[1024];  unsigned char dbuffer[1024];
325    
# Line 275  outfile = stdout; Line 331  outfile = stdout;
331    
332  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
333    {    {
334    if (strcmp(argv[op], "-s") == 0) log_store = 1;    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
335        showstore = 1;
336    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
337    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
338    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
# Line 283  while (argc > 1 && argv[op][0] == '-') Line 340  while (argc > 1 && argv[op][0] == '-')
340    else    else
341      {      {
342      printf("*** Unknown option %s\n", argv[op]);      printf("*** Unknown option %s\n", argv[op]);
343        printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");
344        printf("  -d   debug: show compiled code; implies -i\n"
345               "  -i   show information about compiled pattern\n"
346               "  -p   use POSIX interface\n"
347               "  -s   output store information\n"
348               "  -t   time compilation and execution\n");
349      return 1;      return 1;
350      }      }
351    op++;    op++;
# Line 315  if (argc > 2) Line 378  if (argc > 2)
378    
379  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
380    
381  /* Heading line, then prompt for first re if stdin */  /* Heading line, then prompt for first regex if stdin */
382    
 fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");  
383  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  fprintf(outfile, "PCRE version %s\n\n", pcre_version());
384    
385  /* Main loop */  /* Main loop */
386    
387  for (;;)  while (!done)
388    {    {
389    pcre *re = NULL;    pcre *re = NULL;
390    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
391    
392    #if !defined NOPOSIX  /* There are still compilers that require no indent */
393    regex_t preg;    regex_t preg;
394    char *error;  #endif
395    unsigned char *p, *pp;  
396      const char *error;
397      unsigned char *p, *pp, *ppp;
398      unsigned const char *tables = NULL;
399    int do_study = 0;    int do_study = 0;
400    int do_debug = 0;    int do_debug = debug;
401      int do_G = 0;
402      int do_g = 0;
403      int do_showinfo = showinfo;
404      int do_showrest = 0;
405    int do_posix = 0;    int do_posix = 0;
406    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
407    
408    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
409    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
410    if (infile != stdin) fprintf(outfile, (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
411    
412    p = buffer;    p = buffer;
413    while (isspace(*p)) p++;    while (isspace(*p)) p++;
# Line 347  for (;;) Line 418  for (;;)
418    
419    delimiter = *p++;    delimiter = *p++;
420    
421    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
422      {      {
423      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
424      goto SKIP_DATA;      goto SKIP_DATA;
425      }      }
426    
# Line 357  for (;;) Line 428  for (;;)
428    
429    for(;;)    for(;;)
430      {      {
431      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
432          {
433          if (*pp == '\\' && pp[1] != 0) pp++;
434            else if (*pp == delimiter) break;
435          pp++;
436          }
437      if (*pp != 0) break;      if (*pp != 0) break;
438    
439      len = sizeof(buffer) - (pp - buffer);      len = sizeof(buffer) - (pp - buffer);
# Line 371  for (;;) Line 447  for (;;)
447      if (fgets((char *)pp, len, infile) == NULL)      if (fgets((char *)pp, len, infile) == NULL)
448        {        {
449        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
450        goto END_OFF;        done = 1;
451          goto CONTINUE;
452        }        }
453      if (infile != stdin) fprintf(outfile, (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
454      }      }
455    
456      /* If the first character after the delimiter is backslash, make
457      the pattern end with backslash. This is purely to provide a way
458      of testing for the error message when a pattern ends with backslash. */
459    
460      if (pp[1] == '\\') *pp++ = '\\';
461    
462    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter */
463    
464    *pp++ = 0;    *pp++ = 0;
# Line 384  for (;;) Line 467  for (;;)
467    
468    options = 0;    options = 0;
469    study_options = 0;    study_options = 0;
470      log_store = showstore;  /* default from command line */
471    
472    while (*pp != 0)    while (*pp != 0)
473      {      {
474      switch (*pp++)      switch (*pp++)
475        {        {
476          case 'g': do_g = 1; break;
477        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
478        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
479        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
480        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
481    
482          case '+': do_showrest = 1; break;
483        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
484        case 'D': do_debug = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
485        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
486          case 'G': do_G = 1; break;
487          case 'I': do_showinfo = 1; break;
488          case 'M': log_store = 1; break;
489    
490    #if !defined NOPOSIX
491        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
492    #endif
493    
494        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
495        case 'I': study_options |= PCRE_CASELESS; break;        case 'U': options |= PCRE_UNGREEDY; break;
496        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
497    
498          case 'L':
499          ppp = pp;
500          while (*ppp != '\n' && *ppp != ' ') ppp++;
501          *ppp = 0;
502          if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
503            {
504            fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
505            goto SKIP_DATA;
506            }
507          tables = pcre_maketables();
508          pp = ppp;
509          break;
510    
511        case '\n': case ' ': break;        case '\n': case ' ': break;
512        default:        default:
513        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
# Line 406  for (;;) Line 515  for (;;)
515        }        }
516      }      }
517    
518    /* Handle compiing via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
519    timing, showing, or debugging options. */    timing, showing, or debugging options, nor the ability to pass over
520      local character tables. */
521    
522    #if !defined NOPOSIX
523    if (posix || do_posix)    if (posix || do_posix)
524      {      {
525      int rc;      int rc;
# Line 431  for (;;) Line 542  for (;;)
542    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
543    
544    else    else
545    #endif  /* !defined NOPOSIX */
546    
547      {      {
548      if (timeit)      if (timeit)
549        {        {
550        register int i;        register int i;
551        clock_t time_taken;        clock_t time_taken;
552        clock_t start_time = clock();        clock_t start_time = clock();
553        for (i = 0; i < 4000; i++)        for (i = 0; i < LOOPREPEAT; i++)
554          {          {
555          re = pcre_compile((char *)p, options, &error, &erroroffset);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
556          if (re != NULL) free(re);          if (re != NULL) free(re);
557          }          }
558        time_taken = clock() - start_time;        time_taken = clock() - start_time;
559        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
560          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          ((double)time_taken * 1000.0) /
561            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
562        }        }
563    
564      re = pcre_compile((char *)p, options, &error, &erroroffset);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
565    
566      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
567      if non-interactive. */      if non-interactive. */
# Line 461  for (;;) Line 575  for (;;)
575          for (;;)          for (;;)
576            {            {
577            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
578              goto END_OFF;              {
579                done = 1;
580                goto CONTINUE;
581                }
582            len = (int)strlen((char *)buffer);            len = (int)strlen((char *)buffer);
583            while (len > 0 && isspace(buffer[len-1])) len--;            while (len > 0 && isspace(buffer[len-1])) len--;
584            if (len == 0) break;            if (len == 0) break;
585            }            }
586          fprintf(outfile, "\n");          fprintf(outfile, "\n");
587          }          }
588        continue;        goto CONTINUE;
589        }        }
590    
591      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
592        info-returning functions. The old one has a limited interface and
593        returns only limited data. Check that it agrees with the newer one. */
594    
595      if (showinfo || do_debug)      if (do_showinfo)
596        {        {
597        int first_char, count;        int old_first_char, old_options, old_count;
598          int count, backrefmax, first_char, need_char;
599        if (debug || do_debug) print_internals(re);        size_t size;
600    
601          if (do_debug) print_internals(re);
602    
603          new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
604          new_info(re, NULL, PCRE_INFO_SIZE, &size);
605          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
606          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
607          new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
608          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
609    
610        count = pcre_info(re, &options, &first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
611        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
612          "Error %d while reading info\n", count);          "Error %d from pcre_info()\n", count);
613        else        else
614          {          {
615          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (old_count != count) fprintf(outfile,
616          if (options == 0) fprintf(outfile, "No options\n");            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
617            else fprintf(outfile, "Options:%s%s%s%s%s%s%s\n",              old_count);
618              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
619              ((options & PCRE_CASELESS) != 0)? " caseless" : "",          if (old_first_char != first_char) fprintf(outfile,
620              ((options & PCRE_EXTENDED) != 0)? " extended" : "",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
621              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",              first_char, old_first_char);
622              ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
623              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",          if (old_options != options) fprintf(outfile,
624              ((options & PCRE_EXTRA) != 0)? " extra" : "");            "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,
625          if (first_char == -1)              old_options);
626            {          }
627            fprintf(outfile, "First char at start or follows \\n\n");  
628            }        if (size != gotten_store) fprintf(outfile,
629          else if (first_char < 0)          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
630            {          size, gotten_store);
631            fprintf(outfile, "No first char\n");  
632            }        fprintf(outfile, "Capturing subpattern count = %d\n", count);
633          if (backrefmax > 0)
634            fprintf(outfile, "Max back reference = %d\n", backrefmax);
635          if (options == 0) fprintf(outfile, "No options\n");
636            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",
637              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
638              ((options & PCRE_CASELESS) != 0)? " caseless" : "",
639              ((options & PCRE_EXTENDED) != 0)? " extended" : "",
640              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
641              ((options & PCRE_DOTALL) != 0)? " dotall" : "",
642              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
643              ((options & PCRE_EXTRA) != 0)? " extra" : "",
644              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");
645    
646          if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
647            fprintf(outfile, "Case state changes\n");
648    
649          if (first_char == -1)
650            {
651            fprintf(outfile, "First char at start or follows \\n\n");
652            }
653          else if (first_char < 0)
654            {
655            fprintf(outfile, "No first char\n");
656            }
657          else
658            {
659            if (isprint(first_char))
660              fprintf(outfile, "First char = \'%c\'\n", first_char);
661          else          else
662            {            fprintf(outfile, "First char = %d\n", first_char);
663            if (isprint(first_char))          }
664              fprintf(outfile, "First char = \'%c\'\n", first_char);  
665            else        if (need_char < 0)
666              fprintf(outfile, "First char = %d\n", first_char);          {
667            }          fprintf(outfile, "No need char\n");
668            }
669          else
670            {
671            if (isprint(need_char))
672              fprintf(outfile, "Need char = \'%c\'\n", need_char);
673            else
674              fprintf(outfile, "Need char = %d\n", need_char);
675          }          }
676        }        }
677    
# Line 522  for (;;) Line 685  for (;;)
685          register int i;          register int i;
686          clock_t time_taken;          clock_t time_taken;
687          clock_t start_time = clock();          clock_t start_time = clock();
688          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
689            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
690          time_taken = clock() - start_time;          time_taken = clock() - start_time;
691          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
692          fprintf(outfile, "  Study time %.2f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
693            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            ((double)time_taken * 1000.0)/
694              ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
695          }          }
696    
697        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 536  for (;;) Line 700  for (;;)
700        else if (extra == NULL)        else if (extra == NULL)
701          fprintf(outfile, "Study returned NULL\n");          fprintf(outfile, "Study returned NULL\n");
702    
703        /* This looks at internal information. A bit kludgy to do it this        else if (do_showinfo)
       way, but it is useful for testing. */  
   
       else if (showinfo || do_debug)  
704          {          {
705          real_pcre_extra *xx = (real_pcre_extra *)extra;          uschar *start_bits = NULL;
706          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
707            if (start_bits == NULL)
708            fprintf(outfile, "No starting character set\n");            fprintf(outfile, "No starting character set\n");
709          else          else
710            {            {
# Line 551  for (;;) Line 713  for (;;)
713            fprintf(outfile, "Starting character set: ");            fprintf(outfile, "Starting character set: ");
714            for (i = 0; i < 256; i++)            for (i = 0; i < 256; i++)
715              {              {
716              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if ((start_bits[i/8] & (1<<(i%8))) != 0)
717                {                {
718                if (c > 75)                if (c > 75)
719                  {                  {
# Line 580  for (;;) Line 742  for (;;)
742    
743    for (;;)    for (;;)
744      {      {
745      unsigned char *pp;      unsigned char *q;
746        unsigned char *bptr = dbuffer;
747      int count, c;      int count, c;
748      int offsets[30];      int copystrings = 0;
749        int getstrings = 0;
750        int getlist = 0;
751        int gmatched = 0;
752        int start_offset = 0;
753        int g_notempty = 0;
754        int offsets[45];
755      int size_offsets = sizeof(offsets)/sizeof(int);      int size_offsets = sizeof(offsets)/sizeof(int);
756    
757      options = 0;      options = 0;
758    
759      if (infile == stdin) printf("  data> ");      if (infile == stdin) printf("data> ");
760      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) goto END_OFF;      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
761      if (infile != stdin) fprintf(outfile, (char *)buffer);        {
762          done = 1;
763          goto CONTINUE;
764          }
765        if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
766    
767      len = (int)strlen((char *)buffer);      len = (int)strlen((char *)buffer);
768      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
# Line 599  for (;;) Line 772  for (;;)
772      p = buffer;      p = buffer;
773      while (isspace(*p)) p++;      while (isspace(*p)) p++;
774    
775      pp = dbuffer;      q = dbuffer;
776      while ((c = *p++) != 0)      while ((c = *p++) != 0)
777        {        {
778        int i = 0;        int i = 0;
# Line 643  for (;;) Line 816  for (;;)
816          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
817          continue;          continue;
818    
819          case 'E':          case 'C':
820          options |= PCRE_DOLLAR_ENDONLY;          while(isdigit(*p)) n = n * 10 + *p++ - '0';
821            copystrings |= 1 << n;
822          continue;          continue;
823    
824          case 'I':          case 'G':
825          options |= PCRE_CASELESS;          while(isdigit(*p)) n = n * 10 + *p++ - '0';
826            getstrings |= 1 << n;
827          continue;          continue;
828    
829          case 'M':          case 'L':
830          options |= PCRE_MULTILINE;          getlist = 1;
831          continue;          continue;
832    
833          case 'S':          case 'N':
834          options |= PCRE_DOTALL;          options |= PCRE_NOTEMPTY;
835          continue;          continue;
836    
837          case 'O':          case 'O':
838          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
839          if (n <= sizeof(offsets)/sizeof(int)) size_offsets = n;          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;
840          continue;          continue;
841    
842          case 'Z':          case 'Z':
843          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
844          continue;          continue;
845          }          }
846        *pp++ = c;        *q++ = c;
847        }        }
848      *pp = 0;      *q = 0;
849      len = pp - dbuffer;      len = q - dbuffer;
850    
851      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
852      support timing. */      support timing. */
853    
854    #if !defined NOPOSIX
855      if (posix || do_posix)      if (posix || do_posix)
856        {        {
857        int rc;        int rc;
858        int eflags = 0;        int eflags = 0;
859        regmatch_t pmatch[30];        regmatch_t pmatch[sizeof(offsets)/sizeof(int)];
860        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
861        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
862    
863        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);
         pmatch, eflags);  
864    
865        if (rc != 0)        if (rc != 0)
866          {          {
# Line 694  for (;;) Line 869  for (;;)
869          }          }
870        else        else
871          {          {
872          int i;          size_t i;
873          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < size_offsets; i++)
874            {            {
875            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
876              {              {
877              fprintf(outfile, "%2d: ", i);              fprintf(outfile, "%2d: ", (int)i);
878              pchars(dbuffer + pmatch[i].rm_so,              pchars(dbuffer + pmatch[i].rm_so,
879                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so);
880              fprintf(outfile, "\n");              fprintf(outfile, "\n");
881                if (i == 0 && do_showrest)
882                  {
883                  fprintf(outfile, " 0+ ");
884                  pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);
885                  fprintf(outfile, "\n");
886                  }
887              }              }
888            }            }
889          }          }
890        }        }
891    
892      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
893    
894      else      else
895    #endif  /* !defined NOPOSIX */
896    
897        for (;; gmatched++)    /* Loop for /g or /G */
898        {        {
899        if (timeit)        if (timeit)
900          {          {
901          register int i;          register int i;
902          clock_t time_taken;          clock_t time_taken;
903          clock_t start_time = clock();          clock_t start_time = clock();
904          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
905            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,            count = pcre_exec(re, extra, (char *)bptr, len,
906              size_offsets);              start_offset, options | g_notempty, offsets, size_offsets);
907          time_taken = clock() - start_time;          time_taken = clock() - start_time;
908          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
909            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            ((double)time_taken * 1000.0)/
910              ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
911          }          }
912    
913        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        count = pcre_exec(re, extra, (char *)bptr, len,
914          size_offsets);          start_offset, options | g_notempty, offsets, size_offsets);
915    
916        if (count == 0)        if (count == 0)
917          {          {
918          fprintf(outfile, "Matched, but too many substrings\n");          fprintf(outfile, "Matched, but too many substrings\n");
919          count = size_offsets/2;          count = size_offsets/3;
920          }          }
921    
922          /* Matched */
923    
924        if (count >= 0)        if (count >= 0)
925          {          {
926          int i;          int i;
927          count *= 2;          for (i = 0; i < count * 2; i += 2)
         for (i = 0; i < count; i += 2)  
928            {            {
929            if (offsets[i] < 0)            if (offsets[i] < 0)
930              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
931            else            else
932              {              {
933              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
934              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);
935              fprintf(outfile, "\n");              fprintf(outfile, "\n");
936                if (i == 0)
937                  {
938                  if (do_showrest)
939                    {
940                    fprintf(outfile, " 0+ ");
941                    pchars(bptr + offsets[i+1], len - offsets[i+1]);
942                    fprintf(outfile, "\n");
943                    }
944                  }
945                }
946              }
947    
948            for (i = 0; i < 32; i++)
949              {
950              if ((copystrings & (1 << i)) != 0)
951                {
952                char copybuffer[16];
953                int rc = pcre_copy_substring((char *)bptr, offsets, count,
954                  i, copybuffer, sizeof(copybuffer));
955                if (rc < 0)
956                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
957                else
958                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
959                }
960              }
961    
962            for (i = 0; i < 32; i++)
963              {
964              if ((getstrings & (1 << i)) != 0)
965                {
966                const char *substring;
967                int rc = pcre_get_substring((char *)bptr, offsets, count,
968                  i, &substring);
969                if (rc < 0)
970                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
971                else
972                  {
973                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
974                  free((void *)substring);
975                  }
976                }
977              }
978    
979            if (getlist)
980              {
981              const char **stringlist;
982              int rc = pcre_get_substring_list((char *)bptr, offsets, count,
983                &stringlist);
984              if (rc < 0)
985                fprintf(outfile, "get substring list failed %d\n", rc);
986              else
987                {
988                for (i = 0; i < count; i++)
989                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
990                if (stringlist[i] != NULL)
991                  fprintf(outfile, "string list not terminated by NULL\n");
992                free((void *)stringlist);
993              }              }
994            }            }
995          }          }
996    
997          /* Failed to match. If this is a /g or /G loop and we previously set
998          PCRE_NOTEMPTY after a null match, this is not necessarily the end.
999          We want to advance the start offset, and continue. Fudge the offset
1000          values to achieve this. We won't be at the end of the string - that
1001          was checked before setting PCRE_NOTEMPTY. */
1002    
1003        else        else
1004          {          {
1005          if (count == -1) fprintf(outfile, "No match\n");          if (g_notempty != 0)
1006            else fprintf(outfile, "Error %d\n", count);            {
1007              offsets[0] = start_offset;
1008              offsets[1] = start_offset + 1;
1009              }
1010            else
1011              {
1012              if (gmatched == 0)   /* Error if no previous matches */
1013                {
1014                if (count == -1) fprintf(outfile, "No match\n");
1015                  else fprintf(outfile, "Error %d\n", count);
1016                }
1017              break;  /* Out of the /g loop */
1018              }
1019          }          }
       }  
     }  
1020    
1021          /* If not /g or /G we are done */
1022    
1023          if (!do_g && !do_G) break;
1024    
1025          /* If we have matched an empty string, first check to see if we are at
1026          the end of the subject. If so, the /g loop is over. Otherwise, mimic
1027          what Perl's /g option does. This turns out to be rather cunning. First
1028          we set PCRE_NOTEMPTY and try the match again at the same point. If this
1029          fails (picked up above) we advance to the next character. */
1030    
1031          g_notempty = 0;
1032          if (offsets[0] == offsets[1])
1033            {
1034            if (offsets[0] == len) break;
1035            g_notempty = PCRE_NOTEMPTY;
1036            }
1037    
1038          /* For /g, update the start offset, leaving the rest alone */
1039    
1040          if (do_g) start_offset = offsets[1];
1041    
1042          /* For /G, update the pointer and length */
1043    
1044          else
1045            {
1046            bptr += offsets[1];
1047            len -= offsets[1];
1048            }
1049          }  /* End of loop for /g and /G */
1050        }    /* End of loop for data lines */
1051    
1052      CONTINUE:
1053    
1054    #if !defined NOPOSIX
1055    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1056    #endif
1057    
1058    if (re != NULL) free(re);    if (re != NULL) free(re);
1059    if (extra != NULL) free(extra);    if (extra != NULL) free(extra);
1060      if (tables != NULL)
1061        {
1062        free((void *)tables);
1063        setlocale(LC_CTYPE, "C");
1064        }
1065    }    }
1066    
 END_OFF:  
1067  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1068  return 0;  return 0;
1069  }  }

Legend:
Removed from v.3  
changed lines
  Added in v.43

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12