/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3 - (hide annotations) (download)
Sat Feb 24 21:38:01 2007 UTC (7 years, 8 months ago) by nigel
File MIME type: text/plain
File size: 19266 byte(s)
Load pcre-1.00 into code/trunk.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5     #include <ctype.h>
6     #include <stdio.h>
7     #include <string.h>
8     #include <stdlib.h>
9     #include <time.h>
10    
11     /* Use the internal info for displaying the results of pcre_study(). */
12    
13     #include "internal.h"
14     #include "pcreposix.h"
15    
16     #ifndef CLOCKS_PER_SEC
17     #ifdef CLK_TCK
18     #define CLOCKS_PER_SEC CLK_TCK
19     #else
20     #define CLOCKS_PER_SEC 100
21     #endif
22     #endif
23    
24    
25     static FILE *outfile;
26     static int log_store = 0;
27    
28    
29    
30     /* Debugging function to print the internal form of the regex. This is the same
31     code as contained in pcre.c under the DEBUG macro. */
32    
/* Printable names for the items of a compiled pattern. print_internals()
indexes this table directly with opcode bytes (and, for the "type" repeats,
with the operand byte that holds a character type), so the order of the
entries has to match the numbering of the OP_* values in internal.h.
NOTE(review): that numbering is not visible in this file - confirm against
internal.h before reordering or adding entries.

The entries are string literals that are only ever read, hence the table is
const throughout. */

static const char *const OP_names[] = {
  "End", "\\A", "\\B", "\\b", "\\D", "\\d",
  "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",
  "not",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{",
  "class", "Ref",
  "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",
  "Brazero", "Braminzero", "Bra"
};
44    
45    
46     static void print_internals(pcre *re)
47     {
48     unsigned char *code = ((real_pcre *)re)->code;
49    
50     printf("------------------------------------------------------------------\n");
51    
52     for(;;)
53     {
54     int c;
55     int charlength;
56    
57     printf("%3d ", code - ((real_pcre *)re)->code);
58    
59     if (*code >= OP_BRA)
60     {
61     printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
62     code += 2;
63     }
64    
65     else switch(*code)
66     {
67     case OP_END:
68     printf(" %s\n", OP_names[*code]);
69     printf("------------------------------------------------------------------\n");
70     return;
71    
72     case OP_CHARS:
73     charlength = *(++code);
74     printf("%3d ", charlength);
75     while (charlength-- > 0)
76     if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);
77     break;
78    
79     case OP_KETRMAX:
80     case OP_KETRMIN:
81     case OP_ALT:
82     case OP_KET:
83     case OP_ASSERT:
84     case OP_ASSERT_NOT:
85     case OP_ONCE:
86     printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
87     code += 2;
88     break;
89    
90     case OP_STAR:
91     case OP_MINSTAR:
92     case OP_PLUS:
93     case OP_MINPLUS:
94     case OP_QUERY:
95     case OP_MINQUERY:
96     case OP_TYPESTAR:
97     case OP_TYPEMINSTAR:
98     case OP_TYPEPLUS:
99     case OP_TYPEMINPLUS:
100     case OP_TYPEQUERY:
101     case OP_TYPEMINQUERY:
102     if (*code >= OP_TYPESTAR)
103     printf(" %s", OP_names[code[1]]);
104     else if (isprint(c = code[1])) printf(" %c", c);
105     else printf(" \\x%02x", c);
106     printf("%s", OP_names[*code++]);
107     break;
108    
109     case OP_EXACT:
110     case OP_UPTO:
111     case OP_MINUPTO:
112     if (isprint(c = code[3])) printf(" %c{", c);
113     else printf(" \\x%02x{", c);
114     if (*code != OP_EXACT) printf(",");
115     printf("%d}", (code[1] << 8) + code[2]);
116     if (*code == OP_MINUPTO) printf("?");
117     code += 3;
118     break;
119    
120     case OP_TYPEEXACT:
121     case OP_TYPEUPTO:
122     case OP_TYPEMINUPTO:
123     printf(" %s{", OP_names[code[3]]);
124     if (*code != OP_TYPEEXACT) printf(",");
125     printf("%d}", (code[1] << 8) + code[2]);
126     if (*code == OP_TYPEMINUPTO) printf("?");
127     code += 3;
128     break;
129    
130     case OP_NOT:
131     if (isprint(c = *(++code))) printf(" [^%c]", c);
132     else printf(" [^\\x%02x]", c);
133     break;
134    
135     case OP_NOTSTAR:
136     case OP_NOTMINSTAR:
137     case OP_NOTPLUS:
138     case OP_NOTMINPLUS:
139     case OP_NOTQUERY:
140     case OP_NOTMINQUERY:
141     if (isprint(c = code[1])) printf(" [^%c]", c);
142     else printf(" [^\\x%02x]", c);
143     printf("%s", OP_names[*code++]);
144     break;
145    
146     case OP_NOTEXACT:
147     case OP_NOTUPTO:
148     case OP_NOTMINUPTO:
149     if (isprint(c = code[3])) printf(" [^%c]{", c);
150     else printf(" [^\\x%02x]{", c);
151     if (*code != OP_NOTEXACT) printf(",");
152     printf("%d}", (code[1] << 8) + code[2]);
153     if (*code == OP_NOTMINUPTO) printf("?");
154     code += 3;
155     break;
156    
157     case OP_REF:
158     printf(" \\%d", *(++code));
159     break;
160    
161     case OP_CLASS:
162     {
163     int i, min, max;
164    
165     code++;
166     printf(" [");
167    
168     for (i = 0; i < 256; i++)
169     {
170     if ((code[i/8] & (1 << (i&7))) != 0)
171     {
172     int j;
173     for (j = i+1; j < 256; j++)
174     if ((code[j/8] & (1 << (j&7))) == 0) break;
175     if (i == '-' || i == ']') printf("\\");
176     if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);
177     if (--j > i)
178     {
179     printf("-");
180     if (j == '-' || j == ']') printf("\\");
181     if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);
182     }
183     i = j;
184     }
185     }
186     printf("]");
187     code += 32;
188    
189     switch(*code)
190     {
191     case OP_CRSTAR:
192     case OP_CRMINSTAR:
193     case OP_CRPLUS:
194     case OP_CRMINPLUS:
195     case OP_CRQUERY:
196     case OP_CRMINQUERY:
197     printf("%s", OP_names[*code]);
198     break;
199    
200     case OP_CRRANGE:
201     case OP_CRMINRANGE:
202     min = (code[1] << 8) + code[2];
203     max = (code[3] << 8) + code[4];
204     if (max == 0) printf("{%d,}", min);
205     else printf("{%d,%d}", min, max);
206     if (*code == OP_CRMINRANGE) printf("?");
207     code += 4;
208     break;
209    
210     default:
211     code--;
212     }
213     }
214     break;
215    
216     /* Anything else is just a one-node item */
217    
218     default:
219     printf(" %s", OP_names[*code]);
220     break;
221     }
222    
223     code++;
224     printf("\n");
225     }
226     }
227    
228    
229    
230     /* Character string printing function. */
231    
232     static void pchars(unsigned char *p, int length)
233     {
234     int c;
235     while (length-- > 0)
236     if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
237     else fprintf(outfile, "\\x%02x", c);
238     }
239    
240    
241    
242     /* Alternative malloc function, to test functionality and show the size of the
243     compiled re. */
244    
245     static void *new_malloc(size_t size)
246     {
247     if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);
248     return malloc(size);
249     }
250    
251    
252    
253     /* Read lines from named file or stdin and write to named file or stdout; lines
254     consist of a regular expression, in delimiters and optionally followed by
255     options, followed by a set of test data, terminated by an empty line. */
256    
257     int main(int argc, char **argv)
258     {
259     FILE *infile = stdin;
260     int options = 0;
261     int study_options = 0;
262     int op = 1;
263     int timeit = 0;
264     int showinfo = 0;
265     int posix = 0;
266     int debug = 0;
267     unsigned char buffer[30000];
268     unsigned char dbuffer[1024];
269    
270     /* Static so that new_malloc can use it. */
271    
272     outfile = stdout;
273    
274     /* Scan options */
275    
276     while (argc > 1 && argv[op][0] == '-')
277     {
278     if (strcmp(argv[op], "-s") == 0) log_store = 1;
279     else if (strcmp(argv[op], "-t") == 0) timeit = 1;
280     else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
281     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
282     else if (strcmp(argv[op], "-p") == 0) posix = 1;
283     else
284     {
285     printf("*** Unknown option %s\n", argv[op]);
286     return 1;
287     }
288     op++;
289     argc--;
290     }
291    
292     /* Sort out the input and output files */
293    
294     if (argc > 1)
295     {
296     infile = fopen(argv[op], "r");
297     if (infile == NULL)
298     {
299     printf("** Failed to open %s\n", argv[op]);
300     return 1;
301     }
302     }
303    
304     if (argc > 2)
305     {
306     outfile = fopen(argv[op+1], "w");
307     if (outfile == NULL)
308     {
309     printf("** Failed to open %s\n", argv[op+1]);
310     return 1;
311     }
312     }
313    
314     /* Set alternative malloc function */
315    
316     pcre_malloc = new_malloc;
317    
318     /* Heading line, then prompt for first re if stdin */
319    
320     fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");
321     fprintf(outfile, "PCRE version %s\n\n", pcre_version());
322    
323     /* Main loop */
324    
325     for (;;)
326     {
327     pcre *re = NULL;
328     pcre_extra *extra = NULL;
329     regex_t preg;
330     char *error;
331     unsigned char *p, *pp;
332     int do_study = 0;
333     int do_debug = 0;
334     int do_posix = 0;
335     int erroroffset, len, delimiter;
336    
337     if (infile == stdin) printf(" re> ");
338     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
339     if (infile != stdin) fprintf(outfile, (char *)buffer);
340    
341     p = buffer;
342     while (isspace(*p)) p++;
343     if (*p == 0) continue;
344    
345     /* Get the delimiter and seek the end of the pattern; if is isn't
346     complete, read more. */
347    
348     delimiter = *p++;
349    
350     if (isalnum(delimiter))
351     {
352     fprintf(outfile, "** Delimiter must not be alphameric\n");
353     goto SKIP_DATA;
354     }
355    
356     pp = p;
357    
358     for(;;)
359     {
360     while (*pp != 0 && *pp != delimiter) pp++;
361     if (*pp != 0) break;
362    
363     len = sizeof(buffer) - (pp - buffer);
364     if (len < 256)
365     {
366     fprintf(outfile, "** Expression too long - missing delimiter?\n");
367     goto SKIP_DATA;
368     }
369    
370     if (infile == stdin) printf(" > ");
371     if (fgets((char *)pp, len, infile) == NULL)
372     {
373     fprintf(outfile, "** Unexpected EOF\n");
374     goto END_OFF;
375     }
376     if (infile != stdin) fprintf(outfile, (char *)pp);
377     }
378    
379     /* Terminate the pattern at the delimiter */
380    
381     *pp++ = 0;
382    
383     /* Look for options after final delimiter */
384    
385     options = 0;
386     study_options = 0;
387     while (*pp != 0)
388     {
389     switch (*pp++)
390     {
391     case 'i': options |= PCRE_CASELESS; break;
392     case 'm': options |= PCRE_MULTILINE; break;
393     case 's': options |= PCRE_DOTALL; break;
394     case 'x': options |= PCRE_EXTENDED; break;
395     case 'A': options |= PCRE_ANCHORED; break;
396     case 'D': do_debug = 1; break;
397     case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
398     case 'P': do_posix = 1; break;
399     case 'S': do_study = 1; break;
400     case 'I': study_options |= PCRE_CASELESS; break;
401     case 'X': options |= PCRE_EXTRA; break;
402     case '\n': case ' ': break;
403     default:
404     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
405     goto SKIP_DATA;
406     }
407     }
408    
409     /* Handle compiing via the POSIX interface, which doesn't support the
410     timing, showing, or debugging options. */
411    
412     if (posix || do_posix)
413     {
414     int rc;
415     int cflags = 0;
416     if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
417     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
418     rc = regcomp(&preg, (char *)p, cflags);
419    
420     /* Compilation failed; go back for another re, skipping to blank line
421     if non-interactive. */
422    
423     if (rc != 0)
424     {
425     (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
426     fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
427     goto SKIP_DATA;
428     }
429     }
430    
431     /* Handle compiling via the native interface */
432    
433     else
434     {
435     if (timeit)
436     {
437     register int i;
438     clock_t time_taken;
439     clock_t start_time = clock();
440     for (i = 0; i < 4000; i++)
441     {
442     re = pcre_compile((char *)p, options, &error, &erroroffset);
443     if (re != NULL) free(re);
444     }
445     time_taken = clock() - start_time;
446     fprintf(outfile, "Compile time %.2f milliseconds\n",
447     ((double)time_taken)/(4 * CLOCKS_PER_SEC));
448     }
449    
450     re = pcre_compile((char *)p, options, &error, &erroroffset);
451    
452     /* Compilation failed; go back for another re, skipping to blank line
453     if non-interactive. */
454    
455     if (re == NULL)
456     {
457     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
458     SKIP_DATA:
459     if (infile != stdin)
460     {
461     for (;;)
462     {
463     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
464     goto END_OFF;
465     len = (int)strlen((char *)buffer);
466     while (len > 0 && isspace(buffer[len-1])) len--;
467     if (len == 0) break;
468     }
469     fprintf(outfile, "\n");
470     }
471     continue;
472     }
473    
474     /* Compilation succeeded; print data if required */
475    
476     if (showinfo || do_debug)
477     {
478     int first_char, count;
479    
480     if (debug || do_debug) print_internals(re);
481    
482     count = pcre_info(re, &options, &first_char);
483     if (count < 0) fprintf(outfile,
484     "Error %d while reading info\n", count);
485     else
486     {
487     fprintf(outfile, "Identifying subpattern count = %d\n", count);
488     if (options == 0) fprintf(outfile, "No options\n");
489     else fprintf(outfile, "Options:%s%s%s%s%s%s%s\n",
490     ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
491     ((options & PCRE_CASELESS) != 0)? " caseless" : "",
492     ((options & PCRE_EXTENDED) != 0)? " extended" : "",
493     ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
494     ((options & PCRE_DOTALL) != 0)? " dotall" : "",
495     ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
496     ((options & PCRE_EXTRA) != 0)? " extra" : "");
497     if (first_char == -1)
498     {
499     fprintf(outfile, "First char at start or follows \\n\n");
500     }
501     else if (first_char < 0)
502     {
503     fprintf(outfile, "No first char\n");
504     }
505     else
506     {
507     if (isprint(first_char))
508     fprintf(outfile, "First char = \'%c\'\n", first_char);
509     else
510     fprintf(outfile, "First char = %d\n", first_char);
511     }
512     }
513     }
514    
515     /* If /S was present, study the regexp to generate additional info to
516     help with the matching. */
517    
518     if (do_study)
519     {
520     if (timeit)
521     {
522     register int i;
523     clock_t time_taken;
524     clock_t start_time = clock();
525     for (i = 0; i < 4000; i++)
526     extra = pcre_study(re, study_options, &error);
527     time_taken = clock() - start_time;
528     if (extra != NULL) free(extra);
529     fprintf(outfile, " Study time %.2f milliseconds\n",
530     ((double)time_taken)/(4 * CLOCKS_PER_SEC));
531     }
532    
533     extra = pcre_study(re, study_options, &error);
534     if (error != NULL)
535     fprintf(outfile, "Failed to study: %s\n", error);
536     else if (extra == NULL)
537     fprintf(outfile, "Study returned NULL\n");
538    
539     /* This looks at internal information. A bit kludgy to do it this
540     way, but it is useful for testing. */
541    
542     else if (showinfo || do_debug)
543     {
544     real_pcre_extra *xx = (real_pcre_extra *)extra;
545     if ((xx->options & PCRE_STUDY_MAPPED) == 0)
546     fprintf(outfile, "No starting character set\n");
547     else
548     {
549     int i;
550     int c = 24;
551     fprintf(outfile, "Starting character set: ");
552     for (i = 0; i < 256; i++)
553     {
554     if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)
555     {
556     if (c > 75)
557     {
558     fprintf(outfile, "\n ");
559     c = 2;
560     }
561     if (isprint(i) && i != ' ')
562     {
563     fprintf(outfile, "%c ", i);
564     c += 2;
565     }
566     else
567     {
568     fprintf(outfile, "\\x%02x ", i);
569     c += 5;
570     }
571     }
572     }
573     fprintf(outfile, "\n");
574     }
575     }
576     }
577     }
578    
579     /* Read data lines and test them */
580    
581     for (;;)
582     {
583     unsigned char *pp;
584     int count, c;
585     int offsets[30];
586     int size_offsets = sizeof(offsets)/sizeof(int);
587    
588     options = 0;
589    
590     if (infile == stdin) printf(" data> ");
591     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) goto END_OFF;
592     if (infile != stdin) fprintf(outfile, (char *)buffer);
593    
594     len = (int)strlen((char *)buffer);
595     while (len > 0 && isspace(buffer[len-1])) len--;
596     buffer[len] = 0;
597     if (len == 0) break;
598    
599     p = buffer;
600     while (isspace(*p)) p++;
601    
602     pp = dbuffer;
603     while ((c = *p++) != 0)
604     {
605     int i = 0;
606     int n = 0;
607     if (c == '\\') switch ((c = *p++))
608     {
609     case 'a': c = 7; break;
610     case 'b': c = '\b'; break;
611     case 'e': c = 27; break;
612     case 'f': c = '\f'; break;
613     case 'n': c = '\n'; break;
614     case 'r': c = '\r'; break;
615     case 't': c = '\t'; break;
616     case 'v': c = '\v'; break;
617    
618     case '0': case '1': case '2': case '3':
619     case '4': case '5': case '6': case '7':
620     c -= '0';
621     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
622     c = c * 8 + *p++ - '0';
623     break;
624    
625     case 'x':
626     c = 0;
627     while (i++ < 2 && isxdigit(*p))
628     {
629     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
630     p++;
631     }
632     break;
633    
634     case 0: /* Allows for an empty line */
635     p--;
636     continue;
637    
638     case 'A': /* Option setting */
639     options |= PCRE_ANCHORED;
640     continue;
641    
642     case 'B':
643     options |= PCRE_NOTBOL;
644     continue;
645    
646     case 'E':
647     options |= PCRE_DOLLAR_ENDONLY;
648     continue;
649    
650     case 'I':
651     options |= PCRE_CASELESS;
652     continue;
653    
654     case 'M':
655     options |= PCRE_MULTILINE;
656     continue;
657    
658     case 'S':
659     options |= PCRE_DOTALL;
660     continue;
661    
662     case 'O':
663     while(isdigit(*p)) n = n * 10 + *p++ - '0';
664     if (n <= sizeof(offsets)/sizeof(int)) size_offsets = n;
665     continue;
666    
667     case 'Z':
668     options |= PCRE_NOTEOL;
669     continue;
670     }
671     *pp++ = c;
672     }
673     *pp = 0;
674     len = pp - dbuffer;
675    
676     /* Handle matching via the POSIX interface, which does not
677     support timing. */
678    
679     if (posix || do_posix)
680     {
681     int rc;
682     int eflags = 0;
683     regmatch_t pmatch[30];
684     if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
685     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
686    
687     rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),
688     pmatch, eflags);
689    
690     if (rc != 0)
691     {
692     (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
693     fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
694     }
695     else
696     {
697     int i;
698     for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)
699     {
700     if (pmatch[i].rm_so >= 0)
701     {
702     fprintf(outfile, "%2d: ", i);
703     pchars(dbuffer + pmatch[i].rm_so,
704     pmatch[i].rm_eo - pmatch[i].rm_so);
705     fprintf(outfile, "\n");
706     }
707     }
708     }
709     }
710    
711     /* Handle matching via the native interface */
712    
713     else
714     {
715     if (timeit)
716     {
717     register int i;
718     clock_t time_taken;
719     clock_t start_time = clock();
720     for (i = 0; i < 4000; i++)
721     count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,
722     size_offsets);
723     time_taken = clock() - start_time;
724     fprintf(outfile, "Execute time %.2f milliseconds\n",
725     ((double)time_taken)/(4 * CLOCKS_PER_SEC));
726     }
727    
728     count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,
729     size_offsets);
730    
731     if (count == 0)
732     {
733     fprintf(outfile, "Matched, but too many substrings\n");
734     count = size_offsets/2;
735     }
736    
737     if (count >= 0)
738     {
739     int i;
740     count *= 2;
741     for (i = 0; i < count; i += 2)
742     {
743     if (offsets[i] < 0)
744     fprintf(outfile, "%2d: <unset>\n", i/2);
745     else
746     {
747     fprintf(outfile, "%2d: ", i/2);
748     pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);
749     fprintf(outfile, "\n");
750     }
751     }
752     }
753     else
754     {
755     if (count == -1) fprintf(outfile, "No match\n");
756     else fprintf(outfile, "Error %d\n", count);
757     }
758     }
759     }
760    
761     if (posix || do_posix) regfree(&preg);
762     if (re != NULL) free(re);
763     if (extra != NULL) free(extra);
764     }
765    
766     END_OFF:
767     fprintf(outfile, "\n");
768     return 0;
769     }
770    
771     /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12