/[pcre]/code/tags/pcre-1.02/pcretest.c
ViewVC logotype

Contents of /code/tags/pcre-1.02/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 8 - (hide annotations) (download)
Sat Feb 24 21:38:11 2007 UTC (7 years, 5 months ago) by nigel
File MIME type: text/plain
File size: 19288 byte(s)
Tag code/trunk as code/tags/pcre-1.02.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5     #include <ctype.h>
6     #include <stdio.h>
7     #include <string.h>
8     #include <stdlib.h>
9     #include <time.h>
10    
11     /* Use the internal info for displaying the results of pcre_study(). */
12    
13     #include "internal.h"
14     #include "pcreposix.h"
15    
16     #ifndef CLOCKS_PER_SEC
17     #ifdef CLK_TCK
18     #define CLOCKS_PER_SEC CLK_TCK
19     #else
20     #define CLOCKS_PER_SEC 100
21     #endif
22     #endif
23    
24    
25     static FILE *outfile;
26     static int log_store = 0;
27    
28    
29    
30     /* Debugging function to print the internal form of the regex. This is the same
31     code as contained in pcre.c under the DEBUG macro. */
32    
/* Printable names for compiled opcodes. print_internals() looks entries up as
OP_names[opcode], so the order of this table has to follow the numeric opcode
values (the OP_xxx constants, presumably declared in internal.h - confirm when
adding or reordering entries). */

static const char *OP_names[] = {
  /* Single-item opcodes */
  "End",
  "\\A", "\\B", "\\b",
  "\\D", "\\d", "\\S", "\\s", "\\W", "\\w",
  "Cut", "\\Z", "^", "$", "Any",
  "chars", "not",

  /* Repeat operators: three groups of nine entries, then one group of eight */
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{",

  /* Character class and back reference */
  "class", "Ref",

  /* Grouping and assertion opcodes */
  "Alt", "Ket", "KetRmax", "KetRmin",
  "Assert", "Assert not", "Once",
  "Brazero", "Braminzero", "Bra"
};
45    
46    
47     static void print_internals(pcre *re)
48     {
49     unsigned char *code = ((real_pcre *)re)->code;
50    
51     printf("------------------------------------------------------------------\n");
52    
53     for(;;)
54     {
55     int c;
56     int charlength;
57    
58     printf("%3d ", code - ((real_pcre *)re)->code);
59    
60     if (*code >= OP_BRA)
61     {
62     printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
63     code += 2;
64     }
65    
66     else switch(*code)
67     {
68     case OP_END:
69     printf(" %s\n", OP_names[*code]);
70     printf("------------------------------------------------------------------\n");
71     return;
72    
73     case OP_CHARS:
74     charlength = *(++code);
75     printf("%3d ", charlength);
76     while (charlength-- > 0)
77     if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);
78     break;
79    
80     case OP_KETRMAX:
81     case OP_KETRMIN:
82     case OP_ALT:
83     case OP_KET:
84     case OP_ASSERT:
85     case OP_ASSERT_NOT:
86     case OP_ONCE:
87     printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
88     code += 2;
89     break;
90    
91     case OP_STAR:
92     case OP_MINSTAR:
93     case OP_PLUS:
94     case OP_MINPLUS:
95     case OP_QUERY:
96     case OP_MINQUERY:
97     case OP_TYPESTAR:
98     case OP_TYPEMINSTAR:
99     case OP_TYPEPLUS:
100     case OP_TYPEMINPLUS:
101     case OP_TYPEQUERY:
102     case OP_TYPEMINQUERY:
103     if (*code >= OP_TYPESTAR)
104     printf(" %s", OP_names[code[1]]);
105     else if (isprint(c = code[1])) printf(" %c", c);
106     else printf(" \\x%02x", c);
107     printf("%s", OP_names[*code++]);
108     break;
109    
110     case OP_EXACT:
111     case OP_UPTO:
112     case OP_MINUPTO:
113     if (isprint(c = code[3])) printf(" %c{", c);
114     else printf(" \\x%02x{", c);
115     if (*code != OP_EXACT) printf(",");
116     printf("%d}", (code[1] << 8) + code[2]);
117     if (*code == OP_MINUPTO) printf("?");
118     code += 3;
119     break;
120    
121     case OP_TYPEEXACT:
122     case OP_TYPEUPTO:
123     case OP_TYPEMINUPTO:
124     printf(" %s{", OP_names[code[3]]);
125     if (*code != OP_TYPEEXACT) printf(",");
126     printf("%d}", (code[1] << 8) + code[2]);
127     if (*code == OP_TYPEMINUPTO) printf("?");
128     code += 3;
129     break;
130    
131     case OP_NOT:
132     if (isprint(c = *(++code))) printf(" [^%c]", c);
133     else printf(" [^\\x%02x]", c);
134     break;
135    
136     case OP_NOTSTAR:
137     case OP_NOTMINSTAR:
138     case OP_NOTPLUS:
139     case OP_NOTMINPLUS:
140     case OP_NOTQUERY:
141     case OP_NOTMINQUERY:
142     if (isprint(c = code[1])) printf(" [^%c]", c);
143     else printf(" [^\\x%02x]", c);
144     printf("%s", OP_names[*code++]);
145     break;
146    
147     case OP_NOTEXACT:
148     case OP_NOTUPTO:
149     case OP_NOTMINUPTO:
150     if (isprint(c = code[3])) printf(" [^%c]{", c);
151     else printf(" [^\\x%02x]{", c);
152     if (*code != OP_NOTEXACT) printf(",");
153     printf("%d}", (code[1] << 8) + code[2]);
154     if (*code == OP_NOTMINUPTO) printf("?");
155     code += 3;
156     break;
157    
158     case OP_REF:
159     printf(" \\%d", *(++code));
160     break;
161    
162     case OP_CLASS:
163     {
164     int i, min, max;
165    
166     code++;
167     printf(" [");
168    
169     for (i = 0; i < 256; i++)
170     {
171     if ((code[i/8] & (1 << (i&7))) != 0)
172     {
173     int j;
174     for (j = i+1; j < 256; j++)
175     if ((code[j/8] & (1 << (j&7))) == 0) break;
176     if (i == '-' || i == ']') printf("\\");
177     if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);
178     if (--j > i)
179     {
180     printf("-");
181     if (j == '-' || j == ']') printf("\\");
182     if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);
183     }
184     i = j;
185     }
186     }
187     printf("]");
188     code += 32;
189    
190     switch(*code)
191     {
192     case OP_CRSTAR:
193     case OP_CRMINSTAR:
194     case OP_CRPLUS:
195     case OP_CRMINPLUS:
196     case OP_CRQUERY:
197     case OP_CRMINQUERY:
198     printf("%s", OP_names[*code]);
199     break;
200    
201     case OP_CRRANGE:
202     case OP_CRMINRANGE:
203     min = (code[1] << 8) + code[2];
204     max = (code[3] << 8) + code[4];
205     if (max == 0) printf("{%d,}", min);
206     else printf("{%d,%d}", min, max);
207     if (*code == OP_CRMINRANGE) printf("?");
208     code += 4;
209     break;
210    
211     default:
212     code--;
213     }
214     }
215     break;
216    
217     /* Anything else is just a one-node item */
218    
219     default:
220     printf(" %s", OP_names[*code]);
221     break;
222     }
223    
224     code++;
225     printf("\n");
226     }
227     }
228    
229    
230    
231     /* Character string printing function. */
232    
233     static void pchars(unsigned char *p, int length)
234     {
235     int c;
236     while (length-- > 0)
237     if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
238     else fprintf(outfile, "\\x%02x", c);
239     }
240    
241    
242    
243     /* Alternative malloc function, to test functionality and show the size of the
244     compiled re. */
245    
246     static void *new_malloc(size_t size)
247     {
248     if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);
249     return malloc(size);
250     }
251    
252    
253    
254     /* Read lines from named file or stdin and write to named file or stdout; lines
255     consist of a regular expression, in delimiters and optionally followed by
256     options, followed by a set of test data, terminated by an empty line. */
257    
258     int main(int argc, char **argv)
259     {
260     FILE *infile = stdin;
261     int options = 0;
262     int study_options = 0;
263     int op = 1;
264     int timeit = 0;
265     int showinfo = 0;
266     int posix = 0;
267     int debug = 0;
268     unsigned char buffer[30000];
269     unsigned char dbuffer[1024];
270    
271     /* Static so that new_malloc can use it. */
272    
273     outfile = stdout;
274    
275     /* Scan options */
276    
277     while (argc > 1 && argv[op][0] == '-')
278     {
279     if (strcmp(argv[op], "-s") == 0) log_store = 1;
280     else if (strcmp(argv[op], "-t") == 0) timeit = 1;
281     else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
282     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
283     else if (strcmp(argv[op], "-p") == 0) posix = 1;
284     else
285     {
286     printf("*** Unknown option %s\n", argv[op]);
287     return 1;
288     }
289     op++;
290     argc--;
291     }
292    
293     /* Sort out the input and output files */
294    
295     if (argc > 1)
296     {
297     infile = fopen(argv[op], "r");
298     if (infile == NULL)
299     {
300     printf("** Failed to open %s\n", argv[op]);
301     return 1;
302     }
303     }
304    
305     if (argc > 2)
306     {
307     outfile = fopen(argv[op+1], "w");
308     if (outfile == NULL)
309     {
310     printf("** Failed to open %s\n", argv[op+1]);
311     return 1;
312     }
313     }
314    
315     /* Set alternative malloc function */
316    
317     pcre_malloc = new_malloc;
318    
319     /* Heading line, then prompt for first re if stdin */
320    
321     fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");
322     fprintf(outfile, "PCRE version %s\n\n", pcre_version());
323    
324     /* Main loop */
325    
326     for (;;)
327     {
328     pcre *re = NULL;
329     pcre_extra *extra = NULL;
330     regex_t preg;
331 nigel 7 const char *error;
332 nigel 3 unsigned char *p, *pp;
333     int do_study = 0;
334     int do_debug = 0;
335     int do_posix = 0;
336     int erroroffset, len, delimiter;
337    
338     if (infile == stdin) printf(" re> ");
339     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
340     if (infile != stdin) fprintf(outfile, (char *)buffer);
341    
342     p = buffer;
343     while (isspace(*p)) p++;
344     if (*p == 0) continue;
345    
346     /* Get the delimiter and seek the end of the pattern; if is isn't
347     complete, read more. */
348    
349     delimiter = *p++;
350    
351     if (isalnum(delimiter))
352     {
353     fprintf(outfile, "** Delimiter must not be alphameric\n");
354     goto SKIP_DATA;
355     }
356    
357     pp = p;
358    
359     for(;;)
360     {
361     while (*pp != 0 && *pp != delimiter) pp++;
362     if (*pp != 0) break;
363    
364     len = sizeof(buffer) - (pp - buffer);
365     if (len < 256)
366     {
367     fprintf(outfile, "** Expression too long - missing delimiter?\n");
368     goto SKIP_DATA;
369     }
370    
371     if (infile == stdin) printf(" > ");
372     if (fgets((char *)pp, len, infile) == NULL)
373     {
374     fprintf(outfile, "** Unexpected EOF\n");
375     goto END_OFF;
376     }
377     if (infile != stdin) fprintf(outfile, (char *)pp);
378     }
379    
380     /* Terminate the pattern at the delimiter */
381    
382     *pp++ = 0;
383    
384     /* Look for options after final delimiter */
385    
386     options = 0;
387     study_options = 0;
388     while (*pp != 0)
389     {
390     switch (*pp++)
391     {
392     case 'i': options |= PCRE_CASELESS; break;
393     case 'm': options |= PCRE_MULTILINE; break;
394     case 's': options |= PCRE_DOTALL; break;
395     case 'x': options |= PCRE_EXTENDED; break;
396     case 'A': options |= PCRE_ANCHORED; break;
397     case 'D': do_debug = 1; break;
398     case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
399     case 'P': do_posix = 1; break;
400     case 'S': do_study = 1; break;
401     case 'I': study_options |= PCRE_CASELESS; break;
402     case 'X': options |= PCRE_EXTRA; break;
403     case '\n': case ' ': break;
404     default:
405     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
406     goto SKIP_DATA;
407     }
408     }
409    
410     /* Handle compiing via the POSIX interface, which doesn't support the
411     timing, showing, or debugging options. */
412    
413     if (posix || do_posix)
414     {
415     int rc;
416     int cflags = 0;
417     if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
418     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
419     rc = regcomp(&preg, (char *)p, cflags);
420    
421     /* Compilation failed; go back for another re, skipping to blank line
422     if non-interactive. */
423    
424     if (rc != 0)
425     {
426     (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
427     fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
428     goto SKIP_DATA;
429     }
430     }
431    
432     /* Handle compiling via the native interface */
433    
434     else
435     {
436     if (timeit)
437     {
438     register int i;
439     clock_t time_taken;
440     clock_t start_time = clock();
441     for (i = 0; i < 4000; i++)
442     {
443     re = pcre_compile((char *)p, options, &error, &erroroffset);
444     if (re != NULL) free(re);
445     }
446     time_taken = clock() - start_time;
447     fprintf(outfile, "Compile time %.2f milliseconds\n",
448     ((double)time_taken)/(4 * CLOCKS_PER_SEC));
449     }
450    
451     re = pcre_compile((char *)p, options, &error, &erroroffset);
452    
453     /* Compilation failed; go back for another re, skipping to blank line
454     if non-interactive. */
455    
456     if (re == NULL)
457     {
458     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
459     SKIP_DATA:
460     if (infile != stdin)
461     {
462     for (;;)
463     {
464     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
465     goto END_OFF;
466     len = (int)strlen((char *)buffer);
467     while (len > 0 && isspace(buffer[len-1])) len--;
468     if (len == 0) break;
469     }
470     fprintf(outfile, "\n");
471     }
472     continue;
473     }
474    
475     /* Compilation succeeded; print data if required */
476    
477     if (showinfo || do_debug)
478     {
479     int first_char, count;
480    
481     if (debug || do_debug) print_internals(re);
482    
483     count = pcre_info(re, &options, &first_char);
484     if (count < 0) fprintf(outfile,
485     "Error %d while reading info\n", count);
486     else
487     {
488     fprintf(outfile, "Identifying subpattern count = %d\n", count);
489     if (options == 0) fprintf(outfile, "No options\n");
490     else fprintf(outfile, "Options:%s%s%s%s%s%s%s\n",
491     ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
492     ((options & PCRE_CASELESS) != 0)? " caseless" : "",
493     ((options & PCRE_EXTENDED) != 0)? " extended" : "",
494     ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
495     ((options & PCRE_DOTALL) != 0)? " dotall" : "",
496     ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
497     ((options & PCRE_EXTRA) != 0)? " extra" : "");
498     if (first_char == -1)
499     {
500     fprintf(outfile, "First char at start or follows \\n\n");
501     }
502     else if (first_char < 0)
503     {
504     fprintf(outfile, "No first char\n");
505     }
506     else
507     {
508     if (isprint(first_char))
509     fprintf(outfile, "First char = \'%c\'\n", first_char);
510     else
511     fprintf(outfile, "First char = %d\n", first_char);
512     }
513     }
514     }
515    
516     /* If /S was present, study the regexp to generate additional info to
517     help with the matching. */
518    
519     if (do_study)
520     {
521     if (timeit)
522     {
523     register int i;
524     clock_t time_taken;
525     clock_t start_time = clock();
526     for (i = 0; i < 4000; i++)
527     extra = pcre_study(re, study_options, &error);
528     time_taken = clock() - start_time;
529     if (extra != NULL) free(extra);
530     fprintf(outfile, " Study time %.2f milliseconds\n",
531     ((double)time_taken)/(4 * CLOCKS_PER_SEC));
532     }
533    
534     extra = pcre_study(re, study_options, &error);
535     if (error != NULL)
536     fprintf(outfile, "Failed to study: %s\n", error);
537     else if (extra == NULL)
538     fprintf(outfile, "Study returned NULL\n");
539    
540     /* This looks at internal information. A bit kludgy to do it this
541     way, but it is useful for testing. */
542    
543     else if (showinfo || do_debug)
544     {
545     real_pcre_extra *xx = (real_pcre_extra *)extra;
546     if ((xx->options & PCRE_STUDY_MAPPED) == 0)
547     fprintf(outfile, "No starting character set\n");
548     else
549     {
550     int i;
551     int c = 24;
552     fprintf(outfile, "Starting character set: ");
553     for (i = 0; i < 256; i++)
554     {
555     if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)
556     {
557     if (c > 75)
558     {
559     fprintf(outfile, "\n ");
560     c = 2;
561     }
562     if (isprint(i) && i != ' ')
563     {
564     fprintf(outfile, "%c ", i);
565     c += 2;
566     }
567     else
568     {
569     fprintf(outfile, "\\x%02x ", i);
570     c += 5;
571     }
572     }
573     }
574     fprintf(outfile, "\n");
575     }
576     }
577     }
578     }
579    
580     /* Read data lines and test them */
581    
582     for (;;)
583     {
584     unsigned char *pp;
585     int count, c;
586     int offsets[30];
587     int size_offsets = sizeof(offsets)/sizeof(int);
588    
589     options = 0;
590    
591     if (infile == stdin) printf(" data> ");
592     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) goto END_OFF;
593     if (infile != stdin) fprintf(outfile, (char *)buffer);
594    
595     len = (int)strlen((char *)buffer);
596     while (len > 0 && isspace(buffer[len-1])) len--;
597     buffer[len] = 0;
598     if (len == 0) break;
599    
600     p = buffer;
601     while (isspace(*p)) p++;
602    
603     pp = dbuffer;
604     while ((c = *p++) != 0)
605     {
606     int i = 0;
607     int n = 0;
608     if (c == '\\') switch ((c = *p++))
609     {
610     case 'a': c = 7; break;
611     case 'b': c = '\b'; break;
612     case 'e': c = 27; break;
613     case 'f': c = '\f'; break;
614     case 'n': c = '\n'; break;
615     case 'r': c = '\r'; break;
616     case 't': c = '\t'; break;
617     case 'v': c = '\v'; break;
618    
619     case '0': case '1': case '2': case '3':
620     case '4': case '5': case '6': case '7':
621     c -= '0';
622     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
623     c = c * 8 + *p++ - '0';
624     break;
625    
626     case 'x':
627     c = 0;
628     while (i++ < 2 && isxdigit(*p))
629     {
630     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
631     p++;
632     }
633     break;
634    
635     case 0: /* Allows for an empty line */
636     p--;
637     continue;
638    
639     case 'A': /* Option setting */
640     options |= PCRE_ANCHORED;
641     continue;
642    
643     case 'B':
644     options |= PCRE_NOTBOL;
645     continue;
646    
647     case 'E':
648     options |= PCRE_DOLLAR_ENDONLY;
649     continue;
650    
651     case 'I':
652     options |= PCRE_CASELESS;
653     continue;
654    
655     case 'M':
656     options |= PCRE_MULTILINE;
657     continue;
658    
659     case 'S':
660     options |= PCRE_DOTALL;
661     continue;
662    
663     case 'O':
664     while(isdigit(*p)) n = n * 10 + *p++ - '0';
665 nigel 7 if (n <= (int)sizeof(offsets)/sizeof(int)) size_offsets = n;
666 nigel 3 continue;
667    
668     case 'Z':
669     options |= PCRE_NOTEOL;
670     continue;
671     }
672     *pp++ = c;
673     }
674     *pp = 0;
675     len = pp - dbuffer;
676    
677     /* Handle matching via the POSIX interface, which does not
678     support timing. */
679    
680     if (posix || do_posix)
681     {
682     int rc;
683     int eflags = 0;
684     regmatch_t pmatch[30];
685     if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
686     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
687    
688     rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),
689     pmatch, eflags);
690    
691     if (rc != 0)
692     {
693     (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
694     fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
695     }
696     else
697     {
698 nigel 7 size_t i;
699 nigel 3 for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)
700     {
701     if (pmatch[i].rm_so >= 0)
702     {
703     fprintf(outfile, "%2d: ", i);
704     pchars(dbuffer + pmatch[i].rm_so,
705     pmatch[i].rm_eo - pmatch[i].rm_so);
706     fprintf(outfile, "\n");
707     }
708     }
709     }
710     }
711    
712     /* Handle matching via the native interface */
713    
714     else
715     {
716     if (timeit)
717     {
718     register int i;
719     clock_t time_taken;
720     clock_t start_time = clock();
721     for (i = 0; i < 4000; i++)
722     count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,
723     size_offsets);
724     time_taken = clock() - start_time;
725     fprintf(outfile, "Execute time %.2f milliseconds\n",
726     ((double)time_taken)/(4 * CLOCKS_PER_SEC));
727     }
728    
729     count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,
730     size_offsets);
731    
732     if (count == 0)
733     {
734     fprintf(outfile, "Matched, but too many substrings\n");
735     count = size_offsets/2;
736     }
737    
738     if (count >= 0)
739     {
740     int i;
741     count *= 2;
742     for (i = 0; i < count; i += 2)
743     {
744     if (offsets[i] < 0)
745     fprintf(outfile, "%2d: <unset>\n", i/2);
746     else
747     {
748     fprintf(outfile, "%2d: ", i/2);
749     pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);
750     fprintf(outfile, "\n");
751     }
752     }
753     }
754     else
755     {
756     if (count == -1) fprintf(outfile, "No match\n");
757     else fprintf(outfile, "Error %d\n", count);
758     }
759     }
760     }
761    
762     if (posix || do_posix) regfree(&preg);
763     if (re != NULL) free(re);
764     if (extra != NULL) free(extra);
765     }
766    
767     END_OFF:
768     fprintf(outfile, "\n");
769     return 0;
770     }
771    
772     /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12