/[pcre]/code/tags/pcre-1.03/pcretest.c
ViewVC logotype

Contents of /code/tags/pcre-1.03/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 10 - (hide annotations) (download)
Sat Feb 24 21:38:15 2007 UTC (7 years, 4 months ago) by nigel
File MIME type: text/plain
File size: 19338 byte(s)
Tag code/trunk as code/tags/pcre-1.03.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5     #include <ctype.h>
6     #include <stdio.h>
7     #include <string.h>
8     #include <stdlib.h>
9     #include <time.h>
10    
11     /* Use the internal info for displaying the results of pcre_study(). */
12    
13     #include "internal.h"
14     #include "pcreposix.h"
15    
16     #ifndef CLOCKS_PER_SEC
17     #ifdef CLK_TCK
18     #define CLOCKS_PER_SEC CLK_TCK
19     #else
20     #define CLOCKS_PER_SEC 100
21     #endif
22     #endif
23    
24    
25     static FILE *outfile;       /* Stream the test results are written to; main() sets it to stdout or to the opened output file. File-scope so that new_malloc() and pchars() can write to it. */
26     static int log_store = 0;   /* Set to 1 by the -s command line option; when non-zero, new_malloc() reports each size request on outfile. */
27    
28    
29    
/* Table of printable opcode names used by the debugging function
print_internals(), which indexes it directly with opcode byte values taken
from the compiled pattern. The entry order therefore has to stay in step with
the opcode numbering (presumably the OP_xxx list in internal.h, and the same
table as in pcre.c under the DEBUG macro - confirm before editing). */

static const char *OP_names[] = {
  /* End marker and the backslash escapes */
  "End",
  "\\A", "\\B", "\\b",
  "\\D", "\\d", "\\S", "\\s", "\\W", "\\w",

  /* Other single items */
  "Cut", "\\Z", "^", "$", "Any", "chars", "not",

  /* Three families of nine repeat operators each */
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",

  /* One family of eight repeat operators */
  "*", "*?", "+", "+?", "?", "??", "{", "{",

  /* Class and back reference */
  "class", "Ref",

  /* Bracket structure and assertions */
  "Alt", "Ket", "KetRmax", "KetRmin",
  "Assert", "Assert not", "Once",
  "Brazero", "Braminzero", "Bra"
};
45    
46    
47     static void print_internals(pcre *re)
48     {
49     unsigned char *code = ((real_pcre *)re)->code;
50    
51     printf("------------------------------------------------------------------\n");
52    
53     for(;;)
54     {
55     int c;
56     int charlength;
57    
58     printf("%3d ", code - ((real_pcre *)re)->code);
59    
60     if (*code >= OP_BRA)
61     {
62     printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
63     code += 2;
64     }
65    
66     else switch(*code)
67     {
68     case OP_END:
69     printf(" %s\n", OP_names[*code]);
70     printf("------------------------------------------------------------------\n");
71     return;
72    
73     case OP_CHARS:
74     charlength = *(++code);
75     printf("%3d ", charlength);
76     while (charlength-- > 0)
77     if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);
78     break;
79    
80     case OP_KETRMAX:
81     case OP_KETRMIN:
82     case OP_ALT:
83     case OP_KET:
84     case OP_ASSERT:
85     case OP_ASSERT_NOT:
86     case OP_ONCE:
87     printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
88     code += 2;
89     break;
90    
91     case OP_STAR:
92     case OP_MINSTAR:
93     case OP_PLUS:
94     case OP_MINPLUS:
95     case OP_QUERY:
96     case OP_MINQUERY:
97     case OP_TYPESTAR:
98     case OP_TYPEMINSTAR:
99     case OP_TYPEPLUS:
100     case OP_TYPEMINPLUS:
101     case OP_TYPEQUERY:
102     case OP_TYPEMINQUERY:
103     if (*code >= OP_TYPESTAR)
104     printf(" %s", OP_names[code[1]]);
105     else if (isprint(c = code[1])) printf(" %c", c);
106     else printf(" \\x%02x", c);
107     printf("%s", OP_names[*code++]);
108     break;
109    
110     case OP_EXACT:
111     case OP_UPTO:
112     case OP_MINUPTO:
113     if (isprint(c = code[3])) printf(" %c{", c);
114     else printf(" \\x%02x{", c);
115     if (*code != OP_EXACT) printf(",");
116     printf("%d}", (code[1] << 8) + code[2]);
117     if (*code == OP_MINUPTO) printf("?");
118     code += 3;
119     break;
120    
121     case OP_TYPEEXACT:
122     case OP_TYPEUPTO:
123     case OP_TYPEMINUPTO:
124     printf(" %s{", OP_names[code[3]]);
125     if (*code != OP_TYPEEXACT) printf(",");
126     printf("%d}", (code[1] << 8) + code[2]);
127     if (*code == OP_TYPEMINUPTO) printf("?");
128     code += 3;
129     break;
130    
131     case OP_NOT:
132     if (isprint(c = *(++code))) printf(" [^%c]", c);
133     else printf(" [^\\x%02x]", c);
134     break;
135    
136     case OP_NOTSTAR:
137     case OP_NOTMINSTAR:
138     case OP_NOTPLUS:
139     case OP_NOTMINPLUS:
140     case OP_NOTQUERY:
141     case OP_NOTMINQUERY:
142     if (isprint(c = code[1])) printf(" [^%c]", c);
143     else printf(" [^\\x%02x]", c);
144     printf("%s", OP_names[*code++]);
145     break;
146    
147     case OP_NOTEXACT:
148     case OP_NOTUPTO:
149     case OP_NOTMINUPTO:
150     if (isprint(c = code[3])) printf(" [^%c]{", c);
151     else printf(" [^\\x%02x]{", c);
152     if (*code != OP_NOTEXACT) printf(",");
153     printf("%d}", (code[1] << 8) + code[2]);
154     if (*code == OP_NOTMINUPTO) printf("?");
155     code += 3;
156     break;
157    
158     case OP_REF:
159     printf(" \\%d", *(++code));
160 nigel 9 code++;
161     goto CLASS_REF_REPEAT;
162 nigel 3
163     case OP_CLASS:
164     {
165     int i, min, max;
166    
167     code++;
168     printf(" [");
169    
170     for (i = 0; i < 256; i++)
171     {
172     if ((code[i/8] & (1 << (i&7))) != 0)
173     {
174     int j;
175     for (j = i+1; j < 256; j++)
176     if ((code[j/8] & (1 << (j&7))) == 0) break;
177     if (i == '-' || i == ']') printf("\\");
178     if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);
179     if (--j > i)
180     {
181     printf("-");
182     if (j == '-' || j == ']') printf("\\");
183     if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);
184     }
185     i = j;
186     }
187     }
188     printf("]");
189     code += 32;
190    
191 nigel 9 CLASS_REF_REPEAT:
192    
193 nigel 3 switch(*code)
194     {
195     case OP_CRSTAR:
196     case OP_CRMINSTAR:
197     case OP_CRPLUS:
198     case OP_CRMINPLUS:
199     case OP_CRQUERY:
200     case OP_CRMINQUERY:
201     printf("%s", OP_names[*code]);
202     break;
203    
204     case OP_CRRANGE:
205     case OP_CRMINRANGE:
206     min = (code[1] << 8) + code[2];
207     max = (code[3] << 8) + code[4];
208     if (max == 0) printf("{%d,}", min);
209     else printf("{%d,%d}", min, max);
210     if (*code == OP_CRMINRANGE) printf("?");
211     code += 4;
212     break;
213    
214     default:
215     code--;
216     }
217     }
218     break;
219    
220     /* Anything else is just a one-node item */
221    
222     default:
223     printf(" %s", OP_names[*code]);
224     break;
225     }
226    
227     code++;
228     printf("\n");
229     }
230     }
231    
232    
233    
234     /* Character string printing function. */
235    
236     static void pchars(unsigned char *p, int length)
237     {
238     int c;
239     while (length-- > 0)
240     if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
241     else fprintf(outfile, "\\x%02x", c);
242     }
243    
244    
245    
246     /* Alternative malloc function, to test functionality and show the size of the
247     compiled re. */
248    
249     static void *new_malloc(size_t size)
250     {
251     if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);
252     return malloc(size);
253     }
254    
255    
256    
257     /* Read lines from named file or stdin and write to named file or stdout; lines
258     consist of a regular expression, in delimiters and optionally followed by
259     options, followed by a set of test data, terminated by an empty line. */
260    
261     int main(int argc, char **argv)
262     {
263     FILE *infile = stdin;
264     int options = 0;
265     int study_options = 0;
266     int op = 1;
267     int timeit = 0;
268     int showinfo = 0;
269     int posix = 0;
270     int debug = 0;
271     unsigned char buffer[30000];
272     unsigned char dbuffer[1024];
273    
274     /* Static so that new_malloc can use it. */
275    
276     outfile = stdout;
277    
278     /* Scan options */
279    
280     while (argc > 1 && argv[op][0] == '-')
281     {
282     if (strcmp(argv[op], "-s") == 0) log_store = 1;
283     else if (strcmp(argv[op], "-t") == 0) timeit = 1;
284     else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
285     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
286     else if (strcmp(argv[op], "-p") == 0) posix = 1;
287     else
288     {
289     printf("*** Unknown option %s\n", argv[op]);
290     return 1;
291     }
292     op++;
293     argc--;
294     }
295    
296     /* Sort out the input and output files */
297    
298     if (argc > 1)
299     {
300     infile = fopen(argv[op], "r");
301     if (infile == NULL)
302     {
303     printf("** Failed to open %s\n", argv[op]);
304     return 1;
305     }
306     }
307    
308     if (argc > 2)
309     {
310     outfile = fopen(argv[op+1], "w");
311     if (outfile == NULL)
312     {
313     printf("** Failed to open %s\n", argv[op+1]);
314     return 1;
315     }
316     }
317    
318     /* Set alternative malloc function */
319    
320     pcre_malloc = new_malloc;
321    
322     /* Heading line, then prompt for first re if stdin */
323    
324     fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");
325     fprintf(outfile, "PCRE version %s\n\n", pcre_version());
326    
327     /* Main loop */
328    
329     for (;;)
330     {
331     pcre *re = NULL;
332     pcre_extra *extra = NULL;
333     regex_t preg;
334 nigel 7 const char *error;
335 nigel 3 unsigned char *p, *pp;
336     int do_study = 0;
337     int do_debug = 0;
338     int do_posix = 0;
339     int erroroffset, len, delimiter;
340    
341     if (infile == stdin) printf(" re> ");
342     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
343     if (infile != stdin) fprintf(outfile, (char *)buffer);
344    
345     p = buffer;
346     while (isspace(*p)) p++;
347     if (*p == 0) continue;
348    
349     /* Get the delimiter and seek the end of the pattern; if is isn't
350     complete, read more. */
351    
352     delimiter = *p++;
353    
354     if (isalnum(delimiter))
355     {
356     fprintf(outfile, "** Delimiter must not be alphameric\n");
357     goto SKIP_DATA;
358     }
359    
360     pp = p;
361    
362     for(;;)
363     {
364     while (*pp != 0 && *pp != delimiter) pp++;
365     if (*pp != 0) break;
366    
367     len = sizeof(buffer) - (pp - buffer);
368     if (len < 256)
369     {
370     fprintf(outfile, "** Expression too long - missing delimiter?\n");
371     goto SKIP_DATA;
372     }
373    
374     if (infile == stdin) printf(" > ");
375     if (fgets((char *)pp, len, infile) == NULL)
376     {
377     fprintf(outfile, "** Unexpected EOF\n");
378     goto END_OFF;
379     }
380     if (infile != stdin) fprintf(outfile, (char *)pp);
381     }
382    
383     /* Terminate the pattern at the delimiter */
384    
385     *pp++ = 0;
386    
387     /* Look for options after final delimiter */
388    
389     options = 0;
390     study_options = 0;
391     while (*pp != 0)
392     {
393     switch (*pp++)
394     {
395     case 'i': options |= PCRE_CASELESS; break;
396     case 'm': options |= PCRE_MULTILINE; break;
397     case 's': options |= PCRE_DOTALL; break;
398     case 'x': options |= PCRE_EXTENDED; break;
399     case 'A': options |= PCRE_ANCHORED; break;
400     case 'D': do_debug = 1; break;
401     case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
402     case 'P': do_posix = 1; break;
403     case 'S': do_study = 1; break;
404     case 'I': study_options |= PCRE_CASELESS; break;
405     case 'X': options |= PCRE_EXTRA; break;
406     case '\n': case ' ': break;
407     default:
408     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
409     goto SKIP_DATA;
410     }
411     }
412    
413     /* Handle compiing via the POSIX interface, which doesn't support the
414     timing, showing, or debugging options. */
415    
416     if (posix || do_posix)
417     {
418     int rc;
419     int cflags = 0;
420     if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
421     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
422     rc = regcomp(&preg, (char *)p, cflags);
423    
424     /* Compilation failed; go back for another re, skipping to blank line
425     if non-interactive. */
426    
427     if (rc != 0)
428     {
429     (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
430     fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
431     goto SKIP_DATA;
432     }
433     }
434    
435     /* Handle compiling via the native interface */
436    
437     else
438     {
439     if (timeit)
440     {
441     register int i;
442     clock_t time_taken;
443     clock_t start_time = clock();
444     for (i = 0; i < 4000; i++)
445     {
446     re = pcre_compile((char *)p, options, &error, &erroroffset);
447     if (re != NULL) free(re);
448     }
449     time_taken = clock() - start_time;
450     fprintf(outfile, "Compile time %.2f milliseconds\n",
451     ((double)time_taken)/(4 * CLOCKS_PER_SEC));
452     }
453    
454     re = pcre_compile((char *)p, options, &error, &erroroffset);
455    
456     /* Compilation failed; go back for another re, skipping to blank line
457     if non-interactive. */
458    
459     if (re == NULL)
460     {
461     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
462     SKIP_DATA:
463     if (infile != stdin)
464     {
465     for (;;)
466     {
467     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
468     goto END_OFF;
469     len = (int)strlen((char *)buffer);
470     while (len > 0 && isspace(buffer[len-1])) len--;
471     if (len == 0) break;
472     }
473     fprintf(outfile, "\n");
474     }
475     continue;
476     }
477    
478     /* Compilation succeeded; print data if required */
479    
480     if (showinfo || do_debug)
481     {
482     int first_char, count;
483    
484     if (debug || do_debug) print_internals(re);
485    
486     count = pcre_info(re, &options, &first_char);
487     if (count < 0) fprintf(outfile,
488     "Error %d while reading info\n", count);
489     else
490     {
491     fprintf(outfile, "Identifying subpattern count = %d\n", count);
492     if (options == 0) fprintf(outfile, "No options\n");
493     else fprintf(outfile, "Options:%s%s%s%s%s%s%s\n",
494     ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
495     ((options & PCRE_CASELESS) != 0)? " caseless" : "",
496     ((options & PCRE_EXTENDED) != 0)? " extended" : "",
497     ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
498     ((options & PCRE_DOTALL) != 0)? " dotall" : "",
499     ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
500     ((options & PCRE_EXTRA) != 0)? " extra" : "");
501     if (first_char == -1)
502     {
503     fprintf(outfile, "First char at start or follows \\n\n");
504     }
505     else if (first_char < 0)
506     {
507     fprintf(outfile, "No first char\n");
508     }
509     else
510     {
511     if (isprint(first_char))
512     fprintf(outfile, "First char = \'%c\'\n", first_char);
513     else
514     fprintf(outfile, "First char = %d\n", first_char);
515     }
516     }
517     }
518    
519     /* If /S was present, study the regexp to generate additional info to
520     help with the matching. */
521    
522     if (do_study)
523     {
524     if (timeit)
525     {
526     register int i;
527     clock_t time_taken;
528     clock_t start_time = clock();
529     for (i = 0; i < 4000; i++)
530     extra = pcre_study(re, study_options, &error);
531     time_taken = clock() - start_time;
532     if (extra != NULL) free(extra);
533     fprintf(outfile, " Study time %.2f milliseconds\n",
534     ((double)time_taken)/(4 * CLOCKS_PER_SEC));
535     }
536    
537     extra = pcre_study(re, study_options, &error);
538     if (error != NULL)
539     fprintf(outfile, "Failed to study: %s\n", error);
540     else if (extra == NULL)
541     fprintf(outfile, "Study returned NULL\n");
542    
543     /* This looks at internal information. A bit kludgy to do it this
544     way, but it is useful for testing. */
545    
546     else if (showinfo || do_debug)
547     {
548     real_pcre_extra *xx = (real_pcre_extra *)extra;
549     if ((xx->options & PCRE_STUDY_MAPPED) == 0)
550     fprintf(outfile, "No starting character set\n");
551     else
552     {
553     int i;
554     int c = 24;
555     fprintf(outfile, "Starting character set: ");
556     for (i = 0; i < 256; i++)
557     {
558     if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)
559     {
560     if (c > 75)
561     {
562     fprintf(outfile, "\n ");
563     c = 2;
564     }
565     if (isprint(i) && i != ' ')
566     {
567     fprintf(outfile, "%c ", i);
568     c += 2;
569     }
570     else
571     {
572     fprintf(outfile, "\\x%02x ", i);
573     c += 5;
574     }
575     }
576     }
577     fprintf(outfile, "\n");
578     }
579     }
580     }
581     }
582    
583     /* Read data lines and test them */
584    
585     for (;;)
586     {
587 nigel 9 unsigned char *q;
588 nigel 3 int count, c;
589     int offsets[30];
590     int size_offsets = sizeof(offsets)/sizeof(int);
591    
592     options = 0;
593    
594     if (infile == stdin) printf(" data> ");
595     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) goto END_OFF;
596     if (infile != stdin) fprintf(outfile, (char *)buffer);
597    
598     len = (int)strlen((char *)buffer);
599     while (len > 0 && isspace(buffer[len-1])) len--;
600     buffer[len] = 0;
601     if (len == 0) break;
602    
603     p = buffer;
604     while (isspace(*p)) p++;
605    
606 nigel 9 q = dbuffer;
607 nigel 3 while ((c = *p++) != 0)
608     {
609     int i = 0;
610     int n = 0;
611     if (c == '\\') switch ((c = *p++))
612     {
613     case 'a': c = 7; break;
614     case 'b': c = '\b'; break;
615     case 'e': c = 27; break;
616     case 'f': c = '\f'; break;
617     case 'n': c = '\n'; break;
618     case 'r': c = '\r'; break;
619     case 't': c = '\t'; break;
620     case 'v': c = '\v'; break;
621    
622     case '0': case '1': case '2': case '3':
623     case '4': case '5': case '6': case '7':
624     c -= '0';
625     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
626     c = c * 8 + *p++ - '0';
627     break;
628    
629     case 'x':
630     c = 0;
631     while (i++ < 2 && isxdigit(*p))
632     {
633     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
634     p++;
635     }
636     break;
637    
638     case 0: /* Allows for an empty line */
639     p--;
640     continue;
641    
642     case 'A': /* Option setting */
643     options |= PCRE_ANCHORED;
644     continue;
645    
646     case 'B':
647     options |= PCRE_NOTBOL;
648     continue;
649    
650     case 'E':
651     options |= PCRE_DOLLAR_ENDONLY;
652     continue;
653    
654     case 'I':
655     options |= PCRE_CASELESS;
656     continue;
657    
658     case 'M':
659     options |= PCRE_MULTILINE;
660     continue;
661    
662     case 'S':
663     options |= PCRE_DOTALL;
664     continue;
665    
666     case 'O':
667     while(isdigit(*p)) n = n * 10 + *p++ - '0';
668 nigel 9 if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;
669 nigel 3 continue;
670    
671     case 'Z':
672     options |= PCRE_NOTEOL;
673     continue;
674     }
675 nigel 9 *q++ = c;
676 nigel 3 }
677 nigel 9 *q = 0;
678     len = q - dbuffer;
679 nigel 3
680     /* Handle matching via the POSIX interface, which does not
681     support timing. */
682    
683     if (posix || do_posix)
684     {
685     int rc;
686     int eflags = 0;
687     regmatch_t pmatch[30];
688     if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
689     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
690    
691     rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),
692     pmatch, eflags);
693    
694     if (rc != 0)
695     {
696     (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
697     fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
698     }
699     else
700     {
701 nigel 7 size_t i;
702 nigel 3 for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)
703     {
704     if (pmatch[i].rm_so >= 0)
705     {
706     fprintf(outfile, "%2d: ", i);
707     pchars(dbuffer + pmatch[i].rm_so,
708     pmatch[i].rm_eo - pmatch[i].rm_so);
709     fprintf(outfile, "\n");
710     }
711     }
712     }
713     }
714    
715     /* Handle matching via the native interface */
716    
717     else
718     {
719     if (timeit)
720     {
721     register int i;
722     clock_t time_taken;
723     clock_t start_time = clock();
724     for (i = 0; i < 4000; i++)
725     count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,
726     size_offsets);
727     time_taken = clock() - start_time;
728     fprintf(outfile, "Execute time %.2f milliseconds\n",
729     ((double)time_taken)/(4 * CLOCKS_PER_SEC));
730     }
731    
732     count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,
733     size_offsets);
734    
735     if (count == 0)
736     {
737     fprintf(outfile, "Matched, but too many substrings\n");
738     count = size_offsets/2;
739     }
740    
741     if (count >= 0)
742     {
743     int i;
744     count *= 2;
745     for (i = 0; i < count; i += 2)
746     {
747     if (offsets[i] < 0)
748     fprintf(outfile, "%2d: <unset>\n", i/2);
749     else
750     {
751     fprintf(outfile, "%2d: ", i/2);
752     pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);
753     fprintf(outfile, "\n");
754     }
755     }
756     }
757     else
758     {
759     if (count == -1) fprintf(outfile, "No match\n");
760     else fprintf(outfile, "Error %d\n", count);
761     }
762     }
763     }
764    
765     if (posix || do_posix) regfree(&preg);
766     if (re != NULL) free(re);
767     if (extra != NULL) free(extra);
768     }
769    
770     END_OFF:
771     fprintf(outfile, "\n");
772     return 0;
773     }
774    
775     /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12