/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory | Revision Log


Revision 7 - (show annotations) (download)
Sat Feb 24 21:38:09 2007 UTC (7 years, 1 month ago) by nigel
File MIME type: text/plain
File size: 19288 byte(s)
Load pcre-1.02 into code/trunk.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 #include <ctype.h>
6 #include <stdio.h>
7 #include <string.h>
8 #include <stdlib.h>
9 #include <time.h>
10
11 /* Use the internal info for displaying the results of pcre_study(). */
12
13 #include "internal.h"
14 #include "pcreposix.h"
15
/* Fallback for libraries whose <time.h> does not supply CLOCKS_PER_SEC:
   use CLK_TCK when that is available, otherwise assume 100 ticks per second. */
#if !defined(CLOCKS_PER_SEC)
#  if defined(CLK_TCK)
#    define CLOCKS_PER_SEC CLK_TCK
#  else
#    define CLOCKS_PER_SEC 100
#  endif
#endif
23
24
/* Stream that receives the test results; main() assigns it before use. */
static FILE *outfile = NULL;

/* Non-zero makes new_malloc() report each requested size; set by "-s". */
static int log_store;
27
28
29
30 /* Debugging function to print the internal form of the regex. This is the same
31 code as contained in pcre.c under the DEBUG macro. */
32
/* Display text for compiled items. print_internals() indexes this table
   directly with the byte found in the compiled code, so the order of the
   entries must not change.
   NOTE(review): the entries presumably follow the opcode numbering declared
   in internal.h - confirm there before editing. */
static const char *OP_names[] = {
  /* single items */
  "End",
  "\\A", "\\B", "\\b", "\\D", "\\d", "\\S", "\\s", "\\W", "\\w",
  "Cut", "\\Z", "^", "$", "Any", "chars", "not",

  /* four consecutive groups of repeat suffixes */
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{",

  /* classes, references and group structure */
  "class", "Ref",
  "Alt", "Ket", "KetRmax", "KetRmin",
  "Assert", "Assert not", "Once",
  "Brazero", "Braminzero", "Bra"
};
45
46
47 static void print_internals(pcre *re)
48 {
49 unsigned char *code = ((real_pcre *)re)->code;
50
51 printf("------------------------------------------------------------------\n");
52
53 for(;;)
54 {
55 int c;
56 int charlength;
57
58 printf("%3d ", code - ((real_pcre *)re)->code);
59
60 if (*code >= OP_BRA)
61 {
62 printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
63 code += 2;
64 }
65
66 else switch(*code)
67 {
68 case OP_END:
69 printf(" %s\n", OP_names[*code]);
70 printf("------------------------------------------------------------------\n");
71 return;
72
73 case OP_CHARS:
74 charlength = *(++code);
75 printf("%3d ", charlength);
76 while (charlength-- > 0)
77 if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);
78 break;
79
80 case OP_KETRMAX:
81 case OP_KETRMIN:
82 case OP_ALT:
83 case OP_KET:
84 case OP_ASSERT:
85 case OP_ASSERT_NOT:
86 case OP_ONCE:
87 printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
88 code += 2;
89 break;
90
91 case OP_STAR:
92 case OP_MINSTAR:
93 case OP_PLUS:
94 case OP_MINPLUS:
95 case OP_QUERY:
96 case OP_MINQUERY:
97 case OP_TYPESTAR:
98 case OP_TYPEMINSTAR:
99 case OP_TYPEPLUS:
100 case OP_TYPEMINPLUS:
101 case OP_TYPEQUERY:
102 case OP_TYPEMINQUERY:
103 if (*code >= OP_TYPESTAR)
104 printf(" %s", OP_names[code[1]]);
105 else if (isprint(c = code[1])) printf(" %c", c);
106 else printf(" \\x%02x", c);
107 printf("%s", OP_names[*code++]);
108 break;
109
110 case OP_EXACT:
111 case OP_UPTO:
112 case OP_MINUPTO:
113 if (isprint(c = code[3])) printf(" %c{", c);
114 else printf(" \\x%02x{", c);
115 if (*code != OP_EXACT) printf(",");
116 printf("%d}", (code[1] << 8) + code[2]);
117 if (*code == OP_MINUPTO) printf("?");
118 code += 3;
119 break;
120
121 case OP_TYPEEXACT:
122 case OP_TYPEUPTO:
123 case OP_TYPEMINUPTO:
124 printf(" %s{", OP_names[code[3]]);
125 if (*code != OP_TYPEEXACT) printf(",");
126 printf("%d}", (code[1] << 8) + code[2]);
127 if (*code == OP_TYPEMINUPTO) printf("?");
128 code += 3;
129 break;
130
131 case OP_NOT:
132 if (isprint(c = *(++code))) printf(" [^%c]", c);
133 else printf(" [^\\x%02x]", c);
134 break;
135
136 case OP_NOTSTAR:
137 case OP_NOTMINSTAR:
138 case OP_NOTPLUS:
139 case OP_NOTMINPLUS:
140 case OP_NOTQUERY:
141 case OP_NOTMINQUERY:
142 if (isprint(c = code[1])) printf(" [^%c]", c);
143 else printf(" [^\\x%02x]", c);
144 printf("%s", OP_names[*code++]);
145 break;
146
147 case OP_NOTEXACT:
148 case OP_NOTUPTO:
149 case OP_NOTMINUPTO:
150 if (isprint(c = code[3])) printf(" [^%c]{", c);
151 else printf(" [^\\x%02x]{", c);
152 if (*code != OP_NOTEXACT) printf(",");
153 printf("%d}", (code[1] << 8) + code[2]);
154 if (*code == OP_NOTMINUPTO) printf("?");
155 code += 3;
156 break;
157
158 case OP_REF:
159 printf(" \\%d", *(++code));
160 break;
161
162 case OP_CLASS:
163 {
164 int i, min, max;
165
166 code++;
167 printf(" [");
168
169 for (i = 0; i < 256; i++)
170 {
171 if ((code[i/8] & (1 << (i&7))) != 0)
172 {
173 int j;
174 for (j = i+1; j < 256; j++)
175 if ((code[j/8] & (1 << (j&7))) == 0) break;
176 if (i == '-' || i == ']') printf("\\");
177 if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);
178 if (--j > i)
179 {
180 printf("-");
181 if (j == '-' || j == ']') printf("\\");
182 if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);
183 }
184 i = j;
185 }
186 }
187 printf("]");
188 code += 32;
189
190 switch(*code)
191 {
192 case OP_CRSTAR:
193 case OP_CRMINSTAR:
194 case OP_CRPLUS:
195 case OP_CRMINPLUS:
196 case OP_CRQUERY:
197 case OP_CRMINQUERY:
198 printf("%s", OP_names[*code]);
199 break;
200
201 case OP_CRRANGE:
202 case OP_CRMINRANGE:
203 min = (code[1] << 8) + code[2];
204 max = (code[3] << 8) + code[4];
205 if (max == 0) printf("{%d,}", min);
206 else printf("{%d,%d}", min, max);
207 if (*code == OP_CRMINRANGE) printf("?");
208 code += 4;
209 break;
210
211 default:
212 code--;
213 }
214 }
215 break;
216
217 /* Anything else is just a one-node item */
218
219 default:
220 printf(" %s", OP_names[*code]);
221 break;
222 }
223
224 code++;
225 printf("\n");
226 }
227 }
228
229
230
231 /* Character string printing function. */
232
233 static void pchars(unsigned char *p, int length)
234 {
235 int c;
236 while (length-- > 0)
237 if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
238 else fprintf(outfile, "\\x%02x", c);
239 }
240
241
242
243 /* Alternative malloc function, to test functionality and show the size of the
244 compiled re. */
245
246 static void *new_malloc(size_t size)
247 {
248 if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);
249 return malloc(size);
250 }
251
252
253
254 /* Read lines from named file or stdin and write to named file or stdout; lines
255 consist of a regular expression, in delimiters and optionally followed by
256 options, followed by a set of test data, terminated by an empty line. */
257
258 int main(int argc, char **argv)
259 {
260 FILE *infile = stdin;
261 int options = 0;
262 int study_options = 0;
263 int op = 1;
264 int timeit = 0;
265 int showinfo = 0;
266 int posix = 0;
267 int debug = 0;
268 unsigned char buffer[30000];
269 unsigned char dbuffer[1024];
270
271 /* Static so that new_malloc can use it. */
272
273 outfile = stdout;
274
275 /* Scan options */
276
277 while (argc > 1 && argv[op][0] == '-')
278 {
279 if (strcmp(argv[op], "-s") == 0) log_store = 1;
280 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
281 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
282 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
283 else if (strcmp(argv[op], "-p") == 0) posix = 1;
284 else
285 {
286 printf("*** Unknown option %s\n", argv[op]);
287 return 1;
288 }
289 op++;
290 argc--;
291 }
292
293 /* Sort out the input and output files */
294
295 if (argc > 1)
296 {
297 infile = fopen(argv[op], "r");
298 if (infile == NULL)
299 {
300 printf("** Failed to open %s\n", argv[op]);
301 return 1;
302 }
303 }
304
305 if (argc > 2)
306 {
307 outfile = fopen(argv[op+1], "w");
308 if (outfile == NULL)
309 {
310 printf("** Failed to open %s\n", argv[op+1]);
311 return 1;
312 }
313 }
314
315 /* Set alternative malloc function */
316
317 pcre_malloc = new_malloc;
318
319 /* Heading line, then prompt for first re if stdin */
320
321 fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");
322 fprintf(outfile, "PCRE version %s\n\n", pcre_version());
323
324 /* Main loop */
325
326 for (;;)
327 {
328 pcre *re = NULL;
329 pcre_extra *extra = NULL;
330 regex_t preg;
331 const char *error;
332 unsigned char *p, *pp;
333 int do_study = 0;
334 int do_debug = 0;
335 int do_posix = 0;
336 int erroroffset, len, delimiter;
337
338 if (infile == stdin) printf(" re> ");
339 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
340 if (infile != stdin) fprintf(outfile, (char *)buffer);
341
342 p = buffer;
343 while (isspace(*p)) p++;
344 if (*p == 0) continue;
345
346 /* Get the delimiter and seek the end of the pattern; if is isn't
347 complete, read more. */
348
349 delimiter = *p++;
350
351 if (isalnum(delimiter))
352 {
353 fprintf(outfile, "** Delimiter must not be alphameric\n");
354 goto SKIP_DATA;
355 }
356
357 pp = p;
358
359 for(;;)
360 {
361 while (*pp != 0 && *pp != delimiter) pp++;
362 if (*pp != 0) break;
363
364 len = sizeof(buffer) - (pp - buffer);
365 if (len < 256)
366 {
367 fprintf(outfile, "** Expression too long - missing delimiter?\n");
368 goto SKIP_DATA;
369 }
370
371 if (infile == stdin) printf(" > ");
372 if (fgets((char *)pp, len, infile) == NULL)
373 {
374 fprintf(outfile, "** Unexpected EOF\n");
375 goto END_OFF;
376 }
377 if (infile != stdin) fprintf(outfile, (char *)pp);
378 }
379
380 /* Terminate the pattern at the delimiter */
381
382 *pp++ = 0;
383
384 /* Look for options after final delimiter */
385
386 options = 0;
387 study_options = 0;
388 while (*pp != 0)
389 {
390 switch (*pp++)
391 {
392 case 'i': options |= PCRE_CASELESS; break;
393 case 'm': options |= PCRE_MULTILINE; break;
394 case 's': options |= PCRE_DOTALL; break;
395 case 'x': options |= PCRE_EXTENDED; break;
396 case 'A': options |= PCRE_ANCHORED; break;
397 case 'D': do_debug = 1; break;
398 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
399 case 'P': do_posix = 1; break;
400 case 'S': do_study = 1; break;
401 case 'I': study_options |= PCRE_CASELESS; break;
402 case 'X': options |= PCRE_EXTRA; break;
403 case '\n': case ' ': break;
404 default:
405 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
406 goto SKIP_DATA;
407 }
408 }
409
410 /* Handle compiing via the POSIX interface, which doesn't support the
411 timing, showing, or debugging options. */
412
413 if (posix || do_posix)
414 {
415 int rc;
416 int cflags = 0;
417 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
418 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
419 rc = regcomp(&preg, (char *)p, cflags);
420
421 /* Compilation failed; go back for another re, skipping to blank line
422 if non-interactive. */
423
424 if (rc != 0)
425 {
426 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
427 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
428 goto SKIP_DATA;
429 }
430 }
431
432 /* Handle compiling via the native interface */
433
434 else
435 {
436 if (timeit)
437 {
438 register int i;
439 clock_t time_taken;
440 clock_t start_time = clock();
441 for (i = 0; i < 4000; i++)
442 {
443 re = pcre_compile((char *)p, options, &error, &erroroffset);
444 if (re != NULL) free(re);
445 }
446 time_taken = clock() - start_time;
447 fprintf(outfile, "Compile time %.2f milliseconds\n",
448 ((double)time_taken)/(4 * CLOCKS_PER_SEC));
449 }
450
451 re = pcre_compile((char *)p, options, &error, &erroroffset);
452
453 /* Compilation failed; go back for another re, skipping to blank line
454 if non-interactive. */
455
456 if (re == NULL)
457 {
458 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
459 SKIP_DATA:
460 if (infile != stdin)
461 {
462 for (;;)
463 {
464 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
465 goto END_OFF;
466 len = (int)strlen((char *)buffer);
467 while (len > 0 && isspace(buffer[len-1])) len--;
468 if (len == 0) break;
469 }
470 fprintf(outfile, "\n");
471 }
472 continue;
473 }
474
475 /* Compilation succeeded; print data if required */
476
477 if (showinfo || do_debug)
478 {
479 int first_char, count;
480
481 if (debug || do_debug) print_internals(re);
482
483 count = pcre_info(re, &options, &first_char);
484 if (count < 0) fprintf(outfile,
485 "Error %d while reading info\n", count);
486 else
487 {
488 fprintf(outfile, "Identifying subpattern count = %d\n", count);
489 if (options == 0) fprintf(outfile, "No options\n");
490 else fprintf(outfile, "Options:%s%s%s%s%s%s%s\n",
491 ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
492 ((options & PCRE_CASELESS) != 0)? " caseless" : "",
493 ((options & PCRE_EXTENDED) != 0)? " extended" : "",
494 ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
495 ((options & PCRE_DOTALL) != 0)? " dotall" : "",
496 ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
497 ((options & PCRE_EXTRA) != 0)? " extra" : "");
498 if (first_char == -1)
499 {
500 fprintf(outfile, "First char at start or follows \\n\n");
501 }
502 else if (first_char < 0)
503 {
504 fprintf(outfile, "No first char\n");
505 }
506 else
507 {
508 if (isprint(first_char))
509 fprintf(outfile, "First char = \'%c\'\n", first_char);
510 else
511 fprintf(outfile, "First char = %d\n", first_char);
512 }
513 }
514 }
515
516 /* If /S was present, study the regexp to generate additional info to
517 help with the matching. */
518
519 if (do_study)
520 {
521 if (timeit)
522 {
523 register int i;
524 clock_t time_taken;
525 clock_t start_time = clock();
526 for (i = 0; i < 4000; i++)
527 extra = pcre_study(re, study_options, &error);
528 time_taken = clock() - start_time;
529 if (extra != NULL) free(extra);
530 fprintf(outfile, " Study time %.2f milliseconds\n",
531 ((double)time_taken)/(4 * CLOCKS_PER_SEC));
532 }
533
534 extra = pcre_study(re, study_options, &error);
535 if (error != NULL)
536 fprintf(outfile, "Failed to study: %s\n", error);
537 else if (extra == NULL)
538 fprintf(outfile, "Study returned NULL\n");
539
540 /* This looks at internal information. A bit kludgy to do it this
541 way, but it is useful for testing. */
542
543 else if (showinfo || do_debug)
544 {
545 real_pcre_extra *xx = (real_pcre_extra *)extra;
546 if ((xx->options & PCRE_STUDY_MAPPED) == 0)
547 fprintf(outfile, "No starting character set\n");
548 else
549 {
550 int i;
551 int c = 24;
552 fprintf(outfile, "Starting character set: ");
553 for (i = 0; i < 256; i++)
554 {
555 if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)
556 {
557 if (c > 75)
558 {
559 fprintf(outfile, "\n ");
560 c = 2;
561 }
562 if (isprint(i) && i != ' ')
563 {
564 fprintf(outfile, "%c ", i);
565 c += 2;
566 }
567 else
568 {
569 fprintf(outfile, "\\x%02x ", i);
570 c += 5;
571 }
572 }
573 }
574 fprintf(outfile, "\n");
575 }
576 }
577 }
578 }
579
580 /* Read data lines and test them */
581
582 for (;;)
583 {
584 unsigned char *pp;
585 int count, c;
586 int offsets[30];
587 int size_offsets = sizeof(offsets)/sizeof(int);
588
589 options = 0;
590
591 if (infile == stdin) printf(" data> ");
592 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) goto END_OFF;
593 if (infile != stdin) fprintf(outfile, (char *)buffer);
594
595 len = (int)strlen((char *)buffer);
596 while (len > 0 && isspace(buffer[len-1])) len--;
597 buffer[len] = 0;
598 if (len == 0) break;
599
600 p = buffer;
601 while (isspace(*p)) p++;
602
603 pp = dbuffer;
604 while ((c = *p++) != 0)
605 {
606 int i = 0;
607 int n = 0;
608 if (c == '\\') switch ((c = *p++))
609 {
610 case 'a': c = 7; break;
611 case 'b': c = '\b'; break;
612 case 'e': c = 27; break;
613 case 'f': c = '\f'; break;
614 case 'n': c = '\n'; break;
615 case 'r': c = '\r'; break;
616 case 't': c = '\t'; break;
617 case 'v': c = '\v'; break;
618
619 case '0': case '1': case '2': case '3':
620 case '4': case '5': case '6': case '7':
621 c -= '0';
622 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
623 c = c * 8 + *p++ - '0';
624 break;
625
626 case 'x':
627 c = 0;
628 while (i++ < 2 && isxdigit(*p))
629 {
630 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
631 p++;
632 }
633 break;
634
635 case 0: /* Allows for an empty line */
636 p--;
637 continue;
638
639 case 'A': /* Option setting */
640 options |= PCRE_ANCHORED;
641 continue;
642
643 case 'B':
644 options |= PCRE_NOTBOL;
645 continue;
646
647 case 'E':
648 options |= PCRE_DOLLAR_ENDONLY;
649 continue;
650
651 case 'I':
652 options |= PCRE_CASELESS;
653 continue;
654
655 case 'M':
656 options |= PCRE_MULTILINE;
657 continue;
658
659 case 'S':
660 options |= PCRE_DOTALL;
661 continue;
662
663 case 'O':
664 while(isdigit(*p)) n = n * 10 + *p++ - '0';
665 if (n <= (int)sizeof(offsets)/sizeof(int)) size_offsets = n;
666 continue;
667
668 case 'Z':
669 options |= PCRE_NOTEOL;
670 continue;
671 }
672 *pp++ = c;
673 }
674 *pp = 0;
675 len = pp - dbuffer;
676
677 /* Handle matching via the POSIX interface, which does not
678 support timing. */
679
680 if (posix || do_posix)
681 {
682 int rc;
683 int eflags = 0;
684 regmatch_t pmatch[30];
685 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
686 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
687
688 rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),
689 pmatch, eflags);
690
691 if (rc != 0)
692 {
693 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
694 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
695 }
696 else
697 {
698 size_t i;
699 for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)
700 {
701 if (pmatch[i].rm_so >= 0)
702 {
703 fprintf(outfile, "%2d: ", i);
704 pchars(dbuffer + pmatch[i].rm_so,
705 pmatch[i].rm_eo - pmatch[i].rm_so);
706 fprintf(outfile, "\n");
707 }
708 }
709 }
710 }
711
712 /* Handle matching via the native interface */
713
714 else
715 {
716 if (timeit)
717 {
718 register int i;
719 clock_t time_taken;
720 clock_t start_time = clock();
721 for (i = 0; i < 4000; i++)
722 count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,
723 size_offsets);
724 time_taken = clock() - start_time;
725 fprintf(outfile, "Execute time %.2f milliseconds\n",
726 ((double)time_taken)/(4 * CLOCKS_PER_SEC));
727 }
728
729 count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,
730 size_offsets);
731
732 if (count == 0)
733 {
734 fprintf(outfile, "Matched, but too many substrings\n");
735 count = size_offsets/2;
736 }
737
738 if (count >= 0)
739 {
740 int i;
741 count *= 2;
742 for (i = 0; i < count; i += 2)
743 {
744 if (offsets[i] < 0)
745 fprintf(outfile, "%2d: <unset>\n", i/2);
746 else
747 {
748 fprintf(outfile, "%2d: ", i/2);
749 pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);
750 fprintf(outfile, "\n");
751 }
752 }
753 }
754 else
755 {
756 if (count == -1) fprintf(outfile, "No match\n");
757 else fprintf(outfile, "Error %d\n", count);
758 }
759 }
760 }
761
762 if (posix || do_posix) regfree(&preg);
763 if (re != NULL) free(re);
764 if (extra != NULL) free(extra);
765 }
766
767 END_OFF:
768 fprintf(outfile, "\n");
769 return 0;
770 }
771
772 /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12