/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 13 - (show annotations) (download)
Sat Feb 24 21:38:21 2007 UTC (7 years, 4 months ago) by nigel
File MIME type: text/plain
File size: 19543 byte(s)
Load pcre-1.05 into code/trunk.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 #include <ctype.h>
6 #include <stdio.h>
7 #include <string.h>
8 #include <stdlib.h>
9 #include <time.h>
10
11 /* Use the internal info for displaying the results of pcre_study(). */
12
13 #include "internal.h"
14 #include "pcreposix.h"
15
16 #ifndef CLOCKS_PER_SEC
17 #ifdef CLK_TCK
18 #define CLOCKS_PER_SEC CLK_TCK
19 #else
20 #define CLOCKS_PER_SEC 100
21 #endif
22 #endif
23
24
25 static FILE *outfile;
26 static int log_store = 0;
27
28
29
/* Printable names for the compiled opcodes. print_internals() below indexes
this table directly with an opcode byte, so the entries must stay in the same
order as the opcode values (presumably the OP_xxx enumeration in internal.h,
which is not visible here - confirm before reordering). This table and
print_internals() are the same code as contained in pcre.c under the DEBUG
macro. */

static const char *OP_names[] = {
  /* End marker and backslash-style single items */
  "End",
  "\\A", "\\B", "\\b", "\\D", "\\d", "\\S", "\\s", "\\W", "\\w",
  "Cut", "\\Z",

  /* Simple items */
  "^", "$", "Any", "chars", "not",

  /* Three families of nine repeat operators each */
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",

  /* A fourth repeat family with only eight members */
  "*", "*?", "+", "+?", "?", "??", "{", "{",

  /* Classes and back references */
  "class", "negclass", "Ref",

  /* Alternation, group terminators and assertions */
  "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",

  /* Bracket items; "Bra" must remain the final entry */
  "Brazero", "Braminzero", "Bra"
};
45
46
47 static void print_internals(pcre *re)
48 {
49 unsigned char *code = ((real_pcre *)re)->code;
50
51 printf("------------------------------------------------------------------\n");
52
53 for(;;)
54 {
55 int c;
56 int charlength;
57
58 printf("%3d ", code - ((real_pcre *)re)->code);
59
60 if (*code >= OP_BRA)
61 {
62 printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
63 code += 2;
64 }
65
66 else switch(*code)
67 {
68 case OP_END:
69 printf(" %s\n", OP_names[*code]);
70 printf("------------------------------------------------------------------\n");
71 return;
72
73 case OP_CHARS:
74 charlength = *(++code);
75 printf("%3d ", charlength);
76 while (charlength-- > 0)
77 if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);
78 break;
79
80 case OP_KETRMAX:
81 case OP_KETRMIN:
82 case OP_ALT:
83 case OP_KET:
84 case OP_ASSERT:
85 case OP_ASSERT_NOT:
86 case OP_ONCE:
87 printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
88 code += 2;
89 break;
90
91 case OP_STAR:
92 case OP_MINSTAR:
93 case OP_PLUS:
94 case OP_MINPLUS:
95 case OP_QUERY:
96 case OP_MINQUERY:
97 case OP_TYPESTAR:
98 case OP_TYPEMINSTAR:
99 case OP_TYPEPLUS:
100 case OP_TYPEMINPLUS:
101 case OP_TYPEQUERY:
102 case OP_TYPEMINQUERY:
103 if (*code >= OP_TYPESTAR)
104 printf(" %s", OP_names[code[1]]);
105 else if (isprint(c = code[1])) printf(" %c", c);
106 else printf(" \\x%02x", c);
107 printf("%s", OP_names[*code++]);
108 break;
109
110 case OP_EXACT:
111 case OP_UPTO:
112 case OP_MINUPTO:
113 if (isprint(c = code[3])) printf(" %c{", c);
114 else printf(" \\x%02x{", c);
115 if (*code != OP_EXACT) printf(",");
116 printf("%d}", (code[1] << 8) + code[2]);
117 if (*code == OP_MINUPTO) printf("?");
118 code += 3;
119 break;
120
121 case OP_TYPEEXACT:
122 case OP_TYPEUPTO:
123 case OP_TYPEMINUPTO:
124 printf(" %s{", OP_names[code[3]]);
125 if (*code != OP_TYPEEXACT) printf("0,");
126 printf("%d}", (code[1] << 8) + code[2]);
127 if (*code == OP_TYPEMINUPTO) printf("?");
128 code += 3;
129 break;
130
131 case OP_NOT:
132 if (isprint(c = *(++code))) printf(" [^%c]", c);
133 else printf(" [^\\x%02x]", c);
134 break;
135
136 case OP_NOTSTAR:
137 case OP_NOTMINSTAR:
138 case OP_NOTPLUS:
139 case OP_NOTMINPLUS:
140 case OP_NOTQUERY:
141 case OP_NOTMINQUERY:
142 if (isprint(c = code[1])) printf(" [^%c]", c);
143 else printf(" [^\\x%02x]", c);
144 printf("%s", OP_names[*code++]);
145 break;
146
147 case OP_NOTEXACT:
148 case OP_NOTUPTO:
149 case OP_NOTMINUPTO:
150 if (isprint(c = code[3])) printf(" [^%c]{", c);
151 else printf(" [^\\x%02x]{", c);
152 if (*code != OP_NOTEXACT) printf(",");
153 printf("%d}", (code[1] << 8) + code[2]);
154 if (*code == OP_NOTMINUPTO) printf("?");
155 code += 3;
156 break;
157
158 case OP_REF:
159 printf(" \\%d", *(++code));
160 code++;
161 goto CLASS_REF_REPEAT;
162
163 case OP_CLASS:
164 case OP_NEGCLASS:
165 {
166 int i, min, max;
167 if (*code++ == OP_CLASS) printf(" [");
168 else printf(" ^[");
169
170 for (i = 0; i < 256; i++)
171 {
172 if ((code[i/8] & (1 << (i&7))) != 0)
173 {
174 int j;
175 for (j = i+1; j < 256; j++)
176 if ((code[j/8] & (1 << (j&7))) == 0) break;
177 if (i == '-' || i == ']') printf("\\");
178 if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);
179 if (--j > i)
180 {
181 printf("-");
182 if (j == '-' || j == ']') printf("\\");
183 if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);
184 }
185 i = j;
186 }
187 }
188 printf("]");
189 code += 32;
190
191 CLASS_REF_REPEAT:
192
193 switch(*code)
194 {
195 case OP_CRSTAR:
196 case OP_CRMINSTAR:
197 case OP_CRPLUS:
198 case OP_CRMINPLUS:
199 case OP_CRQUERY:
200 case OP_CRMINQUERY:
201 printf("%s", OP_names[*code]);
202 break;
203
204 case OP_CRRANGE:
205 case OP_CRMINRANGE:
206 min = (code[1] << 8) + code[2];
207 max = (code[3] << 8) + code[4];
208 if (max == 0) printf("{%d,}", min);
209 else printf("{%d,%d}", min, max);
210 if (*code == OP_CRMINRANGE) printf("?");
211 code += 4;
212 break;
213
214 default:
215 code--;
216 }
217 }
218 break;
219
220 /* Anything else is just a one-node item */
221
222 default:
223 printf(" %s", OP_names[*code]);
224 break;
225 }
226
227 code++;
228 printf("\n");
229 }
230 }
231
232
233
234 /* Character string printing function. */
235
236 static void pchars(unsigned char *p, int length)
237 {
238 int c;
239 while (length-- > 0)
240 if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
241 else fprintf(outfile, "\\x%02x", c);
242 }
243
244
245
246 /* Alternative malloc function, to test functionality and show the size of the
247 compiled re. */
248
249 static void *new_malloc(size_t size)
250 {
251 if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);
252 return malloc(size);
253 }
254
255
256
257 /* Read lines from named file or stdin and write to named file or stdout; lines
258 consist of a regular expression, in delimiters and optionally followed by
259 options, followed by a set of test data, terminated by an empty line. */
260
261 int main(int argc, char **argv)
262 {
263 FILE *infile = stdin;
264 int options = 0;
265 int study_options = 0;
266 int op = 1;
267 int timeit = 0;
268 int showinfo = 0;
269 int posix = 0;
270 int debug = 0;
271 int done = 0;
272 unsigned char buffer[30000];
273 unsigned char dbuffer[1024];
274
275 /* Static so that new_malloc can use it. */
276
277 outfile = stdout;
278
279 /* Scan options */
280
281 while (argc > 1 && argv[op][0] == '-')
282 {
283 if (strcmp(argv[op], "-s") == 0) log_store = 1;
284 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
285 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
286 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
287 else if (strcmp(argv[op], "-p") == 0) posix = 1;
288 else
289 {
290 printf("*** Unknown option %s\n", argv[op]);
291 return 1;
292 }
293 op++;
294 argc--;
295 }
296
297 /* Sort out the input and output files */
298
299 if (argc > 1)
300 {
301 infile = fopen(argv[op], "r");
302 if (infile == NULL)
303 {
304 printf("** Failed to open %s\n", argv[op]);
305 return 1;
306 }
307 }
308
309 if (argc > 2)
310 {
311 outfile = fopen(argv[op+1], "w");
312 if (outfile == NULL)
313 {
314 printf("** Failed to open %s\n", argv[op+1]);
315 return 1;
316 }
317 }
318
319 /* Set alternative malloc function */
320
321 pcre_malloc = new_malloc;
322
323 /* Heading line, then prompt for first re if stdin */
324
325 fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");
326 fprintf(outfile, "PCRE version %s\n\n", pcre_version());
327
328 /* Main loop */
329
330 while (!done)
331 {
332 pcre *re = NULL;
333 pcre_extra *extra = NULL;
334 regex_t preg;
335 const char *error;
336 unsigned char *p, *pp;
337 int do_study = 0;
338 int do_debug = 0;
339 int do_posix = 0;
340 int erroroffset, len, delimiter;
341
342 if (infile == stdin) printf(" re> ");
343 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
344 if (infile != stdin) fprintf(outfile, (char *)buffer);
345
346 p = buffer;
347 while (isspace(*p)) p++;
348 if (*p == 0) continue;
349
350 /* Get the delimiter and seek the end of the pattern; if is isn't
351 complete, read more. */
352
353 delimiter = *p++;
354
355 if (isalnum(delimiter))
356 {
357 fprintf(outfile, "** Delimiter must not be alphameric\n");
358 goto SKIP_DATA;
359 }
360
361 pp = p;
362
363 for(;;)
364 {
365 while (*pp != 0 && *pp != delimiter) pp++;
366 if (*pp != 0) break;
367
368 len = sizeof(buffer) - (pp - buffer);
369 if (len < 256)
370 {
371 fprintf(outfile, "** Expression too long - missing delimiter?\n");
372 goto SKIP_DATA;
373 }
374
375 if (infile == stdin) printf(" > ");
376 if (fgets((char *)pp, len, infile) == NULL)
377 {
378 fprintf(outfile, "** Unexpected EOF\n");
379 done = 1;
380 goto CONTINUE;
381 }
382 if (infile != stdin) fprintf(outfile, (char *)pp);
383 }
384
385 /* Terminate the pattern at the delimiter */
386
387 *pp++ = 0;
388
389 /* Look for options after final delimiter */
390
391 options = 0;
392 study_options = 0;
393 while (*pp != 0)
394 {
395 switch (*pp++)
396 {
397 case 'i': options |= PCRE_CASELESS; break;
398 case 'm': options |= PCRE_MULTILINE; break;
399 case 's': options |= PCRE_DOTALL; break;
400 case 'x': options |= PCRE_EXTENDED; break;
401 case 'A': options |= PCRE_ANCHORED; break;
402 case 'D': do_debug = 1; break;
403 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
404 case 'P': do_posix = 1; break;
405 case 'S': do_study = 1; break;
406 case 'I': study_options |= PCRE_CASELESS; break;
407 case 'X': options |= PCRE_EXTRA; break;
408 case '\n': case ' ': break;
409 default:
410 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
411 goto SKIP_DATA;
412 }
413 }
414
415 /* Handle compiling via the POSIX interface, which doesn't support the
416 timing, showing, or debugging options. */
417
418 if (posix || do_posix)
419 {
420 int rc;
421 int cflags = 0;
422 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
423 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
424 rc = regcomp(&preg, (char *)p, cflags);
425
426 /* Compilation failed; go back for another re, skipping to blank line
427 if non-interactive. */
428
429 if (rc != 0)
430 {
431 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
432 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
433 goto SKIP_DATA;
434 }
435 }
436
437 /* Handle compiling via the native interface */
438
439 else
440 {
441 if (timeit)
442 {
443 register int i;
444 clock_t time_taken;
445 clock_t start_time = clock();
446 for (i = 0; i < 4000; i++)
447 {
448 re = pcre_compile((char *)p, options, &error, &erroroffset);
449 if (re != NULL) free(re);
450 }
451 time_taken = clock() - start_time;
452 fprintf(outfile, "Compile time %.2f milliseconds\n",
453 ((double)time_taken)/(4 * CLOCKS_PER_SEC));
454 }
455
456 re = pcre_compile((char *)p, options, &error, &erroroffset);
457
458 /* Compilation failed; go back for another re, skipping to blank line
459 if non-interactive. */
460
461 if (re == NULL)
462 {
463 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
464 SKIP_DATA:
465 if (infile != stdin)
466 {
467 for (;;)
468 {
469 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
470 {
471 done = 1;
472 goto CONTINUE;
473 }
474 len = (int)strlen((char *)buffer);
475 while (len > 0 && isspace(buffer[len-1])) len--;
476 if (len == 0) break;
477 }
478 fprintf(outfile, "\n");
479 }
480 continue;
481 }
482
483 /* Compilation succeeded; print data if required */
484
485 if (showinfo || do_debug)
486 {
487 int first_char, count;
488
489 if (debug || do_debug) print_internals(re);
490
491 count = pcre_info(re, &options, &first_char);
492 if (count < 0) fprintf(outfile,
493 "Error %d while reading info\n", count);
494 else
495 {
496 fprintf(outfile, "Identifying subpattern count = %d\n", count);
497 if (options == 0) fprintf(outfile, "No options\n");
498 else fprintf(outfile, "Options:%s%s%s%s%s%s%s\n",
499 ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
500 ((options & PCRE_CASELESS) != 0)? " caseless" : "",
501 ((options & PCRE_EXTENDED) != 0)? " extended" : "",
502 ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
503 ((options & PCRE_DOTALL) != 0)? " dotall" : "",
504 ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
505 ((options & PCRE_EXTRA) != 0)? " extra" : "");
506 if (first_char == -1)
507 {
508 fprintf(outfile, "First char at start or follows \\n\n");
509 }
510 else if (first_char < 0)
511 {
512 fprintf(outfile, "No first char\n");
513 }
514 else
515 {
516 if (isprint(first_char))
517 fprintf(outfile, "First char = \'%c\'\n", first_char);
518 else
519 fprintf(outfile, "First char = %d\n", first_char);
520 }
521 }
522 }
523
524 /* If /S was present, study the regexp to generate additional info to
525 help with the matching. */
526
527 if (do_study)
528 {
529 if (timeit)
530 {
531 register int i;
532 clock_t time_taken;
533 clock_t start_time = clock();
534 for (i = 0; i < 4000; i++)
535 extra = pcre_study(re, study_options, &error);
536 time_taken = clock() - start_time;
537 if (extra != NULL) free(extra);
538 fprintf(outfile, " Study time %.2f milliseconds\n",
539 ((double)time_taken)/(4 * CLOCKS_PER_SEC));
540 }
541
542 extra = pcre_study(re, study_options, &error);
543 if (error != NULL)
544 fprintf(outfile, "Failed to study: %s\n", error);
545 else if (extra == NULL)
546 fprintf(outfile, "Study returned NULL\n");
547
548 /* This looks at internal information. A bit kludgy to do it this
549 way, but it is useful for testing. */
550
551 else if (showinfo || do_debug)
552 {
553 real_pcre_extra *xx = (real_pcre_extra *)extra;
554 if ((xx->options & PCRE_STUDY_MAPPED) == 0)
555 fprintf(outfile, "No starting character set\n");
556 else
557 {
558 int i;
559 int c = 24;
560 fprintf(outfile, "Starting character set: ");
561 for (i = 0; i < 256; i++)
562 {
563 if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)
564 {
565 if (c > 75)
566 {
567 fprintf(outfile, "\n ");
568 c = 2;
569 }
570 if (isprint(i) && i != ' ')
571 {
572 fprintf(outfile, "%c ", i);
573 c += 2;
574 }
575 else
576 {
577 fprintf(outfile, "\\x%02x ", i);
578 c += 5;
579 }
580 }
581 }
582 fprintf(outfile, "\n");
583 }
584 }
585 }
586 }
587
588 /* Read data lines and test them */
589
590 for (;;)
591 {
592 unsigned char *q;
593 int count, c;
594 int offsets[30];
595 int size_offsets = sizeof(offsets)/sizeof(int);
596
597 options = 0;
598
599 if (infile == stdin) printf(" data> ");
600 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
601 {
602 done = 1;
603 goto CONTINUE;
604 }
605 if (infile != stdin) fprintf(outfile, (char *)buffer);
606
607 len = (int)strlen((char *)buffer);
608 while (len > 0 && isspace(buffer[len-1])) len--;
609 buffer[len] = 0;
610 if (len == 0) break;
611
612 p = buffer;
613 while (isspace(*p)) p++;
614
615 q = dbuffer;
616 while ((c = *p++) != 0)
617 {
618 int i = 0;
619 int n = 0;
620 if (c == '\\') switch ((c = *p++))
621 {
622 case 'a': c = 7; break;
623 case 'b': c = '\b'; break;
624 case 'e': c = 27; break;
625 case 'f': c = '\f'; break;
626 case 'n': c = '\n'; break;
627 case 'r': c = '\r'; break;
628 case 't': c = '\t'; break;
629 case 'v': c = '\v'; break;
630
631 case '0': case '1': case '2': case '3':
632 case '4': case '5': case '6': case '7':
633 c -= '0';
634 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
635 c = c * 8 + *p++ - '0';
636 break;
637
638 case 'x':
639 c = 0;
640 while (i++ < 2 && isxdigit(*p))
641 {
642 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
643 p++;
644 }
645 break;
646
647 case 0: /* Allows for an empty line */
648 p--;
649 continue;
650
651 case 'A': /* Option setting */
652 options |= PCRE_ANCHORED;
653 continue;
654
655 case 'B':
656 options |= PCRE_NOTBOL;
657 continue;
658
659 case 'E':
660 options |= PCRE_DOLLAR_ENDONLY;
661 continue;
662
663 case 'I':
664 options |= PCRE_CASELESS;
665 continue;
666
667 case 'M':
668 options |= PCRE_MULTILINE;
669 continue;
670
671 case 'S':
672 options |= PCRE_DOTALL;
673 continue;
674
675 case 'O':
676 while(isdigit(*p)) n = n * 10 + *p++ - '0';
677 if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;
678 continue;
679
680 case 'Z':
681 options |= PCRE_NOTEOL;
682 continue;
683 }
684 *q++ = c;
685 }
686 *q = 0;
687 len = q - dbuffer;
688
689 /* Handle matching via the POSIX interface, which does not
690 support timing. */
691
692 if (posix || do_posix)
693 {
694 int rc;
695 int eflags = 0;
696 regmatch_t pmatch[30];
697 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
698 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
699
700 rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),
701 pmatch, eflags);
702
703 if (rc != 0)
704 {
705 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
706 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
707 }
708 else
709 {
710 size_t i;
711 for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)
712 {
713 if (pmatch[i].rm_so >= 0)
714 {
715 fprintf(outfile, "%2d: ", i);
716 pchars(dbuffer + pmatch[i].rm_so,
717 pmatch[i].rm_eo - pmatch[i].rm_so);
718 fprintf(outfile, "\n");
719 }
720 }
721 }
722 }
723
724 /* Handle matching via the native interface */
725
726 else
727 {
728 if (timeit)
729 {
730 register int i;
731 clock_t time_taken;
732 clock_t start_time = clock();
733 for (i = 0; i < 4000; i++)
734 count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,
735 size_offsets);
736 time_taken = clock() - start_time;
737 fprintf(outfile, "Execute time %.2f milliseconds\n",
738 ((double)time_taken)/(4 * CLOCKS_PER_SEC));
739 }
740
741 count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,
742 size_offsets);
743
744 if (count == 0)
745 {
746 fprintf(outfile, "Matched, but too many substrings\n");
747 count = size_offsets/2;
748 }
749
750 if (count >= 0)
751 {
752 int i;
753 count *= 2;
754 for (i = 0; i < count; i += 2)
755 {
756 if (offsets[i] < 0)
757 fprintf(outfile, "%2d: <unset>\n", i/2);
758 else
759 {
760 fprintf(outfile, "%2d: ", i/2);
761 pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);
762 fprintf(outfile, "\n");
763 }
764 }
765 }
766 else
767 {
768 if (count == -1) fprintf(outfile, "No match\n");
769 else fprintf(outfile, "Error %d\n", count);
770 }
771 }
772 }
773
774 CONTINUE:
775 if (posix || do_posix) regfree(&preg);
776 if (re != NULL) free(re);
777 if (extra != NULL) free(extra);
778 }
779
780 fprintf(outfile, "\n");
781 return 0;
782 }
783
784 /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12