/[pcre]/code/tags/pcre-1.09/pcretest.c
ViewVC logotype

Contents of /code/tags/pcre-1.09/pcretest.c

Parent Directory | Revision Log


Revision 22 - (show annotations) (download)
Sat Feb 24 21:38:39 2007 UTC (7 years, 4 months ago) by nigel
File MIME type: text/plain
File size: 19658 byte(s)
Tag code/trunk as code/tags/pcre-1.09.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 #include <ctype.h>
6 #include <stdio.h>
7 #include <string.h>
8 #include <stdlib.h>
9 #include <time.h>
10
11 /* Use the internal info for displaying the results of pcre_study(). */
12
13 #include "internal.h"
14 #include "pcreposix.h"
15
/* Make sure CLOCKS_PER_SEC is available for the timing output: if <time.h>
   did not supply it, use CLK_TCK when that exists, otherwise fall back to a
   value of 100. */

#if !defined(CLOCKS_PER_SEC)
#  if defined(CLK_TCK)
#    define CLOCKS_PER_SEC CLK_TCK
#  else
#    define CLOCKS_PER_SEC 100
#  endif
#endif
23
24
/* Stream that receives the test output. main() points it at stdout or at the
   named output file before anything is written; it lives at file scope so
   that new_malloc() can use it as well. */
static FILE *outfile = NULL;

/* Set to 1 by the -s command line option; when non-zero, new_malloc() logs
   the size of every store request to outfile. */
static int log_store;
27
28
29
30 /* Debugging function to print the internal form of the regex. This is the same
31 code as contained in pcre.c under the DEBUG macro. */
32
/* Printable names for the compiled-pattern opcodes, indexed by opcode value
   (print_internals() looks names up with OP_names[*code]). Both the strings
   and the pointer array are read-only, so the table is fully const.

   NOTE(review): the order and number of entries must match the opcode
   numbering in internal.h, which is not visible from this file - confirm
   against that header whenever an opcode is added. The four rows of repeat
   names below are presumably the plain-character, negated-character,
   character-type and class/reference repeat families, in that order. */

static const char *const OP_names[] = {
  "End", "\\A", "\\B", "\\b", "\\D", "\\d",
  "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",
  "not",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{",
  "class", "negclass", "Ref",
  "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",
  "Brazero", "Braminzero", "Bra"
};
45
46
47 static void print_internals(pcre *re)
48 {
49 unsigned char *code = ((real_pcre *)re)->code;
50
51 printf("------------------------------------------------------------------\n");
52
53 for(;;)
54 {
55 int c;
56 int charlength;
57
58 printf("%3d ", code - ((real_pcre *)re)->code);
59
60 if (*code >= OP_BRA)
61 {
62 printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
63 code += 2;
64 }
65
66 else switch(*code)
67 {
68 case OP_END:
69 printf(" %s\n", OP_names[*code]);
70 printf("------------------------------------------------------------------\n");
71 return;
72
73 case OP_CHARS:
74 charlength = *(++code);
75 printf("%3d ", charlength);
76 while (charlength-- > 0)
77 if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);
78 break;
79
80 case OP_KETRMAX:
81 case OP_KETRMIN:
82 case OP_ALT:
83 case OP_KET:
84 case OP_ASSERT:
85 case OP_ASSERT_NOT:
86 case OP_ONCE:
87 printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
88 code += 2;
89 break;
90
91 case OP_STAR:
92 case OP_MINSTAR:
93 case OP_PLUS:
94 case OP_MINPLUS:
95 case OP_QUERY:
96 case OP_MINQUERY:
97 case OP_TYPESTAR:
98 case OP_TYPEMINSTAR:
99 case OP_TYPEPLUS:
100 case OP_TYPEMINPLUS:
101 case OP_TYPEQUERY:
102 case OP_TYPEMINQUERY:
103 if (*code >= OP_TYPESTAR)
104 printf(" %s", OP_names[code[1]]);
105 else if (isprint(c = code[1])) printf(" %c", c);
106 else printf(" \\x%02x", c);
107 printf("%s", OP_names[*code++]);
108 break;
109
110 case OP_EXACT:
111 case OP_UPTO:
112 case OP_MINUPTO:
113 if (isprint(c = code[3])) printf(" %c{", c);
114 else printf(" \\x%02x{", c);
115 if (*code != OP_EXACT) printf(",");
116 printf("%d}", (code[1] << 8) + code[2]);
117 if (*code == OP_MINUPTO) printf("?");
118 code += 3;
119 break;
120
121 case OP_TYPEEXACT:
122 case OP_TYPEUPTO:
123 case OP_TYPEMINUPTO:
124 printf(" %s{", OP_names[code[3]]);
125 if (*code != OP_TYPEEXACT) printf("0,");
126 printf("%d}", (code[1] << 8) + code[2]);
127 if (*code == OP_TYPEMINUPTO) printf("?");
128 code += 3;
129 break;
130
131 case OP_NOT:
132 if (isprint(c = *(++code))) printf(" [^%c]", c);
133 else printf(" [^\\x%02x]", c);
134 break;
135
136 case OP_NOTSTAR:
137 case OP_NOTMINSTAR:
138 case OP_NOTPLUS:
139 case OP_NOTMINPLUS:
140 case OP_NOTQUERY:
141 case OP_NOTMINQUERY:
142 if (isprint(c = code[1])) printf(" [^%c]", c);
143 else printf(" [^\\x%02x]", c);
144 printf("%s", OP_names[*code++]);
145 break;
146
147 case OP_NOTEXACT:
148 case OP_NOTUPTO:
149 case OP_NOTMINUPTO:
150 if (isprint(c = code[3])) printf(" [^%c]{", c);
151 else printf(" [^\\x%02x]{", c);
152 if (*code != OP_NOTEXACT) printf(",");
153 printf("%d}", (code[1] << 8) + code[2]);
154 if (*code == OP_NOTMINUPTO) printf("?");
155 code += 3;
156 break;
157
158 case OP_REF:
159 printf(" \\%d", *(++code));
160 code++;
161 goto CLASS_REF_REPEAT;
162
163 case OP_CLASS:
164 case OP_NEGCLASS:
165 {
166 int i, min, max;
167 if (*code++ == OP_CLASS) printf(" [");
168 else printf(" ^[");
169
170 for (i = 0; i < 256; i++)
171 {
172 if ((code[i/8] & (1 << (i&7))) != 0)
173 {
174 int j;
175 for (j = i+1; j < 256; j++)
176 if ((code[j/8] & (1 << (j&7))) == 0) break;
177 if (i == '-' || i == ']') printf("\\");
178 if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);
179 if (--j > i)
180 {
181 printf("-");
182 if (j == '-' || j == ']') printf("\\");
183 if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);
184 }
185 i = j;
186 }
187 }
188 printf("]");
189 code += 32;
190
191 CLASS_REF_REPEAT:
192
193 switch(*code)
194 {
195 case OP_CRSTAR:
196 case OP_CRMINSTAR:
197 case OP_CRPLUS:
198 case OP_CRMINPLUS:
199 case OP_CRQUERY:
200 case OP_CRMINQUERY:
201 printf("%s", OP_names[*code]);
202 break;
203
204 case OP_CRRANGE:
205 case OP_CRMINRANGE:
206 min = (code[1] << 8) + code[2];
207 max = (code[3] << 8) + code[4];
208 if (max == 0) printf("{%d,}", min);
209 else printf("{%d,%d}", min, max);
210 if (*code == OP_CRMINRANGE) printf("?");
211 code += 4;
212 break;
213
214 default:
215 code--;
216 }
217 }
218 break;
219
220 /* Anything else is just a one-node item */
221
222 default:
223 printf(" %s", OP_names[*code]);
224 break;
225 }
226
227 code++;
228 printf("\n");
229 }
230 }
231
232
233
234 /* Character string printing function. */
235
236 static void pchars(unsigned char *p, int length)
237 {
238 int c;
239 while (length-- > 0)
240 if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
241 else fprintf(outfile, "\\x%02x", c);
242 }
243
244
245
246 /* Alternative malloc function, to test functionality and show the size of the
247 compiled re. */
248
249 static void *new_malloc(size_t size)
250 {
251 if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);
252 return malloc(size);
253 }
254
255
256
257 /* Read lines from named file or stdin and write to named file or stdout; lines
258 consist of a regular expression, in delimiters and optionally followed by
259 options, followed by a set of test data, terminated by an empty line. */
260
261 int main(int argc, char **argv)
262 {
263 FILE *infile = stdin;
264 int options = 0;
265 int study_options = 0;
266 int op = 1;
267 int timeit = 0;
268 int showinfo = 0;
269 int posix = 0;
270 int debug = 0;
271 int done = 0;
272 unsigned char buffer[30000];
273 unsigned char dbuffer[1024];
274
275 /* Static so that new_malloc can use it. */
276
277 outfile = stdout;
278
279 /* Scan options */
280
281 while (argc > 1 && argv[op][0] == '-')
282 {
283 if (strcmp(argv[op], "-s") == 0) log_store = 1;
284 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
285 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
286 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
287 else if (strcmp(argv[op], "-p") == 0) posix = 1;
288 else
289 {
290 printf("*** Unknown option %s\n", argv[op]);
291 return 1;
292 }
293 op++;
294 argc--;
295 }
296
297 /* Sort out the input and output files */
298
299 if (argc > 1)
300 {
301 infile = fopen(argv[op], "r");
302 if (infile == NULL)
303 {
304 printf("** Failed to open %s\n", argv[op]);
305 return 1;
306 }
307 }
308
309 if (argc > 2)
310 {
311 outfile = fopen(argv[op+1], "w");
312 if (outfile == NULL)
313 {
314 printf("** Failed to open %s\n", argv[op+1]);
315 return 1;
316 }
317 }
318
319 /* Set alternative malloc function */
320
321 pcre_malloc = new_malloc;
322
323 /* Heading line, then prompt for first re if stdin */
324
325 fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");
326 fprintf(outfile, "PCRE version %s\n\n", pcre_version());
327
328 /* Main loop */
329
330 while (!done)
331 {
332 pcre *re = NULL;
333 pcre_extra *extra = NULL;
334 regex_t preg;
335 const char *error;
336 unsigned char *p, *pp;
337 int do_study = 0;
338 int do_debug = 0;
339 int do_posix = 0;
340 int erroroffset, len, delimiter;
341
342 if (infile == stdin) printf(" re> ");
343 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
344 if (infile != stdin) fprintf(outfile, (char *)buffer);
345
346 p = buffer;
347 while (isspace(*p)) p++;
348 if (*p == 0) continue;
349
350 /* Get the delimiter and seek the end of the pattern; if is isn't
351 complete, read more. */
352
353 delimiter = *p++;
354
355 if (isalnum(delimiter))
356 {
357 fprintf(outfile, "** Delimiter must not be alphameric\n");
358 goto SKIP_DATA;
359 }
360
361 pp = p;
362
363 for(;;)
364 {
365 while (*pp != 0 && *pp != delimiter) pp++;
366 if (*pp != 0) break;
367
368 len = sizeof(buffer) - (pp - buffer);
369 if (len < 256)
370 {
371 fprintf(outfile, "** Expression too long - missing delimiter?\n");
372 goto SKIP_DATA;
373 }
374
375 if (infile == stdin) printf(" > ");
376 if (fgets((char *)pp, len, infile) == NULL)
377 {
378 fprintf(outfile, "** Unexpected EOF\n");
379 done = 1;
380 goto CONTINUE;
381 }
382 if (infile != stdin) fprintf(outfile, (char *)pp);
383 }
384
385 /* Terminate the pattern at the delimiter */
386
387 *pp++ = 0;
388
389 /* Look for options after final delimiter */
390
391 options = 0;
392 study_options = 0;
393 while (*pp != 0)
394 {
395 switch (*pp++)
396 {
397 case 'i': options |= PCRE_CASELESS; break;
398 case 'm': options |= PCRE_MULTILINE; break;
399 case 's': options |= PCRE_DOTALL; break;
400 case 'x': options |= PCRE_EXTENDED; break;
401 case 'A': options |= PCRE_ANCHORED; break;
402 case 'D': do_debug = 1; break;
403 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
404 case 'P': do_posix = 1; break;
405 case 'S': do_study = 1; break;
406 case 'I': study_options |= PCRE_CASELESS; break;
407 case 'U': options |= PCRE_UNGREEDY; break;
408 case 'X': options |= PCRE_EXTRA; break;
409 case '\n': case ' ': break;
410 default:
411 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
412 goto SKIP_DATA;
413 }
414 }
415
416 /* Handle compiling via the POSIX interface, which doesn't support the
417 timing, showing, or debugging options. */
418
419 if (posix || do_posix)
420 {
421 int rc;
422 int cflags = 0;
423 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
424 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
425 rc = regcomp(&preg, (char *)p, cflags);
426
427 /* Compilation failed; go back for another re, skipping to blank line
428 if non-interactive. */
429
430 if (rc != 0)
431 {
432 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
433 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
434 goto SKIP_DATA;
435 }
436 }
437
438 /* Handle compiling via the native interface */
439
440 else
441 {
442 if (timeit)
443 {
444 register int i;
445 clock_t time_taken;
446 clock_t start_time = clock();
447 for (i = 0; i < 4000; i++)
448 {
449 re = pcre_compile((char *)p, options, &error, &erroroffset);
450 if (re != NULL) free(re);
451 }
452 time_taken = clock() - start_time;
453 fprintf(outfile, "Compile time %.2f milliseconds\n",
454 ((double)time_taken)/(4 * CLOCKS_PER_SEC));
455 }
456
457 re = pcre_compile((char *)p, options, &error, &erroroffset);
458
459 /* Compilation failed; go back for another re, skipping to blank line
460 if non-interactive. */
461
462 if (re == NULL)
463 {
464 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
465 SKIP_DATA:
466 if (infile != stdin)
467 {
468 for (;;)
469 {
470 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
471 {
472 done = 1;
473 goto CONTINUE;
474 }
475 len = (int)strlen((char *)buffer);
476 while (len > 0 && isspace(buffer[len-1])) len--;
477 if (len == 0) break;
478 }
479 fprintf(outfile, "\n");
480 }
481 continue;
482 }
483
484 /* Compilation succeeded; print data if required */
485
486 if (showinfo || do_debug)
487 {
488 int first_char, count;
489
490 if (debug || do_debug) print_internals(re);
491
492 count = pcre_info(re, &options, &first_char);
493 if (count < 0) fprintf(outfile,
494 "Error %d while reading info\n", count);
495 else
496 {
497 fprintf(outfile, "Identifying subpattern count = %d\n", count);
498 if (options == 0) fprintf(outfile, "No options\n");
499 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",
500 ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
501 ((options & PCRE_CASELESS) != 0)? " caseless" : "",
502 ((options & PCRE_EXTENDED) != 0)? " extended" : "",
503 ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
504 ((options & PCRE_DOTALL) != 0)? " dotall" : "",
505 ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
506 ((options & PCRE_EXTRA) != 0)? " extra" : "",
507 ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");
508 if (first_char == -1)
509 {
510 fprintf(outfile, "First char at start or follows \\n\n");
511 }
512 else if (first_char < 0)
513 {
514 fprintf(outfile, "No first char\n");
515 }
516 else
517 {
518 if (isprint(first_char))
519 fprintf(outfile, "First char = \'%c\'\n", first_char);
520 else
521 fprintf(outfile, "First char = %d\n", first_char);
522 }
523 }
524 }
525
526 /* If /S was present, study the regexp to generate additional info to
527 help with the matching. */
528
529 if (do_study)
530 {
531 if (timeit)
532 {
533 register int i;
534 clock_t time_taken;
535 clock_t start_time = clock();
536 for (i = 0; i < 4000; i++)
537 extra = pcre_study(re, study_options, &error);
538 time_taken = clock() - start_time;
539 if (extra != NULL) free(extra);
540 fprintf(outfile, " Study time %.2f milliseconds\n",
541 ((double)time_taken)/(4 * CLOCKS_PER_SEC));
542 }
543
544 extra = pcre_study(re, study_options, &error);
545 if (error != NULL)
546 fprintf(outfile, "Failed to study: %s\n", error);
547 else if (extra == NULL)
548 fprintf(outfile, "Study returned NULL\n");
549
550 /* This looks at internal information. A bit kludgy to do it this
551 way, but it is useful for testing. */
552
553 else if (showinfo || do_debug)
554 {
555 real_pcre_extra *xx = (real_pcre_extra *)extra;
556 if ((xx->options & PCRE_STUDY_MAPPED) == 0)
557 fprintf(outfile, "No starting character set\n");
558 else
559 {
560 int i;
561 int c = 24;
562 fprintf(outfile, "Starting character set: ");
563 for (i = 0; i < 256; i++)
564 {
565 if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)
566 {
567 if (c > 75)
568 {
569 fprintf(outfile, "\n ");
570 c = 2;
571 }
572 if (isprint(i) && i != ' ')
573 {
574 fprintf(outfile, "%c ", i);
575 c += 2;
576 }
577 else
578 {
579 fprintf(outfile, "\\x%02x ", i);
580 c += 5;
581 }
582 }
583 }
584 fprintf(outfile, "\n");
585 }
586 }
587 }
588 }
589
590 /* Read data lines and test them */
591
592 for (;;)
593 {
594 unsigned char *q;
595 int count, c;
596 int offsets[30];
597 int size_offsets = sizeof(offsets)/sizeof(int);
598
599 options = 0;
600
601 if (infile == stdin) printf(" data> ");
602 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
603 {
604 done = 1;
605 goto CONTINUE;
606 }
607 if (infile != stdin) fprintf(outfile, (char *)buffer);
608
609 len = (int)strlen((char *)buffer);
610 while (len > 0 && isspace(buffer[len-1])) len--;
611 buffer[len] = 0;
612 if (len == 0) break;
613
614 p = buffer;
615 while (isspace(*p)) p++;
616
617 q = dbuffer;
618 while ((c = *p++) != 0)
619 {
620 int i = 0;
621 int n = 0;
622 if (c == '\\') switch ((c = *p++))
623 {
624 case 'a': c = 7; break;
625 case 'b': c = '\b'; break;
626 case 'e': c = 27; break;
627 case 'f': c = '\f'; break;
628 case 'n': c = '\n'; break;
629 case 'r': c = '\r'; break;
630 case 't': c = '\t'; break;
631 case 'v': c = '\v'; break;
632
633 case '0': case '1': case '2': case '3':
634 case '4': case '5': case '6': case '7':
635 c -= '0';
636 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
637 c = c * 8 + *p++ - '0';
638 break;
639
640 case 'x':
641 c = 0;
642 while (i++ < 2 && isxdigit(*p))
643 {
644 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
645 p++;
646 }
647 break;
648
649 case 0: /* Allows for an empty line */
650 p--;
651 continue;
652
653 case 'A': /* Option setting */
654 options |= PCRE_ANCHORED;
655 continue;
656
657 case 'B':
658 options |= PCRE_NOTBOL;
659 continue;
660
661 case 'E':
662 options |= PCRE_DOLLAR_ENDONLY;
663 continue;
664
665 case 'I':
666 options |= PCRE_CASELESS;
667 continue;
668
669 case 'M':
670 options |= PCRE_MULTILINE;
671 continue;
672
673 case 'S':
674 options |= PCRE_DOTALL;
675 continue;
676
677 case 'O':
678 while(isdigit(*p)) n = n * 10 + *p++ - '0';
679 if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;
680 continue;
681
682 case 'Z':
683 options |= PCRE_NOTEOL;
684 continue;
685 }
686 *q++ = c;
687 }
688 *q = 0;
689 len = q - dbuffer;
690
691 /* Handle matching via the POSIX interface, which does not
692 support timing. */
693
694 if (posix || do_posix)
695 {
696 int rc;
697 int eflags = 0;
698 regmatch_t pmatch[30];
699 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
700 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
701
702 rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),
703 pmatch, eflags);
704
705 if (rc != 0)
706 {
707 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
708 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
709 }
710 else
711 {
712 size_t i;
713 for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)
714 {
715 if (pmatch[i].rm_so >= 0)
716 {
717 fprintf(outfile, "%2d: ", i);
718 pchars(dbuffer + pmatch[i].rm_so,
719 pmatch[i].rm_eo - pmatch[i].rm_so);
720 fprintf(outfile, "\n");
721 }
722 }
723 }
724 }
725
726 /* Handle matching via the native interface */
727
728 else
729 {
730 if (timeit)
731 {
732 register int i;
733 clock_t time_taken;
734 clock_t start_time = clock();
735 for (i = 0; i < 4000; i++)
736 count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,
737 size_offsets);
738 time_taken = clock() - start_time;
739 fprintf(outfile, "Execute time %.2f milliseconds\n",
740 ((double)time_taken)/(4 * CLOCKS_PER_SEC));
741 }
742
743 count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,
744 size_offsets);
745
746 if (count == 0)
747 {
748 fprintf(outfile, "Matched, but too many substrings\n");
749 count = size_offsets/2;
750 }
751
752 if (count >= 0)
753 {
754 int i;
755 count *= 2;
756 for (i = 0; i < count; i += 2)
757 {
758 if (offsets[i] < 0)
759 fprintf(outfile, "%2d: <unset>\n", i/2);
760 else
761 {
762 fprintf(outfile, "%2d: ", i/2);
763 pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);
764 fprintf(outfile, "\n");
765 }
766 }
767 }
768 else
769 {
770 if (count == -1) fprintf(outfile, "No match\n");
771 else fprintf(outfile, "Error %d\n", count);
772 }
773 }
774 }
775
776 CONTINUE:
777 if (posix || do_posix) regfree(&preg);
778 if (re != NULL) free(re);
779 if (extra != NULL) free(extra);
780 }
781
782 fprintf(outfile, "\n");
783 return 0;
784 }
785
786 /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12