/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory | Revision Log


Revision 9 - (show annotations) (download)
Sat Feb 24 21:38:13 2007 UTC (7 years, 4 months ago) by nigel
File MIME type: text/plain
File size: 19338 byte(s)
Load pcre-1.03 into code/trunk.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 #include <ctype.h>
6 #include <stdio.h>
7 #include <string.h>
8 #include <stdlib.h>
9 #include <time.h>
10
11 /* Use the internal info for displaying the results of pcre_study(). */
12
13 #include "internal.h"
14 #include "pcreposix.h"
15
/* Make sure CLOCKS_PER_SEC is available for the timing code: use CLK_TCK when
the platform offers that instead, and fall back to 100 when it offers
neither. */

#if !defined(CLOCKS_PER_SEC)
#  if defined(CLK_TCK)
#    define CLOCKS_PER_SEC CLK_TCK
#  else
#    define CLOCKS_PER_SEC 100
#  endif
#endif


/* File-scope state shared by the functions below: the flag that makes
new_malloc() report each store request, and the stream that receives the
test output. */

static int log_store = 0;
static FILE *outfile;
27
28
29
/* Text used by the debugging function print_internals() when it prints the
internal form of a regex. The table is indexed by opcode value, so the order
of the entries must not change. This is the same code as contained in pcre.c
under the DEBUG macro. */

static const char *OP_names[] = {
  /* End marker, then the single-item opcodes. */
  "End",
  "\\A", "\\B", "\\b", "\\D", "\\d", "\\S", "\\s", "\\W", "\\w",
  "Cut", "\\Z", "^", "$", "Any", "chars", "not",

  /* Three runs of nine repeat suffixes each. */
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",

  /* A fourth run of repeat suffixes, this one with only eight entries. */
  "*", "*?", "+", "+?", "?", "??", "{", "{",

  /* Character class and back reference. */
  "class", "Ref",

  /* Bracket-related items. */
  "Alt", "Ket", "KetRmax", "KetRmin",
  "Assert", "Assert not", "Once",
  "Brazero", "Braminzero", "Bra"
};
45
46
47 static void print_internals(pcre *re)
48 {
49 unsigned char *code = ((real_pcre *)re)->code;
50
51 printf("------------------------------------------------------------------\n");
52
53 for(;;)
54 {
55 int c;
56 int charlength;
57
58 printf("%3d ", code - ((real_pcre *)re)->code);
59
60 if (*code >= OP_BRA)
61 {
62 printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
63 code += 2;
64 }
65
66 else switch(*code)
67 {
68 case OP_END:
69 printf(" %s\n", OP_names[*code]);
70 printf("------------------------------------------------------------------\n");
71 return;
72
73 case OP_CHARS:
74 charlength = *(++code);
75 printf("%3d ", charlength);
76 while (charlength-- > 0)
77 if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);
78 break;
79
80 case OP_KETRMAX:
81 case OP_KETRMIN:
82 case OP_ALT:
83 case OP_KET:
84 case OP_ASSERT:
85 case OP_ASSERT_NOT:
86 case OP_ONCE:
87 printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
88 code += 2;
89 break;
90
91 case OP_STAR:
92 case OP_MINSTAR:
93 case OP_PLUS:
94 case OP_MINPLUS:
95 case OP_QUERY:
96 case OP_MINQUERY:
97 case OP_TYPESTAR:
98 case OP_TYPEMINSTAR:
99 case OP_TYPEPLUS:
100 case OP_TYPEMINPLUS:
101 case OP_TYPEQUERY:
102 case OP_TYPEMINQUERY:
103 if (*code >= OP_TYPESTAR)
104 printf(" %s", OP_names[code[1]]);
105 else if (isprint(c = code[1])) printf(" %c", c);
106 else printf(" \\x%02x", c);
107 printf("%s", OP_names[*code++]);
108 break;
109
110 case OP_EXACT:
111 case OP_UPTO:
112 case OP_MINUPTO:
113 if (isprint(c = code[3])) printf(" %c{", c);
114 else printf(" \\x%02x{", c);
115 if (*code != OP_EXACT) printf(",");
116 printf("%d}", (code[1] << 8) + code[2]);
117 if (*code == OP_MINUPTO) printf("?");
118 code += 3;
119 break;
120
121 case OP_TYPEEXACT:
122 case OP_TYPEUPTO:
123 case OP_TYPEMINUPTO:
124 printf(" %s{", OP_names[code[3]]);
125 if (*code != OP_TYPEEXACT) printf(",");
126 printf("%d}", (code[1] << 8) + code[2]);
127 if (*code == OP_TYPEMINUPTO) printf("?");
128 code += 3;
129 break;
130
131 case OP_NOT:
132 if (isprint(c = *(++code))) printf(" [^%c]", c);
133 else printf(" [^\\x%02x]", c);
134 break;
135
136 case OP_NOTSTAR:
137 case OP_NOTMINSTAR:
138 case OP_NOTPLUS:
139 case OP_NOTMINPLUS:
140 case OP_NOTQUERY:
141 case OP_NOTMINQUERY:
142 if (isprint(c = code[1])) printf(" [^%c]", c);
143 else printf(" [^\\x%02x]", c);
144 printf("%s", OP_names[*code++]);
145 break;
146
147 case OP_NOTEXACT:
148 case OP_NOTUPTO:
149 case OP_NOTMINUPTO:
150 if (isprint(c = code[3])) printf(" [^%c]{", c);
151 else printf(" [^\\x%02x]{", c);
152 if (*code != OP_NOTEXACT) printf(",");
153 printf("%d}", (code[1] << 8) + code[2]);
154 if (*code == OP_NOTMINUPTO) printf("?");
155 code += 3;
156 break;
157
158 case OP_REF:
159 printf(" \\%d", *(++code));
160 code++;
161 goto CLASS_REF_REPEAT;
162
163 case OP_CLASS:
164 {
165 int i, min, max;
166
167 code++;
168 printf(" [");
169
170 for (i = 0; i < 256; i++)
171 {
172 if ((code[i/8] & (1 << (i&7))) != 0)
173 {
174 int j;
175 for (j = i+1; j < 256; j++)
176 if ((code[j/8] & (1 << (j&7))) == 0) break;
177 if (i == '-' || i == ']') printf("\\");
178 if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);
179 if (--j > i)
180 {
181 printf("-");
182 if (j == '-' || j == ']') printf("\\");
183 if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);
184 }
185 i = j;
186 }
187 }
188 printf("]");
189 code += 32;
190
191 CLASS_REF_REPEAT:
192
193 switch(*code)
194 {
195 case OP_CRSTAR:
196 case OP_CRMINSTAR:
197 case OP_CRPLUS:
198 case OP_CRMINPLUS:
199 case OP_CRQUERY:
200 case OP_CRMINQUERY:
201 printf("%s", OP_names[*code]);
202 break;
203
204 case OP_CRRANGE:
205 case OP_CRMINRANGE:
206 min = (code[1] << 8) + code[2];
207 max = (code[3] << 8) + code[4];
208 if (max == 0) printf("{%d,}", min);
209 else printf("{%d,%d}", min, max);
210 if (*code == OP_CRMINRANGE) printf("?");
211 code += 4;
212 break;
213
214 default:
215 code--;
216 }
217 }
218 break;
219
220 /* Anything else is just a one-node item */
221
222 default:
223 printf(" %s", OP_names[*code]);
224 break;
225 }
226
227 code++;
228 printf("\n");
229 }
230 }
231
232
233
234 /* Character string printing function. */
235
236 static void pchars(unsigned char *p, int length)
237 {
238 int c;
239 while (length-- > 0)
240 if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
241 else fprintf(outfile, "\\x%02x", c);
242 }
243
244
245
246 /* Alternative malloc function, to test functionality and show the size of the
247 compiled re. */
248
249 static void *new_malloc(size_t size)
250 {
251 if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);
252 return malloc(size);
253 }
254
255
256
257 /* Read lines from named file or stdin and write to named file or stdout; lines
258 consist of a regular expression, in delimiters and optionally followed by
259 options, followed by a set of test data, terminated by an empty line. */
260
261 int main(int argc, char **argv)
262 {
263 FILE *infile = stdin;
264 int options = 0;
265 int study_options = 0;
266 int op = 1;
267 int timeit = 0;
268 int showinfo = 0;
269 int posix = 0;
270 int debug = 0;
271 unsigned char buffer[30000];
272 unsigned char dbuffer[1024];
273
274 /* Static so that new_malloc can use it. */
275
276 outfile = stdout;
277
278 /* Scan options */
279
280 while (argc > 1 && argv[op][0] == '-')
281 {
282 if (strcmp(argv[op], "-s") == 0) log_store = 1;
283 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
284 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
285 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
286 else if (strcmp(argv[op], "-p") == 0) posix = 1;
287 else
288 {
289 printf("*** Unknown option %s\n", argv[op]);
290 return 1;
291 }
292 op++;
293 argc--;
294 }
295
296 /* Sort out the input and output files */
297
298 if (argc > 1)
299 {
300 infile = fopen(argv[op], "r");
301 if (infile == NULL)
302 {
303 printf("** Failed to open %s\n", argv[op]);
304 return 1;
305 }
306 }
307
308 if (argc > 2)
309 {
310 outfile = fopen(argv[op+1], "w");
311 if (outfile == NULL)
312 {
313 printf("** Failed to open %s\n", argv[op+1]);
314 return 1;
315 }
316 }
317
318 /* Set alternative malloc function */
319
320 pcre_malloc = new_malloc;
321
322 /* Heading line, then prompt for first re if stdin */
323
324 fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");
325 fprintf(outfile, "PCRE version %s\n\n", pcre_version());
326
327 /* Main loop */
328
329 for (;;)
330 {
331 pcre *re = NULL;
332 pcre_extra *extra = NULL;
333 regex_t preg;
334 const char *error;
335 unsigned char *p, *pp;
336 int do_study = 0;
337 int do_debug = 0;
338 int do_posix = 0;
339 int erroroffset, len, delimiter;
340
341 if (infile == stdin) printf(" re> ");
342 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
343 if (infile != stdin) fprintf(outfile, (char *)buffer);
344
345 p = buffer;
346 while (isspace(*p)) p++;
347 if (*p == 0) continue;
348
349 /* Get the delimiter and seek the end of the pattern; if is isn't
350 complete, read more. */
351
352 delimiter = *p++;
353
354 if (isalnum(delimiter))
355 {
356 fprintf(outfile, "** Delimiter must not be alphameric\n");
357 goto SKIP_DATA;
358 }
359
360 pp = p;
361
362 for(;;)
363 {
364 while (*pp != 0 && *pp != delimiter) pp++;
365 if (*pp != 0) break;
366
367 len = sizeof(buffer) - (pp - buffer);
368 if (len < 256)
369 {
370 fprintf(outfile, "** Expression too long - missing delimiter?\n");
371 goto SKIP_DATA;
372 }
373
374 if (infile == stdin) printf(" > ");
375 if (fgets((char *)pp, len, infile) == NULL)
376 {
377 fprintf(outfile, "** Unexpected EOF\n");
378 goto END_OFF;
379 }
380 if (infile != stdin) fprintf(outfile, (char *)pp);
381 }
382
383 /* Terminate the pattern at the delimiter */
384
385 *pp++ = 0;
386
387 /* Look for options after final delimiter */
388
389 options = 0;
390 study_options = 0;
391 while (*pp != 0)
392 {
393 switch (*pp++)
394 {
395 case 'i': options |= PCRE_CASELESS; break;
396 case 'm': options |= PCRE_MULTILINE; break;
397 case 's': options |= PCRE_DOTALL; break;
398 case 'x': options |= PCRE_EXTENDED; break;
399 case 'A': options |= PCRE_ANCHORED; break;
400 case 'D': do_debug = 1; break;
401 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
402 case 'P': do_posix = 1; break;
403 case 'S': do_study = 1; break;
404 case 'I': study_options |= PCRE_CASELESS; break;
405 case 'X': options |= PCRE_EXTRA; break;
406 case '\n': case ' ': break;
407 default:
408 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
409 goto SKIP_DATA;
410 }
411 }
412
413 /* Handle compiing via the POSIX interface, which doesn't support the
414 timing, showing, or debugging options. */
415
416 if (posix || do_posix)
417 {
418 int rc;
419 int cflags = 0;
420 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
421 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
422 rc = regcomp(&preg, (char *)p, cflags);
423
424 /* Compilation failed; go back for another re, skipping to blank line
425 if non-interactive. */
426
427 if (rc != 0)
428 {
429 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
430 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
431 goto SKIP_DATA;
432 }
433 }
434
435 /* Handle compiling via the native interface */
436
437 else
438 {
439 if (timeit)
440 {
441 register int i;
442 clock_t time_taken;
443 clock_t start_time = clock();
444 for (i = 0; i < 4000; i++)
445 {
446 re = pcre_compile((char *)p, options, &error, &erroroffset);
447 if (re != NULL) free(re);
448 }
449 time_taken = clock() - start_time;
450 fprintf(outfile, "Compile time %.2f milliseconds\n",
451 ((double)time_taken)/(4 * CLOCKS_PER_SEC));
452 }
453
454 re = pcre_compile((char *)p, options, &error, &erroroffset);
455
456 /* Compilation failed; go back for another re, skipping to blank line
457 if non-interactive. */
458
459 if (re == NULL)
460 {
461 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
462 SKIP_DATA:
463 if (infile != stdin)
464 {
465 for (;;)
466 {
467 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
468 goto END_OFF;
469 len = (int)strlen((char *)buffer);
470 while (len > 0 && isspace(buffer[len-1])) len--;
471 if (len == 0) break;
472 }
473 fprintf(outfile, "\n");
474 }
475 continue;
476 }
477
478 /* Compilation succeeded; print data if required */
479
480 if (showinfo || do_debug)
481 {
482 int first_char, count;
483
484 if (debug || do_debug) print_internals(re);
485
486 count = pcre_info(re, &options, &first_char);
487 if (count < 0) fprintf(outfile,
488 "Error %d while reading info\n", count);
489 else
490 {
491 fprintf(outfile, "Identifying subpattern count = %d\n", count);
492 if (options == 0) fprintf(outfile, "No options\n");
493 else fprintf(outfile, "Options:%s%s%s%s%s%s%s\n",
494 ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
495 ((options & PCRE_CASELESS) != 0)? " caseless" : "",
496 ((options & PCRE_EXTENDED) != 0)? " extended" : "",
497 ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
498 ((options & PCRE_DOTALL) != 0)? " dotall" : "",
499 ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
500 ((options & PCRE_EXTRA) != 0)? " extra" : "");
501 if (first_char == -1)
502 {
503 fprintf(outfile, "First char at start or follows \\n\n");
504 }
505 else if (first_char < 0)
506 {
507 fprintf(outfile, "No first char\n");
508 }
509 else
510 {
511 if (isprint(first_char))
512 fprintf(outfile, "First char = \'%c\'\n", first_char);
513 else
514 fprintf(outfile, "First char = %d\n", first_char);
515 }
516 }
517 }
518
519 /* If /S was present, study the regexp to generate additional info to
520 help with the matching. */
521
522 if (do_study)
523 {
524 if (timeit)
525 {
526 register int i;
527 clock_t time_taken;
528 clock_t start_time = clock();
529 for (i = 0; i < 4000; i++)
530 extra = pcre_study(re, study_options, &error);
531 time_taken = clock() - start_time;
532 if (extra != NULL) free(extra);
533 fprintf(outfile, " Study time %.2f milliseconds\n",
534 ((double)time_taken)/(4 * CLOCKS_PER_SEC));
535 }
536
537 extra = pcre_study(re, study_options, &error);
538 if (error != NULL)
539 fprintf(outfile, "Failed to study: %s\n", error);
540 else if (extra == NULL)
541 fprintf(outfile, "Study returned NULL\n");
542
543 /* This looks at internal information. A bit kludgy to do it this
544 way, but it is useful for testing. */
545
546 else if (showinfo || do_debug)
547 {
548 real_pcre_extra *xx = (real_pcre_extra *)extra;
549 if ((xx->options & PCRE_STUDY_MAPPED) == 0)
550 fprintf(outfile, "No starting character set\n");
551 else
552 {
553 int i;
554 int c = 24;
555 fprintf(outfile, "Starting character set: ");
556 for (i = 0; i < 256; i++)
557 {
558 if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)
559 {
560 if (c > 75)
561 {
562 fprintf(outfile, "\n ");
563 c = 2;
564 }
565 if (isprint(i) && i != ' ')
566 {
567 fprintf(outfile, "%c ", i);
568 c += 2;
569 }
570 else
571 {
572 fprintf(outfile, "\\x%02x ", i);
573 c += 5;
574 }
575 }
576 }
577 fprintf(outfile, "\n");
578 }
579 }
580 }
581 }
582
583 /* Read data lines and test them */
584
585 for (;;)
586 {
587 unsigned char *q;
588 int count, c;
589 int offsets[30];
590 int size_offsets = sizeof(offsets)/sizeof(int);
591
592 options = 0;
593
594 if (infile == stdin) printf(" data> ");
595 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) goto END_OFF;
596 if (infile != stdin) fprintf(outfile, (char *)buffer);
597
598 len = (int)strlen((char *)buffer);
599 while (len > 0 && isspace(buffer[len-1])) len--;
600 buffer[len] = 0;
601 if (len == 0) break;
602
603 p = buffer;
604 while (isspace(*p)) p++;
605
606 q = dbuffer;
607 while ((c = *p++) != 0)
608 {
609 int i = 0;
610 int n = 0;
611 if (c == '\\') switch ((c = *p++))
612 {
613 case 'a': c = 7; break;
614 case 'b': c = '\b'; break;
615 case 'e': c = 27; break;
616 case 'f': c = '\f'; break;
617 case 'n': c = '\n'; break;
618 case 'r': c = '\r'; break;
619 case 't': c = '\t'; break;
620 case 'v': c = '\v'; break;
621
622 case '0': case '1': case '2': case '3':
623 case '4': case '5': case '6': case '7':
624 c -= '0';
625 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
626 c = c * 8 + *p++ - '0';
627 break;
628
629 case 'x':
630 c = 0;
631 while (i++ < 2 && isxdigit(*p))
632 {
633 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
634 p++;
635 }
636 break;
637
638 case 0: /* Allows for an empty line */
639 p--;
640 continue;
641
642 case 'A': /* Option setting */
643 options |= PCRE_ANCHORED;
644 continue;
645
646 case 'B':
647 options |= PCRE_NOTBOL;
648 continue;
649
650 case 'E':
651 options |= PCRE_DOLLAR_ENDONLY;
652 continue;
653
654 case 'I':
655 options |= PCRE_CASELESS;
656 continue;
657
658 case 'M':
659 options |= PCRE_MULTILINE;
660 continue;
661
662 case 'S':
663 options |= PCRE_DOTALL;
664 continue;
665
666 case 'O':
667 while(isdigit(*p)) n = n * 10 + *p++ - '0';
668 if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;
669 continue;
670
671 case 'Z':
672 options |= PCRE_NOTEOL;
673 continue;
674 }
675 *q++ = c;
676 }
677 *q = 0;
678 len = q - dbuffer;
679
680 /* Handle matching via the POSIX interface, which does not
681 support timing. */
682
683 if (posix || do_posix)
684 {
685 int rc;
686 int eflags = 0;
687 regmatch_t pmatch[30];
688 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
689 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
690
691 rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),
692 pmatch, eflags);
693
694 if (rc != 0)
695 {
696 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
697 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
698 }
699 else
700 {
701 size_t i;
702 for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)
703 {
704 if (pmatch[i].rm_so >= 0)
705 {
706 fprintf(outfile, "%2d: ", i);
707 pchars(dbuffer + pmatch[i].rm_so,
708 pmatch[i].rm_eo - pmatch[i].rm_so);
709 fprintf(outfile, "\n");
710 }
711 }
712 }
713 }
714
715 /* Handle matching via the native interface */
716
717 else
718 {
719 if (timeit)
720 {
721 register int i;
722 clock_t time_taken;
723 clock_t start_time = clock();
724 for (i = 0; i < 4000; i++)
725 count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,
726 size_offsets);
727 time_taken = clock() - start_time;
728 fprintf(outfile, "Execute time %.2f milliseconds\n",
729 ((double)time_taken)/(4 * CLOCKS_PER_SEC));
730 }
731
732 count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,
733 size_offsets);
734
735 if (count == 0)
736 {
737 fprintf(outfile, "Matched, but too many substrings\n");
738 count = size_offsets/2;
739 }
740
741 if (count >= 0)
742 {
743 int i;
744 count *= 2;
745 for (i = 0; i < count; i += 2)
746 {
747 if (offsets[i] < 0)
748 fprintf(outfile, "%2d: <unset>\n", i/2);
749 else
750 {
751 fprintf(outfile, "%2d: ", i/2);
752 pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);
753 fprintf(outfile, "\n");
754 }
755 }
756 }
757 else
758 {
759 if (count == -1) fprintf(outfile, "No match\n");
760 else fprintf(outfile, "Error %d\n", count);
761 }
762 }
763 }
764
765 if (posix || do_posix) regfree(&preg);
766 if (re != NULL) free(re);
767 if (extra != NULL) free(extra);
768 }
769
770 END_OFF:
771 fprintf(outfile, "\n");
772 return 0;
773 }
774
775 /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12