/[pcre]/code/tags/pcre-1.00/pcretest.c
ViewVC logotype

Contents of /code/tags/pcre-1.00/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4 - (show annotations) (download)
Sat Feb 24 21:38:03 2007 UTC (7 years, 8 months ago) by nigel
File MIME type: text/plain
File size: 19266 byte(s)
Tag code/trunk as code/tags/pcre-1.00.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 #include <ctype.h>
6 #include <stdio.h>
7 #include <string.h>
8 #include <stdlib.h>
9 #include <time.h>
10
11 /* Use the internal info for displaying the results of pcre_study(). */
12
13 #include "internal.h"
14 #include "pcreposix.h"
15
/* Make sure CLOCKS_PER_SEC is available for the timing output in main(). If
the headers included above did not define it, fall back to CLK_TCK when that
exists, and otherwise to a fixed value of 100. */

#ifndef CLOCKS_PER_SEC
#ifdef CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif
23
24
/* Stream that receives the test results. main() points it at stdout and
switches it to a named file when an output file argument is given. It lives at
file scope because pchars() and new_malloc() write to it as well. */

static FILE *outfile;

/* Non-zero (set by the -s command line option) makes new_malloc() report the
size of every store request on outfile. */

static int log_store = 0;
27
28
29
/* Debugging support for printing the internal form of the regex. According to
the original note, this is the same code as contained in pcre.c under the DEBUG
macro.

OP_names is indexed by opcode value in print_internals() below, so the order of
the entries is presumably required to match the OP_xxx definitions that come
from internal.h (not visible here - keep the two in step when either changes).
The entries are string literals, hence the table points at const char. */

static const char *OP_names[] = {
  "End", "\\A", "\\B", "\\b", "\\D", "\\d",
  "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",
  "not",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{",
  "class", "Ref",
  "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",
  "Brazero", "Braminzero", "Bra"
};
44
45
46 static void print_internals(pcre *re)
47 {
48 unsigned char *code = ((real_pcre *)re)->code;
49
50 printf("------------------------------------------------------------------\n");
51
52 for(;;)
53 {
54 int c;
55 int charlength;
56
57 printf("%3d ", code - ((real_pcre *)re)->code);
58
59 if (*code >= OP_BRA)
60 {
61 printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
62 code += 2;
63 }
64
65 else switch(*code)
66 {
67 case OP_END:
68 printf(" %s\n", OP_names[*code]);
69 printf("------------------------------------------------------------------\n");
70 return;
71
72 case OP_CHARS:
73 charlength = *(++code);
74 printf("%3d ", charlength);
75 while (charlength-- > 0)
76 if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);
77 break;
78
79 case OP_KETRMAX:
80 case OP_KETRMIN:
81 case OP_ALT:
82 case OP_KET:
83 case OP_ASSERT:
84 case OP_ASSERT_NOT:
85 case OP_ONCE:
86 printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
87 code += 2;
88 break;
89
90 case OP_STAR:
91 case OP_MINSTAR:
92 case OP_PLUS:
93 case OP_MINPLUS:
94 case OP_QUERY:
95 case OP_MINQUERY:
96 case OP_TYPESTAR:
97 case OP_TYPEMINSTAR:
98 case OP_TYPEPLUS:
99 case OP_TYPEMINPLUS:
100 case OP_TYPEQUERY:
101 case OP_TYPEMINQUERY:
102 if (*code >= OP_TYPESTAR)
103 printf(" %s", OP_names[code[1]]);
104 else if (isprint(c = code[1])) printf(" %c", c);
105 else printf(" \\x%02x", c);
106 printf("%s", OP_names[*code++]);
107 break;
108
109 case OP_EXACT:
110 case OP_UPTO:
111 case OP_MINUPTO:
112 if (isprint(c = code[3])) printf(" %c{", c);
113 else printf(" \\x%02x{", c);
114 if (*code != OP_EXACT) printf(",");
115 printf("%d}", (code[1] << 8) + code[2]);
116 if (*code == OP_MINUPTO) printf("?");
117 code += 3;
118 break;
119
120 case OP_TYPEEXACT:
121 case OP_TYPEUPTO:
122 case OP_TYPEMINUPTO:
123 printf(" %s{", OP_names[code[3]]);
124 if (*code != OP_TYPEEXACT) printf(",");
125 printf("%d}", (code[1] << 8) + code[2]);
126 if (*code == OP_TYPEMINUPTO) printf("?");
127 code += 3;
128 break;
129
130 case OP_NOT:
131 if (isprint(c = *(++code))) printf(" [^%c]", c);
132 else printf(" [^\\x%02x]", c);
133 break;
134
135 case OP_NOTSTAR:
136 case OP_NOTMINSTAR:
137 case OP_NOTPLUS:
138 case OP_NOTMINPLUS:
139 case OP_NOTQUERY:
140 case OP_NOTMINQUERY:
141 if (isprint(c = code[1])) printf(" [^%c]", c);
142 else printf(" [^\\x%02x]", c);
143 printf("%s", OP_names[*code++]);
144 break;
145
146 case OP_NOTEXACT:
147 case OP_NOTUPTO:
148 case OP_NOTMINUPTO:
149 if (isprint(c = code[3])) printf(" [^%c]{", c);
150 else printf(" [^\\x%02x]{", c);
151 if (*code != OP_NOTEXACT) printf(",");
152 printf("%d}", (code[1] << 8) + code[2]);
153 if (*code == OP_NOTMINUPTO) printf("?");
154 code += 3;
155 break;
156
157 case OP_REF:
158 printf(" \\%d", *(++code));
159 break;
160
161 case OP_CLASS:
162 {
163 int i, min, max;
164
165 code++;
166 printf(" [");
167
168 for (i = 0; i < 256; i++)
169 {
170 if ((code[i/8] & (1 << (i&7))) != 0)
171 {
172 int j;
173 for (j = i+1; j < 256; j++)
174 if ((code[j/8] & (1 << (j&7))) == 0) break;
175 if (i == '-' || i == ']') printf("\\");
176 if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);
177 if (--j > i)
178 {
179 printf("-");
180 if (j == '-' || j == ']') printf("\\");
181 if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);
182 }
183 i = j;
184 }
185 }
186 printf("]");
187 code += 32;
188
189 switch(*code)
190 {
191 case OP_CRSTAR:
192 case OP_CRMINSTAR:
193 case OP_CRPLUS:
194 case OP_CRMINPLUS:
195 case OP_CRQUERY:
196 case OP_CRMINQUERY:
197 printf("%s", OP_names[*code]);
198 break;
199
200 case OP_CRRANGE:
201 case OP_CRMINRANGE:
202 min = (code[1] << 8) + code[2];
203 max = (code[3] << 8) + code[4];
204 if (max == 0) printf("{%d,}", min);
205 else printf("{%d,%d}", min, max);
206 if (*code == OP_CRMINRANGE) printf("?");
207 code += 4;
208 break;
209
210 default:
211 code--;
212 }
213 }
214 break;
215
216 /* Anything else is just a one-node item */
217
218 default:
219 printf(" %s", OP_names[*code]);
220 break;
221 }
222
223 code++;
224 printf("\n");
225 }
226 }
227
228
229
230 /* Character string printing function. */
231
232 static void pchars(unsigned char *p, int length)
233 {
234 int c;
235 while (length-- > 0)
236 if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
237 else fprintf(outfile, "\\x%02x", c);
238 }
239
240
241
242 /* Alternative malloc function, to test functionality and show the size of the
243 compiled re. */
244
245 static void *new_malloc(size_t size)
246 {
247 if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);
248 return malloc(size);
249 }
250
251
252
253 /* Read lines from named file or stdin and write to named file or stdout; lines
254 consist of a regular expression, in delimiters and optionally followed by
255 options, followed by a set of test data, terminated by an empty line. */
256
257 int main(int argc, char **argv)
258 {
259 FILE *infile = stdin;
260 int options = 0;
261 int study_options = 0;
262 int op = 1;
263 int timeit = 0;
264 int showinfo = 0;
265 int posix = 0;
266 int debug = 0;
267 unsigned char buffer[30000];
268 unsigned char dbuffer[1024];
269
270 /* Static so that new_malloc can use it. */
271
272 outfile = stdout;
273
274 /* Scan options */
275
276 while (argc > 1 && argv[op][0] == '-')
277 {
278 if (strcmp(argv[op], "-s") == 0) log_store = 1;
279 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
280 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
281 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
282 else if (strcmp(argv[op], "-p") == 0) posix = 1;
283 else
284 {
285 printf("*** Unknown option %s\n", argv[op]);
286 return 1;
287 }
288 op++;
289 argc--;
290 }
291
292 /* Sort out the input and output files */
293
294 if (argc > 1)
295 {
296 infile = fopen(argv[op], "r");
297 if (infile == NULL)
298 {
299 printf("** Failed to open %s\n", argv[op]);
300 return 1;
301 }
302 }
303
304 if (argc > 2)
305 {
306 outfile = fopen(argv[op+1], "w");
307 if (outfile == NULL)
308 {
309 printf("** Failed to open %s\n", argv[op+1]);
310 return 1;
311 }
312 }
313
314 /* Set alternative malloc function */
315
316 pcre_malloc = new_malloc;
317
318 /* Heading line, then prompt for first re if stdin */
319
320 fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");
321 fprintf(outfile, "PCRE version %s\n\n", pcre_version());
322
323 /* Main loop */
324
325 for (;;)
326 {
327 pcre *re = NULL;
328 pcre_extra *extra = NULL;
329 regex_t preg;
330 char *error;
331 unsigned char *p, *pp;
332 int do_study = 0;
333 int do_debug = 0;
334 int do_posix = 0;
335 int erroroffset, len, delimiter;
336
337 if (infile == stdin) printf(" re> ");
338 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
339 if (infile != stdin) fprintf(outfile, (char *)buffer);
340
341 p = buffer;
342 while (isspace(*p)) p++;
343 if (*p == 0) continue;
344
345 /* Get the delimiter and seek the end of the pattern; if is isn't
346 complete, read more. */
347
348 delimiter = *p++;
349
350 if (isalnum(delimiter))
351 {
352 fprintf(outfile, "** Delimiter must not be alphameric\n");
353 goto SKIP_DATA;
354 }
355
356 pp = p;
357
358 for(;;)
359 {
360 while (*pp != 0 && *pp != delimiter) pp++;
361 if (*pp != 0) break;
362
363 len = sizeof(buffer) - (pp - buffer);
364 if (len < 256)
365 {
366 fprintf(outfile, "** Expression too long - missing delimiter?\n");
367 goto SKIP_DATA;
368 }
369
370 if (infile == stdin) printf(" > ");
371 if (fgets((char *)pp, len, infile) == NULL)
372 {
373 fprintf(outfile, "** Unexpected EOF\n");
374 goto END_OFF;
375 }
376 if (infile != stdin) fprintf(outfile, (char *)pp);
377 }
378
379 /* Terminate the pattern at the delimiter */
380
381 *pp++ = 0;
382
383 /* Look for options after final delimiter */
384
385 options = 0;
386 study_options = 0;
387 while (*pp != 0)
388 {
389 switch (*pp++)
390 {
391 case 'i': options |= PCRE_CASELESS; break;
392 case 'm': options |= PCRE_MULTILINE; break;
393 case 's': options |= PCRE_DOTALL; break;
394 case 'x': options |= PCRE_EXTENDED; break;
395 case 'A': options |= PCRE_ANCHORED; break;
396 case 'D': do_debug = 1; break;
397 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
398 case 'P': do_posix = 1; break;
399 case 'S': do_study = 1; break;
400 case 'I': study_options |= PCRE_CASELESS; break;
401 case 'X': options |= PCRE_EXTRA; break;
402 case '\n': case ' ': break;
403 default:
404 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
405 goto SKIP_DATA;
406 }
407 }
408
409 /* Handle compiing via the POSIX interface, which doesn't support the
410 timing, showing, or debugging options. */
411
412 if (posix || do_posix)
413 {
414 int rc;
415 int cflags = 0;
416 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
417 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
418 rc = regcomp(&preg, (char *)p, cflags);
419
420 /* Compilation failed; go back for another re, skipping to blank line
421 if non-interactive. */
422
423 if (rc != 0)
424 {
425 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
426 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
427 goto SKIP_DATA;
428 }
429 }
430
431 /* Handle compiling via the native interface */
432
433 else
434 {
435 if (timeit)
436 {
437 register int i;
438 clock_t time_taken;
439 clock_t start_time = clock();
440 for (i = 0; i < 4000; i++)
441 {
442 re = pcre_compile((char *)p, options, &error, &erroroffset);
443 if (re != NULL) free(re);
444 }
445 time_taken = clock() - start_time;
446 fprintf(outfile, "Compile time %.2f milliseconds\n",
447 ((double)time_taken)/(4 * CLOCKS_PER_SEC));
448 }
449
450 re = pcre_compile((char *)p, options, &error, &erroroffset);
451
452 /* Compilation failed; go back for another re, skipping to blank line
453 if non-interactive. */
454
455 if (re == NULL)
456 {
457 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
458 SKIP_DATA:
459 if (infile != stdin)
460 {
461 for (;;)
462 {
463 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
464 goto END_OFF;
465 len = (int)strlen((char *)buffer);
466 while (len > 0 && isspace(buffer[len-1])) len--;
467 if (len == 0) break;
468 }
469 fprintf(outfile, "\n");
470 }
471 continue;
472 }
473
474 /* Compilation succeeded; print data if required */
475
476 if (showinfo || do_debug)
477 {
478 int first_char, count;
479
480 if (debug || do_debug) print_internals(re);
481
482 count = pcre_info(re, &options, &first_char);
483 if (count < 0) fprintf(outfile,
484 "Error %d while reading info\n", count);
485 else
486 {
487 fprintf(outfile, "Identifying subpattern count = %d\n", count);
488 if (options == 0) fprintf(outfile, "No options\n");
489 else fprintf(outfile, "Options:%s%s%s%s%s%s%s\n",
490 ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
491 ((options & PCRE_CASELESS) != 0)? " caseless" : "",
492 ((options & PCRE_EXTENDED) != 0)? " extended" : "",
493 ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
494 ((options & PCRE_DOTALL) != 0)? " dotall" : "",
495 ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
496 ((options & PCRE_EXTRA) != 0)? " extra" : "");
497 if (first_char == -1)
498 {
499 fprintf(outfile, "First char at start or follows \\n\n");
500 }
501 else if (first_char < 0)
502 {
503 fprintf(outfile, "No first char\n");
504 }
505 else
506 {
507 if (isprint(first_char))
508 fprintf(outfile, "First char = \'%c\'\n", first_char);
509 else
510 fprintf(outfile, "First char = %d\n", first_char);
511 }
512 }
513 }
514
515 /* If /S was present, study the regexp to generate additional info to
516 help with the matching. */
517
518 if (do_study)
519 {
520 if (timeit)
521 {
522 register int i;
523 clock_t time_taken;
524 clock_t start_time = clock();
525 for (i = 0; i < 4000; i++)
526 extra = pcre_study(re, study_options, &error);
527 time_taken = clock() - start_time;
528 if (extra != NULL) free(extra);
529 fprintf(outfile, " Study time %.2f milliseconds\n",
530 ((double)time_taken)/(4 * CLOCKS_PER_SEC));
531 }
532
533 extra = pcre_study(re, study_options, &error);
534 if (error != NULL)
535 fprintf(outfile, "Failed to study: %s\n", error);
536 else if (extra == NULL)
537 fprintf(outfile, "Study returned NULL\n");
538
539 /* This looks at internal information. A bit kludgy to do it this
540 way, but it is useful for testing. */
541
542 else if (showinfo || do_debug)
543 {
544 real_pcre_extra *xx = (real_pcre_extra *)extra;
545 if ((xx->options & PCRE_STUDY_MAPPED) == 0)
546 fprintf(outfile, "No starting character set\n");
547 else
548 {
549 int i;
550 int c = 24;
551 fprintf(outfile, "Starting character set: ");
552 for (i = 0; i < 256; i++)
553 {
554 if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)
555 {
556 if (c > 75)
557 {
558 fprintf(outfile, "\n ");
559 c = 2;
560 }
561 if (isprint(i) && i != ' ')
562 {
563 fprintf(outfile, "%c ", i);
564 c += 2;
565 }
566 else
567 {
568 fprintf(outfile, "\\x%02x ", i);
569 c += 5;
570 }
571 }
572 }
573 fprintf(outfile, "\n");
574 }
575 }
576 }
577 }
578
579 /* Read data lines and test them */
580
581 for (;;)
582 {
583 unsigned char *pp;
584 int count, c;
585 int offsets[30];
586 int size_offsets = sizeof(offsets)/sizeof(int);
587
588 options = 0;
589
590 if (infile == stdin) printf(" data> ");
591 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) goto END_OFF;
592 if (infile != stdin) fprintf(outfile, (char *)buffer);
593
594 len = (int)strlen((char *)buffer);
595 while (len > 0 && isspace(buffer[len-1])) len--;
596 buffer[len] = 0;
597 if (len == 0) break;
598
599 p = buffer;
600 while (isspace(*p)) p++;
601
602 pp = dbuffer;
603 while ((c = *p++) != 0)
604 {
605 int i = 0;
606 int n = 0;
607 if (c == '\\') switch ((c = *p++))
608 {
609 case 'a': c = 7; break;
610 case 'b': c = '\b'; break;
611 case 'e': c = 27; break;
612 case 'f': c = '\f'; break;
613 case 'n': c = '\n'; break;
614 case 'r': c = '\r'; break;
615 case 't': c = '\t'; break;
616 case 'v': c = '\v'; break;
617
618 case '0': case '1': case '2': case '3':
619 case '4': case '5': case '6': case '7':
620 c -= '0';
621 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
622 c = c * 8 + *p++ - '0';
623 break;
624
625 case 'x':
626 c = 0;
627 while (i++ < 2 && isxdigit(*p))
628 {
629 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
630 p++;
631 }
632 break;
633
634 case 0: /* Allows for an empty line */
635 p--;
636 continue;
637
638 case 'A': /* Option setting */
639 options |= PCRE_ANCHORED;
640 continue;
641
642 case 'B':
643 options |= PCRE_NOTBOL;
644 continue;
645
646 case 'E':
647 options |= PCRE_DOLLAR_ENDONLY;
648 continue;
649
650 case 'I':
651 options |= PCRE_CASELESS;
652 continue;
653
654 case 'M':
655 options |= PCRE_MULTILINE;
656 continue;
657
658 case 'S':
659 options |= PCRE_DOTALL;
660 continue;
661
662 case 'O':
663 while(isdigit(*p)) n = n * 10 + *p++ - '0';
664 if (n <= sizeof(offsets)/sizeof(int)) size_offsets = n;
665 continue;
666
667 case 'Z':
668 options |= PCRE_NOTEOL;
669 continue;
670 }
671 *pp++ = c;
672 }
673 *pp = 0;
674 len = pp - dbuffer;
675
676 /* Handle matching via the POSIX interface, which does not
677 support timing. */
678
679 if (posix || do_posix)
680 {
681 int rc;
682 int eflags = 0;
683 regmatch_t pmatch[30];
684 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
685 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
686
687 rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),
688 pmatch, eflags);
689
690 if (rc != 0)
691 {
692 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
693 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
694 }
695 else
696 {
697 int i;
698 for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)
699 {
700 if (pmatch[i].rm_so >= 0)
701 {
702 fprintf(outfile, "%2d: ", i);
703 pchars(dbuffer + pmatch[i].rm_so,
704 pmatch[i].rm_eo - pmatch[i].rm_so);
705 fprintf(outfile, "\n");
706 }
707 }
708 }
709 }
710
711 /* Handle matching via the native interface */
712
713 else
714 {
715 if (timeit)
716 {
717 register int i;
718 clock_t time_taken;
719 clock_t start_time = clock();
720 for (i = 0; i < 4000; i++)
721 count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,
722 size_offsets);
723 time_taken = clock() - start_time;
724 fprintf(outfile, "Execute time %.2f milliseconds\n",
725 ((double)time_taken)/(4 * CLOCKS_PER_SEC));
726 }
727
728 count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,
729 size_offsets);
730
731 if (count == 0)
732 {
733 fprintf(outfile, "Matched, but too many substrings\n");
734 count = size_offsets/2;
735 }
736
737 if (count >= 0)
738 {
739 int i;
740 count *= 2;
741 for (i = 0; i < count; i += 2)
742 {
743 if (offsets[i] < 0)
744 fprintf(outfile, "%2d: <unset>\n", i/2);
745 else
746 {
747 fprintf(outfile, "%2d: ", i/2);
748 pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);
749 fprintf(outfile, "\n");
750 }
751 }
752 }
753 else
754 {
755 if (count == -1) fprintf(outfile, "No match\n");
756 else fprintf(outfile, "Error %d\n", count);
757 }
758 }
759 }
760
761 if (posix || do_posix) regfree(&preg);
762 if (re != NULL) free(re);
763 if (extra != NULL) free(extra);
764 }
765
766 END_OFF:
767 fprintf(outfile, "\n");
768 return 0;
769 }
770
771 /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12