/[pcre]/code/tags/pcre-3.2/pcretest.c
ViewVC logotype

Contents of /code/tags/pcre-3.2/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 48 - (show annotations) (download)
Sat Feb 24 21:39:31 2007 UTC (7 years, 6 months ago) by nigel
File MIME type: text/plain
File size: 28778 byte(s)
Tag code/trunk as code/tags/pcre-3.2.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 #include <ctype.h>
6 #include <stdio.h>
7 #include <string.h>
8 #include <stdlib.h>
9 #include <time.h>
10 #include <locale.h>
11
12 /* Use the internal info for displaying the results of pcre_study(). */
13
14 #include "internal.h"
15
16 /* It is possible to compile this test program without including support for
17 testing the POSIX interface, though this is not available via the standard
18 Makefile. */
19
20 #if !defined NOPOSIX
21 #include "pcreposix.h"
22 #endif
23
/* Some systems' <time.h> does not define CLOCKS_PER_SEC. Fall back to CLK_TCK
when that exists, and otherwise to 100 ticks per second. */

#if !defined CLOCKS_PER_SEC
#if defined CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif

/* Number of repetitions used by the timing loops. */

#define LOOPREPEAT 20000


/* Stream that all test output is written to. */
static FILE *outfile;

/* Non-zero makes new_malloc() report the size of each allocation. */
static int log_store = 0;

/* Size requested in the most recent call to new_malloc(). */
static size_t gotten_store;
38
39
40
/* Printable names for the internal opcodes, indexed by opcode value. The
table is used by print_internals(), the debugging function that prints the
internal form of the regex. This is the same code as contained in pcre.c under
the DEBUG macro.

NOTE(review): the entry order has to match the opcode numbering in
internal.h, which is not visible here - confirm before changing either. */

static const char *OP_names[] = {
  "End",
  "\\A", "\\B", "\\b",
  "\\D", "\\d",
  "\\S", "\\s",
  "\\W", "\\w",
  "\\Z", "\\z",
  "Opt", "^", "$", "Any", "chars", "not",

  /* Three runs of nine repeat suffixes ... */
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",

  /* ... and one run of eight. */
  "*", "*?", "+", "+?", "?", "??", "{", "{",

  "class", "Ref", "Recurse",
  "Alt", "Ket", "KetRmax", "KetRmin",
  "Assert", "Assert not", "AssertB", "AssertB not",
  "Reverse", "Once", "Cond", "Cref",
  "Brazero", "Braminzero", "Bra"
};
57
58
59 static void print_internals(pcre *re)
60 {
61 unsigned char *code = ((real_pcre *)re)->code;
62
63 fprintf(outfile, "------------------------------------------------------------------\n");
64
65 for(;;)
66 {
67 int c;
68 int charlength;
69
70 fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));
71
72 if (*code >= OP_BRA)
73 {
74 fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
75 code += 2;
76 }
77
78 else switch(*code)
79 {
80 case OP_END:
81 fprintf(outfile, " %s\n", OP_names[*code]);
82 fprintf(outfile, "------------------------------------------------------------------\n");
83 return;
84
85 case OP_OPT:
86 fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);
87 code++;
88 break;
89
90 case OP_COND:
91 fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);
92 code += 2;
93 break;
94
95 case OP_CREF:
96 fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);
97 code++;
98 break;
99
100 case OP_CHARS:
101 charlength = *(++code);
102 fprintf(outfile, "%3d ", charlength);
103 while (charlength-- > 0)
104 if (isprint(c = *(++code))) fprintf(outfile, "%c", c);
105 else fprintf(outfile, "\\x%02x", c);
106 break;
107
108 case OP_KETRMAX:
109 case OP_KETRMIN:
110 case OP_ALT:
111 case OP_KET:
112 case OP_ASSERT:
113 case OP_ASSERT_NOT:
114 case OP_ASSERTBACK:
115 case OP_ASSERTBACK_NOT:
116 case OP_ONCE:
117 fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
118 code += 2;
119 break;
120
121 case OP_REVERSE:
122 fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
123 code += 2;
124 break;
125
126 case OP_STAR:
127 case OP_MINSTAR:
128 case OP_PLUS:
129 case OP_MINPLUS:
130 case OP_QUERY:
131 case OP_MINQUERY:
132 case OP_TYPESTAR:
133 case OP_TYPEMINSTAR:
134 case OP_TYPEPLUS:
135 case OP_TYPEMINPLUS:
136 case OP_TYPEQUERY:
137 case OP_TYPEMINQUERY:
138 if (*code >= OP_TYPESTAR)
139 fprintf(outfile, " %s", OP_names[code[1]]);
140 else if (isprint(c = code[1])) fprintf(outfile, " %c", c);
141 else fprintf(outfile, " \\x%02x", c);
142 fprintf(outfile, "%s", OP_names[*code++]);
143 break;
144
145 case OP_EXACT:
146 case OP_UPTO:
147 case OP_MINUPTO:
148 if (isprint(c = code[3])) fprintf(outfile, " %c{", c);
149 else fprintf(outfile, " \\x%02x{", c);
150 if (*code != OP_EXACT) fprintf(outfile, ",");
151 fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
152 if (*code == OP_MINUPTO) fprintf(outfile, "?");
153 code += 3;
154 break;
155
156 case OP_TYPEEXACT:
157 case OP_TYPEUPTO:
158 case OP_TYPEMINUPTO:
159 fprintf(outfile, " %s{", OP_names[code[3]]);
160 if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");
161 fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
162 if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");
163 code += 3;
164 break;
165
166 case OP_NOT:
167 if (isprint(c = *(++code))) fprintf(outfile, " [^%c]", c);
168 else fprintf(outfile, " [^\\x%02x]", c);
169 break;
170
171 case OP_NOTSTAR:
172 case OP_NOTMINSTAR:
173 case OP_NOTPLUS:
174 case OP_NOTMINPLUS:
175 case OP_NOTQUERY:
176 case OP_NOTMINQUERY:
177 if (isprint(c = code[1])) fprintf(outfile, " [^%c]", c);
178 else fprintf(outfile, " [^\\x%02x]", c);
179 fprintf(outfile, "%s", OP_names[*code++]);
180 break;
181
182 case OP_NOTEXACT:
183 case OP_NOTUPTO:
184 case OP_NOTMINUPTO:
185 if (isprint(c = code[3])) fprintf(outfile, " [^%c]{", c);
186 else fprintf(outfile, " [^\\x%02x]{", c);
187 if (*code != OP_NOTEXACT) fprintf(outfile, ",");
188 fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
189 if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");
190 code += 3;
191 break;
192
193 case OP_REF:
194 fprintf(outfile, " \\%d", *(++code));
195 code++;
196 goto CLASS_REF_REPEAT;
197
198 case OP_CLASS:
199 {
200 int i, min, max;
201 code++;
202 fprintf(outfile, " [");
203
204 for (i = 0; i < 256; i++)
205 {
206 if ((code[i/8] & (1 << (i&7))) != 0)
207 {
208 int j;
209 for (j = i+1; j < 256; j++)
210 if ((code[j/8] & (1 << (j&7))) == 0) break;
211 if (i == '-' || i == ']') fprintf(outfile, "\\");
212 if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);
213 if (--j > i)
214 {
215 fprintf(outfile, "-");
216 if (j == '-' || j == ']') fprintf(outfile, "\\");
217 if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);
218 }
219 i = j;
220 }
221 }
222 fprintf(outfile, "]");
223 code += 32;
224
225 CLASS_REF_REPEAT:
226
227 switch(*code)
228 {
229 case OP_CRSTAR:
230 case OP_CRMINSTAR:
231 case OP_CRPLUS:
232 case OP_CRMINPLUS:
233 case OP_CRQUERY:
234 case OP_CRMINQUERY:
235 fprintf(outfile, "%s", OP_names[*code]);
236 break;
237
238 case OP_CRRANGE:
239 case OP_CRMINRANGE:
240 min = (code[1] << 8) + code[2];
241 max = (code[3] << 8) + code[4];
242 if (max == 0) fprintf(outfile, "{%d,}", min);
243 else fprintf(outfile, "{%d,%d}", min, max);
244 if (*code == OP_CRMINRANGE) fprintf(outfile, "?");
245 code += 4;
246 break;
247
248 default:
249 code--;
250 }
251 }
252 break;
253
254 /* Anything else is just a one-node item */
255
256 default:
257 fprintf(outfile, " %s", OP_names[*code]);
258 break;
259 }
260
261 code++;
262 fprintf(outfile, "\n");
263 }
264 }
265
266
267
268 /* Character string printing function. */
269
270 static void pchars(unsigned char *p, int length)
271 {
272 int c;
273 while (length-- > 0)
274 if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
275 else fprintf(outfile, "\\x%02x", c);
276 }
277
278
279
280 /* Alternative malloc function, to test functionality and show the size of the
281 compiled re. */
282
283 static void *new_malloc(size_t size)
284 {
285 gotten_store = size;
286 if (log_store)
287 fprintf(outfile, "Memory allocation (code space): %d\n",
288 (int)((int)size - offsetof(real_pcre, code[0])));
289 return malloc(size);
290 }
291
292
293
294
295 /* Get one piece of information from the pcre_fullinfo() function */
296
297 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
298 {
299 int rc;
300 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
301 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
302 }
303
304
305
306
307 /* Read lines from named file or stdin and write to named file or stdout; lines
308 consist of a regular expression, in delimiters and optionally followed by
309 options, followed by a set of test data, terminated by an empty line. */
310
311 int main(int argc, char **argv)
312 {
313 FILE *infile = stdin;
314 int options = 0;
315 int study_options = 0;
316 int op = 1;
317 int timeit = 0;
318 int showinfo = 0;
319 int showstore = 0;
320 int posix = 0;
321 int debug = 0;
322 int done = 0;
323 unsigned char buffer[30000];
324 unsigned char dbuffer[1024];
325
326 /* Static so that new_malloc can use it. */
327
328 outfile = stdout;
329
330 /* Scan options */
331
332 while (argc > 1 && argv[op][0] == '-')
333 {
334 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
335 showstore = 1;
336 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
337 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
338 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
339 else if (strcmp(argv[op], "-p") == 0) posix = 1;
340 else
341 {
342 printf("*** Unknown option %s\n", argv[op]);
343 printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");
344 printf(" -d debug: show compiled code; implies -i\n"
345 " -i show information about compiled pattern\n"
346 " -p use POSIX interface\n"
347 " -s output store information\n"
348 " -t time compilation and execution\n");
349 return 1;
350 }
351 op++;
352 argc--;
353 }
354
355 /* Sort out the input and output files */
356
357 if (argc > 1)
358 {
359 infile = fopen(argv[op], "r");
360 if (infile == NULL)
361 {
362 printf("** Failed to open %s\n", argv[op]);
363 return 1;
364 }
365 }
366
367 if (argc > 2)
368 {
369 outfile = fopen(argv[op+1], "w");
370 if (outfile == NULL)
371 {
372 printf("** Failed to open %s\n", argv[op+1]);
373 return 1;
374 }
375 }
376
377 /* Set alternative malloc function */
378
379 pcre_malloc = new_malloc;
380
381 /* Heading line, then prompt for first regex if stdin */
382
383 fprintf(outfile, "PCRE version %s\n\n", pcre_version());
384
385 /* Main loop */
386
387 while (!done)
388 {
389 pcre *re = NULL;
390 pcre_extra *extra = NULL;
391
392 #if !defined NOPOSIX /* There are still compilers that require no indent */
393 regex_t preg;
394 int do_posix = 0;
395 #endif
396
397 const char *error;
398 unsigned char *p, *pp, *ppp;
399 unsigned const char *tables = NULL;
400 int do_study = 0;
401 int do_debug = debug;
402 int do_G = 0;
403 int do_g = 0;
404 int do_showinfo = showinfo;
405 int do_showrest = 0;
406 int erroroffset, len, delimiter;
407
408 if (infile == stdin) printf(" re> ");
409 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
410 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
411
412 p = buffer;
413 while (isspace(*p)) p++;
414 if (*p == 0) continue;
415
416 /* Get the delimiter and seek the end of the pattern; if is isn't
417 complete, read more. */
418
419 delimiter = *p++;
420
421 if (isalnum(delimiter) || delimiter == '\\')
422 {
423 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
424 goto SKIP_DATA;
425 }
426
427 pp = p;
428
429 for(;;)
430 {
431 while (*pp != 0)
432 {
433 if (*pp == '\\' && pp[1] != 0) pp++;
434 else if (*pp == delimiter) break;
435 pp++;
436 }
437 if (*pp != 0) break;
438
439 len = sizeof(buffer) - (pp - buffer);
440 if (len < 256)
441 {
442 fprintf(outfile, "** Expression too long - missing delimiter?\n");
443 goto SKIP_DATA;
444 }
445
446 if (infile == stdin) printf(" > ");
447 if (fgets((char *)pp, len, infile) == NULL)
448 {
449 fprintf(outfile, "** Unexpected EOF\n");
450 done = 1;
451 goto CONTINUE;
452 }
453 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
454 }
455
456 /* If the first character after the delimiter is backslash, make
457 the pattern end with backslash. This is purely to provide a way
458 of testing for the error message when a pattern ends with backslash. */
459
460 if (pp[1] == '\\') *pp++ = '\\';
461
462 /* Terminate the pattern at the delimiter */
463
464 *pp++ = 0;
465
466 /* Look for options after final delimiter */
467
468 options = 0;
469 study_options = 0;
470 log_store = showstore; /* default from command line */
471
472 while (*pp != 0)
473 {
474 switch (*pp++)
475 {
476 case 'g': do_g = 1; break;
477 case 'i': options |= PCRE_CASELESS; break;
478 case 'm': options |= PCRE_MULTILINE; break;
479 case 's': options |= PCRE_DOTALL; break;
480 case 'x': options |= PCRE_EXTENDED; break;
481
482 case '+': do_showrest = 1; break;
483 case 'A': options |= PCRE_ANCHORED; break;
484 case 'D': do_debug = do_showinfo = 1; break;
485 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
486 case 'G': do_G = 1; break;
487 case 'I': do_showinfo = 1; break;
488 case 'M': log_store = 1; break;
489
490 #if !defined NOPOSIX
491 case 'P': do_posix = 1; break;
492 #endif
493
494 case 'S': do_study = 1; break;
495 case 'U': options |= PCRE_UNGREEDY; break;
496 case 'X': options |= PCRE_EXTRA; break;
497
498 case 'L':
499 ppp = pp;
500 while (*ppp != '\n' && *ppp != ' ') ppp++;
501 *ppp = 0;
502 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
503 {
504 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
505 goto SKIP_DATA;
506 }
507 tables = pcre_maketables();
508 pp = ppp;
509 break;
510
511 case '\n': case ' ': break;
512 default:
513 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
514 goto SKIP_DATA;
515 }
516 }
517
518 /* Handle compiling via the POSIX interface, which doesn't support the
519 timing, showing, or debugging options, nor the ability to pass over
520 local character tables. */
521
522 #if !defined NOPOSIX
523 if (posix || do_posix)
524 {
525 int rc;
526 int cflags = 0;
527 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
528 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
529 rc = regcomp(&preg, (char *)p, cflags);
530
531 /* Compilation failed; go back for another re, skipping to blank line
532 if non-interactive. */
533
534 if (rc != 0)
535 {
536 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
537 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
538 goto SKIP_DATA;
539 }
540 }
541
542 /* Handle compiling via the native interface */
543
544 else
545 #endif /* !defined NOPOSIX */
546
547 {
548 if (timeit)
549 {
550 register int i;
551 clock_t time_taken;
552 clock_t start_time = clock();
553 for (i = 0; i < LOOPREPEAT; i++)
554 {
555 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
556 if (re != NULL) free(re);
557 }
558 time_taken = clock() - start_time;
559 fprintf(outfile, "Compile time %.3f milliseconds\n",
560 ((double)time_taken * 1000.0) /
561 ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
562 }
563
564 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
565
566 /* Compilation failed; go back for another re, skipping to blank line
567 if non-interactive. */
568
569 if (re == NULL)
570 {
571 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
572 SKIP_DATA:
573 if (infile != stdin)
574 {
575 for (;;)
576 {
577 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
578 {
579 done = 1;
580 goto CONTINUE;
581 }
582 len = (int)strlen((char *)buffer);
583 while (len > 0 && isspace(buffer[len-1])) len--;
584 if (len == 0) break;
585 }
586 fprintf(outfile, "\n");
587 }
588 goto CONTINUE;
589 }
590
591 /* Compilation succeeded; print data if required. There are now two
592 info-returning functions. The old one has a limited interface and
593 returns only limited data. Check that it agrees with the newer one. */
594
595 if (do_showinfo)
596 {
597 int old_first_char, old_options, old_count;
598 int count, backrefmax, first_char, need_char;
599 size_t size;
600
601 if (do_debug) print_internals(re);
602
603 new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
604 new_info(re, NULL, PCRE_INFO_SIZE, &size);
605 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
606 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
607 new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
608 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
609
610 old_count = pcre_info(re, &old_options, &old_first_char);
611 if (count < 0) fprintf(outfile,
612 "Error %d from pcre_info()\n", count);
613 else
614 {
615 if (old_count != count) fprintf(outfile,
616 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
617 old_count);
618
619 if (old_first_char != first_char) fprintf(outfile,
620 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
621 first_char, old_first_char);
622
623 if (old_options != options) fprintf(outfile,
624 "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,
625 old_options);
626 }
627
628 if (size != gotten_store) fprintf(outfile,
629 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
630 size, gotten_store);
631
632 fprintf(outfile, "Capturing subpattern count = %d\n", count);
633 if (backrefmax > 0)
634 fprintf(outfile, "Max back reference = %d\n", backrefmax);
635 if (options == 0) fprintf(outfile, "No options\n");
636 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",
637 ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
638 ((options & PCRE_CASELESS) != 0)? " caseless" : "",
639 ((options & PCRE_EXTENDED) != 0)? " extended" : "",
640 ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
641 ((options & PCRE_DOTALL) != 0)? " dotall" : "",
642 ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
643 ((options & PCRE_EXTRA) != 0)? " extra" : "",
644 ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");
645
646 if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
647 fprintf(outfile, "Case state changes\n");
648
649 if (first_char == -1)
650 {
651 fprintf(outfile, "First char at start or follows \\n\n");
652 }
653 else if (first_char < 0)
654 {
655 fprintf(outfile, "No first char\n");
656 }
657 else
658 {
659 if (isprint(first_char))
660 fprintf(outfile, "First char = \'%c\'\n", first_char);
661 else
662 fprintf(outfile, "First char = %d\n", first_char);
663 }
664
665 if (need_char < 0)
666 {
667 fprintf(outfile, "No need char\n");
668 }
669 else
670 {
671 if (isprint(need_char))
672 fprintf(outfile, "Need char = \'%c\'\n", need_char);
673 else
674 fprintf(outfile, "Need char = %d\n", need_char);
675 }
676 }
677
678 /* If /S was present, study the regexp to generate additional info to
679 help with the matching. */
680
681 if (do_study)
682 {
683 if (timeit)
684 {
685 register int i;
686 clock_t time_taken;
687 clock_t start_time = clock();
688 for (i = 0; i < LOOPREPEAT; i++)
689 extra = pcre_study(re, study_options, &error);
690 time_taken = clock() - start_time;
691 if (extra != NULL) free(extra);
692 fprintf(outfile, " Study time %.3f milliseconds\n",
693 ((double)time_taken * 1000.0)/
694 ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
695 }
696
697 extra = pcre_study(re, study_options, &error);
698 if (error != NULL)
699 fprintf(outfile, "Failed to study: %s\n", error);
700 else if (extra == NULL)
701 fprintf(outfile, "Study returned NULL\n");
702
703 else if (do_showinfo)
704 {
705 uschar *start_bits = NULL;
706 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
707 if (start_bits == NULL)
708 fprintf(outfile, "No starting character set\n");
709 else
710 {
711 int i;
712 int c = 24;
713 fprintf(outfile, "Starting character set: ");
714 for (i = 0; i < 256; i++)
715 {
716 if ((start_bits[i/8] & (1<<(i%8))) != 0)
717 {
718 if (c > 75)
719 {
720 fprintf(outfile, "\n ");
721 c = 2;
722 }
723 if (isprint(i) && i != ' ')
724 {
725 fprintf(outfile, "%c ", i);
726 c += 2;
727 }
728 else
729 {
730 fprintf(outfile, "\\x%02x ", i);
731 c += 5;
732 }
733 }
734 }
735 fprintf(outfile, "\n");
736 }
737 }
738 }
739 }
740
741 /* Read data lines and test them */
742
743 for (;;)
744 {
745 unsigned char *q;
746 unsigned char *bptr = dbuffer;
747 int count, c;
748 int copystrings = 0;
749 int getstrings = 0;
750 int getlist = 0;
751 int gmatched = 0;
752 int start_offset = 0;
753 int g_notempty = 0;
754 int offsets[45];
755 int size_offsets = sizeof(offsets)/sizeof(int);
756
757 options = 0;
758
759 if (infile == stdin) printf("data> ");
760 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
761 {
762 done = 1;
763 goto CONTINUE;
764 }
765 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
766
767 len = (int)strlen((char *)buffer);
768 while (len > 0 && isspace(buffer[len-1])) len--;
769 buffer[len] = 0;
770 if (len == 0) break;
771
772 p = buffer;
773 while (isspace(*p)) p++;
774
775 q = dbuffer;
776 while ((c = *p++) != 0)
777 {
778 int i = 0;
779 int n = 0;
780 if (c == '\\') switch ((c = *p++))
781 {
782 case 'a': c = 7; break;
783 case 'b': c = '\b'; break;
784 case 'e': c = 27; break;
785 case 'f': c = '\f'; break;
786 case 'n': c = '\n'; break;
787 case 'r': c = '\r'; break;
788 case 't': c = '\t'; break;
789 case 'v': c = '\v'; break;
790
791 case '0': case '1': case '2': case '3':
792 case '4': case '5': case '6': case '7':
793 c -= '0';
794 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
795 c = c * 8 + *p++ - '0';
796 break;
797
798 case 'x':
799 c = 0;
800 while (i++ < 2 && isxdigit(*p))
801 {
802 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
803 p++;
804 }
805 break;
806
807 case 0: /* Allows for an empty line */
808 p--;
809 continue;
810
811 case 'A': /* Option setting */
812 options |= PCRE_ANCHORED;
813 continue;
814
815 case 'B':
816 options |= PCRE_NOTBOL;
817 continue;
818
819 case 'C':
820 while(isdigit(*p)) n = n * 10 + *p++ - '0';
821 copystrings |= 1 << n;
822 continue;
823
824 case 'G':
825 while(isdigit(*p)) n = n * 10 + *p++ - '0';
826 getstrings |= 1 << n;
827 continue;
828
829 case 'L':
830 getlist = 1;
831 continue;
832
833 case 'N':
834 options |= PCRE_NOTEMPTY;
835 continue;
836
837 case 'O':
838 while(isdigit(*p)) n = n * 10 + *p++ - '0';
839 if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;
840 continue;
841
842 case 'Z':
843 options |= PCRE_NOTEOL;
844 continue;
845 }
846 *q++ = c;
847 }
848 *q = 0;
849 len = q - dbuffer;
850
851 /* Handle matching via the POSIX interface, which does not
852 support timing. */
853
854 #if !defined NOPOSIX
855 if (posix || do_posix)
856 {
857 int rc;
858 int eflags = 0;
859 regmatch_t pmatch[sizeof(offsets)/sizeof(int)];
860 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
861 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
862
863 rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);
864
865 if (rc != 0)
866 {
867 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
868 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
869 }
870 else
871 {
872 size_t i;
873 for (i = 0; i < size_offsets; i++)
874 {
875 if (pmatch[i].rm_so >= 0)
876 {
877 fprintf(outfile, "%2d: ", (int)i);
878 pchars(dbuffer + pmatch[i].rm_so,
879 pmatch[i].rm_eo - pmatch[i].rm_so);
880 fprintf(outfile, "\n");
881 if (i == 0 && do_showrest)
882 {
883 fprintf(outfile, " 0+ ");
884 pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);
885 fprintf(outfile, "\n");
886 }
887 }
888 }
889 }
890 }
891
892 /* Handle matching via the native interface - repeats for /g and /G */
893
894 else
895 #endif /* !defined NOPOSIX */
896
897 for (;; gmatched++) /* Loop for /g or /G */
898 {
899 if (timeit)
900 {
901 register int i;
902 clock_t time_taken;
903 clock_t start_time = clock();
904 for (i = 0; i < LOOPREPEAT; i++)
905 count = pcre_exec(re, extra, (char *)bptr, len,
906 start_offset, options | g_notempty, offsets, size_offsets);
907 time_taken = clock() - start_time;
908 fprintf(outfile, "Execute time %.3f milliseconds\n",
909 ((double)time_taken * 1000.0)/
910 ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
911 }
912
913 count = pcre_exec(re, extra, (char *)bptr, len,
914 start_offset, options | g_notempty, offsets, size_offsets);
915
916 if (count == 0)
917 {
918 fprintf(outfile, "Matched, but too many substrings\n");
919 count = size_offsets/3;
920 }
921
922 /* Matched */
923
924 if (count >= 0)
925 {
926 int i;
927 for (i = 0; i < count * 2; i += 2)
928 {
929 if (offsets[i] < 0)
930 fprintf(outfile, "%2d: <unset>\n", i/2);
931 else
932 {
933 fprintf(outfile, "%2d: ", i/2);
934 pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);
935 fprintf(outfile, "\n");
936 if (i == 0)
937 {
938 if (do_showrest)
939 {
940 fprintf(outfile, " 0+ ");
941 pchars(bptr + offsets[i+1], len - offsets[i+1]);
942 fprintf(outfile, "\n");
943 }
944 }
945 }
946 }
947
948 for (i = 0; i < 32; i++)
949 {
950 if ((copystrings & (1 << i)) != 0)
951 {
952 char copybuffer[16];
953 int rc = pcre_copy_substring((char *)bptr, offsets, count,
954 i, copybuffer, sizeof(copybuffer));
955 if (rc < 0)
956 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
957 else
958 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
959 }
960 }
961
962 for (i = 0; i < 32; i++)
963 {
964 if ((getstrings & (1 << i)) != 0)
965 {
966 const char *substring;
967 int rc = pcre_get_substring((char *)bptr, offsets, count,
968 i, &substring);
969 if (rc < 0)
970 fprintf(outfile, "get substring %d failed %d\n", i, rc);
971 else
972 {
973 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
974 free((void *)substring);
975 }
976 }
977 }
978
979 if (getlist)
980 {
981 const char **stringlist;
982 int rc = pcre_get_substring_list((char *)bptr, offsets, count,
983 &stringlist);
984 if (rc < 0)
985 fprintf(outfile, "get substring list failed %d\n", rc);
986 else
987 {
988 for (i = 0; i < count; i++)
989 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
990 if (stringlist[i] != NULL)
991 fprintf(outfile, "string list not terminated by NULL\n");
992 free((void *)stringlist);
993 }
994 }
995 }
996
997 /* Failed to match. If this is a /g or /G loop and we previously set
998 g_notempty after a null match, this is not necessarily the end.
999 We want to advance the start offset, and continue. Fudge the offset
1000 values to achieve this. We won't be at the end of the string - that
1001 was checked before setting g_notempty. */
1002
1003 else
1004 {
1005 if (g_notempty != 0)
1006 {
1007 offsets[0] = start_offset;
1008 offsets[1] = start_offset + 1;
1009 }
1010 else
1011 {
1012 if (gmatched == 0) /* Error if no previous matches */
1013 {
1014 if (count == -1) fprintf(outfile, "No match\n");
1015 else fprintf(outfile, "Error %d\n", count);
1016 }
1017 break; /* Out of the /g loop */
1018 }
1019 }
1020
1021 /* If not /g or /G we are done */
1022
1023 if (!do_g && !do_G) break;
1024
1025 /* If we have matched an empty string, first check to see if we are at
1026 the end of the subject. If so, the /g loop is over. Otherwise, mimic
1027 what Perl's /g options does. This turns out to be rather cunning. First
1028 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1029 same point. If this fails (picked up above) we advance to the next
1030 character. */
1031
1032 g_notempty = 0;
1033 if (offsets[0] == offsets[1])
1034 {
1035 if (offsets[0] == len) break;
1036 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1037 }
1038
1039 /* For /g, update the start offset, leaving the rest alone */
1040
1041 if (do_g) start_offset = offsets[1];
1042
1043 /* For /G, update the pointer and length */
1044
1045 else
1046 {
1047 bptr += offsets[1];
1048 len -= offsets[1];
1049 }
1050 } /* End of loop for /g and /G */
1051 } /* End of loop for data lines */
1052
1053 CONTINUE:
1054
1055 #if !defined NOPOSIX
1056 if (posix || do_posix) regfree(&preg);
1057 #endif
1058
1059 if (re != NULL) free(re);
1060 if (extra != NULL) free(extra);
1061 if (tables != NULL)
1062 {
1063 free((void *)tables);
1064 setlocale(LC_CTYPE, "C");
1065 }
1066 }
1067
1068 fprintf(outfile, "\n");
1069 return 0;
1070 }
1071
1072 /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12