/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 41 - (show annotations) (download)
Sat Feb 24 21:39:17 2007 UTC (7 years, 5 months ago) by nigel
File MIME type: text/plain
File size: 27307 byte(s)
Load pcre-2.08a into code/trunk.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 #include <ctype.h>
6 #include <stdio.h>
7 #include <string.h>
8 #include <stdlib.h>
9 #include <time.h>
10 #include <locale.h>
11
12 /* Use the internal info for displaying the results of pcre_study(). */
13
14 #include "internal.h"
15
16 /* It is possible to compile this test program without including support for
17 testing the POSIX interface, though this is not available via the standard
18 Makefile. */
19
20 #if !defined NOPOSIX
21 #include "pcreposix.h"
22 #endif
23
/* Fallback for C libraries whose <time.h> does not define CLOCKS_PER_SEC:
use CLK_TCK when it is available, otherwise assume 100 ticks per second. */
24 #ifndef CLOCKS_PER_SEC
25 #ifdef CLK_TCK
26 #define CLOCKS_PER_SEC CLK_TCK
27 #else
28 #define CLOCKS_PER_SEC 100
29 #endif
30 #endif
31
/* Number of iterations performed by each timing loop; the reported time is
the total elapsed time divided by this count. */
32 #define LOOPREPEAT 20000
33
34
/* Stream that all test output is written to; assigned in main(). */
35 static FILE *outfile;
/* When non-zero, new_malloc() reports the size of every allocation. */
36 static int log_store = 0;
37
38
39
40 /* Debugging function to print the internal form of the regex. This is the same
41 code as contained in pcre.c under the DEBUG macro. */
42
/* Printable names for the compiled-pattern opcodes, indexed by opcode value.
The table is only ever read, so both the strings and the pointers are const.
It holds 70 entries; print_internals() handles every opcode >= OP_BRA itself,
so only entries below that value are looked up by name. The order must match
the OP_xxx definitions in internal.h (not visible here -- verify when either
side changes). */

static const char * const OP_names[] = {
  /* Anchors, character types and other simple items */
  "End", "\\A", "\\B", "\\b", "\\D", "\\d",
  "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
  "Opt", "^", "$", "Any", "chars", "not",

  /* Three groups of nine repeat operators (presumably the plain-character,
  negated-character and character-type forms) ... */
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",

  /* ... followed by the eight repeats that can follow a class or a back
  reference. */
  "*", "*?", "+", "+?", "?", "??", "{", "{",

  "class", "Ref",

  /* Group structure, assertions and conditions */
  "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
  "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
  "Brazero", "Braminzero", "Bra"
};
56
57
58 static void print_internals(pcre *re)
59 {
60 unsigned char *code = ((real_pcre *)re)->code;
61
62 fprintf(outfile, "------------------------------------------------------------------\n");
63
64 for(;;)
65 {
66 int c;
67 int charlength;
68
69 fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));
70
71 if (*code >= OP_BRA)
72 {
73 fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
74 code += 2;
75 }
76
77 else switch(*code)
78 {
79 case OP_END:
80 fprintf(outfile, " %s\n", OP_names[*code]);
81 fprintf(outfile, "------------------------------------------------------------------\n");
82 return;
83
84 case OP_OPT:
85 fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);
86 code++;
87 break;
88
89 case OP_COND:
90 fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);
91 code += 2;
92 break;
93
94 case OP_CREF:
95 fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);
96 code++;
97 break;
98
99 case OP_CHARS:
100 charlength = *(++code);
101 fprintf(outfile, "%3d ", charlength);
102 while (charlength-- > 0)
103 if (isprint(c = *(++code))) fprintf(outfile, "%c", c);
104 else fprintf(outfile, "\\x%02x", c);
105 break;
106
107 case OP_KETRMAX:
108 case OP_KETRMIN:
109 case OP_ALT:
110 case OP_KET:
111 case OP_ASSERT:
112 case OP_ASSERT_NOT:
113 case OP_ASSERTBACK:
114 case OP_ASSERTBACK_NOT:
115 case OP_ONCE:
116 fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
117 code += 2;
118 break;
119
120 case OP_REVERSE:
121 fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
122 code += 2;
123 break;
124
125 case OP_STAR:
126 case OP_MINSTAR:
127 case OP_PLUS:
128 case OP_MINPLUS:
129 case OP_QUERY:
130 case OP_MINQUERY:
131 case OP_TYPESTAR:
132 case OP_TYPEMINSTAR:
133 case OP_TYPEPLUS:
134 case OP_TYPEMINPLUS:
135 case OP_TYPEQUERY:
136 case OP_TYPEMINQUERY:
137 if (*code >= OP_TYPESTAR)
138 fprintf(outfile, " %s", OP_names[code[1]]);
139 else if (isprint(c = code[1])) fprintf(outfile, " %c", c);
140 else fprintf(outfile, " \\x%02x", c);
141 fprintf(outfile, "%s", OP_names[*code++]);
142 break;
143
144 case OP_EXACT:
145 case OP_UPTO:
146 case OP_MINUPTO:
147 if (isprint(c = code[3])) fprintf(outfile, " %c{", c);
148 else fprintf(outfile, " \\x%02x{", c);
149 if (*code != OP_EXACT) fprintf(outfile, ",");
150 fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
151 if (*code == OP_MINUPTO) fprintf(outfile, "?");
152 code += 3;
153 break;
154
155 case OP_TYPEEXACT:
156 case OP_TYPEUPTO:
157 case OP_TYPEMINUPTO:
158 fprintf(outfile, " %s{", OP_names[code[3]]);
159 if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");
160 fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
161 if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");
162 code += 3;
163 break;
164
165 case OP_NOT:
166 if (isprint(c = *(++code))) fprintf(outfile, " [^%c]", c);
167 else fprintf(outfile, " [^\\x%02x]", c);
168 break;
169
170 case OP_NOTSTAR:
171 case OP_NOTMINSTAR:
172 case OP_NOTPLUS:
173 case OP_NOTMINPLUS:
174 case OP_NOTQUERY:
175 case OP_NOTMINQUERY:
176 if (isprint(c = code[1])) fprintf(outfile, " [^%c]", c);
177 else fprintf(outfile, " [^\\x%02x]", c);
178 fprintf(outfile, "%s", OP_names[*code++]);
179 break;
180
181 case OP_NOTEXACT:
182 case OP_NOTUPTO:
183 case OP_NOTMINUPTO:
184 if (isprint(c = code[3])) fprintf(outfile, " [^%c]{", c);
185 else fprintf(outfile, " [^\\x%02x]{", c);
186 if (*code != OP_NOTEXACT) fprintf(outfile, ",");
187 fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
188 if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");
189 code += 3;
190 break;
191
192 case OP_REF:
193 fprintf(outfile, " \\%d", *(++code));
194 code++;
195 goto CLASS_REF_REPEAT;
196
197 case OP_CLASS:
198 {
199 int i, min, max;
200 code++;
201 fprintf(outfile, " [");
202
203 for (i = 0; i < 256; i++)
204 {
205 if ((code[i/8] & (1 << (i&7))) != 0)
206 {
207 int j;
208 for (j = i+1; j < 256; j++)
209 if ((code[j/8] & (1 << (j&7))) == 0) break;
210 if (i == '-' || i == ']') fprintf(outfile, "\\");
211 if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);
212 if (--j > i)
213 {
214 fprintf(outfile, "-");
215 if (j == '-' || j == ']') fprintf(outfile, "\\");
216 if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);
217 }
218 i = j;
219 }
220 }
221 fprintf(outfile, "]");
222 code += 32;
223
224 CLASS_REF_REPEAT:
225
226 switch(*code)
227 {
228 case OP_CRSTAR:
229 case OP_CRMINSTAR:
230 case OP_CRPLUS:
231 case OP_CRMINPLUS:
232 case OP_CRQUERY:
233 case OP_CRMINQUERY:
234 fprintf(outfile, "%s", OP_names[*code]);
235 break;
236
237 case OP_CRRANGE:
238 case OP_CRMINRANGE:
239 min = (code[1] << 8) + code[2];
240 max = (code[3] << 8) + code[4];
241 if (max == 0) fprintf(outfile, "{%d,}", min);
242 else fprintf(outfile, "{%d,%d}", min, max);
243 if (*code == OP_CRMINRANGE) fprintf(outfile, "?");
244 code += 4;
245 break;
246
247 default:
248 code--;
249 }
250 }
251 break;
252
253 /* Anything else is just a one-node item */
254
255 default:
256 fprintf(outfile, " %s", OP_names[*code]);
257 break;
258 }
259
260 code++;
261 fprintf(outfile, "\n");
262 }
263 }
264
265
266
267 /* Character string printing function. */
268
269 static void pchars(unsigned char *p, int length)
270 {
271 int c;
272 while (length-- > 0)
273 if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
274 else fprintf(outfile, "\\x%02x", c);
275 }
276
277
278
279 /* Alternative malloc function, to test functionality and show the size of the
280 compiled re. */
281
282 static void *new_malloc(size_t size)
283 {
284 if (log_store)
285 fprintf(outfile, "Memory allocation (code space): %d\n",
286 (int)((int)size - offsetof(real_pcre, code[0])));
287 return malloc(size);
288 }
289
290
291
292 /* Read lines from named file or stdin and write to named file or stdout; lines
293 consist of a regular expression, in delimiters and optionally followed by
294 options, followed by a set of test data, terminated by an empty line. */
295
296 int main(int argc, char **argv)
297 {
298 FILE *infile = stdin;
299 int options = 0;
300 int study_options = 0;
301 int op = 1;
302 int timeit = 0;
303 int showinfo = 0;
304 int showstore = 0;
305 int posix = 0;
306 int debug = 0;
307 int done = 0;
308 unsigned char buffer[30000];
309 unsigned char dbuffer[1024];
310
311 /* Static so that new_malloc can use it. */
312
313 outfile = stdout;
314
315 /* Scan options */
316
317 while (argc > 1 && argv[op][0] == '-')
318 {
319 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
320 showstore = 1;
321 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
322 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
323 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
324 else if (strcmp(argv[op], "-p") == 0) posix = 1;
325 else
326 {
327 printf("*** Unknown option %s\n", argv[op]);
328 printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");
329 printf(" -d debug: show compiled code; implies -i\n"
330 " -i show information about compiled pattern\n"
331 " -p use POSIX interface\n"
332 " -s output store information\n"
333 " -t time compilation and execution\n");
334 return 1;
335 }
336 op++;
337 argc--;
338 }
339
340 /* Sort out the input and output files */
341
342 if (argc > 1)
343 {
344 infile = fopen(argv[op], "r");
345 if (infile == NULL)
346 {
347 printf("** Failed to open %s\n", argv[op]);
348 return 1;
349 }
350 }
351
352 if (argc > 2)
353 {
354 outfile = fopen(argv[op+1], "w");
355 if (outfile == NULL)
356 {
357 printf("** Failed to open %s\n", argv[op+1]);
358 return 1;
359 }
360 }
361
362 /* Set alternative malloc function */
363
364 pcre_malloc = new_malloc;
365
366 /* Heading line, then prompt for first regex if stdin */
367
368 fprintf(outfile, "PCRE version %s\n\n", pcre_version());
369
370 /* Main loop */
371
372 while (!done)
373 {
374 pcre *re = NULL;
375 pcre_extra *extra = NULL;
376
377 #if !defined NOPOSIX /* There are still compilers that require no indent */
378 regex_t preg;
379 #endif
380
381 const char *error;
382 unsigned char *p, *pp, *ppp;
383 unsigned const char *tables = NULL;
384 int do_study = 0;
385 int do_debug = debug;
386 int do_G = 0;
387 int do_g = 0;
388 int do_showinfo = showinfo;
389 int do_showrest = 0;
390 int do_posix = 0;
391 int erroroffset, len, delimiter;
392
393 if (infile == stdin) printf(" re> ");
394 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
395 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
396
397 p = buffer;
398 while (isspace(*p)) p++;
399 if (*p == 0) continue;
400
401 /* Get the delimiter and seek the end of the pattern; if is isn't
402 complete, read more. */
403
404 delimiter = *p++;
405
406 if (isalnum(delimiter) || delimiter == '\\')
407 {
408 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
409 goto SKIP_DATA;
410 }
411
412 pp = p;
413
414 for(;;)
415 {
416 while (*pp != 0)
417 {
418 if (*pp == '\\' && pp[1] != 0) pp++;
419 else if (*pp == delimiter) break;
420 pp++;
421 }
422 if (*pp != 0) break;
423
424 len = sizeof(buffer) - (pp - buffer);
425 if (len < 256)
426 {
427 fprintf(outfile, "** Expression too long - missing delimiter?\n");
428 goto SKIP_DATA;
429 }
430
431 if (infile == stdin) printf(" > ");
432 if (fgets((char *)pp, len, infile) == NULL)
433 {
434 fprintf(outfile, "** Unexpected EOF\n");
435 done = 1;
436 goto CONTINUE;
437 }
438 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
439 }
440
441 /* If the first character after the delimiter is backslash, make
442 the pattern end with backslash. This is purely to provide a way
443 of testing for the error message when a pattern ends with backslash. */
444
445 if (pp[1] == '\\') *pp++ = '\\';
446
447 /* Terminate the pattern at the delimiter */
448
449 *pp++ = 0;
450
451 /* Look for options after final delimiter */
452
453 options = 0;
454 study_options = 0;
455 log_store = showstore; /* default from command line */
456
457 while (*pp != 0)
458 {
459 switch (*pp++)
460 {
461 case 'g': do_g = 1; break;
462 case 'i': options |= PCRE_CASELESS; break;
463 case 'm': options |= PCRE_MULTILINE; break;
464 case 's': options |= PCRE_DOTALL; break;
465 case 'x': options |= PCRE_EXTENDED; break;
466
467 case '+': do_showrest = 1; break;
468 case 'A': options |= PCRE_ANCHORED; break;
469 case 'D': do_debug = do_showinfo = 1; break;
470 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
471 case 'G': do_G = 1; break;
472 case 'I': do_showinfo = 1; break;
473 case 'M': log_store = 1; break;
474
475 #if !defined NOPOSIX
476 case 'P': do_posix = 1; break;
477 #endif
478
479 case 'S': do_study = 1; break;
480 case 'U': options |= PCRE_UNGREEDY; break;
481 case 'X': options |= PCRE_EXTRA; break;
482
483 case 'L':
484 ppp = pp;
485 while (*ppp != '\n' && *ppp != ' ') ppp++;
486 *ppp = 0;
487 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
488 {
489 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
490 goto SKIP_DATA;
491 }
492 tables = pcre_maketables();
493 pp = ppp;
494 break;
495
496 case '\n': case ' ': break;
497 default:
498 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
499 goto SKIP_DATA;
500 }
501 }
502
503 /* Handle compiling via the POSIX interface, which doesn't support the
504 timing, showing, or debugging options, nor the ability to pass over
505 local character tables. */
506
507 #if !defined NOPOSIX
508 if (posix || do_posix)
509 {
510 int rc;
511 int cflags = 0;
512 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
513 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
514 rc = regcomp(&preg, (char *)p, cflags);
515
516 /* Compilation failed; go back for another re, skipping to blank line
517 if non-interactive. */
518
519 if (rc != 0)
520 {
521 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
522 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
523 goto SKIP_DATA;
524 }
525 }
526
527 /* Handle compiling via the native interface */
528
529 else
530 #endif /* !defined NOPOSIX */
531
532 {
533 if (timeit)
534 {
535 register int i;
536 clock_t time_taken;
537 clock_t start_time = clock();
538 for (i = 0; i < LOOPREPEAT; i++)
539 {
540 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
541 if (re != NULL) free(re);
542 }
543 time_taken = clock() - start_time;
544 fprintf(outfile, "Compile time %.3f milliseconds\n",
545 ((double)time_taken * 1000.0) /
546 ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
547 }
548
549 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
550
551 /* Compilation failed; go back for another re, skipping to blank line
552 if non-interactive. */
553
554 if (re == NULL)
555 {
556 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
557 SKIP_DATA:
558 if (infile != stdin)
559 {
560 for (;;)
561 {
562 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
563 {
564 done = 1;
565 goto CONTINUE;
566 }
567 len = (int)strlen((char *)buffer);
568 while (len > 0 && isspace(buffer[len-1])) len--;
569 if (len == 0) break;
570 }
571 fprintf(outfile, "\n");
572 }
573 goto CONTINUE;
574 }
575
576 /* Compilation succeeded; print data if required */
577
578 if (do_showinfo)
579 {
580 int first_char, count;
581
582 if (do_debug) print_internals(re);
583
584 count = pcre_info(re, &options, &first_char);
585 if (count < 0) fprintf(outfile,
586 "Error %d while reading info\n", count);
587 else
588 {
589 fprintf(outfile, "Identifying subpattern count = %d\n", count);
590 if (options == 0) fprintf(outfile, "No options\n");
591 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",
592 ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
593 ((options & PCRE_CASELESS) != 0)? " caseless" : "",
594 ((options & PCRE_EXTENDED) != 0)? " extended" : "",
595 ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
596 ((options & PCRE_DOTALL) != 0)? " dotall" : "",
597 ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
598 ((options & PCRE_EXTRA) != 0)? " extra" : "",
599 ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");
600
601 if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
602 fprintf(outfile, "Case state changes\n");
603
604 if (first_char == -1)
605 {
606 fprintf(outfile, "First char at start or follows \\n\n");
607 }
608 else if (first_char < 0)
609 {
610 fprintf(outfile, "No first char\n");
611 }
612 else
613 {
614 if (isprint(first_char))
615 fprintf(outfile, "First char = \'%c\'\n", first_char);
616 else
617 fprintf(outfile, "First char = %d\n", first_char);
618 }
619
620 if (((((real_pcre *)re)->options) & PCRE_REQCHSET) != 0)
621 {
622 int req_char = ((real_pcre *)re)->req_char;
623 if (isprint(req_char))
624 fprintf(outfile, "Req char = \'%c\'\n", req_char);
625 else
626 fprintf(outfile, "Req char = %d\n", req_char);
627 }
628 else fprintf(outfile, "No req char\n");
629 }
630 }
631
632 /* If /S was present, study the regexp to generate additional info to
633 help with the matching. */
634
635 if (do_study)
636 {
637 if (timeit)
638 {
639 register int i;
640 clock_t time_taken;
641 clock_t start_time = clock();
642 for (i = 0; i < LOOPREPEAT; i++)
643 extra = pcre_study(re, study_options, &error);
644 time_taken = clock() - start_time;
645 if (extra != NULL) free(extra);
646 fprintf(outfile, " Study time %.3f milliseconds\n",
647 ((double)time_taken * 1000.0)/
648 ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
649 }
650
651 extra = pcre_study(re, study_options, &error);
652 if (error != NULL)
653 fprintf(outfile, "Failed to study: %s\n", error);
654 else if (extra == NULL)
655 fprintf(outfile, "Study returned NULL\n");
656
657 /* This looks at internal information. A bit kludgy to do it this
658 way, but it is useful for testing. */
659
660 else if (do_showinfo)
661 {
662 real_pcre_extra *xx = (real_pcre_extra *)extra;
663 if ((xx->options & PCRE_STUDY_MAPPED) == 0)
664 fprintf(outfile, "No starting character set\n");
665 else
666 {
667 int i;
668 int c = 24;
669 fprintf(outfile, "Starting character set: ");
670 for (i = 0; i < 256; i++)
671 {
672 if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)
673 {
674 if (c > 75)
675 {
676 fprintf(outfile, "\n ");
677 c = 2;
678 }
679 if (isprint(i) && i != ' ')
680 {
681 fprintf(outfile, "%c ", i);
682 c += 2;
683 }
684 else
685 {
686 fprintf(outfile, "\\x%02x ", i);
687 c += 5;
688 }
689 }
690 }
691 fprintf(outfile, "\n");
692 }
693 }
694 }
695 }
696
697 /* Read data lines and test them */
698
699 for (;;)
700 {
701 unsigned char *q;
702 unsigned char *bptr = dbuffer;
703 int count, c;
704 int copystrings = 0;
705 int getstrings = 0;
706 int getlist = 0;
707 int gmatched = 0;
708 int start_offset = 0;
709 int g_notempty = 0;
710 int offsets[45];
711 int size_offsets = sizeof(offsets)/sizeof(int);
712
713 options = 0;
714
715 if (infile == stdin) printf("data> ");
716 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
717 {
718 done = 1;
719 goto CONTINUE;
720 }
721 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
722
723 len = (int)strlen((char *)buffer);
724 while (len > 0 && isspace(buffer[len-1])) len--;
725 buffer[len] = 0;
726 if (len == 0) break;
727
728 p = buffer;
729 while (isspace(*p)) p++;
730
731 q = dbuffer;
732 while ((c = *p++) != 0)
733 {
734 int i = 0;
735 int n = 0;
736 if (c == '\\') switch ((c = *p++))
737 {
738 case 'a': c = 7; break;
739 case 'b': c = '\b'; break;
740 case 'e': c = 27; break;
741 case 'f': c = '\f'; break;
742 case 'n': c = '\n'; break;
743 case 'r': c = '\r'; break;
744 case 't': c = '\t'; break;
745 case 'v': c = '\v'; break;
746
747 case '0': case '1': case '2': case '3':
748 case '4': case '5': case '6': case '7':
749 c -= '0';
750 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
751 c = c * 8 + *p++ - '0';
752 break;
753
754 case 'x':
755 c = 0;
756 while (i++ < 2 && isxdigit(*p))
757 {
758 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
759 p++;
760 }
761 break;
762
763 case 0: /* Allows for an empty line */
764 p--;
765 continue;
766
767 case 'A': /* Option setting */
768 options |= PCRE_ANCHORED;
769 continue;
770
771 case 'B':
772 options |= PCRE_NOTBOL;
773 continue;
774
775 case 'C':
776 while(isdigit(*p)) n = n * 10 + *p++ - '0';
777 copystrings |= 1 << n;
778 continue;
779
780 case 'G':
781 while(isdigit(*p)) n = n * 10 + *p++ - '0';
782 getstrings |= 1 << n;
783 continue;
784
785 case 'L':
786 getlist = 1;
787 continue;
788
789 case 'N':
790 options |= PCRE_NOTEMPTY;
791 continue;
792
793 case 'O':
794 while(isdigit(*p)) n = n * 10 + *p++ - '0';
795 if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;
796 continue;
797
798 case 'Z':
799 options |= PCRE_NOTEOL;
800 continue;
801 }
802 *q++ = c;
803 }
804 *q = 0;
805 len = q - dbuffer;
806
807 /* Handle matching via the POSIX interface, which does not
808 support timing. */
809
810 #if !defined NOPOSIX
811 if (posix || do_posix)
812 {
813 int rc;
814 int eflags = 0;
815 regmatch_t pmatch[sizeof(offsets)/sizeof(int)];
816 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
817 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
818
819 rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);
820
821 if (rc != 0)
822 {
823 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
824 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
825 }
826 else
827 {
828 size_t i;
829 for (i = 0; i < size_offsets; i++)
830 {
831 if (pmatch[i].rm_so >= 0)
832 {
833 fprintf(outfile, "%2d: ", (int)i);
834 pchars(dbuffer + pmatch[i].rm_so,
835 pmatch[i].rm_eo - pmatch[i].rm_so);
836 fprintf(outfile, "\n");
837 if (i == 0 && do_showrest)
838 {
839 fprintf(outfile, " 0+ ");
840 pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);
841 fprintf(outfile, "\n");
842 }
843 }
844 }
845 }
846 }
847
848 /* Handle matching via the native interface - repeats for /g and /G */
849
850 else
851 #endif /* !defined NOPOSIX */
852
853 for (;; gmatched++) /* Loop for /g or /G */
854 {
855 if (timeit)
856 {
857 register int i;
858 clock_t time_taken;
859 clock_t start_time = clock();
860 for (i = 0; i < LOOPREPEAT; i++)
861 count = pcre_exec(re, extra, (char *)bptr, len,
862 start_offset, options | g_notempty, offsets, size_offsets);
863 time_taken = clock() - start_time;
864 fprintf(outfile, "Execute time %.3f milliseconds\n",
865 ((double)time_taken * 1000.0)/
866 ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
867 }
868
869 count = pcre_exec(re, extra, (char *)bptr, len,
870 start_offset, options | g_notempty, offsets, size_offsets);
871
872 if (count == 0)
873 {
874 fprintf(outfile, "Matched, but too many substrings\n");
875 count = size_offsets/3;
876 }
877
878 /* Matched */
879
880 if (count >= 0)
881 {
882 int i;
883 for (i = 0; i < count * 2; i += 2)
884 {
885 if (offsets[i] < 0)
886 fprintf(outfile, "%2d: <unset>\n", i/2);
887 else
888 {
889 fprintf(outfile, "%2d: ", i/2);
890 pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);
891 fprintf(outfile, "\n");
892 if (i == 0)
893 {
894 if (do_showrest)
895 {
896 fprintf(outfile, " 0+ ");
897 pchars(bptr + offsets[i+1], len - offsets[i+1]);
898 fprintf(outfile, "\n");
899 }
900 }
901 }
902 }
903
904 for (i = 0; i < 32; i++)
905 {
906 if ((copystrings & (1 << i)) != 0)
907 {
908 char copybuffer[16];
909 int rc = pcre_copy_substring((char *)bptr, offsets, count,
910 i, copybuffer, sizeof(copybuffer));
911 if (rc < 0)
912 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
913 else
914 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
915 }
916 }
917
918 for (i = 0; i < 32; i++)
919 {
920 if ((getstrings & (1 << i)) != 0)
921 {
922 const char *substring;
923 int rc = pcre_get_substring((char *)bptr, offsets, count,
924 i, &substring);
925 if (rc < 0)
926 fprintf(outfile, "get substring %d failed %d\n", i, rc);
927 else
928 {
929 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
930 free((void *)substring);
931 }
932 }
933 }
934
935 if (getlist)
936 {
937 const char **stringlist;
938 int rc = pcre_get_substring_list((char *)bptr, offsets, count,
939 &stringlist);
940 if (rc < 0)
941 fprintf(outfile, "get substring list failed %d\n", rc);
942 else
943 {
944 for (i = 0; i < count; i++)
945 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
946 if (stringlist[i] != NULL)
947 fprintf(outfile, "string list not terminated by NULL\n");
948 free((void *)stringlist);
949 }
950 }
951 }
952
953 /* Failed to match. If this is a /g or /G loop and we previously set
954 PCRE_NOTEMPTY after a null match, this is not necessarily the end.
955 We want to advance the start offset, and continue. Fudge the offset
956 values to achieve this. We won't be at the end of the string - that
957 was checked before setting PCRE_NOTEMPTY. */
958
959 else
960 {
961 if (g_notempty != 0)
962 {
963 offsets[0] = start_offset;
964 offsets[1] = start_offset + 1;
965 }
966 else
967 {
968 if (gmatched == 0) /* Error if no previous matches */
969 {
970 if (count == -1) fprintf(outfile, "No match\n");
971 else fprintf(outfile, "Error %d\n", count);
972 }
973 break; /* Out of the /g loop */
974 }
975 }
976
977 /* If not /g or /G we are done */
978
979 if (!do_g && !do_G) break;
980
981 /* If we have matched an empty string, first check to see if we are at
982 the end of the subject. If so, the /g loop is over. Otherwise, mimic
983 what Perl's /g options does. This turns out to be rather cunning. First
984 we set PCRE_NOTEMPTY and try the match again at the same point. If this
985 fails (picked up above) we advance to the next character. */
986
987 g_notempty = 0;
988 if (offsets[0] == offsets[1])
989 {
990 if (offsets[0] == len) break;
991 g_notempty = PCRE_NOTEMPTY;
992 }
993
994 /* For /g, update the start offset, leaving the rest alone */
995
996 if (do_g) start_offset = offsets[1];
997
998 /* For /G, update the pointer and length */
999
1000 else
1001 {
1002 bptr += offsets[1];
1003 len -= offsets[1];
1004 }
1005 } /* End of loop for /g and /G */
1006 } /* End of loop for data lines */
1007
1008 CONTINUE:
1009
1010 #if !defined NOPOSIX
1011 if (posix || do_posix) regfree(&preg);
1012 #endif
1013
1014 if (re != NULL) free(re);
1015 if (extra != NULL) free(extra);
1016 if (tables != NULL)
1017 {
1018 free((void *)tables);
1019 setlocale(LC_CTYPE, "C");
1020 }
1021 }
1022
1023 fprintf(outfile, "\n");
1024 return 0;
1025 }
1026
1027 /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12