/[pcre]/code/tags/pcre-3.8/pcretest.c
ViewVC logotype

Contents of /code/tags/pcre-3.8/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 60 - (show annotations) (download)
Sat Feb 24 21:39:56 2007 UTC (7 years, 9 months ago) by nigel
File MIME type: text/plain
File size: 33785 byte(s)
Tag code/trunk as code/tags/pcre-3.8.

/*************************************************
*             PCRE testing program               *
*************************************************/

#include <ctype.h>
#include <locale.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/* Use the internal info for displaying the results of pcre_study(). */

#include "internal.h"

/* It is possible to compile this test program without including support for
testing the POSIX interface, though this is not available via the standard
Makefile. */

#if !defined NOPOSIX
#include "pcreposix.h"
#endif

/* If <time.h> did not supply CLOCKS_PER_SEC, fall back to CLK_TCK when that
is defined, and otherwise to 100 ticks per second. */

#ifndef CLOCKS_PER_SEC
#ifdef CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif

/* Number of iterations of each timing loop (used when -t is given). */

#define LOOPREPEAT 20000


/* Stream that receives all test output; main() points it at stdout or at the
named output file. */

static FILE *outfile;

/* Non-zero makes new_malloc() report the size of every allocation. */

static int log_store = 0;

/* Size passed to the most recent call of new_malloc(). */

static size_t gotten_store;



/* Tables for the UTF-8 conversion functions. In each table, entry i refers
to a character that is encoded in i+1 bytes. */

/* Largest character value that can be encoded in i+1 bytes. */

static const int utf8_table1[] = {
  0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};

/* Marker bits that are ORed into the first byte of an (i+1)-byte character. */

static const int utf8_table2[] = {
  0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};

/* Mask that extracts the data bits from the first byte of an (i+1)-byte
character. */

static const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};


/*************************************************
*       Convert character value to UTF-8         *
*************************************************/

/* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 1 to 6 bytes.

Arguments:
  cvalue     the character value
  buffer     pointer to buffer for result - at least 6 bytes long

Returns:     number of bytes placed in the buffer
             -1 if input character is negative
             0 if input character is positive but too big (only when
             int is longer than 32 bits)
*/

static int
ord2utf8(int cvalue, unsigned char *buffer)
{
  const int table_size = (int)(sizeof(utf8_table1)/sizeof(utf8_table1[0]));
  int extra;    /* number of continuation bytes needed */
  int k;

  if (cvalue < 0) return -1;

  /* Find the shortest encoding that can hold the value. */

  for (extra = 0; extra < table_size; extra++)
    if (cvalue <= utf8_table1[extra]) break;
  if (extra >= table_size) return 0;

  /* Fill in the continuation bytes from the last one backwards, taking six
  bits of the value each time; what remains goes into the first byte. */

  buffer += extra;
  for (k = extra; k > 0; k--)
  {
    *buffer-- = (unsigned char)(0x80 | (cvalue & 0x3f));
    cvalue >>= 6;
  }
  *buffer = (unsigned char)(utf8_table2[extra] | cvalue);
  return extra + 1;
}


/*************************************************
*            Convert UTF-8 string to value       *
*************************************************/

/* This function takes one or more bytes that represent a UTF-8 character,
and returns the value of the character.

Arguments:
  buffer   a pointer to the byte vector
  vptr     a pointer to an int to receive the value; it is written only
           when the return value is greater than zero

Returns:   >  0 => the number of bytes consumed
           0    => the first byte cannot start a UTF-8 character
           -1 to -5 => byte at offset (-return) is not a continuation byte
           -(length) => the character was not encoded in its shortest form
*/

int
utf82ord(unsigned char *buffer, int *vptr)
{
  const int table_size = (int)(sizeof(utf8_table1)/sizeof(utf8_table1[0]));
  int c = *buffer++;
  int d = c;
  int i, j, s;

  /* Count the leading one bits of the first byte. */

  for (i = -1; i < 6; i++)               /* i is number of additional bytes */
  {
    if ((d & 0x80) == 0) break;
    d <<= 1;
  }

  if (i == -1) { *vptr = c; return 1; }  /* ascii character */
  if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */

  /* i now has a value in the range 1-5 */

  s = 6*i;
  d = (c & utf8_table3[i]) << s;

  for (j = 0; j < i; j++)
  {
    c = *buffer++;
    if ((c & 0xc0) != 0x80) return -(j+1);
    s -= 6;
    d |= (c & 0x3f) << s;
  }

  /* Check that encoding was the correct unique one */

  for (j = 0; j < table_size; j++)
    if (d <= utf8_table1[j]) break;
  if (j != i) return -(i+1);

  /* Valid value */

  *vptr = d;
  return i+1;
}






/* Printable names for the opcodes of the compiled pattern, indexed by opcode
value. The table is used by the debugging function print_internals(), which
is the same code as contained in pcre.c under the DEBUG macro. */

static const char *OP_names[] = {
  "End", "\\A", "\\B", "\\b", "\\D", "\\d",
  "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
  "Opt", "^", "$", "Any", "chars", "not",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{",
  "class", "Ref", "Recurse",
  "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
  "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
  "Brazero", "Braminzero", "Branumber", "Bra"
};


167 static void print_internals(pcre *re)
168 {
169 unsigned char *code = ((real_pcre *)re)->code;
170
171 fprintf(outfile, "------------------------------------------------------------------\n");
172
173 for(;;)
174 {
175 int c;
176 int charlength;
177
178 fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));
179
180 if (*code >= OP_BRA)
181 {
182 if (*code - OP_BRA > EXTRACT_BASIC_MAX)
183 fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);
184 else
185 fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
186 code += 2;
187 }
188
189 else switch(*code)
190 {
191 case OP_END:
192 fprintf(outfile, " %s\n", OP_names[*code]);
193 fprintf(outfile, "------------------------------------------------------------------\n");
194 return;
195
196 case OP_OPT:
197 fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);
198 code++;
199 break;
200
201 case OP_CHARS:
202 charlength = *(++code);
203 fprintf(outfile, "%3d ", charlength);
204 while (charlength-- > 0)
205 if (isprint(c = *(++code))) fprintf(outfile, "%c", c);
206 else fprintf(outfile, "\\x%02x", c);
207 break;
208
209 case OP_KETRMAX:
210 case OP_KETRMIN:
211 case OP_ALT:
212 case OP_KET:
213 case OP_ASSERT:
214 case OP_ASSERT_NOT:
215 case OP_ASSERTBACK:
216 case OP_ASSERTBACK_NOT:
217 case OP_ONCE:
218 case OP_COND:
219 case OP_BRANUMBER:
220 case OP_REVERSE:
221 case OP_CREF:
222 fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
223 code += 2;
224 break;
225
226 case OP_STAR:
227 case OP_MINSTAR:
228 case OP_PLUS:
229 case OP_MINPLUS:
230 case OP_QUERY:
231 case OP_MINQUERY:
232 case OP_TYPESTAR:
233 case OP_TYPEMINSTAR:
234 case OP_TYPEPLUS:
235 case OP_TYPEMINPLUS:
236 case OP_TYPEQUERY:
237 case OP_TYPEMINQUERY:
238 if (*code >= OP_TYPESTAR)
239 fprintf(outfile, " %s", OP_names[code[1]]);
240 else if (isprint(c = code[1])) fprintf(outfile, " %c", c);
241 else fprintf(outfile, " \\x%02x", c);
242 fprintf(outfile, "%s", OP_names[*code++]);
243 break;
244
245 case OP_EXACT:
246 case OP_UPTO:
247 case OP_MINUPTO:
248 if (isprint(c = code[3])) fprintf(outfile, " %c{", c);
249 else fprintf(outfile, " \\x%02x{", c);
250 if (*code != OP_EXACT) fprintf(outfile, ",");
251 fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
252 if (*code == OP_MINUPTO) fprintf(outfile, "?");
253 code += 3;
254 break;
255
256 case OP_TYPEEXACT:
257 case OP_TYPEUPTO:
258 case OP_TYPEMINUPTO:
259 fprintf(outfile, " %s{", OP_names[code[3]]);
260 if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");
261 fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
262 if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");
263 code += 3;
264 break;
265
266 case OP_NOT:
267 if (isprint(c = *(++code))) fprintf(outfile, " [^%c]", c);
268 else fprintf(outfile, " [^\\x%02x]", c);
269 break;
270
271 case OP_NOTSTAR:
272 case OP_NOTMINSTAR:
273 case OP_NOTPLUS:
274 case OP_NOTMINPLUS:
275 case OP_NOTQUERY:
276 case OP_NOTMINQUERY:
277 if (isprint(c = code[1])) fprintf(outfile, " [^%c]", c);
278 else fprintf(outfile, " [^\\x%02x]", c);
279 fprintf(outfile, "%s", OP_names[*code++]);
280 break;
281
282 case OP_NOTEXACT:
283 case OP_NOTUPTO:
284 case OP_NOTMINUPTO:
285 if (isprint(c = code[3])) fprintf(outfile, " [^%c]{", c);
286 else fprintf(outfile, " [^\\x%02x]{", c);
287 if (*code != OP_NOTEXACT) fprintf(outfile, ",");
288 fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
289 if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");
290 code += 3;
291 break;
292
293 case OP_REF:
294 fprintf(outfile, " \\%d", (code[1] << 8) | code[2]);
295 code += 3;
296 goto CLASS_REF_REPEAT;
297
298 case OP_CLASS:
299 {
300 int i, min, max;
301 code++;
302 fprintf(outfile, " [");
303
304 for (i = 0; i < 256; i++)
305 {
306 if ((code[i/8] & (1 << (i&7))) != 0)
307 {
308 int j;
309 for (j = i+1; j < 256; j++)
310 if ((code[j/8] & (1 << (j&7))) == 0) break;
311 if (i == '-' || i == ']') fprintf(outfile, "\\");
312 if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);
313 if (--j > i)
314 {
315 fprintf(outfile, "-");
316 if (j == '-' || j == ']') fprintf(outfile, "\\");
317 if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);
318 }
319 i = j;
320 }
321 }
322 fprintf(outfile, "]");
323 code += 32;
324
325 CLASS_REF_REPEAT:
326
327 switch(*code)
328 {
329 case OP_CRSTAR:
330 case OP_CRMINSTAR:
331 case OP_CRPLUS:
332 case OP_CRMINPLUS:
333 case OP_CRQUERY:
334 case OP_CRMINQUERY:
335 fprintf(outfile, "%s", OP_names[*code]);
336 break;
337
338 case OP_CRRANGE:
339 case OP_CRMINRANGE:
340 min = (code[1] << 8) + code[2];
341 max = (code[3] << 8) + code[4];
342 if (max == 0) fprintf(outfile, "{%d,}", min);
343 else fprintf(outfile, "{%d,%d}", min, max);
344 if (*code == OP_CRMINRANGE) fprintf(outfile, "?");
345 code += 4;
346 break;
347
348 default:
349 code--;
350 }
351 }
352 break;
353
354 /* Anything else is just a one-node item */
355
356 default:
357 fprintf(outfile, " %s", OP_names[*code]);
358 break;
359 }
360
361 code++;
362 fprintf(outfile, "\n");
363 }
364 }
365
366
367
368 /* Character string printing function. A "normal" and a UTF-8 version. */
369
370 static void pchars(unsigned char *p, int length, int utf8)
371 {
372 int c;
373 while (length-- > 0)
374 {
375 if (utf8)
376 {
377 int rc = utf82ord(p, &c);
378 if (rc > 0)
379 {
380 length -= rc - 1;
381 p += rc;
382 if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);
383 else fprintf(outfile, "\\x{%02x}", c);
384 continue;
385 }
386 }
387
388 /* Not UTF-8, or malformed UTF-8 */
389
390 if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
391 else fprintf(outfile, "\\x%02x", c);
392 }
393 }
394
395
396
397 /* Alternative malloc function, to test functionality and show the size of the
398 compiled re. */
399
400 static void *new_malloc(size_t size)
401 {
402 gotten_store = size;
403 if (log_store)
404 fprintf(outfile, "Memory allocation (code space): %d\n",
405 (int)((int)size - offsetof(real_pcre, code[0])));
406 return malloc(size);
407 }
408
409
410
411
412 /* Get one piece of information from the pcre_fullinfo() function */
413
414 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
415 {
416 int rc;
417 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
418 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
419 }
420
421
422
423
424 /* Read lines from named file or stdin and write to named file or stdout; lines
425 consist of a regular expression, in delimiters and optionally followed by
426 options, followed by a set of test data, terminated by an empty line. */
427
428 int main(int argc, char **argv)
429 {
430 FILE *infile = stdin;
431 int options = 0;
432 int study_options = 0;
433 int op = 1;
434 int timeit = 0;
435 int showinfo = 0;
436 int showstore = 0;
437 int size_offsets = 45;
438 int size_offsets_max;
439 int *offsets;
440 #if !defined NOPOSIX
441 int posix = 0;
442 #endif
443 int debug = 0;
444 int done = 0;
445 unsigned char buffer[30000];
446 unsigned char dbuffer[1024];
447
448 /* Static so that new_malloc can use it. */
449
450 outfile = stdout;
451
452 /* Scan options */
453
454 while (argc > 1 && argv[op][0] == '-')
455 {
456 char *endptr;
457
458 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
459 showstore = 1;
460 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
461 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
462 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
463 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
464 ((size_offsets = (int)strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
465 {
466 op++;
467 argc--;
468 }
469 #if !defined NOPOSIX
470 else if (strcmp(argv[op], "-p") == 0) posix = 1;
471 #endif
472 else
473 {
474 printf("** Unknown or malformed option %s\n", argv[op]);
475 printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
476 printf(" -d debug: show compiled code; implies -i\n"
477 " -i show information about compiled pattern\n"
478 " -o <n> set size of offsets vector to <n>\n");
479 #if !defined NOPOSIX
480 printf(" -p use POSIX interface\n");
481 #endif
482 printf(" -s output store information\n"
483 " -t time compilation and execution\n");
484 return 1;
485 }
486 op++;
487 argc--;
488 }
489
490 /* Get the store for the offsets vector, and remember what it was */
491
492 size_offsets_max = size_offsets;
493 offsets = malloc(size_offsets_max * sizeof(int));
494 if (offsets == NULL)
495 {
496 printf("** Failed to get %d bytes of memory for offsets vector\n",
497 size_offsets_max * sizeof(int));
498 return 1;
499 }
500
501 /* Sort out the input and output files */
502
503 if (argc > 1)
504 {
505 infile = fopen(argv[op], "r");
506 if (infile == NULL)
507 {
508 printf("** Failed to open %s\n", argv[op]);
509 return 1;
510 }
511 }
512
513 if (argc > 2)
514 {
515 outfile = fopen(argv[op+1], "w");
516 if (outfile == NULL)
517 {
518 printf("** Failed to open %s\n", argv[op+1]);
519 return 1;
520 }
521 }
522
523 /* Set alternative malloc function */
524
525 pcre_malloc = new_malloc;
526
527 /* Heading line, then prompt for first regex if stdin */
528
529 fprintf(outfile, "PCRE version %s\n\n", pcre_version());
530
531 /* Main loop */
532
533 while (!done)
534 {
535 pcre *re = NULL;
536 pcre_extra *extra = NULL;
537
538 #if !defined NOPOSIX /* There are still compilers that require no indent */
539 regex_t preg;
540 int do_posix = 0;
541 #endif
542
543 const char *error;
544 unsigned char *p, *pp, *ppp;
545 const unsigned char *tables = NULL;
546 int do_study = 0;
547 int do_debug = debug;
548 int do_G = 0;
549 int do_g = 0;
550 int do_showinfo = showinfo;
551 int do_showrest = 0;
552 int utf8 = 0;
553 int erroroffset, len, delimiter;
554
555 if (infile == stdin) printf(" re> ");
556 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
557 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
558
559 p = buffer;
560 while (isspace(*p)) p++;
561 if (*p == 0) continue;
562
563 /* Get the delimiter and seek the end of the pattern; if is isn't
564 complete, read more. */
565
566 delimiter = *p++;
567
568 if (isalnum(delimiter) || delimiter == '\\')
569 {
570 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
571 goto SKIP_DATA;
572 }
573
574 pp = p;
575
576 for(;;)
577 {
578 while (*pp != 0)
579 {
580 if (*pp == '\\' && pp[1] != 0) pp++;
581 else if (*pp == delimiter) break;
582 pp++;
583 }
584 if (*pp != 0) break;
585
586 len = sizeof(buffer) - (pp - buffer);
587 if (len < 256)
588 {
589 fprintf(outfile, "** Expression too long - missing delimiter?\n");
590 goto SKIP_DATA;
591 }
592
593 if (infile == stdin) printf(" > ");
594 if (fgets((char *)pp, len, infile) == NULL)
595 {
596 fprintf(outfile, "** Unexpected EOF\n");
597 done = 1;
598 goto CONTINUE;
599 }
600 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
601 }
602
603 /* If the first character after the delimiter is backslash, make
604 the pattern end with backslash. This is purely to provide a way
605 of testing for the error message when a pattern ends with backslash. */
606
607 if (pp[1] == '\\') *pp++ = '\\';
608
609 /* Terminate the pattern at the delimiter */
610
611 *pp++ = 0;
612
613 /* Look for options after final delimiter */
614
615 options = 0;
616 study_options = 0;
617 log_store = showstore; /* default from command line */
618
619 while (*pp != 0)
620 {
621 switch (*pp++)
622 {
623 case 'g': do_g = 1; break;
624 case 'i': options |= PCRE_CASELESS; break;
625 case 'm': options |= PCRE_MULTILINE; break;
626 case 's': options |= PCRE_DOTALL; break;
627 case 'x': options |= PCRE_EXTENDED; break;
628
629 case '+': do_showrest = 1; break;
630 case 'A': options |= PCRE_ANCHORED; break;
631 case 'D': do_debug = do_showinfo = 1; break;
632 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
633 case 'G': do_G = 1; break;
634 case 'I': do_showinfo = 1; break;
635 case 'M': log_store = 1; break;
636
637 #if !defined NOPOSIX
638 case 'P': do_posix = 1; break;
639 #endif
640
641 case 'S': do_study = 1; break;
642 case 'U': options |= PCRE_UNGREEDY; break;
643 case 'X': options |= PCRE_EXTRA; break;
644 case '8': options |= PCRE_UTF8; utf8 = 1; break;
645
646 case 'L':
647 ppp = pp;
648 while (*ppp != '\n' && *ppp != ' ') ppp++;
649 *ppp = 0;
650 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
651 {
652 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
653 goto SKIP_DATA;
654 }
655 tables = pcre_maketables();
656 pp = ppp;
657 break;
658
659 case '\n': case ' ': break;
660 default:
661 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
662 goto SKIP_DATA;
663 }
664 }
665
666 /* Handle compiling via the POSIX interface, which doesn't support the
667 timing, showing, or debugging options, nor the ability to pass over
668 local character tables. */
669
670 #if !defined NOPOSIX
671 if (posix || do_posix)
672 {
673 int rc;
674 int cflags = 0;
675 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
676 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
677 rc = regcomp(&preg, (char *)p, cflags);
678
679 /* Compilation failed; go back for another re, skipping to blank line
680 if non-interactive. */
681
682 if (rc != 0)
683 {
684 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
685 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
686 goto SKIP_DATA;
687 }
688 }
689
690 /* Handle compiling via the native interface */
691
692 else
693 #endif /* !defined NOPOSIX */
694
695 {
696 if (timeit)
697 {
698 register int i;
699 clock_t time_taken;
700 clock_t start_time = clock();
701 for (i = 0; i < LOOPREPEAT; i++)
702 {
703 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
704 if (re != NULL) free(re);
705 }
706 time_taken = clock() - start_time;
707 fprintf(outfile, "Compile time %.3f milliseconds\n",
708 ((double)time_taken * 1000.0) /
709 ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
710 }
711
712 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
713
714 /* Compilation failed; go back for another re, skipping to blank line
715 if non-interactive. */
716
717 if (re == NULL)
718 {
719 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
720 SKIP_DATA:
721 if (infile != stdin)
722 {
723 for (;;)
724 {
725 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
726 {
727 done = 1;
728 goto CONTINUE;
729 }
730 len = (int)strlen((char *)buffer);
731 while (len > 0 && isspace(buffer[len-1])) len--;
732 if (len == 0) break;
733 }
734 fprintf(outfile, "\n");
735 }
736 goto CONTINUE;
737 }
738
739 /* Compilation succeeded; print data if required. There are now two
740 info-returning functions. The old one has a limited interface and
741 returns only limited data. Check that it agrees with the newer one. */
742
743 if (do_showinfo)
744 {
745 unsigned long int get_options;
746 int old_first_char, old_options, old_count;
747 int count, backrefmax, first_char, need_char;
748 size_t size;
749
750 if (do_debug) print_internals(re);
751
752 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
753 new_info(re, NULL, PCRE_INFO_SIZE, &size);
754 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
755 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
756 new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
757 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
758
759 old_count = pcre_info(re, &old_options, &old_first_char);
760 if (count < 0) fprintf(outfile,
761 "Error %d from pcre_info()\n", count);
762 else
763 {
764 if (old_count != count) fprintf(outfile,
765 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
766 old_count);
767
768 if (old_first_char != first_char) fprintf(outfile,
769 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
770 first_char, old_first_char);
771
772 if (old_options != (int)get_options) fprintf(outfile,
773 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
774 get_options, old_options);
775 }
776
777 if (size != gotten_store) fprintf(outfile,
778 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
779 size, gotten_store);
780
781 fprintf(outfile, "Capturing subpattern count = %d\n", count);
782 if (backrefmax > 0)
783 fprintf(outfile, "Max back reference = %d\n", backrefmax);
784 if (get_options == 0) fprintf(outfile, "No options\n");
785 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
786 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
787 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
788 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
789 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
790 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
791 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
792 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
793 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
794 ((get_options & PCRE_UTF8) != 0)? " utf8" : "");
795
796 if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
797 fprintf(outfile, "Case state changes\n");
798
799 if (first_char == -1)
800 {
801 fprintf(outfile, "First char at start or follows \\n\n");
802 }
803 else if (first_char < 0)
804 {
805 fprintf(outfile, "No first char\n");
806 }
807 else
808 {
809 if (isprint(first_char))
810 fprintf(outfile, "First char = \'%c\'\n", first_char);
811 else
812 fprintf(outfile, "First char = %d\n", first_char);
813 }
814
815 if (need_char < 0)
816 {
817 fprintf(outfile, "No need char\n");
818 }
819 else
820 {
821 if (isprint(need_char))
822 fprintf(outfile, "Need char = \'%c\'\n", need_char);
823 else
824 fprintf(outfile, "Need char = %d\n", need_char);
825 }
826 }
827
828 /* If /S was present, study the regexp to generate additional info to
829 help with the matching. */
830
831 if (do_study)
832 {
833 if (timeit)
834 {
835 register int i;
836 clock_t time_taken;
837 clock_t start_time = clock();
838 for (i = 0; i < LOOPREPEAT; i++)
839 extra = pcre_study(re, study_options, &error);
840 time_taken = clock() - start_time;
841 if (extra != NULL) free(extra);
842 fprintf(outfile, " Study time %.3f milliseconds\n",
843 ((double)time_taken * 1000.0)/
844 ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
845 }
846
847 extra = pcre_study(re, study_options, &error);
848 if (error != NULL)
849 fprintf(outfile, "Failed to study: %s\n", error);
850 else if (extra == NULL)
851 fprintf(outfile, "Study returned NULL\n");
852
853 else if (do_showinfo)
854 {
855 uschar *start_bits = NULL;
856 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
857 if (start_bits == NULL)
858 fprintf(outfile, "No starting character set\n");
859 else
860 {
861 int i;
862 int c = 24;
863 fprintf(outfile, "Starting character set: ");
864 for (i = 0; i < 256; i++)
865 {
866 if ((start_bits[i/8] & (1<<(i%8))) != 0)
867 {
868 if (c > 75)
869 {
870 fprintf(outfile, "\n ");
871 c = 2;
872 }
873 if (isprint(i) && i != ' ')
874 {
875 fprintf(outfile, "%c ", i);
876 c += 2;
877 }
878 else
879 {
880 fprintf(outfile, "\\x%02x ", i);
881 c += 5;
882 }
883 }
884 }
885 fprintf(outfile, "\n");
886 }
887 }
888 }
889 }
890
891 /* Read data lines and test them */
892
893 for (;;)
894 {
895 unsigned char *q;
896 unsigned char *bptr = dbuffer;
897 int *use_offsets = offsets;
898 int use_size_offsets = size_offsets;
899 int count, c;
900 int copystrings = 0;
901 int getstrings = 0;
902 int getlist = 0;
903 int gmatched = 0;
904 int start_offset = 0;
905 int g_notempty = 0;
906
907 options = 0;
908
909 if (infile == stdin) printf("data> ");
910 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
911 {
912 done = 1;
913 goto CONTINUE;
914 }
915 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
916
917 len = (int)strlen((char *)buffer);
918 while (len > 0 && isspace(buffer[len-1])) len--;
919 buffer[len] = 0;
920 if (len == 0) break;
921
922 p = buffer;
923 while (isspace(*p)) p++;
924
925 q = dbuffer;
926 while ((c = *p++) != 0)
927 {
928 int i = 0;
929 int n = 0;
930 if (c == '\\') switch ((c = *p++))
931 {
932 case 'a': c = 7; break;
933 case 'b': c = '\b'; break;
934 case 'e': c = 27; break;
935 case 'f': c = '\f'; break;
936 case 'n': c = '\n'; break;
937 case 'r': c = '\r'; break;
938 case 't': c = '\t'; break;
939 case 'v': c = '\v'; break;
940
941 case '0': case '1': case '2': case '3':
942 case '4': case '5': case '6': case '7':
943 c -= '0';
944 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
945 c = c * 8 + *p++ - '0';
946 break;
947
948 case 'x':
949
950 /* Handle \x{..} specially - new Perl thing for utf8 */
951
952 if (*p == '{')
953 {
954 unsigned char *pt = p;
955 c = 0;
956 while (isxdigit(*(++pt)))
957 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
958 if (*pt == '}')
959 {
960 unsigned char buffer[8];
961 int ii, utn;
962 utn = ord2utf8(c, buffer);
963 for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];
964 c = buffer[ii]; /* Last byte */
965 p = pt + 1;
966 break;
967 }
968 /* Not correct form; fall through */
969 }
970
971 /* Ordinary \x */
972
973 c = 0;
974 while (i++ < 2 && isxdigit(*p))
975 {
976 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
977 p++;
978 }
979 break;
980
981 case 0: /* Allows for an empty line */
982 p--;
983 continue;
984
985 case 'A': /* Option setting */
986 options |= PCRE_ANCHORED;
987 continue;
988
989 case 'B':
990 options |= PCRE_NOTBOL;
991 continue;
992
993 case 'C':
994 while(isdigit(*p)) n = n * 10 + *p++ - '0';
995 copystrings |= 1 << n;
996 continue;
997
998 case 'G':
999 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1000 getstrings |= 1 << n;
1001 continue;
1002
1003 case 'L':
1004 getlist = 1;
1005 continue;
1006
1007 case 'N':
1008 options |= PCRE_NOTEMPTY;
1009 continue;
1010
1011 case 'O':
1012 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1013 if (n > size_offsets_max)
1014 {
1015 size_offsets_max = n;
1016 free(offsets);
1017 use_offsets = offsets = malloc(size_offsets_max * sizeof(int));
1018 if (offsets == NULL)
1019 {
1020 printf("** Failed to get %d bytes of memory for offsets vector\n",
1021 size_offsets_max * sizeof(int));
1022 return 1;
1023 }
1024 }
1025 use_size_offsets = n;
1026 if (n == 0) use_offsets = NULL;
1027 continue;
1028
1029 case 'Z':
1030 options |= PCRE_NOTEOL;
1031 continue;
1032 }
1033 *q++ = c;
1034 }
1035 *q = 0;
1036 len = q - dbuffer;
1037
1038 /* Handle matching via the POSIX interface, which does not
1039 support timing. */
1040
1041 #if !defined NOPOSIX
1042 if (posix || do_posix)
1043 {
1044 int rc;
1045 int eflags = 0;
1046 regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
1047 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1048 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1049
1050 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1051
1052 if (rc != 0)
1053 {
1054 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
1055 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1056 }
1057 else
1058 {
1059 size_t i;
1060 for (i = 0; i < use_size_offsets; i++)
1061 {
1062 if (pmatch[i].rm_so >= 0)
1063 {
1064 fprintf(outfile, "%2d: ", (int)i);
1065 pchars(dbuffer + pmatch[i].rm_so,
1066 pmatch[i].rm_eo - pmatch[i].rm_so, utf8);
1067 fprintf(outfile, "\n");
1068 if (i == 0 && do_showrest)
1069 {
1070 fprintf(outfile, " 0+ ");
1071 pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);
1072 fprintf(outfile, "\n");
1073 }
1074 }
1075 }
1076 }
1077 free(pmatch);
1078 }
1079
1080 /* Handle matching via the native interface - repeats for /g and /G */
1081
1082 else
1083 #endif /* !defined NOPOSIX */
1084
1085 for (;; gmatched++) /* Loop for /g or /G */
1086 {
1087 if (timeit)
1088 {
1089 register int i;
1090 clock_t time_taken;
1091 clock_t start_time = clock();
1092 for (i = 0; i < LOOPREPEAT; i++)
1093 count = pcre_exec(re, extra, (char *)bptr, len,
1094 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1095 time_taken = clock() - start_time;
1096 fprintf(outfile, "Execute time %.3f milliseconds\n",
1097 ((double)time_taken * 1000.0)/
1098 ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
1099 }
1100
1101 count = pcre_exec(re, extra, (char *)bptr, len,
1102 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1103
1104 if (count == 0)
1105 {
1106 fprintf(outfile, "Matched, but too many substrings\n");
1107 count = use_size_offsets/3;
1108 }
1109
1110 /* Matched */
1111
1112 if (count >= 0)
1113 {
1114 int i;
1115 for (i = 0; i < count * 2; i += 2)
1116 {
1117 if (use_offsets[i] < 0)
1118 fprintf(outfile, "%2d: <unset>\n", i/2);
1119 else
1120 {
1121 fprintf(outfile, "%2d: ", i/2);
1122 pchars(bptr + use_offsets[i], use_offsets[i+1] - use_offsets[i], utf8);
1123 fprintf(outfile, "\n");
1124 if (i == 0)
1125 {
1126 if (do_showrest)
1127 {
1128 fprintf(outfile, " 0+ ");
1129 pchars(bptr + use_offsets[i+1], len - use_offsets[i+1], utf8);
1130 fprintf(outfile, "\n");
1131 }
1132 }
1133 }
1134 }
1135
1136 for (i = 0; i < 32; i++)
1137 {
1138 if ((copystrings & (1 << i)) != 0)
1139 {
1140 char copybuffer[16];
1141 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1142 i, copybuffer, sizeof(copybuffer));
1143 if (rc < 0)
1144 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1145 else
1146 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1147 }
1148 }
1149
1150 for (i = 0; i < 32; i++)
1151 {
1152 if ((getstrings & (1 << i)) != 0)
1153 {
1154 const char *substring;
1155 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1156 i, &substring);
1157 if (rc < 0)
1158 fprintf(outfile, "get substring %d failed %d\n", i, rc);
1159 else
1160 {
1161 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1162 /* free((void *)substring); */
1163 pcre_free_substring(substring);
1164 }
1165 }
1166 }
1167
1168 if (getlist)
1169 {
1170 const char **stringlist;
1171 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1172 &stringlist);
1173 if (rc < 0)
1174 fprintf(outfile, "get substring list failed %d\n", rc);
1175 else
1176 {
1177 for (i = 0; i < count; i++)
1178 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1179 if (stringlist[i] != NULL)
1180 fprintf(outfile, "string list not terminated by NULL\n");
1181 /* free((void *)stringlist); */
1182 pcre_free_substring_list(stringlist);
1183 }
1184 }
1185 }
1186
1187 /* Failed to match. If this is a /g or /G loop and we previously set
1188 g_notempty after a null match, this is not necessarily the end.
1189 We want to advance the start offset, and continue. Fudge the offset
1190 values to achieve this. We won't be at the end of the string - that
1191 was checked before setting g_notempty. */
1192
1193 else
1194 {
1195 if (g_notempty != 0)
1196 {
1197 use_offsets[0] = start_offset;
1198 use_offsets[1] = start_offset + 1;
1199 }
1200 else
1201 {
1202 if (gmatched == 0) /* Error if no previous matches */
1203 {
1204 if (count == -1) fprintf(outfile, "No match\n");
1205 else fprintf(outfile, "Error %d\n", count);
1206 }
1207 break; /* Out of the /g loop */
1208 }
1209 }
1210
1211 /* If not /g or /G we are done */
1212
1213 if (!do_g && !do_G) break;
1214
1215 /* If we have matched an empty string, first check to see if we are at
1216 the end of the subject. If so, the /g loop is over. Otherwise, mimic
1217 what Perl's /g options does. This turns out to be rather cunning. First
1218 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1219 same point. If this fails (picked up above) we advance to the next
1220 character. */
1221
1222 g_notempty = 0;
1223 if (use_offsets[0] == use_offsets[1])
1224 {
1225 if (use_offsets[0] == len) break;
1226 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1227 }
1228
1229 /* For /g, update the start offset, leaving the rest alone */
1230
1231 if (do_g) start_offset = use_offsets[1];
1232
1233 /* For /G, update the pointer and length */
1234
1235 else
1236 {
1237 bptr += use_offsets[1];
1238 len -= use_offsets[1];
1239 }
1240 } /* End of loop for /g and /G */
1241 } /* End of loop for data lines */
1242
1243 CONTINUE:
1244
1245 #if !defined NOPOSIX
1246 if (posix || do_posix) regfree(&preg);
1247 #endif
1248
1249 if (re != NULL) free(re);
1250 if (extra != NULL) free(extra);
1251 if (tables != NULL)
1252 {
1253 free((void *)tables);
1254 setlocale(LC_CTYPE, "C");
1255 }
1256 }
1257
1258 fprintf(outfile, "\n");
1259 return 0;
1260 }
1261
/* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12