/[pcre]/code/tags/pcre-3.7/pcretest.c
ViewVC logotype

Contents of /code/tags/pcre-3.7/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 58 - (show annotations) (download)
Sat Feb 24 21:39:52 2007 UTC (7 years, 4 months ago) by nigel
File MIME type: text/plain
File size: 33804 byte(s)
Tag code/trunk as code/tags/pcre-3.7.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 #include <ctype.h>
6 #include <stdio.h>
7 #include <string.h>
8 #include <stdlib.h>
9 #include <time.h>
10 #include <locale.h>
11
12 /* Use the internal info for displaying the results of pcre_study(). */
13
14 #include "internal.h"
15
16 /* It is possible to compile this test program without including support for
17 testing the POSIX interface, though this is not available via the standard
18 Makefile. */
19
20 #if !defined NOPOSIX
21 #include "pcreposix.h"
22 #endif
23
24 #ifndef CLOCKS_PER_SEC
25 #ifdef CLK_TCK
26 #define CLOCKS_PER_SEC CLK_TCK
27 #else
28 #define CLOCKS_PER_SEC 100
29 #endif
30 #endif
31
32 #define LOOPREPEAT 20000
33
34
35 static FILE *outfile;
36 static int log_store = 0;
37 static size_t gotten_store;
38
39
40
/* Tables used by the two UTF-8 conversion functions below. Each one is
indexed by the number of additional bytes (0 - 5) that follow the first byte
of an encoded character. */

/* Largest character value that fits in a sequence with that many extra bytes */

static int utf8_table1[] = {
  0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};

/* Marker bits that are ORed into the first byte of the sequence */

static int utf8_table2[] = {
  0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};

/* Mask that extracts the data bits held in the first byte of the sequence */

static int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};


/*************************************************
*       Convert character value to UTF-8         *
*************************************************/

/* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 1 to 6 bytes. The most significant
bits of the value go into the first byte and each following byte carries the
next six bits, which is the layout that UTF-8 defines. (The previous version
of this function stored the least significant bits in the first byte, so
0x100 came out as C0 88 instead of C4 80.)

NOTE(review): whatever decodes these bytes must use the same layout; the
matching library is not visible from this file - confirm it expects standard
UTF-8.

Arguments:
  cvalue     the character value
  buffer     pointer to buffer for result - at least 6 bytes long

Returns:     number of bytes placed in the buffer
             -1 if input character is negative
             0 if input character is positive but too big (only when
             int is longer than 32 bits)
*/

static int
ord2utf8(int cvalue, unsigned char *buffer)
{
int i, j;
int ntable = (int)(sizeof(utf8_table1)/sizeof(utf8_table1[0]));

if (cvalue < 0) return -1;

/* Find the shortest sequence that can hold the value */

for (i = 0; i < ntable; i++)
  if (cvalue <= utf8_table1[i]) break;
if (i >= ntable) return 0;

/* Fill in the continuation bytes from the last one backwards, taking six
bits at a time from the low end; what is left belongs in the first byte. */

buffer += i;
for (j = i; j > 0; j--)
  {
  *buffer-- = (unsigned char)(0x80 | (cvalue & 0x3f));
  cvalue >>= 6;
  }
*buffer = (unsigned char)(utf8_table2[i] | cvalue);
return i + 1;
}


/*************************************************
*            Convert UTF-8 string to value       *
*************************************************/

/* This function takes one or more bytes that represent a UTF-8 character,
and returns the value of the character. It is the inverse of ord2utf8(): the
first byte supplies the most significant bits.

Arguments:
  buffer   a pointer to the byte vector
  vptr     a pointer to an int to receive the value; it is written only
           when the return value is greater than zero

Returns:   >  0 => the number of bytes consumed
           -6 to 0 => malformed UTF-8 character at offset = (-return)
*/

int
utf82ord(unsigned char *buffer, int *vptr)
{
int c = *buffer++;
int d = c;
int i, j;
int ntable = (int)(sizeof(utf8_table1)/sizeof(utf8_table1[0]));

/* Count the leading 1-bits of the first byte */

for (i = -1; i < 6; i++)               /* i is number of additional bytes */
  {
  if ((d & 0x80) == 0) break;
  d <<= 1;
  }

if (i == -1) { *vptr = c; return 1; }  /* ascii character */
if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */

/* i now has a value in the range 1-5. Start with the data bits of the first
byte and append six more bits for every continuation byte. */

d = c & utf8_table3[i];

for (j = 0; j < i; j++)
  {
  c = *buffer++;
  if ((c & 0xc0) != 0x80) return -(j+1);
  d = (d << 6) | (c & 0x3f);
  }

/* Check that encoding was the correct unique one, that is, that the value
could not have been held in a shorter sequence. */

for (j = 0; j < ntable; j++)
  if (d <= utf8_table1[j]) break;
if (j != i) return -(i+1);

/* Valid value */

*vptr = d;
return i+1;
}
142
143
144
145
146
147
/* Text used when displaying each item of a compiled regex. print_internals()
below looks names up in this table using opcode values taken from the
compiled code, so the order of the entries must not be changed. According to
the original note, the debugging code is the same as that contained in pcre.c
under the DEBUG macro. */

static const char *OP_names[] = {
  "End",
  "\\A", "\\B", "\\b", "\\D", "\\d", "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
  "Opt", "^", "$", "Any", "chars", "not",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{",
  "class", "Ref", "Recurse",
  "Alt", "Ket", "KetRmax", "KetRmin",
  "Assert", "Assert not", "AssertB", "AssertB not",
  "Reverse", "Once", "Cond", "Cref",
  "Brazero", "Braminzero", "Branumber", "Bra"
};
164
165
166 static void print_internals(pcre *re)
167 {
168 unsigned char *code = ((real_pcre *)re)->code;
169
170 fprintf(outfile, "------------------------------------------------------------------\n");
171
172 for(;;)
173 {
174 int c;
175 int charlength;
176
177 fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));
178
179 if (*code >= OP_BRA)
180 {
181 if (*code - OP_BRA > EXTRACT_BASIC_MAX)
182 fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);
183 else
184 fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
185 code += 2;
186 }
187
188 else switch(*code)
189 {
190 case OP_END:
191 fprintf(outfile, " %s\n", OP_names[*code]);
192 fprintf(outfile, "------------------------------------------------------------------\n");
193 return;
194
195 case OP_OPT:
196 fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);
197 code++;
198 break;
199
200 case OP_CHARS:
201 charlength = *(++code);
202 fprintf(outfile, "%3d ", charlength);
203 while (charlength-- > 0)
204 if (isprint(c = *(++code))) fprintf(outfile, "%c", c);
205 else fprintf(outfile, "\\x%02x", c);
206 break;
207
208 case OP_KETRMAX:
209 case OP_KETRMIN:
210 case OP_ALT:
211 case OP_KET:
212 case OP_ASSERT:
213 case OP_ASSERT_NOT:
214 case OP_ASSERTBACK:
215 case OP_ASSERTBACK_NOT:
216 case OP_ONCE:
217 case OP_COND:
218 case OP_BRANUMBER:
219 case OP_REVERSE:
220 case OP_CREF:
221 fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
222 code += 2;
223 break;
224
225 case OP_STAR:
226 case OP_MINSTAR:
227 case OP_PLUS:
228 case OP_MINPLUS:
229 case OP_QUERY:
230 case OP_MINQUERY:
231 case OP_TYPESTAR:
232 case OP_TYPEMINSTAR:
233 case OP_TYPEPLUS:
234 case OP_TYPEMINPLUS:
235 case OP_TYPEQUERY:
236 case OP_TYPEMINQUERY:
237 if (*code >= OP_TYPESTAR)
238 fprintf(outfile, " %s", OP_names[code[1]]);
239 else if (isprint(c = code[1])) fprintf(outfile, " %c", c);
240 else fprintf(outfile, " \\x%02x", c);
241 fprintf(outfile, "%s", OP_names[*code++]);
242 break;
243
244 case OP_EXACT:
245 case OP_UPTO:
246 case OP_MINUPTO:
247 if (isprint(c = code[3])) fprintf(outfile, " %c{", c);
248 else fprintf(outfile, " \\x%02x{", c);
249 if (*code != OP_EXACT) fprintf(outfile, ",");
250 fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
251 if (*code == OP_MINUPTO) fprintf(outfile, "?");
252 code += 3;
253 break;
254
255 case OP_TYPEEXACT:
256 case OP_TYPEUPTO:
257 case OP_TYPEMINUPTO:
258 fprintf(outfile, " %s{", OP_names[code[3]]);
259 if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");
260 fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
261 if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");
262 code += 3;
263 break;
264
265 case OP_NOT:
266 if (isprint(c = *(++code))) fprintf(outfile, " [^%c]", c);
267 else fprintf(outfile, " [^\\x%02x]", c);
268 break;
269
270 case OP_NOTSTAR:
271 case OP_NOTMINSTAR:
272 case OP_NOTPLUS:
273 case OP_NOTMINPLUS:
274 case OP_NOTQUERY:
275 case OP_NOTMINQUERY:
276 if (isprint(c = code[1])) fprintf(outfile, " [^%c]", c);
277 else fprintf(outfile, " [^\\x%02x]", c);
278 fprintf(outfile, "%s", OP_names[*code++]);
279 break;
280
281 case OP_NOTEXACT:
282 case OP_NOTUPTO:
283 case OP_NOTMINUPTO:
284 if (isprint(c = code[3])) fprintf(outfile, " [^%c]{", c);
285 else fprintf(outfile, " [^\\x%02x]{", c);
286 if (*code != OP_NOTEXACT) fprintf(outfile, ",");
287 fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
288 if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");
289 code += 3;
290 break;
291
292 case OP_REF:
293 fprintf(outfile, " \\%d", (code[1] << 8) | code[2]);
294 code += 3;
295 goto CLASS_REF_REPEAT;
296
297 case OP_CLASS:
298 {
299 int i, min, max;
300 code++;
301 fprintf(outfile, " [");
302
303 for (i = 0; i < 256; i++)
304 {
305 if ((code[i/8] & (1 << (i&7))) != 0)
306 {
307 int j;
308 for (j = i+1; j < 256; j++)
309 if ((code[j/8] & (1 << (j&7))) == 0) break;
310 if (i == '-' || i == ']') fprintf(outfile, "\\");
311 if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);
312 if (--j > i)
313 {
314 fprintf(outfile, "-");
315 if (j == '-' || j == ']') fprintf(outfile, "\\");
316 if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);
317 }
318 i = j;
319 }
320 }
321 fprintf(outfile, "]");
322 code += 32;
323
324 CLASS_REF_REPEAT:
325
326 switch(*code)
327 {
328 case OP_CRSTAR:
329 case OP_CRMINSTAR:
330 case OP_CRPLUS:
331 case OP_CRMINPLUS:
332 case OP_CRQUERY:
333 case OP_CRMINQUERY:
334 fprintf(outfile, "%s", OP_names[*code]);
335 break;
336
337 case OP_CRRANGE:
338 case OP_CRMINRANGE:
339 min = (code[1] << 8) + code[2];
340 max = (code[3] << 8) + code[4];
341 if (max == 0) fprintf(outfile, "{%d,}", min);
342 else fprintf(outfile, "{%d,%d}", min, max);
343 if (*code == OP_CRMINRANGE) fprintf(outfile, "?");
344 code += 4;
345 break;
346
347 default:
348 code--;
349 }
350 }
351 break;
352
353 /* Anything else is just a one-node item */
354
355 default:
356 fprintf(outfile, " %s", OP_names[*code]);
357 break;
358 }
359
360 code++;
361 fprintf(outfile, "\n");
362 }
363 }
364
365
366
367 /* Character string printing function. A "normal" and a UTF-8 version. */
368
369 static void pchars(unsigned char *p, int length, int utf8)
370 {
371 int c;
372 while (length-- > 0)
373 {
374 if (utf8)
375 {
376 int rc = utf82ord(p, &c);
377 if (rc > 0)
378 {
379 length -= rc - 1;
380 p += rc;
381 if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);
382 else fprintf(outfile, "\\x{%02x}", c);
383 continue;
384 }
385 }
386
387 /* Not UTF-8, or malformed UTF-8 */
388
389 if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
390 else fprintf(outfile, "\\x%02x", c);
391 }
392 }
393
394
395
396 /* Alternative malloc function, to test functionality and show the size of the
397 compiled re. */
398
399 static void *new_malloc(size_t size)
400 {
401 gotten_store = size;
402 if (log_store)
403 fprintf(outfile, "Memory allocation (code space): %d\n",
404 (int)((int)size - offsetof(real_pcre, code[0])));
405 return malloc(size);
406 }
407
408
409
410
411 /* Get one piece of information from the pcre_fullinfo() function */
412
413 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
414 {
415 int rc;
416 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
417 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
418 }
419
420
421
422
423 /* Read lines from named file or stdin and write to named file or stdout; lines
424 consist of a regular expression, in delimiters and optionally followed by
425 options, followed by a set of test data, terminated by an empty line. */
426
427 int main(int argc, char **argv)
428 {
429 FILE *infile = stdin;
430 int options = 0;
431 int study_options = 0;
432 int op = 1;
433 int timeit = 0;
434 int showinfo = 0;
435 int showstore = 0;
436 int size_offsets = 45;
437 int size_offsets_max;
438 int *offsets;
439 #if !defined NOPOSIX
440 int posix = 0;
441 #endif
442 int debug = 0;
443 int done = 0;
444 unsigned char buffer[30000];
445 unsigned char dbuffer[1024];
446
447 /* Static so that new_malloc can use it. */
448
449 outfile = stdout;
450
451 /* Scan options */
452
453 while (argc > 1 && argv[op][0] == '-')
454 {
455 char *endptr;
456
457 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
458 showstore = 1;
459 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
460 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
461 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
462 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
463 ((size_offsets = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
464 {
465 op++;
466 argc--;
467 }
468 #if !defined NOPOSIX
469 else if (strcmp(argv[op], "-p") == 0) posix = 1;
470 #endif
471 else
472 {
473 printf("** Unknown or malformed option %s\n", argv[op]);
474 printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
475 printf(" -d debug: show compiled code; implies -i\n"
476 " -i show information about compiled pattern\n"
477 " -o <n> set size of offsets vector to <n>\n");
478 #if !defined NOPOSIX
479 printf(" -p use POSIX interface\n");
480 #endif
481 printf(" -s output store information\n"
482 " -t time compilation and execution\n");
483 return 1;
484 }
485 op++;
486 argc--;
487 }
488
489 /* Get the store for the offsets vector, and remember what it was */
490
491 size_offsets_max = size_offsets;
492 offsets = malloc(size_offsets_max * sizeof(int));
493 if (offsets == NULL)
494 {
495 printf("** Failed to get %d bytes of memory for offsets vector\n",
496 size_offsets_max * sizeof(int));
497 return 1;
498 }
499
500 /* Sort out the input and output files */
501
502 if (argc > 1)
503 {
504 infile = fopen(argv[op], "r");
505 if (infile == NULL)
506 {
507 printf("** Failed to open %s\n", argv[op]);
508 return 1;
509 }
510 }
511
512 if (argc > 2)
513 {
514 outfile = fopen(argv[op+1], "w");
515 if (outfile == NULL)
516 {
517 printf("** Failed to open %s\n", argv[op+1]);
518 return 1;
519 }
520 }
521
522 /* Set alternative malloc function */
523
524 pcre_malloc = new_malloc;
525
526 /* Heading line, then prompt for first regex if stdin */
527
528 fprintf(outfile, "PCRE version %s\n\n", pcre_version());
529
530 /* Main loop */
531
532 while (!done)
533 {
534 pcre *re = NULL;
535 pcre_extra *extra = NULL;
536
537 #if !defined NOPOSIX /* There are still compilers that require no indent */
538 regex_t preg;
539 int do_posix = 0;
540 #endif
541
542 const char *error;
543 unsigned char *p, *pp, *ppp;
544 const unsigned char *tables = NULL;
545 int do_study = 0;
546 int do_debug = debug;
547 int do_G = 0;
548 int do_g = 0;
549 int do_showinfo = showinfo;
550 int do_showrest = 0;
551 int utf8 = 0;
552 int erroroffset, len, delimiter;
553
554 if (infile == stdin) printf(" re> ");
555 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
556 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
557
558 p = buffer;
559 while (isspace(*p)) p++;
560 if (*p == 0) continue;
561
562 /* Get the delimiter and seek the end of the pattern; if is isn't
563 complete, read more. */
564
565 delimiter = *p++;
566
567 if (isalnum(delimiter) || delimiter == '\\')
568 {
569 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
570 goto SKIP_DATA;
571 }
572
573 pp = p;
574
575 for(;;)
576 {
577 while (*pp != 0)
578 {
579 if (*pp == '\\' && pp[1] != 0) pp++;
580 else if (*pp == delimiter) break;
581 pp++;
582 }
583 if (*pp != 0) break;
584
585 len = sizeof(buffer) - (pp - buffer);
586 if (len < 256)
587 {
588 fprintf(outfile, "** Expression too long - missing delimiter?\n");
589 goto SKIP_DATA;
590 }
591
592 if (infile == stdin) printf(" > ");
593 if (fgets((char *)pp, len, infile) == NULL)
594 {
595 fprintf(outfile, "** Unexpected EOF\n");
596 done = 1;
597 goto CONTINUE;
598 }
599 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
600 }
601
602 /* If the first character after the delimiter is backslash, make
603 the pattern end with backslash. This is purely to provide a way
604 of testing for the error message when a pattern ends with backslash. */
605
606 if (pp[1] == '\\') *pp++ = '\\';
607
608 /* Terminate the pattern at the delimiter */
609
610 *pp++ = 0;
611
612 /* Look for options after final delimiter */
613
614 options = 0;
615 study_options = 0;
616 log_store = showstore; /* default from command line */
617
618 while (*pp != 0)
619 {
620 switch (*pp++)
621 {
622 case 'g': do_g = 1; break;
623 case 'i': options |= PCRE_CASELESS; break;
624 case 'm': options |= PCRE_MULTILINE; break;
625 case 's': options |= PCRE_DOTALL; break;
626 case 'x': options |= PCRE_EXTENDED; break;
627
628 case '+': do_showrest = 1; break;
629 case 'A': options |= PCRE_ANCHORED; break;
630 case 'D': do_debug = do_showinfo = 1; break;
631 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
632 case 'G': do_G = 1; break;
633 case 'I': do_showinfo = 1; break;
634 case 'M': log_store = 1; break;
635
636 #if !defined NOPOSIX
637 case 'P': do_posix = 1; break;
638 #endif
639
640 case 'S': do_study = 1; break;
641 case 'U': options |= PCRE_UNGREEDY; break;
642 case 'X': options |= PCRE_EXTRA; break;
643 case '8': options |= PCRE_UTF8; utf8 = 1; break;
644
645 case 'L':
646 ppp = pp;
647 while (*ppp != '\n' && *ppp != ' ') ppp++;
648 *ppp = 0;
649 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
650 {
651 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
652 goto SKIP_DATA;
653 }
654 tables = pcre_maketables();
655 pp = ppp;
656 break;
657
658 case '\n': case ' ': break;
659 default:
660 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
661 goto SKIP_DATA;
662 }
663 }
664
665 /* Handle compiling via the POSIX interface, which doesn't support the
666 timing, showing, or debugging options, nor the ability to pass over
667 local character tables. */
668
669 #if !defined NOPOSIX
670 if (posix || do_posix)
671 {
672 int rc;
673 int cflags = 0;
674 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
675 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
676 rc = regcomp(&preg, (char *)p, cflags);
677
678 /* Compilation failed; go back for another re, skipping to blank line
679 if non-interactive. */
680
681 if (rc != 0)
682 {
683 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
684 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
685 goto SKIP_DATA;
686 }
687 }
688
689 /* Handle compiling via the native interface */
690
691 else
692 #endif /* !defined NOPOSIX */
693
694 {
695 if (timeit)
696 {
697 register int i;
698 clock_t time_taken;
699 clock_t start_time = clock();
700 for (i = 0; i < LOOPREPEAT; i++)
701 {
702 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
703 if (re != NULL) free(re);
704 }
705 time_taken = clock() - start_time;
706 fprintf(outfile, "Compile time %.3f milliseconds\n",
707 ((double)time_taken * 1000.0) /
708 ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
709 }
710
711 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
712
713 /* Compilation failed; go back for another re, skipping to blank line
714 if non-interactive. */
715
716 if (re == NULL)
717 {
718 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
719 SKIP_DATA:
720 if (infile != stdin)
721 {
722 for (;;)
723 {
724 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
725 {
726 done = 1;
727 goto CONTINUE;
728 }
729 len = (int)strlen((char *)buffer);
730 while (len > 0 && isspace(buffer[len-1])) len--;
731 if (len == 0) break;
732 }
733 fprintf(outfile, "\n");
734 }
735 goto CONTINUE;
736 }
737
738 /* Compilation succeeded; print data if required. There are now two
739 info-returning functions. The old one has a limited interface and
740 returns only limited data. Check that it agrees with the newer one. */
741
742 if (do_showinfo)
743 {
744 unsigned long int get_options;
745 int old_first_char, old_options, old_count;
746 int count, backrefmax, first_char, need_char;
747 size_t size;
748
749 if (do_debug) print_internals(re);
750
751 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
752 new_info(re, NULL, PCRE_INFO_SIZE, &size);
753 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
754 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
755 new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
756 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
757
758 old_count = pcre_info(re, &old_options, &old_first_char);
759 if (count < 0) fprintf(outfile,
760 "Error %d from pcre_info()\n", count);
761 else
762 {
763 if (old_count != count) fprintf(outfile,
764 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
765 old_count);
766
767 if (old_first_char != first_char) fprintf(outfile,
768 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
769 first_char, old_first_char);
770
771 if (old_options != (int)get_options) fprintf(outfile,
772 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
773 get_options, old_options);
774 }
775
776 if (size != gotten_store) fprintf(outfile,
777 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
778 size, gotten_store);
779
780 fprintf(outfile, "Capturing subpattern count = %d\n", count);
781 if (backrefmax > 0)
782 fprintf(outfile, "Max back reference = %d\n", backrefmax);
783 if (get_options == 0) fprintf(outfile, "No options\n");
784 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
785 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
786 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
787 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
788 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
789 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
790 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
791 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
792 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
793 ((get_options & PCRE_UTF8) != 0)? " utf8" : "");
794
795 if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
796 fprintf(outfile, "Case state changes\n");
797
798 if (first_char == -1)
799 {
800 fprintf(outfile, "First char at start or follows \\n\n");
801 }
802 else if (first_char < 0)
803 {
804 fprintf(outfile, "No first char\n");
805 }
806 else
807 {
808 if (isprint(first_char))
809 fprintf(outfile, "First char = \'%c\'\n", first_char);
810 else
811 fprintf(outfile, "First char = %d\n", first_char);
812 }
813
814 if (need_char < 0)
815 {
816 fprintf(outfile, "No need char\n");
817 }
818 else
819 {
820 if (isprint(need_char))
821 fprintf(outfile, "Need char = \'%c\'\n", need_char);
822 else
823 fprintf(outfile, "Need char = %d\n", need_char);
824 }
825 }
826
827 /* If /S was present, study the regexp to generate additional info to
828 help with the matching. */
829
830 if (do_study)
831 {
832 if (timeit)
833 {
834 register int i;
835 clock_t time_taken;
836 clock_t start_time = clock();
837 for (i = 0; i < LOOPREPEAT; i++)
838 extra = pcre_study(re, study_options, &error);
839 time_taken = clock() - start_time;
840 if (extra != NULL) free(extra);
841 fprintf(outfile, " Study time %.3f milliseconds\n",
842 ((double)time_taken * 1000.0)/
843 ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
844 }
845
846 extra = pcre_study(re, study_options, &error);
847 if (error != NULL)
848 fprintf(outfile, "Failed to study: %s\n", error);
849 else if (extra == NULL)
850 fprintf(outfile, "Study returned NULL\n");
851
852 else if (do_showinfo)
853 {
854 uschar *start_bits = NULL;
855 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
856 if (start_bits == NULL)
857 fprintf(outfile, "No starting character set\n");
858 else
859 {
860 int i;
861 int c = 24;
862 fprintf(outfile, "Starting character set: ");
863 for (i = 0; i < 256; i++)
864 {
865 if ((start_bits[i/8] & (1<<(i%8))) != 0)
866 {
867 if (c > 75)
868 {
869 fprintf(outfile, "\n ");
870 c = 2;
871 }
872 if (isprint(i) && i != ' ')
873 {
874 fprintf(outfile, "%c ", i);
875 c += 2;
876 }
877 else
878 {
879 fprintf(outfile, "\\x%02x ", i);
880 c += 5;
881 }
882 }
883 }
884 fprintf(outfile, "\n");
885 }
886 }
887 }
888 }
889
890 /* Read data lines and test them */
891
892 for (;;)
893 {
894 unsigned char *q;
895 unsigned char *bptr = dbuffer;
896 int *use_offsets = offsets;
897 int use_size_offsets = size_offsets;
898 int count, c;
899 int copystrings = 0;
900 int getstrings = 0;
901 int getlist = 0;
902 int gmatched = 0;
903 int start_offset = 0;
904 int g_notempty = 0;
905
906 options = 0;
907
908 if (infile == stdin) printf("data> ");
909 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
910 {
911 done = 1;
912 goto CONTINUE;
913 }
914 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
915
916 len = (int)strlen((char *)buffer);
917 while (len > 0 && isspace(buffer[len-1])) len--;
918 buffer[len] = 0;
919 if (len == 0) break;
920
921 p = buffer;
922 while (isspace(*p)) p++;
923
924 q = dbuffer;
925 while ((c = *p++) != 0)
926 {
927 int i = 0;
928 int n = 0;
929 if (c == '\\') switch ((c = *p++))
930 {
931 case 'a': c = 7; break;
932 case 'b': c = '\b'; break;
933 case 'e': c = 27; break;
934 case 'f': c = '\f'; break;
935 case 'n': c = '\n'; break;
936 case 'r': c = '\r'; break;
937 case 't': c = '\t'; break;
938 case 'v': c = '\v'; break;
939
940 case '0': case '1': case '2': case '3':
941 case '4': case '5': case '6': case '7':
942 c -= '0';
943 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
944 c = c * 8 + *p++ - '0';
945 break;
946
947 case 'x':
948
949 /* Handle \x{..} specially - new Perl thing for utf8 */
950
951 if (*p == '{')
952 {
953 unsigned char *pt = p;
954 c = 0;
955 while (isxdigit(*(++pt)))
956 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
957 if (*pt == '}')
958 {
959 unsigned char buffer[8];
960 int ii, utn;
961 utn = ord2utf8(c, buffer);
962 for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];
963 c = buffer[ii]; /* Last byte */
964 p = pt + 1;
965 break;
966 }
967 /* Not correct form; fall through */
968 }
969
970 /* Ordinary \x */
971
972 c = 0;
973 while (i++ < 2 && isxdigit(*p))
974 {
975 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
976 p++;
977 }
978 break;
979
980 case 0: /* Allows for an empty line */
981 p--;
982 continue;
983
984 case 'A': /* Option setting */
985 options |= PCRE_ANCHORED;
986 continue;
987
988 case 'B':
989 options |= PCRE_NOTBOL;
990 continue;
991
992 case 'C':
993 while(isdigit(*p)) n = n * 10 + *p++ - '0';
994 copystrings |= 1 << n;
995 continue;
996
997 case 'G':
998 while(isdigit(*p)) n = n * 10 + *p++ - '0';
999 getstrings |= 1 << n;
1000 continue;
1001
1002 case 'L':
1003 getlist = 1;
1004 continue;
1005
1006 case 'N':
1007 options |= PCRE_NOTEMPTY;
1008 continue;
1009
1010 case 'O':
1011 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1012 if (n > size_offsets_max)
1013 {
1014 size_offsets_max = n;
1015 free(offsets);
1016 use_offsets = offsets = malloc(size_offsets_max * sizeof(int));
1017 if (offsets == NULL)
1018 {
1019 printf("** Failed to get %d bytes of memory for offsets vector\n",
1020 size_offsets_max * sizeof(int));
1021 return 1;
1022 }
1023 }
1024 use_size_offsets = n;
1025 if (n == 0) use_offsets = NULL;
1026 continue;
1027
1028 case 'Z':
1029 options |= PCRE_NOTEOL;
1030 continue;
1031 }
1032 *q++ = c;
1033 }
1034 *q = 0;
1035 len = q - dbuffer;
1036
1037 /* Handle matching via the POSIX interface, which does not
1038 support timing. */
1039
1040 #if !defined NOPOSIX
1041 if (posix || do_posix)
1042 {
1043 int rc;
1044 int eflags = 0;
1045 regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
1046 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1047 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1048
1049 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1050
1051 if (rc != 0)
1052 {
1053 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
1054 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1055 }
1056 else
1057 {
1058 size_t i;
1059 for (i = 0; i < use_size_offsets; i++)
1060 {
1061 if (pmatch[i].rm_so >= 0)
1062 {
1063 fprintf(outfile, "%2d: ", (int)i);
1064 pchars(dbuffer + pmatch[i].rm_so,
1065 pmatch[i].rm_eo - pmatch[i].rm_so, utf8);
1066 fprintf(outfile, "\n");
1067 if (i == 0 && do_showrest)
1068 {
1069 fprintf(outfile, " 0+ ");
1070 pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);
1071 fprintf(outfile, "\n");
1072 }
1073 }
1074 }
1075 }
1076 free(pmatch);
1077 }
1078
1079 /* Handle matching via the native interface - repeats for /g and /G */
1080
1081 else
1082 #endif /* !defined NOPOSIX */
1083
1084 for (;; gmatched++) /* Loop for /g or /G */
1085 {
1086 if (timeit)
1087 {
1088 register int i;
1089 clock_t time_taken;
1090 clock_t start_time = clock();
1091 for (i = 0; i < LOOPREPEAT; i++)
1092 count = pcre_exec(re, extra, (char *)bptr, len,
1093 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1094 time_taken = clock() - start_time;
1095 fprintf(outfile, "Execute time %.3f milliseconds\n",
1096 ((double)time_taken * 1000.0)/
1097 ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
1098 }
1099
1100 count = pcre_exec(re, extra, (char *)bptr, len,
1101 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1102
1103 if (count == 0)
1104 {
1105 fprintf(outfile, "Matched, but too many substrings\n");
1106 count = use_size_offsets/3;
1107 }
1108
1109 /* Matched */
1110
1111 if (count >= 0)
1112 {
1113 int i;
1114 for (i = 0; i < count * 2; i += 2)
1115 {
1116 if (use_offsets[i] < 0)
1117 fprintf(outfile, "%2d: <unset>\n", i/2);
1118 else
1119 {
1120 fprintf(outfile, "%2d: ", i/2);
1121 pchars(bptr + use_offsets[i], use_offsets[i+1] - use_offsets[i], utf8);
1122 fprintf(outfile, "\n");
1123 if (i == 0)
1124 {
1125 if (do_showrest)
1126 {
1127 fprintf(outfile, " 0+ ");
1128 pchars(bptr + use_offsets[i+1], len - use_offsets[i+1], utf8);
1129 fprintf(outfile, "\n");
1130 }
1131 }
1132 }
1133 }
1134
1135 for (i = 0; i < 32; i++)
1136 {
1137 if ((copystrings & (1 << i)) != 0)
1138 {
1139 char copybuffer[16];
1140 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1141 i, copybuffer, sizeof(copybuffer));
1142 if (rc < 0)
1143 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1144 else
1145 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1146 }
1147 }
1148
1149 for (i = 0; i < 32; i++)
1150 {
1151 if ((getstrings & (1 << i)) != 0)
1152 {
1153 const char *substring;
1154 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1155 i, &substring);
1156 if (rc < 0)
1157 fprintf(outfile, "get substring %d failed %d\n", i, rc);
1158 else
1159 {
1160 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1161 /* free((void *)substring); */
1162 pcre_free_substring(substring);
1163 }
1164 }
1165 }
1166
1167 if (getlist)
1168 {
1169 const char **stringlist;
1170 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1171 &stringlist);
1172 if (rc < 0)
1173 fprintf(outfile, "get substring list failed %d\n", rc);
1174 else
1175 {
1176 for (i = 0; i < count; i++)
1177 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1178 if (stringlist[i] != NULL)
1179 fprintf(outfile, "string list not terminated by NULL\n");
1180 /* free((void *)stringlist); */
1181 pcre_free_substring_list(stringlist);
1182 }
1183 }
1184 }
1185
1186 /* Failed to match. If this is a /g or /G loop and we previously set
1187 g_notempty after a null match, this is not necessarily the end.
1188 We want to advance the start offset, and continue. Fudge the offset
1189 values to achieve this. We won't be at the end of the string - that
1190 was checked before setting g_notempty. */
1191
1192 else
1193 {
1194 if (g_notempty != 0)
1195 {
1196 use_offsets[0] = start_offset;
1197 use_offsets[1] = start_offset + 1;
1198 }
1199 else
1200 {
1201 if (gmatched == 0) /* Error if no previous matches */
1202 {
1203 if (count == -1) fprintf(outfile, "No match\n");
1204 else fprintf(outfile, "Error %d\n", count);
1205 }
1206 break; /* Out of the /g loop */
1207 }
1208 }
1209
1210 /* If not /g or /G we are done */
1211
1212 if (!do_g && !do_G) break;
1213
1214 /* If we have matched an empty string, first check to see if we are at
1215 the end of the subject. If so, the /g loop is over. Otherwise, mimic
1216 what Perl's /g options does. This turns out to be rather cunning. First
1217 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1218 same point. If this fails (picked up above) we advance to the next
1219 character. */
1220
1221 g_notempty = 0;
1222 if (use_offsets[0] == use_offsets[1])
1223 {
1224 if (use_offsets[0] == len) break;
1225 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1226 }
1227
1228 /* For /g, update the start offset, leaving the rest alone */
1229
1230 if (do_g) start_offset = use_offsets[1];
1231
1232 /* For /G, update the pointer and length */
1233
1234 else
1235 {
1236 bptr += use_offsets[1];
1237 len -= use_offsets[1];
1238 }
1239 } /* End of loop for /g and /G */
1240 } /* End of loop for data lines */
1241
1242 CONTINUE:
1243
1244 #if !defined NOPOSIX
1245 if (posix || do_posix) regfree(&preg);
1246 #endif
1247
1248 if (re != NULL) free(re);
1249 if (extra != NULL) free(extra);
1250 if (tables != NULL)
1251 {
1252 free((void *)tables);
1253 setlocale(LC_CTYPE, "C");
1254 }
1255 }
1256
1257 fprintf(outfile, "\n");
1258 return 0;
1259 }
1260
1261 /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12