/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 69 - (show annotations) (download)
Sat Feb 24 21:40:18 2007 UTC (7 years, 4 months ago) by nigel
File MIME type: text/plain
File size: 39441 byte(s)
Load pcre-4.3 into code/trunk.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather untidy in places. */
8
9 #include <ctype.h>
10 #include <stdio.h>
11 #include <string.h>
12 #include <stdlib.h>
13 #include <time.h>
14 #include <locale.h>
15
16 /* We need the internal info for displaying the results of pcre_study(). Also
17 for getting the opcodes for showing compiled code. */
18
19 #define PCRE_SPY /* For Win32 build, import data, not export */
20 #include "internal.h"
21
22 /* It is possible to compile this test program without including support for
23 testing the POSIX interface, though this is not available via the standard
24 Makefile. */
25
26 #if !defined NOPOSIX
27 #include "pcreposix.h"
28 #endif
29
/* Make sure CLOCKS_PER_SEC exists: use whatever <time.h> provides, otherwise
fall back to CLK_TCK, and failing that assume 100 ticks per second. */

#if !defined(CLOCKS_PER_SEC)
#  if defined(CLK_TCK)
#    define CLOCKS_PER_SEC CLK_TCK
#  else
#    define CLOCKS_PER_SEC 100
#  endif
#endif

/* Number of repetitions used by the timing loops */

#define LOOPREPEAT 50000

/* Sizes of the two malloc'd line buffers obtained at the start of main() */

#define BUFFER_SIZE 30000
#define DBUFFER_SIZE 1024
42
43
/* State shared between main() and the helper functions in this file. */

static FILE *outfile;            /* where results and diagnostics are written */
static int log_store = 0;        /* non-zero => report memory used by compiled code */

/* Callout control; reset by main() before each data line is processed */

static int callout_count;        /* incremented by callout() for the failing id */
static int callout_extra;        /* non-zero => callout() prints capture details */
static int callout_fail_count;   /* count at which callout() starts returning 1 */
static int callout_fail_id;      /* callout number that is subject to failing */
static int first_callout;        /* non-zero until callout() has run once */

static int use_utf8;             /* non-zero => pchars() decodes UTF-8 */
static size_t gotten_store;      /* size passed to the latest new_malloc() call */
53
54
55
/* Tables used by ord2utf8() and utf82ord(). Each has one entry per possible
number of additional (continuation) bytes, 0 to 5. */

/* Largest character value that fits in an encoding with that many extra bytes */

static const int utf8_table1[] = {
  0x0000007f,
  0x000007ff,
  0x0000ffff,
  0x001fffff,
  0x03ffffff,
  0x7fffffff
};

/* Marker bits that are ORed into the first byte of the encoding */

static const int utf8_table2[] = {
  0,
  0xc0,
  0xe0,
  0xf0,
  0xf8,
  0xfc
};

/* Mask that extracts the data bits from the first byte of the encoding */

static const int utf8_table3[] = {
  0xff,
  0x1f,
  0x0f,
  0x07,
  0x03,
  0x01
};
64
65
66
67 /*************************************************
68 * Print compiled regex *
69 *************************************************/
70
71 /* The code for doing this is held in a separate file that is also included in
72 pcre.c when it is compiled with the debug switch. It defines a function called
73 print_internals(), which uses a table of opcode lengths defined by the macro
74 OP_LENGTHS, whose name must be OP_lengths. */
75
76 static uschar OP_lengths[] = { OP_LENGTHS };
77
78 #include "printint.c"
79
80
81
82 /*************************************************
83 * Read number from string *
84 *************************************************/
85
/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking the -o argument, so just keep it simple.

Leading white space is skipped, then a run of decimal digits is converted. A
digit string that is too big for an int does not overflow; the result sticks
at the largest int value instead.

Arguments:
  str         string to be converted
  endptr      where to put the end pointer (first character not consumed)

Returns:      the value as a non-negative int; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
/* Largest int, obtained without needing <limits.h>: every bit of an unsigned
int set, shifted right to drop what would be the sign bit. */

const int max_value = (int)(~0u >> 1);
int result = 0;

while (*str != 0 && isspace(*str)) str++;

while (isdigit(*str))
  {
  int digit = *str++ - '0';

  /* result * 10 + digit fits only if result <= (max_value - digit) / 10.
  Once saturated the test keeps failing, so the value stays at the maximum
  while the remaining digits are still consumed. */

  if (result > (max_value - digit) / 10) result = max_value;
  else result = result * 10 + digit;
  }

*endptr = str;
return result;
}
106
107
108
109 /*************************************************
110 * Convert character value to UTF-8 *
111 *************************************************/
112
113 /* This function takes an integer value in the range 0 - 0x7fffffff
114 and encodes it as a UTF-8 character in 0 to 6 bytes.
115
116 Arguments:
117 cvalue the character value
118 buffer pointer to buffer for result - at least 6 bytes long
119
120 Returns: number of characters placed in the buffer
121 -1 if input character is negative
122 0 if input character is positive but too big (only when
123 int is longer than 32 bits)
124 */
125
126 static int
127 ord2utf8(int cvalue, unsigned char *buffer)
128 {
129 register int i, j;
130 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
131 if (cvalue <= utf8_table1[i]) break;
132 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
133 if (cvalue < 0) return -1;
134
135 buffer += i;
136 for (j = i; j > 0; j--)
137 {
138 *buffer-- = 0x80 | (cvalue & 0x3f);
139 cvalue >>= 6;
140 }
141 *buffer = utf8_table2[i] | cvalue;
142 return i + 1;
143 }
144
145
146 /*************************************************
147 * Convert UTF-8 string to value *
148 *************************************************/
149
150 /* This function takes one or more bytes that represents a UTF-8 character,
151 and returns the value of the character.
152
153 Argument:
154 buffer a pointer to the byte vector
155 vptr a pointer to an int to receive the value
156
157 Returns: > 0 => the number of bytes consumed
158 -6 to 0 => malformed UTF-8 character at offset = (-return)
159 */
160
161 static int
162 utf82ord(unsigned char *buffer, int *vptr)
163 {
164 int c = *buffer++;
165 int d = c;
166 int i, j, s;
167
168 for (i = -1; i < 6; i++) /* i is number of additional bytes */
169 {
170 if ((d & 0x80) == 0) break;
171 d <<= 1;
172 }
173
174 if (i == -1) { *vptr = c; return 1; } /* ascii character */
175 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
176
177 /* i now has a value in the range 1-5 */
178
179 s = 6*i;
180 d = (c & utf8_table3[i]) << s;
181
182 for (j = 0; j < i; j++)
183 {
184 c = *buffer++;
185 if ((c & 0xc0) != 0x80) return -(j+1);
186 s -= 6;
187 d |= (c & 0x3f) << s;
188 }
189
190 /* Check that encoding was the correct unique one */
191
192 for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
193 if (d <= utf8_table1[j]) break;
194 if (j != i) return -(i+1);
195
196 /* Valid value */
197
198 *vptr = d;
199 return i+1;
200 }
201
202
203
204 /*************************************************
205 * Print character string *
206 *************************************************/
207
208 /* Character string printing function. Must handle UTF-8 strings in utf8
209 mode. Yields number of characters printed. If handed a NULL file, just counts
210 chars without printing. */
211
212 static int pchars(unsigned char *p, int length, FILE *f)
213 {
214 int c;
215 int yield = 0;
216
217 while (length-- > 0)
218 {
219 if (use_utf8)
220 {
221 int rc = utf82ord(p, &c);
222
223 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
224 {
225 length -= rc - 1;
226 p += rc;
227 if (c < 256 && isprint(c))
228 {
229 if (f != NULL) fprintf(f, "%c", c);
230 yield++;
231 }
232 else
233 {
234 int n;
235 if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
236 yield += n;
237 }
238 continue;
239 }
240 }
241
242 /* Not UTF-8, or malformed UTF-8 */
243
244 if (isprint(c = *(p++)))
245 {
246 if (f != NULL) fprintf(f, "%c", c);
247 yield++;
248 }
249 else
250 {
251 if (f != NULL) fprintf(f, "\\x%02x", c);
252 yield += 4;
253 }
254 }
255
256 return yield;
257 }
258
259
260
261 /*************************************************
262 * Callout function *
263 *************************************************/
264
265 /* Called from PCRE as a result of the (?C) item. We print out where we are in
266 the match. Yield zero unless more callouts than the fail count, or the callout
267 data is not zero. */
268
269 static int callout(pcre_callout_block *cb)
270 {
271 FILE *f = (first_callout | callout_extra)? outfile : NULL;
272 int i, pre_start, post_start;
273
274 if (callout_extra)
275 {
276 fprintf(f, "Callout %d: last capture = %d\n",
277 cb->callout_number, cb->capture_last);
278
279 for (i = 0; i < cb->capture_top * 2; i += 2)
280 {
281 if (cb->offset_vector[i] < 0)
282 fprintf(f, "%2d: <unset>\n", i/2);
283 else
284 {
285 fprintf(f, "%2d: ", i/2);
286 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
287 cb->offset_vector[i+1] - cb->offset_vector[i], f);
288 fprintf(f, "\n");
289 }
290 }
291 }
292
293 /* Re-print the subject in canonical form, the first time or if giving full
294 datails. On subsequent calls in the same match, we use pchars just to find the
295 printed lengths of the substrings. */
296
297 if (f != NULL) fprintf(f, "--->");
298
299 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
300 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
301 cb->current_position - cb->start_match, f);
302
303 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
304 cb->subject_length - cb->current_position, f);
305
306 if (f != NULL) fprintf(f, "\n");
307
308 /* Always print appropriate indicators, with callout number if not already
309 shown */
310
311 if (callout_extra) fprintf(outfile, " ");
312 else fprintf(outfile, "%3d ", cb->callout_number);
313
314 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
315 fprintf(outfile, "^");
316
317 if (post_start > 0)
318 {
319 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
320 fprintf(outfile, "^");
321 }
322
323 fprintf(outfile, "\n");
324
325 first_callout = 0;
326
327 if ((int)(cb->callout_data) != 0)
328 {
329 fprintf(outfile, "Callout data = %d\n", (int)(cb->callout_data));
330 return (int)(cb->callout_data);
331 }
332
333 return (cb->callout_number != callout_fail_id)? 0 :
334 (++callout_count >= callout_fail_count)? 1 : 0;
335 }
336
337
338 /*************************************************
339 * Local malloc function *
340 *************************************************/
341
342 /* Alternative malloc function, to test functionality and show the size of the
343 compiled re. */
344
345 static void *new_malloc(size_t size)
346 {
347 gotten_store = size;
348 return malloc(size);
349 }
350
351
352
353 /*************************************************
354 * Call pcre_fullinfo() *
355 *************************************************/
356
357 /* Get one piece of information from the pcre_fullinfo() function */
358
359 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
360 {
361 int rc;
362 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
363 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
364 }
365
366
367
368 /*************************************************
369 * Main Program *
370 *************************************************/
371
372 /* Read lines from named file or stdin and write to named file or stdout; lines
373 consist of a regular expression, in delimiters and optionally followed by
374 options, followed by a set of test data, terminated by an empty line. */
375
376 int main(int argc, char **argv)
377 {
378 FILE *infile = stdin;
379 int options = 0;
380 int study_options = 0;
381 int op = 1;
382 int timeit = 0;
383 int showinfo = 0;
384 int showstore = 0;
385 int size_offsets = 45;
386 int size_offsets_max;
387 int *offsets;
388 #if !defined NOPOSIX
389 int posix = 0;
390 #endif
391 int debug = 0;
392 int done = 0;
393
394 unsigned char *buffer;
395 unsigned char *dbuffer;
396
397 /* Get buffers from malloc() so that Electric Fence will check their misuse
398 when I am debugging. */
399
400 buffer = malloc(BUFFER_SIZE);
401 dbuffer = malloc(DBUFFER_SIZE);
402
403 /* Static so that new_malloc can use it. */
404
405 outfile = stdout;
406
407 /* Scan options */
408
409 while (argc > 1 && argv[op][0] == '-')
410 {
411 unsigned char *endptr;
412
413 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
414 showstore = 1;
415 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
416 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
417 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
418 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
419 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
420 *endptr == 0))
421 {
422 op++;
423 argc--;
424 }
425 #if !defined NOPOSIX
426 else if (strcmp(argv[op], "-p") == 0) posix = 1;
427 #endif
428 else if (strcmp(argv[op], "-C") == 0)
429 {
430 int rc;
431 printf("PCRE version %s\n", pcre_version());
432 printf("Compiled with\n");
433 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
434 printf(" %sUTF-8 support\n", rc? "" : "No ");
435 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
436 printf(" Newline character is %s\n", (rc == '\r')? "CR" : "LF");
437 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
438 printf(" Internal link size = %d\n", rc);
439 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
440 printf(" POSIX malloc threshold = %d\n", rc);
441 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
442 printf(" Default match limit = %d\n", rc);
443 exit(0);
444 }
445 else
446 {
447 printf("** Unknown or malformed option %s\n", argv[op]);
448 printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
449 printf(" -C show PCRE compile-time options and exit\n");
450 printf(" -d debug: show compiled code; implies -i\n"
451 " -i show information about compiled pattern\n"
452 " -o <n> set size of offsets vector to <n>\n");
453 #if !defined NOPOSIX
454 printf(" -p use POSIX interface\n");
455 #endif
456 printf(" -s output store information\n"
457 " -t time compilation and execution\n");
458 return 1;
459 }
460 op++;
461 argc--;
462 }
463
464 /* Get the store for the offsets vector, and remember what it was */
465
466 size_offsets_max = size_offsets;
467 offsets = malloc(size_offsets_max * sizeof(int));
468 if (offsets == NULL)
469 {
470 printf("** Failed to get %d bytes of memory for offsets vector\n",
471 size_offsets_max * sizeof(int));
472 return 1;
473 }
474
475 /* Sort out the input and output files */
476
477 if (argc > 1)
478 {
479 infile = fopen(argv[op], "r");
480 if (infile == NULL)
481 {
482 printf("** Failed to open %s\n", argv[op]);
483 return 1;
484 }
485 }
486
487 if (argc > 2)
488 {
489 outfile = fopen(argv[op+1], "w");
490 if (outfile == NULL)
491 {
492 printf("** Failed to open %s\n", argv[op+1]);
493 return 1;
494 }
495 }
496
497 /* Set alternative malloc function */
498
499 pcre_malloc = new_malloc;
500
501 /* Heading line, then prompt for first regex if stdin */
502
503 fprintf(outfile, "PCRE version %s\n\n", pcre_version());
504
505 /* Main loop */
506
507 while (!done)
508 {
509 pcre *re = NULL;
510 pcre_extra *extra = NULL;
511
512 #if !defined NOPOSIX /* There are still compilers that require no indent */
513 regex_t preg;
514 int do_posix = 0;
515 #endif
516
517 const char *error;
518 unsigned char *p, *pp, *ppp;
519 const unsigned char *tables = NULL;
520 int do_study = 0;
521 int do_debug = debug;
522 int do_G = 0;
523 int do_g = 0;
524 int do_showinfo = showinfo;
525 int do_showrest = 0;
526 int erroroffset, len, delimiter;
527
528 use_utf8 = 0;
529
530 if (infile == stdin) printf(" re> ");
531 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
532 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
533 fflush(outfile);
534
535 p = buffer;
536 while (isspace(*p)) p++;
537 if (*p == 0) continue;
538
539 /* Get the delimiter and seek the end of the pattern; if it isn't
540 complete, read more. */
541
542 delimiter = *p++;
543
544 if (isalnum(delimiter) || delimiter == '\\')
545 {
546 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
547 goto SKIP_DATA;
548 }
549
550 pp = p;
551
552 for(;;)
553 {
554 while (*pp != 0)
555 {
556 if (*pp == '\\' && pp[1] != 0) pp++;
557 else if (*pp == delimiter) break;
558 pp++;
559 }
560 if (*pp != 0) break;
561
562 len = BUFFER_SIZE - (pp - buffer);
563 if (len < 256)
564 {
565 fprintf(outfile, "** Expression too long - missing delimiter?\n");
566 goto SKIP_DATA;
567 }
568
569 if (infile == stdin) printf(" > ");
570 if (fgets((char *)pp, len, infile) == NULL)
571 {
572 fprintf(outfile, "** Unexpected EOF\n");
573 done = 1;
574 goto CONTINUE;
575 }
576 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
577 }
578
579 /* If the first character after the delimiter is backslash, make
580 the pattern end with backslash. This is purely to provide a way
581 of testing for the error message when a pattern ends with backslash. */
582
583 if (pp[1] == '\\') *pp++ = '\\';
584
585 /* Terminate the pattern at the delimiter */
586
587 *pp++ = 0;
588
589 /* Look for options after final delimiter */
590
591 options = 0;
592 study_options = 0;
593 log_store = showstore; /* default from command line */
594
595 while (*pp != 0)
596 {
597 switch (*pp++)
598 {
599 case 'g': do_g = 1; break;
600 case 'i': options |= PCRE_CASELESS; break;
601 case 'm': options |= PCRE_MULTILINE; break;
602 case 's': options |= PCRE_DOTALL; break;
603 case 'x': options |= PCRE_EXTENDED; break;
604
605 case '+': do_showrest = 1; break;
606 case 'A': options |= PCRE_ANCHORED; break;
607 case 'D': do_debug = do_showinfo = 1; break;
608 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
609 case 'G': do_G = 1; break;
610 case 'I': do_showinfo = 1; break;
611 case 'M': log_store = 1; break;
612 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
613
614 #if !defined NOPOSIX
615 case 'P': do_posix = 1; break;
616 #endif
617
618 case 'S': do_study = 1; break;
619 case 'U': options |= PCRE_UNGREEDY; break;
620 case 'X': options |= PCRE_EXTRA; break;
621 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
622
623 case 'L':
624 ppp = pp;
625 while (*ppp != '\n' && *ppp != ' ') ppp++;
626 *ppp = 0;
627 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
628 {
629 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
630 goto SKIP_DATA;
631 }
632 tables = pcre_maketables();
633 pp = ppp;
634 break;
635
636 case '\n': case ' ': break;
637 default:
638 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
639 goto SKIP_DATA;
640 }
641 }
642
643 /* Handle compiling via the POSIX interface, which doesn't support the
644 timing, showing, or debugging options, nor the ability to pass over
645 local character tables. */
646
647 #if !defined NOPOSIX
648 if (posix || do_posix)
649 {
650 int rc;
651 int cflags = 0;
652 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
653 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
654 rc = regcomp(&preg, (char *)p, cflags);
655
656 /* Compilation failed; go back for another re, skipping to blank line
657 if non-interactive. */
658
659 if (rc != 0)
660 {
661 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
662 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
663 goto SKIP_DATA;
664 }
665 }
666
667 /* Handle compiling via the native interface */
668
669 else
670 #endif /* !defined NOPOSIX */
671
672 {
673 if (timeit)
674 {
675 register int i;
676 clock_t time_taken;
677 clock_t start_time = clock();
678 for (i = 0; i < LOOPREPEAT; i++)
679 {
680 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
681 if (re != NULL) free(re);
682 }
683 time_taken = clock() - start_time;
684 fprintf(outfile, "Compile time %.3f milliseconds\n",
685 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
686 (double)CLOCKS_PER_SEC);
687 }
688
689 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
690
691 /* Compilation failed; go back for another re, skipping to blank line
692 if non-interactive. */
693
694 if (re == NULL)
695 {
696 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
697 SKIP_DATA:
698 if (infile != stdin)
699 {
700 for (;;)
701 {
702 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
703 {
704 done = 1;
705 goto CONTINUE;
706 }
707 len = (int)strlen((char *)buffer);
708 while (len > 0 && isspace(buffer[len-1])) len--;
709 if (len == 0) break;
710 }
711 fprintf(outfile, "\n");
712 }
713 goto CONTINUE;
714 }
715
716 /* Compilation succeeded; print data if required. There are now two
717 info-returning functions. The old one has a limited interface and
718 returns only limited data. Check that it agrees with the newer one. */
719
720 if (log_store)
721 fprintf(outfile, "Memory allocation (code space): %d\n",
722 (int)(gotten_store -
723 sizeof(real_pcre) -
724 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
725
726 if (do_showinfo)
727 {
728 unsigned long int get_options;
729 int old_first_char, old_options, old_count;
730 int count, backrefmax, first_char, need_char;
731 int nameentrysize, namecount;
732 const uschar *nametable;
733 size_t size;
734
735 if (do_debug)
736 {
737 fprintf(outfile, "------------------------------------------------------------------\n");
738 print_internals(re, outfile);
739 }
740
741 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
742 new_info(re, NULL, PCRE_INFO_SIZE, &size);
743 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
744 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
745 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
746 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
747 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
748 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
749 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
750
751 old_count = pcre_info(re, &old_options, &old_first_char);
752 if (count < 0) fprintf(outfile,
753 "Error %d from pcre_info()\n", count);
754 else
755 {
756 if (old_count != count) fprintf(outfile,
757 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
758 old_count);
759
760 if (old_first_char != first_char) fprintf(outfile,
761 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
762 first_char, old_first_char);
763
764 if (old_options != (int)get_options) fprintf(outfile,
765 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
766 get_options, old_options);
767 }
768
769 if (size != gotten_store) fprintf(outfile,
770 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
771 size, gotten_store);
772
773 fprintf(outfile, "Capturing subpattern count = %d\n", count);
774 if (backrefmax > 0)
775 fprintf(outfile, "Max back reference = %d\n", backrefmax);
776
777 if (namecount > 0)
778 {
779 fprintf(outfile, "Named capturing subpatterns:\n");
780 while (namecount-- > 0)
781 {
782 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
783 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
784 GET2(nametable, 0));
785 nametable += nameentrysize;
786 }
787 }
788
789 if (get_options == 0) fprintf(outfile, "No options\n");
790 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
791 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
792 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
793 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
794 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
795 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
796 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
797 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
798 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
799 ((get_options & PCRE_UTF8) != 0)? " utf8" : "");
800
801 if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
802 fprintf(outfile, "Case state changes\n");
803
804 if (first_char == -1)
805 {
806 fprintf(outfile, "First char at start or follows \\n\n");
807 }
808 else if (first_char < 0)
809 {
810 fprintf(outfile, "No first char\n");
811 }
812 else
813 {
814 int ch = first_char & 255;
815 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
816 "" : " (caseless)";
817 if (isprint(ch))
818 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
819 else
820 fprintf(outfile, "First char = %d%s\n", ch, caseless);
821 }
822
823 if (need_char < 0)
824 {
825 fprintf(outfile, "No need char\n");
826 }
827 else
828 {
829 int ch = need_char & 255;
830 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
831 "" : " (caseless)";
832 if (isprint(ch))
833 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
834 else
835 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
836 }
837 }
838
839 /* If /S was present, study the regexp to generate additional info to
840 help with the matching. */
841
842 if (do_study)
843 {
844 if (timeit)
845 {
846 register int i;
847 clock_t time_taken;
848 clock_t start_time = clock();
849 for (i = 0; i < LOOPREPEAT; i++)
850 extra = pcre_study(re, study_options, &error);
851 time_taken = clock() - start_time;
852 if (extra != NULL) free(extra);
853 fprintf(outfile, " Study time %.3f milliseconds\n",
854 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
855 (double)CLOCKS_PER_SEC);
856 }
857
858 extra = pcre_study(re, study_options, &error);
859 if (error != NULL)
860 fprintf(outfile, "Failed to study: %s\n", error);
861 else if (extra == NULL)
862 fprintf(outfile, "Study returned NULL\n");
863
864 else if (do_showinfo)
865 {
866 size_t size;
867 uschar *start_bits = NULL;
868 new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);
869 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
870 fprintf(outfile, "Study size = %d\n", size);
871 if (start_bits == NULL)
872 fprintf(outfile, "No starting character set\n");
873 else
874 {
875 int i;
876 int c = 24;
877 fprintf(outfile, "Starting character set: ");
878 for (i = 0; i < 256; i++)
879 {
880 if ((start_bits[i/8] & (1<<(i%8))) != 0)
881 {
882 if (c > 75)
883 {
884 fprintf(outfile, "\n ");
885 c = 2;
886 }
887 if (isprint(i) && i != ' ')
888 {
889 fprintf(outfile, "%c ", i);
890 c += 2;
891 }
892 else
893 {
894 fprintf(outfile, "\\x%02x ", i);
895 c += 5;
896 }
897 }
898 }
899 fprintf(outfile, "\n");
900 }
901 }
902 }
903 }
904
905 /* Read data lines and test them */
906
907 for (;;)
908 {
909 unsigned char *q;
910 unsigned char *bptr = dbuffer;
911 int *use_offsets = offsets;
912 int use_size_offsets = size_offsets;
913 int callout_data = 0;
914 int callout_data_set = 0;
915 int count, c;
916 int copystrings = 0;
917 int find_match_limit = 0;
918 int getstrings = 0;
919 int getlist = 0;
920 int gmatched = 0;
921 int start_offset = 0;
922 int g_notempty = 0;
923
924 options = 0;
925
926 pcre_callout = callout;
927 first_callout = 1;
928 callout_extra = 0;
929 callout_count = 0;
930 callout_fail_count = 999999;
931 callout_fail_id = -1;
932
933 if (infile == stdin) printf("data> ");
934 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
935 {
936 done = 1;
937 goto CONTINUE;
938 }
939 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
940
941 len = (int)strlen((char *)buffer);
942 while (len > 0 && isspace(buffer[len-1])) len--;
943 buffer[len] = 0;
944 if (len == 0) break;
945
946 p = buffer;
947 while (isspace(*p)) p++;
948
949 q = dbuffer;
950 while ((c = *p++) != 0)
951 {
952 int i = 0;
953 int n = 0;
954
955 if (c == '\\') switch ((c = *p++))
956 {
957 case 'a': c = 7; break;
958 case 'b': c = '\b'; break;
959 case 'e': c = 27; break;
960 case 'f': c = '\f'; break;
961 case 'n': c = '\n'; break;
962 case 'r': c = '\r'; break;
963 case 't': c = '\t'; break;
964 case 'v': c = '\v'; break;
965
966 case '0': case '1': case '2': case '3':
967 case '4': case '5': case '6': case '7':
968 c -= '0';
969 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
970 c = c * 8 + *p++ - '0';
971 break;
972
973 case 'x':
974
975 /* Handle \x{..} specially - new Perl thing for utf8 */
976
977 if (*p == '{')
978 {
979 unsigned char *pt = p;
980 c = 0;
981 while (isxdigit(*(++pt)))
982 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
983 if (*pt == '}')
984 {
985 unsigned char buff8[8];
986 int ii, utn;
987 utn = ord2utf8(c, buff8);
988 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
989 c = buff8[ii]; /* Last byte */
990 p = pt + 1;
991 break;
992 }
993 /* Not correct form; fall through */
994 }
995
996 /* Ordinary \x */
997
998 c = 0;
999 while (i++ < 2 && isxdigit(*p))
1000 {
1001 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1002 p++;
1003 }
1004 break;
1005
1006 case 0: /* Allows for an empty line */
1007 p--;
1008 continue;
1009
1010 case 'A': /* Option setting */
1011 options |= PCRE_ANCHORED;
1012 continue;
1013
1014 case 'B':
1015 options |= PCRE_NOTBOL;
1016 continue;
1017
1018 case 'C':
1019 if (isdigit(*p)) /* Set copy string */
1020 {
1021 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1022 copystrings |= 1 << n;
1023 }
1024 else if (isalnum(*p))
1025 {
1026 uschar name[256];
1027 uschar *npp = name;
1028 while (isalnum(*p)) *npp++ = *p++;
1029 *npp = 0;
1030 n = pcre_get_stringnumber(re, (char *)name);
1031 if (n < 0)
1032 fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1033 else copystrings |= 1 << n;
1034 }
1035 else if (*p == '+')
1036 {
1037 callout_extra = 1;
1038 p++;
1039 }
1040 else if (*p == '-')
1041 {
1042 pcre_callout = NULL;
1043 p++;
1044 }
1045 else if (*p == '!')
1046 {
1047 callout_fail_id = 0;
1048 p++;
1049 while(isdigit(*p))
1050 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1051 callout_fail_count = 0;
1052 if (*p == '!')
1053 {
1054 p++;
1055 while(isdigit(*p))
1056 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1057 }
1058 }
1059 else if (*p == '*')
1060 {
1061 int sign = 1;
1062 callout_data = 0;
1063 if (*(++p) == '-') { sign = -1; p++; }
1064 while(isdigit(*p))
1065 callout_data = callout_data * 10 + *p++ - '0';
1066 callout_data *= sign;
1067 callout_data_set = 1;
1068 }
1069 continue;
1070
1071 case 'G':
1072 if (isdigit(*p))
1073 {
1074 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1075 getstrings |= 1 << n;
1076 }
1077 else if (isalnum(*p))
1078 {
1079 uschar name[256];
1080 uschar *npp = name;
1081 while (isalnum(*p)) *npp++ = *p++;
1082 *npp = 0;
1083 n = pcre_get_stringnumber(re, (char *)name);
1084 if (n < 0)
1085 fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1086 else getstrings |= 1 << n;
1087 }
1088 continue;
1089
1090 case 'L':
1091 getlist = 1;
1092 continue;
1093
1094 case 'M':
1095 find_match_limit = 1;
1096 continue;
1097
1098 case 'N':
1099 options |= PCRE_NOTEMPTY;
1100 continue;
1101
1102 case 'O':
1103 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1104 if (n > size_offsets_max)
1105 {
1106 size_offsets_max = n;
1107 free(offsets);
1108 use_offsets = offsets = malloc(size_offsets_max * sizeof(int));
1109 if (offsets == NULL)
1110 {
1111 printf("** Failed to get %d bytes of memory for offsets vector\n",
1112 size_offsets_max * sizeof(int));
1113 return 1;
1114 }
1115 }
1116 use_size_offsets = n;
1117 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1118 continue;
1119
1120 case 'Z':
1121 options |= PCRE_NOTEOL;
1122 continue;
1123 }
1124 *q++ = c;
1125 }
1126 *q = 0;
1127 len = q - dbuffer;
1128
1129 /* Handle matching via the POSIX interface, which does not
1130 support timing or playing with the match limit or callout data. */
1131
1132 #if !defined NOPOSIX
1133 if (posix || do_posix)
1134 {
1135 int rc;
1136 int eflags = 0;
1137 regmatch_t *pmatch = NULL;
1138 if (use_size_offsets > 0)
1139 pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
1140 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1141 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1142
1143 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1144
1145 if (rc != 0)
1146 {
1147 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1148 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1149 }
1150 else
1151 {
1152 size_t i;
1153 for (i = 0; i < (size_t)use_size_offsets; i++)
1154 {
1155 if (pmatch[i].rm_so >= 0)
1156 {
1157 fprintf(outfile, "%2d: ", (int)i);
1158 (void)pchars(dbuffer + pmatch[i].rm_so,
1159 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1160 fprintf(outfile, "\n");
1161 if (i == 0 && do_showrest)
1162 {
1163 fprintf(outfile, " 0+ ");
1164 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1165 outfile);
1166 fprintf(outfile, "\n");
1167 }
1168 }
1169 }
1170 }
1171 free(pmatch);
1172 }
1173
1174 /* Handle matching via the native interface - repeats for /g and /G */
1175
1176 else
1177 #endif /* !defined NOPOSIX */
1178
1179 for (;; gmatched++) /* Loop for /g or /G */
1180 {
1181 if (timeit)
1182 {
1183 register int i;
1184 clock_t time_taken;
1185 clock_t start_time = clock();
1186 for (i = 0; i < LOOPREPEAT; i++)
1187 count = pcre_exec(re, extra, (char *)bptr, len,
1188 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1189 time_taken = clock() - start_time;
1190 fprintf(outfile, "Execute time %.3f milliseconds\n",
1191 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1192 (double)CLOCKS_PER_SEC);
1193 }
1194
1195 /* If find_match_limit is set, we want to do repeated matches with
1196 varying limits in order to find the minimum value. */
1197
1198 if (find_match_limit)
1199 {
1200 int min = 0;
1201 int mid = 64;
1202 int max = -1;
1203
1204 if (extra == NULL)
1205 {
1206 extra = malloc(sizeof(pcre_extra));
1207 extra->flags = 0;
1208 }
1209 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1210
1211 for (;;)
1212 {
1213 extra->match_limit = mid;
1214 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1215 options | g_notempty, use_offsets, use_size_offsets);
1216 if (count == PCRE_ERROR_MATCHLIMIT)
1217 {
1218 /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1219 min = mid;
1220 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1221 }
1222 else if (count >= 0 || count == PCRE_ERROR_NOMATCH)
1223 {
1224 if (mid == min + 1)
1225 {
1226 fprintf(outfile, "Minimum match limit = %d\n", mid);
1227 break;
1228 }
1229 /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1230 max = mid;
1231 mid = (min + mid)/2;
1232 }
1233 else break; /* Some other error */
1234 }
1235
1236 extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1237 }
1238
1239 /* If callout_data is set, use the interface with additional data */
1240
1241 else if (callout_data_set)
1242 {
1243 if (extra == NULL)
1244 {
1245 extra = malloc(sizeof(pcre_extra));
1246 extra->flags = 0;
1247 }
1248 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1249 extra->callout_data = (void *)callout_data;
1250 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1251 options | g_notempty, use_offsets, use_size_offsets);
1252 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1253 }
1254
1255 /* The normal case is just to do the match once, with the default
1256 value of match_limit. */
1257
1258 else count = pcre_exec(re, extra, (char *)bptr, len,
1259 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1260
1261 if (count == 0)
1262 {
1263 fprintf(outfile, "Matched, but too many substrings\n");
1264 count = use_size_offsets/3;
1265 }
1266
1267 /* Matched */
1268
1269 if (count >= 0)
1270 {
1271 int i;
1272 for (i = 0; i < count * 2; i += 2)
1273 {
1274 if (use_offsets[i] < 0)
1275 fprintf(outfile, "%2d: <unset>\n", i/2);
1276 else
1277 {
1278 fprintf(outfile, "%2d: ", i/2);
1279 (void)pchars(bptr + use_offsets[i],
1280 use_offsets[i+1] - use_offsets[i], outfile);
1281 fprintf(outfile, "\n");
1282 if (i == 0)
1283 {
1284 if (do_showrest)
1285 {
1286 fprintf(outfile, " 0+ ");
1287 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1288 outfile);
1289 fprintf(outfile, "\n");
1290 }
1291 }
1292 }
1293 }
1294
1295 for (i = 0; i < 32; i++)
1296 {
1297 if ((copystrings & (1 << i)) != 0)
1298 {
1299 char copybuffer[16];
1300 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1301 i, copybuffer, sizeof(copybuffer));
1302 if (rc < 0)
1303 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1304 else
1305 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1306 }
1307 }
1308
1309 for (i = 0; i < 32; i++)
1310 {
1311 if ((getstrings & (1 << i)) != 0)
1312 {
1313 const char *substring;
1314 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1315 i, &substring);
1316 if (rc < 0)
1317 fprintf(outfile, "get substring %d failed %d\n", i, rc);
1318 else
1319 {
1320 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1321 /* free((void *)substring); */
1322 pcre_free_substring(substring);
1323 }
1324 }
1325 }
1326
1327 if (getlist)
1328 {
1329 const char **stringlist;
1330 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1331 &stringlist);
1332 if (rc < 0)
1333 fprintf(outfile, "get substring list failed %d\n", rc);
1334 else
1335 {
1336 for (i = 0; i < count; i++)
1337 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1338 if (stringlist[i] != NULL)
1339 fprintf(outfile, "string list not terminated by NULL\n");
1340 /* free((void *)stringlist); */
1341 pcre_free_substring_list(stringlist);
1342 }
1343 }
1344 }
1345
1346 /* Failed to match. If this is a /g or /G loop and we previously set
1347 g_notempty after a null match, this is not necessarily the end.
1348 We want to advance the start offset, and continue. Fudge the offset
1349 values to achieve this. We won't be at the end of the string - that
1350 was checked before setting g_notempty. */
1351
1352 else
1353 {
1354 if (g_notempty != 0)
1355 {
1356 use_offsets[0] = start_offset;
1357 use_offsets[1] = start_offset + 1;
1358 }
1359 else
1360 {
1361 if (gmatched == 0) /* Error if no previous matches */
1362 {
1363 if (count == -1) fprintf(outfile, "No match\n");
1364 else fprintf(outfile, "Error %d\n", count);
1365 }
1366 break; /* Out of the /g loop */
1367 }
1368 }
1369
1370 /* If not /g or /G we are done */
1371
1372 if (!do_g && !do_G) break;
1373
1374 /* If we have matched an empty string, first check to see if we are at
1375 the end of the subject. If so, the /g loop is over. Otherwise, mimic
1376 what Perl's /g option does. This turns out to be rather cunning. First
1377 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1378 same point. If this fails (picked up above) we advance to the next
1379 character. */
1380
1381 g_notempty = 0;
1382 if (use_offsets[0] == use_offsets[1])
1383 {
1384 if (use_offsets[0] == len) break;
1385 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1386 }
1387
1388 /* For /g, update the start offset, leaving the rest alone */
1389
1390 if (do_g) start_offset = use_offsets[1];
1391
1392 /* For /G, update the pointer and length */
1393
1394 else
1395 {
1396 bptr += use_offsets[1];
1397 len -= use_offsets[1];
1398 }
1399 } /* End of loop for /g and /G */
1400 } /* End of loop for data lines */
1401
1402 CONTINUE:
1403
1404 #if !defined NOPOSIX
1405 if (posix || do_posix) regfree(&preg);
1406 #endif
1407
1408 if (re != NULL) free(re);
1409 if (extra != NULL) free(extra);
1410 if (tables != NULL)
1411 {
1412 free((void *)tables);
1413 setlocale(LC_CTYPE, "C");
1414 }
1415 }
1416
1417 fprintf(outfile, "\n");
1418 return 0;
1419 }
1420
1421 /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12