/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 71 - (show annotations) (download)
Sat Feb 24 21:40:24 2007 UTC (7 years, 1 month ago) by nigel
File MIME type: text/plain
File size: 39984 byte(s)
Load pcre-4.4 into code/trunk.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather untidy in places. */
8
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <locale.h>
15
16 /* We need the internal info for displaying the results of pcre_study(). Also
17 for getting the opcodes for showing compiled code. */
18
19 #define PCRE_SPY /* For Win32 build, import data, not export */
20 #include "internal.h"
21
22 /* It is possible to compile this test program without including support for
23 testing the POSIX interface, though this is not available via the standard
24 Makefile. */
25
26 #if !defined NOPOSIX
27 #include "pcreposix.h"
28 #endif
29
30 #ifndef CLOCKS_PER_SEC /* Fallback when <time.h> does not supply CLOCKS_PER_SEC */
31 #ifdef CLK_TCK
32 #define CLOCKS_PER_SEC CLK_TCK /* Use CLK_TCK when it is defined instead */
33 #else
34 #define CLOCKS_PER_SEC 100 /* Otherwise fall back to 100 */
35 #endif
36 #endif
37
38 #define LOOPREPEAT 50000 /* Iteration count of the timing loops; timings are divided by it */
39
40 #define BUFFER_SIZE 30000 /* Bytes allocated for the line buffer that fgets() reads into */
41 #define DBUFFER_SIZE 1024 /* Bytes allocated for the buffer that receives the decoded data line */
42
43
44 static FILE *outfile; /* Stream for test output; main() starts it at stdout and may reopen it on a named file */
45 static int log_store = 0; /* Nonzero => print the code-space size of each compiled pattern (-s, -m or /M) */
46 static int callout_count; /* Number of callouts seen whose number equals callout_fail_id; reset per data line */
47 static int callout_extra; /* Nonzero => callout() also prints the captured substrings (set by \C+ in data) */
48 static int callout_fail_count; /* callout() yields 1 once callout_count reaches this value */
49 static int callout_fail_id; /* Callout number that is counted against callout_fail_count; -1 by default */
50 static int first_callout; /* Set to 1 before each data line is matched; cleared by callout() */
51 static int use_utf8; /* Nonzero => pchars() decodes strings as UTF-8 (set by the /8 pattern option) */
52 static size_t gotten_store; /* Size requested in the most recent new_malloc() call */
53
54
55 static const int utf8_table1[] = { /* Largest value that fits in a UTF-8 sequence of 1, 2, ... 6 bytes */
56 0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
57
58 static const int utf8_table2[] = { /* Bits ORed into the first byte, indexed by number of additional bytes */
59 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
60
61 static const int utf8_table3[] = { /* Mask for the data bits of the first byte, indexed the same way */
62 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
63
64
65
66 /*************************************************
67 * Print compiled regex *
68 *************************************************/
69
70 /* The code for doing this is held in a separate file that is also included in
71 pcre.c when it is compiled with the debug switch. It defines a function called
72 print_internals(), which uses a table of opcode lengths defined by the macro
73 OP_LENGTHS, whose name must be OP_lengths. */
74
75 static uschar OP_lengths[] = { OP_LENGTHS }; /* Opcode length table, filled from the OP_LENGTHS macro, for the code in printint.c */
76
77 #include "printint.c"
78
79
80
81 /*************************************************
82 * Read number from string *
83 *************************************************/
84
/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking the -o argument, so just keep it simple.

Leading white space is skipped and then a run of decimal digits is
accumulated. A number too large for an int saturates at INT_MAX rather than
overflowing (signed overflow is undefined behaviour); the remaining digits are
still consumed, so *endptr always ends up just past the last digit.

Arguments:
  str        string to be converted
  endptr     where to put the end pointer

Returns:     the value as a non-negative int (zero if there are no digits)
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (isspace(*str)) str++;
while (isdigit(*str))
  {
  int digit = *str++ - '0';
  /* result*10 + digit fits in an int only if result <= (INT_MAX - digit)/10 */
  if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
  }
*endptr = str;
return(result);
}
105
106
107
108 /*************************************************
109 * Convert character value to UTF-8 *
110 *************************************************/
111
112 /* This function takes an integer value in the range 0 - 0x7fffffff
113 and encodes it as a UTF-8 character in 0 to 6 bytes.
114
115 Arguments:
116 cvalue the character value
117 buffer pointer to buffer for result - at least 6 bytes long
118
119 Returns: number of characters placed in the buffer
120 -1 if input character is negative
121 0 if input character is positive but too big (only when
122 int is longer than 32 bits)
123 */
124
125 static int
126 ord2utf8(int cvalue, unsigned char *buffer)
127 {
128 register int i, j;
129 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
130 if (cvalue <= utf8_table1[i]) break;
131 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
132 if (cvalue < 0) return -1;
133
134 buffer += i;
135 for (j = i; j > 0; j--)
136 {
137 *buffer-- = 0x80 | (cvalue & 0x3f);
138 cvalue >>= 6;
139 }
140 *buffer = utf8_table2[i] | cvalue;
141 return i + 1;
142 }
143
144
145 /*************************************************
146 * Convert UTF-8 string to value *
147 *************************************************/
148
149 /* This function takes one or more bytes that represents a UTF-8 character,
150 and returns the value of the character.
151
152 Argument:
153 buffer a pointer to the byte vector
154 vptr a pointer to an int to receive the value
155
156 Returns: > 0 => the number of bytes consumed
157 -6 to 0 => malformed UTF-8 character at offset = (-return)
158 */
159
160 static int
161 utf82ord(unsigned char *buffer, int *vptr)
162 {
163 int c = *buffer++;
164 int d = c;
165 int i, j, s;
166
167 for (i = -1; i < 6; i++) /* i is number of additional bytes */
168 {
169 if ((d & 0x80) == 0) break;
170 d <<= 1;
171 }
172
173 if (i == -1) { *vptr = c; return 1; } /* ascii character */
174 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
175
176 /* i now has a value in the range 1-5 */
177
178 s = 6*i;
179 d = (c & utf8_table3[i]) << s;
180
181 for (j = 0; j < i; j++)
182 {
183 c = *buffer++;
184 if ((c & 0xc0) != 0x80) return -(j+1);
185 s -= 6;
186 d |= (c & 0x3f) << s;
187 }
188
189 /* Check that encoding was the correct unique one */
190
191 for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
192 if (d <= utf8_table1[j]) break;
193 if (j != i) return -(i+1);
194
195 /* Valid value */
196
197 *vptr = d;
198 return i+1;
199 }
200
201
202
203 /*************************************************
204 * Print character string *
205 *************************************************/
206
207 /* Character string printing function. Must handle UTF-8 strings in utf8
208 mode. Yields number of characters printed. If handed a NULL file, just counts
209 chars without printing. */
210
211 static int pchars(unsigned char *p, int length, FILE *f)
212 {
213 int c;
214 int yield = 0;
215
216 while (length-- > 0)
217 {
218 if (use_utf8)
219 {
220 int rc = utf82ord(p, &c);
221
222 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
223 {
224 length -= rc - 1;
225 p += rc;
226 if (c < 256 && isprint(c))
227 {
228 if (f != NULL) fprintf(f, "%c", c);
229 yield++;
230 }
231 else
232 {
233 int n;
234 if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
235 yield += n;
236 }
237 continue;
238 }
239 }
240
241 /* Not UTF-8, or malformed UTF-8 */
242
243 if (isprint(c = *(p++)))
244 {
245 if (f != NULL) fprintf(f, "%c", c);
246 yield++;
247 }
248 else
249 {
250 if (f != NULL) fprintf(f, "\\x%02x", c);
251 yield += 4;
252 }
253 }
254
255 return yield;
256 }
257
258
259
260 /*************************************************
261 * Callout function *
262 *************************************************/
263
264 /* Called from PCRE as a result of the (?C) item. We print out where we are in
265 the match. Yield zero unless more callouts than the fail count, or the callout
266 data is not zero. */
267
268 static int callout(pcre_callout_block *cb)
269 {
270 FILE *f = (first_callout | callout_extra)? outfile : NULL;
271 int i, pre_start, post_start;
272
273 if (callout_extra)
274 {
275 fprintf(f, "Callout %d: last capture = %d\n",
276 cb->callout_number, cb->capture_last);
277
278 for (i = 0; i < cb->capture_top * 2; i += 2)
279 {
280 if (cb->offset_vector[i] < 0)
281 fprintf(f, "%2d: <unset>\n", i/2);
282 else
283 {
284 fprintf(f, "%2d: ", i/2);
285 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
286 cb->offset_vector[i+1] - cb->offset_vector[i], f);
287 fprintf(f, "\n");
288 }
289 }
290 }
291
292 /* Re-print the subject in canonical form, the first time or if giving full
293 datails. On subsequent calls in the same match, we use pchars just to find the
294 printed lengths of the substrings. */
295
296 if (f != NULL) fprintf(f, "--->");
297
298 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
299 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
300 cb->current_position - cb->start_match, f);
301
302 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
303 cb->subject_length - cb->current_position, f);
304
305 if (f != NULL) fprintf(f, "\n");
306
307 /* Always print appropriate indicators, with callout number if not already
308 shown */
309
310 if (callout_extra) fprintf(outfile, " ");
311 else fprintf(outfile, "%3d ", cb->callout_number);
312
313 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
314 fprintf(outfile, "^");
315
316 if (post_start > 0)
317 {
318 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
319 fprintf(outfile, "^");
320 }
321
322 fprintf(outfile, "\n");
323 first_callout = 0;
324
325 if (cb->callout_data != NULL)
326 {
327 int callout_data = *((int *)(cb->callout_data));
328 if (callout_data != 0)
329 {
330 fprintf(outfile, "Callout data = %d\n", callout_data);
331 return callout_data;
332 }
333 }
334
335 return (cb->callout_number != callout_fail_id)? 0 :
336 (++callout_count >= callout_fail_count)? 1 : 0;
337 }
338
339
340 /*************************************************
341 * Local malloc function *
342 *************************************************/
343
344 /* Alternative malloc function, to test functionality and show the size of the
345 compiled re. */
346
347 static void *new_malloc(size_t size)
348 {
349 gotten_store = size;
350 return malloc(size);
351 }
352
353
354
355 /*************************************************
356 * Call pcre_fullinfo() *
357 *************************************************/
358
359 /* Get one piece of information from the pcre_fullinfo() function */
360
361 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
362 {
363 int rc;
364 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
365 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
366 }
367
368
369
370 /*************************************************
371 * Main Program *
372 *************************************************/
373
374 /* Read lines from named file or stdin and write to named file or stdout; lines
375 consist of a regular expression, in delimiters and optionally followed by
376 options, followed by a set of test data, terminated by an empty line. */
377
378 int main(int argc, char **argv)
379 {
380 FILE *infile = stdin;
381 int options = 0;
382 int study_options = 0;
383 int op = 1;
384 int timeit = 0;
385 int showinfo = 0;
386 int showstore = 0;
387 int size_offsets = 45;
388 int size_offsets_max;
389 int *offsets;
390 #if !defined NOPOSIX
391 int posix = 0;
392 #endif
393 int debug = 0;
394 int done = 0;
395
396 unsigned char *buffer;
397 unsigned char *dbuffer;
398
399 /* Get buffers from malloc() so that Electric Fence will check their misuse
400 when I am debugging. */
401
402 buffer = (unsigned char *)malloc(BUFFER_SIZE);
403 dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
404
405 /* Static so that new_malloc can use it. */
406
407 outfile = stdout;
408
409 /* Scan options */
410
411 while (argc > 1 && argv[op][0] == '-')
412 {
413 unsigned char *endptr;
414
415 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
416 showstore = 1;
417 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
418 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
419 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
420 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
421 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
422 *endptr == 0))
423 {
424 op++;
425 argc--;
426 }
427 #if !defined NOPOSIX
428 else if (strcmp(argv[op], "-p") == 0) posix = 1;
429 #endif
430 else if (strcmp(argv[op], "-C") == 0)
431 {
432 int rc;
433 printf("PCRE version %s\n", pcre_version());
434 printf("Compiled with\n");
435 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
436 printf(" %sUTF-8 support\n", rc? "" : "No ");
437 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
438 printf(" Newline character is %s\n", (rc == '\r')? "CR" : "LF");
439 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
440 printf(" Internal link size = %d\n", rc);
441 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
442 printf(" POSIX malloc threshold = %d\n", rc);
443 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
444 printf(" Default match limit = %d\n", rc);
445 exit(0);
446 }
447 else
448 {
449 printf("** Unknown or malformed option %s\n", argv[op]);
450 printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
451 printf(" -C show PCRE compile-time options and exit\n");
452 printf(" -d debug: show compiled code; implies -i\n"
453 " -i show information about compiled pattern\n"
454 " -o <n> set size of offsets vector to <n>\n");
455 #if !defined NOPOSIX
456 printf(" -p use POSIX interface\n");
457 #endif
458 printf(" -s output store information\n"
459 " -t time compilation and execution\n");
460 return 1;
461 }
462 op++;
463 argc--;
464 }
465
466 /* Get the store for the offsets vector, and remember what it was */
467
468 size_offsets_max = size_offsets;
469 offsets = (int *)malloc(size_offsets_max * sizeof(int));
470 if (offsets == NULL)
471 {
472 printf("** Failed to get %d bytes of memory for offsets vector\n",
473 size_offsets_max * sizeof(int));
474 return 1;
475 }
476
477 /* Sort out the input and output files */
478
479 if (argc > 1)
480 {
481 infile = fopen(argv[op], "r");
482 if (infile == NULL)
483 {
484 printf("** Failed to open %s\n", argv[op]);
485 return 1;
486 }
487 }
488
489 if (argc > 2)
490 {
491 outfile = fopen(argv[op+1], "w");
492 if (outfile == NULL)
493 {
494 printf("** Failed to open %s\n", argv[op+1]);
495 return 1;
496 }
497 }
498
499 /* Set alternative malloc function */
500
501 pcre_malloc = new_malloc;
502
503 /* Heading line, then prompt for first regex if stdin */
504
505 fprintf(outfile, "PCRE version %s\n\n", pcre_version());
506
507 /* Main loop */
508
509 while (!done)
510 {
511 pcre *re = NULL;
512 pcre_extra *extra = NULL;
513
514 #if !defined NOPOSIX /* There are still compilers that require no indent */
515 regex_t preg;
516 int do_posix = 0;
517 #endif
518
519 const char *error;
520 unsigned char *p, *pp, *ppp;
521 const unsigned char *tables = NULL;
522 int do_study = 0;
523 int do_debug = debug;
524 int do_G = 0;
525 int do_g = 0;
526 int do_showinfo = showinfo;
527 int do_showrest = 0;
528 int erroroffset, len, delimiter;
529
530 use_utf8 = 0;
531
532 if (infile == stdin) printf(" re> ");
533 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
534 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
535 fflush(outfile);
536
537 p = buffer;
538 while (isspace(*p)) p++;
539 if (*p == 0) continue;
540
541 /* Get the delimiter and seek the end of the pattern; if is isn't
542 complete, read more. */
543
544 delimiter = *p++;
545
546 if (isalnum(delimiter) || delimiter == '\\')
547 {
548 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
549 goto SKIP_DATA;
550 }
551
552 pp = p;
553
554 for(;;)
555 {
556 while (*pp != 0)
557 {
558 if (*pp == '\\' && pp[1] != 0) pp++;
559 else if (*pp == delimiter) break;
560 pp++;
561 }
562 if (*pp != 0) break;
563
564 len = BUFFER_SIZE - (pp - buffer);
565 if (len < 256)
566 {
567 fprintf(outfile, "** Expression too long - missing delimiter?\n");
568 goto SKIP_DATA;
569 }
570
571 if (infile == stdin) printf(" > ");
572 if (fgets((char *)pp, len, infile) == NULL)
573 {
574 fprintf(outfile, "** Unexpected EOF\n");
575 done = 1;
576 goto CONTINUE;
577 }
578 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
579 }
580
581 /* If the first character after the delimiter is backslash, make
582 the pattern end with backslash. This is purely to provide a way
583 of testing for the error message when a pattern ends with backslash. */
584
585 if (pp[1] == '\\') *pp++ = '\\';
586
587 /* Terminate the pattern at the delimiter */
588
589 *pp++ = 0;
590
591 /* Look for options after final delimiter */
592
593 options = 0;
594 study_options = 0;
595 log_store = showstore; /* default from command line */
596
597 while (*pp != 0)
598 {
599 switch (*pp++)
600 {
601 case 'g': do_g = 1; break;
602 case 'i': options |= PCRE_CASELESS; break;
603 case 'm': options |= PCRE_MULTILINE; break;
604 case 's': options |= PCRE_DOTALL; break;
605 case 'x': options |= PCRE_EXTENDED; break;
606
607 case '+': do_showrest = 1; break;
608 case 'A': options |= PCRE_ANCHORED; break;
609 case 'D': do_debug = do_showinfo = 1; break;
610 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
611 case 'G': do_G = 1; break;
612 case 'I': do_showinfo = 1; break;
613 case 'M': log_store = 1; break;
614 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
615
616 #if !defined NOPOSIX
617 case 'P': do_posix = 1; break;
618 #endif
619
620 case 'S': do_study = 1; break;
621 case 'U': options |= PCRE_UNGREEDY; break;
622 case 'X': options |= PCRE_EXTRA; break;
623 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
624 case '?': options |= PCRE_NO_UTF8_CHECK; break;
625
626 case 'L':
627 ppp = pp;
628 while (*ppp != '\n' && *ppp != ' ') ppp++;
629 *ppp = 0;
630 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
631 {
632 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
633 goto SKIP_DATA;
634 }
635 tables = pcre_maketables();
636 pp = ppp;
637 break;
638
639 case '\n': case ' ': break;
640 default:
641 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
642 goto SKIP_DATA;
643 }
644 }
645
646 /* Handle compiling via the POSIX interface, which doesn't support the
647 timing, showing, or debugging options, nor the ability to pass over
648 local character tables. */
649
650 #if !defined NOPOSIX
651 if (posix || do_posix)
652 {
653 int rc;
654 int cflags = 0;
655 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
656 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
657 rc = regcomp(&preg, (char *)p, cflags);
658
659 /* Compilation failed; go back for another re, skipping to blank line
660 if non-interactive. */
661
662 if (rc != 0)
663 {
664 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
665 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
666 goto SKIP_DATA;
667 }
668 }
669
670 /* Handle compiling via the native interface */
671
672 else
673 #endif /* !defined NOPOSIX */
674
675 {
676 if (timeit)
677 {
678 register int i;
679 clock_t time_taken;
680 clock_t start_time = clock();
681 for (i = 0; i < LOOPREPEAT; i++)
682 {
683 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
684 if (re != NULL) free(re);
685 }
686 time_taken = clock() - start_time;
687 fprintf(outfile, "Compile time %.3f milliseconds\n",
688 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
689 (double)CLOCKS_PER_SEC);
690 }
691
692 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
693
694 /* Compilation failed; go back for another re, skipping to blank line
695 if non-interactive. */
696
697 if (re == NULL)
698 {
699 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
700 SKIP_DATA:
701 if (infile != stdin)
702 {
703 for (;;)
704 {
705 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
706 {
707 done = 1;
708 goto CONTINUE;
709 }
710 len = (int)strlen((char *)buffer);
711 while (len > 0 && isspace(buffer[len-1])) len--;
712 if (len == 0) break;
713 }
714 fprintf(outfile, "\n");
715 }
716 goto CONTINUE;
717 }
718
719 /* Compilation succeeded; print data if required. There are now two
720 info-returning functions. The old one has a limited interface and
721 returns only limited data. Check that it agrees with the newer one. */
722
723 if (log_store)
724 fprintf(outfile, "Memory allocation (code space): %d\n",
725 (int)(gotten_store -
726 sizeof(real_pcre) -
727 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
728
729 if (do_showinfo)
730 {
731 unsigned long int get_options;
732 int old_first_char, old_options, old_count;
733 int count, backrefmax, first_char, need_char;
734 int nameentrysize, namecount;
735 const uschar *nametable;
736 size_t size;
737
738 if (do_debug)
739 {
740 fprintf(outfile, "------------------------------------------------------------------\n");
741 print_internals(re, outfile);
742 }
743
744 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
745 new_info(re, NULL, PCRE_INFO_SIZE, &size);
746 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
747 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
748 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
749 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
750 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
751 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
752 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
753
754 old_count = pcre_info(re, &old_options, &old_first_char);
755 if (count < 0) fprintf(outfile,
756 "Error %d from pcre_info()\n", count);
757 else
758 {
759 if (old_count != count) fprintf(outfile,
760 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
761 old_count);
762
763 if (old_first_char != first_char) fprintf(outfile,
764 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
765 first_char, old_first_char);
766
767 if (old_options != (int)get_options) fprintf(outfile,
768 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
769 get_options, old_options);
770 }
771
772 if (size != gotten_store) fprintf(outfile,
773 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
774 size, gotten_store);
775
776 fprintf(outfile, "Capturing subpattern count = %d\n", count);
777 if (backrefmax > 0)
778 fprintf(outfile, "Max back reference = %d\n", backrefmax);
779
780 if (namecount > 0)
781 {
782 fprintf(outfile, "Named capturing subpatterns:\n");
783 while (namecount-- > 0)
784 {
785 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
786 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
787 GET2(nametable, 0));
788 nametable += nameentrysize;
789 }
790 }
791
792 if (get_options == 0) fprintf(outfile, "No options\n");
793 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",
794 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
795 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
796 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
797 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
798 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
799 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
800 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
801 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
802 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
803 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
804
805 if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
806 fprintf(outfile, "Case state changes\n");
807
808 if (first_char == -1)
809 {
810 fprintf(outfile, "First char at start or follows \\n\n");
811 }
812 else if (first_char < 0)
813 {
814 fprintf(outfile, "No first char\n");
815 }
816 else
817 {
818 int ch = first_char & 255;
819 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
820 "" : " (caseless)";
821 if (isprint(ch))
822 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
823 else
824 fprintf(outfile, "First char = %d%s\n", ch, caseless);
825 }
826
827 if (need_char < 0)
828 {
829 fprintf(outfile, "No need char\n");
830 }
831 else
832 {
833 int ch = need_char & 255;
834 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
835 "" : " (caseless)";
836 if (isprint(ch))
837 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
838 else
839 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
840 }
841 }
842
843 /* If /S was present, study the regexp to generate additional info to
844 help with the matching. */
845
846 if (do_study)
847 {
848 if (timeit)
849 {
850 register int i;
851 clock_t time_taken;
852 clock_t start_time = clock();
853 for (i = 0; i < LOOPREPEAT; i++)
854 extra = pcre_study(re, study_options, &error);
855 time_taken = clock() - start_time;
856 if (extra != NULL) free(extra);
857 fprintf(outfile, " Study time %.3f milliseconds\n",
858 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
859 (double)CLOCKS_PER_SEC);
860 }
861
862 extra = pcre_study(re, study_options, &error);
863 if (error != NULL)
864 fprintf(outfile, "Failed to study: %s\n", error);
865 else if (extra == NULL)
866 fprintf(outfile, "Study returned NULL\n");
867
868 /* Don't output study size; at present it is in any case a fixed
869 value, but it varies, depending on the computer architecture, and
870 so messes up the test suite. */
871
872 else if (do_showinfo)
873 {
874 size_t size;
875 uschar *start_bits = NULL;
876 new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);
877 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
878 /* fprintf(outfile, "Study size = %d\n", size); */
879 if (start_bits == NULL)
880 fprintf(outfile, "No starting character set\n");
881 else
882 {
883 int i;
884 int c = 24;
885 fprintf(outfile, "Starting character set: ");
886 for (i = 0; i < 256; i++)
887 {
888 if ((start_bits[i/8] & (1<<(i%8))) != 0)
889 {
890 if (c > 75)
891 {
892 fprintf(outfile, "\n ");
893 c = 2;
894 }
895 if (isprint(i) && i != ' ')
896 {
897 fprintf(outfile, "%c ", i);
898 c += 2;
899 }
900 else
901 {
902 fprintf(outfile, "\\x%02x ", i);
903 c += 5;
904 }
905 }
906 }
907 fprintf(outfile, "\n");
908 }
909 }
910 }
911 }
912
913 /* Read data lines and test them */
914
915 for (;;)
916 {
917 unsigned char *q;
918 unsigned char *bptr = dbuffer;
919 int *use_offsets = offsets;
920 int use_size_offsets = size_offsets;
921 int callout_data = 0;
922 int callout_data_set = 0;
923 int count, c;
924 int copystrings = 0;
925 int find_match_limit = 0;
926 int getstrings = 0;
927 int getlist = 0;
928 int gmatched = 0;
929 int start_offset = 0;
930 int g_notempty = 0;
931
932 options = 0;
933
934 pcre_callout = callout;
935 first_callout = 1;
936 callout_extra = 0;
937 callout_count = 0;
938 callout_fail_count = 999999;
939 callout_fail_id = -1;
940
941 if (infile == stdin) printf("data> ");
942 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
943 {
944 done = 1;
945 goto CONTINUE;
946 }
947 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
948
949 len = (int)strlen((char *)buffer);
950 while (len > 0 && isspace(buffer[len-1])) len--;
951 buffer[len] = 0;
952 if (len == 0) break;
953
954 p = buffer;
955 while (isspace(*p)) p++;
956
957 q = dbuffer;
958 while ((c = *p++) != 0)
959 {
960 int i = 0;
961 int n = 0;
962
963 if (c == '\\') switch ((c = *p++))
964 {
965 case 'a': c = 7; break;
966 case 'b': c = '\b'; break;
967 case 'e': c = 27; break;
968 case 'f': c = '\f'; break;
969 case 'n': c = '\n'; break;
970 case 'r': c = '\r'; break;
971 case 't': c = '\t'; break;
972 case 'v': c = '\v'; break;
973
974 case '0': case '1': case '2': case '3':
975 case '4': case '5': case '6': case '7':
976 c -= '0';
977 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
978 c = c * 8 + *p++ - '0';
979 break;
980
981 case 'x':
982
983 /* Handle \x{..} specially - new Perl thing for utf8 */
984
985 if (*p == '{')
986 {
987 unsigned char *pt = p;
988 c = 0;
989 while (isxdigit(*(++pt)))
990 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
991 if (*pt == '}')
992 {
993 unsigned char buff8[8];
994 int ii, utn;
995 utn = ord2utf8(c, buff8);
996 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
997 c = buff8[ii]; /* Last byte */
998 p = pt + 1;
999 break;
1000 }
1001 /* Not correct form; fall through */
1002 }
1003
1004 /* Ordinary \x */
1005
1006 c = 0;
1007 while (i++ < 2 && isxdigit(*p))
1008 {
1009 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1010 p++;
1011 }
1012 break;
1013
1014 case 0: /* Allows for an empty line */
1015 p--;
1016 continue;
1017
1018 case 'A': /* Option setting */
1019 options |= PCRE_ANCHORED;
1020 continue;
1021
1022 case 'B':
1023 options |= PCRE_NOTBOL;
1024 continue;
1025
1026 case 'C':
1027 if (isdigit(*p)) /* Set copy string */
1028 {
1029 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1030 copystrings |= 1 << n;
1031 }
1032 else if (isalnum(*p))
1033 {
1034 uschar name[256];
1035 uschar *npp = name;
1036 while (isalnum(*p)) *npp++ = *p++;
1037 *npp = 0;
1038 n = pcre_get_stringnumber(re, (char *)name);
1039 if (n < 0)
1040 fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1041 else copystrings |= 1 << n;
1042 }
1043 else if (*p == '+')
1044 {
1045 callout_extra = 1;
1046 p++;
1047 }
1048 else if (*p == '-')
1049 {
1050 pcre_callout = NULL;
1051 p++;
1052 }
1053 else if (*p == '!')
1054 {
1055 callout_fail_id = 0;
1056 p++;
1057 while(isdigit(*p))
1058 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1059 callout_fail_count = 0;
1060 if (*p == '!')
1061 {
1062 p++;
1063 while(isdigit(*p))
1064 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1065 }
1066 }
1067 else if (*p == '*')
1068 {
1069 int sign = 1;
1070 callout_data = 0;
1071 if (*(++p) == '-') { sign = -1; p++; }
1072 while(isdigit(*p))
1073 callout_data = callout_data * 10 + *p++ - '0';
1074 callout_data *= sign;
1075 callout_data_set = 1;
1076 }
1077 continue;
1078
1079 case 'G':
1080 if (isdigit(*p))
1081 {
1082 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1083 getstrings |= 1 << n;
1084 }
1085 else if (isalnum(*p))
1086 {
1087 uschar name[256];
1088 uschar *npp = name;
1089 while (isalnum(*p)) *npp++ = *p++;
1090 *npp = 0;
1091 n = pcre_get_stringnumber(re, (char *)name);
1092 if (n < 0)
1093 fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1094 else getstrings |= 1 << n;
1095 }
1096 continue;
1097
1098 case 'L':
1099 getlist = 1;
1100 continue;
1101
1102 case 'M':
1103 find_match_limit = 1;
1104 continue;
1105
1106 case 'N':
1107 options |= PCRE_NOTEMPTY;
1108 continue;
1109
1110 case 'O':
1111 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1112 if (n > size_offsets_max)
1113 {
1114 size_offsets_max = n;
1115 free(offsets);
1116 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1117 if (offsets == NULL)
1118 {
1119 printf("** Failed to get %d bytes of memory for offsets vector\n",
1120 size_offsets_max * sizeof(int));
1121 return 1;
1122 }
1123 }
1124 use_size_offsets = n;
1125 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1126 continue;
1127
1128 case 'Z':
1129 options |= PCRE_NOTEOL;
1130 continue;
1131
1132 case '?':
1133 options |= PCRE_NO_UTF8_CHECK;
1134 continue;
1135 }
1136 *q++ = c;
1137 }
1138 *q = 0;
1139 len = q - dbuffer;
1140
1141 /* Handle matching via the POSIX interface, which does not
1142 support timing or playing with the match limit or callout data. */
1143
1144 #if !defined NOPOSIX
1145 if (posix || do_posix)
1146 {
1147 int rc;
1148 int eflags = 0;
1149 regmatch_t *pmatch = NULL;
1150 if (use_size_offsets > 0)
1151 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1152 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1153 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1154
1155 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1156
1157 if (rc != 0)
1158 {
1159 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1160 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1161 }
1162 else
1163 {
1164 size_t i;
1165 for (i = 0; i < (size_t)use_size_offsets; i++)
1166 {
1167 if (pmatch[i].rm_so >= 0)
1168 {
1169 fprintf(outfile, "%2d: ", (int)i);
1170 (void)pchars(dbuffer + pmatch[i].rm_so,
1171 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1172 fprintf(outfile, "\n");
1173 if (i == 0 && do_showrest)
1174 {
1175 fprintf(outfile, " 0+ ");
1176 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1177 outfile);
1178 fprintf(outfile, "\n");
1179 }
1180 }
1181 }
1182 }
1183 free(pmatch);
1184 }
1185
1186 /* Handle matching via the native interface - repeats for /g and /G */
1187
1188 else
1189 #endif /* !defined NOPOSIX */
1190
1191 for (;; gmatched++) /* Loop for /g or /G */
1192 {
1193 if (timeit)
1194 {
1195 register int i;
1196 clock_t time_taken;
1197 clock_t start_time = clock();
1198 for (i = 0; i < LOOPREPEAT; i++)
1199 count = pcre_exec(re, extra, (char *)bptr, len,
1200 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1201 time_taken = clock() - start_time;
1202 fprintf(outfile, "Execute time %.3f milliseconds\n",
1203 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1204 (double)CLOCKS_PER_SEC);
1205 }
1206
1207 /* If find_match_limit is set, we want to do repeated matches with
1208 varying limits in order to find the minimum value. */
1209
1210 if (find_match_limit)
1211 {
1212 int min = 0;
1213 int mid = 64;
1214 int max = -1;
1215
1216 if (extra == NULL)
1217 {
1218 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1219 extra->flags = 0;
1220 }
1221 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1222
1223 for (;;)
1224 {
1225 extra->match_limit = mid;
1226 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1227 options | g_notempty, use_offsets, use_size_offsets);
1228 if (count == PCRE_ERROR_MATCHLIMIT)
1229 {
1230 /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1231 min = mid;
1232 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1233 }
1234 else if (count >= 0 || count == PCRE_ERROR_NOMATCH)
1235 {
1236 if (mid == min + 1)
1237 {
1238 fprintf(outfile, "Minimum match limit = %d\n", mid);
1239 break;
1240 }
1241 /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1242 max = mid;
1243 mid = (min + mid)/2;
1244 }
1245 else break; /* Some other error */
1246 }
1247
1248 extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1249 }
1250
1251 /* If callout_data is set, use the interface with additional data */
1252
1253 else if (callout_data_set)
1254 {
1255 if (extra == NULL)
1256 {
1257 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1258 extra->flags = 0;
1259 }
1260 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1261 extra->callout_data = &callout_data;
1262 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1263 options | g_notempty, use_offsets, use_size_offsets);
1264 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1265 }
1266
1267 /* The normal case is just to do the match once, with the default
1268 value of match_limit. */
1269
1270 else count = pcre_exec(re, extra, (char *)bptr, len,
1271 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1272
1273 if (count == 0)
1274 {
1275 fprintf(outfile, "Matched, but too many substrings\n");
1276 count = use_size_offsets/3;
1277 }
1278
1279 /* Matched */
1280
1281 if (count >= 0)
1282 {
1283 int i;
1284 for (i = 0; i < count * 2; i += 2)
1285 {
1286 if (use_offsets[i] < 0)
1287 fprintf(outfile, "%2d: <unset>\n", i/2);
1288 else
1289 {
1290 fprintf(outfile, "%2d: ", i/2);
1291 (void)pchars(bptr + use_offsets[i],
1292 use_offsets[i+1] - use_offsets[i], outfile);
1293 fprintf(outfile, "\n");
1294 if (i == 0)
1295 {
1296 if (do_showrest)
1297 {
1298 fprintf(outfile, " 0+ ");
1299 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1300 outfile);
1301 fprintf(outfile, "\n");
1302 }
1303 }
1304 }
1305 }
1306
1307 for (i = 0; i < 32; i++)
1308 {
1309 if ((copystrings & (1 << i)) != 0)
1310 {
1311 char copybuffer[16];
1312 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1313 i, copybuffer, sizeof(copybuffer));
1314 if (rc < 0)
1315 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1316 else
1317 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1318 }
1319 }
1320
1321 for (i = 0; i < 32; i++)
1322 {
1323 if ((getstrings & (1 << i)) != 0)
1324 {
1325 const char *substring;
1326 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1327 i, &substring);
1328 if (rc < 0)
1329 fprintf(outfile, "get substring %d failed %d\n", i, rc);
1330 else
1331 {
1332 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1333 /* free((void *)substring); */
1334 pcre_free_substring(substring);
1335 }
1336 }
1337 }
1338
1339 if (getlist)
1340 {
1341 const char **stringlist;
1342 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1343 &stringlist);
1344 if (rc < 0)
1345 fprintf(outfile, "get substring list failed %d\n", rc);
1346 else
1347 {
1348 for (i = 0; i < count; i++)
1349 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1350 if (stringlist[i] != NULL)
1351 fprintf(outfile, "string list not terminated by NULL\n");
1352 /* free((void *)stringlist); */
1353 pcre_free_substring_list(stringlist);
1354 }
1355 }
1356 }
1357
1358 /* Failed to match. If this is a /g or /G loop and we previously set
1359 g_notempty after a null match, this is not necessarily the end.
1360 We want to advance the start offset, and continue. Fudge the offset
1361 values to achieve this. We won't be at the end of the string - that
1362 was checked before setting g_notempty. */
1363
1364 else
1365 {
1366 if (g_notempty != 0)
1367 {
1368 use_offsets[0] = start_offset;
1369 use_offsets[1] = start_offset + 1;
1370 }
1371 else
1372 {
1373 if (gmatched == 0) /* Error if no previous matches */
1374 {
1375 if (count == -1) fprintf(outfile, "No match\n");
1376 else fprintf(outfile, "Error %d\n", count);
1377 }
1378 break; /* Out of the /g loop */
1379 }
1380 }
1381
1382 /* If not /g or /G we are done */
1383
1384 if (!do_g && !do_G) break;
1385
1386 /* If we have matched an empty string, first check to see if we are at
1387 the end of the subject. If so, the /g loop is over. Otherwise, mimic
1388 what Perl's /g option does. This turns out to be rather cunning. First
1389 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1390 same point. If this fails (picked up above) we advance to the next
1391 character. */
1392
1393 g_notempty = 0;
1394 if (use_offsets[0] == use_offsets[1])
1395 {
1396 if (use_offsets[0] == len) break;
1397 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1398 }
1399
1400 /* For /g, update the start offset, leaving the rest alone */
1401
1402 if (do_g) start_offset = use_offsets[1];
1403
1404 /* For /G, update the pointer and length */
1405
1406 else
1407 {
1408 bptr += use_offsets[1];
1409 len -= use_offsets[1];
1410 }
1411 } /* End of loop for /g and /G */
1412 } /* End of loop for data lines */
1413
1414 CONTINUE:
1415
1416 #if !defined NOPOSIX
1417 if (posix || do_posix) regfree(&preg);
1418 #endif
1419
1420 if (re != NULL) free(re);
1421 if (extra != NULL) free(extra);
1422 if (tables != NULL)
1423 {
1424 free((void *)tables);
1425 setlocale(LC_CTYPE, "C");
1426 }
1427 }
1428
1429 fprintf(outfile, "\n");
1430 return 0;
1431 }
1432
1433 /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12