/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory | Revision Log


Revision 65 - (show annotations) (download)
Sat Feb 24 21:40:08 2007 UTC (7 years, 7 months ago) by nigel
File MIME type: text/plain
File size: 39198 byte(s)
Load pcre-4.1 into code/trunk.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather untidy in places. */
8
#include <ctype.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
15
16 /* We need the internal info for displaying the results of pcre_study(). Also
17 for getting the opcodes for showing compiled code. */
18
19 #define PCRE_SPY /* For Win32 build, import data, not export */
20 #include "internal.h"
21
22 /* It is possible to compile this test program without including support for
23 testing the POSIX interface, though this is not available via the standard
24 Makefile. */
25
26 #if !defined NOPOSIX
27 #include "pcreposix.h"
28 #endif
29
30 #ifndef CLOCKS_PER_SEC
31 #ifdef CLK_TCK
32 #define CLOCKS_PER_SEC CLK_TCK
33 #else
34 #define CLOCKS_PER_SEC 100
35 #endif
36 #endif
37
38 #define LOOPREPEAT 50000
39
40
/* Output and store-logging state. */

static FILE *outfile;            /* where results go; main() points it at stdout
                                    or at the named output file */
static size_t gotten_store;      /* size of the latest request seen by new_malloc() */
static int log_store = 0;        /* non-zero => report the code-space size after
                                    a successful compile */

/* State shared between the data-line parser in main() and callout(). */

static int first_callout;        /* set before each data line, cleared by callout() */
static int callout_extra;        /* non-zero => callout() prints capture details */
static int callout_count;        /* times the callout numbered callout_fail_id ran */
static int callout_fail_count;   /* callout() yields 1 once callout_count gets here */
static int callout_fail_id;      /* callout number that is counted towards failing */

/* Non-zero while the current pattern was given the 8 (UTF-8) option; read by
pchars() to decide how to decode the bytes it is printing. */

static int utf8;
50
51
52
/* Lookup tables for the UTF-8 conversion functions. Each is indexed by the
number of additional (continuation) bytes in a character, 0 to 5. They are
only ever read, so they are declared const. */

/* Largest character value that fits in an encoding with that many
additional bytes. */

static const int utf8_table1[] = {
  0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};

/* Marker bits that are ORed into the first byte of the encoding. */

static const int utf8_table2[] = {
  0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};

/* Mask that extracts the data bits from the first byte of the encoding. */

static const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
61
62
63
64 /*************************************************
65 * Print compiled regex *
66 *************************************************/
67
68 /* The code for doing this is held in a separate file that is also included in
69 pcre.c when it is compiled with the debug switch. It defines a function called
70 print_internals(), which uses a table of opcode lengths defined by the macro
71 OP_LENGTHS, whose name must be OP_lengths. */
72
73 static uschar OP_lengths[] = { OP_LENGTHS };  /* opcode length table, filled in from the OP_LENGTHS macro; the printint.c code included below expects exactly this name */
74
75 #include "printint.c"
76
77
78
79 /*************************************************
80 * Read number from string *
81 *************************************************/
82
/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, the job is done by hand. It is only used
for unpicking the -o argument, so it is kept simple: leading white space is
skipped and then every decimal digit that follows is consumed. If there are no
digits the result is 0. A number that is too big for an int is clamped to
INT_MAX (its digits are still all consumed) instead of overflowing.

Arguments:
  str         string to be converted
  endptr      where to put the pointer to the first unconsumed character

Returns:      the value, as a non-negative int
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int result = 0;

  while (isspace(*str)) str++;

  for (; isdigit(*str); str++)
    {
    int digit = *str - '0';

    /* Test before multiplying: signed overflow is undefined behaviour. Once
    the value has been clamped this test keeps it at INT_MAX. */

    if (result > (INT_MAX - digit) / 10)
      result = INT_MAX;
    else
      result = result * 10 + digit;
    }

  *endptr = str;
  return result;
}
103
104
105
106 /*************************************************
107 * Convert character value to UTF-8 *
108 *************************************************/
109
110 /* This function takes an integer value in the range 0 - 0x7fffffff
111 and encodes it as a UTF-8 character in 0 to 6 bytes.
112
113 Arguments:
114 cvalue the character value
115 buffer pointer to buffer for result - at least 6 bytes long
116
117 Returns: number of characters placed in the buffer
118 -1 if input character is negative
119 0 if input character is positive but too big (only when
120 int is longer than 32 bits)
121 */
122
123 static int
124 ord2utf8(int cvalue, unsigned char *buffer)
125 {
126 register int i, j;
127 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
128 if (cvalue <= utf8_table1[i]) break;
129 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
130 if (cvalue < 0) return -1;
131
132 buffer += i;
133 for (j = i; j > 0; j--)
134 {
135 *buffer-- = 0x80 | (cvalue & 0x3f);
136 cvalue >>= 6;
137 }
138 *buffer = utf8_table2[i] | cvalue;
139 return i + 1;
140 }
141
142
143 /*************************************************
144 * Convert UTF-8 string to value *
145 *************************************************/
146
147 /* This function takes one or more bytes that represents a UTF-8 character,
148 and returns the value of the character.
149
150 Argument:
151 buffer a pointer to the byte vector
152 vptr a pointer to an int to receive the value
153
154 Returns: > 0 => the number of bytes consumed
155 -6 to 0 => malformed UTF-8 character at offset = (-return)
156 */
157
158 int
159 utf82ord(unsigned char *buffer, int *vptr)
160 {
161 int c = *buffer++;
162 int d = c;
163 int i, j, s;
164
165 for (i = -1; i < 6; i++) /* i is number of additional bytes */
166 {
167 if ((d & 0x80) == 0) break;
168 d <<= 1;
169 }
170
171 if (i == -1) { *vptr = c; return 1; } /* ascii character */
172 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
173
174 /* i now has a value in the range 1-5 */
175
176 s = 6*i;
177 d = (c & utf8_table3[i]) << s;
178
179 for (j = 0; j < i; j++)
180 {
181 c = *buffer++;
182 if ((c & 0xc0) != 0x80) return -(j+1);
183 s -= 6;
184 d |= (c & 0x3f) << s;
185 }
186
187 /* Check that encoding was the correct unique one */
188
189 for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
190 if (d <= utf8_table1[j]) break;
191 if (j != i) return -(i+1);
192
193 /* Valid value */
194
195 *vptr = d;
196 return i+1;
197 }
198
199
200
201 /*************************************************
202 * Print character string *
203 *************************************************/
204
205 /* Character string printing function. Must handle UTF-8 strings in utf8
206 mode. Yields number of characters printed. If handed a NULL file, just counts
207 chars without printing. */
208
209 static int pchars(unsigned char *p, int length, FILE *f)
210 {
211 int c;
212 int yield = 0;
213
214 while (length-- > 0)
215 {
216 if (utf8)
217 {
218 int rc = utf82ord(p, &c);
219
220 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
221 {
222 length -= rc - 1;
223 p += rc;
224 if (c < 256 && isprint(c))
225 {
226 if (f != NULL) fprintf(f, "%c", c);
227 yield++;
228 }
229 else
230 {
231 int n;
232 if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
233 yield += n;
234 }
235 continue;
236 }
237 }
238
239 /* Not UTF-8, or malformed UTF-8 */
240
241 if (isprint(c = *(p++)))
242 {
243 if (f != NULL) fprintf(f, "%c", c);
244 yield++;
245 }
246 else
247 {
248 if (f != NULL) fprintf(f, "\\x%02x", c);
249 yield += 4;
250 }
251 }
252
253 return yield;
254 }
255
256
257
258 /*************************************************
259 * Callout function *
260 *************************************************/
261
262 /* Called from PCRE as a result of the (?C) item. We print out where we are in
263 the match. Yield zero unless more callouts than the fail count, or the callout
264 data is not zero. */
265
266 static int callout(pcre_callout_block *cb)
267 {
268 FILE *f = (first_callout | callout_extra)? outfile : NULL;
269 int i, pre_start, post_start;
270
271 if (callout_extra)
272 {
273 int i;
274 fprintf(f, "Callout %d: last capture = %d\n",
275 cb->callout_number, cb->capture_last);
276
277 for (i = 0; i < cb->capture_top * 2; i += 2)
278 {
279 if (cb->offset_vector[i] < 0)
280 fprintf(f, "%2d: <unset>\n", i/2);
281 else
282 {
283 fprintf(f, "%2d: ", i/2);
284 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
285 cb->offset_vector[i+1] - cb->offset_vector[i], f);
286 fprintf(f, "\n");
287 }
288 }
289 }
290
291 /* Re-print the subject in canonical form, the first time or if giving full
292 datails. On subsequent calls in the same match, we use pchars just to find the
293 printed lengths of the substrings. */
294
295 if (f != NULL) fprintf(f, "--->");
296
297 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
298 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
299 cb->current_position - cb->start_match, f);
300
301 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
302 cb->subject_length - cb->current_position, f);
303
304 if (f != NULL) fprintf(f, "\n");
305
306 /* Always print appropriate indicators, with callout number if not already
307 shown */
308
309 if (callout_extra) fprintf(outfile, " ");
310 else fprintf(outfile, "%3d ", cb->callout_number);
311
312 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
313 fprintf(outfile, "^");
314
315 if (post_start > 0)
316 {
317 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
318 fprintf(outfile, "^");
319 }
320
321 fprintf(outfile, "\n");
322
323 first_callout = 0;
324
325 if ((int)(cb->callout_data) != 0)
326 {
327 fprintf(outfile, "Callout data = %d\n", (int)(cb->callout_data));
328 return (int)(cb->callout_data);
329 }
330
331 return (cb->callout_number != callout_fail_id)? 0 :
332 (++callout_count >= callout_fail_count)? 1 : 0;
333 }
334
335
336 /*************************************************
337 * Local malloc function *
338 *************************************************/
339
340 /* Alternative malloc function, to test functionality and show the size of the
341 compiled re. */
342
343 static void *new_malloc(size_t size)
344 {
345 gotten_store = size;
346 return malloc(size);
347 }
348
349
350
351 /*************************************************
352 * Call pcre_fullinfo() *
353 *************************************************/
354
355 /* Get one piece of information from the pcre_fullinfo() function */
356
357 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
358 {
359 int rc;
360 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
361 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
362 }
363
364
365
366 /*************************************************
367 * Main Program *
368 *************************************************/
369
370 /* Read lines from named file or stdin and write to named file or stdout; lines
371 consist of a regular expression, in delimiters and optionally followed by
372 options, followed by a set of test data, terminated by an empty line. */
373
374 int main(int argc, char **argv)
375 {
376 FILE *infile = stdin;
377 int options = 0;
378 int study_options = 0;
379 int op = 1;
380 int timeit = 0;
381 int showinfo = 0;
382 int showstore = 0;
383 int size_offsets = 45;
384 int size_offsets_max;
385 int *offsets;
386 #if !defined NOPOSIX
387 int posix = 0;
388 #endif
389 int debug = 0;
390 int done = 0;
391 unsigned char buffer[30000];
392 unsigned char dbuffer[1024];
393
394 /* Static so that new_malloc can use it. */
395
396 outfile = stdout;
397
398 /* Scan options */
399
400 while (argc > 1 && argv[op][0] == '-')
401 {
402 unsigned char *endptr;
403
404 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
405 showstore = 1;
406 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
407 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
408 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
409 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
410 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
411 *endptr == 0))
412 {
413 op++;
414 argc--;
415 }
416 #if !defined NOPOSIX
417 else if (strcmp(argv[op], "-p") == 0) posix = 1;
418 #endif
419 else if (strcmp(argv[op], "-C") == 0)
420 {
421 int rc;
422 printf("PCRE version %s\n", pcre_version());
423 printf("Compiled with\n");
424 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
425 printf(" %sUTF-8 support\n", rc? "" : "No ");
426 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
427 printf(" Newline character is %s\n", (rc == '\r')? "CR" : "LF");
428 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
429 printf(" Internal link size = %d\n", rc);
430 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
431 printf(" POSIX malloc threshold = %d\n", rc);
432 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
433 printf(" Default match limit = %d\n", rc);
434 exit(0);
435 }
436 else
437 {
438 printf("** Unknown or malformed option %s\n", argv[op]);
439 printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
440 printf(" -C show PCRE compile-time options and exit\n");
441 printf(" -d debug: show compiled code; implies -i\n"
442 " -i show information about compiled pattern\n"
443 " -o <n> set size of offsets vector to <n>\n");
444 #if !defined NOPOSIX
445 printf(" -p use POSIX interface\n");
446 #endif
447 printf(" -s output store information\n"
448 " -t time compilation and execution\n");
449 return 1;
450 }
451 op++;
452 argc--;
453 }
454
455 /* Get the store for the offsets vector, and remember what it was */
456
457 size_offsets_max = size_offsets;
458 offsets = malloc(size_offsets_max * sizeof(int));
459 if (offsets == NULL)
460 {
461 printf("** Failed to get %d bytes of memory for offsets vector\n",
462 size_offsets_max * sizeof(int));
463 return 1;
464 }
465
466 /* Sort out the input and output files */
467
468 if (argc > 1)
469 {
470 infile = fopen(argv[op], "r");
471 if (infile == NULL)
472 {
473 printf("** Failed to open %s\n", argv[op]);
474 return 1;
475 }
476 }
477
478 if (argc > 2)
479 {
480 outfile = fopen(argv[op+1], "w");
481 if (outfile == NULL)
482 {
483 printf("** Failed to open %s\n", argv[op+1]);
484 return 1;
485 }
486 }
487
488 /* Set alternative malloc function */
489
490 pcre_malloc = new_malloc;
491
492 /* Heading line, then prompt for first regex if stdin */
493
494 fprintf(outfile, "PCRE version %s\n\n", pcre_version());
495
496 /* Main loop */
497
498 while (!done)
499 {
500 pcre *re = NULL;
501 pcre_extra *extra = NULL;
502
503 #if !defined NOPOSIX /* There are still compilers that require no indent */
504 regex_t preg;
505 int do_posix = 0;
506 #endif
507
508 const char *error;
509 unsigned char *p, *pp, *ppp;
510 const unsigned char *tables = NULL;
511 int do_study = 0;
512 int do_debug = debug;
513 int do_G = 0;
514 int do_g = 0;
515 int do_showinfo = showinfo;
516 int do_showrest = 0;
517 int erroroffset, len, delimiter;
518
519 utf8 = 0;
520
521 if (infile == stdin) printf(" re> ");
522 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
523 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
524 fflush(outfile);
525
526 p = buffer;
527 while (isspace(*p)) p++;
528 if (*p == 0) continue;
529
530 /* Get the delimiter and seek the end of the pattern; if it isn't
531 complete, read more. */
532
533 delimiter = *p++;
534
535 if (isalnum(delimiter) || delimiter == '\\')
536 {
537 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
538 goto SKIP_DATA;
539 }
540
541 pp = p;
542
543 for(;;)
544 {
545 while (*pp != 0)
546 {
547 if (*pp == '\\' && pp[1] != 0) pp++;
548 else if (*pp == delimiter) break;
549 pp++;
550 }
551 if (*pp != 0) break;
552
553 len = sizeof(buffer) - (pp - buffer);
554 if (len < 256)
555 {
556 fprintf(outfile, "** Expression too long - missing delimiter?\n");
557 goto SKIP_DATA;
558 }
559
560 if (infile == stdin) printf(" > ");
561 if (fgets((char *)pp, len, infile) == NULL)
562 {
563 fprintf(outfile, "** Unexpected EOF\n");
564 done = 1;
565 goto CONTINUE;
566 }
567 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
568 }
569
570 /* If the first character after the delimiter is backslash, make
571 the pattern end with backslash. This is purely to provide a way
572 of testing for the error message when a pattern ends with backslash. */
573
574 if (pp[1] == '\\') *pp++ = '\\';
575
576 /* Terminate the pattern at the delimiter */
577
578 *pp++ = 0;
579
580 /* Look for options after final delimiter */
581
582 options = 0;
583 study_options = 0;
584 log_store = showstore; /* default from command line */
585
586 while (*pp != 0)
587 {
588 switch (*pp++)
589 {
590 case 'g': do_g = 1; break;
591 case 'i': options |= PCRE_CASELESS; break;
592 case 'm': options |= PCRE_MULTILINE; break;
593 case 's': options |= PCRE_DOTALL; break;
594 case 'x': options |= PCRE_EXTENDED; break;
595
596 case '+': do_showrest = 1; break;
597 case 'A': options |= PCRE_ANCHORED; break;
598 case 'D': do_debug = do_showinfo = 1; break;
599 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
600 case 'G': do_G = 1; break;
601 case 'I': do_showinfo = 1; break;
602 case 'M': log_store = 1; break;
603 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
604
605 #if !defined NOPOSIX
606 case 'P': do_posix = 1; break;
607 #endif
608
609 case 'S': do_study = 1; break;
610 case 'U': options |= PCRE_UNGREEDY; break;
611 case 'X': options |= PCRE_EXTRA; break;
612 case '8': options |= PCRE_UTF8; utf8 = 1; break;
613
614 case 'L':
615 ppp = pp;
616 while (*ppp != '\n' && *ppp != ' ') ppp++;
617 *ppp = 0;
618 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
619 {
620 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
621 goto SKIP_DATA;
622 }
623 tables = pcre_maketables();
624 pp = ppp;
625 break;
626
627 case '\n': case ' ': break;
628 default:
629 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
630 goto SKIP_DATA;
631 }
632 }
633
634 /* Handle compiling via the POSIX interface, which doesn't support the
635 timing, showing, or debugging options, nor the ability to pass over
636 local character tables. */
637
638 #if !defined NOPOSIX
639 if (posix || do_posix)
640 {
641 int rc;
642 int cflags = 0;
643 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
644 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
645 rc = regcomp(&preg, (char *)p, cflags);
646
647 /* Compilation failed; go back for another re, skipping to blank line
648 if non-interactive. */
649
650 if (rc != 0)
651 {
652 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
653 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
654 goto SKIP_DATA;
655 }
656 }
657
658 /* Handle compiling via the native interface */
659
660 else
661 #endif /* !defined NOPOSIX */
662
663 {
664 if (timeit)
665 {
666 register int i;
667 clock_t time_taken;
668 clock_t start_time = clock();
669 for (i = 0; i < LOOPREPEAT; i++)
670 {
671 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
672 if (re != NULL) free(re);
673 }
674 time_taken = clock() - start_time;
675 fprintf(outfile, "Compile time %.3f milliseconds\n",
676 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
677 (double)CLOCKS_PER_SEC);
678 }
679
680 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
681
682 /* Compilation failed; go back for another re, skipping to blank line
683 if non-interactive. */
684
685 if (re == NULL)
686 {
687 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
688 SKIP_DATA:
689 if (infile != stdin)
690 {
691 for (;;)
692 {
693 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
694 {
695 done = 1;
696 goto CONTINUE;
697 }
698 len = (int)strlen((char *)buffer);
699 while (len > 0 && isspace(buffer[len-1])) len--;
700 if (len == 0) break;
701 }
702 fprintf(outfile, "\n");
703 }
704 goto CONTINUE;
705 }
706
707 /* Compilation succeeded; print data if required. There are now two
708 info-returning functions. The old one has a limited interface and
709 returns only limited data. Check that it agrees with the newer one. */
710
711 if (log_store)
712 fprintf(outfile, "Memory allocation (code space): %d\n",
713 (int)(gotten_store -
714 sizeof(real_pcre) -
715 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
716
717 if (do_showinfo)
718 {
719 unsigned long int get_options;
720 int old_first_char, old_options, old_count;
721 int count, backrefmax, first_char, need_char;
722 int nameentrysize, namecount;
723 const uschar *nametable;
724 size_t size;
725
726 if (do_debug)
727 {
728 fprintf(outfile, "------------------------------------------------------------------\n");
729 print_internals(re, outfile);
730 }
731
732 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
733 new_info(re, NULL, PCRE_INFO_SIZE, &size);
734 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
735 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
736 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
737 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
738 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
739 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
740 new_info(re, NULL, PCRE_INFO_NAMETABLE, &nametable);
741
742 old_count = pcre_info(re, &old_options, &old_first_char);
743 if (count < 0) fprintf(outfile,
744 "Error %d from pcre_info()\n", count);
745 else
746 {
747 if (old_count != count) fprintf(outfile,
748 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
749 old_count);
750
751 if (old_first_char != first_char) fprintf(outfile,
752 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
753 first_char, old_first_char);
754
755 if (old_options != (int)get_options) fprintf(outfile,
756 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
757 get_options, old_options);
758 }
759
760 if (size != gotten_store) fprintf(outfile,
761 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
762 size, gotten_store);
763
764 fprintf(outfile, "Capturing subpattern count = %d\n", count);
765 if (backrefmax > 0)
766 fprintf(outfile, "Max back reference = %d\n", backrefmax);
767
768 if (namecount > 0)
769 {
770 fprintf(outfile, "Named capturing subpatterns:\n");
771 while (namecount-- > 0)
772 {
773 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
774 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
775 GET2(nametable, 0));
776 nametable += nameentrysize;
777 }
778 }
779
780 if (get_options == 0) fprintf(outfile, "No options\n");
781 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
782 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
783 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
784 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
785 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
786 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
787 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
788 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
789 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
790 ((get_options & PCRE_UTF8) != 0)? " utf8" : "");
791
792 if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
793 fprintf(outfile, "Case state changes\n");
794
795 if (first_char == -1)
796 {
797 fprintf(outfile, "First char at start or follows \\n\n");
798 }
799 else if (first_char < 0)
800 {
801 fprintf(outfile, "No first char\n");
802 }
803 else
804 {
805 int ch = first_char & 255;
806 char *caseless = ((first_char & REQ_CASELESS) == 0)?
807 "" : " (caseless)";
808 if (isprint(ch))
809 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
810 else
811 fprintf(outfile, "First char = %d%s\n", ch, caseless);
812 }
813
814 if (need_char < 0)
815 {
816 fprintf(outfile, "No need char\n");
817 }
818 else
819 {
820 int ch = need_char & 255;
821 char *caseless = ((need_char & REQ_CASELESS) == 0)?
822 "" : " (caseless)";
823 if (isprint(ch))
824 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
825 else
826 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
827 }
828 }
829
830 /* If /S was present, study the regexp to generate additional info to
831 help with the matching. */
832
833 if (do_study)
834 {
835 if (timeit)
836 {
837 register int i;
838 clock_t time_taken;
839 clock_t start_time = clock();
840 for (i = 0; i < LOOPREPEAT; i++)
841 extra = pcre_study(re, study_options, &error);
842 time_taken = clock() - start_time;
843 if (extra != NULL) free(extra);
844 fprintf(outfile, " Study time %.3f milliseconds\n",
845 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
846 (double)CLOCKS_PER_SEC);
847 }
848
849 extra = pcre_study(re, study_options, &error);
850 if (error != NULL)
851 fprintf(outfile, "Failed to study: %s\n", error);
852 else if (extra == NULL)
853 fprintf(outfile, "Study returned NULL\n");
854
855 else if (do_showinfo)
856 {
857 size_t size;
858 uschar *start_bits = NULL;
859 new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);
860 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
861 fprintf(outfile, "Study size = %d\n", size);
862 if (start_bits == NULL)
863 fprintf(outfile, "No starting character set\n");
864 else
865 {
866 int i;
867 int c = 24;
868 fprintf(outfile, "Starting character set: ");
869 for (i = 0; i < 256; i++)
870 {
871 if ((start_bits[i/8] & (1<<(i%8))) != 0)
872 {
873 if (c > 75)
874 {
875 fprintf(outfile, "\n ");
876 c = 2;
877 }
878 if (isprint(i) && i != ' ')
879 {
880 fprintf(outfile, "%c ", i);
881 c += 2;
882 }
883 else
884 {
885 fprintf(outfile, "\\x%02x ", i);
886 c += 5;
887 }
888 }
889 }
890 fprintf(outfile, "\n");
891 }
892 }
893 }
894 }
895
896 /* Read data lines and test them */
897
898 for (;;)
899 {
900 unsigned char *q;
901 unsigned char *bptr = dbuffer;
902 int *use_offsets = offsets;
903 int use_size_offsets = size_offsets;
904 int callout_data = 0;
905 int callout_data_set = 0;
906 int count, c;
907 int copystrings = 0;
908 int find_match_limit = 0;
909 int getstrings = 0;
910 int getlist = 0;
911 int gmatched = 0;
912 int start_offset = 0;
913 int g_notempty = 0;
914
915 options = 0;
916
917 pcre_callout = callout;
918 first_callout = 1;
919 callout_extra = 0;
920 callout_count = 0;
921 callout_fail_count = 999999;
922 callout_fail_id = -1;
923
924 if (infile == stdin) printf("data> ");
925 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
926 {
927 done = 1;
928 goto CONTINUE;
929 }
930 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
931
932 len = (int)strlen((char *)buffer);
933 while (len > 0 && isspace(buffer[len-1])) len--;
934 buffer[len] = 0;
935 if (len == 0) break;
936
937 p = buffer;
938 while (isspace(*p)) p++;
939
940 q = dbuffer;
941 while ((c = *p++) != 0)
942 {
943 int i = 0;
944 int n = 0;
945
946 if (c == '\\') switch ((c = *p++))
947 {
948 case 'a': c = 7; break;
949 case 'b': c = '\b'; break;
950 case 'e': c = 27; break;
951 case 'f': c = '\f'; break;
952 case 'n': c = '\n'; break;
953 case 'r': c = '\r'; break;
954 case 't': c = '\t'; break;
955 case 'v': c = '\v'; break;
956
957 case '0': case '1': case '2': case '3':
958 case '4': case '5': case '6': case '7':
959 c -= '0';
960 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
961 c = c * 8 + *p++ - '0';
962 break;
963
964 case 'x':
965
966 /* Handle \x{..} specially - new Perl thing for utf8 */
967
968 if (*p == '{')
969 {
970 unsigned char *pt = p;
971 c = 0;
972 while (isxdigit(*(++pt)))
973 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
974 if (*pt == '}')
975 {
976 unsigned char buffer[8];
977 int ii, utn;
978 utn = ord2utf8(c, buffer);
979 for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];
980 c = buffer[ii]; /* Last byte */
981 p = pt + 1;
982 break;
983 }
984 /* Not correct form; fall through */
985 }
986
987 /* Ordinary \x */
988
989 c = 0;
990 while (i++ < 2 && isxdigit(*p))
991 {
992 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
993 p++;
994 }
995 break;
996
997 case 0: /* Allows for an empty line */
998 p--;
999 continue;
1000
1001 case 'A': /* Option setting */
1002 options |= PCRE_ANCHORED;
1003 continue;
1004
1005 case 'B':
1006 options |= PCRE_NOTBOL;
1007 continue;
1008
1009 case 'C':
1010 if (isdigit(*p)) /* Set copy string */
1011 {
1012 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1013 copystrings |= 1 << n;
1014 }
1015 else if (isalnum(*p))
1016 {
1017 uschar name[256];
1018 uschar *pp = name;
1019 while (isalnum(*p)) *pp++ = *p++;
1020 *pp = 0;
1021 n = pcre_get_stringnumber(re, (char *)name);
1022 if (n < 0)
1023 fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1024 else copystrings |= 1 << n;
1025 }
1026 else if (*p == '+')
1027 {
1028 callout_extra = 1;
1029 p++;
1030 }
1031 else if (*p == '-')
1032 {
1033 pcre_callout = NULL;
1034 p++;
1035 }
1036 else if (*p == '!')
1037 {
1038 callout_fail_id = 0;
1039 p++;
1040 while(isdigit(*p))
1041 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1042 callout_fail_count = 0;
1043 if (*p == '!')
1044 {
1045 p++;
1046 while(isdigit(*p))
1047 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1048 }
1049 }
1050 else if (*p == '*')
1051 {
1052 int sign = 1;
1053 callout_data = 0;
1054 if (*(++p) == '-') { sign = -1; p++; }
1055 while(isdigit(*p))
1056 callout_data = callout_data * 10 + *p++ - '0';
1057 callout_data *= sign;
1058 callout_data_set = 1;
1059 }
1060 continue;
1061
1062 case 'G':
1063 if (isdigit(*p))
1064 {
1065 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1066 getstrings |= 1 << n;
1067 }
1068 else if (isalnum(*p))
1069 {
1070 uschar name[256];
1071 uschar *pp = name;
1072 while (isalnum(*p)) *pp++ = *p++;
1073 *pp = 0;
1074 n = pcre_get_stringnumber(re, (char *)name);
1075 if (n < 0)
1076 fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1077 else getstrings |= 1 << n;
1078 }
1079 continue;
1080
1081 case 'L':
1082 getlist = 1;
1083 continue;
1084
1085 case 'M':
1086 find_match_limit = 1;
1087 continue;
1088
1089 case 'N':
1090 options |= PCRE_NOTEMPTY;
1091 continue;
1092
1093 case 'O':
1094 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1095 if (n > size_offsets_max)
1096 {
1097 size_offsets_max = n;
1098 free(offsets);
1099 use_offsets = offsets = malloc(size_offsets_max * sizeof(int));
1100 if (offsets == NULL)
1101 {
1102 printf("** Failed to get %d bytes of memory for offsets vector\n",
1103 size_offsets_max * sizeof(int));
1104 return 1;
1105 }
1106 }
1107 use_size_offsets = n;
1108 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1109 continue;
1110
1111 case 'Z':
1112 options |= PCRE_NOTEOL;
1113 continue;
1114 }
1115 *q++ = c;
1116 }
1117 *q = 0;
1118 len = q - dbuffer;
1119
1120 /* Handle matching via the POSIX interface, which does not
1121 support timing or playing with the match limit or callout data. */
1122
1123 #if !defined NOPOSIX
1124 if (posix || do_posix)
1125 {
1126 int rc;
1127 int eflags = 0;
1128 regmatch_t *pmatch = NULL;
1129 if (use_size_offsets > 0)
1130 pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
1131 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1132 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1133
1134 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1135
1136 if (rc != 0)
1137 {
1138 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
1139 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1140 }
1141 else
1142 {
1143 size_t i;
1144 for (i = 0; i < (size_t)use_size_offsets; i++)
1145 {
1146 if (pmatch[i].rm_so >= 0)
1147 {
1148 fprintf(outfile, "%2d: ", (int)i);
1149 (void)pchars(dbuffer + pmatch[i].rm_so,
1150 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1151 fprintf(outfile, "\n");
1152 if (i == 0 && do_showrest)
1153 {
1154 fprintf(outfile, " 0+ ");
1155 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1156 outfile);
1157 fprintf(outfile, "\n");
1158 }
1159 }
1160 }
1161 }
1162 free(pmatch);
1163 }
1164
1165 /* Handle matching via the native interface - repeats for /g and /G */
1166
1167 else
1168 #endif /* !defined NOPOSIX */
1169
1170 for (;; gmatched++) /* Loop for /g or /G */
1171 {
1172 if (timeit)
1173 {
1174 register int i;
1175 clock_t time_taken;
1176 clock_t start_time = clock();
1177 for (i = 0; i < LOOPREPEAT; i++)
1178 count = pcre_exec(re, extra, (char *)bptr, len,
1179 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1180 time_taken = clock() - start_time;
1181 fprintf(outfile, "Execute time %.3f milliseconds\n",
1182 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1183 (double)CLOCKS_PER_SEC);
1184 }
1185
1186 /* If find_match_limit is set, we want to do repeated matches with
1187 varying limits in order to find the minimum value. */
1188
1189 if (find_match_limit)
1190 {
1191 int min = 0;
1192 int mid = 64;
1193 int max = -1;
1194
1195 if (extra == NULL)
1196 {
1197 extra = malloc(sizeof(pcre_extra));
1198 extra->flags = 0;
1199 }
1200 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1201
1202 for (;;)
1203 {
1204 extra->match_limit = mid;
1205 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1206 options | g_notempty, use_offsets, use_size_offsets);
1207 if (count == PCRE_ERROR_MATCHLIMIT)
1208 {
1209 /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1210 min = mid;
1211 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1212 }
1213 else if (count >= 0 || count == PCRE_ERROR_NOMATCH)
1214 {
1215 if (mid == min + 1)
1216 {
1217 fprintf(outfile, "Minimum match limit = %d\n", mid);
1218 break;
1219 }
1220 /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1221 max = mid;
1222 mid = (min + mid)/2;
1223 }
1224 else break; /* Some other error */
1225 }
1226
1227 extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1228 }
1229
1230 /* If callout_data is set, use the interface with additional data */
1231
1232 else if (callout_data_set)
1233 {
1234 if (extra == NULL)
1235 {
1236 extra = malloc(sizeof(pcre_extra));
1237 extra->flags = 0;
1238 }
1239 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1240 extra->callout_data = (void *)callout_data;
1241 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1242 options | g_notempty, use_offsets, use_size_offsets);
1243 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1244 }
1245
1246 /* The normal case is just to do the match once, with the default
1247 value of match_limit. */
1248
1249 else count = pcre_exec(re, extra, (char *)bptr, len,
1250 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1251
1252 if (count == 0)
1253 {
1254 fprintf(outfile, "Matched, but too many substrings\n");
1255 count = use_size_offsets/3;
1256 }
1257
1258 /* Matched */
1259
1260 if (count >= 0)
1261 {
1262 int i;
1263 for (i = 0; i < count * 2; i += 2)
1264 {
1265 if (use_offsets[i] < 0)
1266 fprintf(outfile, "%2d: <unset>\n", i/2);
1267 else
1268 {
1269 fprintf(outfile, "%2d: ", i/2);
1270 (void)pchars(bptr + use_offsets[i],
1271 use_offsets[i+1] - use_offsets[i], outfile);
1272 fprintf(outfile, "\n");
1273 if (i == 0)
1274 {
1275 if (do_showrest)
1276 {
1277 fprintf(outfile, " 0+ ");
1278 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1279 outfile);
1280 fprintf(outfile, "\n");
1281 }
1282 }
1283 }
1284 }
1285
1286 for (i = 0; i < 32; i++)
1287 {
1288 if ((copystrings & (1 << i)) != 0)
1289 {
1290 char copybuffer[16];
1291 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1292 i, copybuffer, sizeof(copybuffer));
1293 if (rc < 0)
1294 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1295 else
1296 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1297 }
1298 }
1299
1300 for (i = 0; i < 32; i++)
1301 {
1302 if ((getstrings & (1 << i)) != 0)
1303 {
1304 const char *substring;
1305 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1306 i, &substring);
1307 if (rc < 0)
1308 fprintf(outfile, "get substring %d failed %d\n", i, rc);
1309 else
1310 {
1311 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1312 /* free((void *)substring); */
1313 pcre_free_substring(substring);
1314 }
1315 }
1316 }
1317
1318 if (getlist)
1319 {
1320 const char **stringlist;
1321 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1322 &stringlist);
1323 if (rc < 0)
1324 fprintf(outfile, "get substring list failed %d\n", rc);
1325 else
1326 {
1327 for (i = 0; i < count; i++)
1328 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1329 if (stringlist[i] != NULL)
1330 fprintf(outfile, "string list not terminated by NULL\n");
1331 /* free((void *)stringlist); */
1332 pcre_free_substring_list(stringlist);
1333 }
1334 }
1335 }
1336
1337 /* Failed to match. If this is a /g or /G loop and we previously set
1338 g_notempty after a null match, this is not necessarily the end.
1339 We want to advance the start offset, and continue. Fudge the offset
1340 values to achieve this. We won't be at the end of the string - that
1341 was checked before setting g_notempty. */
1342
1343 else
1344 {
1345 if (g_notempty != 0)
1346 {
1347 use_offsets[0] = start_offset;
1348 use_offsets[1] = start_offset + 1;
1349 }
1350 else
1351 {
1352 if (gmatched == 0) /* Error if no previous matches */
1353 {
1354 if (count == -1) fprintf(outfile, "No match\n");
1355 else fprintf(outfile, "Error %d\n", count);
1356 }
1357 break; /* Out of the /g loop */
1358 }
1359 }
1360
1361 /* If not /g or /G we are done */
1362
1363 if (!do_g && !do_G) break;
1364
1365 /* If we have matched an empty string, first check to see if we are at
1366 the end of the subject. If so, the /g loop is over. Otherwise, mimic
1367 what Perl's /g option does. This turns out to be rather cunning. First
1368 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1369 same point. If this fails (picked up above) we advance to the next
1370 character. */
1371
1372 g_notempty = 0;
1373 if (use_offsets[0] == use_offsets[1])
1374 {
1375 if (use_offsets[0] == len) break;
1376 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1377 }
1378
1379 /* For /g, update the start offset, leaving the rest alone */
1380
1381 if (do_g) start_offset = use_offsets[1];
1382
1383 /* For /G, update the pointer and length */
1384
1385 else
1386 {
1387 bptr += use_offsets[1];
1388 len -= use_offsets[1];
1389 }
1390 } /* End of loop for /g and /G */
1391 } /* End of loop for data lines */
1392
1393 CONTINUE:
1394
1395 #if !defined NOPOSIX
1396 if (posix || do_posix) regfree(&preg);
1397 #endif
1398
1399 if (re != NULL) free(re);
1400 if (extra != NULL) free(extra);
1401 if (tables != NULL)
1402 {
1403 free((void *)tables);
1404 setlocale(LC_CTYPE, "C");
1405 }
1406 }
1407
1408 fprintf(outfile, "\n");
1409 return 0;
1410 }
1411
1412 /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12