/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory | Revision Log


Revision 87 - (show annotations) (download)
Sat Feb 24 21:41:21 2007 UTC (7 years, 2 months ago) by nigel
File MIME type: text/plain
File size: 55784 byte(s)
Load pcre-6.5 into code/trunk.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
46
47 #define PCRE_SPY /* For Win32 build, import data, not export */
48
49 /* We include pcre_internal.h because we need the internal info for displaying
50 the results of pcre_study() and we also need to know about the internal
51 macros, structures, and other internal data values; pcretest has "inside
52 information" compared to a program that strictly follows the PCRE API. */
53
54 #include "pcre_internal.h"
55
56 /* We need access to the data tables that PCRE uses. So as not to have to keep
57 two copies, we include the source file here, changing the names of the external
58 symbols to prevent clashes. */
59
60 #define _pcre_utf8_table1 utf8_table1
61 #define _pcre_utf8_table1_size utf8_table1_size
62 #define _pcre_utf8_table2 utf8_table2
63 #define _pcre_utf8_table3 utf8_table3
64 #define _pcre_utf8_table4 utf8_table4
65 #define _pcre_utt utt
66 #define _pcre_utt_size utt_size
67 #define _pcre_OP_lengths OP_lengths
68
69 #include "pcre_tables.c"
70
71 /* We also need the pcre_printint() function for printing out compiled
72 patterns. This function is in a separate file so that it can be included in
73 pcre_compile.c when that module is compiled with debugging enabled. */
74
75 #include "pcre_printint.src"
76
77
78 /* It is possible to compile this test program without including support for
79 testing the POSIX interface, though this is not available via the standard
80 Makefile. */
81
82 #if !defined NOPOSIX
83 #include "pcreposix.h"
84 #endif
85
86 /* It is also possible, for the benefit of the version imported into Exim, to
87 build pcretest without support for UTF8 (define NOUTF8), without the interface
88 to the DFA matcher (NODFA), and without the doublecheck of the old "info"
89 function (define NOINFOCHECK). */
90
91
92 /* Other parameters */
93
94 #ifndef CLOCKS_PER_SEC /* supply a value when it is not already defined */
95 #ifdef CLK_TCK
96 #define CLOCKS_PER_SEC CLK_TCK
97 #else
98 #define CLOCKS_PER_SEC 100
99 #endif
100 #endif
101
102 #define LOOPREPEAT 500000 /* iterations of the compile and study timing loops */
103
104 #define BUFFER_SIZE 30000 /* bytes allocated for the main line buffer */
105 #define PBUFFER_SIZE BUFFER_SIZE /* bytes allocated for pbuffer */
106 #define DBUFFER_SIZE BUFFER_SIZE /* bytes allocated for dbuffer */
107
108
109 /* Static variables */
110
111 static FILE *outfile; /* where results are written; static so the malloc wrappers can use it */
112 static int log_store = 0; /* non-zero => report the memory used by the compiled pattern */
113 static int callout_count; /* incremented by callout() each time callout_fail_id is reached */
114 static int callout_extra; /* non-zero => callout() also lists the captured substrings */
115 static int callout_fail_count; /* callout() yields 1 once callout_count reaches this */
116 static int callout_fail_id; /* the callout number that callout_count is counted for */
117 static int first_callout; /* cleared by callout(); while set, the subject is re-printed */
118 static int show_malloc; /* non-zero => the malloc/free wrappers trace every call */
119 static int use_utf8; /* non-zero => pchars() decodes its input as UTF-8 */
120 static size_t gotten_store; /* size passed to the most recent new_malloc() call */
121
122 static uschar *pbuffer = NULL; /* copy of the current pattern, printed from by callout() */
123
124
125
126 /*************************************************
127 * Read number from string *
128 *************************************************/
129
/* Parse a non-negative decimal integer by hand. We don't use strtoul() because
SunOS4 doesn't have it. Rather than mess around with conditional compilation,
just do the job by hand. It is only used for unpicking the -o argument, so it
is kept simple.

Leading white space is skipped and then every decimal digit that follows is
consumed. If there are no digits, the result is zero and *endptr points at the
first non-space character. A number that does not fit in an int saturates at
INT_MAX rather than overflowing (signed overflow is undefined behaviour); the
remaining digits are still consumed, so *endptr always ends up just past the
whole number.

Arguments:
  str        string to be converted
  endptr     where to put the end pointer

Returns:     the value, as an int (never negative)
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;

while (*str != 0 && isspace(*str)) str++;

while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* result*10 + digit fits in an int only if result <= (INT_MAX - digit)/10 */

  if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
  }

*endptr = str;
return(result);
}
150
151
152
153
154 /*************************************************
155 * Convert UTF-8 string to value *
156 *************************************************/
157
158 /* This function takes one or more bytes that represents a UTF-8 character,
159 and returns the value of the character.
160
161 Argument:
162 buffer a pointer to the byte vector
163 vptr a pointer to an int to receive the value
164
165 Returns: > 0 => the number of bytes consumed
166 -6 to 0 => malformed UTF-8 character at offset = (-return)
167 */
168
169 #if !defined NOUTF8
170
171 static int
172 utf82ord(unsigned char *buffer, int *vptr)
173 {
174 int c = *buffer++;
175 int d = c;
176 int i, j, s;
177
178 for (i = -1; i < 6; i++) /* i is number of additional bytes */
179 {
180 if ((d & 0x80) == 0) break;
181 d <<= 1;
182 }
183
184 if (i == -1) { *vptr = c; return 1; } /* ascii character */
185 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
186
187 /* i now has a value in the range 1-5 */
188
189 s = 6*i;
190 d = (c & utf8_table3[i]) << s;
191
192 for (j = 0; j < i; j++)
193 {
194 c = *buffer++;
195 if ((c & 0xc0) != 0x80) return -(j+1);
196 s -= 6;
197 d |= (c & 0x3f) << s;
198 }
199
200 /* Check that encoding was the correct unique one */
201
202 for (j = 0; j < utf8_table1_size; j++)
203 if (d <= utf8_table1[j]) break;
204 if (j != i) return -(i+1);
205
206 /* Valid value */
207
208 *vptr = d;
209 return i+1;
210 }
211
212 #endif
213
214
215
216 /*************************************************
217 * Convert character value to UTF-8 *
218 *************************************************/
219
220 /* This function takes an integer value in the range 0 - 0x7fffffff
221 and encodes it as a UTF-8 character in 0 to 6 bytes.
222
223 Arguments:
224 cvalue the character value
225 buffer pointer to buffer for result - at least 6 bytes long
226
227 Returns: number of characters placed in the buffer
228 */
229
230 static int
231 ord2utf8(int cvalue, uschar *buffer)
232 {
233 register int i, j;
234 for (i = 0; i < utf8_table1_size; i++)
235 if (cvalue <= utf8_table1[i]) break;
236 buffer += i;
237 for (j = i; j > 0; j--)
238 {
239 *buffer-- = 0x80 | (cvalue & 0x3f);
240 cvalue >>= 6;
241 }
242 *buffer = utf8_table2[i] | cvalue;
243 return i + 1;
244 }
245
246
247
248 /*************************************************
249 * Print character string *
250 *************************************************/
251
252 /* Character string printing function. Must handle UTF-8 strings in utf8
253 mode. Yields number of characters printed. If handed a NULL file, just counts
254 chars without printing. */
255
256 static int pchars(unsigned char *p, int length, FILE *f)
257 {
258 int c = 0;
259 int yield = 0;
260
261 while (length-- > 0)
262 {
263 #if !defined NOUTF8
264 if (use_utf8)
265 {
266 int rc = utf82ord(p, &c);
267
268 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
269 {
270 length -= rc - 1;
271 p += rc;
272 if (c < 256 && isprint(c))
273 {
274 if (f != NULL) fprintf(f, "%c", c);
275 yield++;
276 }
277 else
278 {
279 int n;
280 if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
281 yield += n;
282 }
283 continue;
284 }
285 }
286 #endif
287
288 /* Not UTF-8, or malformed UTF-8 */
289
290 if (isprint(c = *(p++)))
291 {
292 if (f != NULL) fprintf(f, "%c", c);
293 yield++;
294 }
295 else
296 {
297 if (f != NULL) fprintf(f, "\\x%02x", c);
298 yield += 4;
299 }
300 }
301
302 return yield;
303 }
304
305
306
307 /*************************************************
308 * Callout function *
309 *************************************************/
310
311 /* Called from PCRE as a result of the (?C) item. We print out where we are in
312 the match. Yield zero unless more callouts than the fail count, or the callout
313 data is not zero.

Argument:
  cb         the callout block handed over by PCRE

Returns:     the callout data value if that is non-zero; otherwise 1 once the
             callout numbered callout_fail_id has been reached
             callout_fail_count times, and 0 in every other case
*/
314
315 static int callout(pcre_callout_block *cb)
316 {
317 FILE *f = (first_callout | callout_extra)? outfile : NULL; /* NULL => pchars() only counts */
318 int i, pre_start, post_start, subject_length;
319
320 if (callout_extra)
321 {
322 fprintf(f, "Callout %d: last capture = %d\n",
323 cb->callout_number, cb->capture_last);
324
325 for (i = 0; i < cb->capture_top * 2; i += 2) /* offsets come in start/end pairs */
326 {
327 if (cb->offset_vector[i] < 0)
328 fprintf(f, "%2d: <unset>\n", i/2);
329 else
330 {
331 fprintf(f, "%2d: ", i/2);
332 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
333 cb->offset_vector[i+1] - cb->offset_vector[i], f);
334 fprintf(f, "\n");
335 }
336 }
337 }
338
339 /* Re-print the subject in canonical form, the first time or if giving full
340 details. On subsequent calls in the same match, we use pchars just to find the
341 printed lengths of the substrings. */
342
343 if (f != NULL) fprintf(f, "--->");
344
345 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
346 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
347 cb->current_position - cb->start_match, f);
348
349 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL); /* width only; prints nothing */
350
351 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
352 cb->subject_length - cb->current_position, f);
353
354 if (f != NULL) fprintf(f, "\n");
355
356 /* Always print appropriate indicators, with callout number if not already
357 shown. For automatic callouts, show the pattern offset. */
358
359 if (cb->callout_number == 255)
360 {
361 fprintf(outfile, "%+3d ", cb->pattern_position);
362 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
363 }
364 else
365 {
366 if (callout_extra) fprintf(outfile, " ");
367 else fprintf(outfile, "%3d ", cb->callout_number);
368 }
369
370 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
371 fprintf(outfile, "^"); /* marks the start of the match attempt */
372
373 if (post_start > 0)
374 {
375 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
376 fprintf(outfile, "^"); /* marks the current position */
377 }
378
379 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
380 fprintf(outfile, " ");
381
382 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
383 pbuffer + cb->pattern_position); /* the next pattern item, taken from the saved pattern */
384
385 fprintf(outfile, "\n");
386 first_callout = 0;
387
388 if (cb->callout_data != NULL)
389 {
390 int callout_data = *((int *)(cb->callout_data));
391 if (callout_data != 0)
392 {
393 fprintf(outfile, "Callout data = %d\n", callout_data);
394 return callout_data;
395 }
396 }
397
398 return (cb->callout_number != callout_fail_id)? 0 :
399 (++callout_count >= callout_fail_count)? 1 : 0;
400 }
401
402
403 /*************************************************
404 * Local malloc functions *
405 *************************************************/
406
407 /* Alternative malloc function, to test functionality and show the size of the
408 compiled re. */
409
410 static void *new_malloc(size_t size)
411 {
412 void *block = malloc(size);
413 gotten_store = size;
414 if (show_malloc)
415 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
416 return block;
417 }
418
419 static void new_free(void *block)
420 {
421 if (show_malloc)
422 fprintf(outfile, "free %p\n", block);
423 free(block);
424 }
425
426
427 /* For recursion malloc/free, to test stacking calls */
428
429 static void *stack_malloc(size_t size)
430 {
431 void *block = malloc(size);
432 if (show_malloc)
433 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
434 return block;
435 }
436
437 static void stack_free(void *block)
438 {
439 if (show_malloc)
440 fprintf(outfile, "stack_free %p\n", block);
441 free(block);
442 }
443
444
445 /*************************************************
446 * Call pcre_fullinfo() *
447 *************************************************/
448
449 /* Get one piece of information from the pcre_fullinfo() function */
450
451 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
452 {
453 int rc;
454 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
455 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
456 }
457
458
459
460 /*************************************************
461 * Byte flipping function *
462 *************************************************/
463
/* Reverse the order of the bytes in a 2-byte or a 4-byte quantity.

The work is done in unsigned long arithmetic. Shifting a signed long left
into its sign bit is undefined behaviour, which is what happened to the low
byte of a 4-byte flip where long is 32 bits wide.

Arguments:
  value      the quantity to flip, in the low-order bytes
  n          2 to flip the two low-order bytes; any other value flips the
               four low-order bytes

Returns:     the flipped quantity; no bits above the flipped bytes are set
*/

static long int
byteflip(long int value, int n)
{
unsigned long int v = (unsigned long int)value;

if (n == 2)
  return (long int)(((v & 0x00fful) << 8) | ((v & 0xff00ul) >> 8));

return (long int)(((v & 0x000000fful) << 24) |
                  ((v & 0x0000ff00ul) <<  8) |
                  ((v & 0x00ff0000ul) >>  8) |
                  ((v & 0xff000000ul) >> 24));
}
473
474
475
476
477 /*************************************************
478 * Check match or recursion limit *
479 *************************************************/
480
481 static int
482 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
483 int start_offset, int options, int *use_offsets, int use_size_offsets,
484 int flag, unsigned long int *limit, int errnumber, const char *msg)
485 {
486 int count;
487 int min = 0;
488 int mid = 64;
489 int max = -1;
490
491 extra->flags |= flag;
492
493 for (;;)
494 {
495 *limit = mid;
496
497 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
498 use_offsets, use_size_offsets);
499
500 if (count == errnumber)
501 {
502 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
503 min = mid;
504 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
505 }
506
507 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
508 count == PCRE_ERROR_PARTIAL)
509 {
510 if (mid == min + 1)
511 {
512 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
513 break;
514 }
515 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
516 max = mid;
517 mid = (min + mid)/2;
518 }
519 else break; /* Some other error */
520 }
521
522 extra->flags &= ~flag;
523 return count;
524 }
525
526
527
528 /*************************************************
529 * Main Program *
530 *************************************************/
531
532 /* Read lines from named file or stdin and write to named file or stdout; lines
533 consist of a regular expression, in delimiters and optionally followed by
534 options, followed by a set of test data, terminated by an empty line. */
535
536 int main(int argc, char **argv)
537 {
538 FILE *infile = stdin;
539 int options = 0;
540 int study_options = 0;
541 int op = 1;
542 int timeit = 0;
543 int showinfo = 0;
544 int showstore = 0;
545 int quiet = 0;
546 int size_offsets = 45;
547 int size_offsets_max;
548 int *offsets = NULL;
549 #if !defined NOPOSIX
550 int posix = 0;
551 #endif
552 int debug = 0;
553 int done = 0;
554 int all_use_dfa = 0;
555 int yield = 0;
556
557 unsigned char *buffer;
558 unsigned char *dbuffer;
559
560 /* Get buffers from malloc() so that Electric Fence will check their misuse
561 when I am debugging. */
562
563 buffer = (unsigned char *)malloc(BUFFER_SIZE);
564 dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
565 pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
566
567 /* The outfile variable is static so that new_malloc can use it. The _setmode()
568 stuff is some magic that I don't understand, but which apparently does good
569 things in Windows. It's related to line terminations. */
570
571 #if defined(_WIN32) || defined(WIN32)
572 _setmode( _fileno( stdout ), 0x8000 );
573 #endif /* defined(_WIN32) || defined(WIN32) */
574
575 outfile = stdout;
576
577 /* Scan options */
578
579 while (argc > 1 && argv[op][0] == '-')
580 {
581 unsigned char *endptr;
582
583 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
584 showstore = 1;
585 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
586 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
587 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
588 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
589 #if !defined NODFA
590 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
591 #endif
592 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
593 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
594 *endptr == 0))
595 {
596 op++;
597 argc--;
598 }
599 #if !defined NOPOSIX
600 else if (strcmp(argv[op], "-p") == 0) posix = 1;
601 #endif
602 else if (strcmp(argv[op], "-C") == 0)
603 {
604 int rc;
605 printf("PCRE version %s\n", pcre_version());
606 printf("Compiled with\n");
607 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
608 printf(" %sUTF-8 support\n", rc? "" : "No ");
609 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
610 printf(" %sUnicode properties support\n", rc? "" : "No ");
611 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
612 printf(" Newline character is %s\n", (rc == '\r')? "CR" : "LF");
613 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
614 printf(" Internal link size = %d\n", rc);
615 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
616 printf(" POSIX malloc threshold = %d\n", rc);
617 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
618 printf(" Default match limit = %d\n", rc);
619 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
620 printf(" Default recursion depth limit = %d\n", rc);
621 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
622 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
623 exit(0);
624 }
625 else
626 {
627 printf("** Unknown or malformed option %s\n", argv[op]);
628 printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
629 printf(" -C show PCRE compile-time options and exit\n");
630 printf(" -d debug: show compiled code; implies -i\n");
631 #if !defined NODFA
632 printf(" -dfa force DFA matching for all subjects\n");
633 #endif
634 printf(" -i show information about compiled pattern\n"
635 " -m output memory used information\n"
636 " -o <n> set size of offsets vector to <n>\n");
637 #if !defined NOPOSIX
638 printf(" -p use POSIX interface\n");
639 #endif
640 printf(" -s output store (memory) used information\n"
641 " -t time compilation and execution\n");
642 yield = 1;
643 goto EXIT;
644 }
645 op++;
646 argc--;
647 }
648
649 /* Get the store for the offsets vector, and remember what it was */
650
651 size_offsets_max = size_offsets;
652 offsets = (int *)malloc(size_offsets_max * sizeof(int));
653 if (offsets == NULL)
654 {
655 printf("** Failed to get %d bytes of memory for offsets vector\n",
656 size_offsets_max * sizeof(int));
657 yield = 1;
658 goto EXIT;
659 }
660
661 /* Sort out the input and output files */
662
663 if (argc > 1)
664 {
665 infile = fopen(argv[op], "rb");
666 if (infile == NULL)
667 {
668 printf("** Failed to open %s\n", argv[op]);
669 yield = 1;
670 goto EXIT;
671 }
672 }
673
674 if (argc > 2)
675 {
676 outfile = fopen(argv[op+1], "wb");
677 if (outfile == NULL)
678 {
679 printf("** Failed to open %s\n", argv[op+1]);
680 yield = 1;
681 goto EXIT;
682 }
683 }
684
685 /* Set alternative malloc function */
686
687 pcre_malloc = new_malloc;
688 pcre_free = new_free;
689 pcre_stack_malloc = stack_malloc;
690 pcre_stack_free = stack_free;
691
692 /* Heading line unless quiet, then prompt for first regex if stdin */
693
694 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
695
696 /* Main loop */
697
698 while (!done)
699 {
700 pcre *re = NULL;
701 pcre_extra *extra = NULL;
702
703 #if !defined NOPOSIX /* There are still compilers that require no indent */
704 regex_t preg;
705 int do_posix = 0;
706 #endif
707
708 const char *error;
709 unsigned char *p, *pp, *ppp;
710 unsigned char *to_file = NULL;
711 const unsigned char *tables = NULL;
712 unsigned long int true_size, true_study_size = 0;
713 size_t size, regex_gotten_store;
714 int do_study = 0;
715 int do_debug = debug;
716 int do_G = 0;
717 int do_g = 0;
718 int do_showinfo = showinfo;
719 int do_showrest = 0;
720 int do_flip = 0;
721 int erroroffset, len, delimiter;
722
723 use_utf8 = 0;
724
725 if (infile == stdin) printf(" re> ");
726 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
727 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
728 fflush(outfile);
729
730 p = buffer;
731 while (isspace(*p)) p++;
732 if (*p == 0) continue;
733
734 /* See if the pattern is to be loaded pre-compiled from a file. */
735
736 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
737 {
738 unsigned long int magic;
739 uschar sbuf[8];
740 FILE *f;
741
742 p++;
743 pp = p + (int)strlen((char *)p);
744 while (isspace(pp[-1])) pp--;
745 *pp = 0;
746
747 f = fopen((char *)p, "rb");
748 if (f == NULL)
749 {
750 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
751 continue;
752 }
753
754 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
755
756 true_size =
757 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
758 true_study_size =
759 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
760
761 re = (real_pcre *)new_malloc(true_size);
762 regex_gotten_store = gotten_store;
763
764 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
765
766 magic = ((real_pcre *)re)->magic_number;
767 if (magic != MAGIC_NUMBER)
768 {
769 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
770 {
771 do_flip = 1;
772 }
773 else
774 {
775 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
776 fclose(f);
777 continue;
778 }
779 }
780
781 fprintf(outfile, "Compiled regex%s loaded from %s\n",
782 do_flip? " (byte-inverted)" : "", p);
783
784 /* Need to know if UTF-8 for printing data strings */
785
786 new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
787 use_utf8 = (options & PCRE_UTF8) != 0;
788
789 /* Now see if there is any following study data */
790
791 if (true_study_size != 0)
792 {
793 pcre_study_data *psd;
794
795 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
796 extra->flags = PCRE_EXTRA_STUDY_DATA;
797
798 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
799 extra->study_data = psd;
800
801 if (fread(psd, 1, true_study_size, f) != true_study_size)
802 {
803 FAIL_READ:
804 fprintf(outfile, "Failed to read data from %s\n", p);
805 if (extra != NULL) new_free(extra);
806 if (re != NULL) new_free(re);
807 fclose(f);
808 continue;
809 }
810 fprintf(outfile, "Study data loaded from %s\n", p);
811 do_study = 1; /* To get the data output if requested */
812 }
813 else fprintf(outfile, "No study data\n");
814
815 fclose(f);
816 goto SHOW_INFO;
817 }
818
819 /* In-line pattern (the usual case). Get the delimiter and seek the end of
820 the pattern; if is isn't complete, read more. */
821
822 delimiter = *p++;
823
824 if (isalnum(delimiter) || delimiter == '\\')
825 {
826 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
827 goto SKIP_DATA;
828 }
829
830 pp = p;
831
832 for(;;)
833 {
834 while (*pp != 0)
835 {
836 if (*pp == '\\' && pp[1] != 0) pp++;
837 else if (*pp == delimiter) break;
838 pp++;
839 }
840 if (*pp != 0) break;
841
842 len = BUFFER_SIZE - (pp - buffer);
843 if (len < 256)
844 {
845 fprintf(outfile, "** Expression too long - missing delimiter?\n");
846 goto SKIP_DATA;
847 }
848
849 if (infile == stdin) printf(" > ");
850 if (fgets((char *)pp, len, infile) == NULL)
851 {
852 fprintf(outfile, "** Unexpected EOF\n");
853 done = 1;
854 goto CONTINUE;
855 }
856 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
857 }
858
859 /* If the first character after the delimiter is backslash, make
860 the pattern end with backslash. This is purely to provide a way
861 of testing for the error message when a pattern ends with backslash. */
862
863 if (pp[1] == '\\') *pp++ = '\\';
864
865 /* Terminate the pattern at the delimiter, and save a copy of the pattern
866 for callouts. */
867
868 *pp++ = 0;
869 strcpy((char *)pbuffer, (char *)p);
870
871 /* Look for options after final delimiter */
872
873 options = 0;
874 study_options = 0;
875 log_store = showstore; /* default from command line */
876
877 while (*pp != 0)
878 {
879 switch (*pp++)
880 {
881 case 'f': options |= PCRE_FIRSTLINE; break;
882 case 'g': do_g = 1; break;
883 case 'i': options |= PCRE_CASELESS; break;
884 case 'm': options |= PCRE_MULTILINE; break;
885 case 's': options |= PCRE_DOTALL; break;
886 case 'x': options |= PCRE_EXTENDED; break;
887
888 case '+': do_showrest = 1; break;
889 case 'A': options |= PCRE_ANCHORED; break;
890 case 'C': options |= PCRE_AUTO_CALLOUT; break;
891 case 'D': do_debug = do_showinfo = 1; break;
892 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
893 case 'F': do_flip = 1; break;
894 case 'G': do_G = 1; break;
895 case 'I': do_showinfo = 1; break;
896 case 'M': log_store = 1; break;
897 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
898
899 #if !defined NOPOSIX
900 case 'P': do_posix = 1; break;
901 #endif
902
903 case 'S': do_study = 1; break;
904 case 'U': options |= PCRE_UNGREEDY; break;
905 case 'X': options |= PCRE_EXTRA; break;
906 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
907 case '?': options |= PCRE_NO_UTF8_CHECK; break;
908
909 case 'L':
910 ppp = pp;
911 /* The '\r' test here is so that it works on Windows */
912 while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
913 *ppp = 0;
914 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
915 {
916 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
917 goto SKIP_DATA;
918 }
919 tables = pcre_maketables();
920 pp = ppp;
921 break;
922
923 case '>':
924 to_file = pp;
925 while (*pp != 0) pp++;
926 while (isspace(pp[-1])) pp--;
927 *pp = 0;
928 break;
929
930 case '\r': /* So that it works in Windows */
931 case '\n':
932 case ' ':
933 break;
934
935 default:
936 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
937 goto SKIP_DATA;
938 }
939 }
940
941 /* Handle compiling via the POSIX interface, which doesn't support the
942 timing, showing, or debugging options, nor the ability to pass over
943 local character tables. */
944
945 #if !defined NOPOSIX
946 if (posix || do_posix)
947 {
948 int rc;
949 int cflags = 0;
950
951 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
952 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
953 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
954 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
955 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
956
957 rc = regcomp(&preg, (char *)p, cflags);
958
959 /* Compilation failed; go back for another re, skipping to blank line
960 if non-interactive. */
961
962 if (rc != 0)
963 {
964 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
965 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
966 goto SKIP_DATA;
967 }
968 }
969
970 /* Handle compiling via the native interface */
971
972 else
973 #endif /* !defined NOPOSIX */
974
975 {
976 if (timeit)
977 {
978 register int i;
979 clock_t time_taken;
980 clock_t start_time = clock();
981 for (i = 0; i < LOOPREPEAT; i++)
982 {
983 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
984 if (re != NULL) free(re);
985 }
986 time_taken = clock() - start_time;
987 fprintf(outfile, "Compile time %.3f milliseconds\n",
988 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
989 (double)CLOCKS_PER_SEC);
990 }
991
992 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
993
994 /* Compilation failed; go back for another re, skipping to blank line
995 if non-interactive. */
996
997 if (re == NULL)
998 {
999 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1000 SKIP_DATA:
1001 if (infile != stdin)
1002 {
1003 for (;;)
1004 {
1005 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1006 {
1007 done = 1;
1008 goto CONTINUE;
1009 }
1010 len = (int)strlen((char *)buffer);
1011 while (len > 0 && isspace(buffer[len-1])) len--;
1012 if (len == 0) break;
1013 }
1014 fprintf(outfile, "\n");
1015 }
1016 goto CONTINUE;
1017 }
1018
1019 /* Compilation succeeded; print data if required. There are now two
1020 info-returning functions. The old one has a limited interface and
1021 returns only limited data. Check that it agrees with the newer one. */
1022
1023 if (log_store)
1024 fprintf(outfile, "Memory allocation (code space): %d\n",
1025 (int)(gotten_store -
1026 sizeof(real_pcre) -
1027 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1028
1029 /* Extract the size for possible writing before possibly flipping it,
1030 and remember the store that was got. */
1031
1032 true_size = ((real_pcre *)re)->size;
1033 regex_gotten_store = gotten_store;
1034
1035 /* If /S was present, study the regexp to generate additional info to
1036 help with the matching. */
1037
1038 if (do_study)
1039 {
1040 if (timeit)
1041 {
1042 register int i;
1043 clock_t time_taken;
1044 clock_t start_time = clock();
1045 for (i = 0; i < LOOPREPEAT; i++)
1046 extra = pcre_study(re, study_options, &error);
1047 time_taken = clock() - start_time;
1048 if (extra != NULL) free(extra);
1049 fprintf(outfile, " Study time %.3f milliseconds\n",
1050 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1051 (double)CLOCKS_PER_SEC);
1052 }
1053 extra = pcre_study(re, study_options, &error);
1054 if (error != NULL)
1055 fprintf(outfile, "Failed to study: %s\n", error);
1056 else if (extra != NULL)
1057 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1058 }
1059
1060 /* If the 'F' option was present, we flip the bytes of all the integer
1061 fields in the regex data block and the study block. This is to make it
1062 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1063 compiled on a different architecture. */
1064
1065 if (do_flip)
1066 {
1067 real_pcre *rre = (real_pcre *)re;
1068 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1069 rre->size = byteflip(rre->size, sizeof(rre->size));
1070 rre->options = byteflip(rre->options, sizeof(rre->options));
1071 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1072 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1073 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1074 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1075 rre->name_table_offset = byteflip(rre->name_table_offset,
1076 sizeof(rre->name_table_offset));
1077 rre->name_entry_size = byteflip(rre->name_entry_size,
1078 sizeof(rre->name_entry_size));
1079 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1080
1081 if (extra != NULL)
1082 {
1083 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1084 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1085 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1086 }
1087 }
1088
1089 /* Extract information from the compiled data if required */
1090
1091 SHOW_INFO:
1092
1093 if (do_showinfo)
1094 {
1095 unsigned long int get_options, all_options;
1096 #if !defined NOINFOCHECK
1097 int old_first_char, old_options, old_count;
1098 #endif
1099 int count, backrefmax, first_char, need_char;
1100 int nameentrysize, namecount;
1101 const uschar *nametable;
1102
1103 if (do_debug)
1104 {
1105 fprintf(outfile, "------------------------------------------------------------------\n");
1106 pcre_printint(re, outfile);
1107 }
1108
1109 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1110 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1111 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1112 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1113 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1114 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1115 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1116 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1117 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1118
1119 #if !defined NOINFOCHECK
1120 old_count = pcre_info(re, &old_options, &old_first_char);
1121 if (count < 0) fprintf(outfile,
1122 "Error %d from pcre_info()\n", count);
1123 else
1124 {
1125 if (old_count != count) fprintf(outfile,
1126 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1127 old_count);
1128
1129 if (old_first_char != first_char) fprintf(outfile,
1130 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1131 first_char, old_first_char);
1132
1133 if (old_options != (int)get_options) fprintf(outfile,
1134 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1135 get_options, old_options);
1136 }
1137 #endif
1138
1139 if (size != regex_gotten_store) fprintf(outfile,
1140 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1141 (int)size, (int)regex_gotten_store);
1142
1143 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1144 if (backrefmax > 0)
1145 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1146
1147 if (namecount > 0)
1148 {
1149 fprintf(outfile, "Named capturing subpatterns:\n");
1150 while (namecount-- > 0)
1151 {
1152 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1153 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1154 GET2(nametable, 0));
1155 nametable += nameentrysize;
1156 }
1157 }
1158
1159 /* The NOPARTIAL bit is a private bit in the options, so we have
1160 to fish it out via our back door */
1161
1162 all_options = ((real_pcre *)re)->options;
1163 if (do_flip)
1164 {
1165 all_options = byteflip(all_options, sizeof(all_options));
1166 }
1167
1168 if ((all_options & PCRE_NOPARTIAL) != 0)
1169 fprintf(outfile, "Partial matching not supported\n");
1170
1171 if (get_options == 0) fprintf(outfile, "No options\n");
1172 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s\n",
1173 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1174 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1175 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1176 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1177 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1178 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1179 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1180 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1181 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1182 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1183 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1184 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1185
1186 if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1187 fprintf(outfile, "Case state changes\n");
1188
1189 if (first_char == -1)
1190 {
1191 fprintf(outfile, "First char at start or follows \\n\n");
1192 }
1193 else if (first_char < 0)
1194 {
1195 fprintf(outfile, "No first char\n");
1196 }
1197 else
1198 {
1199 int ch = first_char & 255;
1200 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1201 "" : " (caseless)";
1202 if (isprint(ch))
1203 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1204 else
1205 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1206 }
1207
1208 if (need_char < 0)
1209 {
1210 fprintf(outfile, "No need char\n");
1211 }
1212 else
1213 {
1214 int ch = need_char & 255;
1215 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1216 "" : " (caseless)";
1217 if (isprint(ch))
1218 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1219 else
1220 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1221 }
1222
1223 /* Don't output study size; at present it is in any case a fixed
1224 value, but it varies, depending on the computer architecture, and
1225 so messes up the test suite. (And with the /F option, it might be
1226 flipped.) */
1227
1228 if (do_study)
1229 {
1230 if (extra == NULL)
1231 fprintf(outfile, "Study returned NULL\n");
1232 else
1233 {
1234 uschar *start_bits = NULL;
1235 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1236
1237 if (start_bits == NULL)
1238 fprintf(outfile, "No starting byte set\n");
1239 else
1240 {
1241 int i;
1242 int c = 24;
1243 fprintf(outfile, "Starting byte set: ");
1244 for (i = 0; i < 256; i++)
1245 {
1246 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1247 {
1248 if (c > 75)
1249 {
1250 fprintf(outfile, "\n ");
1251 c = 2;
1252 }
1253 if (isprint(i) && i != ' ')
1254 {
1255 fprintf(outfile, "%c ", i);
1256 c += 2;
1257 }
1258 else
1259 {
1260 fprintf(outfile, "\\x%02x ", i);
1261 c += 5;
1262 }
1263 }
1264 }
1265 fprintf(outfile, "\n");
1266 }
1267 }
1268 }
1269 }
1270
1271 /* If the '>' option was present, we write out the regex to a file, and
1272 that is all. The first 8 bytes of the file are the regex length and then
1273 the study length, in big-endian order. */
1274
1275 if (to_file != NULL)
1276 {
1277 FILE *f = fopen((char *)to_file, "wb");
1278 if (f == NULL)
1279 {
1280 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1281 }
1282 else
1283 {
1284 uschar sbuf[8];
1285 sbuf[0] = (true_size >> 24) & 255;
1286 sbuf[1] = (true_size >> 16) & 255;
1287 sbuf[2] = (true_size >> 8) & 255;
1288 sbuf[3] = (true_size) & 255;
1289
1290 sbuf[4] = (true_study_size >> 24) & 255;
1291 sbuf[5] = (true_study_size >> 16) & 255;
1292 sbuf[6] = (true_study_size >> 8) & 255;
1293 sbuf[7] = (true_study_size) & 255;
1294
1295 if (fwrite(sbuf, 1, 8, f) < 8 ||
1296 fwrite(re, 1, true_size, f) < true_size)
1297 {
1298 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1299 }
1300 else
1301 {
1302 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1303 if (extra != NULL)
1304 {
1305 if (fwrite(extra->study_data, 1, true_study_size, f) <
1306 true_study_size)
1307 {
1308 fprintf(outfile, "Write error on %s: %s\n", to_file,
1309 strerror(errno));
1310 }
1311 else fprintf(outfile, "Study data written to %s\n", to_file);
1312 }
1313 }
1314 fclose(f);
1315 }
1316
1317 new_free(re);
1318 if (extra != NULL) new_free(extra);
1319 if (tables != NULL) new_free((void *)tables);
1320 continue; /* With next regex */
1321 }
1322 } /* End of non-POSIX compile */
1323
1324 /* Read data lines and test them */
1325
1326 for (;;)
1327 {
1328 uschar *q;
1329 uschar *bptr = dbuffer;
1330 int *use_offsets = offsets;
1331 int use_size_offsets = size_offsets;
1332 int callout_data = 0;
1333 int callout_data_set = 0;
1334 int count, c;
1335 int copystrings = 0;
1336 int find_match_limit = 0;
1337 int getstrings = 0;
1338 int getlist = 0;
1339 int gmatched = 0;
1340 int start_offset = 0;
1341 int g_notempty = 0;
1342 int use_dfa = 0;
1343
1344 options = 0;
1345
1346 pcre_callout = callout;
1347 first_callout = 1;
1348 callout_extra = 0;
1349 callout_count = 0;
1350 callout_fail_count = 999999;
1351 callout_fail_id = -1;
1352 show_malloc = 0;
1353
1354 if (infile == stdin) printf("data> ");
1355 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1356 {
1357 done = 1;
1358 goto CONTINUE;
1359 }
1360 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1361
1362 len = (int)strlen((char *)buffer);
1363 while (len > 0 && isspace(buffer[len-1])) len--;
1364 buffer[len] = 0;
1365 if (len == 0) break;
1366
1367 p = buffer;
1368 while (isspace(*p)) p++;
1369
1370 q = dbuffer;
1371 while ((c = *p++) != 0)
1372 {
1373 int i = 0;
1374 int n = 0;
1375
1376 if (c == '\\') switch ((c = *p++))
1377 {
1378 case 'a': c = 7; break;
1379 case 'b': c = '\b'; break;
1380 case 'e': c = 27; break;
1381 case 'f': c = '\f'; break;
1382 case 'n': c = '\n'; break;
1383 case 'r': c = '\r'; break;
1384 case 't': c = '\t'; break;
1385 case 'v': c = '\v'; break;
1386
1387 case '0': case '1': case '2': case '3':
1388 case '4': case '5': case '6': case '7':
1389 c -= '0';
1390 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1391 c = c * 8 + *p++ - '0';
1392 break;
1393
1394 case 'x':
1395
1396 /* Handle \x{..} specially - new Perl thing for utf8 */
1397
1398 #if !defined NOUTF8
1399 if (*p == '{')
1400 {
1401 unsigned char *pt = p;
1402 c = 0;
1403 while (isxdigit(*(++pt)))
1404 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1405 if (*pt == '}')
1406 {
1407 unsigned char buff8[8];
1408 int ii, utn;
1409 utn = ord2utf8(c, buff8);
1410 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1411 c = buff8[ii]; /* Last byte */
1412 p = pt + 1;
1413 break;
1414 }
1415 /* Not correct form; fall through */
1416 }
1417 #endif
1418
1419 /* Ordinary \x */
1420
1421 c = 0;
1422 while (i++ < 2 && isxdigit(*p))
1423 {
1424 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1425 p++;
1426 }
1427 break;
1428
1429 case 0: /* \ followed by EOF allows for an empty line */
1430 p--;
1431 continue;
1432
1433 case '>':
1434 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1435 continue;
1436
1437 case 'A': /* Option setting */
1438 options |= PCRE_ANCHORED;
1439 continue;
1440
1441 case 'B':
1442 options |= PCRE_NOTBOL;
1443 continue;
1444
1445 case 'C':
1446 if (isdigit(*p)) /* Set copy string */
1447 {
1448 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1449 copystrings |= 1 << n;
1450 }
1451 else if (isalnum(*p))
1452 {
1453 uschar name[256];
1454 uschar *npp = name;
1455 while (isalnum(*p)) *npp++ = *p++;
1456 *npp = 0;
1457 n = pcre_get_stringnumber(re, (char *)name);
1458 if (n < 0)
1459 fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1460 else copystrings |= 1 << n;
1461 }
1462 else if (*p == '+')
1463 {
1464 callout_extra = 1;
1465 p++;
1466 }
1467 else if (*p == '-')
1468 {
1469 pcre_callout = NULL;
1470 p++;
1471 }
1472 else if (*p == '!')
1473 {
1474 callout_fail_id = 0;
1475 p++;
1476 while(isdigit(*p))
1477 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1478 callout_fail_count = 0;
1479 if (*p == '!')
1480 {
1481 p++;
1482 while(isdigit(*p))
1483 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1484 }
1485 }
1486 else if (*p == '*')
1487 {
1488 int sign = 1;
1489 callout_data = 0;
1490 if (*(++p) == '-') { sign = -1; p++; }
1491 while(isdigit(*p))
1492 callout_data = callout_data * 10 + *p++ - '0';
1493 callout_data *= sign;
1494 callout_data_set = 1;
1495 }
1496 continue;
1497
1498 #if !defined NODFA
1499 case 'D':
1500 #if !defined NOPOSIX
1501 if (posix || do_posix)
1502 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1503 else
1504 #endif
1505 use_dfa = 1;
1506 continue;
1507
1508 case 'F':
1509 options |= PCRE_DFA_SHORTEST;
1510 continue;
1511 #endif
1512
1513 case 'G':
1514 if (isdigit(*p))
1515 {
1516 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1517 getstrings |= 1 << n;
1518 }
1519 else if (isalnum(*p))
1520 {
1521 uschar name[256];
1522 uschar *npp = name;
1523 while (isalnum(*p)) *npp++ = *p++;
1524 *npp = 0;
1525 n = pcre_get_stringnumber(re, (char *)name);
1526 if (n < 0)
1527 fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1528 else getstrings |= 1 << n;
1529 }
1530 continue;
1531
1532 case 'L':
1533 getlist = 1;
1534 continue;
1535
1536 case 'M':
1537 find_match_limit = 1;
1538 continue;
1539
1540 case 'N':
1541 options |= PCRE_NOTEMPTY;
1542 continue;
1543
1544 case 'O':
1545 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1546 if (n > size_offsets_max)
1547 {
1548 size_offsets_max = n;
1549 free(offsets);
1550 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1551 if (offsets == NULL)
1552 {
1553 printf("** Failed to get %d bytes of memory for offsets vector\n",
1554 size_offsets_max * sizeof(int));
1555 yield = 1;
1556 goto EXIT;
1557 }
1558 }
1559 use_size_offsets = n;
1560 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1561 continue;
1562
1563 case 'P':
1564 options |= PCRE_PARTIAL;
1565 continue;
1566
1567 #if !defined NODFA
1568 case 'R':
1569 options |= PCRE_DFA_RESTART;
1570 continue;
1571 #endif
1572
1573 case 'S':
1574 show_malloc = 1;
1575 continue;
1576
1577 case 'Z':
1578 options |= PCRE_NOTEOL;
1579 continue;
1580
1581 case '?':
1582 options |= PCRE_NO_UTF8_CHECK;
1583 continue;
1584 }
1585 *q++ = c;
1586 }
1587 *q = 0;
1588 len = q - dbuffer;
1589
1590 if ((all_use_dfa || use_dfa) && find_match_limit)
1591 {
1592 printf("**Match limit not relevant for DFA matching: ignored\n");
1593 find_match_limit = 0;
1594 }
1595
1596 /* Handle matching via the POSIX interface, which does not
1597 support timing or playing with the match limit or callout data. */
1598
1599 #if !defined NOPOSIX
1600 if (posix || do_posix)
1601 {
1602 int rc;
1603 int eflags = 0;
1604 regmatch_t *pmatch = NULL;
1605 if (use_size_offsets > 0)
1606 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1607 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1608 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1609
1610 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1611
1612 if (rc != 0)
1613 {
1614 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1615 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1616 }
1617 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1618 != 0)
1619 {
1620 fprintf(outfile, "Matched with REG_NOSUB\n");
1621 }
1622 else
1623 {
1624 size_t i;
1625 for (i = 0; i < (size_t)use_size_offsets; i++)
1626 {
1627 if (pmatch[i].rm_so >= 0)
1628 {
1629 fprintf(outfile, "%2d: ", (int)i);
1630 (void)pchars(dbuffer + pmatch[i].rm_so,
1631 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1632 fprintf(outfile, "\n");
1633 if (i == 0 && do_showrest)
1634 {
1635 fprintf(outfile, " 0+ ");
1636 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1637 outfile);
1638 fprintf(outfile, "\n");
1639 }
1640 }
1641 }
1642 }
1643 free(pmatch);
1644 }
1645
1646 /* Handle matching via the native interface - repeats for /g and /G */
1647
1648 else
1649 #endif /* !defined NOPOSIX */
1650
1651 for (;; gmatched++) /* Loop for /g or /G */
1652 {
1653 if (timeit)
1654 {
1655 register int i;
1656 clock_t time_taken;
1657 clock_t start_time = clock();
1658
1659 #if !defined NODFA
1660 if (all_use_dfa || use_dfa)
1661 {
1662 int workspace[1000];
1663 for (i = 0; i < LOOPREPEAT; i++)
1664 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1665 options | g_notempty, use_offsets, use_size_offsets, workspace,
1666 sizeof(workspace)/sizeof(int));
1667 }
1668 else
1669 #endif
1670
1671 for (i = 0; i < LOOPREPEAT; i++)
1672 count = pcre_exec(re, extra, (char *)bptr, len,
1673 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1674
1675 time_taken = clock() - start_time;
1676 fprintf(outfile, "Execute time %.3f milliseconds\n",
1677 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1678 (double)CLOCKS_PER_SEC);
1679 }
1680
1681 /* If find_match_limit is set, we want to do repeated matches with
1682 varying limits in order to find the minimum value for the match limit and
1683 for the recursion limit. */
1684
1685 if (find_match_limit)
1686 {
1687 if (extra == NULL)
1688 {
1689 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1690 extra->flags = 0;
1691 }
1692
1693 count = check_match_limit(re, extra, bptr, len, start_offset,
1694 options|g_notempty, use_offsets, use_size_offsets,
1695 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
1696 PCRE_ERROR_MATCHLIMIT, "match()");
1697
1698 count = check_match_limit(re, extra, bptr, len, start_offset,
1699 options|g_notempty, use_offsets, use_size_offsets,
1700 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
1701 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
1702 }
1703
1704 /* If callout_data is set, use the interface with additional data */
1705
1706 else if (callout_data_set)
1707 {
1708 if (extra == NULL)
1709 {
1710 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1711 extra->flags = 0;
1712 }
1713 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1714 extra->callout_data = &callout_data;
1715 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1716 options | g_notempty, use_offsets, use_size_offsets);
1717 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1718 }
1719
1720 /* The normal case is just to do the match once, with the default
1721 value of match_limit. */
1722
1723 #if !defined NODFA
1724 else if (all_use_dfa || use_dfa)
1725 {
1726 int workspace[1000];
1727 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1728 options | g_notempty, use_offsets, use_size_offsets, workspace,
1729 sizeof(workspace)/sizeof(int));
1730 if (count == 0)
1731 {
1732 fprintf(outfile, "Matched, but too many subsidiary matches\n");
1733 count = use_size_offsets/2;
1734 }
1735 }
1736 #endif
1737
1738 else
1739 {
1740 count = pcre_exec(re, extra, (char *)bptr, len,
1741 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1742 if (count == 0)
1743 {
1744 fprintf(outfile, "Matched, but too many substrings\n");
1745 count = use_size_offsets/3;
1746 }
1747 }
1748
1749 /* Matched */
1750
1751 if (count >= 0)
1752 {
1753 int i;
1754 for (i = 0; i < count * 2; i += 2)
1755 {
1756 if (use_offsets[i] < 0)
1757 fprintf(outfile, "%2d: <unset>\n", i/2);
1758 else
1759 {
1760 fprintf(outfile, "%2d: ", i/2);
1761 (void)pchars(bptr + use_offsets[i],
1762 use_offsets[i+1] - use_offsets[i], outfile);
1763 fprintf(outfile, "\n");
1764 if (i == 0)
1765 {
1766 if (do_showrest)
1767 {
1768 fprintf(outfile, " 0+ ");
1769 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1770 outfile);
1771 fprintf(outfile, "\n");
1772 }
1773 }
1774 }
1775 }
1776
1777 for (i = 0; i < 32; i++)
1778 {
1779 if ((copystrings & (1 << i)) != 0)
1780 {
1781 char copybuffer[16];
1782 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1783 i, copybuffer, sizeof(copybuffer));
1784 if (rc < 0)
1785 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1786 else
1787 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1788 }
1789 }
1790
1791 for (i = 0; i < 32; i++)
1792 {
1793 if ((getstrings & (1 << i)) != 0)
1794 {
1795 const char *substring;
1796 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1797 i, &substring);
1798 if (rc < 0)
1799 fprintf(outfile, "get substring %d failed %d\n", i, rc);
1800 else
1801 {
1802 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1803 /* free((void *)substring); */
1804 pcre_free_substring(substring);
1805 }
1806 }
1807 }
1808
1809 if (getlist)
1810 {
1811 const char **stringlist;
1812 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1813 &stringlist);
1814 if (rc < 0)
1815 fprintf(outfile, "get substring list failed %d\n", rc);
1816 else
1817 {
1818 for (i = 0; i < count; i++)
1819 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1820 if (stringlist[i] != NULL)
1821 fprintf(outfile, "string list not terminated by NULL\n");
1822 /* free((void *)stringlist); */
1823 pcre_free_substring_list(stringlist);
1824 }
1825 }
1826 }
1827
1828 /* There was a partial match */
1829
1830 else if (count == PCRE_ERROR_PARTIAL)
1831 {
1832 fprintf(outfile, "Partial match");
1833 #if !defined NODFA
1834 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
1835 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
1836 bptr + use_offsets[0]);
1837 #endif
1838 fprintf(outfile, "\n");
1839 break; /* Out of the /g loop */
1840 }
1841
1842 /* Failed to match. If this is a /g or /G loop and we previously set
1843 g_notempty after a null match, this is not necessarily the end.
1844 We want to advance the start offset, and continue. In the case of UTF-8
1845 matching, the advance must be one character, not one byte. Fudge the
1846 offset values to achieve this. We won't be at the end of the string -
1847 that was checked before setting g_notempty. */
1848
1849 else
1850 {
1851 if (g_notempty != 0)
1852 {
1853 int onechar = 1;
1854 use_offsets[0] = start_offset;
1855 if (use_utf8)
1856 {
1857 while (start_offset + onechar < len)
1858 {
1859 int tb = bptr[start_offset+onechar];
1860 if (tb <= 127) break;
1861 tb &= 0xc0;
1862 if (tb != 0 && tb != 0xc0) onechar++;
1863 }
1864 }
1865 use_offsets[1] = start_offset + onechar;
1866 }
1867 else
1868 {
1869 if (count == PCRE_ERROR_NOMATCH)
1870 {
1871 if (gmatched == 0) fprintf(outfile, "No match\n");
1872 }
1873 else fprintf(outfile, "Error %d\n", count);
1874 break; /* Out of the /g loop */
1875 }
1876 }
1877
1878 /* If not /g or /G we are done */
1879
1880 if (!do_g && !do_G) break;
1881
1882 /* If we have matched an empty string, first check to see if we are at
1883 the end of the subject. If so, the /g loop is over. Otherwise, mimic
1884 what Perl's /g option does. This turns out to be rather cunning. First
1885 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1886 same point. If this fails (picked up above) we advance to the next
1887 character. */
1888
1889 g_notempty = 0;
1890 if (use_offsets[0] == use_offsets[1])
1891 {
1892 if (use_offsets[0] == len) break;
1893 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1894 }
1895
1896 /* For /g, update the start offset, leaving the rest alone */
1897
1898 if (do_g) start_offset = use_offsets[1];
1899
1900 /* For /G, update the pointer and length */
1901
1902 else
1903 {
1904 bptr += use_offsets[1];
1905 len -= use_offsets[1];
1906 }
1907 } /* End of loop for /g and /G */
1908 } /* End of loop for data lines */
1909
1910 CONTINUE:
1911
1912 #if !defined NOPOSIX
1913 if (posix || do_posix) regfree(&preg);
1914 #endif
1915
1916 if (re != NULL) new_free(re);
1917 if (extra != NULL) new_free(extra);
1918 if (tables != NULL)
1919 {
1920 new_free((void *)tables);
1921 setlocale(LC_CTYPE, "C");
1922 }
1923 }
1924
1925 if (infile == stdin) fprintf(outfile, "\n");
1926
1927 EXIT:
1928
1929 if (infile != NULL && infile != stdin) fclose(infile);
1930 if (outfile != NULL && outfile != stdout) fclose(outfile);
1931
1932 free(buffer);
1933 free(dbuffer);
1934 free(pbuffer);
1935 free(offsets);
1936
1937 return yield;
1938 }
1939
1940 /* End of pcretest.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12