/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 85 - (show annotations) (download)
Sat Feb 24 21:41:13 2007 UTC (7 years, 7 months ago) by nigel
File MIME type: text/plain
File size: 54545 byte(s)
Load pcre-6.4 into code/trunk.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #include <ctype.h>
40 #include <stdio.h>
41 #include <string.h>
42 #include <stdlib.h>
43 #include <time.h>
44 #include <locale.h>
45 #include <errno.h>
46
47 #define PCRE_SPY /* For Win32 build, import data, not export */
48
49 /* We include pcre_internal.h because we need the internal info for displaying
50 the results of pcre_study() and we also need to know about the internal
51 macros, structures, and other internal data values; pcretest has "inside
52 information" compared to a program that strictly follows the PCRE API. */
53
54 #include "pcre_internal.h"
55
56 /* We need access to the data tables that PCRE uses. So as not to have to keep
57 two copies, we include the source file here, changing the names of the external
58 symbols to prevent clashes. */
59
60 #define _pcre_utf8_table1 utf8_table1
61 #define _pcre_utf8_table1_size utf8_table1_size
62 #define _pcre_utf8_table2 utf8_table2
63 #define _pcre_utf8_table3 utf8_table3
64 #define _pcre_utf8_table4 utf8_table4
65 #define _pcre_utt utt
66 #define _pcre_utt_size utt_size
67 #define _pcre_OP_lengths OP_lengths
68
69 #include "pcre_tables.c"
70
71 /* We also need the pcre_printint() function for printing out compiled
72 patterns. This function is in a separate file so that it can be included in
73 pcre_compile.c when that module is compiled with debugging enabled. */
74
75 #include "pcre_printint.src"
76
77
78 /* It is possible to compile this test program without including support for
79 testing the POSIX interface, though this is not available via the standard
80 Makefile. */
81
82 #if !defined NOPOSIX
83 #include "pcreposix.h"
84 #endif
85
86 /* It is also possible, for the benefit of the version imported into Exim, to
87 build pcretest without support for UTF8 (define NOUTF8), without the interface
88 to the DFA matcher (NODFA), and without the doublecheck of the old "info"
89 function (define NOINFOCHECK). */
90
91
92 /* Other parameters */
93
94 #ifndef CLOCKS_PER_SEC /* Supply a fallback when <time.h> does not define it */
95 #ifdef CLK_TCK
96 #define CLOCKS_PER_SEC CLK_TCK
97 #else
98 #define CLOCKS_PER_SEC 100
99 #endif
100 #endif
101
102 #define LOOPREPEAT 500000 /* Iteration count for the compile/study timing loops */
103
104 #define BUFFER_SIZE 30000 /* Size of the input line buffer obtained in main() */
105 #define PBUFFER_SIZE BUFFER_SIZE /* Size of pbuffer, the saved copy of the pattern */
106 #define DBUFFER_SIZE BUFFER_SIZE /* Size of dbuffer, allocated alongside buffer */
107
108
109 /* Static variables */
110
111 static FILE *outfile; /* Output stream; static so callout() and the malloc wrappers can write to it */
112 static int log_store = 0; /* Non-zero => report the memory used by the compiled pattern */
113 static int callout_count; /* Incremented by callout() each time the callout number equals callout_fail_id */
114 static int callout_extra; /* Non-zero => callout() also lists the current captured substrings */
115 static int callout_fail_count; /* callout() returns 1 once callout_count reaches this value */
116 static int callout_fail_id; /* Callout number that is counted towards callout_fail_count */
117 static int first_callout; /* Non-zero => callout() re-prints the subject; cleared by callout() */
118 static int show_malloc; /* Non-zero => the malloc/free wrappers log each call to outfile */
119 static int use_utf8; /* Non-zero => pchars() decodes its input as UTF-8 */
120 static size_t gotten_store; /* Size passed to the most recent new_malloc() call */
121
122 static uschar *pbuffer = NULL; /* Copy of the current pattern, printed from by callout() */
123
124
125
126 /*************************************************
127 * Read number from string *
128 *************************************************/
129
130 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
131 around with conditional compilation, just do the job by hand. It is only used
132 for unpicking the -o argument, so just keep it simple.
133
134 Arguments:
135 str string to be converted
136 endptr where to put the end pointer
137
138 Returns: the converted value (an int)
139 */
140
/* Convert an optionally space-prefixed run of decimal digits to an int.
The address of the first character that was not consumed is stored through
endptr, so the caller can check that the whole argument was numeric. */

static int
get_value(unsigned char *str, unsigned char **endptr)
{
unsigned char *cursor = str;
int total = 0;

/* Step over leading white space */

for (; *cursor != 0 && isspace(*cursor); cursor++) ;

/* Accumulate the decimal digits */

for (; isdigit(*cursor); cursor++)
  total = total * 10 + (int)(*cursor - '0');

*endptr = cursor;
return(total);
}
150
151
152
153
154 /*************************************************
155 * Convert UTF-8 string to value *
156 *************************************************/
157
158 /* This function takes one or more bytes that represents a UTF-8 character,
159 and returns the value of the character.
160
161 Argument:
162 buffer a pointer to the byte vector
163 vptr a pointer to an int to receive the value
164
165 Returns: > 0 => the number of bytes consumed
166 -6 to 0 => malformed UTF-8 character at offset = (-return)
167 */
168
169 #if !defined NOUTF8
170
171 static int
172 utf82ord(unsigned char *buffer, int *vptr)
173 {
174 int c = *buffer++;
175 int d = c;
176 int i, j, s;
177
178 for (i = -1; i < 6; i++) /* i is number of additional bytes */
179 {
180 if ((d & 0x80) == 0) break;
181 d <<= 1;
182 }
183
184 if (i == -1) { *vptr = c; return 1; } /* ascii character */
185 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
186
187 /* i now has a value in the range 1-5 */
188
189 s = 6*i;
190 d = (c & utf8_table3[i]) << s;
191
192 for (j = 0; j < i; j++)
193 {
194 c = *buffer++;
195 if ((c & 0xc0) != 0x80) return -(j+1);
196 s -= 6;
197 d |= (c & 0x3f) << s;
198 }
199
200 /* Check that encoding was the correct unique one */
201
202 for (j = 0; j < utf8_table1_size; j++)
203 if (d <= utf8_table1[j]) break;
204 if (j != i) return -(i+1);
205
206 /* Valid value */
207
208 *vptr = d;
209 return i+1;
210 }
211
212 #endif
213
214
215
216 /*************************************************
217 * Convert character value to UTF-8 *
218 *************************************************/
219
220 /* This function takes an integer value in the range 0 - 0x7fffffff
221 and encodes it as a UTF-8 character in 1 to 6 bytes.
222
223 Arguments:
224 cvalue the character value
225 buffer pointer to buffer for result - at least 6 bytes long
226
227 Returns: number of bytes placed in the buffer
228 */
229
230 static int
231 ord2utf8(int cvalue, uschar *buffer)
232 {
233 register int i, j;
234 for (i = 0; i < utf8_table1_size; i++)
235 if (cvalue <= utf8_table1[i]) break;
236 buffer += i;
237 for (j = i; j > 0; j--)
238 {
239 *buffer-- = 0x80 | (cvalue & 0x3f);
240 cvalue >>= 6;
241 }
242 *buffer = utf8_table2[i] | cvalue;
243 return i + 1;
244 }
245
246
247
248 /*************************************************
249 * Print character string *
250 *************************************************/
251
252 /* Character string printing function. Must handle UTF-8 strings in utf8
253 mode. Yields number of characters printed. If handed a NULL file, just counts
254 chars without printing. */
255
256 static int pchars(unsigned char *p, int length, FILE *f)
257 {
258 int c = 0;
259 int yield = 0;
260
261 while (length-- > 0)
262 {
263 #if !defined NOUTF8
264 if (use_utf8)
265 {
266 int rc = utf82ord(p, &c);
267
268 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
269 {
270 length -= rc - 1;
271 p += rc;
272 if (c < 256 && isprint(c))
273 {
274 if (f != NULL) fprintf(f, "%c", c);
275 yield++;
276 }
277 else
278 {
279 int n;
280 if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
281 yield += n;
282 }
283 continue;
284 }
285 }
286 #endif
287
288 /* Not UTF-8, or malformed UTF-8 */
289
290 if (isprint(c = *(p++)))
291 {
292 if (f != NULL) fprintf(f, "%c", c);
293 yield++;
294 }
295 else
296 {
297 if (f != NULL) fprintf(f, "\\x%02x", c);
298 yield += 4;
299 }
300 }
301
302 return yield;
303 }
304
305
306
307 /*************************************************
308 * Callout function *
309 *************************************************/
310
311 /* Called from PCRE as a result of the (?C) item. We print out where we are in
312 the match. Yield zero unless more callouts than the fail count, or the callout
313 data is not zero. */
314
315 static int callout(pcre_callout_block *cb)
316 {
317 FILE *f = (first_callout | callout_extra)? outfile : NULL;
318 int i, pre_start, post_start, subject_length;
319
320 if (callout_extra)
321 {
322 fprintf(f, "Callout %d: last capture = %d\n",
323 cb->callout_number, cb->capture_last);
324
325 for (i = 0; i < cb->capture_top * 2; i += 2)
326 {
327 if (cb->offset_vector[i] < 0)
328 fprintf(f, "%2d: <unset>\n", i/2);
329 else
330 {
331 fprintf(f, "%2d: ", i/2);
332 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
333 cb->offset_vector[i+1] - cb->offset_vector[i], f);
334 fprintf(f, "\n");
335 }
336 }
337 }
338
339 /* Re-print the subject in canonical form, the first time or if giving full
340 datails. On subsequent calls in the same match, we use pchars just to find the
341 printed lengths of the substrings. */
342
343 if (f != NULL) fprintf(f, "--->");
344
345 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
346 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
347 cb->current_position - cb->start_match, f);
348
349 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
350
351 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
352 cb->subject_length - cb->current_position, f);
353
354 if (f != NULL) fprintf(f, "\n");
355
356 /* Always print appropriate indicators, with callout number if not already
357 shown. For automatic callouts, show the pattern offset. */
358
359 if (cb->callout_number == 255)
360 {
361 fprintf(outfile, "%+3d ", cb->pattern_position);
362 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
363 }
364 else
365 {
366 if (callout_extra) fprintf(outfile, " ");
367 else fprintf(outfile, "%3d ", cb->callout_number);
368 }
369
370 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
371 fprintf(outfile, "^");
372
373 if (post_start > 0)
374 {
375 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
376 fprintf(outfile, "^");
377 }
378
379 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
380 fprintf(outfile, " ");
381
382 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
383 pbuffer + cb->pattern_position);
384
385 fprintf(outfile, "\n");
386 first_callout = 0;
387
388 if (cb->callout_data != NULL)
389 {
390 int callout_data = *((int *)(cb->callout_data));
391 if (callout_data != 0)
392 {
393 fprintf(outfile, "Callout data = %d\n", callout_data);
394 return callout_data;
395 }
396 }
397
398 return (cb->callout_number != callout_fail_id)? 0 :
399 (++callout_count >= callout_fail_count)? 1 : 0;
400 }
401
402
403 /*************************************************
404 * Local malloc functions *
405 *************************************************/
406
407 /* Alternative malloc function, to test functionality and show the size of the
408 compiled re. */
409
410 static void *new_malloc(size_t size)
411 {
412 void *block = malloc(size);
413 gotten_store = size;
414 if (show_malloc)
415 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
416 return block;
417 }
418
419 static void new_free(void *block)
420 {
421 if (show_malloc)
422 fprintf(outfile, "free %p\n", block);
423 free(block);
424 }
425
426
427 /* For recursion malloc/free, to test stacking calls */
428
429 static void *stack_malloc(size_t size)
430 {
431 void *block = malloc(size);
432 if (show_malloc)
433 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
434 return block;
435 }
436
437 static void stack_free(void *block)
438 {
439 if (show_malloc)
440 fprintf(outfile, "stack_free %p\n", block);
441 free(block);
442 }
443
444
445 /*************************************************
446 * Call pcre_fullinfo() *
447 *************************************************/
448
449 /* Get one piece of information from the pcre_fullinfo() function */
450
451 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
452 {
453 int rc;
454 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
455 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
456 }
457
458
459
460 /*************************************************
461 * Byte flipping function *
462 *************************************************/
463
/* Reverse the byte order of an integer field.

Arguments:
  value    the field value; only the low-order 2 or 4 bytes are used
  n        the size of the field in bytes: 2 selects a 16-bit swap, any other
             value selects a 32-bit swap

Returns:   the byte-swapped value

The shifting is done in unsigned long. Shifting a signed long left by 24
overflows where long is 32 bits wide, which is undefined behaviour. */

static long int
byteflip(long int value, int n)
{
unsigned long int v = (unsigned long int)value;

if (n == 2)
  return (long int)(((v & 0x00ffUL) << 8) | ((v & 0xff00UL) >> 8));

return (long int)(((v & 0x000000ffUL) << 24) |
                  ((v & 0x0000ff00UL) <<  8) |
                  ((v & 0x00ff0000UL) >>  8) |
                  ((v & 0xff000000UL) >> 24));
}
473
474
475
476
477 /*************************************************
478 * Main Program *
479 *************************************************/
480
481 /* Read lines from named file or stdin and write to named file or stdout; lines
482 consist of a regular expression, in delimiters and optionally followed by
483 options, followed by a set of test data, terminated by an empty line. */
484
485 int main(int argc, char **argv)
486 {
487 FILE *infile = stdin;
488 int options = 0;
489 int study_options = 0;
490 int op = 1;
491 int timeit = 0;
492 int showinfo = 0;
493 int showstore = 0;
494 int size_offsets = 45;
495 int size_offsets_max;
496 int *offsets = NULL;
497 #if !defined NOPOSIX
498 int posix = 0;
499 #endif
500 int debug = 0;
501 int done = 0;
502 int all_use_dfa = 0;
503 int yield = 0;
504
505 unsigned char *buffer;
506 unsigned char *dbuffer;
507
508 /* Get buffers from malloc() so that Electric Fence will check their misuse
509 when I am debugging. */
510
511 buffer = (unsigned char *)malloc(BUFFER_SIZE);
512 dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
513 pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
514
515 /* The outfile variable is static so that new_malloc can use it. The _setmode()
516 stuff is some magic that I don't understand, but which apparently does good
517 things in Windows. It's related to line terminations. */
518
519 #if defined(_WIN32) || defined(WIN32)
520 _setmode( _fileno( stdout ), 0x8000 );
521 #endif /* defined(_WIN32) || defined(WIN32) */
522
523 outfile = stdout;
524
525 /* Scan options */
526
527 while (argc > 1 && argv[op][0] == '-')
528 {
529 unsigned char *endptr;
530
531 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
532 showstore = 1;
533 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
534 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
535 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
536 #if !defined NODFA
537 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
538 #endif
539 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
540 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
541 *endptr == 0))
542 {
543 op++;
544 argc--;
545 }
546 #if !defined NOPOSIX
547 else if (strcmp(argv[op], "-p") == 0) posix = 1;
548 #endif
549 else if (strcmp(argv[op], "-C") == 0)
550 {
551 int rc;
552 printf("PCRE version %s\n", pcre_version());
553 printf("Compiled with\n");
554 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
555 printf(" %sUTF-8 support\n", rc? "" : "No ");
556 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
557 printf(" %sUnicode properties support\n", rc? "" : "No ");
558 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
559 printf(" Newline character is %s\n", (rc == '\r')? "CR" : "LF");
560 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
561 printf(" Internal link size = %d\n", rc);
562 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
563 printf(" POSIX malloc threshold = %d\n", rc);
564 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
565 printf(" Default match limit = %d\n", rc);
566 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
567 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
568 exit(0);
569 }
570 else
571 {
572 printf("** Unknown or malformed option %s\n", argv[op]);
573 printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
574 printf(" -C show PCRE compile-time options and exit\n");
575 printf(" -d debug: show compiled code; implies -i\n");
576 #if !defined NODFA
577 printf(" -dfa force DFA matching for all subjects\n");
578 #endif
579 printf(" -i show information about compiled pattern\n"
580 " -m output memory used information\n"
581 " -o <n> set size of offsets vector to <n>\n");
582 #if !defined NOPOSIX
583 printf(" -p use POSIX interface\n");
584 #endif
585 printf(" -s output store (memory) used information\n"
586 " -t time compilation and execution\n");
587 yield = 1;
588 goto EXIT;
589 }
590 op++;
591 argc--;
592 }
593
594 /* Get the store for the offsets vector, and remember what it was */
595
596 size_offsets_max = size_offsets;
597 offsets = (int *)malloc(size_offsets_max * sizeof(int));
598 if (offsets == NULL)
599 {
600 printf("** Failed to get %d bytes of memory for offsets vector\n",
601 size_offsets_max * sizeof(int));
602 yield = 1;
603 goto EXIT;
604 }
605
606 /* Sort out the input and output files */
607
608 if (argc > 1)
609 {
610 infile = fopen(argv[op], "rb");
611 if (infile == NULL)
612 {
613 printf("** Failed to open %s\n", argv[op]);
614 yield = 1;
615 goto EXIT;
616 }
617 }
618
619 if (argc > 2)
620 {
621 outfile = fopen(argv[op+1], "wb");
622 if (outfile == NULL)
623 {
624 printf("** Failed to open %s\n", argv[op+1]);
625 yield = 1;
626 goto EXIT;
627 }
628 }
629
630 /* Set alternative malloc function */
631
632 pcre_malloc = new_malloc;
633 pcre_free = new_free;
634 pcre_stack_malloc = stack_malloc;
635 pcre_stack_free = stack_free;
636
637 /* Heading line, then prompt for first regex if stdin */
638
639 fprintf(outfile, "PCRE version %s\n\n", pcre_version());
640
641 /* Main loop */
642
643 while (!done)
644 {
645 pcre *re = NULL;
646 pcre_extra *extra = NULL;
647
648 #if !defined NOPOSIX /* There are still compilers that require no indent */
649 regex_t preg;
650 int do_posix = 0;
651 #endif
652
653 const char *error;
654 unsigned char *p, *pp, *ppp;
655 unsigned char *to_file = NULL;
656 const unsigned char *tables = NULL;
657 unsigned long int true_size, true_study_size = 0;
658 size_t size, regex_gotten_store;
659 int do_study = 0;
660 int do_debug = debug;
661 int do_G = 0;
662 int do_g = 0;
663 int do_showinfo = showinfo;
664 int do_showrest = 0;
665 int do_flip = 0;
666 int erroroffset, len, delimiter;
667
668 use_utf8 = 0;
669
670 if (infile == stdin) printf(" re> ");
671 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
672 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
673 fflush(outfile);
674
675 p = buffer;
676 while (isspace(*p)) p++;
677 if (*p == 0) continue;
678
679 /* See if the pattern is to be loaded pre-compiled from a file. */
680
681 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
682 {
683 unsigned long int magic;
684 uschar sbuf[8];
685 FILE *f;
686
687 p++;
688 pp = p + (int)strlen((char *)p);
689 while (isspace(pp[-1])) pp--;
690 *pp = 0;
691
692 f = fopen((char *)p, "rb");
693 if (f == NULL)
694 {
695 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
696 continue;
697 }
698
699 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
700
701 true_size =
702 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
703 true_study_size =
704 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
705
706 re = (real_pcre *)new_malloc(true_size);
707 regex_gotten_store = gotten_store;
708
709 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
710
711 magic = ((real_pcre *)re)->magic_number;
712 if (magic != MAGIC_NUMBER)
713 {
714 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
715 {
716 do_flip = 1;
717 }
718 else
719 {
720 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
721 fclose(f);
722 continue;
723 }
724 }
725
726 fprintf(outfile, "Compiled regex%s loaded from %s\n",
727 do_flip? " (byte-inverted)" : "", p);
728
729 /* Need to know if UTF-8 for printing data strings */
730
731 new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
732 use_utf8 = (options & PCRE_UTF8) != 0;
733
734 /* Now see if there is any following study data */
735
736 if (true_study_size != 0)
737 {
738 pcre_study_data *psd;
739
740 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
741 extra->flags = PCRE_EXTRA_STUDY_DATA;
742
743 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
744 extra->study_data = psd;
745
746 if (fread(psd, 1, true_study_size, f) != true_study_size)
747 {
748 FAIL_READ:
749 fprintf(outfile, "Failed to read data from %s\n", p);
750 if (extra != NULL) new_free(extra);
751 if (re != NULL) new_free(re);
752 fclose(f);
753 continue;
754 }
755 fprintf(outfile, "Study data loaded from %s\n", p);
756 do_study = 1; /* To get the data output if requested */
757 }
758 else fprintf(outfile, "No study data\n");
759
760 fclose(f);
761 goto SHOW_INFO;
762 }
763
764 /* In-line pattern (the usual case). Get the delimiter and seek the end of
765 the pattern; if it isn't complete, read more. */
766
767 delimiter = *p++;
768
769 if (isalnum(delimiter) || delimiter == '\\')
770 {
771 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
772 goto SKIP_DATA;
773 }
774
775 pp = p;
776
777 for(;;)
778 {
779 while (*pp != 0)
780 {
781 if (*pp == '\\' && pp[1] != 0) pp++;
782 else if (*pp == delimiter) break;
783 pp++;
784 }
785 if (*pp != 0) break;
786
787 len = BUFFER_SIZE - (pp - buffer);
788 if (len < 256)
789 {
790 fprintf(outfile, "** Expression too long - missing delimiter?\n");
791 goto SKIP_DATA;
792 }
793
794 if (infile == stdin) printf(" > ");
795 if (fgets((char *)pp, len, infile) == NULL)
796 {
797 fprintf(outfile, "** Unexpected EOF\n");
798 done = 1;
799 goto CONTINUE;
800 }
801 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
802 }
803
804 /* If the first character after the delimiter is backslash, make
805 the pattern end with backslash. This is purely to provide a way
806 of testing for the error message when a pattern ends with backslash. */
807
808 if (pp[1] == '\\') *pp++ = '\\';
809
810 /* Terminate the pattern at the delimiter, and save a copy of the pattern
811 for callouts. */
812
813 *pp++ = 0;
814 strcpy((char *)pbuffer, (char *)p);
815
816 /* Look for options after final delimiter */
817
818 options = 0;
819 study_options = 0;
820 log_store = showstore; /* default from command line */
821
822 while (*pp != 0)
823 {
824 switch (*pp++)
825 {
826 case 'f': options |= PCRE_FIRSTLINE; break;
827 case 'g': do_g = 1; break;
828 case 'i': options |= PCRE_CASELESS; break;
829 case 'm': options |= PCRE_MULTILINE; break;
830 case 's': options |= PCRE_DOTALL; break;
831 case 'x': options |= PCRE_EXTENDED; break;
832
833 case '+': do_showrest = 1; break;
834 case 'A': options |= PCRE_ANCHORED; break;
835 case 'C': options |= PCRE_AUTO_CALLOUT; break;
836 case 'D': do_debug = do_showinfo = 1; break;
837 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
838 case 'F': do_flip = 1; break;
839 case 'G': do_G = 1; break;
840 case 'I': do_showinfo = 1; break;
841 case 'M': log_store = 1; break;
842 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
843
844 #if !defined NOPOSIX
845 case 'P': do_posix = 1; break;
846 #endif
847
848 case 'S': do_study = 1; break;
849 case 'U': options |= PCRE_UNGREEDY; break;
850 case 'X': options |= PCRE_EXTRA; break;
851 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
852 case '?': options |= PCRE_NO_UTF8_CHECK; break;
853
854 case 'L':
855 ppp = pp;
856 /* The '\r' test here is so that it works on Windows */
857 while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
858 *ppp = 0;
859 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
860 {
861 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
862 goto SKIP_DATA;
863 }
864 tables = pcre_maketables();
865 pp = ppp;
866 break;
867
868 case '>':
869 to_file = pp;
870 while (*pp != 0) pp++;
871 while (isspace(pp[-1])) pp--;
872 *pp = 0;
873 break;
874
875 case '\r': /* So that it works in Windows */
876 case '\n':
877 case ' ':
878 break;
879
880 default:
881 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
882 goto SKIP_DATA;
883 }
884 }
885
886 /* Handle compiling via the POSIX interface, which doesn't support the
887 timing, showing, or debugging options, nor the ability to pass over
888 local character tables. */
889
890 #if !defined NOPOSIX
891 if (posix || do_posix)
892 {
893 int rc;
894 int cflags = 0;
895
896 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
897 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
898 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
899 rc = regcomp(&preg, (char *)p, cflags);
900
901 /* Compilation failed; go back for another re, skipping to blank line
902 if non-interactive. */
903
904 if (rc != 0)
905 {
906 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
907 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
908 goto SKIP_DATA;
909 }
910 }
911
912 /* Handle compiling via the native interface */
913
914 else
915 #endif /* !defined NOPOSIX */
916
917 {
918 if (timeit)
919 {
920 register int i;
921 clock_t time_taken;
922 clock_t start_time = clock();
923 for (i = 0; i < LOOPREPEAT; i++)
924 {
925 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
926 if (re != NULL) free(re);
927 }
928 time_taken = clock() - start_time;
929 fprintf(outfile, "Compile time %.3f milliseconds\n",
930 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
931 (double)CLOCKS_PER_SEC);
932 }
933
934 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
935
936 /* Compilation failed; go back for another re, skipping to blank line
937 if non-interactive. */
938
939 if (re == NULL)
940 {
941 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
942 SKIP_DATA:
943 if (infile != stdin)
944 {
945 for (;;)
946 {
947 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
948 {
949 done = 1;
950 goto CONTINUE;
951 }
952 len = (int)strlen((char *)buffer);
953 while (len > 0 && isspace(buffer[len-1])) len--;
954 if (len == 0) break;
955 }
956 fprintf(outfile, "\n");
957 }
958 goto CONTINUE;
959 }
960
961 /* Compilation succeeded; print data if required. There are now two
962 info-returning functions. The old one has a limited interface and
963 returns only limited data. Check that it agrees with the newer one. */
964
965 if (log_store)
966 fprintf(outfile, "Memory allocation (code space): %d\n",
967 (int)(gotten_store -
968 sizeof(real_pcre) -
969 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
970
971 /* Extract the size for possible writing before possibly flipping it,
972 and remember the store that was got. */
973
974 true_size = ((real_pcre *)re)->size;
975 regex_gotten_store = gotten_store;
976
977 /* If /S was present, study the regexp to generate additional info to
978 help with the matching. */
979
980 if (do_study)
981 {
982 if (timeit)
983 {
984 register int i;
985 clock_t time_taken;
986 clock_t start_time = clock();
987 for (i = 0; i < LOOPREPEAT; i++)
988 extra = pcre_study(re, study_options, &error);
989 time_taken = clock() - start_time;
990 if (extra != NULL) free(extra);
991 fprintf(outfile, " Study time %.3f milliseconds\n",
992 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
993 (double)CLOCKS_PER_SEC);
994 }
995 extra = pcre_study(re, study_options, &error);
996 if (error != NULL)
997 fprintf(outfile, "Failed to study: %s\n", error);
998 else if (extra != NULL)
999 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1000 }
1001
1002 /* If the 'F' option was present, we flip the bytes of all the integer
1003 fields in the regex data block and the study block. This is to make it
1004 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1005 compiled on a different architecture. */
1006
1007 if (do_flip)
1008 {
1009 real_pcre *rre = (real_pcre *)re;
1010 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1011 rre->size = byteflip(rre->size, sizeof(rre->size));
1012 rre->options = byteflip(rre->options, sizeof(rre->options));
1013 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1014 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1015 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1016 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1017 rre->name_table_offset = byteflip(rre->name_table_offset,
1018 sizeof(rre->name_table_offset));
1019 rre->name_entry_size = byteflip(rre->name_entry_size,
1020 sizeof(rre->name_entry_size));
1021 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1022
1023 if (extra != NULL)
1024 {
1025 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1026 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1027 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1028 }
1029 }
1030
1031 /* Extract information from the compiled data if required */
1032
1033 SHOW_INFO:
1034
1035 if (do_showinfo)
1036 {
1037 unsigned long int get_options, all_options;
1038 #if !defined NOINFOCHECK
1039 int old_first_char, old_options, old_count;
1040 #endif
1041 int count, backrefmax, first_char, need_char;
1042 int nameentrysize, namecount;
1043 const uschar *nametable;
1044
1045 if (do_debug)
1046 {
1047 fprintf(outfile, "------------------------------------------------------------------\n");
1048 pcre_printint(re, outfile);
1049 }
1050
1051 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1052 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1053 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1054 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1055 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1056 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1057 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1058 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1059 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1060
1061 #if !defined NOINFOCHECK
1062 old_count = pcre_info(re, &old_options, &old_first_char);
1063 if (count < 0) fprintf(outfile,
1064 "Error %d from pcre_info()\n", count);
1065 else
1066 {
1067 if (old_count != count) fprintf(outfile,
1068 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1069 old_count);
1070
1071 if (old_first_char != first_char) fprintf(outfile,
1072 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1073 first_char, old_first_char);
1074
1075 if (old_options != (int)get_options) fprintf(outfile,
1076 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1077 get_options, old_options);
1078 }
1079 #endif
1080
1081 if (size != regex_gotten_store) fprintf(outfile,
1082 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1083 (int)size, (int)regex_gotten_store);
1084
1085 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1086 if (backrefmax > 0)
1087 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1088
1089 if (namecount > 0)
1090 {
1091 fprintf(outfile, "Named capturing subpatterns:\n");
1092 while (namecount-- > 0)
1093 {
1094 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1095 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1096 GET2(nametable, 0));
1097 nametable += nameentrysize;
1098 }
1099 }
1100
1101 /* The NOPARTIAL bit is a private bit in the options, so we have
1102 to fish it out via our back door */
1103
1104 all_options = ((real_pcre *)re)->options;
1105 if (do_flip)
1106 {
1107 all_options = byteflip(all_options, sizeof(all_options));
1108 }
1109
1110 if ((all_options & PCRE_NOPARTIAL) != 0)
1111 fprintf(outfile, "Partial matching not supported\n");
1112
1113 if (get_options == 0) fprintf(outfile, "No options\n");
1114 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",
1115 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1116 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1117 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1118 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1119 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1120 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1121 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1122 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1123 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1124 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1125 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1126
1127 if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1128 fprintf(outfile, "Case state changes\n");
1129
1130 if (first_char == -1)
1131 {
1132 fprintf(outfile, "First char at start or follows \\n\n");
1133 }
1134 else if (first_char < 0)
1135 {
1136 fprintf(outfile, "No first char\n");
1137 }
1138 else
1139 {
1140 int ch = first_char & 255;
1141 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1142 "" : " (caseless)";
1143 if (isprint(ch))
1144 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1145 else
1146 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1147 }
1148
1149 if (need_char < 0)
1150 {
1151 fprintf(outfile, "No need char\n");
1152 }
1153 else
1154 {
1155 int ch = need_char & 255;
1156 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1157 "" : " (caseless)";
1158 if (isprint(ch))
1159 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1160 else
1161 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1162 }
1163
1164 /* Don't output study size; at present it is in any case a fixed
1165 value, but it varies, depending on the computer architecture, and
1166 so messes up the test suite. (And with the /F option, it might be
1167 flipped.) */
1168
1169 if (do_study)
1170 {
1171 if (extra == NULL)
1172 fprintf(outfile, "Study returned NULL\n");
1173 else
1174 {
1175 uschar *start_bits = NULL;
1176 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1177
1178 if (start_bits == NULL)
1179 fprintf(outfile, "No starting byte set\n");
1180 else
1181 {
1182 int i;
1183 int c = 24;
1184 fprintf(outfile, "Starting byte set: ");
1185 for (i = 0; i < 256; i++)
1186 {
1187 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1188 {
1189 if (c > 75)
1190 {
1191 fprintf(outfile, "\n ");
1192 c = 2;
1193 }
1194 if (isprint(i) && i != ' ')
1195 {
1196 fprintf(outfile, "%c ", i);
1197 c += 2;
1198 }
1199 else
1200 {
1201 fprintf(outfile, "\\x%02x ", i);
1202 c += 5;
1203 }
1204 }
1205 }
1206 fprintf(outfile, "\n");
1207 }
1208 }
1209 }
1210 }
1211
1212 /* If the '>' option was present, we write out the regex to a file, and
1213 that is all. The first 8 bytes of the file are the regex length and then
1214 the study length, in big-endian order. */
1215
1216 if (to_file != NULL)
1217 {
1218 FILE *f = fopen((char *)to_file, "wb");
1219 if (f == NULL)
1220 {
1221 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1222 }
1223 else
1224 {
1225 uschar sbuf[8];
1226 sbuf[0] = (true_size >> 24) & 255;
1227 sbuf[1] = (true_size >> 16) & 255;
1228 sbuf[2] = (true_size >> 8) & 255;
1229 sbuf[3] = (true_size) & 255;
1230
1231 sbuf[4] = (true_study_size >> 24) & 255;
1232 sbuf[5] = (true_study_size >> 16) & 255;
1233 sbuf[6] = (true_study_size >> 8) & 255;
1234 sbuf[7] = (true_study_size) & 255;
1235
1236 if (fwrite(sbuf, 1, 8, f) < 8 ||
1237 fwrite(re, 1, true_size, f) < true_size)
1238 {
1239 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1240 }
1241 else
1242 {
1243 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1244 if (extra != NULL)
1245 {
1246 if (fwrite(extra->study_data, 1, true_study_size, f) <
1247 true_study_size)
1248 {
1249 fprintf(outfile, "Write error on %s: %s\n", to_file,
1250 strerror(errno));
1251 }
1252 else fprintf(outfile, "Study data written to %s\n", to_file);
1253 }
1254 }
1255 fclose(f);
1256 }
1257
1258 new_free(re);
1259 if (extra != NULL) new_free(extra);
1260 if (tables != NULL) new_free((void *)tables);
1261 continue; /* With next regex */
1262 }
1263 } /* End of non-POSIX compile */
1264
1265 /* Read data lines and test them */
1266
1267 for (;;)
1268 {
1269 unsigned char *q;
1270 unsigned char *bptr = dbuffer;
1271 int *use_offsets = offsets;
1272 int use_size_offsets = size_offsets;
1273 int callout_data = 0;
1274 int callout_data_set = 0;
1275 int count, c;
1276 int copystrings = 0;
1277 int find_match_limit = 0;
1278 int getstrings = 0;
1279 int getlist = 0;
1280 int gmatched = 0;
1281 int start_offset = 0;
1282 int g_notempty = 0;
1283 int use_dfa = 0;
1284
1285 options = 0;
1286
1287 pcre_callout = callout;
1288 first_callout = 1;
1289 callout_extra = 0;
1290 callout_count = 0;
1291 callout_fail_count = 999999;
1292 callout_fail_id = -1;
1293 show_malloc = 0;
1294
1295 if (infile == stdin) printf("data> ");
1296 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1297 {
1298 done = 1;
1299 goto CONTINUE;
1300 }
1301 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1302
1303 len = (int)strlen((char *)buffer);
1304 while (len > 0 && isspace(buffer[len-1])) len--;
1305 buffer[len] = 0;
1306 if (len == 0) break;
1307
1308 p = buffer;
1309 while (isspace(*p)) p++;
1310
1311 q = dbuffer;
1312 while ((c = *p++) != 0)
1313 {
1314 int i = 0;
1315 int n = 0;
1316
1317 if (c == '\\') switch ((c = *p++))
1318 {
1319 case 'a': c = 7; break;
1320 case 'b': c = '\b'; break;
1321 case 'e': c = 27; break;
1322 case 'f': c = '\f'; break;
1323 case 'n': c = '\n'; break;
1324 case 'r': c = '\r'; break;
1325 case 't': c = '\t'; break;
1326 case 'v': c = '\v'; break;
1327
1328 case '0': case '1': case '2': case '3':
1329 case '4': case '5': case '6': case '7':
1330 c -= '0';
1331 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1332 c = c * 8 + *p++ - '0';
1333 break;
1334
1335 case 'x':
1336
1337 /* Handle \x{..} specially - new Perl thing for utf8 */
1338
1339 #if !defined NOUTF8
1340 if (*p == '{')
1341 {
1342 unsigned char *pt = p;
1343 c = 0;
1344 while (isxdigit(*(++pt)))
1345 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1346 if (*pt == '}')
1347 {
1348 unsigned char buff8[8];
1349 int ii, utn;
1350 utn = ord2utf8(c, buff8);
1351 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1352 c = buff8[ii]; /* Last byte */
1353 p = pt + 1;
1354 break;
1355 }
1356 /* Not correct form; fall through */
1357 }
1358 #endif
1359
1360 /* Ordinary \x */
1361
1362 c = 0;
1363 while (i++ < 2 && isxdigit(*p))
1364 {
1365 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1366 p++;
1367 }
1368 break;
1369
1370 case 0: /* \ followed by EOF allows for an empty line */
1371 p--;
1372 continue;
1373
1374 case '>':
1375 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1376 continue;
1377
1378 case 'A': /* Option setting */
1379 options |= PCRE_ANCHORED;
1380 continue;
1381
1382 case 'B':
1383 options |= PCRE_NOTBOL;
1384 continue;
1385
1386 case 'C':
1387 if (isdigit(*p)) /* Set copy string */
1388 {
1389 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1390 copystrings |= 1 << n;
1391 }
1392 else if (isalnum(*p))
1393 {
1394 uschar name[256];
1395 uschar *npp = name;
1396 while (isalnum(*p)) *npp++ = *p++;
1397 *npp = 0;
1398 n = pcre_get_stringnumber(re, (char *)name);
1399 if (n < 0)
1400 fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1401 else copystrings |= 1 << n;
1402 }
1403 else if (*p == '+')
1404 {
1405 callout_extra = 1;
1406 p++;
1407 }
1408 else if (*p == '-')
1409 {
1410 pcre_callout = NULL;
1411 p++;
1412 }
1413 else if (*p == '!')
1414 {
1415 callout_fail_id = 0;
1416 p++;
1417 while(isdigit(*p))
1418 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1419 callout_fail_count = 0;
1420 if (*p == '!')
1421 {
1422 p++;
1423 while(isdigit(*p))
1424 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1425 }
1426 }
1427 else if (*p == '*')
1428 {
1429 int sign = 1;
1430 callout_data = 0;
1431 if (*(++p) == '-') { sign = -1; p++; }
1432 while(isdigit(*p))
1433 callout_data = callout_data * 10 + *p++ - '0';
1434 callout_data *= sign;
1435 callout_data_set = 1;
1436 }
1437 continue;
1438
1439 #if !defined NODFA
1440 case 'D':
1441 #if !defined NOPOSIX
1442 if (posix || do_posix)
1443 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1444 else
1445 #endif
1446 use_dfa = 1;
1447 continue;
1448
1449 case 'F':
1450 options |= PCRE_DFA_SHORTEST;
1451 continue;
1452 #endif
1453
1454 case 'G':
1455 if (isdigit(*p))
1456 {
1457 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1458 getstrings |= 1 << n;
1459 }
1460 else if (isalnum(*p))
1461 {
1462 uschar name[256];
1463 uschar *npp = name;
1464 while (isalnum(*p)) *npp++ = *p++;
1465 *npp = 0;
1466 n = pcre_get_stringnumber(re, (char *)name);
1467 if (n < 0)
1468 fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1469 else getstrings |= 1 << n;
1470 }
1471 continue;
1472
1473 case 'L':
1474 getlist = 1;
1475 continue;
1476
1477 case 'M':
1478 find_match_limit = 1;
1479 continue;
1480
1481 case 'N':
1482 options |= PCRE_NOTEMPTY;
1483 continue;
1484
1485 case 'O':
1486 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1487 if (n > size_offsets_max)
1488 {
1489 size_offsets_max = n;
1490 free(offsets);
1491 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1492 if (offsets == NULL)
1493 {
1494 printf("** Failed to get %d bytes of memory for offsets vector\n",
1495 size_offsets_max * sizeof(int));
1496 yield = 1;
1497 goto EXIT;
1498 }
1499 }
1500 use_size_offsets = n;
1501 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1502 continue;
1503
1504 case 'P':
1505 options |= PCRE_PARTIAL;
1506 continue;
1507
1508 #if !defined NODFA
1509 case 'R':
1510 options |= PCRE_DFA_RESTART;
1511 continue;
1512 #endif
1513
1514 case 'S':
1515 show_malloc = 1;
1516 continue;
1517
1518 case 'Z':
1519 options |= PCRE_NOTEOL;
1520 continue;
1521
1522 case '?':
1523 options |= PCRE_NO_UTF8_CHECK;
1524 continue;
1525 }
1526 *q++ = c;
1527 }
1528 *q = 0;
1529 len = q - dbuffer;
1530
1531 if ((all_use_dfa || use_dfa) && find_match_limit)
1532 {
1533 printf("**Match limit not relevant for DFA matching: ignored\n");
1534 find_match_limit = 0;
1535 }
1536
1537 /* Handle matching via the POSIX interface, which does not
1538 support timing or playing with the match limit or callout data. */
1539
1540 #if !defined NOPOSIX
1541 if (posix || do_posix)
1542 {
1543 int rc;
1544 int eflags = 0;
1545 regmatch_t *pmatch = NULL;
1546 if (use_size_offsets > 0)
1547 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1548 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1549 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1550
1551 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1552
1553 if (rc != 0)
1554 {
1555 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1556 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1557 }
1558 else
1559 {
1560 size_t i;
1561 for (i = 0; i < (size_t)use_size_offsets; i++)
1562 {
1563 if (pmatch[i].rm_so >= 0)
1564 {
1565 fprintf(outfile, "%2d: ", (int)i);
1566 (void)pchars(dbuffer + pmatch[i].rm_so,
1567 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1568 fprintf(outfile, "\n");
1569 if (i == 0 && do_showrest)
1570 {
1571 fprintf(outfile, " 0+ ");
1572 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1573 outfile);
1574 fprintf(outfile, "\n");
1575 }
1576 }
1577 }
1578 }
1579 free(pmatch);
1580 }
1581
1582 /* Handle matching via the native interface - repeats for /g and /G */
1583
1584 else
1585 #endif /* !defined NOPOSIX */
1586
1587 for (;; gmatched++) /* Loop for /g or /G */
1588 {
1589 if (timeit)
1590 {
1591 register int i;
1592 clock_t time_taken;
1593 clock_t start_time = clock();
1594
1595 #if !defined NODFA
1596 if (all_use_dfa || use_dfa)
1597 {
1598 int workspace[1000];
1599 for (i = 0; i < LOOPREPEAT; i++)
1600 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1601 options | g_notempty, use_offsets, use_size_offsets, workspace,
1602 sizeof(workspace)/sizeof(int));
1603 }
1604 else
1605 #endif
1606
1607 for (i = 0; i < LOOPREPEAT; i++)
1608 count = pcre_exec(re, extra, (char *)bptr, len,
1609 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1610
1611 time_taken = clock() - start_time;
1612 fprintf(outfile, "Execute time %.3f milliseconds\n",
1613 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1614 (double)CLOCKS_PER_SEC);
1615 }
1616
1617 /* If find_match_limit is set, we want to do repeated matches with
1618 varying limits in order to find the minimum value. */
1619
1620 if (find_match_limit)
1621 {
1622 int min = 0;
1623 int mid = 64;
1624 int max = -1;
1625
1626 if (extra == NULL)
1627 {
1628 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1629 extra->flags = 0;
1630 }
1631 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1632
1633 for (;;)
1634 {
1635 extra->match_limit = mid;
1636 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1637 options | g_notempty, use_offsets, use_size_offsets);
1638 if (count == PCRE_ERROR_MATCHLIMIT)
1639 {
1640 /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1641 min = mid;
1642 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1643 }
1644 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1645 count == PCRE_ERROR_PARTIAL)
1646 {
1647 if (mid == min + 1)
1648 {
1649 fprintf(outfile, "Minimum match limit = %d\n", mid);
1650 break;
1651 }
1652 /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1653 max = mid;
1654 mid = (min + mid)/2;
1655 }
1656 else break; /* Some other error */
1657 }
1658
1659 extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1660 }
1661
1662 /* If callout_data is set, use the interface with additional data */
1663
1664 else if (callout_data_set)
1665 {
1666 if (extra == NULL)
1667 {
1668 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1669 extra->flags = 0;
1670 }
1671 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1672 extra->callout_data = &callout_data;
1673 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1674 options | g_notempty, use_offsets, use_size_offsets);
1675 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1676 }
1677
1678 /* The normal case is just to do the match once, with the default
1679 value of match_limit. */
1680
1681 #if !defined NODFA
1682 else if (all_use_dfa || use_dfa)
1683 {
1684 int workspace[1000];
1685 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1686 options | g_notempty, use_offsets, use_size_offsets, workspace,
1687 sizeof(workspace)/sizeof(int));
1688 if (count == 0)
1689 {
1690 fprintf(outfile, "Matched, but too many subsidiary matches\n");
1691 count = use_size_offsets/2;
1692 }
1693 }
1694 #endif
1695
1696 else
1697 {
1698 count = pcre_exec(re, extra, (char *)bptr, len,
1699 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1700 if (count == 0)
1701 {
1702 fprintf(outfile, "Matched, but too many substrings\n");
1703 count = use_size_offsets/3;
1704 }
1705 }
1706
1707 /* Matched */
1708
1709 if (count >= 0)
1710 {
1711 int i;
1712 for (i = 0; i < count * 2; i += 2)
1713 {
1714 if (use_offsets[i] < 0)
1715 fprintf(outfile, "%2d: <unset>\n", i/2);
1716 else
1717 {
1718 fprintf(outfile, "%2d: ", i/2);
1719 (void)pchars(bptr + use_offsets[i],
1720 use_offsets[i+1] - use_offsets[i], outfile);
1721 fprintf(outfile, "\n");
1722 if (i == 0)
1723 {
1724 if (do_showrest)
1725 {
1726 fprintf(outfile, " 0+ ");
1727 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1728 outfile);
1729 fprintf(outfile, "\n");
1730 }
1731 }
1732 }
1733 }
1734
1735 for (i = 0; i < 32; i++)
1736 {
1737 if ((copystrings & (1 << i)) != 0)
1738 {
1739 char copybuffer[16];
1740 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1741 i, copybuffer, sizeof(copybuffer));
1742 if (rc < 0)
1743 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1744 else
1745 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1746 }
1747 }
1748
1749 for (i = 0; i < 32; i++)
1750 {
1751 if ((getstrings & (1 << i)) != 0)
1752 {
1753 const char *substring;
1754 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1755 i, &substring);
1756 if (rc < 0)
1757 fprintf(outfile, "get substring %d failed %d\n", i, rc);
1758 else
1759 {
1760 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1761 /* free((void *)substring); */
1762 pcre_free_substring(substring);
1763 }
1764 }
1765 }
1766
1767 if (getlist)
1768 {
1769 const char **stringlist;
1770 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1771 &stringlist);
1772 if (rc < 0)
1773 fprintf(outfile, "get substring list failed %d\n", rc);
1774 else
1775 {
1776 for (i = 0; i < count; i++)
1777 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1778 if (stringlist[i] != NULL)
1779 fprintf(outfile, "string list not terminated by NULL\n");
1780 /* free((void *)stringlist); */
1781 pcre_free_substring_list(stringlist);
1782 }
1783 }
1784 }
1785
1786 /* There was a partial match */
1787
1788 else if (count == PCRE_ERROR_PARTIAL)
1789 {
1790 fprintf(outfile, "Partial match");
1791 #if !defined NODFA
1792 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
1793 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
1794 bptr + use_offsets[0]);
1795 #endif
1796 fprintf(outfile, "\n");
1797 break; /* Out of the /g loop */
1798 }
1799
1800 /* Failed to match. If this is a /g or /G loop and we previously set
1801 g_notempty after a null match, this is not necessarily the end.
1802 We want to advance the start offset, and continue. In the case of UTF-8
1803 matching, the advance must be one character, not one byte. Fudge the
1804 offset values to achieve this. We won't be at the end of the string -
1805 that was checked before setting g_notempty. */
1806
1807 else
1808 {
1809 if (g_notempty != 0)
1810 {
1811 int onechar = 1;
1812 use_offsets[0] = start_offset;
1813 if (use_utf8)
1814 {
1815 while (start_offset + onechar < len)
1816 {
1817 int tb = bptr[start_offset+onechar];
1818 if (tb <= 127) break;
1819 tb &= 0xc0;
1820 if (tb != 0 && tb != 0xc0) onechar++;
1821 }
1822 }
1823 use_offsets[1] = start_offset + onechar;
1824 }
1825 else
1826 {
1827 if (count == PCRE_ERROR_NOMATCH)
1828 {
1829 if (gmatched == 0) fprintf(outfile, "No match\n");
1830 }
1831 else fprintf(outfile, "Error %d\n", count);
1832 break; /* Out of the /g loop */
1833 }
1834 }
1835
1836 /* If not /g or /G we are done */
1837
1838 if (!do_g && !do_G) break;
1839
1840 /* If we have matched an empty string, first check to see if we are at
1841 the end of the subject. If so, the /g loop is over. Otherwise, mimic
1842 what Perl's /g option does. This turns out to be rather cunning. First
1843 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1844 same point. If this fails (picked up above) we advance to the next
1845 character. */
1846
1847 g_notempty = 0;
1848 if (use_offsets[0] == use_offsets[1])
1849 {
1850 if (use_offsets[0] == len) break;
1851 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1852 }
1853
1854 /* For /g, update the start offset, leaving the rest alone */
1855
1856 if (do_g) start_offset = use_offsets[1];
1857
1858 /* For /G, update the pointer and length */
1859
1860 else
1861 {
1862 bptr += use_offsets[1];
1863 len -= use_offsets[1];
1864 }
1865 } /* End of loop for /g and /G */
1866 } /* End of loop for data lines */
1867
1868 CONTINUE:
1869
1870 #if !defined NOPOSIX
1871 if (posix || do_posix) regfree(&preg);
1872 #endif
1873
1874 if (re != NULL) new_free(re);
1875 if (extra != NULL) new_free(extra);
1876 if (tables != NULL)
1877 {
1878 new_free((void *)tables);
1879 setlocale(LC_CTYPE, "C");
1880 }
1881 }
1882
1883 if (infile == stdin) fprintf(outfile, "\n");
1884
1885 EXIT:
1886
1887 if (infile != NULL && infile != stdin) fclose(infile);
1888 if (outfile != NULL && outfile != stdout) fclose(outfile);
1889
1890 free(buffer);
1891 free(dbuffer);
1892 free(pbuffer);
1893 free(offsets);
1894
1895 return yield;
1896 }
1897
1898 /* End of pcretest.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12