/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 107 - (show annotations) (download)
Wed Mar 7 11:02:28 2007 UTC (7 years, 9 months ago) by ph10
File MIME type: text/plain
File size: 66990 byte(s)
Added some additional #ifdef SUPPORT_UTF8 to minimize the code when UTF-8 
support is not compiled.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #include <ctype.h>
40 #include <stdio.h>
41 #include <string.h>
42 #include <stdlib.h>
43 #include <time.h>
44 #include <locale.h>
45 #include <errno.h>
46
47
48 /* A number of things vary for Windows builds. Originally, pcretest opened its
49 input and output without "b"; then I was told that "b" was needed in some
50 environments, so it was added for release 5.0 to both the input and output. (It
51 makes no difference on Unix-like systems.) Later I was told that it is wrong
52 for the input on Windows. I've now abstracted the modes into two macros that
53 are set here, to make it easier to fiddle with them, and removed "b" from the
54 input mode under Windows. */
55
56 #if defined(_WIN32) || defined(WIN32)
57 #include <io.h> /* For _setmode() */
58 #include <fcntl.h> /* For _O_BINARY */
59 #define INPUT_MODE "r"
60 #define OUTPUT_MODE "wb"
61
62 #else
63 #include <sys/time.h> /* These two includes are needed */
64 #include <sys/resource.h> /* for setrlimit(). */
65 #define INPUT_MODE "rb"
66 #define OUTPUT_MODE "wb"
67 #endif
68
69
70 #define PCRE_SPY /* For Win32 build, import data, not export */
71
72 /* We include pcre_internal.h because we need the internal info for displaying
73 the results of pcre_study() and we also need to know about the internal
74 macros, structures, and other internal data values; pcretest has "inside
75 information" compared to a program that strictly follows the PCRE API. */
76
77 #include "pcre_internal.h"
78
79 /* We need access to the data tables that PCRE uses. So as not to have to keep
80 two copies, we include the source file here, changing the names of the external
81 symbols to prevent clashes. */
82
83 #define _pcre_utf8_table1 utf8_table1
84 #define _pcre_utf8_table1_size utf8_table1_size
85 #define _pcre_utf8_table2 utf8_table2
86 #define _pcre_utf8_table3 utf8_table3
87 #define _pcre_utf8_table4 utf8_table4
88 #define _pcre_utt utt
89 #define _pcre_utt_size utt_size
90 #define _pcre_OP_lengths OP_lengths
91
92 #include "pcre_tables.c"
93
94 /* We also need the pcre_printint() function for printing out compiled
95 patterns. This function is in a separate file so that it can be included in
96 pcre_compile.c when that module is compiled with debugging enabled.
97
98 The definition of the macro PRINTABLE, which determines whether to print an
99 output character as-is or as a hex value when showing compiled patterns, is
100 contained in this file. We use it here also, in cases when the locale has not
101 been explicitly changed, so as to get consistent output from systems that
102 differ in their output from isprint() even in the "C" locale. */
103
104 #include "pcre_printint.src"
105
106 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
107
108
109 /* It is possible to compile this test program without including support for
110 testing the POSIX interface, though this is not available via the standard
111 Makefile. */
112
113 #if !defined NOPOSIX
114 #include "pcreposix.h"
115 #endif
116
117 /* It is also possible, for the benefit of the version currently imported into
118 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
119 interface to the DFA matcher (NODFA), and without the doublecheck of the old
120 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
121 UTF8 support if PCRE is built without it. */
122
123 #ifndef SUPPORT_UTF8
124 #ifndef NOUTF8
125 #define NOUTF8
126 #endif
127 #endif
128
129
130 /* Other parameters */
131
132 #ifndef CLOCKS_PER_SEC
133 #ifdef CLK_TCK
134 #define CLOCKS_PER_SEC CLK_TCK
135 #else
136 #define CLOCKS_PER_SEC 100
137 #endif
138 #endif
139
140 /* This is the default loop count for timing. */
141
142 #define LOOPREPEAT 500000
143
144 /* Static variables */
145
146 static FILE *outfile; /* Stream for test output; main() starts it at stdout and may reopen it on a named file */
147 static int log_store = 0; /* Per-pattern "show store" flag: reset from the -s/-m setting, forced on by the /M option */
148 static int callout_count; /* Incremented by callout() each time the callout number equals callout_fail_id */
149 static int callout_extra; /* Non-zero makes callout() print capture details and echo the subject every time */
150 static int callout_fail_count; /* callout() returns 1 once callout_count has reached this value */
151 static int callout_fail_id; /* Callout number whose invocations are counted in callout_count */
152 static int first_callout; /* callout() echoes the subject while this is non-zero, then clears it */
153 static int locale_set = 0; /* Non-zero makes PRINTHEX use isprint() instead of PRINTABLE() */
154 static int show_malloc; /* Non-zero makes the malloc/free wrappers log each call to outfile */
155 static int use_utf8; /* Non-zero while the current pattern is in UTF-8 mode (/8 option or loaded options) */
156 static size_t gotten_store; /* Size passed to the most recent new_malloc() call */
157
158 /* The buffers grow automatically if very long input lines are encountered. */
159
160 static int buffer_size = 50000; /* Current size in bytes of each of the three buffers; doubled by extend_inputline() */
161 static uschar *buffer = NULL; /* Input line buffer that extend_inputline() reads into */
162 static uschar *dbuffer = NULL; /* Reallocated in step with buffer, contents not preserved; NOTE(review): its use is not visible in this part of the file */
163 static uschar *pbuffer = NULL; /* Copy of the current pattern, read by callout(); preserved across buffer growth */
164
165
166
167 /*************************************************
168 * Read or extend an input line *
169 *************************************************/
170
171 /* Input lines are read into buffer, but both patterns and data lines can be
172 continued over multiple input lines. In addition, if the buffer fills up, we
173 want to automatically expand it so as to be able to handle extremely large
174 lines that are needed for certain stress tests. When the input buffer is
175 expanded, the other two buffers must also be expanded likewise, and the
176 contents of pbuffer, which are a copy of the input for callouts, must be
177 preserved (for when expansion happens for a data line). This is not the most
178 optimal way of handling this, but hey, this is just a test program!
179
180 Arguments:
181 f the file to read
182 start where in buffer to start (this *must* be within buffer)
183
184 Returns: pointer to the start of new data
185 could be a copy of start, or could be moved
186 NULL if no data read and EOF reached
187 */
188
189 static uschar *
190 extend_inputline(FILE *f, uschar *start)
191 {
192 uschar *here = start;
193
194 for (;;)
195 {
196 int rlen = buffer_size - (here - buffer);
197
198 if (rlen > 1000)
199 {
200 int dlen;
201 if (fgets((char *)here, rlen, f) == NULL)
202 return (here == start)? NULL : start;
203 dlen = (int)strlen((char *)here);
204 if (dlen > 0 && here[dlen - 1] == '\n') return start;
205 here += dlen;
206 }
207
208 else
209 {
210 int new_buffer_size = 2*buffer_size;
211 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
212 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
213 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
214
215 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
216 {
217 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
218 exit(1);
219 }
220
221 memcpy(new_buffer, buffer, buffer_size);
222 memcpy(new_pbuffer, pbuffer, buffer_size);
223
224 buffer_size = new_buffer_size;
225
226 start = new_buffer + (start - buffer);
227 here = new_buffer + (here - buffer);
228
229 free(buffer);
230 free(dbuffer);
231 free(pbuffer);
232
233 buffer = new_buffer;
234 dbuffer = new_dbuffer;
235 pbuffer = new_pbuffer;
236 }
237 }
238
239 return NULL; /* Control never gets here */
240 }
241
242
243
244
245
246
247
248 /*************************************************
249 * Read number from string *
250 *************************************************/
251
252 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
253 around with conditional compilation, just do the job by hand. It is only used
254 for unpicking arguments, so just keep it simple.
255
256 Arguments:
257 str string to be converted
258 endptr where to put the end pointer
259
260 Returns: the converted value, as an int
261 */
262
static int
get_value(unsigned char *str, unsigned char **endptr)
{
int value = 0;

/* Step over leading white space, then accumulate decimal digits. */

for (; *str != 0 && isspace(*str); str++) ;
for (; isdigit(*str); str++) value = value * 10 + (int)(*str - '0');

/* Tell the caller where the number stopped. */

*endptr = str;
return value;
}
272
273
274
275
276 /*************************************************
277 * Convert UTF-8 string to value *
278 *************************************************/
279
280 /* This function takes one or more bytes that represents a UTF-8 character,
281 and returns the value of the character.
282
283 Argument:
284 utf8bytes a pointer to the byte vector
285 vptr a pointer to an int to receive the value
286
287 Returns: > 0 => the number of bytes consumed
288 -6 to 0 => malformed UTF-8 character at offset = (-return)
289 */
290
291 #if !defined NOUTF8
292
293 static int
294 utf82ord(unsigned char *utf8bytes, int *vptr)
295 {
296 int c = *utf8bytes++;
297 int d = c;
298 int i, j, s;
299
300 for (i = -1; i < 6; i++) /* i is number of additional bytes */
301 {
302 if ((d & 0x80) == 0) break;
303 d <<= 1;
304 }
305
306 if (i == -1) { *vptr = c; return 1; } /* ascii character */
307 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
308
309 /* i now has a value in the range 1-5 */
310
311 s = 6*i;
312 d = (c & utf8_table3[i]) << s;
313
314 for (j = 0; j < i; j++)
315 {
316 c = *utf8bytes++;
317 if ((c & 0xc0) != 0x80) return -(j+1);
318 s -= 6;
319 d |= (c & 0x3f) << s;
320 }
321
322 /* Check that encoding was the correct unique one */
323
324 for (j = 0; j < utf8_table1_size; j++)
325 if (d <= utf8_table1[j]) break;
326 if (j != i) return -(i+1);
327
328 /* Valid value */
329
330 *vptr = d;
331 return i+1;
332 }
333
334 #endif
335
336
337
338 /*************************************************
339 * Convert character value to UTF-8 *
340 *************************************************/
341
342 /* This function takes an integer value in the range 0 - 0x7fffffff
343 and encodes it as a UTF-8 character in 1 to 6 bytes.
344
345 Arguments:
346 cvalue the character value
347 utf8bytes pointer to buffer for result - at least 6 bytes long
348
349 Returns: number of bytes placed in the buffer
350 */
351
352 #if !defined NOUTF8
353
354 static int
355 ord2utf8(int cvalue, uschar *utf8bytes)
356 {
357 register int i, j;
358 for (i = 0; i < utf8_table1_size; i++)
359 if (cvalue <= utf8_table1[i]) break;
360 utf8bytes += i;
361 for (j = i; j > 0; j--)
362 {
363 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
364 cvalue >>= 6;
365 }
366 *utf8bytes = utf8_table2[i] | cvalue;
367 return i + 1;
368 }
369
370 #endif
371
372
373
374 /*************************************************
375 * Print character string *
376 *************************************************/
377
378 /* Character string printing function. Must handle UTF-8 strings in utf8
379 mode. Yields number of characters printed. If handed a NULL file, just counts
380 chars without printing. */
381
382 static int pchars(unsigned char *p, int length, FILE *f)
383 {
384 int c = 0;
385 int yield = 0;
386
387 while (length-- > 0)
388 {
389 #if !defined NOUTF8
390 if (use_utf8)
391 {
392 int rc = utf82ord(p, &c);
393
394 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
395 {
396 length -= rc - 1;
397 p += rc;
398 if (PRINTHEX(c))
399 {
400 if (f != NULL) fprintf(f, "%c", c);
401 yield++;
402 }
403 else
404 {
405 int n = 4;
406 if (f != NULL) fprintf(f, "\\x{%02x}", c);
407 yield += (n <= 0x000000ff)? 2 :
408 (n <= 0x00000fff)? 3 :
409 (n <= 0x0000ffff)? 4 :
410 (n <= 0x000fffff)? 5 : 6;
411 }
412 continue;
413 }
414 }
415 #endif
416
417 /* Not UTF-8, or malformed UTF-8 */
418
419 c = *p++;
420 if (PRINTHEX(c))
421 {
422 if (f != NULL) fprintf(f, "%c", c);
423 yield++;
424 }
425 else
426 {
427 if (f != NULL) fprintf(f, "\\x%02x", c);
428 yield += 4;
429 }
430 }
431
432 return yield;
433 }
434
435
436
437 /*************************************************
438 * Callout function *
439 *************************************************/
440
441 /* Called from PCRE as a result of the (?C) item. We print out where we are in
442 the match. Yield zero unless more callouts than the fail count, or the callout
443 data is not zero. */
444
445 static int callout(pcre_callout_block *cb)
446 {
447 FILE *f = (first_callout | callout_extra)? outfile : NULL;
448 int i, pre_start, post_start, subject_length;
449
450 if (callout_extra)
451 {
452 fprintf(f, "Callout %d: last capture = %d\n",
453 cb->callout_number, cb->capture_last);
454
455 for (i = 0; i < cb->capture_top * 2; i += 2)
456 {
457 if (cb->offset_vector[i] < 0)
458 fprintf(f, "%2d: <unset>\n", i/2);
459 else
460 {
461 fprintf(f, "%2d: ", i/2);
462 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
463 cb->offset_vector[i+1] - cb->offset_vector[i], f);
464 fprintf(f, "\n");
465 }
466 }
467 }
468
469 /* Re-print the subject in canonical form, the first time or if giving full
470 datails. On subsequent calls in the same match, we use pchars just to find the
471 printed lengths of the substrings. */
472
473 if (f != NULL) fprintf(f, "--->");
474
475 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
476 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
477 cb->current_position - cb->start_match, f);
478
479 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
480
481 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
482 cb->subject_length - cb->current_position, f);
483
484 if (f != NULL) fprintf(f, "\n");
485
486 /* Always print appropriate indicators, with callout number if not already
487 shown. For automatic callouts, show the pattern offset. */
488
489 if (cb->callout_number == 255)
490 {
491 fprintf(outfile, "%+3d ", cb->pattern_position);
492 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
493 }
494 else
495 {
496 if (callout_extra) fprintf(outfile, " ");
497 else fprintf(outfile, "%3d ", cb->callout_number);
498 }
499
500 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
501 fprintf(outfile, "^");
502
503 if (post_start > 0)
504 {
505 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
506 fprintf(outfile, "^");
507 }
508
509 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
510 fprintf(outfile, " ");
511
512 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
513 pbuffer + cb->pattern_position);
514
515 fprintf(outfile, "\n");
516 first_callout = 0;
517
518 if (cb->callout_data != NULL)
519 {
520 int callout_data = *((int *)(cb->callout_data));
521 if (callout_data != 0)
522 {
523 fprintf(outfile, "Callout data = %d\n", callout_data);
524 return callout_data;
525 }
526 }
527
528 return (cb->callout_number != callout_fail_id)? 0 :
529 (++callout_count >= callout_fail_count)? 1 : 0;
530 }
531
532
533 /*************************************************
534 * Local malloc functions *
535 *************************************************/
536
537 /* Alternative malloc function, to test functionality and show the size of the
538 compiled re. */
539
540 static void *new_malloc(size_t size)
541 {
542 void *block = malloc(size);
543 gotten_store = size;
544 if (show_malloc)
545 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
546 return block;
547 }
548
549 static void new_free(void *block)
550 {
551 if (show_malloc)
552 fprintf(outfile, "free %p\n", block);
553 free(block);
554 }
555
556
557 /* For recursion malloc/free, to test stacking calls */
558
559 static void *stack_malloc(size_t size)
560 {
561 void *block = malloc(size);
562 if (show_malloc)
563 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
564 return block;
565 }
566
567 static void stack_free(void *block)
568 {
569 if (show_malloc)
570 fprintf(outfile, "stack_free %p\n", block);
571 free(block);
572 }
573
574
575 /*************************************************
576 * Call pcre_fullinfo() *
577 *************************************************/
578
579 /* Get one piece of information from the pcre_fullinfo() function */
580
581 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
582 {
583 int rc;
584 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
585 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
586 }
587
588
589
590 /*************************************************
591 * Byte flipping function *
592 *************************************************/
593
/* Reverse the byte order of a 2-byte value (n == 2) or of the low four bytes
of the value (any other n). Bits above those bytes do not reach the result. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
603
604
605
606
607 /*************************************************
608 * Check match or recursion limit *
609 *************************************************/
610
611 static int
612 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
613 int start_offset, int options, int *use_offsets, int use_size_offsets,
614 int flag, unsigned long int *limit, int errnumber, const char *msg)
615 {
616 int count;
617 int min = 0;
618 int mid = 64;
619 int max = -1;
620
621 extra->flags |= flag;
622
623 for (;;)
624 {
625 *limit = mid;
626
627 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
628 use_offsets, use_size_offsets);
629
630 if (count == errnumber)
631 {
632 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
633 min = mid;
634 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
635 }
636
637 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
638 count == PCRE_ERROR_PARTIAL)
639 {
640 if (mid == min + 1)
641 {
642 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
643 break;
644 }
645 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
646 max = mid;
647 mid = (min + mid)/2;
648 }
649 else break; /* Some other error */
650 }
651
652 extra->flags &= ~flag;
653 return count;
654 }
655
656
657
658 /*************************************************
659 * Check newline indicator *
660 *************************************************/
661
662 /* This is used both at compile and run-time to check for <xxx> escapes, where
663 xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.
664
665 Arguments:
666 p points after the leading '<'
667 f file for error message
668
669 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
670 */
671
672 static int
673 check_newline(uschar *p, FILE *f)
674 {
675 if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
676 if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
677 if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
678 if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
679 fprintf(f, "Unknown newline type at: <%s\n", p);
680 return 0;
681 }
682
683
684
685 /*************************************************
686 * Usage function *
687 *************************************************/
688
/* Write the command-line summary to stdout, one option per line. Options
that were compiled out (DFA, POSIX) are omitted. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input> [<output>]]\n", stdout);
fputs(" -b show compiled code (bytecode)\n", stdout);
fputs(" -C show PCRE compile-time options and exit\n", stdout);
fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n", stdout);
fputs(" -i show information about compiled patterns\n", stdout);
fputs(" -m output memory used information\n", stdout);
fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
fputs(" -s output store (memory) used information\n", stdout);
fputs(" -t time compilation and execution\n", stdout);
fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
fputs(" -tm time execution (matching) only\n", stdout);
fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
714
715
716
717 /*************************************************
718 * Main Program *
719 *************************************************/
720
721 /* Read lines from named file or stdin and write to named file or stdout; lines
722 consist of a regular expression, in delimiters and optionally followed by
723 options, followed by a set of test data, terminated by an empty line. */
724
725 int main(int argc, char **argv)
726 {
727 FILE *infile = stdin;
728 int options = 0;
729 int study_options = 0;
730 int op = 1;
731 int timeit = 0;
732 int timeitm = 0;
733 int showinfo = 0;
734 int showstore = 0;
735 int quiet = 0;
736 int size_offsets = 45;
737 int size_offsets_max;
738 int *offsets = NULL;
739 #if !defined NOPOSIX
740 int posix = 0;
741 #endif
742 int debug = 0;
743 int done = 0;
744 int all_use_dfa = 0;
745 int yield = 0;
746 int stack_size;
747
748 /* These vectors store, end-to-end, a list of captured substring names. Assume
749 that 1024 is plenty long enough for the few names we'll be testing. */
750
751 uschar copynames[1024];
752 uschar getnames[1024];
753
754 uschar *copynamesptr;
755 uschar *getnamesptr;
756
757 /* Get buffers from malloc() so that Electric Fence will check their misuse
758 when I am debugging. They grow automatically when very long lines are read. */
759
760 buffer = (unsigned char *)malloc(buffer_size);
761 dbuffer = (unsigned char *)malloc(buffer_size);
762 pbuffer = (unsigned char *)malloc(buffer_size);
763
764 /* The outfile variable is static so that new_malloc can use it. */
765
766 outfile = stdout;
767
768 /* The following _setmode() stuff is some Windows magic that tells its runtime
769 library to translate CRLF into a single LF character. At least, that's what
770 I've been told: never having used Windows I take this all on trust. Originally
771 it set 0x8000, but then I was advised that _O_BINARY was better. */
772
773 #if defined(_WIN32) || defined(WIN32)
774 _setmode( _fileno( stdout ), _O_BINARY );
775 #endif
776
777 /* Scan options */
778
779 while (argc > 1 && argv[op][0] == '-')
780 {
781 unsigned char *endptr;
782
783 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
784 showstore = 1;
785 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
786 else if (strcmp(argv[op], "-b") == 0) debug = 1;
787 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
788 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
789 #if !defined NODFA
790 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
791 #endif
792 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
793 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
794 *endptr == 0))
795 {
796 op++;
797 argc--;
798 }
799 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
800 {
801 int both = argv[op][2] == 0;
802 int temp;
803 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
804 *endptr == 0))
805 {
806 timeitm = temp;
807 op++;
808 argc--;
809 }
810 else timeitm = LOOPREPEAT;
811 if (both) timeit = timeitm;
812 }
813 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
814 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
815 *endptr == 0))
816 {
817 #if defined(_WIN32) || defined(WIN32)
818 printf("PCRE: -S not supported on this OS\n");
819 exit(1);
820 #else
821 int rc;
822 struct rlimit rlim;
823 getrlimit(RLIMIT_STACK, &rlim);
824 rlim.rlim_cur = stack_size * 1024 * 1024;
825 rc = setrlimit(RLIMIT_STACK, &rlim);
826 if (rc != 0)
827 {
828 printf("PCRE: setrlimit() failed with error %d\n", rc);
829 exit(1);
830 }
831 op++;
832 argc--;
833 #endif
834 }
835 #if !defined NOPOSIX
836 else if (strcmp(argv[op], "-p") == 0) posix = 1;
837 #endif
838 else if (strcmp(argv[op], "-C") == 0)
839 {
840 int rc;
841 printf("PCRE version %s\n", pcre_version());
842 printf("Compiled with\n");
843 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
844 printf(" %sUTF-8 support\n", rc? "" : "No ");
845 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
846 printf(" %sUnicode properties support\n", rc? "" : "No ");
847 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
848 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
849 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
850 (rc == -1)? "ANY" : "???");
851 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
852 printf(" Internal link size = %d\n", rc);
853 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
854 printf(" POSIX malloc threshold = %d\n", rc);
855 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
856 printf(" Default match limit = %d\n", rc);
857 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
858 printf(" Default recursion depth limit = %d\n", rc);
859 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
860 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
861 exit(0);
862 }
863 else if (strcmp(argv[op], "-help") == 0 ||
864 strcmp(argv[op], "--help") == 0)
865 {
866 usage();
867 goto EXIT;
868 }
869 else
870 {
871 printf("** Unknown or malformed option %s\n", argv[op]);
872 usage();
873 yield = 1;
874 goto EXIT;
875 }
876 op++;
877 argc--;
878 }
879
880 /* Get the store for the offsets vector, and remember what it was */
881
882 size_offsets_max = size_offsets;
883 offsets = (int *)malloc(size_offsets_max * sizeof(int));
884 if (offsets == NULL)
885 {
886 printf("** Failed to get %d bytes of memory for offsets vector\n",
887 size_offsets_max * sizeof(int));
888 yield = 1;
889 goto EXIT;
890 }
891
892 /* Sort out the input and output files */
893
894 if (argc > 1)
895 {
896 infile = fopen(argv[op], INPUT_MODE);
897 if (infile == NULL)
898 {
899 printf("** Failed to open %s\n", argv[op]);
900 yield = 1;
901 goto EXIT;
902 }
903 }
904
905 if (argc > 2)
906 {
907 outfile = fopen(argv[op+1], OUTPUT_MODE);
908 if (outfile == NULL)
909 {
910 printf("** Failed to open %s\n", argv[op+1]);
911 yield = 1;
912 goto EXIT;
913 }
914 }
915
916 /* Set alternative malloc function */
917
918 pcre_malloc = new_malloc;
919 pcre_free = new_free;
920 pcre_stack_malloc = stack_malloc;
921 pcre_stack_free = stack_free;
922
923 /* Heading line unless quiet, then prompt for first regex if stdin */
924
925 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
926
927 /* Main loop */
928
929 while (!done)
930 {
931 pcre *re = NULL;
932 pcre_extra *extra = NULL;
933
934 #if !defined NOPOSIX /* There are still compilers that require no indent */
935 regex_t preg;
936 int do_posix = 0;
937 #endif
938
939 const char *error;
940 unsigned char *p, *pp, *ppp;
941 unsigned char *to_file = NULL;
942 const unsigned char *tables = NULL;
943 unsigned long int true_size, true_study_size = 0;
944 size_t size, regex_gotten_store;
945 int do_study = 0;
946 int do_debug = debug;
947 int do_G = 0;
948 int do_g = 0;
949 int do_showinfo = showinfo;
950 int do_showrest = 0;
951 int do_flip = 0;
952 int erroroffset, len, delimiter, poffset;
953
954 use_utf8 = 0;
955
956 if (infile == stdin) printf(" re> ");
957 if (extend_inputline(infile, buffer) == NULL) break;
958 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
959 fflush(outfile);
960
961 p = buffer;
962 while (isspace(*p)) p++;
963 if (*p == 0) continue;
964
965 /* See if the pattern is to be loaded pre-compiled from a file. */
966
967 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
968 {
969 unsigned long int magic, get_options;
970 uschar sbuf[8];
971 FILE *f;
972
973 p++;
974 pp = p + (int)strlen((char *)p);
975 while (isspace(pp[-1])) pp--;
976 *pp = 0;
977
978 f = fopen((char *)p, "rb");
979 if (f == NULL)
980 {
981 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
982 continue;
983 }
984
985 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
986
987 true_size =
988 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
989 true_study_size =
990 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
991
992 re = (real_pcre *)new_malloc(true_size);
993 regex_gotten_store = gotten_store;
994
995 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
996
997 magic = ((real_pcre *)re)->magic_number;
998 if (magic != MAGIC_NUMBER)
999 {
1000 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1001 {
1002 do_flip = 1;
1003 }
1004 else
1005 {
1006 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1007 fclose(f);
1008 continue;
1009 }
1010 }
1011
1012 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1013 do_flip? " (byte-inverted)" : "", p);
1014
1015 /* Need to know if UTF-8 for printing data strings */
1016
1017 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1018 use_utf8 = (get_options & PCRE_UTF8) != 0;
1019
1020 /* Now see if there is any following study data */
1021
1022 if (true_study_size != 0)
1023 {
1024 pcre_study_data *psd;
1025
1026 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1027 extra->flags = PCRE_EXTRA_STUDY_DATA;
1028
1029 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1030 extra->study_data = psd;
1031
1032 if (fread(psd, 1, true_study_size, f) != true_study_size)
1033 {
1034 FAIL_READ:
1035 fprintf(outfile, "Failed to read data from %s\n", p);
1036 if (extra != NULL) new_free(extra);
1037 if (re != NULL) new_free(re);
1038 fclose(f);
1039 continue;
1040 }
1041 fprintf(outfile, "Study data loaded from %s\n", p);
1042 do_study = 1; /* To get the data output if requested */
1043 }
1044 else fprintf(outfile, "No study data\n");
1045
1046 fclose(f);
1047 goto SHOW_INFO;
1048 }
1049
1050 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1051 the pattern; if it isn't complete, read more. */
1052
1053 delimiter = *p++;
1054
1055 if (isalnum(delimiter) || delimiter == '\\')
1056 {
1057 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1058 goto SKIP_DATA;
1059 }
1060
1061 pp = p;
1062 poffset = p - buffer;
1063
1064 for(;;)
1065 {
1066 while (*pp != 0)
1067 {
1068 if (*pp == '\\' && pp[1] != 0) pp++;
1069 else if (*pp == delimiter) break;
1070 pp++;
1071 }
1072 if (*pp != 0) break;
1073 if (infile == stdin) printf(" > ");
1074 if ((pp = extend_inputline(infile, pp)) == NULL)
1075 {
1076 fprintf(outfile, "** Unexpected EOF\n");
1077 done = 1;
1078 goto CONTINUE;
1079 }
1080 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1081 }
1082
1083 /* The buffer may have moved while being extended; reset the start of data
1084 pointer to the correct relative point in the buffer. */
1085
1086 p = buffer + poffset;
1087
1088 /* If the first character after the delimiter is backslash, make
1089 the pattern end with backslash. This is purely to provide a way
1090 of testing for the error message when a pattern ends with backslash. */
1091
1092 if (pp[1] == '\\') *pp++ = '\\';
1093
1094 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1095 for callouts. */
1096
1097 *pp++ = 0;
1098 strcpy((char *)pbuffer, (char *)p);
1099
1100 /* Look for options after final delimiter */
1101
1102 options = 0;
1103 study_options = 0;
1104 log_store = showstore; /* default from command line */
1105
1106 while (*pp != 0)
1107 {
1108 switch (*pp++)
1109 {
1110 case 'f': options |= PCRE_FIRSTLINE; break;
1111 case 'g': do_g = 1; break;
1112 case 'i': options |= PCRE_CASELESS; break;
1113 case 'm': options |= PCRE_MULTILINE; break;
1114 case 's': options |= PCRE_DOTALL; break;
1115 case 'x': options |= PCRE_EXTENDED; break;
1116
1117 case '+': do_showrest = 1; break;
1118 case 'A': options |= PCRE_ANCHORED; break;
1119 case 'B': do_debug = 1; break;
1120 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1121 case 'D': do_debug = do_showinfo = 1; break;
1122 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1123 case 'F': do_flip = 1; break;
1124 case 'G': do_G = 1; break;
1125 case 'I': do_showinfo = 1; break;
1126 case 'J': options |= PCRE_DUPNAMES; break;
1127 case 'M': log_store = 1; break;
1128 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1129
1130 #if !defined NOPOSIX
1131 case 'P': do_posix = 1; break;
1132 #endif
1133
1134 case 'S': do_study = 1; break;
1135 case 'U': options |= PCRE_UNGREEDY; break;
1136 case 'X': options |= PCRE_EXTRA; break;
1137 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1138 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1139
1140 case 'L':
1141 ppp = pp;
1142 /* The '\r' test here is so that it works on Windows. */
1143 /* The '0' test is just in case this is an unterminated line. */
1144 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1145 *ppp = 0;
1146 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1147 {
1148 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1149 goto SKIP_DATA;
1150 }
1151 locale_set = 1;
1152 tables = pcre_maketables();
1153 pp = ppp;
1154 break;
1155
1156 case '>':
1157 to_file = pp;
1158 while (*pp != 0) pp++;
1159 while (isspace(pp[-1])) pp--;
1160 *pp = 0;
1161 break;
1162
1163 case '<':
1164 {
1165 int x = check_newline(pp, outfile);
1166 if (x == 0) goto SKIP_DATA;
1167 options |= x;
1168 while (*pp++ != '>');
1169 }
1170 break;
1171
1172 case '\r': /* So that it works in Windows */
1173 case '\n':
1174 case ' ':
1175 break;
1176
1177 default:
1178 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1179 goto SKIP_DATA;
1180 }
1181 }
1182
1183 /* Handle compiling via the POSIX interface, which doesn't support the
1184 timing, showing, or debugging options, nor the ability to pass over
1185 local character tables. */
1186
1187 #if !defined NOPOSIX
1188 if (posix || do_posix)
1189 {
1190 int rc;
1191 int cflags = 0;
1192
1193 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1194 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1195 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1196 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1197 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1198
1199 rc = regcomp(&preg, (char *)p, cflags);
1200
1201 /* Compilation failed; go back for another re, skipping to blank line
1202 if non-interactive. */
1203
1204 if (rc != 0)
1205 {
1206 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1207 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1208 goto SKIP_DATA;
1209 }
1210 }
1211
1212 /* Handle compiling via the native interface */
1213
1214 else
1215 #endif /* !defined NOPOSIX */
1216
1217 {
1218 if (timeit > 0)
1219 {
1220 register int i;
1221 clock_t time_taken;
1222 clock_t start_time = clock();
1223 for (i = 0; i < timeit; i++)
1224 {
1225 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1226 if (re != NULL) free(re);
1227 }
1228 time_taken = clock() - start_time;
1229 fprintf(outfile, "Compile time %.4f milliseconds\n",
1230 (((double)time_taken * 1000.0) / (double)timeit) /
1231 (double)CLOCKS_PER_SEC);
1232 }
1233
1234 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1235
1236 /* Compilation failed; go back for another re, skipping to blank line
1237 if non-interactive. */
1238
1239 if (re == NULL)
1240 {
1241 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1242 SKIP_DATA:
1243 if (infile != stdin)
1244 {
1245 for (;;)
1246 {
1247 if (extend_inputline(infile, buffer) == NULL)
1248 {
1249 done = 1;
1250 goto CONTINUE;
1251 }
1252 len = (int)strlen((char *)buffer);
1253 while (len > 0 && isspace(buffer[len-1])) len--;
1254 if (len == 0) break;
1255 }
1256 fprintf(outfile, "\n");
1257 }
1258 goto CONTINUE;
1259 }
1260
1261 /* Compilation succeeded; print data if required. There are now two
1262 info-returning functions. The old one has a limited interface and
1263 returns only limited data. Check that it agrees with the newer one. */
1264
1265 if (log_store)
1266 fprintf(outfile, "Memory allocation (code space): %d\n",
1267 (int)(gotten_store -
1268 sizeof(real_pcre) -
1269 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1270
1271 /* Extract the size for possible writing before possibly flipping it,
1272 and remember the store that was got. */
1273
1274 true_size = ((real_pcre *)re)->size;
1275 regex_gotten_store = gotten_store;
1276
1277 /* If /S was present, study the regexp to generate additional info to
1278 help with the matching. */
1279
1280 if (do_study)
1281 {
1282 if (timeit > 0)
1283 {
1284 register int i;
1285 clock_t time_taken;
1286 clock_t start_time = clock();
1287 for (i = 0; i < timeit; i++)
1288 extra = pcre_study(re, study_options, &error);
1289 time_taken = clock() - start_time;
1290 if (extra != NULL) free(extra);
1291 fprintf(outfile, " Study time %.4f milliseconds\n",
1292 (((double)time_taken * 1000.0) / (double)timeit) /
1293 (double)CLOCKS_PER_SEC);
1294 }
1295 extra = pcre_study(re, study_options, &error);
1296 if (error != NULL)
1297 fprintf(outfile, "Failed to study: %s\n", error);
1298 else if (extra != NULL)
1299 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1300 }
1301
1302 /* If the 'F' option was present, we flip the bytes of all the integer
1303 fields in the regex data block and the study block. This is to make it
1304 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1305 compiled on a different architecture. */
1306
1307 if (do_flip)
1308 {
1309 real_pcre *rre = (real_pcre *)re;
1310 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1311 rre->size = byteflip(rre->size, sizeof(rre->size));
1312 rre->options = byteflip(rre->options, sizeof(rre->options));
1313 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1314 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1315 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1316 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1317 rre->name_table_offset = byteflip(rre->name_table_offset,
1318 sizeof(rre->name_table_offset));
1319 rre->name_entry_size = byteflip(rre->name_entry_size,
1320 sizeof(rre->name_entry_size));
1321 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1322
1323 if (extra != NULL)
1324 {
1325 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1326 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1327 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1328 }
1329 }
1330
1331 /* Extract information from the compiled data if required */
1332
1333 SHOW_INFO:
1334
1335 if (do_debug)
1336 {
1337 fprintf(outfile, "------------------------------------------------------------------\n");
1338 pcre_printint(re, outfile);
1339 }
1340
1341 if (do_showinfo)
1342 {
1343 unsigned long int get_options, all_options;
1344 #if !defined NOINFOCHECK
1345 int old_first_char, old_options, old_count;
1346 #endif
1347 int count, backrefmax, first_char, need_char;
1348 int nameentrysize, namecount;
1349 const uschar *nametable;
1350
1351 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1352 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1353 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1354 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1355 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1356 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1357 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1358 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1359 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1360
1361 #if !defined NOINFOCHECK
1362 old_count = pcre_info(re, &old_options, &old_first_char);
1363 if (count < 0) fprintf(outfile,
1364 "Error %d from pcre_info()\n", count);
1365 else
1366 {
1367 if (old_count != count) fprintf(outfile,
1368 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1369 old_count);
1370
1371 if (old_first_char != first_char) fprintf(outfile,
1372 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1373 first_char, old_first_char);
1374
1375 if (old_options != (int)get_options) fprintf(outfile,
1376 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1377 get_options, old_options);
1378 }
1379 #endif
1380
1381 if (size != regex_gotten_store) fprintf(outfile,
1382 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1383 (int)size, (int)regex_gotten_store);
1384
1385 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1386 if (backrefmax > 0)
1387 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1388
1389 if (namecount > 0)
1390 {
1391 fprintf(outfile, "Named capturing subpatterns:\n");
1392 while (namecount-- > 0)
1393 {
1394 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1395 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1396 GET2(nametable, 0));
1397 nametable += nameentrysize;
1398 }
1399 }
1400
1401 /* The NOPARTIAL bit is a private bit in the options, so we have
1402 to fish it out via our back door */
1403
1404 all_options = ((real_pcre *)re)->options;
1405 if (do_flip)
1406 {
1407 all_options = byteflip(all_options, sizeof(all_options));
1408 }
1409
1410 if ((all_options & PCRE_NOPARTIAL) != 0)
1411 fprintf(outfile, "Partial matching not supported\n");
1412
1413 if (get_options == 0) fprintf(outfile, "No options\n");
1414 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1415 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1416 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1417 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1418 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1419 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1420 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1421 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1422 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1423 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1424 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1425 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1426 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1427 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1428
1429 switch (get_options & PCRE_NEWLINE_BITS)
1430 {
1431 case PCRE_NEWLINE_CR:
1432 fprintf(outfile, "Forced newline sequence: CR\n");
1433 break;
1434
1435 case PCRE_NEWLINE_LF:
1436 fprintf(outfile, "Forced newline sequence: LF\n");
1437 break;
1438
1439 case PCRE_NEWLINE_CRLF:
1440 fprintf(outfile, "Forced newline sequence: CRLF\n");
1441 break;
1442
1443 case PCRE_NEWLINE_ANY:
1444 fprintf(outfile, "Forced newline sequence: ANY\n");
1445 break;
1446
1447 default:
1448 break;
1449 }
1450
1451 if (first_char == -1)
1452 {
1453 fprintf(outfile, "First char at start or follows newline\n");
1454 }
1455 else if (first_char < 0)
1456 {
1457 fprintf(outfile, "No first char\n");
1458 }
1459 else
1460 {
1461 int ch = first_char & 255;
1462 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1463 "" : " (caseless)";
1464 if (PRINTHEX(ch))
1465 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1466 else
1467 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1468 }
1469
1470 if (need_char < 0)
1471 {
1472 fprintf(outfile, "No need char\n");
1473 }
1474 else
1475 {
1476 int ch = need_char & 255;
1477 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1478 "" : " (caseless)";
1479 if (PRINTHEX(ch))
1480 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1481 else
1482 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1483 }
1484
1485 /* Don't output study size; at present it is in any case a fixed
1486 value, but it varies, depending on the computer architecture, and
1487 so messes up the test suite. (And with the /F option, it might be
1488 flipped.) */
1489
1490 if (do_study)
1491 {
1492 if (extra == NULL)
1493 fprintf(outfile, "Study returned NULL\n");
1494 else
1495 {
1496 uschar *start_bits = NULL;
1497 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1498
1499 if (start_bits == NULL)
1500 fprintf(outfile, "No starting byte set\n");
1501 else
1502 {
1503 int i;
1504 int c = 24;
1505 fprintf(outfile, "Starting byte set: ");
1506 for (i = 0; i < 256; i++)
1507 {
1508 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1509 {
1510 if (c > 75)
1511 {
1512 fprintf(outfile, "\n ");
1513 c = 2;
1514 }
1515 if (PRINTHEX(i) && i != ' ')
1516 {
1517 fprintf(outfile, "%c ", i);
1518 c += 2;
1519 }
1520 else
1521 {
1522 fprintf(outfile, "\\x%02x ", i);
1523 c += 5;
1524 }
1525 }
1526 }
1527 fprintf(outfile, "\n");
1528 }
1529 }
1530 }
1531 }
1532
1533 /* If the '>' option was present, we write out the regex to a file, and
1534 that is all. The first 8 bytes of the file are the regex length and then
1535 the study length, in big-endian order. */
1536
1537 if (to_file != NULL)
1538 {
1539 FILE *f = fopen((char *)to_file, "wb");
1540 if (f == NULL)
1541 {
1542 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1543 }
1544 else
1545 {
1546 uschar sbuf[8];
1547 sbuf[0] = (true_size >> 24) & 255;
1548 sbuf[1] = (true_size >> 16) & 255;
1549 sbuf[2] = (true_size >> 8) & 255;
1550 sbuf[3] = (true_size) & 255;
1551
1552 sbuf[4] = (true_study_size >> 24) & 255;
1553 sbuf[5] = (true_study_size >> 16) & 255;
1554 sbuf[6] = (true_study_size >> 8) & 255;
1555 sbuf[7] = (true_study_size) & 255;
1556
1557 if (fwrite(sbuf, 1, 8, f) < 8 ||
1558 fwrite(re, 1, true_size, f) < true_size)
1559 {
1560 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1561 }
1562 else
1563 {
1564 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1565 if (extra != NULL)
1566 {
1567 if (fwrite(extra->study_data, 1, true_study_size, f) <
1568 true_study_size)
1569 {
1570 fprintf(outfile, "Write error on %s: %s\n", to_file,
1571 strerror(errno));
1572 }
1573 else fprintf(outfile, "Study data written to %s\n", to_file);
1574
1575 }
1576 }
1577 fclose(f);
1578 }
1579
1580 new_free(re);
1581 if (extra != NULL) new_free(extra);
1582 if (tables != NULL) new_free((void *)tables);
1583 continue; /* With next regex */
1584 }
1585 } /* End of non-POSIX compile */
1586
1587 /* Read data lines and test them */
1588
1589 for (;;)
1590 {
1591 uschar *q;
1592 uschar *bptr = dbuffer;
1593 int *use_offsets = offsets;
1594 int use_size_offsets = size_offsets;
1595 int callout_data = 0;
1596 int callout_data_set = 0;
1597 int count, c;
1598 int copystrings = 0;
1599 int find_match_limit = 0;
1600 int getstrings = 0;
1601 int getlist = 0;
1602 int gmatched = 0;
1603 int start_offset = 0;
1604 int g_notempty = 0;
1605 int use_dfa = 0;
1606
1607 options = 0;
1608
1609 *copynames = 0;
1610 *getnames = 0;
1611
1612 copynamesptr = copynames;
1613 getnamesptr = getnames;
1614
1615 pcre_callout = callout;
1616 first_callout = 1;
1617 callout_extra = 0;
1618 callout_count = 0;
1619 callout_fail_count = 999999;
1620 callout_fail_id = -1;
1621 show_malloc = 0;
1622
1623 if (extra != NULL) extra->flags &=
1624 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1625
1626 len = 0;
1627 for (;;)
1628 {
1629 if (infile == stdin) printf("data> ");
1630 if (extend_inputline(infile, buffer + len) == NULL)
1631 {
1632 if (len > 0) break;
1633 done = 1;
1634 goto CONTINUE;
1635 }
1636 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1637 len = (int)strlen((char *)buffer);
1638 if (buffer[len-1] == '\n') break;
1639 }
1640
1641 while (len > 0 && isspace(buffer[len-1])) len--;
1642 buffer[len] = 0;
1643 if (len == 0) break;
1644
1645 p = buffer;
1646 while (isspace(*p)) p++;
1647
1648 q = dbuffer;
1649 while ((c = *p++) != 0)
1650 {
1651 int i = 0;
1652 int n = 0;
1653
1654 if (c == '\\') switch ((c = *p++))
1655 {
1656 case 'a': c = 7; break;
1657 case 'b': c = '\b'; break;
1658 case 'e': c = 27; break;
1659 case 'f': c = '\f'; break;
1660 case 'n': c = '\n'; break;
1661 case 'r': c = '\r'; break;
1662 case 't': c = '\t'; break;
1663 case 'v': c = '\v'; break;
1664
1665 case '0': case '1': case '2': case '3':
1666 case '4': case '5': case '6': case '7':
1667 c -= '0';
1668 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1669 c = c * 8 + *p++ - '0';
1670
1671 #if !defined NOUTF8
1672 if (use_utf8 && c > 255)
1673 {
1674 unsigned char buff8[8];
1675 int ii, utn;
1676 utn = ord2utf8(c, buff8);
1677 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1678 c = buff8[ii]; /* Last byte */
1679 }
1680 #endif
1681 break;
1682
1683 case 'x':
1684
1685 /* Handle \x{..} specially - new Perl thing for utf8 */
1686
1687 #if !defined NOUTF8
1688 if (*p == '{')
1689 {
1690 unsigned char *pt = p;
1691 c = 0;
1692 while (isxdigit(*(++pt)))
1693 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1694 if (*pt == '}')
1695 {
1696 unsigned char buff8[8];
1697 int ii, utn;
1698 utn = ord2utf8(c, buff8);
1699 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1700 c = buff8[ii]; /* Last byte */
1701 p = pt + 1;
1702 break;
1703 }
1704 /* Not correct form; fall through */
1705 }
1706 #endif
1707
1708 /* Ordinary \x */
1709
1710 c = 0;
1711 while (i++ < 2 && isxdigit(*p))
1712 {
1713 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1714 p++;
1715 }
1716 break;
1717
1718 case 0: /* \ followed by EOF allows for an empty line */
1719 p--;
1720 continue;
1721
1722 case '>':
1723 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1724 continue;
1725
1726 case 'A': /* Option setting */
1727 options |= PCRE_ANCHORED;
1728 continue;
1729
1730 case 'B':
1731 options |= PCRE_NOTBOL;
1732 continue;
1733
1734 case 'C':
1735 if (isdigit(*p)) /* Set copy string */
1736 {
1737 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1738 copystrings |= 1 << n;
1739 }
1740 else if (isalnum(*p))
1741 {
1742 uschar *npp = copynamesptr;
1743 while (isalnum(*p)) *npp++ = *p++;
1744 *npp++ = 0;
1745 *npp = 0;
1746 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1747 if (n < 0)
1748 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1749 copynamesptr = npp;
1750 }
1751 else if (*p == '+')
1752 {
1753 callout_extra = 1;
1754 p++;
1755 }
1756 else if (*p == '-')
1757 {
1758 pcre_callout = NULL;
1759 p++;
1760 }
1761 else if (*p == '!')
1762 {
1763 callout_fail_id = 0;
1764 p++;
1765 while(isdigit(*p))
1766 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1767 callout_fail_count = 0;
1768 if (*p == '!')
1769 {
1770 p++;
1771 while(isdigit(*p))
1772 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1773 }
1774 }
1775 else if (*p == '*')
1776 {
1777 int sign = 1;
1778 callout_data = 0;
1779 if (*(++p) == '-') { sign = -1; p++; }
1780 while(isdigit(*p))
1781 callout_data = callout_data * 10 + *p++ - '0';
1782 callout_data *= sign;
1783 callout_data_set = 1;
1784 }
1785 continue;
1786
1787 #if !defined NODFA
1788 case 'D':
1789 #if !defined NOPOSIX
1790 if (posix || do_posix)
1791 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1792 else
1793 #endif
1794 use_dfa = 1;
1795 continue;
1796
1797 case 'F':
1798 options |= PCRE_DFA_SHORTEST;
1799 continue;
1800 #endif
1801
1802 case 'G':
1803 if (isdigit(*p))
1804 {
1805 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1806 getstrings |= 1 << n;
1807 }
1808 else if (isalnum(*p))
1809 {
1810 uschar *npp = getnamesptr;
1811 while (isalnum(*p)) *npp++ = *p++;
1812 *npp++ = 0;
1813 *npp = 0;
1814 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1815 if (n < 0)
1816 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1817 getnamesptr = npp;
1818 }
1819 continue;
1820
1821 case 'L':
1822 getlist = 1;
1823 continue;
1824
1825 case 'M':
1826 find_match_limit = 1;
1827 continue;
1828
1829 case 'N':
1830 options |= PCRE_NOTEMPTY;
1831 continue;
1832
1833 case 'O':
1834 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1835 if (n > size_offsets_max)
1836 {
1837 size_offsets_max = n;
1838 free(offsets);
1839 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1840 if (offsets == NULL)
1841 {
1842 printf("** Failed to get %d bytes of memory for offsets vector\n",
1843 size_offsets_max * sizeof(int));
1844 yield = 1;
1845 goto EXIT;
1846 }
1847 }
1848 use_size_offsets = n;
1849 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1850 continue;
1851
1852 case 'P':
1853 options |= PCRE_PARTIAL;
1854 continue;
1855
1856 case 'Q':
1857 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1858 if (extra == NULL)
1859 {
1860 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1861 extra->flags = 0;
1862 }
1863 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1864 extra->match_limit_recursion = n;
1865 continue;
1866
1867 case 'q':
1868 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1869 if (extra == NULL)
1870 {
1871 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1872 extra->flags = 0;
1873 }
1874 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1875 extra->match_limit = n;
1876 continue;
1877
1878 #if !defined NODFA
1879 case 'R':
1880 options |= PCRE_DFA_RESTART;
1881 continue;
1882 #endif
1883
1884 case 'S':
1885 show_malloc = 1;
1886 continue;
1887
1888 case 'Z':
1889 options |= PCRE_NOTEOL;
1890 continue;
1891
1892 case '?':
1893 options |= PCRE_NO_UTF8_CHECK;
1894 continue;
1895
1896 case '<':
1897 {
1898 int x = check_newline(p, outfile);
1899 if (x == 0) goto NEXT_DATA;
1900 options |= x;
1901 while (*p++ != '>');
1902 }
1903 continue;
1904 }
1905 *q++ = c;
1906 }
1907 *q = 0;
1908 len = q - dbuffer;
1909
1910 if ((all_use_dfa || use_dfa) && find_match_limit)
1911 {
1912 printf("**Match limit not relevant for DFA matching: ignored\n");
1913 find_match_limit = 0;
1914 }
1915
1916 /* Handle matching via the POSIX interface, which does not
1917 support timing or playing with the match limit or callout data. */
1918
1919 #if !defined NOPOSIX
1920 if (posix || do_posix)
1921 {
1922 int rc;
1923 int eflags = 0;
1924 regmatch_t *pmatch = NULL;
1925 if (use_size_offsets > 0)
1926 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1927 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1928 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1929
1930 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1931
1932 if (rc != 0)
1933 {
1934 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1935 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1936 }
1937 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1938 != 0)
1939 {
1940 fprintf(outfile, "Matched with REG_NOSUB\n");
1941 }
1942 else
1943 {
1944 size_t i;
1945 for (i = 0; i < (size_t)use_size_offsets; i++)
1946 {
1947 if (pmatch[i].rm_so >= 0)
1948 {
1949 fprintf(outfile, "%2d: ", (int)i);
1950 (void)pchars(dbuffer + pmatch[i].rm_so,
1951 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1952 fprintf(outfile, "\n");
1953 if (i == 0 && do_showrest)
1954 {
1955 fprintf(outfile, " 0+ ");
1956 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1957 outfile);
1958 fprintf(outfile, "\n");
1959 }
1960 }
1961 }
1962 }
1963 free(pmatch);
1964 }
1965
1966 /* Handle matching via the native interface - repeats for /g and /G */
1967
1968 else
1969 #endif /* !defined NOPOSIX */
1970
1971 for (;; gmatched++) /* Loop for /g or /G */
1972 {
1973 if (timeitm > 0)
1974 {
1975 register int i;
1976 clock_t time_taken;
1977 clock_t start_time = clock();
1978
1979 #if !defined NODFA
1980 if (all_use_dfa || use_dfa)
1981 {
1982 int workspace[1000];
1983 for (i = 0; i < timeitm; i++)
1984 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1985 options | g_notempty, use_offsets, use_size_offsets, workspace,
1986 sizeof(workspace)/sizeof(int));
1987 }
1988 else
1989 #endif
1990
1991 for (i = 0; i < timeitm; i++)
1992 count = pcre_exec(re, extra, (char *)bptr, len,
1993 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1994
1995 time_taken = clock() - start_time;
1996 fprintf(outfile, "Execute time %.4f milliseconds\n",
1997 (((double)time_taken * 1000.0) / (double)timeitm) /
1998 (double)CLOCKS_PER_SEC);
1999 }
2000
2001 /* If find_match_limit is set, we want to do repeated matches with
2002 varying limits in order to find the minimum value for the match limit and
2003 for the recursion limit. */
2004
2005 if (find_match_limit)
2006 {
2007 if (extra == NULL)
2008 {
2009 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2010 extra->flags = 0;
2011 }
2012
2013 (void)check_match_limit(re, extra, bptr, len, start_offset,
2014 options|g_notempty, use_offsets, use_size_offsets,
2015 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2016 PCRE_ERROR_MATCHLIMIT, "match()");
2017
2018 count = check_match_limit(re, extra, bptr, len, start_offset,
2019 options|g_notempty, use_offsets, use_size_offsets,
2020 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2021 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2022 }
2023
2024 /* If callout_data is set, use the interface with additional data */
2025
2026 else if (callout_data_set)
2027 {
2028 if (extra == NULL)
2029 {
2030 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2031 extra->flags = 0;
2032 }
2033 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2034 extra->callout_data = &callout_data;
2035 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2036 options | g_notempty, use_offsets, use_size_offsets);
2037 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2038 }
2039
2040 /* The normal case is just to do the match once, with the default
2041 value of match_limit. */
2042
2043 #if !defined NODFA
2044 else if (all_use_dfa || use_dfa)
2045 {
2046 int workspace[1000];
2047 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2048 options | g_notempty, use_offsets, use_size_offsets, workspace,
2049 sizeof(workspace)/sizeof(int));
2050 if (count == 0)
2051 {
2052 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2053 count = use_size_offsets/2;
2054 }
2055 }
2056 #endif
2057
2058 else
2059 {
2060 count = pcre_exec(re, extra, (char *)bptr, len,
2061 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2062 if (count == 0)
2063 {
2064 fprintf(outfile, "Matched, but too many substrings\n");
2065 count = use_size_offsets/3;
2066 }
2067 }
2068
2069 /* Matched */
2070
2071 if (count >= 0)
2072 {
2073 int i, maxcount;
2074
2075 #if !defined NODFA
2076 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2077 #endif
2078 maxcount = use_size_offsets/3;
2079
2080 /* This is a check against a lunatic return value. */
2081
2082 if (count > maxcount)
2083 {
2084 fprintf(outfile,
2085 "** PCRE error: returned count %d is too big for offset size %d\n",
2086 count, use_size_offsets);
2087 count = use_size_offsets/3;
2088 if (do_g || do_G)
2089 {
2090 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2091 do_g = do_G = FALSE; /* Break g/G loop */
2092 }
2093 }
2094
2095 for (i = 0; i < count * 2; i += 2)
2096 {
2097 if (use_offsets[i] < 0)
2098 fprintf(outfile, "%2d: <unset>\n", i/2);
2099 else
2100 {
2101 fprintf(outfile, "%2d: ", i/2);
2102 (void)pchars(bptr + use_offsets[i],
2103 use_offsets[i+1] - use_offsets[i], outfile);
2104 fprintf(outfile, "\n");
2105 if (i == 0)
2106 {
2107 if (do_showrest)
2108 {
2109 fprintf(outfile, " 0+ ");
2110 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2111 outfile);
2112 fprintf(outfile, "\n");
2113 }
2114 }
2115 }
2116 }
2117
2118 for (i = 0; i < 32; i++)
2119 {
2120 if ((copystrings & (1 << i)) != 0)
2121 {
2122 char copybuffer[256];
2123 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2124 i, copybuffer, sizeof(copybuffer));
2125 if (rc < 0)
2126 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2127 else
2128 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2129 }
2130 }
2131
2132 for (copynamesptr = copynames;
2133 *copynamesptr != 0;
2134 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2135 {
2136 char copybuffer[256];
2137 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2138 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2139 if (rc < 0)
2140 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2141 else
2142 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2143 }
2144
2145 for (i = 0; i < 32; i++)
2146 {
2147 if ((getstrings & (1 << i)) != 0)
2148 {
2149 const char *substring;
2150 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2151 i, &substring);
2152 if (rc < 0)
2153 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2154 else
2155 {
2156 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2157 pcre_free_substring(substring);
2158 }
2159 }
2160 }
2161
2162 for (getnamesptr = getnames;
2163 *getnamesptr != 0;
2164 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2165 {
2166 const char *substring;
2167 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2168 count, (char *)getnamesptr, &substring);
2169 if (rc < 0)
2170 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2171 else
2172 {
2173 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2174 pcre_free_substring(substring);
2175 }
2176 }
2177
2178 if (getlist)
2179 {
2180 const char **stringlist;
2181 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2182 &stringlist);
2183 if (rc < 0)
2184 fprintf(outfile, "get substring list failed %d\n", rc);
2185 else
2186 {
2187 for (i = 0; i < count; i++)
2188 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2189 if (stringlist[i] != NULL)
2190 fprintf(outfile, "string list not terminated by NULL\n");
2191 /* free((void *)stringlist); */
2192 pcre_free_substring_list(stringlist);
2193 }
2194 }
2195 }
2196
2197 /* There was a partial match */
2198
2199 else if (count == PCRE_ERROR_PARTIAL)
2200 {
2201 fprintf(outfile, "Partial match");
2202 #if !defined NODFA
2203 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2204 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2205 bptr + use_offsets[0]);
2206 #endif
2207 fprintf(outfile, "\n");
2208 break; /* Out of the /g loop */
2209 }
2210
2211 /* Failed to match. If this is a /g or /G loop and we previously set
2212 g_notempty after a null match, this is not necessarily the end.
2213 We want to advance the start offset, and continue. In the case of UTF-8
2214 matching, the advance must be one character, not one byte. Fudge the
2215 offset values to achieve this. We won't be at the end of the string -
2216 that was checked before setting g_notempty. */
2217
2218 else
2219 {
2220 if (g_notempty != 0)
2221 {
2222 int onechar = 1;
2223 use_offsets[0] = start_offset;
2224 if (use_utf8)
2225 {
2226 while (start_offset + onechar < len)
2227 {
2228 int tb = bptr[start_offset+onechar];
2229 if (tb <= 127) break;
2230 tb &= 0xc0;
2231 if (tb != 0 && tb != 0xc0) onechar++;
2232 }
2233 }
2234 use_offsets[1] = start_offset + onechar;
2235 }
2236 else
2237 {
2238 if (count == PCRE_ERROR_NOMATCH)
2239 {
2240 if (gmatched == 0) fprintf(outfile, "No match\n");
2241 }
2242 else fprintf(outfile, "Error %d\n", count);
2243 break; /* Out of the /g loop */
2244 }
2245 }
2246
2247 /* If not /g or /G we are done */
2248
2249 if (!do_g && !do_G) break;
2250
2251 /* If we have matched an empty string, first check to see if we are at
2252 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2253 what Perl's /g option does. This turns out to be rather cunning. First
2254 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2255 same point. If this fails (picked up above) we advance to the next
2256 character. */
2257
2258 g_notempty = 0;
2259 if (use_offsets[0] == use_offsets[1])
2260 {
2261 if (use_offsets[0] == len) break;
2262 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2263 }
2264
2265 /* For /g, update the start offset, leaving the rest alone */
2266
2267 if (do_g) start_offset = use_offsets[1];
2268
2269 /* For /G, update the pointer and length */
2270
2271 else
2272 {
2273 bptr += use_offsets[1];
2274 len -= use_offsets[1];
2275 }
2276 } /* End of loop for /g and /G */
2277
2278 NEXT_DATA: continue;
2279 } /* End of loop for data lines */
2280
2281 CONTINUE:
2282
2283 #if !defined NOPOSIX
2284 if (posix || do_posix) regfree(&preg);
2285 #endif
2286
2287 if (re != NULL) new_free(re);
2288 if (extra != NULL) new_free(extra);
2289 if (tables != NULL)
2290 {
2291 new_free((void *)tables);
2292 setlocale(LC_CTYPE, "C");
2293 locale_set = 0;
2294 }
2295 }
2296
2297 if (infile == stdin) fprintf(outfile, "\n");
2298
2299 EXIT:
2300
2301 if (infile != NULL && infile != stdin) fclose(infile);
2302 if (outfile != NULL && outfile != stdout) fclose(outfile);
2303
2304 free(buffer);
2305 free(dbuffer);
2306 free(pbuffer);
2307 free(offsets);
2308
2309 return yield;
2310 }
2311
2312 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12