/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 231 - (show annotations) (download)
Tue Sep 11 11:15:33 2007 UTC (6 years, 7 months ago) by ph10
File MIME type: text/plain
File size: 69873 byte(s)
Add facility to make \R match only CR, LF, or CRLF.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include <config.h>
41 #endif
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <locale.h>
49 #include <errno.h>
50
51
52 /* A number of things vary for Windows builds. Originally, pcretest opened its
53 input and output without "b"; then I was told that "b" was needed in some
54 environments, so it was added for release 5.0 to both the input and output. (It
55 makes no difference on Unix-like systems.) Later I was told that it is wrong
56 for the input on Windows. I've now abstracted the modes into two macros that
57 are set here, to make it easier to fiddle with them, and removed "b" from the
58 input mode under Windows. */
59
60 #if defined(_WIN32) || defined(WIN32)
61 #include <io.h> /* For _setmode() */
62 #include <fcntl.h> /* For _O_BINARY */
63 #define INPUT_MODE "r"
64 #define OUTPUT_MODE "wb"
65
66 #else
67 #include <sys/time.h> /* These two includes are needed */
68 #include <sys/resource.h> /* for setrlimit(). */
69 #define INPUT_MODE "rb"
70 #define OUTPUT_MODE "wb"
71 #endif
72
73
74 /* We have to include pcre_internal.h because we need the internal info for
75 displaying the results of pcre_study() and we also need to know about the
76 internal macros, structures, and other internal data values; pcretest has
77 "inside information" compared to a program that strictly follows the PCRE API.
78
79 Although pcre_internal.h does itself include pcre.h, we explicitly include it
80 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81 appropriately for an application, not for building PCRE. */
82
83 #include "pcre.h"
84 #include "pcre_internal.h"
85
86 /* We need access to the data tables that PCRE uses. So as not to have to keep
87 two copies, we include the source file here, changing the names of the external
88 symbols to prevent clashes. */
89
90 #define _pcre_utf8_table1 utf8_table1
91 #define _pcre_utf8_table1_size utf8_table1_size
92 #define _pcre_utf8_table2 utf8_table2
93 #define _pcre_utf8_table3 utf8_table3
94 #define _pcre_utf8_table4 utf8_table4
95 #define _pcre_utt utt
96 #define _pcre_utt_size utt_size
97 #define _pcre_OP_lengths OP_lengths
98
99 #include "pcre_tables.c"
100
101 /* We also need the pcre_printint() function for printing out compiled
102 patterns. This function is in a separate file so that it can be included in
103 pcre_compile.c when that module is compiled with debugging enabled.
104
105 The definition of the macro PRINTABLE, which determines whether to print an
106 output character as-is or as a hex value when showing compiled patterns, is
107 contained in this file. We use it here also, in cases when the locale has not
108 been explicitly changed, so as to get consistent output from systems that
109 differ in their output from isprint() even in the "C" locale. */
110
111 #include "pcre_printint.src"
112
113 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
114
115
116 /* It is possible to compile this test program without including support for
117 testing the POSIX interface, though this is not available via the standard
118 Makefile. */
119
120 #if !defined NOPOSIX
121 #include "pcreposix.h"
122 #endif
123
124 /* It is also possible, for the benefit of the version currently imported into
125 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
126 interface to the DFA matcher (NODFA), and without the doublecheck of the old
127 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
128 UTF8 support if PCRE is built without it. */
129
130 #ifndef SUPPORT_UTF8
131 #ifndef NOUTF8
132 #define NOUTF8
133 #endif
134 #endif
135
136
137 /* Other parameters */
138
139 #ifndef CLOCKS_PER_SEC
140 #ifdef CLK_TCK
141 #define CLOCKS_PER_SEC CLK_TCK
142 #else
143 #define CLOCKS_PER_SEC 100
144 #endif
145 #endif
146
147 /* This is the default loop count for timing. */
148
149 #define LOOPREPEAT 500000
150
151 /* Static variables */
152
153 static FILE *outfile;
154 static int log_store = 0;
155 static int callout_count;
156 static int callout_extra;
157 static int callout_fail_count;
158 static int callout_fail_id;
159 static int debug_lengths;
160 static int first_callout;
161 static int locale_set = 0;
162 static int show_malloc;
163 static int use_utf8;
164 static size_t gotten_store;
165
166 /* The buffers grow automatically if very long input lines are encountered. */
167
168 static int buffer_size = 50000;
169 static uschar *buffer = NULL;
170 static uschar *dbuffer = NULL;
171 static uschar *pbuffer = NULL;
172
173
174
175 /*************************************************
176 * Read or extend an input line *
177 *************************************************/
178
179 /* Input lines are read into buffer, but both patterns and data lines can be
180 continued over multiple input lines. In addition, if the buffer fills up, we
181 want to automatically expand it so as to be able to handle extremely large
182 lines that are needed for certain stress tests. When the input buffer is
183 expanded, the other two buffers must also be expanded likewise, and the
184 contents of pbuffer, which are a copy of the input for callouts, must be
185 preserved (for when expansion happens for a data line). This is not the most
186 optimal way of handling this, but hey, this is just a test program!
187
188 Arguments:
189 f the file to read
190 start where in buffer to start (this *must* be within buffer)
191
192 Returns: pointer to the start of new data
193 could be a copy of start, or could be moved
194 NULL if no data read and EOF reached
195 */
196
197 static uschar *
198 extend_inputline(FILE *f, uschar *start)
199 {
200 uschar *here = start;
201
202 for (;;)
203 {
204 int rlen = buffer_size - (here - buffer);
205
206 if (rlen > 1000)
207 {
208 int dlen;
209 if (fgets((char *)here, rlen, f) == NULL)
210 return (here == start)? NULL : start;
211 dlen = (int)strlen((char *)here);
212 if (dlen > 0 && here[dlen - 1] == '\n') return start;
213 here += dlen;
214 }
215
216 else
217 {
218 int new_buffer_size = 2*buffer_size;
219 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
220 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
221 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
222
223 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
224 {
225 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
226 exit(1);
227 }
228
229 memcpy(new_buffer, buffer, buffer_size);
230 memcpy(new_pbuffer, pbuffer, buffer_size);
231
232 buffer_size = new_buffer_size;
233
234 start = new_buffer + (start - buffer);
235 here = new_buffer + (here - buffer);
236
237 free(buffer);
238 free(dbuffer);
239 free(pbuffer);
240
241 buffer = new_buffer;
242 dbuffer = new_dbuffer;
243 pbuffer = new_pbuffer;
244 }
245 }
246
247 return NULL; /* Control never gets here */
248 }
249
250
251
252
253
254
255
256 /*************************************************
257 * Read number from string *
258 *************************************************/
259
260 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
261 around with conditional compilation, just do the job by hand. It is only used
262 for unpicking arguments, so just keep it simple.
263
264 Arguments:
265 str string to be converted
266 endptr where to put the end pointer
267
268 Returns: the value, as an int
269 */
270
/* Skip leading white space, then accumulate a run of decimal digits into an
int. *endptr is set to the first character that was not consumed, so a caller
can tell whether the whole argument was numeric. A string with no digits
yields 0. */

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int total = 0;
unsigned char *cursor = str;

while (*cursor != 0 && isspace(*cursor)) cursor++;

for (; isdigit(*cursor); cursor++)
  total = total * 10 + (int)(*cursor - '0');

*endptr = cursor;
return total;
}
280
281
282
283
284 /*************************************************
285 * Convert UTF-8 string to value *
286 *************************************************/
287
288 /* This function takes one or more bytes that represents a UTF-8 character,
289 and returns the value of the character.
290
291 Argument:
292 utf8bytes a pointer to the byte vector
293 vptr a pointer to an int to receive the value
294
295 Returns: > 0 => the number of bytes consumed
296 -6 to 0 => malformed UTF-8 character at offset = (-return)
297 */
298
299 #if !defined NOUTF8
300
301 static int
302 utf82ord(unsigned char *utf8bytes, int *vptr)
303 {
304 int c = *utf8bytes++;
305 int d = c;
306 int i, j, s;
307
308 for (i = -1; i < 6; i++) /* i is number of additional bytes */
309 {
310 if ((d & 0x80) == 0) break;
311 d <<= 1;
312 }
313
314 if (i == -1) { *vptr = c; return 1; } /* ascii character */
315 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
316
317 /* i now has a value in the range 1-5 */
318
319 s = 6*i;
320 d = (c & utf8_table3[i]) << s;
321
322 for (j = 0; j < i; j++)
323 {
324 c = *utf8bytes++;
325 if ((c & 0xc0) != 0x80) return -(j+1);
326 s -= 6;
327 d |= (c & 0x3f) << s;
328 }
329
330 /* Check that encoding was the correct unique one */
331
332 for (j = 0; j < utf8_table1_size; j++)
333 if (d <= utf8_table1[j]) break;
334 if (j != i) return -(i+1);
335
336 /* Valid value */
337
338 *vptr = d;
339 return i+1;
340 }
341
342 #endif
343
344
345
346 /*************************************************
347 * Convert character value to UTF-8 *
348 *************************************************/
349
350 /* This function takes an integer value in the range 0 - 0x7fffffff
351 and encodes it as a UTF-8 character in 1 to 6 bytes.
352
353 Arguments:
354 cvalue the character value
355 utf8bytes pointer to buffer for result - at least 6 bytes long
356
357 Returns: number of characters placed in the buffer
358 */
359
360 #if !defined NOUTF8
361
362 static int
363 ord2utf8(int cvalue, uschar *utf8bytes)
364 {
365 register int i, j;
366 for (i = 0; i < utf8_table1_size; i++)
367 if (cvalue <= utf8_table1[i]) break;
368 utf8bytes += i;
369 for (j = i; j > 0; j--)
370 {
371 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
372 cvalue >>= 6;
373 }
374 *utf8bytes = utf8_table2[i] | cvalue;
375 return i + 1;
376 }
377
378 #endif
379
380
381
382 /*************************************************
383 * Print character string *
384 *************************************************/
385
386 /* Character string printing function. Must handle UTF-8 strings in utf8
387 mode. Yields number of characters printed. If handed a NULL file, just counts
388 chars without printing. */
389
390 static int pchars(unsigned char *p, int length, FILE *f)
391 {
392 int c = 0;
393 int yield = 0;
394
395 while (length-- > 0)
396 {
397 #if !defined NOUTF8
398 if (use_utf8)
399 {
400 int rc = utf82ord(p, &c);
401
402 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
403 {
404 length -= rc - 1;
405 p += rc;
406 if (PRINTHEX(c))
407 {
408 if (f != NULL) fprintf(f, "%c", c);
409 yield++;
410 }
411 else
412 {
413 int n = 4;
414 if (f != NULL) fprintf(f, "\\x{%02x}", c);
415 yield += (n <= 0x000000ff)? 2 :
416 (n <= 0x00000fff)? 3 :
417 (n <= 0x0000ffff)? 4 :
418 (n <= 0x000fffff)? 5 : 6;
419 }
420 continue;
421 }
422 }
423 #endif
424
425 /* Not UTF-8, or malformed UTF-8 */
426
427 c = *p++;
428 if (PRINTHEX(c))
429 {
430 if (f != NULL) fprintf(f, "%c", c);
431 yield++;
432 }
433 else
434 {
435 if (f != NULL) fprintf(f, "\\x%02x", c);
436 yield += 4;
437 }
438 }
439
440 return yield;
441 }
442
443
444
445 /*************************************************
446 * Callout function *
447 *************************************************/
448
449 /* Called from PCRE as a result of the (?C) item. We print out where we are in
450 the match. Yield zero unless more callouts than the fail count, or the callout
451 data is not zero. */
452
453 static int callout(pcre_callout_block *cb)
454 {
455 FILE *f = (first_callout | callout_extra)? outfile : NULL;
456 int i, pre_start, post_start, subject_length;
457
458 if (callout_extra)
459 {
460 fprintf(f, "Callout %d: last capture = %d\n",
461 cb->callout_number, cb->capture_last);
462
463 for (i = 0; i < cb->capture_top * 2; i += 2)
464 {
465 if (cb->offset_vector[i] < 0)
466 fprintf(f, "%2d: <unset>\n", i/2);
467 else
468 {
469 fprintf(f, "%2d: ", i/2);
470 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
471 cb->offset_vector[i+1] - cb->offset_vector[i], f);
472 fprintf(f, "\n");
473 }
474 }
475 }
476
477 /* Re-print the subject in canonical form, the first time or if giving full
478 datails. On subsequent calls in the same match, we use pchars just to find the
479 printed lengths of the substrings. */
480
481 if (f != NULL) fprintf(f, "--->");
482
483 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
484 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
485 cb->current_position - cb->start_match, f);
486
487 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
488
489 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
490 cb->subject_length - cb->current_position, f);
491
492 if (f != NULL) fprintf(f, "\n");
493
494 /* Always print appropriate indicators, with callout number if not already
495 shown. For automatic callouts, show the pattern offset. */
496
497 if (cb->callout_number == 255)
498 {
499 fprintf(outfile, "%+3d ", cb->pattern_position);
500 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
501 }
502 else
503 {
504 if (callout_extra) fprintf(outfile, " ");
505 else fprintf(outfile, "%3d ", cb->callout_number);
506 }
507
508 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
509 fprintf(outfile, "^");
510
511 if (post_start > 0)
512 {
513 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
514 fprintf(outfile, "^");
515 }
516
517 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
518 fprintf(outfile, " ");
519
520 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
521 pbuffer + cb->pattern_position);
522
523 fprintf(outfile, "\n");
524 first_callout = 0;
525
526 if (cb->callout_data != NULL)
527 {
528 int callout_data = *((int *)(cb->callout_data));
529 if (callout_data != 0)
530 {
531 fprintf(outfile, "Callout data = %d\n", callout_data);
532 return callout_data;
533 }
534 }
535
536 return (cb->callout_number != callout_fail_id)? 0 :
537 (++callout_count >= callout_fail_count)? 1 : 0;
538 }
539
540
541 /*************************************************
542 * Local malloc functions *
543 *************************************************/
544
545 /* Alternative malloc function, to test functionality and show the size of the
546 compiled re. */
547
548 static void *new_malloc(size_t size)
549 {
550 void *block = malloc(size);
551 gotten_store = size;
552 if (show_malloc)
553 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
554 return block;
555 }
556
557 static void new_free(void *block)
558 {
559 if (show_malloc)
560 fprintf(outfile, "free %p\n", block);
561 free(block);
562 }
563
564
565 /* For recursion malloc/free, to test stacking calls */
566
567 static void *stack_malloc(size_t size)
568 {
569 void *block = malloc(size);
570 if (show_malloc)
571 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
572 return block;
573 }
574
575 static void stack_free(void *block)
576 {
577 if (show_malloc)
578 fprintf(outfile, "stack_free %p\n", block);
579 free(block);
580 }
581
582
583 /*************************************************
584 * Call pcre_fullinfo() *
585 *************************************************/
586
587 /* Get one piece of information from the pcre_fullinfo() function */
588
589 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
590 {
591 int rc;
592 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
593 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
594 }
595
596
597
598 /*************************************************
599 * Byte flipping function *
600 *************************************************/
601
/* Reverse the byte order of a value. When n is 2 only the two low-order
bytes are swapped; for any other n the four low-order bytes are reversed.
Bits above those that take part are discarded. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;

return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
611
612
613
614
615 /*************************************************
616 * Check match or recursion limit *
617 *************************************************/
618
619 static int
620 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
621 int start_offset, int options, int *use_offsets, int use_size_offsets,
622 int flag, unsigned long int *limit, int errnumber, const char *msg)
623 {
624 int count;
625 int min = 0;
626 int mid = 64;
627 int max = -1;
628
629 extra->flags |= flag;
630
631 for (;;)
632 {
633 *limit = mid;
634
635 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
636 use_offsets, use_size_offsets);
637
638 if (count == errnumber)
639 {
640 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
641 min = mid;
642 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
643 }
644
645 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
646 count == PCRE_ERROR_PARTIAL)
647 {
648 if (mid == min + 1)
649 {
650 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
651 break;
652 }
653 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
654 max = mid;
655 mid = (min + mid)/2;
656 }
657 else break; /* Some other error */
658 }
659
660 extra->flags &= ~flag;
661 return count;
662 }
663
664
665
666 /*************************************************
667 * Case-independent strncmp() function *
668 *************************************************/
669
670 /*
671 Arguments:
672 s first string
673 t second string
674 n number of characters to compare
675
676 Returns: < 0, = 0, or > 0, according to the comparison
677 */
678
679 static int
680 strncmpic(uschar *s, uschar *t, int n)
681 {
682 while (n--)
683 {
684 int c = tolower(*s++) - tolower(*t++);
685 if (c) return c;
686 }
687 return 0;
688 }
689
690
691
692 /*************************************************
693 * Check newline indicator *
694 *************************************************/
695
696 /* This is used both at compile and run-time to check for <xxx> escapes, where
697 xxx is LF, CR, CRLF, ANYCRLF, ANY, BSR_ANYCRLF, or BSR_UNICODE. Print a message
698 and return 0 if there is no match.
699
700 Arguments:
701 p points after the leading '<'
702 f file for error message
703
704 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
705 */
706
707 static int
708 check_newline(uschar *p, FILE *f)
709 {
710 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
711 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
712 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
713 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
714 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
715 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
716 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
717 fprintf(f, "Unknown newline type at: <%s\n", p);
718 return 0;
719 }
720
721
722
723 /*************************************************
724 * Usage function *
725 *************************************************/
726
/* Write the command-line summary to stdout. The -dfa and -p lines appear
only when support for them is compiled in (NODFA / NOPOSIX not defined). */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input> [<output>]]\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n",
      stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n",
      stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s output store (memory) used information\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n",
      stdout);
}
752
753
754
755 /*************************************************
756 * Main Program *
757 *************************************************/
758
759 /* Read lines from named file or stdin and write to named file or stdout; lines
760 consist of a regular expression, in delimiters and optionally followed by
761 options, followed by a set of test data, terminated by an empty line. */
762
763 int main(int argc, char **argv)
764 {
765 FILE *infile = stdin;
766 int options = 0;
767 int study_options = 0;
768 int op = 1;
769 int timeit = 0;
770 int timeitm = 0;
771 int showinfo = 0;
772 int showstore = 0;
773 int quiet = 0;
774 int size_offsets = 45;
775 int size_offsets_max;
776 int *offsets = NULL;
777 #if !defined NOPOSIX
778 int posix = 0;
779 #endif
780 int debug = 0;
781 int done = 0;
782 int all_use_dfa = 0;
783 int yield = 0;
784 int stack_size;
785
786 /* These vectors store, end-to-end, a list of captured substring names. Assume
787 that 1024 is plenty long enough for the few names we'll be testing. */
788
789 uschar copynames[1024];
790 uschar getnames[1024];
791
792 uschar *copynamesptr;
793 uschar *getnamesptr;
794
795 /* Get buffers from malloc() so that Electric Fence will check their misuse
796 when I am debugging. They grow automatically when very long lines are read. */
797
798 buffer = (unsigned char *)malloc(buffer_size);
799 dbuffer = (unsigned char *)malloc(buffer_size);
800 pbuffer = (unsigned char *)malloc(buffer_size);
801
802 /* The outfile variable is static so that new_malloc can use it. */
803
804 outfile = stdout;
805
806 /* The following _setmode() stuff is some Windows magic that tells its runtime
807 library to translate CRLF into a single LF character. At least, that's what
808 I've been told: never having used Windows I take this all on trust. Originally
809 it set 0x8000, but then I was advised that _O_BINARY was better. */
810
811 #if defined(_WIN32) || defined(WIN32)
812 _setmode( _fileno( stdout ), _O_BINARY );
813 #endif
814
815 /* Scan options */
816
817 while (argc > 1 && argv[op][0] == '-')
818 {
819 unsigned char *endptr;
820
821 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
822 showstore = 1;
823 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
824 else if (strcmp(argv[op], "-b") == 0) debug = 1;
825 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
826 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
827 #if !defined NODFA
828 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
829 #endif
830 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
831 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
832 *endptr == 0))
833 {
834 op++;
835 argc--;
836 }
837 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
838 {
839 int both = argv[op][2] == 0;
840 int temp;
841 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
842 *endptr == 0))
843 {
844 timeitm = temp;
845 op++;
846 argc--;
847 }
848 else timeitm = LOOPREPEAT;
849 if (both) timeit = timeitm;
850 }
851 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
852 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
853 *endptr == 0))
854 {
855 #if defined(_WIN32) || defined(WIN32)
856 printf("PCRE: -S not supported on this OS\n");
857 exit(1);
858 #else
859 int rc;
860 struct rlimit rlim;
861 getrlimit(RLIMIT_STACK, &rlim);
862 rlim.rlim_cur = stack_size * 1024 * 1024;
863 rc = setrlimit(RLIMIT_STACK, &rlim);
864 if (rc != 0)
865 {
866 printf("PCRE: setrlimit() failed with error %d\n", rc);
867 exit(1);
868 }
869 op++;
870 argc--;
871 #endif
872 }
873 #if !defined NOPOSIX
874 else if (strcmp(argv[op], "-p") == 0) posix = 1;
875 #endif
876 else if (strcmp(argv[op], "-C") == 0)
877 {
878 int rc;
879 printf("PCRE version %s\n", pcre_version());
880 printf("Compiled with\n");
881 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
882 printf(" %sUTF-8 support\n", rc? "" : "No ");
883 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
884 printf(" %sUnicode properties support\n", rc? "" : "No ");
885 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
886 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
887 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
888 (rc == -2)? "ANYCRLF" :
889 (rc == -1)? "ANY" : "???");
890 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
891 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
892 "all Unicode newlines");
893 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
894 printf(" Internal link size = %d\n", rc);
895 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
896 printf(" POSIX malloc threshold = %d\n", rc);
897 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
898 printf(" Default match limit = %d\n", rc);
899 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
900 printf(" Default recursion depth limit = %d\n", rc);
901 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
902 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
903 goto EXIT;
904 }
905 else if (strcmp(argv[op], "-help") == 0 ||
906 strcmp(argv[op], "--help") == 0)
907 {
908 usage();
909 goto EXIT;
910 }
911 else
912 {
913 printf("** Unknown or malformed option %s\n", argv[op]);
914 usage();
915 yield = 1;
916 goto EXIT;
917 }
918 op++;
919 argc--;
920 }
921
922 /* Get the store for the offsets vector, and remember what it was */
923
924 size_offsets_max = size_offsets;
925 offsets = (int *)malloc(size_offsets_max * sizeof(int));
926 if (offsets == NULL)
927 {
928 printf("** Failed to get %d bytes of memory for offsets vector\n",
929 (int)(size_offsets_max * sizeof(int)));
930 yield = 1;
931 goto EXIT;
932 }
933
934 /* Sort out the input and output files */
935
936 if (argc > 1)
937 {
938 infile = fopen(argv[op], INPUT_MODE);
939 if (infile == NULL)
940 {
941 printf("** Failed to open %s\n", argv[op]);
942 yield = 1;
943 goto EXIT;
944 }
945 }
946
947 if (argc > 2)
948 {
949 outfile = fopen(argv[op+1], OUTPUT_MODE);
950 if (outfile == NULL)
951 {
952 printf("** Failed to open %s\n", argv[op+1]);
953 yield = 1;
954 goto EXIT;
955 }
956 }
957
958 /* Set alternative malloc function */
959
960 pcre_malloc = new_malloc;
961 pcre_free = new_free;
962 pcre_stack_malloc = stack_malloc;
963 pcre_stack_free = stack_free;
964
965 /* Heading line unless quiet, then prompt for first regex if stdin */
966
967 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
968
969 /* Main loop */
970
971 while (!done)
972 {
973 pcre *re = NULL;
974 pcre_extra *extra = NULL;
975
976 #if !defined NOPOSIX /* There are still compilers that require no indent */
977 regex_t preg;
978 int do_posix = 0;
979 #endif
980
981 const char *error;
982 unsigned char *p, *pp, *ppp;
983 unsigned char *to_file = NULL;
984 const unsigned char *tables = NULL;
985 unsigned long int true_size, true_study_size = 0;
986 size_t size, regex_gotten_store;
987 int do_study = 0;
988 int do_debug = debug;
989 int do_G = 0;
990 int do_g = 0;
991 int do_showinfo = showinfo;
992 int do_showrest = 0;
993 int do_flip = 0;
994 int erroroffset, len, delimiter, poffset;
995
996 use_utf8 = 0;
997 debug_lengths = 1;
998
999 if (infile == stdin) printf(" re> ");
1000 if (extend_inputline(infile, buffer) == NULL) break;
1001 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1002 fflush(outfile);
1003
1004 p = buffer;
1005 while (isspace(*p)) p++;
1006 if (*p == 0) continue;
1007
1008 /* See if the pattern is to be loaded pre-compiled from a file. */
1009
1010 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1011 {
1012 unsigned long int magic, get_options;
1013 uschar sbuf[8];
1014 FILE *f;
1015
1016 p++;
1017 pp = p + (int)strlen((char *)p);
1018 while (isspace(pp[-1])) pp--;
1019 *pp = 0;
1020
1021 f = fopen((char *)p, "rb");
1022 if (f == NULL)
1023 {
1024 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1025 continue;
1026 }
1027
1028 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1029
1030 true_size =
1031 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1032 true_study_size =
1033 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1034
1035 re = (real_pcre *)new_malloc(true_size);
1036 regex_gotten_store = gotten_store;
1037
1038 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1039
1040 magic = ((real_pcre *)re)->magic_number;
1041 if (magic != MAGIC_NUMBER)
1042 {
1043 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1044 {
1045 do_flip = 1;
1046 }
1047 else
1048 {
1049 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1050 fclose(f);
1051 continue;
1052 }
1053 }
1054
1055 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1056 do_flip? " (byte-inverted)" : "", p);
1057
1058 /* Need to know if UTF-8 for printing data strings */
1059
1060 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1061 use_utf8 = (get_options & PCRE_UTF8) != 0;
1062
1063 /* Now see if there is any following study data */
1064
1065 if (true_study_size != 0)
1066 {
1067 pcre_study_data *psd;
1068
1069 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1070 extra->flags = PCRE_EXTRA_STUDY_DATA;
1071
1072 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1073 extra->study_data = psd;
1074
1075 if (fread(psd, 1, true_study_size, f) != true_study_size)
1076 {
1077 FAIL_READ:
1078 fprintf(outfile, "Failed to read data from %s\n", p);
1079 if (extra != NULL) new_free(extra);
1080 if (re != NULL) new_free(re);
1081 fclose(f);
1082 continue;
1083 }
1084 fprintf(outfile, "Study data loaded from %s\n", p);
1085 do_study = 1; /* To get the data output if requested */
1086 }
1087 else fprintf(outfile, "No study data\n");
1088
1089 fclose(f);
1090 goto SHOW_INFO;
1091 }
1092
1093 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1094 the pattern; if it isn't complete, read more. */
1095
1096 delimiter = *p++;
1097
1098 if (isalnum(delimiter) || delimiter == '\\')
1099 {
1100 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1101 goto SKIP_DATA;
1102 }
1103
1104 pp = p;
1105 poffset = p - buffer;
1106
1107 for(;;)
1108 {
1109 while (*pp != 0)
1110 {
1111 if (*pp == '\\' && pp[1] != 0) pp++;
1112 else if (*pp == delimiter) break;
1113 pp++;
1114 }
1115 if (*pp != 0) break;
1116 if (infile == stdin) printf(" > ");
1117 if ((pp = extend_inputline(infile, pp)) == NULL)
1118 {
1119 fprintf(outfile, "** Unexpected EOF\n");
1120 done = 1;
1121 goto CONTINUE;
1122 }
1123 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1124 }
1125
1126 /* The buffer may have moved while being extended; reset the start of data
1127 pointer to the correct relative point in the buffer. */
1128
1129 p = buffer + poffset;
1130
1131 /* If the first character after the delimiter is backslash, make
1132 the pattern end with backslash. This is purely to provide a way
1133 of testing for the error message when a pattern ends with backslash. */
1134
1135 if (pp[1] == '\\') *pp++ = '\\';
1136
1137 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1138 for callouts. */
1139
1140 *pp++ = 0;
1141 strcpy((char *)pbuffer, (char *)p);
1142
1143 /* Look for options after final delimiter */
1144
1145 options = 0;
1146 study_options = 0;
1147 log_store = showstore; /* default from command line */
1148
1149 while (*pp != 0)
1150 {
1151 switch (*pp++)
1152 {
1153 case 'f': options |= PCRE_FIRSTLINE; break;
1154 case 'g': do_g = 1; break;
1155 case 'i': options |= PCRE_CASELESS; break;
1156 case 'm': options |= PCRE_MULTILINE; break;
1157 case 's': options |= PCRE_DOTALL; break;
1158 case 'x': options |= PCRE_EXTENDED; break;
1159
1160 case '+': do_showrest = 1; break;
1161 case 'A': options |= PCRE_ANCHORED; break;
1162 case 'B': do_debug = 1; break;
1163 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1164 case 'D': do_debug = do_showinfo = 1; break;
1165 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1166 case 'F': do_flip = 1; break;
1167 case 'G': do_G = 1; break;
1168 case 'I': do_showinfo = 1; break;
1169 case 'J': options |= PCRE_DUPNAMES; break;
1170 case 'M': log_store = 1; break;
1171 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1172
1173 #if !defined NOPOSIX
1174 case 'P': do_posix = 1; break;
1175 #endif
1176
1177 case 'S': do_study = 1; break;
1178 case 'U': options |= PCRE_UNGREEDY; break;
1179 case 'X': options |= PCRE_EXTRA; break;
1180 case 'Z': debug_lengths = 0; break;
1181 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1182 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1183
1184 case 'L':
1185 ppp = pp;
1186 /* The '\r' test here is so that it works on Windows. */
1187 /* The 0 (NUL) test is just in case this is an unterminated line. */
1188 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1189 *ppp = 0;
1190 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1191 {
1192 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1193 goto SKIP_DATA;
1194 }
1195 locale_set = 1;
1196 tables = pcre_maketables();
1197 pp = ppp;
1198 break;
1199
1200 case '>':
1201 to_file = pp;
1202 while (*pp != 0) pp++;
1203 while (isspace(pp[-1])) pp--;
1204 *pp = 0;
1205 break;
1206
1207 case '<':
1208 {
1209 int x = check_newline(pp, outfile);
1210 if (x == 0) goto SKIP_DATA;
1211 options |= x;
1212 while (*pp++ != '>');
1213 }
1214 break;
1215
1216 case '\r': /* So that it works in Windows */
1217 case '\n':
1218 case ' ':
1219 break;
1220
1221 default:
1222 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1223 goto SKIP_DATA;
1224 }
1225 }
1226
1227 /* Handle compiling via the POSIX interface, which doesn't support the
1228 timing, showing, or debugging options, nor the ability to pass over
1229 local character tables. */
1230
1231 #if !defined NOPOSIX
1232 if (posix || do_posix)
1233 {
1234 int rc;
1235 int cflags = 0;
1236
1237 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1238 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1239 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1240 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1241 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1242
1243 rc = regcomp(&preg, (char *)p, cflags);
1244
1245 /* Compilation failed; go back for another re, skipping to blank line
1246 if non-interactive. */
1247
1248 if (rc != 0)
1249 {
1250 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1251 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1252 goto SKIP_DATA;
1253 }
1254 }
1255
1256 /* Handle compiling via the native interface */
1257
1258 else
1259 #endif /* !defined NOPOSIX */
1260
1261 {
1262 if (timeit > 0)
1263 {
1264 register int i;
1265 clock_t time_taken;
1266 clock_t start_time = clock();
1267 for (i = 0; i < timeit; i++)
1268 {
1269 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1270 if (re != NULL) free(re);
1271 }
1272 time_taken = clock() - start_time;
1273 fprintf(outfile, "Compile time %.4f milliseconds\n",
1274 (((double)time_taken * 1000.0) / (double)timeit) /
1275 (double)CLOCKS_PER_SEC);
1276 }
1277
1278 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1279
1280 /* Compilation failed; go back for another re, skipping to blank line
1281 if non-interactive. */
1282
1283 if (re == NULL)
1284 {
1285 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1286 SKIP_DATA:
1287 if (infile != stdin)
1288 {
1289 for (;;)
1290 {
1291 if (extend_inputline(infile, buffer) == NULL)
1292 {
1293 done = 1;
1294 goto CONTINUE;
1295 }
1296 len = (int)strlen((char *)buffer);
1297 while (len > 0 && isspace(buffer[len-1])) len--;
1298 if (len == 0) break;
1299 }
1300 fprintf(outfile, "\n");
1301 }
1302 goto CONTINUE;
1303 }
1304
1305 /* Compilation succeeded; print data if required. There are now two
1306 info-returning functions. The old one has a limited interface and
1307 returns only limited data. Check that it agrees with the newer one. */
1308
1309 if (log_store)
1310 fprintf(outfile, "Memory allocation (code space): %d\n",
1311 (int)(gotten_store -
1312 sizeof(real_pcre) -
1313 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1314
1315 /* Extract the size for possible writing before possibly flipping it,
1316 and remember the store that was got. */
1317
1318 true_size = ((real_pcre *)re)->size;
1319 regex_gotten_store = gotten_store;
1320
1321 /* If /S was present, study the regexp to generate additional info to
1322 help with the matching. */
1323
1324 if (do_study)
1325 {
1326 if (timeit > 0)
1327 {
1328 register int i;
1329 clock_t time_taken;
1330 clock_t start_time = clock();
1331 for (i = 0; i < timeit; i++)
1332 extra = pcre_study(re, study_options, &error);
1333 time_taken = clock() - start_time;
1334 if (extra != NULL) free(extra);
1335 fprintf(outfile, " Study time %.4f milliseconds\n",
1336 (((double)time_taken * 1000.0) / (double)timeit) /
1337 (double)CLOCKS_PER_SEC);
1338 }
1339 extra = pcre_study(re, study_options, &error);
1340 if (error != NULL)
1341 fprintf(outfile, "Failed to study: %s\n", error);
1342 else if (extra != NULL)
1343 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1344 }
1345
1346 /* If the 'F' option was present, we flip the bytes of all the integer
1347 fields in the regex data block and the study block. This is to make it
1348 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1349 compiled on a different architecture. */
1350
1351 if (do_flip)
1352 {
1353 real_pcre *rre = (real_pcre *)re;
1354 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1355 rre->size = byteflip(rre->size, sizeof(rre->size));
1356 rre->options = byteflip(rre->options, sizeof(rre->options));
1357 rre->flags = byteflip(rre->flags, sizeof(rre->flags));
1358 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1359 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1360 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1361 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1362 rre->name_table_offset = byteflip(rre->name_table_offset,
1363 sizeof(rre->name_table_offset));
1364 rre->name_entry_size = byteflip(rre->name_entry_size,
1365 sizeof(rre->name_entry_size));
1366 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1367
1368 if (extra != NULL)
1369 {
1370 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1371 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1372 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1373 }
1374 }
1375
1376 /* Extract information from the compiled data if required */
1377
1378 SHOW_INFO:
1379
1380 if (do_debug)
1381 {
1382 fprintf(outfile, "------------------------------------------------------------------\n");
1383 pcre_printint(re, outfile, debug_lengths);
1384 }
1385
1386 if (do_showinfo)
1387 {
1388 unsigned long int get_options, all_options;
1389 #if !defined NOINFOCHECK
1390 int old_first_char, old_options, old_count;
1391 #endif
1392 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1393 hascrorlf;
1394 int nameentrysize, namecount;
1395 const uschar *nametable;
1396
1397 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1398 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1399 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1400 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1401 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1402 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1403 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1404 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1405 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1406 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1407 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1408 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1409
1410 #if !defined NOINFOCHECK
1411 old_count = pcre_info(re, &old_options, &old_first_char);
1412 if (count < 0) fprintf(outfile,
1413 "Error %d from pcre_info()\n", count);
1414 else
1415 {
1416 if (old_count != count) fprintf(outfile,
1417 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1418 old_count);
1419
1420 if (old_first_char != first_char) fprintf(outfile,
1421 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1422 first_char, old_first_char);
1423
1424 if (old_options != (int)get_options) fprintf(outfile,
1425 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1426 get_options, old_options);
1427 }
1428 #endif
1429
1430 if (size != regex_gotten_store) fprintf(outfile,
1431 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1432 (int)size, (int)regex_gotten_store);
1433
1434 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1435 if (backrefmax > 0)
1436 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1437
1438 if (namecount > 0)
1439 {
1440 fprintf(outfile, "Named capturing subpatterns:\n");
1441 while (namecount-- > 0)
1442 {
1443 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1444 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1445 GET2(nametable, 0));
1446 nametable += nameentrysize;
1447 }
1448 }
1449
1450 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1451 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1452
1453 all_options = ((real_pcre *)re)->options;
1454 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1455
1456 if (get_options == 0) fprintf(outfile, "No options\n");
1457 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1458 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1459 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1460 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1461 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1462 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1463 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1464 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1465 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1466 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1467 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1468 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1469 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1470 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1471 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1472 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1473
1474 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1475
1476 switch (get_options & PCRE_NEWLINE_BITS)
1477 {
1478 case PCRE_NEWLINE_CR:
1479 fprintf(outfile, "Forced newline sequence: CR\n");
1480 break;
1481
1482 case PCRE_NEWLINE_LF:
1483 fprintf(outfile, "Forced newline sequence: LF\n");
1484 break;
1485
1486 case PCRE_NEWLINE_CRLF:
1487 fprintf(outfile, "Forced newline sequence: CRLF\n");
1488 break;
1489
1490 case PCRE_NEWLINE_ANYCRLF:
1491 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1492 break;
1493
1494 case PCRE_NEWLINE_ANY:
1495 fprintf(outfile, "Forced newline sequence: ANY\n");
1496 break;
1497
1498 default:
1499 break;
1500 }
1501
1502 if (first_char == -1)
1503 {
1504 fprintf(outfile, "First char at start or follows newline\n");
1505 }
1506 else if (first_char < 0)
1507 {
1508 fprintf(outfile, "No first char\n");
1509 }
1510 else
1511 {
1512 int ch = first_char & 255;
1513 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1514 "" : " (caseless)";
1515 if (PRINTHEX(ch))
1516 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1517 else
1518 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1519 }
1520
1521 if (need_char < 0)
1522 {
1523 fprintf(outfile, "No need char\n");
1524 }
1525 else
1526 {
1527 int ch = need_char & 255;
1528 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1529 "" : " (caseless)";
1530 if (PRINTHEX(ch))
1531 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1532 else
1533 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1534 }
1535
1536 /* Don't output study size; at present it is in any case a fixed
1537 value, but it varies, depending on the computer architecture, and
1538 so messes up the test suite. (And with the /F option, it might be
1539 flipped.) */
1540
1541 if (do_study)
1542 {
1543 if (extra == NULL)
1544 fprintf(outfile, "Study returned NULL\n");
1545 else
1546 {
1547 uschar *start_bits = NULL;
1548 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1549
1550 if (start_bits == NULL)
1551 fprintf(outfile, "No starting byte set\n");
1552 else
1553 {
1554 int i;
1555 int c = 24;
1556 fprintf(outfile, "Starting byte set: ");
1557 for (i = 0; i < 256; i++)
1558 {
1559 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1560 {
1561 if (c > 75)
1562 {
1563 fprintf(outfile, "\n ");
1564 c = 2;
1565 }
1566 if (PRINTHEX(i) && i != ' ')
1567 {
1568 fprintf(outfile, "%c ", i);
1569 c += 2;
1570 }
1571 else
1572 {
1573 fprintf(outfile, "\\x%02x ", i);
1574 c += 5;
1575 }
1576 }
1577 }
1578 fprintf(outfile, "\n");
1579 }
1580 }
1581 }
1582 }
1583
1584 /* If the '>' option was present, we write out the regex to a file, and
1585 that is all. The first 8 bytes of the file are the regex length and then
1586 the study length, in big-endian order. */
1587
1588 if (to_file != NULL)
1589 {
1590 FILE *f = fopen((char *)to_file, "wb");
1591 if (f == NULL)
1592 {
1593 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1594 }
1595 else
1596 {
1597 uschar sbuf[8];
1598 sbuf[0] = (true_size >> 24) & 255;
1599 sbuf[1] = (true_size >> 16) & 255;
1600 sbuf[2] = (true_size >> 8) & 255;
1601 sbuf[3] = (true_size) & 255;
1602
1603 sbuf[4] = (true_study_size >> 24) & 255;
1604 sbuf[5] = (true_study_size >> 16) & 255;
1605 sbuf[6] = (true_study_size >> 8) & 255;
1606 sbuf[7] = (true_study_size) & 255;
1607
1608 if (fwrite(sbuf, 1, 8, f) < 8 ||
1609 fwrite(re, 1, true_size, f) < true_size)
1610 {
1611 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1612 }
1613 else
1614 {
1615 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1616 if (extra != NULL)
1617 {
1618 if (fwrite(extra->study_data, 1, true_study_size, f) <
1619 true_study_size)
1620 {
1621 fprintf(outfile, "Write error on %s: %s\n", to_file,
1622 strerror(errno));
1623 }
1624 else fprintf(outfile, "Study data written to %s\n", to_file);
1625
1626 }
1627 }
1628 fclose(f);
1629 }
1630
1631 new_free(re);
1632 if (extra != NULL) new_free(extra);
1633 if (tables != NULL) new_free((void *)tables);
1634 continue; /* With next regex */
1635 }
1636 } /* End of non-POSIX compile */
1637
1638 /* Read data lines and test them */
1639
1640 for (;;)
1641 {
1642 uschar *q;
1643 uschar *bptr;
1644 int *use_offsets = offsets;
1645 int use_size_offsets = size_offsets;
1646 int callout_data = 0;
1647 int callout_data_set = 0;
1648 int count, c;
1649 int copystrings = 0;
1650 int find_match_limit = 0;
1651 int getstrings = 0;
1652 int getlist = 0;
1653 int gmatched = 0;
1654 int start_offset = 0;
1655 int g_notempty = 0;
1656 int use_dfa = 0;
1657
1658 options = 0;
1659
1660 *copynames = 0;
1661 *getnames = 0;
1662
1663 copynamesptr = copynames;
1664 getnamesptr = getnames;
1665
1666 pcre_callout = callout;
1667 first_callout = 1;
1668 callout_extra = 0;
1669 callout_count = 0;
1670 callout_fail_count = 999999;
1671 callout_fail_id = -1;
1672 show_malloc = 0;
1673
1674 if (extra != NULL) extra->flags &=
1675 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1676
1677 len = 0;
1678 for (;;)
1679 {
1680 if (infile == stdin) printf("data> ");
1681 if (extend_inputline(infile, buffer + len) == NULL)
1682 {
1683 if (len > 0) break;
1684 done = 1;
1685 goto CONTINUE;
1686 }
1687 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1688 len = (int)strlen((char *)buffer);
1689 if (buffer[len-1] == '\n') break;
1690 }
1691
1692 while (len > 0 && isspace(buffer[len-1])) len--;
1693 buffer[len] = 0;
1694 if (len == 0) break;
1695
1696 p = buffer;
1697 while (isspace(*p)) p++;
1698
1699 bptr = q = dbuffer;
1700 while ((c = *p++) != 0)
1701 {
1702 int i = 0;
1703 int n = 0;
1704
1705 if (c == '\\') switch ((c = *p++))
1706 {
1707 case 'a': c = 7; break;
1708 case 'b': c = '\b'; break;
1709 case 'e': c = 27; break;
1710 case 'f': c = '\f'; break;
1711 case 'n': c = '\n'; break;
1712 case 'r': c = '\r'; break;
1713 case 't': c = '\t'; break;
1714 case 'v': c = '\v'; break;
1715
1716 case '0': case '1': case '2': case '3':
1717 case '4': case '5': case '6': case '7':
1718 c -= '0';
1719 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1720 c = c * 8 + *p++ - '0';
1721
1722 #if !defined NOUTF8
1723 if (use_utf8 && c > 255)
1724 {
1725 unsigned char buff8[8];
1726 int ii, utn;
1727 utn = ord2utf8(c, buff8);
1728 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1729 c = buff8[ii]; /* Last byte */
1730 }
1731 #endif
1732 break;
1733
1734 case 'x':
1735
1736 /* Handle \x{..} specially - new Perl thing for utf8 */
1737
1738 #if !defined NOUTF8
1739 if (*p == '{')
1740 {
1741 unsigned char *pt = p;
1742 c = 0;
1743 while (isxdigit(*(++pt)))
1744 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1745 if (*pt == '}')
1746 {
1747 unsigned char buff8[8];
1748 int ii, utn;
1749 utn = ord2utf8(c, buff8);
1750 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1751 c = buff8[ii]; /* Last byte */
1752 p = pt + 1;
1753 break;
1754 }
1755 /* Not correct form; fall through */
1756 }
1757 #endif
1758
1759 /* Ordinary \x */
1760
1761 c = 0;
1762 while (i++ < 2 && isxdigit(*p))
1763 {
1764 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1765 p++;
1766 }
1767 break;
1768
1769 case 0: /* \ followed by EOF allows for an empty line */
1770 p--;
1771 continue;
1772
1773 case '>':
1774 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1775 continue;
1776
1777 case 'A': /* Option setting */
1778 options |= PCRE_ANCHORED;
1779 continue;
1780
1781 case 'B':
1782 options |= PCRE_NOTBOL;
1783 continue;
1784
1785 case 'C':
1786 if (isdigit(*p)) /* Set copy string */
1787 {
1788 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1789 copystrings |= 1 << n;
1790 }
1791 else if (isalnum(*p))
1792 {
1793 uschar *npp = copynamesptr;
1794 while (isalnum(*p)) *npp++ = *p++;
1795 *npp++ = 0;
1796 *npp = 0;
1797 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1798 if (n < 0)
1799 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1800 copynamesptr = npp;
1801 }
1802 else if (*p == '+')
1803 {
1804 callout_extra = 1;
1805 p++;
1806 }
1807 else if (*p == '-')
1808 {
1809 pcre_callout = NULL;
1810 p++;
1811 }
1812 else if (*p == '!')
1813 {
1814 callout_fail_id = 0;
1815 p++;
1816 while(isdigit(*p))
1817 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1818 callout_fail_count = 0;
1819 if (*p == '!')
1820 {
1821 p++;
1822 while(isdigit(*p))
1823 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1824 }
1825 }
1826 else if (*p == '*')
1827 {
1828 int sign = 1;
1829 callout_data = 0;
1830 if (*(++p) == '-') { sign = -1; p++; }
1831 while(isdigit(*p))
1832 callout_data = callout_data * 10 + *p++ - '0';
1833 callout_data *= sign;
1834 callout_data_set = 1;
1835 }
1836 continue;
1837
1838 #if !defined NODFA
1839 case 'D':
1840 #if !defined NOPOSIX
1841 if (posix || do_posix)
1842 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1843 else
1844 #endif
1845 use_dfa = 1;
1846 continue;
1847
1848 case 'F':
1849 options |= PCRE_DFA_SHORTEST;
1850 continue;
1851 #endif
1852
1853 case 'G':
1854 if (isdigit(*p))
1855 {
1856 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1857 getstrings |= 1 << n;
1858 }
1859 else if (isalnum(*p))
1860 {
1861 uschar *npp = getnamesptr;
1862 while (isalnum(*p)) *npp++ = *p++;
1863 *npp++ = 0;
1864 *npp = 0;
1865 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1866 if (n < 0)
1867 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1868 getnamesptr = npp;
1869 }
1870 continue;
1871
1872 case 'L':
1873 getlist = 1;
1874 continue;
1875
1876 case 'M':
1877 find_match_limit = 1;
1878 continue;
1879
1880 case 'N':
1881 options |= PCRE_NOTEMPTY;
1882 continue;
1883
1884 case 'O':
1885 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1886 if (n > size_offsets_max)
1887 {
1888 size_offsets_max = n;
1889 free(offsets);
1890 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1891 if (offsets == NULL)
1892 {
1893 printf("** Failed to get %d bytes of memory for offsets vector\n",
1894 (int)(size_offsets_max * sizeof(int)));
1895 yield = 1;
1896 goto EXIT;
1897 }
1898 }
1899 use_size_offsets = n;
1900 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1901 continue;
1902
1903 case 'P':
1904 options |= PCRE_PARTIAL;
1905 continue;
1906
1907 case 'Q':
1908 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1909 if (extra == NULL)
1910 {
1911 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1912 extra->flags = 0;
1913 }
1914 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1915 extra->match_limit_recursion = n;
1916 continue;
1917
1918 case 'q':
1919 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1920 if (extra == NULL)
1921 {
1922 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1923 extra->flags = 0;
1924 }
1925 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1926 extra->match_limit = n;
1927 continue;
1928
1929 #if !defined NODFA
1930 case 'R':
1931 options |= PCRE_DFA_RESTART;
1932 continue;
1933 #endif
1934
1935 case 'S':
1936 show_malloc = 1;
1937 continue;
1938
1939 case 'Z':
1940 options |= PCRE_NOTEOL;
1941 continue;
1942
1943 case '?':
1944 options |= PCRE_NO_UTF8_CHECK;
1945 continue;
1946
1947 case '<':
1948 {
1949 int x = check_newline(p, outfile);
1950 if (x == 0) goto NEXT_DATA;
1951 options |= x;
1952 while (*p++ != '>');
1953 }
1954 continue;
1955 }
1956 *q++ = c;
1957 }
1958 *q = 0;
1959 len = q - dbuffer;
1960
1961 if ((all_use_dfa || use_dfa) && find_match_limit)
1962 {
1963 printf("**Match limit not relevant for DFA matching: ignored\n");
1964 find_match_limit = 0;
1965 }
1966
1967 /* Handle matching via the POSIX interface, which does not
1968 support timing or playing with the match limit or callout data. */
1969
1970 #if !defined NOPOSIX
1971 if (posix || do_posix)
1972 {
1973 int rc;
1974 int eflags = 0;
1975 regmatch_t *pmatch = NULL;
1976 if (use_size_offsets > 0)
1977 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1978 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1979 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1980
1981 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1982
1983 if (rc != 0)
1984 {
1985 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1986 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1987 }
1988 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1989 != 0)
1990 {
1991 fprintf(outfile, "Matched with REG_NOSUB\n");
1992 }
1993 else
1994 {
1995 size_t i;
1996 for (i = 0; i < (size_t)use_size_offsets; i++)
1997 {
1998 if (pmatch[i].rm_so >= 0)
1999 {
2000 fprintf(outfile, "%2d: ", (int)i);
2001 (void)pchars(dbuffer + pmatch[i].rm_so,
2002 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2003 fprintf(outfile, "\n");
2004 if (i == 0 && do_showrest)
2005 {
2006 fprintf(outfile, " 0+ ");
2007 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2008 outfile);
2009 fprintf(outfile, "\n");
2010 }
2011 }
2012 }
2013 }
2014 free(pmatch);
2015 }
2016
2017 /* Handle matching via the native interface - repeats for /g and /G */
2018
2019 else
2020 #endif /* !defined NOPOSIX */
2021
2022 for (;; gmatched++) /* Loop for /g or /G */
2023 {
2024 if (timeitm > 0)
2025 {
2026 register int i;
2027 clock_t time_taken;
2028 clock_t start_time = clock();
2029
2030 #if !defined NODFA
2031 if (all_use_dfa || use_dfa)
2032 {
2033 int workspace[1000];
2034 for (i = 0; i < timeitm; i++)
2035 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2036 options | g_notempty, use_offsets, use_size_offsets, workspace,
2037 sizeof(workspace)/sizeof(int));
2038 }
2039 else
2040 #endif
2041
2042 for (i = 0; i < timeitm; i++)
2043 count = pcre_exec(re, extra, (char *)bptr, len,
2044 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2045
2046 time_taken = clock() - start_time;
2047 fprintf(outfile, "Execute time %.4f milliseconds\n",
2048 (((double)time_taken * 1000.0) / (double)timeitm) /
2049 (double)CLOCKS_PER_SEC);
2050 }
2051
2052 /* If find_match_limit is set, we want to do repeated matches with
2053 varying limits in order to find the minimum value for the match limit and
2054 for the recursion limit. */
2055
2056 if (find_match_limit)
2057 {
2058 if (extra == NULL)
2059 {
2060 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2061 extra->flags = 0;
2062 }
2063
2064 (void)check_match_limit(re, extra, bptr, len, start_offset,
2065 options|g_notempty, use_offsets, use_size_offsets,
2066 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2067 PCRE_ERROR_MATCHLIMIT, "match()");
2068
2069 count = check_match_limit(re, extra, bptr, len, start_offset,
2070 options|g_notempty, use_offsets, use_size_offsets,
2071 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2072 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2073 }
2074
2075 /* If callout_data is set, use the interface with additional data */
2076
2077 else if (callout_data_set)
2078 {
2079 if (extra == NULL)
2080 {
2081 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2082 extra->flags = 0;
2083 }
2084 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2085 extra->callout_data = &callout_data;
2086 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2087 options | g_notempty, use_offsets, use_size_offsets);
2088 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2089 }
2090
2091 /* The normal case is just to do the match once, with the default
2092 value of match_limit. */
2093
2094 #if !defined NODFA
2095 else if (all_use_dfa || use_dfa)
2096 {
2097 int workspace[1000];
2098 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2099 options | g_notempty, use_offsets, use_size_offsets, workspace,
2100 sizeof(workspace)/sizeof(int));
2101 if (count == 0)
2102 {
2103 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2104 count = use_size_offsets/2;
2105 }
2106 }
2107 #endif
2108
2109 else
2110 {
2111 count = pcre_exec(re, extra, (char *)bptr, len,
2112 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2113 if (count == 0)
2114 {
2115 fprintf(outfile, "Matched, but too many substrings\n");
2116 count = use_size_offsets/3;
2117 }
2118 }
2119
2120 /* Matched */
2121
2122 if (count >= 0)
2123 {
2124 int i, maxcount;
2125
2126 #if !defined NODFA
2127 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2128 #endif
2129 maxcount = use_size_offsets/3;
2130
2131 /* This is a check against a lunatic return value. */
2132
2133 if (count > maxcount)
2134 {
2135 fprintf(outfile,
2136 "** PCRE error: returned count %d is too big for offset size %d\n",
2137 count, use_size_offsets);
2138 count = use_size_offsets/3;
2139 if (do_g || do_G)
2140 {
2141 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2142 do_g = do_G = FALSE; /* Break g/G loop */
2143 }
2144 }
2145
2146 for (i = 0; i < count * 2; i += 2)
2147 {
2148 if (use_offsets[i] < 0)
2149 fprintf(outfile, "%2d: <unset>\n", i/2);
2150 else
2151 {
2152 fprintf(outfile, "%2d: ", i/2);
2153 (void)pchars(bptr + use_offsets[i],
2154 use_offsets[i+1] - use_offsets[i], outfile);
2155 fprintf(outfile, "\n");
2156 if (i == 0)
2157 {
2158 if (do_showrest)
2159 {
2160 fprintf(outfile, " 0+ ");
2161 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2162 outfile);
2163 fprintf(outfile, "\n");
2164 }
2165 }
2166 }
2167 }
2168
2169 for (i = 0; i < 32; i++)
2170 {
2171 if ((copystrings & (1 << i)) != 0)
2172 {
2173 char copybuffer[256];
2174 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2175 i, copybuffer, sizeof(copybuffer));
2176 if (rc < 0)
2177 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2178 else
2179 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2180 }
2181 }
2182
2183 for (copynamesptr = copynames;
2184 *copynamesptr != 0;
2185 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2186 {
2187 char copybuffer[256];
2188 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2189 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2190 if (rc < 0)
2191 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2192 else
2193 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2194 }
2195
2196 for (i = 0; i < 32; i++)
2197 {
2198 if ((getstrings & (1 << i)) != 0)
2199 {
2200 const char *substring;
2201 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2202 i, &substring);
2203 if (rc < 0)
2204 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2205 else
2206 {
2207 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2208 pcre_free_substring(substring);
2209 }
2210 }
2211 }
2212
2213 for (getnamesptr = getnames;
2214 *getnamesptr != 0;
2215 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2216 {
2217 const char *substring;
2218 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2219 count, (char *)getnamesptr, &substring);
2220 if (rc < 0)
2221 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2222 else
2223 {
2224 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2225 pcre_free_substring(substring);
2226 }
2227 }
2228
2229 if (getlist)
2230 {
2231 const char **stringlist;
2232 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2233 &stringlist);
2234 if (rc < 0)
2235 fprintf(outfile, "get substring list failed %d\n", rc);
2236 else
2237 {
2238 for (i = 0; i < count; i++)
2239 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2240 if (stringlist[i] != NULL)
2241 fprintf(outfile, "string list not terminated by NULL\n");
2242 /* free((void *)stringlist); */
2243 pcre_free_substring_list(stringlist);
2244 }
2245 }
2246 }
2247
2248 /* There was a partial match */
2249
2250 else if (count == PCRE_ERROR_PARTIAL)
2251 {
2252 fprintf(outfile, "Partial match");
2253 #if !defined NODFA
2254 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2255 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2256 bptr + use_offsets[0]);
2257 #endif
2258 fprintf(outfile, "\n");
2259 break; /* Out of the /g loop */
2260 }
2261
2262 /* Failed to match. If this is a /g or /G loop and we previously set
2263 g_notempty after a null match, this is not necessarily the end. We want
2264 to advance the start offset, and continue. We won't be at the end of the
2265 string - that was checked before setting g_notempty.
2266
2267 Complication arises in the case when the newline option is "any" or
2268 "anycrlf". If the previous match was at the end of a line terminated by
2269 CRLF, an advance of one character just passes the \r, whereas we should
2270 prefer the longer newline sequence, as does the code in pcre_exec().
2271 Fudge the offset value to achieve this.
2272
2273 Otherwise, in the case of UTF-8 matching, the advance must be one
2274 character, not one byte. */
2275
2276 else
2277 {
2278 if (g_notempty != 0)
2279 {
2280 int onechar = 1;
2281 unsigned int obits = ((real_pcre *)re)->options;
2282 use_offsets[0] = start_offset;
2283 if ((obits & PCRE_NEWLINE_BITS) == 0)
2284 {
2285 int d;
2286 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2287 obits = (d == '\r')? PCRE_NEWLINE_CR :
2288 (d == '\n')? PCRE_NEWLINE_LF :
2289 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2290 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2291 (d == -1)? PCRE_NEWLINE_ANY : 0;
2292 }
2293 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2294 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2295 &&
2296 start_offset < len - 1 &&
2297 bptr[start_offset] == '\r' &&
2298 bptr[start_offset+1] == '\n')
2299 onechar++;
2300 else if (use_utf8)
2301 {
2302 while (start_offset + onechar < len)
2303 {
2304 int tb = bptr[start_offset+onechar];
2305 if (tb <= 127) break;
2306 tb &= 0xc0;
2307 if (tb != 0 && tb != 0xc0) onechar++;
2308 }
2309 }
2310 use_offsets[1] = start_offset + onechar;
2311 }
2312 else
2313 {
2314 if (count == PCRE_ERROR_NOMATCH)
2315 {
2316 if (gmatched == 0) fprintf(outfile, "No match\n");
2317 }
2318 else fprintf(outfile, "Error %d\n", count);
2319 break; /* Out of the /g loop */
2320 }
2321 }
2322
2323 /* If not /g or /G we are done */
2324
2325 if (!do_g && !do_G) break;
2326
2327 /* If we have matched an empty string, first check to see if we are at
2328 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2329 what Perl's /g option does. This turns out to be rather cunning. First
2330 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2331 same point. If this fails (picked up above) we advance to the next
2332 character. */
2333
2334 g_notempty = 0;
2335
2336 if (use_offsets[0] == use_offsets[1])
2337 {
2338 if (use_offsets[0] == len) break;
2339 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2340 }
2341
2342 /* For /g, update the start offset, leaving the rest alone */
2343
2344 if (do_g) start_offset = use_offsets[1];
2345
2346 /* For /G, update the pointer and length */
2347
2348 else
2349 {
2350 bptr += use_offsets[1];
2351 len -= use_offsets[1];
2352 }
2353 } /* End of loop for /g and /G */
2354
2355 NEXT_DATA: continue;
2356 } /* End of loop for data lines */
2357
2358 CONTINUE:
2359
2360 #if !defined NOPOSIX
2361 if (posix || do_posix) regfree(&preg);
2362 #endif
2363
2364 if (re != NULL) new_free(re);
2365 if (extra != NULL) new_free(extra);
2366 if (tables != NULL)
2367 {
2368 new_free((void *)tables);
2369 setlocale(LC_CTYPE, "C");
2370 locale_set = 0;
2371 }
2372 }
2373
2374 if (infile == stdin) fprintf(outfile, "\n");
2375
2376 EXIT:
2377
2378 if (infile != NULL && infile != stdin) fclose(infile);
2379 if (outfile != NULL && outfile != stdout) fclose(outfile);
2380
2381 free(buffer);
2382 free(dbuffer);
2383 free(pbuffer);
2384 free(offsets);
2385
2386 return yield;
2387 }
2388
2389 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12