/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 227 - (show annotations) (download)
Tue Aug 21 15:00:15 2007 UTC (7 years, 1 month ago) by ph10
File MIME type: text/plain
File size: 69339 byte(s)
Add (*CR) etc.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include <config.h>
41 #endif
42
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <locale.h>
#include <errno.h>
#include <limits.h>
50
51
52 /* A number of things vary for Windows builds. Originally, pcretest opened its
53 input and output without "b"; then I was told that "b" was needed in some
54 environments, so it was added for release 5.0 to both the input and output. (It
55 makes no difference on Unix-like systems.) Later I was told that it is wrong
56 for the input on Windows. I've now abstracted the modes into two macros that
57 are set here, to make it easier to fiddle with them, and removed "b" from the
58 input mode under Windows. */
59
60 #if defined(_WIN32) || defined(WIN32)
61 #include <io.h> /* For _setmode() */
62 #include <fcntl.h> /* For _O_BINARY */
63 #define INPUT_MODE "r"
64 #define OUTPUT_MODE "wb"
65
66 #else
67 #include <sys/time.h> /* These two includes are needed */
68 #include <sys/resource.h> /* for setrlimit(). */
69 #define INPUT_MODE "rb"
70 #define OUTPUT_MODE "wb"
71 #endif
72
73
74 /* We have to include pcre_internal.h because we need the internal info for
75 displaying the results of pcre_study() and we also need to know about the
76 internal macros, structures, and other internal data values; pcretest has
77 "inside information" compared to a program that strictly follows the PCRE API.
78
79 Although pcre_internal.h does itself include pcre.h, we explicitly include it
80 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81 appropriately for an application, not for building PCRE. */
82
83 #include "pcre.h"
84 #include "pcre_internal.h"
85
86 /* We need access to the data tables that PCRE uses. So as not to have to keep
87 two copies, we include the source file here, changing the names of the external
88 symbols to prevent clashes. */
89
90 #define _pcre_utf8_table1 utf8_table1
91 #define _pcre_utf8_table1_size utf8_table1_size
92 #define _pcre_utf8_table2 utf8_table2
93 #define _pcre_utf8_table3 utf8_table3
94 #define _pcre_utf8_table4 utf8_table4
95 #define _pcre_utt utt
96 #define _pcre_utt_size utt_size
97 #define _pcre_OP_lengths OP_lengths
98
99 #include "pcre_tables.c"
100
101 /* We also need the pcre_printint() function for printing out compiled
102 patterns. This function is in a separate file so that it can be included in
103 pcre_compile.c when that module is compiled with debugging enabled.
104
105 The definition of the macro PRINTABLE, which determines whether to print an
106 output character as-is or as a hex value when showing compiled patterns, is
107 contained in this file. We use it here also, in cases when the locale has not
108 been explicitly changed, so as to get consistent output from systems that
109 differ in their output from isprint() even in the "C" locale. */
110
111 #include "pcre_printint.src"
112
113 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
114
115
116 /* It is possible to compile this test program without including support for
117 testing the POSIX interface, though this is not available via the standard
118 Makefile. */
119
120 #if !defined NOPOSIX
121 #include "pcreposix.h"
122 #endif
123
124 /* It is also possible, for the benefit of the version currently imported into
125 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
126 interface to the DFA matcher (NODFA), and without the doublecheck of the old
127 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
128 UTF8 support if PCRE is built without it. */
129
130 #ifndef SUPPORT_UTF8
131 #ifndef NOUTF8
132 #define NOUTF8
133 #endif
134 #endif
135
136
137 /* Other parameters */
138
139 #ifndef CLOCKS_PER_SEC
140 #ifdef CLK_TCK
141 #define CLOCKS_PER_SEC CLK_TCK
142 #else
143 #define CLOCKS_PER_SEC 100
144 #endif
145 #endif
146
147 /* This is the default loop count for timing. */
148
149 #define LOOPREPEAT 500000
150
151 /* Static variables */
152
153 static FILE *outfile; /* Stream for test output; main() starts it at stdout. File-scope so the malloc wrappers can write to it. */
154 static int log_store = 0; /* Reloaded from the command-line "show store" setting for each pattern in main(). */
155 static int callout_count; /* Incremented by callout() each time the callout number equals callout_fail_id. */
156 static int callout_extra; /* Nonzero makes callout() list the captured substrings and always re-print the subject. */
157 static int callout_fail_count; /* callout() returns 1 once callout_count has reached this value. */
158 static int callout_fail_id; /* The callout number that counts towards callout_fail_count. */
159 static int debug_lengths; /* Set to 1 at the start of each pattern in main(). */
160 static int first_callout; /* While nonzero callout() prints the subject; callout() clears it on every call. */
161 static int locale_set = 0; /* Nonzero makes PRINTHEX use isprint() instead of PRINTABLE. */
162 static int show_malloc; /* Nonzero makes the malloc/free wrappers log each call to outfile. */
163 static int use_utf8; /* Nonzero makes pchars() decode its input as UTF-8; reset to 0 for each pattern. */
164 static size_t gotten_store; /* Size passed to the most recent new_malloc() call. */
165
166 /* The buffers grow automatically if very long input lines are encountered. */
167
168 static int buffer_size = 50000; /* Current size in bytes of each of the three buffers; doubled by extend_inputline(). */
169 static uschar *buffer = NULL; /* Input line buffer filled by extend_inputline(). */
170 static uschar *dbuffer = NULL; /* Resized together with buffer; its contents are not copied when it grows. */
171 static uschar *pbuffer = NULL; /* Copy of the current pattern, used by callout(); preserved when the buffers grow. */
172
173
174
175 /*************************************************
176 * Read or extend an input line *
177 *************************************************/
178
179 /* Input lines are read into buffer, but both patterns and data lines can be
180 continued over multiple input lines. In addition, if the buffer fills up, we
181 want to automatically expand it so as to be able to handle extremely large
182 lines that are needed for certain stress tests. When the input buffer is
183 expanded, the other two buffers must also be expanded likewise, and the
184 contents of pbuffer, which are a copy of the input for callouts, must be
185 preserved (for when expansion happens for a data line). This is not the most
186 optimal way of handling this, but hey, this is just a test program!
187
188 Arguments:
189 f the file to read
190 start where in buffer to start (this *must* be within buffer)
191
192 Returns: pointer to the start of new data
193 could be a copy of start, or could be moved
194 NULL if no data read and EOF reached
195 */
196
197 static uschar *
198 extend_inputline(FILE *f, uschar *start)
199 {
200 uschar *here = start;
201
202 for (;;)
203 {
204 int rlen = buffer_size - (here - buffer);
205
206 if (rlen > 1000)
207 {
208 int dlen;
209 if (fgets((char *)here, rlen, f) == NULL)
210 return (here == start)? NULL : start;
211 dlen = (int)strlen((char *)here);
212 if (dlen > 0 && here[dlen - 1] == '\n') return start;
213 here += dlen;
214 }
215
216 else
217 {
218 int new_buffer_size = 2*buffer_size;
219 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
220 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
221 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
222
223 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
224 {
225 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
226 exit(1);
227 }
228
229 memcpy(new_buffer, buffer, buffer_size);
230 memcpy(new_pbuffer, pbuffer, buffer_size);
231
232 buffer_size = new_buffer_size;
233
234 start = new_buffer + (start - buffer);
235 here = new_buffer + (here - buffer);
236
237 free(buffer);
238 free(dbuffer);
239 free(pbuffer);
240
241 buffer = new_buffer;
242 dbuffer = new_dbuffer;
243 pbuffer = new_pbuffer;
244 }
245 }
246
247 return NULL; /* Control never gets here */
248 }
249
250
251
252
253
254
255
/*************************************************
*        Read number from string                 *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then decimal digits are accumulated. A digit
string too large for an int does not overflow (which would be undefined
behaviour): the result saturates at INT_MAX and the remaining digits are still
consumed, so that *endptr always points just past the last digit.

Arguments:
  str        string to be converted
  endptr     where to put the end pointer

Returns:     the value as a non-negative int, at most INT_MAX;
             zero if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  while (isdigit(*str))
  {
    int digit = (int)(*str++ - '0');

    /* result*10 + digit would exceed INT_MAX exactly when this test holds. */

    if (result > (INT_MAX - digit)/10) result = INT_MAX;
    else result = result * 10 + digit;
  }

  *endptr = str;
  return result;
}
280
281
282
283
284 /*************************************************
285 * Convert UTF-8 string to value *
286 *************************************************/
287
288 /* This function takes one or more bytes that represents a UTF-8 character,
289 and returns the value of the character.
290
291 Argument:
292 utf8bytes a pointer to the byte vector
293 vptr a pointer to an int to receive the value
294
295 Returns: > 0 => the number of bytes consumed
296 -6 to 0 => malformed UTF-8 character at offset = (-return)
297 */
298
299 #if !defined NOUTF8
300
301 static int
302 utf82ord(unsigned char *utf8bytes, int *vptr)
303 {
304 int c = *utf8bytes++;
305 int d = c;
306 int i, j, s;
307
308 for (i = -1; i < 6; i++) /* i is number of additional bytes */
309 {
310 if ((d & 0x80) == 0) break;
311 d <<= 1;
312 }
313
314 if (i == -1) { *vptr = c; return 1; } /* ascii character */
315 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
316
317 /* i now has a value in the range 1-5 */
318
319 s = 6*i;
320 d = (c & utf8_table3[i]) << s;
321
322 for (j = 0; j < i; j++)
323 {
324 c = *utf8bytes++;
325 if ((c & 0xc0) != 0x80) return -(j+1);
326 s -= 6;
327 d |= (c & 0x3f) << s;
328 }
329
330 /* Check that encoding was the correct unique one */
331
332 for (j = 0; j < utf8_table1_size; j++)
333 if (d <= utf8_table1[j]) break;
334 if (j != i) return -(i+1);
335
336 /* Valid value */
337
338 *vptr = d;
339 return i+1;
340 }
341
342 #endif
343
344
345
346 /*************************************************
347 * Convert character value to UTF-8 *
348 *************************************************/
349
350 /* This function takes an integer value in the range 0 - 0x7fffffff
351 and encodes it as a UTF-8 character in 0 to 6 bytes.
352
353 Arguments:
354 cvalue the character value
355 utf8bytes pointer to buffer for result - at least 6 bytes long
356
357 Returns: number of characters placed in the buffer
358 */
359
360 #if !defined NOUTF8
361
362 static int
363 ord2utf8(int cvalue, uschar *utf8bytes)
364 {
365 register int i, j;
366 for (i = 0; i < utf8_table1_size; i++)
367 if (cvalue <= utf8_table1[i]) break;
368 utf8bytes += i;
369 for (j = i; j > 0; j--)
370 {
371 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
372 cvalue >>= 6;
373 }
374 *utf8bytes = utf8_table2[i] | cvalue;
375 return i + 1;
376 }
377
378 #endif
379
380
381
382 /*************************************************
383 * Print character string *
384 *************************************************/
385
386 /* Character string printing function. Must handle UTF-8 strings in utf8
387 mode. Yields number of characters printed. If handed a NULL file, just counts
388 chars without printing. */
389
390 static int pchars(unsigned char *p, int length, FILE *f)
391 {
392 int c = 0;
393 int yield = 0;
394
395 while (length-- > 0)
396 {
397 #if !defined NOUTF8
398 if (use_utf8)
399 {
400 int rc = utf82ord(p, &c);
401
402 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
403 {
404 length -= rc - 1;
405 p += rc;
406 if (PRINTHEX(c))
407 {
408 if (f != NULL) fprintf(f, "%c", c);
409 yield++;
410 }
411 else
412 {
413 int n = 4;
414 if (f != NULL) fprintf(f, "\\x{%02x}", c);
415 yield += (n <= 0x000000ff)? 2 :
416 (n <= 0x00000fff)? 3 :
417 (n <= 0x0000ffff)? 4 :
418 (n <= 0x000fffff)? 5 : 6;
419 }
420 continue;
421 }
422 }
423 #endif
424
425 /* Not UTF-8, or malformed UTF-8 */
426
427 c = *p++;
428 if (PRINTHEX(c))
429 {
430 if (f != NULL) fprintf(f, "%c", c);
431 yield++;
432 }
433 else
434 {
435 if (f != NULL) fprintf(f, "\\x%02x", c);
436 yield += 4;
437 }
438 }
439
440 return yield;
441 }
442
443
444
445 /*************************************************
446 * Callout function *
447 *************************************************/
448
449 /* Called from PCRE as a result of the (?C) item. We print out where we are in
450 the match. Yield zero unless more callouts than the fail count, or the callout
451 data is not zero. */
452
453 static int callout(pcre_callout_block *cb)
454 {
455 FILE *f = (first_callout | callout_extra)? outfile : NULL;
456 int i, pre_start, post_start, subject_length;
457
458 if (callout_extra)
459 {
460 fprintf(f, "Callout %d: last capture = %d\n",
461 cb->callout_number, cb->capture_last);
462
463 for (i = 0; i < cb->capture_top * 2; i += 2)
464 {
465 if (cb->offset_vector[i] < 0)
466 fprintf(f, "%2d: <unset>\n", i/2);
467 else
468 {
469 fprintf(f, "%2d: ", i/2);
470 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
471 cb->offset_vector[i+1] - cb->offset_vector[i], f);
472 fprintf(f, "\n");
473 }
474 }
475 }
476
477 /* Re-print the subject in canonical form, the first time or if giving full
478 datails. On subsequent calls in the same match, we use pchars just to find the
479 printed lengths of the substrings. */
480
481 if (f != NULL) fprintf(f, "--->");
482
483 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
484 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
485 cb->current_position - cb->start_match, f);
486
487 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
488
489 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
490 cb->subject_length - cb->current_position, f);
491
492 if (f != NULL) fprintf(f, "\n");
493
494 /* Always print appropriate indicators, with callout number if not already
495 shown. For automatic callouts, show the pattern offset. */
496
497 if (cb->callout_number == 255)
498 {
499 fprintf(outfile, "%+3d ", cb->pattern_position);
500 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
501 }
502 else
503 {
504 if (callout_extra) fprintf(outfile, " ");
505 else fprintf(outfile, "%3d ", cb->callout_number);
506 }
507
508 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
509 fprintf(outfile, "^");
510
511 if (post_start > 0)
512 {
513 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
514 fprintf(outfile, "^");
515 }
516
517 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
518 fprintf(outfile, " ");
519
520 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
521 pbuffer + cb->pattern_position);
522
523 fprintf(outfile, "\n");
524 first_callout = 0;
525
526 if (cb->callout_data != NULL)
527 {
528 int callout_data = *((int *)(cb->callout_data));
529 if (callout_data != 0)
530 {
531 fprintf(outfile, "Callout data = %d\n", callout_data);
532 return callout_data;
533 }
534 }
535
536 return (cb->callout_number != callout_fail_id)? 0 :
537 (++callout_count >= callout_fail_count)? 1 : 0;
538 }
539
540
541 /*************************************************
542 * Local malloc functions *
543 *************************************************/
544
545 /* Alternative malloc function, to test functionality and show the size of the
546 compiled re. */
547
548 static void *new_malloc(size_t size)
549 {
550 void *block = malloc(size);
551 gotten_store = size;
552 if (show_malloc)
553 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
554 return block;
555 }
556
557 static void new_free(void *block)
558 {
559 if (show_malloc)
560 fprintf(outfile, "free %p\n", block);
561 free(block);
562 }
563
564
565 /* For recursion malloc/free, to test stacking calls */
566
567 static void *stack_malloc(size_t size)
568 {
569 void *block = malloc(size);
570 if (show_malloc)
571 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
572 return block;
573 }
574
575 static void stack_free(void *block)
576 {
577 if (show_malloc)
578 fprintf(outfile, "stack_free %p\n", block);
579 free(block);
580 }
581
582
583 /*************************************************
584 * Call pcre_fullinfo() *
585 *************************************************/
586
587 /* Get one piece of information from the pcre_fullinfo() function */
588
589 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
590 {
591 int rc;
592 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
593 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
594 }
595
596
597
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverses the byte order of the low-order bytes of a value.

Arguments:
  value      the value to be flipped
  n          2 to swap the two low-order bytes; any other value reverses
               the four low-order bytes

Returns:     the flipped value; bits above those flipped are dropped
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int b0 = value & 0xff;
  unsigned long int b1 = (value >> 8) & 0xff;
  unsigned long int b2, b3;

  if (n == 2) return (b0 << 8) | b1;

  b2 = (value >> 16) & 0xff;
  b3 = (value >> 24) & 0xff;
  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
611
612
613
614
615 /*************************************************
616 * Check match or recursion limit *
617 *************************************************/
618
619 static int
620 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
621 int start_offset, int options, int *use_offsets, int use_size_offsets,
622 int flag, unsigned long int *limit, int errnumber, const char *msg)
623 {
624 int count;
625 int min = 0;
626 int mid = 64;
627 int max = -1;
628
629 extra->flags |= flag;
630
631 for (;;)
632 {
633 *limit = mid;
634
635 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
636 use_offsets, use_size_offsets);
637
638 if (count == errnumber)
639 {
640 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
641 min = mid;
642 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
643 }
644
645 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
646 count == PCRE_ERROR_PARTIAL)
647 {
648 if (mid == min + 1)
649 {
650 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
651 break;
652 }
653 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
654 max = mid;
655 mid = (min + mid)/2;
656 }
657 else break; /* Some other error */
658 }
659
660 extra->flags &= ~flag;
661 return count;
662 }
663
664
665
666 /*************************************************
667 * Case-independent strncmp() function *
668 *************************************************/
669
670 /*
671 Arguments:
672 s first string
673 t second string
674 n number of characters to compare
675
676 Returns: < 0, = 0, or > 0, according to the comparison
677 */
678
679 static int
680 strncmpic(uschar *s, uschar *t, int n)
681 {
682 while (n--)
683 {
684 int c = tolower(*s++) - tolower(*t++);
685 if (c) return c;
686 }
687 return 0;
688 }
689
690
691
692 /*************************************************
693 * Check newline indicator *
694 *************************************************/
695
696 /* This is used both at compile and run-time to check for <xxx> escapes, where
697 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
698 no match.
699
700 Arguments:
701 p points after the leading '<'
702 f file for error message
703
704 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
705 */
706
707 static int
708 check_newline(uschar *p, FILE *f)
709 {
710 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
711 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
712 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
713 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
714 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
715 fprintf(f, "Unknown newline type at: <%s\n", p);
716 return 0;
717 }
718
719
720
/*************************************************
*                Usage function                  *
*************************************************/

/* Writes a summary of the command line options to the standard output. The
lines for -dfa and -p are left out when the program is built with NODFA or
NOPOSIX, respectively. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input> [<output>]]\n", stdout);
  fputs(" -b show compiled code (bytecode)\n", stdout);
  fputs(" -C show PCRE compile-time options and exit\n", stdout);
  fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
  fputs(" -help show usage information\n", stdout);
  fputs(" -i show information about compiled patterns\n", stdout);
  fputs(" -m output memory used information\n", stdout);
  fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif
  fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
  fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
  fputs(" -s output store (memory) used information\n", stdout);
  fputs(" -t time compilation and execution\n", stdout);
  fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
  fputs(" -tm time execution (matching) only\n", stdout);
  fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
750
751
752
753 /*************************************************
754 * Main Program *
755 *************************************************/
756
757 /* Read lines from named file or stdin and write to named file or stdout; lines
758 consist of a regular expression, in delimiters and optionally followed by
759 options, followed by a set of test data, terminated by an empty line. */
760
761 int main(int argc, char **argv)
762 {
763 FILE *infile = stdin;
764 int options = 0;
765 int study_options = 0;
766 int op = 1;
767 int timeit = 0;
768 int timeitm = 0;
769 int showinfo = 0;
770 int showstore = 0;
771 int quiet = 0;
772 int size_offsets = 45;
773 int size_offsets_max;
774 int *offsets = NULL;
775 #if !defined NOPOSIX
776 int posix = 0;
777 #endif
778 int debug = 0;
779 int done = 0;
780 int all_use_dfa = 0;
781 int yield = 0;
782 int stack_size;
783
784 /* These vectors store, end-to-end, a list of captured substring names. Assume
785 that 1024 is plenty long enough for the few names we'll be testing. */
786
787 uschar copynames[1024];
788 uschar getnames[1024];
789
790 uschar *copynamesptr;
791 uschar *getnamesptr;
792
793 /* Get buffers from malloc() so that Electric Fence will check their misuse
794 when I am debugging. They grow automatically when very long lines are read. */
795
796 buffer = (unsigned char *)malloc(buffer_size);
797 dbuffer = (unsigned char *)malloc(buffer_size);
798 pbuffer = (unsigned char *)malloc(buffer_size);
799
800 /* The outfile variable is static so that new_malloc can use it. */
801
802 outfile = stdout;
803
804 /* The following _setmode() stuff is some Windows magic that tells its runtime
805 library to translate CRLF into a single LF character. At least, that's what
806 I've been told: never having used Windows I take this all on trust. Originally
807 it set 0x8000, but then I was advised that _O_BINARY was better. */
808
809 #if defined(_WIN32) || defined(WIN32)
810 _setmode( _fileno( stdout ), _O_BINARY );
811 #endif
812
813 /* Scan options */
814
815 while (argc > 1 && argv[op][0] == '-')
816 {
817 unsigned char *endptr;
818
819 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
820 showstore = 1;
821 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
822 else if (strcmp(argv[op], "-b") == 0) debug = 1;
823 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
824 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
825 #if !defined NODFA
826 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
827 #endif
828 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
829 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
830 *endptr == 0))
831 {
832 op++;
833 argc--;
834 }
835 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
836 {
837 int both = argv[op][2] == 0;
838 int temp;
839 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
840 *endptr == 0))
841 {
842 timeitm = temp;
843 op++;
844 argc--;
845 }
846 else timeitm = LOOPREPEAT;
847 if (both) timeit = timeitm;
848 }
849 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
850 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
851 *endptr == 0))
852 {
853 #if defined(_WIN32) || defined(WIN32)
854 printf("PCRE: -S not supported on this OS\n");
855 exit(1);
856 #else
857 int rc;
858 struct rlimit rlim;
859 getrlimit(RLIMIT_STACK, &rlim);
860 rlim.rlim_cur = stack_size * 1024 * 1024;
861 rc = setrlimit(RLIMIT_STACK, &rlim);
862 if (rc != 0)
863 {
864 printf("PCRE: setrlimit() failed with error %d\n", rc);
865 exit(1);
866 }
867 op++;
868 argc--;
869 #endif
870 }
871 #if !defined NOPOSIX
872 else if (strcmp(argv[op], "-p") == 0) posix = 1;
873 #endif
874 else if (strcmp(argv[op], "-C") == 0)
875 {
876 int rc;
877 printf("PCRE version %s\n", pcre_version());
878 printf("Compiled with\n");
879 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
880 printf(" %sUTF-8 support\n", rc? "" : "No ");
881 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
882 printf(" %sUnicode properties support\n", rc? "" : "No ");
883 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
884 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
885 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
886 (rc == -2)? "ANYCRLF" :
887 (rc == -1)? "ANY" : "???");
888 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
889 printf(" Internal link size = %d\n", rc);
890 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
891 printf(" POSIX malloc threshold = %d\n", rc);
892 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
893 printf(" Default match limit = %d\n", rc);
894 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
895 printf(" Default recursion depth limit = %d\n", rc);
896 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
897 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
898 goto EXIT;
899 }
900 else if (strcmp(argv[op], "-help") == 0 ||
901 strcmp(argv[op], "--help") == 0)
902 {
903 usage();
904 goto EXIT;
905 }
906 else
907 {
908 printf("** Unknown or malformed option %s\n", argv[op]);
909 usage();
910 yield = 1;
911 goto EXIT;
912 }
913 op++;
914 argc--;
915 }
916
917 /* Get the store for the offsets vector, and remember what it was */
918
919 size_offsets_max = size_offsets;
920 offsets = (int *)malloc(size_offsets_max * sizeof(int));
921 if (offsets == NULL)
922 {
923 printf("** Failed to get %d bytes of memory for offsets vector\n",
924 (int)(size_offsets_max * sizeof(int)));
925 yield = 1;
926 goto EXIT;
927 }
928
929 /* Sort out the input and output files */
930
931 if (argc > 1)
932 {
933 infile = fopen(argv[op], INPUT_MODE);
934 if (infile == NULL)
935 {
936 printf("** Failed to open %s\n", argv[op]);
937 yield = 1;
938 goto EXIT;
939 }
940 }
941
942 if (argc > 2)
943 {
944 outfile = fopen(argv[op+1], OUTPUT_MODE);
945 if (outfile == NULL)
946 {
947 printf("** Failed to open %s\n", argv[op+1]);
948 yield = 1;
949 goto EXIT;
950 }
951 }
952
953 /* Set alternative malloc function */
954
955 pcre_malloc = new_malloc;
956 pcre_free = new_free;
957 pcre_stack_malloc = stack_malloc;
958 pcre_stack_free = stack_free;
959
960 /* Heading line unless quiet, then prompt for first regex if stdin */
961
962 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
963
964 /* Main loop */
965
966 while (!done)
967 {
968 pcre *re = NULL;
969 pcre_extra *extra = NULL;
970
971 #if !defined NOPOSIX /* There are still compilers that require no indent */
972 regex_t preg;
973 int do_posix = 0;
974 #endif
975
976 const char *error;
977 unsigned char *p, *pp, *ppp;
978 unsigned char *to_file = NULL;
979 const unsigned char *tables = NULL;
980 unsigned long int true_size, true_study_size = 0;
981 size_t size, regex_gotten_store;
982 int do_study = 0;
983 int do_debug = debug;
984 int do_G = 0;
985 int do_g = 0;
986 int do_showinfo = showinfo;
987 int do_showrest = 0;
988 int do_flip = 0;
989 int erroroffset, len, delimiter, poffset;
990
991 use_utf8 = 0;
992 debug_lengths = 1;
993
994 if (infile == stdin) printf(" re> ");
995 if (extend_inputline(infile, buffer) == NULL) break;
996 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
997 fflush(outfile);
998
999 p = buffer;
1000 while (isspace(*p)) p++;
1001 if (*p == 0) continue;
1002
1003 /* See if the pattern is to be loaded pre-compiled from a file. */
1004
1005 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1006 {
1007 unsigned long int magic, get_options;
1008 uschar sbuf[8];
1009 FILE *f;
1010
1011 p++;
1012 pp = p + (int)strlen((char *)p);
1013 while (isspace(pp[-1])) pp--;
1014 *pp = 0;
1015
1016 f = fopen((char *)p, "rb");
1017 if (f == NULL)
1018 {
1019 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1020 continue;
1021 }
1022
1023 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1024
1025 true_size =
1026 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1027 true_study_size =
1028 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1029
1030 re = (real_pcre *)new_malloc(true_size);
1031 regex_gotten_store = gotten_store;
1032
1033 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1034
1035 magic = ((real_pcre *)re)->magic_number;
1036 if (magic != MAGIC_NUMBER)
1037 {
1038 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1039 {
1040 do_flip = 1;
1041 }
1042 else
1043 {
1044 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1045 fclose(f);
1046 continue;
1047 }
1048 }
1049
1050 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1051 do_flip? " (byte-inverted)" : "", p);
1052
1053 /* Need to know if UTF-8 for printing data strings */
1054
1055 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1056 use_utf8 = (get_options & PCRE_UTF8) != 0;
1057
1058 /* Now see if there is any following study data */
1059
1060 if (true_study_size != 0)
1061 {
1062 pcre_study_data *psd;
1063
1064 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1065 extra->flags = PCRE_EXTRA_STUDY_DATA;
1066
1067 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1068 extra->study_data = psd;
1069
1070 if (fread(psd, 1, true_study_size, f) != true_study_size)
1071 {
1072 FAIL_READ:
1073 fprintf(outfile, "Failed to read data from %s\n", p);
1074 if (extra != NULL) new_free(extra);
1075 if (re != NULL) new_free(re);
1076 fclose(f);
1077 continue;
1078 }
1079 fprintf(outfile, "Study data loaded from %s\n", p);
1080 do_study = 1; /* To get the data output if requested */
1081 }
1082 else fprintf(outfile, "No study data\n");
1083
1084 fclose(f);
1085 goto SHOW_INFO;
1086 }
1087
1088 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1089 the pattern; if it isn't complete, read more. */
1090
1091 delimiter = *p++;
1092
1093 if (isalnum(delimiter) || delimiter == '\\')
1094 {
1095 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1096 goto SKIP_DATA;
1097 }
1098
1099 pp = p;
1100 poffset = p - buffer;
1101
1102 for(;;)
1103 {
1104 while (*pp != 0)
1105 {
1106 if (*pp == '\\' && pp[1] != 0) pp++;
1107 else if (*pp == delimiter) break;
1108 pp++;
1109 }
1110 if (*pp != 0) break;
1111 if (infile == stdin) printf(" > ");
1112 if ((pp = extend_inputline(infile, pp)) == NULL)
1113 {
1114 fprintf(outfile, "** Unexpected EOF\n");
1115 done = 1;
1116 goto CONTINUE;
1117 }
1118 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1119 }
1120
1121 /* The buffer may have moved while being extended; reset the start of data
1122 pointer to the correct relative point in the buffer. */
1123
1124 p = buffer + poffset;
1125
1126 /* If the first character after the delimiter is backslash, make
1127 the pattern end with backslash. This is purely to provide a way
1128 of testing for the error message when a pattern ends with backslash. */
1129
1130 if (pp[1] == '\\') *pp++ = '\\';
1131
1132 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1133 for callouts. */
1134
1135 *pp++ = 0;
1136 strcpy((char *)pbuffer, (char *)p);
1137
1138 /* Look for options after final delimiter */
1139
1140 options = 0;
1141 study_options = 0;
1142 log_store = showstore; /* default from command line */
1143
1144 while (*pp != 0)
1145 {
1146 switch (*pp++)
1147 {
1148 case 'f': options |= PCRE_FIRSTLINE; break;
1149 case 'g': do_g = 1; break;
1150 case 'i': options |= PCRE_CASELESS; break;
1151 case 'm': options |= PCRE_MULTILINE; break;
1152 case 's': options |= PCRE_DOTALL; break;
1153 case 'x': options |= PCRE_EXTENDED; break;
1154
1155 case '+': do_showrest = 1; break;
1156 case 'A': options |= PCRE_ANCHORED; break;
1157 case 'B': do_debug = 1; break;
1158 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1159 case 'D': do_debug = do_showinfo = 1; break;
1160 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1161 case 'F': do_flip = 1; break;
1162 case 'G': do_G = 1; break;
1163 case 'I': do_showinfo = 1; break;
1164 case 'J': options |= PCRE_DUPNAMES; break;
1165 case 'M': log_store = 1; break;
1166 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1167
1168 #if !defined NOPOSIX
1169 case 'P': do_posix = 1; break;
1170 #endif
1171
1172 case 'S': do_study = 1; break;
1173 case 'U': options |= PCRE_UNGREEDY; break;
1174 case 'X': options |= PCRE_EXTRA; break;
1175 case 'Z': debug_lengths = 0; break;
1176 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1177 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1178
1179 case 'L':
1180 ppp = pp;
1181 /* The '\r' test here is so that it works on Windows. */
1182 /* The zero-byte test is just in case this is an unterminated line. */
1183 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1184 *ppp = 0;
1185 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1186 {
1187 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1188 goto SKIP_DATA;
1189 }
1190 locale_set = 1;
1191 tables = pcre_maketables();
1192 pp = ppp;
1193 break;
1194
1195 case '>':
1196 to_file = pp;
1197 while (*pp != 0) pp++;
1198 while (isspace(pp[-1])) pp--;
1199 *pp = 0;
1200 break;
1201
1202 case '<':
1203 {
1204 int x = check_newline(pp, outfile);
1205 if (x == 0) goto SKIP_DATA;
1206 options |= x;
1207 while (*pp++ != '>');
1208 }
1209 break;
1210
1211 case '\r': /* So that it works in Windows */
1212 case '\n':
1213 case ' ':
1214 break;
1215
1216 default:
1217 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1218 goto SKIP_DATA;
1219 }
1220 }
1221
1222 /* Handle compiling via the POSIX interface, which doesn't support the
1223 timing, showing, or debugging options, nor the ability to pass over
1224 local character tables. */
1225
1226 #if !defined NOPOSIX
1227 if (posix || do_posix)
1228 {
1229 int rc;
1230 int cflags = 0;
1231
1232 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1233 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1234 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1235 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1236 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1237
1238 rc = regcomp(&preg, (char *)p, cflags);
1239
1240 /* Compilation failed; go back for another re, skipping to blank line
1241 if non-interactive. */
1242
1243 if (rc != 0)
1244 {
1245 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1246 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1247 goto SKIP_DATA;
1248 }
1249 }
1250
1251 /* Handle compiling via the native interface */
1252
1253 else
1254 #endif /* !defined NOPOSIX */
1255
1256 {
1257 if (timeit > 0)
1258 {
1259 register int i;
1260 clock_t time_taken;
1261 clock_t start_time = clock();
1262 for (i = 0; i < timeit; i++)
1263 {
1264 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1265 if (re != NULL) free(re);
1266 }
1267 time_taken = clock() - start_time;
1268 fprintf(outfile, "Compile time %.4f milliseconds\n",
1269 (((double)time_taken * 1000.0) / (double)timeit) /
1270 (double)CLOCKS_PER_SEC);
1271 }
1272
1273 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1274
1275 /* Compilation failed; go back for another re, skipping to blank line
1276 if non-interactive. */
1277
1278 if (re == NULL)
1279 {
1280 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1281 SKIP_DATA:
1282 if (infile != stdin)
1283 {
1284 for (;;)
1285 {
1286 if (extend_inputline(infile, buffer) == NULL)
1287 {
1288 done = 1;
1289 goto CONTINUE;
1290 }
1291 len = (int)strlen((char *)buffer);
1292 while (len > 0 && isspace(buffer[len-1])) len--;
1293 if (len == 0) break;
1294 }
1295 fprintf(outfile, "\n");
1296 }
1297 goto CONTINUE;
1298 }
1299
1300 /* Compilation succeeded; print data if required. There are now two
1301 info-returning functions. The old one has a limited interface and
1302 returns only limited data. Check that it agrees with the newer one. */
1303
1304 if (log_store)
1305 fprintf(outfile, "Memory allocation (code space): %d\n",
1306 (int)(gotten_store -
1307 sizeof(real_pcre) -
1308 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1309
1310 /* Extract the size for possible writing before possibly flipping it,
1311 and remember the store that was got. */
1312
1313 true_size = ((real_pcre *)re)->size;
1314 regex_gotten_store = gotten_store;
1315
1316 /* If /S was present, study the regexp to generate additional info to
1317 help with the matching. */
1318
1319 if (do_study)
1320 {
1321 if (timeit > 0)
1322 {
1323 register int i;
1324 clock_t time_taken;
1325 clock_t start_time = clock();
1326 for (i = 0; i < timeit; i++)
1327 extra = pcre_study(re, study_options, &error);
1328 time_taken = clock() - start_time;
1329 if (extra != NULL) free(extra);
1330 fprintf(outfile, " Study time %.4f milliseconds\n",
1331 (((double)time_taken * 1000.0) / (double)timeit) /
1332 (double)CLOCKS_PER_SEC);
1333 }
1334 extra = pcre_study(re, study_options, &error);
1335 if (error != NULL)
1336 fprintf(outfile, "Failed to study: %s\n", error);
1337 else if (extra != NULL)
1338 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1339 }
1340
1341 /* If the 'F' option was present, we flip the bytes of all the integer
1342 fields in the regex data block and the study block. This is to make it
1343 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1344 compiled on a different architecture. */
1345
1346 if (do_flip)
1347 {
1348 real_pcre *rre = (real_pcre *)re;
1349 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1350 rre->size = byteflip(rre->size, sizeof(rre->size));
1351 rre->options = byteflip(rre->options, sizeof(rre->options));
1352 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1353 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1354 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1355 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1356 rre->name_table_offset = byteflip(rre->name_table_offset,
1357 sizeof(rre->name_table_offset));
1358 rre->name_entry_size = byteflip(rre->name_entry_size,
1359 sizeof(rre->name_entry_size));
1360 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1361
1362 if (extra != NULL)
1363 {
1364 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1365 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1366 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1367 }
1368 }
1369
1370 /* Extract information from the compiled data if required */
1371
1372 SHOW_INFO:
1373
1374 if (do_debug)
1375 {
1376 fprintf(outfile, "------------------------------------------------------------------\n");
1377 pcre_printint(re, outfile, debug_lengths);
1378 }
1379
1380 if (do_showinfo)
1381 {
1382 unsigned long int get_options, all_options;
1383 #if !defined NOINFOCHECK
1384 int old_first_char, old_options, old_count;
1385 #endif
1386 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1387 hascrorlf;
1388 int nameentrysize, namecount;
1389 const uschar *nametable;
1390
1391 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1392 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1393 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1394 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1395 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1396 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1397 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1398 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1399 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1400 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1401 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1402 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1403
1404 #if !defined NOINFOCHECK
1405 old_count = pcre_info(re, &old_options, &old_first_char);
1406 if (count < 0) fprintf(outfile,
1407 "Error %d from pcre_info()\n", count);
1408 else
1409 {
1410 if (old_count != count) fprintf(outfile,
1411 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1412 old_count);
1413
1414 if (old_first_char != first_char) fprintf(outfile,
1415 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1416 first_char, old_first_char);
1417
1418 if (old_options != (int)get_options) fprintf(outfile,
1419 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1420 get_options, old_options);
1421 }
1422 #endif
1423
1424 if (size != regex_gotten_store) fprintf(outfile,
1425 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1426 (int)size, (int)regex_gotten_store);
1427
1428 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1429 if (backrefmax > 0)
1430 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1431
1432 if (namecount > 0)
1433 {
1434 fprintf(outfile, "Named capturing subpatterns:\n");
1435 while (namecount-- > 0)
1436 {
1437 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1438 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1439 GET2(nametable, 0));
1440 nametable += nameentrysize;
1441 }
1442 }
1443
1444 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1445 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1446
1447 all_options = ((real_pcre *)re)->options;
1448 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1449
1450 if (get_options == 0) fprintf(outfile, "No options\n");
1451 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1452 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1453 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1454 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1455 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1456 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1457 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1458 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1459 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1460 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1461 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1462 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1463 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1464 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1465
1466 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1467
1468 switch (get_options & PCRE_NEWLINE_BITS)
1469 {
1470 case PCRE_NEWLINE_CR:
1471 fprintf(outfile, "Forced newline sequence: CR\n");
1472 break;
1473
1474 case PCRE_NEWLINE_LF:
1475 fprintf(outfile, "Forced newline sequence: LF\n");
1476 break;
1477
1478 case PCRE_NEWLINE_CRLF:
1479 fprintf(outfile, "Forced newline sequence: CRLF\n");
1480 break;
1481
1482 case PCRE_NEWLINE_ANYCRLF:
1483 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1484 break;
1485
1486 case PCRE_NEWLINE_ANY:
1487 fprintf(outfile, "Forced newline sequence: ANY\n");
1488 break;
1489
1490 default:
1491 break;
1492 }
1493
1494 if (first_char == -1)
1495 {
1496 fprintf(outfile, "First char at start or follows newline\n");
1497 }
1498 else if (first_char < 0)
1499 {
1500 fprintf(outfile, "No first char\n");
1501 }
1502 else
1503 {
1504 int ch = first_char & 255;
1505 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1506 "" : " (caseless)";
1507 if (PRINTHEX(ch))
1508 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1509 else
1510 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1511 }
1512
1513 if (need_char < 0)
1514 {
1515 fprintf(outfile, "No need char\n");
1516 }
1517 else
1518 {
1519 int ch = need_char & 255;
1520 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1521 "" : " (caseless)";
1522 if (PRINTHEX(ch))
1523 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1524 else
1525 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1526 }
1527
1528 /* Don't output study size; at present it is in any case a fixed
1529 value, but it varies, depending on the computer architecture, and
1530 so messes up the test suite. (And with the /F option, it might be
1531 flipped.) */
1532
1533 if (do_study)
1534 {
1535 if (extra == NULL)
1536 fprintf(outfile, "Study returned NULL\n");
1537 else
1538 {
1539 uschar *start_bits = NULL;
1540 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1541
1542 if (start_bits == NULL)
1543 fprintf(outfile, "No starting byte set\n");
1544 else
1545 {
1546 int i;
1547 int c = 24;
1548 fprintf(outfile, "Starting byte set: ");
1549 for (i = 0; i < 256; i++)
1550 {
1551 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1552 {
1553 if (c > 75)
1554 {
1555 fprintf(outfile, "\n ");
1556 c = 2;
1557 }
1558 if (PRINTHEX(i) && i != ' ')
1559 {
1560 fprintf(outfile, "%c ", i);
1561 c += 2;
1562 }
1563 else
1564 {
1565 fprintf(outfile, "\\x%02x ", i);
1566 c += 5;
1567 }
1568 }
1569 }
1570 fprintf(outfile, "\n");
1571 }
1572 }
1573 }
1574 }
1575
1576 /* If the '>' option was present, we write out the regex to a file, and
1577 that is all. The first 8 bytes of the file are the regex length and then
1578 the study length, in big-endian order. */
1579
1580 if (to_file != NULL)
1581 {
1582 FILE *f = fopen((char *)to_file, "wb");
1583 if (f == NULL)
1584 {
1585 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1586 }
1587 else
1588 {
1589 uschar sbuf[8];
1590 sbuf[0] = (true_size >> 24) & 255;
1591 sbuf[1] = (true_size >> 16) & 255;
1592 sbuf[2] = (true_size >> 8) & 255;
1593 sbuf[3] = (true_size) & 255;
1594
1595 sbuf[4] = (true_study_size >> 24) & 255;
1596 sbuf[5] = (true_study_size >> 16) & 255;
1597 sbuf[6] = (true_study_size >> 8) & 255;
1598 sbuf[7] = (true_study_size) & 255;
1599
1600 if (fwrite(sbuf, 1, 8, f) < 8 ||
1601 fwrite(re, 1, true_size, f) < true_size)
1602 {
1603 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1604 }
1605 else
1606 {
1607 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1608 if (extra != NULL)
1609 {
1610 if (fwrite(extra->study_data, 1, true_study_size, f) <
1611 true_study_size)
1612 {
1613 fprintf(outfile, "Write error on %s: %s\n", to_file,
1614 strerror(errno));
1615 }
1616 else fprintf(outfile, "Study data written to %s\n", to_file);
1617
1618 }
1619 }
1620 fclose(f);
1621 }
1622
1623 new_free(re);
1624 if (extra != NULL) new_free(extra);
1625 if (tables != NULL) new_free((void *)tables);
1626 continue; /* With next regex */
1627 }
1628 } /* End of non-POSIX compile */
1629
1630 /* Read data lines and test them */
1631
1632 for (;;)
1633 {
1634 uschar *q;
1635 uschar *bptr;
1636 int *use_offsets = offsets;
1637 int use_size_offsets = size_offsets;
1638 int callout_data = 0;
1639 int callout_data_set = 0;
1640 int count, c;
1641 int copystrings = 0;
1642 int find_match_limit = 0;
1643 int getstrings = 0;
1644 int getlist = 0;
1645 int gmatched = 0;
1646 int start_offset = 0;
1647 int g_notempty = 0;
1648 int use_dfa = 0;
1649
1650 options = 0;
1651
1652 *copynames = 0;
1653 *getnames = 0;
1654
1655 copynamesptr = copynames;
1656 getnamesptr = getnames;
1657
1658 pcre_callout = callout;
1659 first_callout = 1;
1660 callout_extra = 0;
1661 callout_count = 0;
1662 callout_fail_count = 999999;
1663 callout_fail_id = -1;
1664 show_malloc = 0;
1665
1666 if (extra != NULL) extra->flags &=
1667 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1668
1669 len = 0;
1670 for (;;)
1671 {
1672 if (infile == stdin) printf("data> ");
1673 if (extend_inputline(infile, buffer + len) == NULL)
1674 {
1675 if (len > 0) break;
1676 done = 1;
1677 goto CONTINUE;
1678 }
1679 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1680 len = (int)strlen((char *)buffer);
1681 if (buffer[len-1] == '\n') break;
1682 }
1683
1684 while (len > 0 && isspace(buffer[len-1])) len--;
1685 buffer[len] = 0;
1686 if (len == 0) break;
1687
1688 p = buffer;
1689 while (isspace(*p)) p++;
1690
1691 bptr = q = dbuffer;
1692 while ((c = *p++) != 0)
1693 {
1694 int i = 0;
1695 int n = 0;
1696
1697 if (c == '\\') switch ((c = *p++))
1698 {
1699 case 'a': c = 7; break;
1700 case 'b': c = '\b'; break;
1701 case 'e': c = 27; break;
1702 case 'f': c = '\f'; break;
1703 case 'n': c = '\n'; break;
1704 case 'r': c = '\r'; break;
1705 case 't': c = '\t'; break;
1706 case 'v': c = '\v'; break;
1707
1708 case '0': case '1': case '2': case '3':
1709 case '4': case '5': case '6': case '7':
1710 c -= '0';
1711 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1712 c = c * 8 + *p++ - '0';
1713
1714 #if !defined NOUTF8
1715 if (use_utf8 && c > 255)
1716 {
1717 unsigned char buff8[8];
1718 int ii, utn;
1719 utn = ord2utf8(c, buff8);
1720 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1721 c = buff8[ii]; /* Last byte */
1722 }
1723 #endif
1724 break;
1725
1726 case 'x':
1727
1728 /* Handle \x{..} specially - new Perl thing for utf8 */
1729
1730 #if !defined NOUTF8
1731 if (*p == '{')
1732 {
1733 unsigned char *pt = p;
1734 c = 0;
1735 while (isxdigit(*(++pt)))
1736 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1737 if (*pt == '}')
1738 {
1739 unsigned char buff8[8];
1740 int ii, utn;
1741 utn = ord2utf8(c, buff8);
1742 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1743 c = buff8[ii]; /* Last byte */
1744 p = pt + 1;
1745 break;
1746 }
1747 /* Not correct form; fall through */
1748 }
1749 #endif
1750
1751 /* Ordinary \x */
1752
1753 c = 0;
1754 while (i++ < 2 && isxdigit(*p))
1755 {
1756 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1757 p++;
1758 }
1759 break;
1760
1761 case 0: /* \ followed by EOF allows for an empty line */
1762 p--;
1763 continue;
1764
1765 case '>':
1766 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1767 continue;
1768
1769 case 'A': /* Option setting */
1770 options |= PCRE_ANCHORED;
1771 continue;
1772
1773 case 'B':
1774 options |= PCRE_NOTBOL;
1775 continue;
1776
1777 case 'C':
1778 if (isdigit(*p)) /* Set copy string */
1779 {
1780 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1781 copystrings |= 1 << n;
1782 }
1783 else if (isalnum(*p))
1784 {
1785 uschar *npp = copynamesptr;
1786 while (isalnum(*p)) *npp++ = *p++;
1787 *npp++ = 0;
1788 *npp = 0;
1789 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1790 if (n < 0)
1791 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1792 copynamesptr = npp;
1793 }
1794 else if (*p == '+')
1795 {
1796 callout_extra = 1;
1797 p++;
1798 }
1799 else if (*p == '-')
1800 {
1801 pcre_callout = NULL;
1802 p++;
1803 }
1804 else if (*p == '!')
1805 {
1806 callout_fail_id = 0;
1807 p++;
1808 while(isdigit(*p))
1809 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1810 callout_fail_count = 0;
1811 if (*p == '!')
1812 {
1813 p++;
1814 while(isdigit(*p))
1815 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1816 }
1817 }
1818 else if (*p == '*')
1819 {
1820 int sign = 1;
1821 callout_data = 0;
1822 if (*(++p) == '-') { sign = -1; p++; }
1823 while(isdigit(*p))
1824 callout_data = callout_data * 10 + *p++ - '0';
1825 callout_data *= sign;
1826 callout_data_set = 1;
1827 }
1828 continue;
1829
1830 #if !defined NODFA
1831 case 'D':
1832 #if !defined NOPOSIX
1833 if (posix || do_posix)
1834 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1835 else
1836 #endif
1837 use_dfa = 1;
1838 continue;
1839
1840 case 'F':
1841 options |= PCRE_DFA_SHORTEST;
1842 continue;
1843 #endif
1844
1845 case 'G':
1846 if (isdigit(*p))
1847 {
1848 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1849 getstrings |= 1 << n;
1850 }
1851 else if (isalnum(*p))
1852 {
1853 uschar *npp = getnamesptr;
1854 while (isalnum(*p)) *npp++ = *p++;
1855 *npp++ = 0;
1856 *npp = 0;
1857 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1858 if (n < 0)
1859 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1860 getnamesptr = npp;
1861 }
1862 continue;
1863
1864 case 'L':
1865 getlist = 1;
1866 continue;
1867
1868 case 'M':
1869 find_match_limit = 1;
1870 continue;
1871
1872 case 'N':
1873 options |= PCRE_NOTEMPTY;
1874 continue;
1875
1876 case 'O':
1877 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1878 if (n > size_offsets_max)
1879 {
1880 size_offsets_max = n;
1881 free(offsets);
1882 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1883 if (offsets == NULL)
1884 {
1885 printf("** Failed to get %d bytes of memory for offsets vector\n",
1886 (int)(size_offsets_max * sizeof(int)));
1887 yield = 1;
1888 goto EXIT;
1889 }
1890 }
1891 use_size_offsets = n;
1892 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1893 continue;
1894
1895 case 'P':
1896 options |= PCRE_PARTIAL;
1897 continue;
1898
1899 case 'Q':
1900 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1901 if (extra == NULL)
1902 {
1903 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1904 extra->flags = 0;
1905 }
1906 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1907 extra->match_limit_recursion = n;
1908 continue;
1909
1910 case 'q':
1911 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1912 if (extra == NULL)
1913 {
1914 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1915 extra->flags = 0;
1916 }
1917 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1918 extra->match_limit = n;
1919 continue;
1920
1921 #if !defined NODFA
1922 case 'R':
1923 options |= PCRE_DFA_RESTART;
1924 continue;
1925 #endif
1926
1927 case 'S':
1928 show_malloc = 1;
1929 continue;
1930
1931 case 'Z':
1932 options |= PCRE_NOTEOL;
1933 continue;
1934
1935 case '?':
1936 options |= PCRE_NO_UTF8_CHECK;
1937 continue;
1938
1939 case '<':
1940 {
1941 int x = check_newline(p, outfile);
1942 if (x == 0) goto NEXT_DATA;
1943 options |= x;
1944 while (*p++ != '>');
1945 }
1946 continue;
1947 }
1948 *q++ = c;
1949 }
1950 *q = 0;
1951 len = q - dbuffer;
1952
1953 if ((all_use_dfa || use_dfa) && find_match_limit)
1954 {
1955 printf("**Match limit not relevant for DFA matching: ignored\n");
1956 find_match_limit = 0;
1957 }
1958
1959 /* Handle matching via the POSIX interface, which does not
1960 support timing or playing with the match limit or callout data. */
1961
1962 #if !defined NOPOSIX
1963 if (posix || do_posix)
1964 {
1965 int rc;
1966 int eflags = 0;
1967 regmatch_t *pmatch = NULL;
1968 if (use_size_offsets > 0)
1969 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1970 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1971 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1972
1973 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1974
1975 if (rc != 0)
1976 {
1977 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1978 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1979 }
1980 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1981 != 0)
1982 {
1983 fprintf(outfile, "Matched with REG_NOSUB\n");
1984 }
1985 else
1986 {
1987 size_t i;
1988 for (i = 0; i < (size_t)use_size_offsets; i++)
1989 {
1990 if (pmatch[i].rm_so >= 0)
1991 {
1992 fprintf(outfile, "%2d: ", (int)i);
1993 (void)pchars(dbuffer + pmatch[i].rm_so,
1994 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1995 fprintf(outfile, "\n");
1996 if (i == 0 && do_showrest)
1997 {
1998 fprintf(outfile, " 0+ ");
1999 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2000 outfile);
2001 fprintf(outfile, "\n");
2002 }
2003 }
2004 }
2005 }
2006 free(pmatch);
2007 }
2008
2009 /* Handle matching via the native interface - repeats for /g and /G */
2010
2011 else
2012 #endif /* !defined NOPOSIX */
2013
2014 for (;; gmatched++) /* Loop for /g or /G */
2015 {
2016 if (timeitm > 0)
2017 {
2018 register int i;
2019 clock_t time_taken;
2020 clock_t start_time = clock();
2021
2022 #if !defined NODFA
2023 if (all_use_dfa || use_dfa)
2024 {
2025 int workspace[1000];
2026 for (i = 0; i < timeitm; i++)
2027 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2028 options | g_notempty, use_offsets, use_size_offsets, workspace,
2029 sizeof(workspace)/sizeof(int));
2030 }
2031 else
2032 #endif
2033
2034 for (i = 0; i < timeitm; i++)
2035 count = pcre_exec(re, extra, (char *)bptr, len,
2036 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2037
2038 time_taken = clock() - start_time;
2039 fprintf(outfile, "Execute time %.4f milliseconds\n",
2040 (((double)time_taken * 1000.0) / (double)timeitm) /
2041 (double)CLOCKS_PER_SEC);
2042 }
2043
2044 /* If find_match_limit is set, we want to do repeated matches with
2045 varying limits in order to find the minimum value for the match limit and
2046 for the recursion limit. */
2047
2048 if (find_match_limit)
2049 {
2050 if (extra == NULL)
2051 {
2052 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2053 extra->flags = 0;
2054 }
2055
2056 (void)check_match_limit(re, extra, bptr, len, start_offset,
2057 options|g_notempty, use_offsets, use_size_offsets,
2058 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2059 PCRE_ERROR_MATCHLIMIT, "match()");
2060
2061 count = check_match_limit(re, extra, bptr, len, start_offset,
2062 options|g_notempty, use_offsets, use_size_offsets,
2063 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2064 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2065 }
2066
2067 /* If callout_data is set, use the interface with additional data */
2068
2069 else if (callout_data_set)
2070 {
2071 if (extra == NULL)
2072 {
2073 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2074 extra->flags = 0;
2075 }
2076 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2077 extra->callout_data = &callout_data;
2078 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2079 options | g_notempty, use_offsets, use_size_offsets);
2080 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2081 }
2082
2083 /* The normal case is just to do the match once, with the default
2084 value of match_limit. */
2085
2086 #if !defined NODFA
2087 else if (all_use_dfa || use_dfa)
2088 {
2089 int workspace[1000];
2090 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2091 options | g_notempty, use_offsets, use_size_offsets, workspace,
2092 sizeof(workspace)/sizeof(int));
2093 if (count == 0)
2094 {
2095 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2096 count = use_size_offsets/2;
2097 }
2098 }
2099 #endif
2100
2101 else
2102 {
2103 count = pcre_exec(re, extra, (char *)bptr, len,
2104 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2105 if (count == 0)
2106 {
2107 fprintf(outfile, "Matched, but too many substrings\n");
2108 count = use_size_offsets/3;
2109 }
2110 }
2111
2112 /* Matched */
2113
2114 if (count >= 0)
2115 {
2116 int i, maxcount;
2117
2118 #if !defined NODFA
2119 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2120 #endif
2121 maxcount = use_size_offsets/3;
2122
2123 /* This is a check against a lunatic return value. */
2124
2125 if (count > maxcount)
2126 {
2127 fprintf(outfile,
2128 "** PCRE error: returned count %d is too big for offset size %d\n",
2129 count, use_size_offsets);
2130 count = use_size_offsets/3;
2131 if (do_g || do_G)
2132 {
2133 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2134 do_g = do_G = FALSE; /* Break g/G loop */
2135 }
2136 }
2137
2138 for (i = 0; i < count * 2; i += 2)
2139 {
2140 if (use_offsets[i] < 0)
2141 fprintf(outfile, "%2d: <unset>\n", i/2);
2142 else
2143 {
2144 fprintf(outfile, "%2d: ", i/2);
2145 (void)pchars(bptr + use_offsets[i],
2146 use_offsets[i+1] - use_offsets[i], outfile);
2147 fprintf(outfile, "\n");
2148 if (i == 0)
2149 {
2150 if (do_showrest)
2151 {
2152 fprintf(outfile, " 0+ ");
2153 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2154 outfile);
2155 fprintf(outfile, "\n");
2156 }
2157 }
2158 }
2159 }
2160
2161 for (i = 0; i < 32; i++)
2162 {
2163 if ((copystrings & (1 << i)) != 0)
2164 {
2165 char copybuffer[256];
2166 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2167 i, copybuffer, sizeof(copybuffer));
2168 if (rc < 0)
2169 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2170 else
2171 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2172 }
2173 }
2174
2175 for (copynamesptr = copynames;
2176 *copynamesptr != 0;
2177 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2178 {
2179 char copybuffer[256];
2180 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2181 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2182 if (rc < 0)
2183 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2184 else
2185 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2186 }
2187
2188 for (i = 0; i < 32; i++)
2189 {
2190 if ((getstrings & (1 << i)) != 0)
2191 {
2192 const char *substring;
2193 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2194 i, &substring);
2195 if (rc < 0)
2196 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2197 else
2198 {
2199 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2200 pcre_free_substring(substring);
2201 }
2202 }
2203 }
2204
2205 for (getnamesptr = getnames;
2206 *getnamesptr != 0;
2207 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2208 {
2209 const char *substring;
2210 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2211 count, (char *)getnamesptr, &substring);
2212 if (rc < 0)
2213 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2214 else
2215 {
2216 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2217 pcre_free_substring(substring);
2218 }
2219 }
2220
2221 if (getlist)
2222 {
2223 const char **stringlist;
2224 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2225 &stringlist);
2226 if (rc < 0)
2227 fprintf(outfile, "get substring list failed %d\n", rc);
2228 else
2229 {
2230 for (i = 0; i < count; i++)
2231 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2232 if (stringlist[i] != NULL)
2233 fprintf(outfile, "string list not terminated by NULL\n");
2234 /* free((void *)stringlist); */
2235 pcre_free_substring_list(stringlist);
2236 }
2237 }
2238 }
2239
2240 /* There was a partial match */
2241
2242 else if (count == PCRE_ERROR_PARTIAL)
2243 {
2244 fprintf(outfile, "Partial match");
2245 #if !defined NODFA
2246 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2247 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2248 bptr + use_offsets[0]);
2249 #endif
2250 fprintf(outfile, "\n");
2251 break; /* Out of the /g loop */
2252 }
2253
2254 /* Failed to match. If this is a /g or /G loop and we previously set
2255 g_notempty after a null match, this is not necessarily the end. We want
2256 to advance the start offset, and continue. We won't be at the end of the
2257 string - that was checked before setting g_notempty.
2258
2259 Complication arises in the case when the newline option is "any" or
2260 "anycrlf". If the previous match was at the end of a line terminated by
2261 CRLF, an advance of one character just passes the \r, whereas we should
2262 prefer the longer newline sequence, as does the code in pcre_exec().
2263 Fudge the offset value to achieve this.
2264
2265 Otherwise, in the case of UTF-8 matching, the advance must be one
2266 character, not one byte. */
2267
2268 else
2269 {
2270 if (g_notempty != 0)
2271 {
2272 int onechar = 1;
2273 unsigned int obits = ((real_pcre *)re)->options;
2274 use_offsets[0] = start_offset;
2275 if ((obits & PCRE_NEWLINE_BITS) == 0)
2276 {
2277 int d;
2278 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2279 obits = (d == '\r')? PCRE_NEWLINE_CR :
2280 (d == '\n')? PCRE_NEWLINE_LF :
2281 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2282 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2283 (d == -1)? PCRE_NEWLINE_ANY : 0;
2284 }
2285 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2286 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2287 &&
2288 start_offset < len - 1 &&
2289 bptr[start_offset] == '\r' &&
2290 bptr[start_offset+1] == '\n')
2291 onechar++;
2292 else if (use_utf8)
2293 {
2294 while (start_offset + onechar < len)
2295 {
2296 int tb = bptr[start_offset+onechar];
2297 if (tb <= 127) break;
2298 tb &= 0xc0;
2299 if (tb != 0 && tb != 0xc0) onechar++;
2300 }
2301 }
2302 use_offsets[1] = start_offset + onechar;
2303 }
2304 else
2305 {
2306 if (count == PCRE_ERROR_NOMATCH)
2307 {
2308 if (gmatched == 0) fprintf(outfile, "No match\n");
2309 }
2310 else fprintf(outfile, "Error %d\n", count);
2311 break; /* Out of the /g loop */
2312 }
2313 }
2314
2315 /* If not /g or /G we are done */
2316
2317 if (!do_g && !do_G) break;
2318
2319 /* If we have matched an empty string, first check to see if we are at
2320 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2321 what Perl's /g option does. This turns out to be rather cunning. First
2322 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2323 same point. If this fails (picked up above) we advance to the next
2324 character. */
2325
2326 g_notempty = 0;
2327
2328 if (use_offsets[0] == use_offsets[1])
2329 {
2330 if (use_offsets[0] == len) break;
2331 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2332 }
2333
2334 /* For /g, update the start offset, leaving the rest alone */
2335
2336 if (do_g) start_offset = use_offsets[1];
2337
2338 /* For /G, update the pointer and length */
2339
2340 else
2341 {
2342 bptr += use_offsets[1];
2343 len -= use_offsets[1];
2344 }
2345 } /* End of loop for /g and /G */
2346
2347 NEXT_DATA: continue;
2348 } /* End of loop for data lines */
2349
2350 CONTINUE:
2351
2352 #if !defined NOPOSIX
2353 if (posix || do_posix) regfree(&preg);
2354 #endif
2355
2356 if (re != NULL) new_free(re);
2357 if (extra != NULL) new_free(extra);
2358 if (tables != NULL)
2359 {
2360 new_free((void *)tables);
2361 setlocale(LC_CTYPE, "C");
2362 locale_set = 0;
2363 }
2364 }
2365
2366 if (infile == stdin) fprintf(outfile, "\n");
2367
2368 EXIT:
2369
2370 if (infile != NULL && infile != stdin) fclose(infile);
2371 if (outfile != NULL && outfile != stdout) fclose(outfile);
2372
2373 free(buffer);
2374 free(dbuffer);
2375 free(pbuffer);
2376 free(offsets);
2377
2378 return yield;
2379 }
2380
2381 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12