/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 240 - (show annotations) (download)
Tue Sep 11 15:47:20 2007 UTC (6 years, 11 months ago) by ph10
File MIME type: text/plain
File size: 69914 byte(s)
Refactoring to reduce the number of relocations in a shared library.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <locale.h>
49 #include <errno.h>
50
51
52 /* A number of things vary for Windows builds. Originally, pcretest opened its
53 input and output without "b"; then I was told that "b" was needed in some
54 environments, so it was added for release 5.0 to both the input and output. (It
55 makes no difference on Unix-like systems.) Later I was told that it is wrong
56 for the input on Windows. I've now abstracted the modes into two macros that
57 are set here, to make it easier to fiddle with them, and removed "b" from the
58 input mode under Windows. */
59
60 #if defined(_WIN32) || defined(WIN32)
61 #include <io.h> /* For _setmode() */
62 #include <fcntl.h> /* For _O_BINARY */
63 #define INPUT_MODE "r"
64 #define OUTPUT_MODE "wb"
65
66 #else
67 #include <sys/time.h> /* These two includes are needed */
68 #include <sys/resource.h> /* for setrlimit(). */
69 #define INPUT_MODE "rb"
70 #define OUTPUT_MODE "wb"
71 #endif
72
73
74 /* We have to include pcre_internal.h because we need the internal info for
75 displaying the results of pcre_study() and we also need to know about the
76 internal macros, structures, and other internal data values; pcretest has
77 "inside information" compared to a program that strictly follows the PCRE API.
78
79 Although pcre_internal.h does itself include pcre.h, we explicitly include it
80 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81 appropriately for an application, not for building PCRE. */
82
83 #include "pcre.h"
84 #include "pcre_internal.h"
85
86 /* We need access to the data tables that PCRE uses. So as not to have to keep
87 two copies, we include the source file here, changing the names of the external
88 symbols to prevent clashes. */
89
90 #define _pcre_utf8_table1 utf8_table1
91 #define _pcre_utf8_table1_size utf8_table1_size
92 #define _pcre_utf8_table2 utf8_table2
93 #define _pcre_utf8_table3 utf8_table3
94 #define _pcre_utf8_table4 utf8_table4
95 #define _pcre_utt utt
96 #define _pcre_utt_size utt_size
97 #define _pcre_utt_names utt_names
98 #define _pcre_OP_lengths OP_lengths
99
100 #include "pcre_tables.c"
101
102 /* We also need the pcre_printint() function for printing out compiled
103 patterns. This function is in a separate file so that it can be included in
104 pcre_compile.c when that module is compiled with debugging enabled.
105
106 The definition of the macro PRINTABLE, which determines whether to print an
107 output character as-is or as a hex value when showing compiled patterns, is
108 contained in this file. We use it here also, in cases when the locale has not
109 been explicitly changed, so as to get consistent output from systems that
110 differ in their output from isprint() even in the "C" locale. */
111
112 #include "pcre_printint.src"
113
114 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
115
116
117 /* It is possible to compile this test program without including support for
118 testing the POSIX interface, though this is not available via the standard
119 Makefile. */
120
121 #if !defined NOPOSIX
122 #include "pcreposix.h"
123 #endif
124
125 /* It is also possible, for the benefit of the version currently imported into
126 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
127 interface to the DFA matcher (NODFA), and without the doublecheck of the old
128 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
129 UTF8 support if PCRE is built without it. */
130
131 #ifndef SUPPORT_UTF8
132 #ifndef NOUTF8
133 #define NOUTF8
134 #endif
135 #endif
136
137
138 /* Other parameters */
139
140 #ifndef CLOCKS_PER_SEC
141 #ifdef CLK_TCK
142 #define CLOCKS_PER_SEC CLK_TCK
143 #else
144 #define CLOCKS_PER_SEC 100
145 #endif
146 #endif
147
148 /* This is the default loop count for timing. */
149
150 #define LOOPREPEAT 500000
151
152 /* Static variables */
153
154 static FILE *outfile;
155 static int log_store = 0;
156 static int callout_count;
157 static int callout_extra;
158 static int callout_fail_count;
159 static int callout_fail_id;
160 static int debug_lengths;
161 static int first_callout;
162 static int locale_set = 0;
163 static int show_malloc;
164 static int use_utf8;
165 static size_t gotten_store;
166
167 /* The buffers grow automatically if very long input lines are encountered. */
168
169 static int buffer_size = 50000;
170 static uschar *buffer = NULL;
171 static uschar *dbuffer = NULL;
172 static uschar *pbuffer = NULL;
173
174
175
176 /*************************************************
177 * Read or extend an input line *
178 *************************************************/
179
180 /* Input lines are read into buffer, but both patterns and data lines can be
181 continued over multiple input lines. In addition, if the buffer fills up, we
182 want to automatically expand it so as to be able to handle extremely large
183 lines that are needed for certain stress tests. When the input buffer is
184 expanded, the other two buffers must also be expanded likewise, and the
185 contents of pbuffer, which are a copy of the input for callouts, must be
186 preserved (for when expansion happens for a data line). This is not the most
187 optimal way of handling this, but hey, this is just a test program!
188
189 Arguments:
190 f the file to read
191 start where in buffer to start (this *must* be within buffer)
192
193 Returns: pointer to the start of new data
194 could be a copy of start, or could be moved
195 NULL if no data read and EOF reached
196 */
197
198 static uschar *
199 extend_inputline(FILE *f, uschar *start)
200 {
201 uschar *here = start;
202
203 for (;;)
204 {
205 int rlen = buffer_size - (here - buffer);
206
207 if (rlen > 1000)
208 {
209 int dlen;
210 if (fgets((char *)here, rlen, f) == NULL)
211 return (here == start)? NULL : start;
212 dlen = (int)strlen((char *)here);
213 if (dlen > 0 && here[dlen - 1] == '\n') return start;
214 here += dlen;
215 }
216
217 else
218 {
219 int new_buffer_size = 2*buffer_size;
220 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
221 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
222 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
223
224 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
225 {
226 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
227 exit(1);
228 }
229
230 memcpy(new_buffer, buffer, buffer_size);
231 memcpy(new_pbuffer, pbuffer, buffer_size);
232
233 buffer_size = new_buffer_size;
234
235 start = new_buffer + (start - buffer);
236 here = new_buffer + (here - buffer);
237
238 free(buffer);
239 free(dbuffer);
240 free(pbuffer);
241
242 buffer = new_buffer;
243 dbuffer = new_dbuffer;
244 pbuffer = new_pbuffer;
245 }
246 }
247
248 return NULL; /* Control never gets here */
249 }
250
251
252
253
254
255
256
257 /*************************************************
258 * Read number from string *
259 *************************************************/
260
261 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
262 around with conditional compilation, just do the job by hand. It is only used
263 for unpicking arguments, so just keep it simple.
264
265 Arguments:
266 str string to be converted
267 endptr where to put the end pointer
268
269 Returns: the converted value, as an int
270 */
271
/* Skip leading white space, then accumulate a run of decimal digits. The
address of the first unconsumed character is stored through endptr; the
result is 0 when no digit is present. No overflow checking is done. */

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int total = 0;

  for (; *str != 0 && isspace(*str); str++)
    ;

  for (; isdigit(*str); str++)
    total = total * 10 + (int)(*str - '0');

  *endptr = str;
  return total;
}
281
282
283
284
285 /*************************************************
286 * Convert UTF-8 string to value *
287 *************************************************/
288
289 /* This function takes one or more bytes that represents a UTF-8 character,
290 and returns the value of the character.
291
292 Argument:
293 utf8bytes a pointer to the byte vector
294 vptr a pointer to an int to receive the value
295
296 Returns: > 0 => the number of bytes consumed
297 -6 to 0 => malformed UTF-8 character at offset = (-return)
298 */
299
300 #if !defined NOUTF8
301
302 static int
303 utf82ord(unsigned char *utf8bytes, int *vptr)
304 {
305 int c = *utf8bytes++;
306 int d = c;
307 int i, j, s;
308
309 for (i = -1; i < 6; i++) /* i is number of additional bytes */
310 {
311 if ((d & 0x80) == 0) break;
312 d <<= 1;
313 }
314
315 if (i == -1) { *vptr = c; return 1; } /* ascii character */
316 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
317
318 /* i now has a value in the range 1-5 */
319
320 s = 6*i;
321 d = (c & utf8_table3[i]) << s;
322
323 for (j = 0; j < i; j++)
324 {
325 c = *utf8bytes++;
326 if ((c & 0xc0) != 0x80) return -(j+1);
327 s -= 6;
328 d |= (c & 0x3f) << s;
329 }
330
331 /* Check that encoding was the correct unique one */
332
333 for (j = 0; j < utf8_table1_size; j++)
334 if (d <= utf8_table1[j]) break;
335 if (j != i) return -(i+1);
336
337 /* Valid value */
338
339 *vptr = d;
340 return i+1;
341 }
342
343 #endif
344
345
346
347 /*************************************************
348 * Convert character value to UTF-8 *
349 *************************************************/
350
351 /* This function takes an integer value in the range 0 - 0x7fffffff
352 and encodes it as a UTF-8 character in 1 to 6 bytes.
353
354 Arguments:
355 cvalue the character value
356 utf8bytes pointer to buffer for result - at least 6 bytes long
357
358 Returns: number of bytes placed in the buffer
359 */
360
361 #if !defined NOUTF8
362
363 static int
364 ord2utf8(int cvalue, uschar *utf8bytes)
365 {
366 register int i, j;
367 for (i = 0; i < utf8_table1_size; i++)
368 if (cvalue <= utf8_table1[i]) break;
369 utf8bytes += i;
370 for (j = i; j > 0; j--)
371 {
372 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
373 cvalue >>= 6;
374 }
375 *utf8bytes = utf8_table2[i] | cvalue;
376 return i + 1;
377 }
378
379 #endif
380
381
382
383 /*************************************************
384 * Print character string *
385 *************************************************/
386
387 /* Character string printing function. Must handle UTF-8 strings in utf8
388 mode. Yields number of characters printed. If handed a NULL file, just counts
389 chars without printing. */
390
391 static int pchars(unsigned char *p, int length, FILE *f)
392 {
393 int c = 0;
394 int yield = 0;
395
396 while (length-- > 0)
397 {
398 #if !defined NOUTF8
399 if (use_utf8)
400 {
401 int rc = utf82ord(p, &c);
402
403 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
404 {
405 length -= rc - 1;
406 p += rc;
407 if (PRINTHEX(c))
408 {
409 if (f != NULL) fprintf(f, "%c", c);
410 yield++;
411 }
412 else
413 {
414 int n = 4;
415 if (f != NULL) fprintf(f, "\\x{%02x}", c);
416 yield += (n <= 0x000000ff)? 2 :
417 (n <= 0x00000fff)? 3 :
418 (n <= 0x0000ffff)? 4 :
419 (n <= 0x000fffff)? 5 : 6;
420 }
421 continue;
422 }
423 }
424 #endif
425
426 /* Not UTF-8, or malformed UTF-8 */
427
428 c = *p++;
429 if (PRINTHEX(c))
430 {
431 if (f != NULL) fprintf(f, "%c", c);
432 yield++;
433 }
434 else
435 {
436 if (f != NULL) fprintf(f, "\\x%02x", c);
437 yield += 4;
438 }
439 }
440
441 return yield;
442 }
443
444
445
446 /*************************************************
447 * Callout function *
448 *************************************************/
449
450 /* Called from PCRE as a result of the (?C) item. We print out where we are in
451 the match. Yield zero unless more callouts than the fail count, or the callout
452 data is not zero. */
453
454 static int callout(pcre_callout_block *cb)
455 {
456 FILE *f = (first_callout | callout_extra)? outfile : NULL;
457 int i, pre_start, post_start, subject_length;
458
459 if (callout_extra)
460 {
461 fprintf(f, "Callout %d: last capture = %d\n",
462 cb->callout_number, cb->capture_last);
463
464 for (i = 0; i < cb->capture_top * 2; i += 2)
465 {
466 if (cb->offset_vector[i] < 0)
467 fprintf(f, "%2d: <unset>\n", i/2);
468 else
469 {
470 fprintf(f, "%2d: ", i/2);
471 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
472 cb->offset_vector[i+1] - cb->offset_vector[i], f);
473 fprintf(f, "\n");
474 }
475 }
476 }
477
478 /* Re-print the subject in canonical form, the first time or if giving full
479 datails. On subsequent calls in the same match, we use pchars just to find the
480 printed lengths of the substrings. */
481
482 if (f != NULL) fprintf(f, "--->");
483
484 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
485 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
486 cb->current_position - cb->start_match, f);
487
488 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
489
490 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
491 cb->subject_length - cb->current_position, f);
492
493 if (f != NULL) fprintf(f, "\n");
494
495 /* Always print appropriate indicators, with callout number if not already
496 shown. For automatic callouts, show the pattern offset. */
497
498 if (cb->callout_number == 255)
499 {
500 fprintf(outfile, "%+3d ", cb->pattern_position);
501 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
502 }
503 else
504 {
505 if (callout_extra) fprintf(outfile, " ");
506 else fprintf(outfile, "%3d ", cb->callout_number);
507 }
508
509 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
510 fprintf(outfile, "^");
511
512 if (post_start > 0)
513 {
514 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
515 fprintf(outfile, "^");
516 }
517
518 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
519 fprintf(outfile, " ");
520
521 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
522 pbuffer + cb->pattern_position);
523
524 fprintf(outfile, "\n");
525 first_callout = 0;
526
527 if (cb->callout_data != NULL)
528 {
529 int callout_data = *((int *)(cb->callout_data));
530 if (callout_data != 0)
531 {
532 fprintf(outfile, "Callout data = %d\n", callout_data);
533 return callout_data;
534 }
535 }
536
537 return (cb->callout_number != callout_fail_id)? 0 :
538 (++callout_count >= callout_fail_count)? 1 : 0;
539 }
540
541
542 /*************************************************
543 * Local malloc functions *
544 *************************************************/
545
546 /* Alternative malloc function, to test functionality and show the size of the
547 compiled re. */
548
549 static void *new_malloc(size_t size)
550 {
551 void *block = malloc(size);
552 gotten_store = size;
553 if (show_malloc)
554 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
555 return block;
556 }
557
558 static void new_free(void *block)
559 {
560 if (show_malloc)
561 fprintf(outfile, "free %p\n", block);
562 free(block);
563 }
564
565
566 /* For recursion malloc/free, to test stacking calls */
567
568 static void *stack_malloc(size_t size)
569 {
570 void *block = malloc(size);
571 if (show_malloc)
572 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
573 return block;
574 }
575
576 static void stack_free(void *block)
577 {
578 if (show_malloc)
579 fprintf(outfile, "stack_free %p\n", block);
580 free(block);
581 }
582
583
584 /*************************************************
585 * Call pcre_fullinfo() *
586 *************************************************/
587
588 /* Get one piece of information from the pcre_fullinfo() function */
589
590 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
591 {
592 int rc;
593 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
594 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
595 }
596
597
598
599 /*************************************************
600 * Byte flipping function *
601 *************************************************/
602
/* Reverse the byte order of a value. When n is 2 the two low-order bytes are
swapped; for any other n the four low-order bytes are reversed. Bits above
those bytes are discarded. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int b0 = value & 0xff;
  unsigned long int b1 = (value >> 8) & 0xff;
  unsigned long int b2, b3;

  if (n == 2) return (b0 << 8) | b1;

  b2 = (value >> 16) & 0xff;
  b3 = (value >> 24) & 0xff;

  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
612
613
614
615
616 /*************************************************
617 * Check match or recursion limit *
618 *************************************************/
619
620 static int
621 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
622 int start_offset, int options, int *use_offsets, int use_size_offsets,
623 int flag, unsigned long int *limit, int errnumber, const char *msg)
624 {
625 int count;
626 int min = 0;
627 int mid = 64;
628 int max = -1;
629
630 extra->flags |= flag;
631
632 for (;;)
633 {
634 *limit = mid;
635
636 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
637 use_offsets, use_size_offsets);
638
639 if (count == errnumber)
640 {
641 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
642 min = mid;
643 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
644 }
645
646 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
647 count == PCRE_ERROR_PARTIAL)
648 {
649 if (mid == min + 1)
650 {
651 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
652 break;
653 }
654 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
655 max = mid;
656 mid = (min + mid)/2;
657 }
658 else break; /* Some other error */
659 }
660
661 extra->flags &= ~flag;
662 return count;
663 }
664
665
666
667 /*************************************************
668 * Case-independent strncmp() function *
669 *************************************************/
670
671 /*
672 Arguments:
673 s first string
674 t second string
675 n number of characters to compare
676
677 Returns: < 0, = 0, or > 0, according to the comparison
678 */
679
680 static int
681 strncmpic(uschar *s, uschar *t, int n)
682 {
683 while (n--)
684 {
685 int c = tolower(*s++) - tolower(*t++);
686 if (c) return c;
687 }
688 return 0;
689 }
690
691
692
693 /*************************************************
694 * Check newline indicator *
695 *************************************************/
696
697 /* This is used both at compile and run-time to check for <xxx> escapes, where
698 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
699 no match.
700
701 Arguments:
702 p points after the leading '<'
703 f file for error message
704
705 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
706 */
707
708 static int
709 check_newline(uschar *p, FILE *f)
710 {
711 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
712 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
713 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
714 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
715 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
716 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
717 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
718 fprintf(f, "Unknown newline type at: <%s\n", p);
719 return 0;
720 }
721
722
723
724 /*************************************************
725 * Usage function *
726 *************************************************/
727
/* Write the command-line summary to stdout. The -dfa and -p lines appear
only when the corresponding support is compiled in. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input> [<output>]]\n", stdout);
  fputs(" -b show compiled code (bytecode)\n", stdout);
  fputs(" -C show PCRE compile-time options and exit\n", stdout);
  fputs(" -d debug: show compiled code and information (-b and -i)\n",
    stdout);
#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
  fputs(" -help show usage information\n", stdout);
  fputs(" -i show information about compiled patterns\n", stdout);
  fputs(" -m output memory used information\n", stdout);
  fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif
  fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
  fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
  fputs(" -s output store (memory) used information\n", stdout);
  fputs(" -t time compilation and execution\n", stdout);
  fputs(" -t <n> time compilation and execution, repeating <n> times\n",
    stdout);
  fputs(" -tm time execution (matching) only\n", stdout);
  fputs(" -tm <n> time execution (matching) only, repeating <n> times\n",
    stdout);
}
753
754
755
756 /*************************************************
757 * Main Program *
758 *************************************************/
759
760 /* Read lines from named file or stdin and write to named file or stdout; lines
761 consist of a regular expression, in delimiters and optionally followed by
762 options, followed by a set of test data, terminated by an empty line. */
763
764 int main(int argc, char **argv)
765 {
766 FILE *infile = stdin;
767 int options = 0;
768 int study_options = 0;
769 int op = 1;
770 int timeit = 0;
771 int timeitm = 0;
772 int showinfo = 0;
773 int showstore = 0;
774 int quiet = 0;
775 int size_offsets = 45;
776 int size_offsets_max;
777 int *offsets = NULL;
778 #if !defined NOPOSIX
779 int posix = 0;
780 #endif
781 int debug = 0;
782 int done = 0;
783 int all_use_dfa = 0;
784 int yield = 0;
785 int stack_size;
786
787 /* These vectors store, end-to-end, a list of captured substring names. Assume
788 that 1024 is plenty long enough for the few names we'll be testing. */
789
790 uschar copynames[1024];
791 uschar getnames[1024];
792
793 uschar *copynamesptr;
794 uschar *getnamesptr;
795
796 /* Get buffers from malloc() so that Electric Fence will check their misuse
797 when I am debugging. They grow automatically when very long lines are read. */
798
799 buffer = (unsigned char *)malloc(buffer_size);
800 dbuffer = (unsigned char *)malloc(buffer_size);
801 pbuffer = (unsigned char *)malloc(buffer_size);
802
803 /* The outfile variable is static so that new_malloc can use it. */
804
805 outfile = stdout;
806
807 /* The following _setmode() stuff is some Windows magic that tells its runtime
808 library to translate CRLF into a single LF character. At least, that's what
809 I've been told: never having used Windows I take this all on trust. Originally
810 it set 0x8000, but then I was advised that _O_BINARY was better. */
811
812 #if defined(_WIN32) || defined(WIN32)
813 _setmode( _fileno( stdout ), _O_BINARY );
814 #endif
815
816 /* Scan options */
817
818 while (argc > 1 && argv[op][0] == '-')
819 {
820 unsigned char *endptr;
821
822 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
823 showstore = 1;
824 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
825 else if (strcmp(argv[op], "-b") == 0) debug = 1;
826 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
827 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
828 #if !defined NODFA
829 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
830 #endif
831 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
832 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
833 *endptr == 0))
834 {
835 op++;
836 argc--;
837 }
838 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
839 {
840 int both = argv[op][2] == 0;
841 int temp;
842 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
843 *endptr == 0))
844 {
845 timeitm = temp;
846 op++;
847 argc--;
848 }
849 else timeitm = LOOPREPEAT;
850 if (both) timeit = timeitm;
851 }
852 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
853 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
854 *endptr == 0))
855 {
856 #if defined(_WIN32) || defined(WIN32)
857 printf("PCRE: -S not supported on this OS\n");
858 exit(1);
859 #else
860 int rc;
861 struct rlimit rlim;
862 getrlimit(RLIMIT_STACK, &rlim);
863 rlim.rlim_cur = stack_size * 1024 * 1024;
864 rc = setrlimit(RLIMIT_STACK, &rlim);
865 if (rc != 0)
866 {
867 printf("PCRE: setrlimit() failed with error %d\n", rc);
868 exit(1);
869 }
870 op++;
871 argc--;
872 #endif
873 }
874 #if !defined NOPOSIX
875 else if (strcmp(argv[op], "-p") == 0) posix = 1;
876 #endif
877 else if (strcmp(argv[op], "-C") == 0)
878 {
879 int rc;
880 printf("PCRE version %s\n", pcre_version());
881 printf("Compiled with\n");
882 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
883 printf(" %sUTF-8 support\n", rc? "" : "No ");
884 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
885 printf(" %sUnicode properties support\n", rc? "" : "No ");
886 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
887 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
888 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
889 (rc == -2)? "ANYCRLF" :
890 (rc == -1)? "ANY" : "???");
891 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
892 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
893 "all Unicode newlines");
894 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
895 printf(" Internal link size = %d\n", rc);
896 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
897 printf(" POSIX malloc threshold = %d\n", rc);
898 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
899 printf(" Default match limit = %d\n", rc);
900 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
901 printf(" Default recursion depth limit = %d\n", rc);
902 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
903 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
904 goto EXIT;
905 }
906 else if (strcmp(argv[op], "-help") == 0 ||
907 strcmp(argv[op], "--help") == 0)
908 {
909 usage();
910 goto EXIT;
911 }
912 else
913 {
914 printf("** Unknown or malformed option %s\n", argv[op]);
915 usage();
916 yield = 1;
917 goto EXIT;
918 }
919 op++;
920 argc--;
921 }
922
923 /* Get the store for the offsets vector, and remember what it was */
924
925 size_offsets_max = size_offsets;
926 offsets = (int *)malloc(size_offsets_max * sizeof(int));
927 if (offsets == NULL)
928 {
929 printf("** Failed to get %d bytes of memory for offsets vector\n",
930 (int)(size_offsets_max * sizeof(int)));
931 yield = 1;
932 goto EXIT;
933 }
934
935 /* Sort out the input and output files */
936
937 if (argc > 1)
938 {
939 infile = fopen(argv[op], INPUT_MODE);
940 if (infile == NULL)
941 {
942 printf("** Failed to open %s\n", argv[op]);
943 yield = 1;
944 goto EXIT;
945 }
946 }
947
948 if (argc > 2)
949 {
950 outfile = fopen(argv[op+1], OUTPUT_MODE);
951 if (outfile == NULL)
952 {
953 printf("** Failed to open %s\n", argv[op+1]);
954 yield = 1;
955 goto EXIT;
956 }
957 }
958
959 /* Set alternative malloc function */
960
961 pcre_malloc = new_malloc;
962 pcre_free = new_free;
963 pcre_stack_malloc = stack_malloc;
964 pcre_stack_free = stack_free;
965
966 /* Heading line unless quiet, then prompt for first regex if stdin */
967
968 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
969
970 /* Main loop */
971
972 while (!done)
973 {
974 pcre *re = NULL;
975 pcre_extra *extra = NULL;
976
977 #if !defined NOPOSIX /* There are still compilers that require no indent */
978 regex_t preg;
979 int do_posix = 0;
980 #endif
981
982 const char *error;
983 unsigned char *p, *pp, *ppp;
984 unsigned char *to_file = NULL;
985 const unsigned char *tables = NULL;
986 unsigned long int true_size, true_study_size = 0;
987 size_t size, regex_gotten_store;
988 int do_study = 0;
989 int do_debug = debug;
990 int do_G = 0;
991 int do_g = 0;
992 int do_showinfo = showinfo;
993 int do_showrest = 0;
994 int do_flip = 0;
995 int erroroffset, len, delimiter, poffset;
996
997 use_utf8 = 0;
998 debug_lengths = 1;
999
1000 if (infile == stdin) printf(" re> ");
1001 if (extend_inputline(infile, buffer) == NULL) break;
1002 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1003 fflush(outfile);
1004
1005 p = buffer;
1006 while (isspace(*p)) p++;
1007 if (*p == 0) continue;
1008
1009 /* See if the pattern is to be loaded pre-compiled from a file. */
1010
1011 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1012 {
1013 unsigned long int magic, get_options;
1014 uschar sbuf[8];
1015 FILE *f;
1016
1017 p++;
1018 pp = p + (int)strlen((char *)p);
1019 while (isspace(pp[-1])) pp--;
1020 *pp = 0;
1021
1022 f = fopen((char *)p, "rb");
1023 if (f == NULL)
1024 {
1025 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1026 continue;
1027 }
1028
1029 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1030
1031 true_size =
1032 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1033 true_study_size =
1034 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1035
1036 re = (real_pcre *)new_malloc(true_size);
1037 regex_gotten_store = gotten_store;
1038
1039 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1040
1041 magic = ((real_pcre *)re)->magic_number;
1042 if (magic != MAGIC_NUMBER)
1043 {
1044 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1045 {
1046 do_flip = 1;
1047 }
1048 else
1049 {
1050 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1051 fclose(f);
1052 continue;
1053 }
1054 }
1055
1056 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1057 do_flip? " (byte-inverted)" : "", p);
1058
1059 /* Need to know if UTF-8 for printing data strings */
1060
1061 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1062 use_utf8 = (get_options & PCRE_UTF8) != 0;
1063
1064 /* Now see if there is any following study data */
1065
1066 if (true_study_size != 0)
1067 {
1068 pcre_study_data *psd;
1069
1070 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1071 extra->flags = PCRE_EXTRA_STUDY_DATA;
1072
1073 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1074 extra->study_data = psd;
1075
1076 if (fread(psd, 1, true_study_size, f) != true_study_size)
1077 {
1078 FAIL_READ:
1079 fprintf(outfile, "Failed to read data from %s\n", p);
1080 if (extra != NULL) new_free(extra);
1081 if (re != NULL) new_free(re);
1082 fclose(f);
1083 continue;
1084 }
1085 fprintf(outfile, "Study data loaded from %s\n", p);
1086 do_study = 1; /* To get the data output if requested */
1087 }
1088 else fprintf(outfile, "No study data\n");
1089
1090 fclose(f);
1091 goto SHOW_INFO;
1092 }
1093
1094 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1095 the pattern; if it isn't complete, read more. */
1096
1097 delimiter = *p++;
1098
1099 if (isalnum(delimiter) || delimiter == '\\')
1100 {
1101 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1102 goto SKIP_DATA;
1103 }
1104
1105 pp = p;
1106 poffset = p - buffer;
1107
1108 for(;;)
1109 {
1110 while (*pp != 0)
1111 {
1112 if (*pp == '\\' && pp[1] != 0) pp++;
1113 else if (*pp == delimiter) break;
1114 pp++;
1115 }
1116 if (*pp != 0) break;
1117 if (infile == stdin) printf(" > ");
1118 if ((pp = extend_inputline(infile, pp)) == NULL)
1119 {
1120 fprintf(outfile, "** Unexpected EOF\n");
1121 done = 1;
1122 goto CONTINUE;
1123 }
1124 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1125 }
1126
1127 /* The buffer may have moved while being extended; reset the start of data
1128 pointer to the correct relative point in the buffer. */
1129
1130 p = buffer + poffset;
1131
1132 /* If the first character after the delimiter is backslash, make
1133 the pattern end with backslash. This is purely to provide a way
1134 of testing for the error message when a pattern ends with backslash. */
1135
1136 if (pp[1] == '\\') *pp++ = '\\';
1137
1138 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1139 for callouts. */
1140
1141 *pp++ = 0;
1142 strcpy((char *)pbuffer, (char *)p);
1143
1144 /* Look for options after final delimiter */
1145
1146 options = 0;
1147 study_options = 0;
1148 log_store = showstore; /* default from command line */
1149
1150 while (*pp != 0)
1151 {
1152 switch (*pp++)
1153 {
1154 case 'f': options |= PCRE_FIRSTLINE; break;
1155 case 'g': do_g = 1; break;
1156 case 'i': options |= PCRE_CASELESS; break;
1157 case 'm': options |= PCRE_MULTILINE; break;
1158 case 's': options |= PCRE_DOTALL; break;
1159 case 'x': options |= PCRE_EXTENDED; break;
1160
1161 case '+': do_showrest = 1; break;
1162 case 'A': options |= PCRE_ANCHORED; break;
1163 case 'B': do_debug = 1; break;
1164 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1165 case 'D': do_debug = do_showinfo = 1; break;
1166 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1167 case 'F': do_flip = 1; break;
1168 case 'G': do_G = 1; break;
1169 case 'I': do_showinfo = 1; break;
1170 case 'J': options |= PCRE_DUPNAMES; break;
1171 case 'M': log_store = 1; break;
1172 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1173
1174 #if !defined NOPOSIX
1175 case 'P': do_posix = 1; break;
1176 #endif
1177
1178 case 'S': do_study = 1; break;
1179 case 'U': options |= PCRE_UNGREEDY; break;
1180 case 'X': options |= PCRE_EXTRA; break;
1181 case 'Z': debug_lengths = 0; break;
1182 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1183 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1184
1185 case 'L':
1186 ppp = pp;
1187 /* The '\r' test here is so that it works on Windows. */
1188 /* The '0' test is just in case this is an unterminated line. */
1189 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1190 *ppp = 0;
1191 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1192 {
1193 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1194 goto SKIP_DATA;
1195 }
1196 locale_set = 1;
1197 tables = pcre_maketables();
1198 pp = ppp;
1199 break;
1200
1201 case '>':
1202 to_file = pp;
1203 while (*pp != 0) pp++;
1204 while (isspace(pp[-1])) pp--;
1205 *pp = 0;
1206 break;
1207
1208 case '<':
1209 {
1210 int x = check_newline(pp, outfile);
1211 if (x == 0) goto SKIP_DATA;
1212 options |= x;
1213 while (*pp++ != '>');
1214 }
1215 break;
1216
1217 case '\r': /* So that it works in Windows */
1218 case '\n':
1219 case ' ':
1220 break;
1221
1222 default:
1223 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1224 goto SKIP_DATA;
1225 }
1226 }
1227
1228 /* Handle compiling via the POSIX interface, which doesn't support the
1229 timing, showing, or debugging options, nor the ability to pass over
1230 local character tables. */
1231
1232 #if !defined NOPOSIX
1233 if (posix || do_posix)
1234 {
1235 int rc;
1236 int cflags = 0;
1237
1238 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1239 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1240 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1241 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1242 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1243
1244 rc = regcomp(&preg, (char *)p, cflags);
1245
1246 /* Compilation failed; go back for another re, skipping to blank line
1247 if non-interactive. */
1248
1249 if (rc != 0)
1250 {
1251 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1252 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1253 goto SKIP_DATA;
1254 }
1255 }
1256
1257 /* Handle compiling via the native interface */
1258
1259 else
1260 #endif /* !defined NOPOSIX */
1261
1262 {
1263 if (timeit > 0)
1264 {
1265 register int i;
1266 clock_t time_taken;
1267 clock_t start_time = clock();
1268 for (i = 0; i < timeit; i++)
1269 {
1270 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1271 if (re != NULL) free(re);
1272 }
1273 time_taken = clock() - start_time;
1274 fprintf(outfile, "Compile time %.4f milliseconds\n",
1275 (((double)time_taken * 1000.0) / (double)timeit) /
1276 (double)CLOCKS_PER_SEC);
1277 }
1278
1279 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1280
1281 /* Compilation failed; go back for another re, skipping to blank line
1282 if non-interactive. */
1283
1284 if (re == NULL)
1285 {
1286 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1287 SKIP_DATA:
1288 if (infile != stdin)
1289 {
1290 for (;;)
1291 {
1292 if (extend_inputline(infile, buffer) == NULL)
1293 {
1294 done = 1;
1295 goto CONTINUE;
1296 }
1297 len = (int)strlen((char *)buffer);
1298 while (len > 0 && isspace(buffer[len-1])) len--;
1299 if (len == 0) break;
1300 }
1301 fprintf(outfile, "\n");
1302 }
1303 goto CONTINUE;
1304 }
1305
1306 /* Compilation succeeded; print data if required. There are now two
1307 info-returning functions. The old one has a limited interface and
1308 returns only limited data. Check that it agrees with the newer one. */
1309
1310 if (log_store)
1311 fprintf(outfile, "Memory allocation (code space): %d\n",
1312 (int)(gotten_store -
1313 sizeof(real_pcre) -
1314 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1315
1316 /* Extract the size for possible writing before possibly flipping it,
1317 and remember the store that was got. */
1318
1319 true_size = ((real_pcre *)re)->size;
1320 regex_gotten_store = gotten_store;
1321
1322 /* If /S was present, study the regexp to generate additional info to
1323 help with the matching. */
1324
1325 if (do_study)
1326 {
1327 if (timeit > 0)
1328 {
1329 register int i;
1330 clock_t time_taken;
1331 clock_t start_time = clock();
1332 for (i = 0; i < timeit; i++)
1333 extra = pcre_study(re, study_options, &error);
1334 time_taken = clock() - start_time;
1335 if (extra != NULL) free(extra);
1336 fprintf(outfile, " Study time %.4f milliseconds\n",
1337 (((double)time_taken * 1000.0) / (double)timeit) /
1338 (double)CLOCKS_PER_SEC);
1339 }
1340 extra = pcre_study(re, study_options, &error);
1341 if (error != NULL)
1342 fprintf(outfile, "Failed to study: %s\n", error);
1343 else if (extra != NULL)
1344 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1345 }
1346
1347 /* If the 'F' option was present, we flip the bytes of all the integer
1348 fields in the regex data block and the study block. This is to make it
1349 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1350 compiled on a different architecture. */
1351
1352 if (do_flip)
1353 {
1354 real_pcre *rre = (real_pcre *)re;
1355 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1356 rre->size = byteflip(rre->size, sizeof(rre->size));
1357 rre->options = byteflip(rre->options, sizeof(rre->options));
1358 rre->flags = byteflip(rre->flags, sizeof(rre->flags));
1359 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1360 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1361 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1362 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1363 rre->name_table_offset = byteflip(rre->name_table_offset,
1364 sizeof(rre->name_table_offset));
1365 rre->name_entry_size = byteflip(rre->name_entry_size,
1366 sizeof(rre->name_entry_size));
1367 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1368
1369 if (extra != NULL)
1370 {
1371 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1372 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1373 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1374 }
1375 }
1376
1377 /* Extract information from the compiled data if required */
1378
1379 SHOW_INFO:
1380
1381 if (do_debug)
1382 {
1383 fprintf(outfile, "------------------------------------------------------------------\n");
1384 pcre_printint(re, outfile, debug_lengths);
1385 }
1386
1387 if (do_showinfo)
1388 {
1389 unsigned long int get_options, all_options;
1390 #if !defined NOINFOCHECK
1391 int old_first_char, old_options, old_count;
1392 #endif
1393 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1394 hascrorlf;
1395 int nameentrysize, namecount;
1396 const uschar *nametable;
1397
1398 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1399 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1400 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1401 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1402 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1403 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1404 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1405 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1406 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1407 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1408 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1409 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1410
1411 #if !defined NOINFOCHECK
1412 old_count = pcre_info(re, &old_options, &old_first_char);
1413 if (count < 0) fprintf(outfile,
1414 "Error %d from pcre_info()\n", count);
1415 else
1416 {
1417 if (old_count != count) fprintf(outfile,
1418 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1419 old_count);
1420
1421 if (old_first_char != first_char) fprintf(outfile,
1422 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1423 first_char, old_first_char);
1424
1425 if (old_options != (int)get_options) fprintf(outfile,
1426 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1427 get_options, old_options);
1428 }
1429 #endif
1430
1431 if (size != regex_gotten_store) fprintf(outfile,
1432 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1433 (int)size, (int)regex_gotten_store);
1434
1435 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1436 if (backrefmax > 0)
1437 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1438
1439 if (namecount > 0)
1440 {
1441 fprintf(outfile, "Named capturing subpatterns:\n");
1442 while (namecount-- > 0)
1443 {
1444 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1445 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1446 GET2(nametable, 0));
1447 nametable += nameentrysize;
1448 }
1449 }
1450
1451 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1452 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1453
1454 all_options = ((real_pcre *)re)->options;
1455 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1456
1457 if (get_options == 0) fprintf(outfile, "No options\n");
1458 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1459 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1460 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1461 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1462 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1463 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1464 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1465 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1466 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1467 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1468 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1469 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1470 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1471 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1472 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1473 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1474
1475 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1476
1477 switch (get_options & PCRE_NEWLINE_BITS)
1478 {
1479 case PCRE_NEWLINE_CR:
1480 fprintf(outfile, "Forced newline sequence: CR\n");
1481 break;
1482
1483 case PCRE_NEWLINE_LF:
1484 fprintf(outfile, "Forced newline sequence: LF\n");
1485 break;
1486
1487 case PCRE_NEWLINE_CRLF:
1488 fprintf(outfile, "Forced newline sequence: CRLF\n");
1489 break;
1490
1491 case PCRE_NEWLINE_ANYCRLF:
1492 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1493 break;
1494
1495 case PCRE_NEWLINE_ANY:
1496 fprintf(outfile, "Forced newline sequence: ANY\n");
1497 break;
1498
1499 default:
1500 break;
1501 }
1502
1503 if (first_char == -1)
1504 {
1505 fprintf(outfile, "First char at start or follows newline\n");
1506 }
1507 else if (first_char < 0)
1508 {
1509 fprintf(outfile, "No first char\n");
1510 }
1511 else
1512 {
1513 int ch = first_char & 255;
1514 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1515 "" : " (caseless)";
1516 if (PRINTHEX(ch))
1517 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1518 else
1519 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1520 }
1521
1522 if (need_char < 0)
1523 {
1524 fprintf(outfile, "No need char\n");
1525 }
1526 else
1527 {
1528 int ch = need_char & 255;
1529 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1530 "" : " (caseless)";
1531 if (PRINTHEX(ch))
1532 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1533 else
1534 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1535 }
1536
1537 /* Don't output study size; at present it is in any case a fixed
1538 value, but it varies, depending on the computer architecture, and
1539 so messes up the test suite. (And with the /F option, it might be
1540 flipped.) */
1541
1542 if (do_study)
1543 {
1544 if (extra == NULL)
1545 fprintf(outfile, "Study returned NULL\n");
1546 else
1547 {
1548 uschar *start_bits = NULL;
1549 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1550
1551 if (start_bits == NULL)
1552 fprintf(outfile, "No starting byte set\n");
1553 else
1554 {
1555 int i;
1556 int c = 24;
1557 fprintf(outfile, "Starting byte set: ");
1558 for (i = 0; i < 256; i++)
1559 {
1560 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1561 {
1562 if (c > 75)
1563 {
1564 fprintf(outfile, "\n ");
1565 c = 2;
1566 }
1567 if (PRINTHEX(i) && i != ' ')
1568 {
1569 fprintf(outfile, "%c ", i);
1570 c += 2;
1571 }
1572 else
1573 {
1574 fprintf(outfile, "\\x%02x ", i);
1575 c += 5;
1576 }
1577 }
1578 }
1579 fprintf(outfile, "\n");
1580 }
1581 }
1582 }
1583 }
1584
1585 /* If the '>' option was present, we write out the regex to a file, and
1586 that is all. The first 8 bytes of the file are the regex length and then
1587 the study length, in big-endian order. */
1588
1589 if (to_file != NULL)
1590 {
1591 FILE *f = fopen((char *)to_file, "wb");
1592 if (f == NULL)
1593 {
1594 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1595 }
1596 else
1597 {
1598 uschar sbuf[8];
1599 sbuf[0] = (true_size >> 24) & 255;
1600 sbuf[1] = (true_size >> 16) & 255;
1601 sbuf[2] = (true_size >> 8) & 255;
1602 sbuf[3] = (true_size) & 255;
1603
1604 sbuf[4] = (true_study_size >> 24) & 255;
1605 sbuf[5] = (true_study_size >> 16) & 255;
1606 sbuf[6] = (true_study_size >> 8) & 255;
1607 sbuf[7] = (true_study_size) & 255;
1608
1609 if (fwrite(sbuf, 1, 8, f) < 8 ||
1610 fwrite(re, 1, true_size, f) < true_size)
1611 {
1612 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1613 }
1614 else
1615 {
1616 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1617 if (extra != NULL)
1618 {
1619 if (fwrite(extra->study_data, 1, true_study_size, f) <
1620 true_study_size)
1621 {
1622 fprintf(outfile, "Write error on %s: %s\n", to_file,
1623 strerror(errno));
1624 }
1625 else fprintf(outfile, "Study data written to %s\n", to_file);
1626
1627 }
1628 }
1629 fclose(f);
1630 }
1631
1632 new_free(re);
1633 if (extra != NULL) new_free(extra);
1634 if (tables != NULL) new_free((void *)tables);
1635 continue; /* With next regex */
1636 }
1637 } /* End of non-POSIX compile */
1638
1639 /* Read data lines and test them */
1640
1641 for (;;)
1642 {
1643 uschar *q;
1644 uschar *bptr;
1645 int *use_offsets = offsets;
1646 int use_size_offsets = size_offsets;
1647 int callout_data = 0;
1648 int callout_data_set = 0;
1649 int count, c;
1650 int copystrings = 0;
1651 int find_match_limit = 0;
1652 int getstrings = 0;
1653 int getlist = 0;
1654 int gmatched = 0;
1655 int start_offset = 0;
1656 int g_notempty = 0;
1657 int use_dfa = 0;
1658
1659 options = 0;
1660
1661 *copynames = 0;
1662 *getnames = 0;
1663
1664 copynamesptr = copynames;
1665 getnamesptr = getnames;
1666
1667 pcre_callout = callout;
1668 first_callout = 1;
1669 callout_extra = 0;
1670 callout_count = 0;
1671 callout_fail_count = 999999;
1672 callout_fail_id = -1;
1673 show_malloc = 0;
1674
1675 if (extra != NULL) extra->flags &=
1676 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1677
1678 len = 0;
1679 for (;;)
1680 {
1681 if (infile == stdin) printf("data> ");
1682 if (extend_inputline(infile, buffer + len) == NULL)
1683 {
1684 if (len > 0) break;
1685 done = 1;
1686 goto CONTINUE;
1687 }
1688 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1689 len = (int)strlen((char *)buffer);
1690 if (buffer[len-1] == '\n') break;
1691 }
1692
1693 while (len > 0 && isspace(buffer[len-1])) len--;
1694 buffer[len] = 0;
1695 if (len == 0) break;
1696
1697 p = buffer;
1698 while (isspace(*p)) p++;
1699
1700 bptr = q = dbuffer;
1701 while ((c = *p++) != 0)
1702 {
1703 int i = 0;
1704 int n = 0;
1705
1706 if (c == '\\') switch ((c = *p++))
1707 {
1708 case 'a': c = 7; break;
1709 case 'b': c = '\b'; break;
1710 case 'e': c = 27; break;
1711 case 'f': c = '\f'; break;
1712 case 'n': c = '\n'; break;
1713 case 'r': c = '\r'; break;
1714 case 't': c = '\t'; break;
1715 case 'v': c = '\v'; break;
1716
1717 case '0': case '1': case '2': case '3':
1718 case '4': case '5': case '6': case '7':
1719 c -= '0';
1720 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1721 c = c * 8 + *p++ - '0';
1722
1723 #if !defined NOUTF8
1724 if (use_utf8 && c > 255)
1725 {
1726 unsigned char buff8[8];
1727 int ii, utn;
1728 utn = ord2utf8(c, buff8);
1729 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1730 c = buff8[ii]; /* Last byte */
1731 }
1732 #endif
1733 break;
1734
1735 case 'x':
1736
1737 /* Handle \x{..} specially - new Perl thing for utf8 */
1738
1739 #if !defined NOUTF8
1740 if (*p == '{')
1741 {
1742 unsigned char *pt = p;
1743 c = 0;
1744 while (isxdigit(*(++pt)))
1745 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1746 if (*pt == '}')
1747 {
1748 unsigned char buff8[8];
1749 int ii, utn;
1750 utn = ord2utf8(c, buff8);
1751 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1752 c = buff8[ii]; /* Last byte */
1753 p = pt + 1;
1754 break;
1755 }
1756 /* Not correct form; fall through */
1757 }
1758 #endif
1759
1760 /* Ordinary \x */
1761
1762 c = 0;
1763 while (i++ < 2 && isxdigit(*p))
1764 {
1765 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1766 p++;
1767 }
1768 break;
1769
1770 case 0: /* \ followed by EOF allows for an empty line */
1771 p--;
1772 continue;
1773
1774 case '>':
1775 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1776 continue;
1777
1778 case 'A': /* Option setting */
1779 options |= PCRE_ANCHORED;
1780 continue;
1781
1782 case 'B':
1783 options |= PCRE_NOTBOL;
1784 continue;
1785
1786 case 'C':
1787 if (isdigit(*p)) /* Set copy string */
1788 {
1789 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1790 copystrings |= 1 << n;
1791 }
1792 else if (isalnum(*p))
1793 {
1794 uschar *npp = copynamesptr;
1795 while (isalnum(*p)) *npp++ = *p++;
1796 *npp++ = 0;
1797 *npp = 0;
1798 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1799 if (n < 0)
1800 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1801 copynamesptr = npp;
1802 }
1803 else if (*p == '+')
1804 {
1805 callout_extra = 1;
1806 p++;
1807 }
1808 else if (*p == '-')
1809 {
1810 pcre_callout = NULL;
1811 p++;
1812 }
1813 else if (*p == '!')
1814 {
1815 callout_fail_id = 0;
1816 p++;
1817 while(isdigit(*p))
1818 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1819 callout_fail_count = 0;
1820 if (*p == '!')
1821 {
1822 p++;
1823 while(isdigit(*p))
1824 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1825 }
1826 }
1827 else if (*p == '*')
1828 {
1829 int sign = 1;
1830 callout_data = 0;
1831 if (*(++p) == '-') { sign = -1; p++; }
1832 while(isdigit(*p))
1833 callout_data = callout_data * 10 + *p++ - '0';
1834 callout_data *= sign;
1835 callout_data_set = 1;
1836 }
1837 continue;
1838
1839 #if !defined NODFA
1840 case 'D':
1841 #if !defined NOPOSIX
1842 if (posix || do_posix)
1843 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1844 else
1845 #endif
1846 use_dfa = 1;
1847 continue;
1848
1849 case 'F':
1850 options |= PCRE_DFA_SHORTEST;
1851 continue;
1852 #endif
1853
1854 case 'G':
1855 if (isdigit(*p))
1856 {
1857 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1858 getstrings |= 1 << n;
1859 }
1860 else if (isalnum(*p))
1861 {
1862 uschar *npp = getnamesptr;
1863 while (isalnum(*p)) *npp++ = *p++;
1864 *npp++ = 0;
1865 *npp = 0;
1866 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1867 if (n < 0)
1868 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1869 getnamesptr = npp;
1870 }
1871 continue;
1872
1873 case 'L':
1874 getlist = 1;
1875 continue;
1876
1877 case 'M':
1878 find_match_limit = 1;
1879 continue;
1880
1881 case 'N':
1882 options |= PCRE_NOTEMPTY;
1883 continue;
1884
1885 case 'O':
1886 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1887 if (n > size_offsets_max)
1888 {
1889 size_offsets_max = n;
1890 free(offsets);
1891 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1892 if (offsets == NULL)
1893 {
1894 printf("** Failed to get %d bytes of memory for offsets vector\n",
1895 (int)(size_offsets_max * sizeof(int)));
1896 yield = 1;
1897 goto EXIT;
1898 }
1899 }
1900 use_size_offsets = n;
1901 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1902 continue;
1903
1904 case 'P':
1905 options |= PCRE_PARTIAL;
1906 continue;
1907
1908 case 'Q':
1909 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1910 if (extra == NULL)
1911 {
1912 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1913 extra->flags = 0;
1914 }
1915 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1916 extra->match_limit_recursion = n;
1917 continue;
1918
1919 case 'q':
1920 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1921 if (extra == NULL)
1922 {
1923 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1924 extra->flags = 0;
1925 }
1926 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1927 extra->match_limit = n;
1928 continue;
1929
1930 #if !defined NODFA
1931 case 'R':
1932 options |= PCRE_DFA_RESTART;
1933 continue;
1934 #endif
1935
1936 case 'S':
1937 show_malloc = 1;
1938 continue;
1939
1940 case 'Z':
1941 options |= PCRE_NOTEOL;
1942 continue;
1943
1944 case '?':
1945 options |= PCRE_NO_UTF8_CHECK;
1946 continue;
1947
1948 case '<':
1949 {
1950 int x = check_newline(p, outfile);
1951 if (x == 0) goto NEXT_DATA;
1952 options |= x;
1953 while (*p++ != '>');
1954 }
1955 continue;
1956 }
1957 *q++ = c;
1958 }
1959 *q = 0;
1960 len = q - dbuffer;
1961
1962 if ((all_use_dfa || use_dfa) && find_match_limit)
1963 {
1964 printf("**Match limit not relevant for DFA matching: ignored\n");
1965 find_match_limit = 0;
1966 }
1967
1968 /* Handle matching via the POSIX interface, which does not
1969 support timing or playing with the match limit or callout data. */
1970
1971 #if !defined NOPOSIX
1972 if (posix || do_posix)
1973 {
1974 int rc;
1975 int eflags = 0;
1976 regmatch_t *pmatch = NULL;
1977 if (use_size_offsets > 0)
1978 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1979 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1980 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1981
1982 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1983
1984 if (rc != 0)
1985 {
1986 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1987 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1988 }
1989 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1990 != 0)
1991 {
1992 fprintf(outfile, "Matched with REG_NOSUB\n");
1993 }
1994 else
1995 {
1996 size_t i;
1997 for (i = 0; i < (size_t)use_size_offsets; i++)
1998 {
1999 if (pmatch[i].rm_so >= 0)
2000 {
2001 fprintf(outfile, "%2d: ", (int)i);
2002 (void)pchars(dbuffer + pmatch[i].rm_so,
2003 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2004 fprintf(outfile, "\n");
2005 if (i == 0 && do_showrest)
2006 {
2007 fprintf(outfile, " 0+ ");
2008 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2009 outfile);
2010 fprintf(outfile, "\n");
2011 }
2012 }
2013 }
2014 }
2015 free(pmatch);
2016 }
2017
2018 /* Handle matching via the native interface - repeats for /g and /G */
2019
2020 else
2021 #endif /* !defined NOPOSIX */
2022
2023 for (;; gmatched++) /* Loop for /g or /G */
2024 {
2025 if (timeitm > 0)
2026 {
2027 register int i;
2028 clock_t time_taken;
2029 clock_t start_time = clock();
2030
2031 #if !defined NODFA
2032 if (all_use_dfa || use_dfa)
2033 {
2034 int workspace[1000];
2035 for (i = 0; i < timeitm; i++)
2036 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2037 options | g_notempty, use_offsets, use_size_offsets, workspace,
2038 sizeof(workspace)/sizeof(int));
2039 }
2040 else
2041 #endif
2042
2043 for (i = 0; i < timeitm; i++)
2044 count = pcre_exec(re, extra, (char *)bptr, len,
2045 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2046
2047 time_taken = clock() - start_time;
2048 fprintf(outfile, "Execute time %.4f milliseconds\n",
2049 (((double)time_taken * 1000.0) / (double)timeitm) /
2050 (double)CLOCKS_PER_SEC);
2051 }
2052
2053 /* If find_match_limit is set, we want to do repeated matches with
2054 varying limits in order to find the minimum value for the match limit and
2055 for the recursion limit. */
2056
2057 if (find_match_limit)
2058 {
2059 if (extra == NULL)
2060 {
2061 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2062 extra->flags = 0;
2063 }
2064
2065 (void)check_match_limit(re, extra, bptr, len, start_offset,
2066 options|g_notempty, use_offsets, use_size_offsets,
2067 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2068 PCRE_ERROR_MATCHLIMIT, "match()");
2069
2070 count = check_match_limit(re, extra, bptr, len, start_offset,
2071 options|g_notempty, use_offsets, use_size_offsets,
2072 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2073 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2074 }
2075
2076 /* If callout_data is set, use the interface with additional data */
2077
2078 else if (callout_data_set)
2079 {
2080 if (extra == NULL)
2081 {
2082 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2083 extra->flags = 0;
2084 }
2085 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2086 extra->callout_data = &callout_data;
2087 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2088 options | g_notempty, use_offsets, use_size_offsets);
2089 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2090 }
2091
2092 /* The normal case is just to do the match once, with the default
2093 value of match_limit. */
2094
2095 #if !defined NODFA
2096 else if (all_use_dfa || use_dfa)
2097 {
2098 int workspace[1000];
2099 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2100 options | g_notempty, use_offsets, use_size_offsets, workspace,
2101 sizeof(workspace)/sizeof(int));
2102 if (count == 0)
2103 {
2104 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2105 count = use_size_offsets/2;
2106 }
2107 }
2108 #endif
2109
2110 else
2111 {
2112 count = pcre_exec(re, extra, (char *)bptr, len,
2113 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2114 if (count == 0)
2115 {
2116 fprintf(outfile, "Matched, but too many substrings\n");
2117 count = use_size_offsets/3;
2118 }
2119 }
2120
2121 /* Matched */
2122
2123 if (count >= 0)
2124 {
2125 int i, maxcount;
2126
2127 #if !defined NODFA
2128 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2129 #endif
2130 maxcount = use_size_offsets/3;
2131
2132 /* This is a check against a lunatic return value. */
2133
2134 if (count > maxcount)
2135 {
2136 fprintf(outfile,
2137 "** PCRE error: returned count %d is too big for offset size %d\n",
2138 count, use_size_offsets);
2139 count = use_size_offsets/3;
2140 if (do_g || do_G)
2141 {
2142 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2143 do_g = do_G = FALSE; /* Break g/G loop */
2144 }
2145 }
2146
2147 for (i = 0; i < count * 2; i += 2)
2148 {
2149 if (use_offsets[i] < 0)
2150 fprintf(outfile, "%2d: <unset>\n", i/2);
2151 else
2152 {
2153 fprintf(outfile, "%2d: ", i/2);
2154 (void)pchars(bptr + use_offsets[i],
2155 use_offsets[i+1] - use_offsets[i], outfile);
2156 fprintf(outfile, "\n");
2157 if (i == 0)
2158 {
2159 if (do_showrest)
2160 {
2161 fprintf(outfile, " 0+ ");
2162 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2163 outfile);
2164 fprintf(outfile, "\n");
2165 }
2166 }
2167 }
2168 }
2169
2170 for (i = 0; i < 32; i++)
2171 {
2172 if ((copystrings & (1 << i)) != 0)
2173 {
2174 char copybuffer[256];
2175 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2176 i, copybuffer, sizeof(copybuffer));
2177 if (rc < 0)
2178 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2179 else
2180 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2181 }
2182 }
2183
2184 for (copynamesptr = copynames;
2185 *copynamesptr != 0;
2186 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2187 {
2188 char copybuffer[256];
2189 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2190 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2191 if (rc < 0)
2192 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2193 else
2194 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2195 }
2196
2197 for (i = 0; i < 32; i++)
2198 {
2199 if ((getstrings & (1 << i)) != 0)
2200 {
2201 const char *substring;
2202 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2203 i, &substring);
2204 if (rc < 0)
2205 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2206 else
2207 {
2208 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2209 pcre_free_substring(substring);
2210 }
2211 }
2212 }
2213
2214 for (getnamesptr = getnames;
2215 *getnamesptr != 0;
2216 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2217 {
2218 const char *substring;
2219 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2220 count, (char *)getnamesptr, &substring);
2221 if (rc < 0)
2222 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2223 else
2224 {
2225 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2226 pcre_free_substring(substring);
2227 }
2228 }
2229
2230 if (getlist)
2231 {
2232 const char **stringlist;
2233 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2234 &stringlist);
2235 if (rc < 0)
2236 fprintf(outfile, "get substring list failed %d\n", rc);
2237 else
2238 {
2239 for (i = 0; i < count; i++)
2240 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2241 if (stringlist[i] != NULL)
2242 fprintf(outfile, "string list not terminated by NULL\n");
2243 /* free((void *)stringlist); */
2244 pcre_free_substring_list(stringlist);
2245 }
2246 }
2247 }
2248
2249 /* There was a partial match */
2250
2251 else if (count == PCRE_ERROR_PARTIAL)
2252 {
2253 fprintf(outfile, "Partial match");
2254 #if !defined NODFA
2255 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2256 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2257 bptr + use_offsets[0]);
2258 #endif
2259 fprintf(outfile, "\n");
2260 break; /* Out of the /g loop */
2261 }
2262
2263 /* Failed to match. If this is a /g or /G loop and we previously set
2264 g_notempty after a null match, this is not necessarily the end. We want
2265 to advance the start offset, and continue. We won't be at the end of the
2266 string - that was checked before setting g_notempty.
2267
2268 Complication arises in the case when the newline option is "any" or
2269 "anycrlf". If the previous match was at the end of a line terminated by
2270 CRLF, an advance of one character just passes the \r, whereas we should
2271 prefer the longer newline sequence, as does the code in pcre_exec().
2272 Fudge the offset value to achieve this.
2273
2274 Otherwise, in the case of UTF-8 matching, the advance must be one
2275 character, not one byte. */
2276
2277 else
2278 {
2279 if (g_notempty != 0)
2280 {
2281 int onechar = 1;
2282 unsigned int obits = ((real_pcre *)re)->options;
2283 use_offsets[0] = start_offset;
2284 if ((obits & PCRE_NEWLINE_BITS) == 0)
2285 {
2286 int d;
2287 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2288 obits = (d == '\r')? PCRE_NEWLINE_CR :
2289 (d == '\n')? PCRE_NEWLINE_LF :
2290 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2291 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2292 (d == -1)? PCRE_NEWLINE_ANY : 0;
2293 }
2294 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2295 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2296 &&
2297 start_offset < len - 1 &&
2298 bptr[start_offset] == '\r' &&
2299 bptr[start_offset+1] == '\n')
2300 onechar++;
2301 else if (use_utf8)
2302 {
2303 while (start_offset + onechar < len)
2304 {
2305 int tb = bptr[start_offset+onechar];
2306 if (tb <= 127) break;
2307 tb &= 0xc0;
2308 if (tb != 0 && tb != 0xc0) onechar++;
2309 }
2310 }
2311 use_offsets[1] = start_offset + onechar;
2312 }
2313 else
2314 {
2315 if (count == PCRE_ERROR_NOMATCH)
2316 {
2317 if (gmatched == 0) fprintf(outfile, "No match\n");
2318 }
2319 else fprintf(outfile, "Error %d\n", count);
2320 break; /* Out of the /g loop */
2321 }
2322 }
2323
2324 /* If not /g or /G we are done */
2325
2326 if (!do_g && !do_G) break;
2327
2328 /* If we have matched an empty string, first check to see if we are at
2329 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2330 what Perl's /g option does. This turns out to be rather cunning. First
2331 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2332 same point. If this fails (picked up above) we advance to the next
2333 character. */
2334
2335 g_notempty = 0;
2336
2337 if (use_offsets[0] == use_offsets[1])
2338 {
2339 if (use_offsets[0] == len) break;
2340 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2341 }
2342
2343 /* For /g, update the start offset, leaving the rest alone */
2344
2345 if (do_g) start_offset = use_offsets[1];
2346
2347 /* For /G, update the pointer and length */
2348
2349 else
2350 {
2351 bptr += use_offsets[1];
2352 len -= use_offsets[1];
2353 }
2354 } /* End of loop for /g and /G */
2355
2356 NEXT_DATA: continue;
2357 } /* End of loop for data lines */
2358
2359 CONTINUE:
2360
2361 #if !defined NOPOSIX
2362 if (posix || do_posix) regfree(&preg);
2363 #endif
2364
2365 if (re != NULL) new_free(re);
2366 if (extra != NULL) new_free(extra);
2367 if (tables != NULL)
2368 {
2369 new_free((void *)tables);
2370 setlocale(LC_CTYPE, "C");
2371 locale_set = 0;
2372 }
2373 }
2374
2375 if (infile == stdin) fprintf(outfile, "\n");
2376
2377 EXIT:
2378
2379 if (infile != NULL && infile != stdin) fclose(infile);
2380 if (outfile != NULL && outfile != stdout) fclose(outfile);
2381
2382 free(buffer);
2383 free(dbuffer);
2384 free(pbuffer);
2385 free(offsets);
2386
2387 return yield;
2388 }
2389
2390 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12