/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 255 - (show annotations) (download)
Wed Sep 19 08:50:04 2007 UTC (7 years ago) by ph10
File MIME type: text/plain
File size: 70172 byte(s)
Add casts to pcretest.c to avoid compiler warnings.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50
51
52 /* A number of things vary for Windows builds. Originally, pcretest opened its
53 input and output without "b"; then I was told that "b" was needed in some
54 environments, so it was added for release 5.0 to both the input and output. (It
55 makes no difference on Unix-like systems.) Later I was told that it is wrong
56 for the input on Windows. I've now abstracted the modes into two macros that
57 are set here, to make it easier to fiddle with them, and removed "b" from the
58 input mode under Windows. */
59
60 #if defined(_WIN32) || defined(WIN32)
61 #include <io.h> /* For _setmode() */
62 #include <fcntl.h> /* For _O_BINARY */
63 #define INPUT_MODE "r"
64 #define OUTPUT_MODE "wb"
65
66 #else
67 #include <sys/time.h> /* These two includes are needed */
68 #include <sys/resource.h> /* for setrlimit(). */
69 #define INPUT_MODE "rb"
70 #define OUTPUT_MODE "wb"
71 #endif
72
73
74 /* We have to include pcre_internal.h because we need the internal info for
75 displaying the results of pcre_study() and we also need to know about the
76 internal macros, structures, and other internal data values; pcretest has
77 "inside information" compared to a program that strictly follows the PCRE API.
78
79 Although pcre_internal.h does itself include pcre.h, we explicitly include it
80 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81 appropriately for an application, not for building PCRE. */
82
83 #include "pcre.h"
84 #include "pcre_internal.h"
85
86 /* We need access to the data tables that PCRE uses. So as not to have to keep
87 two copies, we include the source file here, changing the names of the external
88 symbols to prevent clashes. */
89
90 #define _pcre_utf8_table1 utf8_table1
91 #define _pcre_utf8_table1_size utf8_table1_size
92 #define _pcre_utf8_table2 utf8_table2
93 #define _pcre_utf8_table3 utf8_table3
94 #define _pcre_utf8_table4 utf8_table4
95 #define _pcre_utt utt
96 #define _pcre_utt_size utt_size
97 #define _pcre_utt_names utt_names
98 #define _pcre_OP_lengths OP_lengths
99
100 #include "pcre_tables.c"
101
102 /* We also need the pcre_printint() function for printing out compiled
103 patterns. This function is in a separate file so that it can be included in
104 pcre_compile.c when that module is compiled with debugging enabled.
105
106 The definition of the macro PRINTABLE, which determines whether to print an
107 output character as-is or as a hex value when showing compiled patterns, is
108 contained in this file. We use it here also, in cases when the locale has not
109 been explicitly changed, so as to get consistent output from systems that
110 differ in their output from isprint() even in the "C" locale. */
111
112 #include "pcre_printint.src"
113
114 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
115
116
117 /* It is possible to compile this test program without including support for
118 testing the POSIX interface, though this is not available via the standard
119 Makefile. */
120
121 #if !defined NOPOSIX
122 #include "pcreposix.h"
123 #endif
124
125 /* It is also possible, for the benefit of the version currently imported into
126 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
127 interface to the DFA matcher (NODFA), and without the doublecheck of the old
128 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
129 UTF8 support if PCRE is built without it. */
130
131 #ifndef SUPPORT_UTF8
132 #ifndef NOUTF8
133 #define NOUTF8
134 #endif
135 #endif
136
137
138 /* Other parameters */
139
140 #ifndef CLOCKS_PER_SEC
141 #ifdef CLK_TCK
142 #define CLOCKS_PER_SEC CLK_TCK
143 #else
144 #define CLOCKS_PER_SEC 100
145 #endif
146 #endif
147
148 /* This is the default loop count for timing. */
149
150 #define LOOPREPEAT 500000
151
152 /* Static variables */
153
154 static FILE *outfile;
155 static int log_store = 0;
156 static int callout_count;
157 static int callout_extra;
158 static int callout_fail_count;
159 static int callout_fail_id;
160 static int debug_lengths;
161 static int first_callout;
162 static int locale_set = 0;
163 static int show_malloc;
164 static int use_utf8;
165 static size_t gotten_store;
166
167 /* The buffers grow automatically if very long input lines are encountered. */
168
169 static int buffer_size = 50000;
170 static uschar *buffer = NULL;
171 static uschar *dbuffer = NULL;
172 static uschar *pbuffer = NULL;
173
174
175
176 /*************************************************
177 * Read or extend an input line *
178 *************************************************/
179
180 /* Input lines are read into buffer, but both patterns and data lines can be
181 continued over multiple input lines. In addition, if the buffer fills up, we
182 want to automatically expand it so as to be able to handle extremely large
183 lines that are needed for certain stress tests. When the input buffer is
184 expanded, the other two buffers must also be expanded likewise, and the
185 contents of pbuffer, which are a copy of the input for callouts, must be
186 preserved (for when expansion happens for a data line). This is not the most
187 optimal way of handling this, but hey, this is just a test program!
188
189 Arguments:
190 f the file to read
191 start where in buffer to start (this *must* be within buffer)
192
193 Returns: pointer to the start of new data
194 could be a copy of start, or could be moved
195 NULL if no data read and EOF reached
196 */
197
198 static uschar *
199 extend_inputline(FILE *f, uschar *start)
200 {
201 uschar *here = start;
202
203 for (;;)
204 {
205 int rlen = buffer_size - (here - buffer);
206
207 if (rlen > 1000)
208 {
209 int dlen;
210 if (fgets((char *)here, rlen, f) == NULL)
211 return (here == start)? NULL : start;
212 dlen = (int)strlen((char *)here);
213 if (dlen > 0 && here[dlen - 1] == '\n') return start;
214 here += dlen;
215 }
216
217 else
218 {
219 int new_buffer_size = 2*buffer_size;
220 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
221 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
222 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
223
224 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
225 {
226 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
227 exit(1);
228 }
229
230 memcpy(new_buffer, buffer, buffer_size);
231 memcpy(new_pbuffer, pbuffer, buffer_size);
232
233 buffer_size = new_buffer_size;
234
235 start = new_buffer + (start - buffer);
236 here = new_buffer + (here - buffer);
237
238 free(buffer);
239 free(dbuffer);
240 free(pbuffer);
241
242 buffer = new_buffer;
243 dbuffer = new_dbuffer;
244 pbuffer = new_pbuffer;
245 }
246 }
247
248 return NULL; /* Control never gets here */
249 }
250
251
252
253
254
255
256
257 /*************************************************
258 * Read number from string *
259 *************************************************/
260
/* Read an unsigned decimal number from a string, by hand rather than with
strtoul() because SunOS4 doesn't have it. It is only used for unpicking
arguments, so it is kept simple.

Leading white space is skipped, then decimal digits are accumulated. A value
that does not fit in an int is clamped to INT_MAX instead of overflowing
(signed overflow is undefined behaviour); the remaining digits are still
consumed so that *endptr ends up after the whole number.

Arguments:
  str         string to be converted
  endptr      where to put the end pointer (first character not consumed)

Returns:      the value as an int, 0 if there are no digits, INT_MAX if the
                number is too large
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int result = 0;

  while (*str != 0 && isspace(*str))
    str++;

  while (isdigit(*str)) {
    int digit = (int)(*str++ - '0');

    /* result * 10 + digit would exceed INT_MAX: saturate. Once saturated,
    this test stays true for every further digit. */
    if (result > (INT_MAX - digit) / 10)
      result = INT_MAX;
    else
      result = result * 10 + digit;
  }

  *endptr = str;
  return result;
}
281
282
283
284
285 /*************************************************
286 * Convert UTF-8 string to value *
287 *************************************************/
288
289 /* This function takes one or more bytes that represents a UTF-8 character,
290 and returns the value of the character.
291
292 Argument:
293 utf8bytes a pointer to the byte vector
294 vptr a pointer to an int to receive the value
295
296 Returns: > 0 => the number of bytes consumed
297 -6 to 0 => malformed UTF-8 character at offset = (-return)
298 */
299
300 #if !defined NOUTF8
301
302 static int
303 utf82ord(unsigned char *utf8bytes, int *vptr)
304 {
305 int c = *utf8bytes++;
306 int d = c;
307 int i, j, s;
308
309 for (i = -1; i < 6; i++) /* i is number of additional bytes */
310 {
311 if ((d & 0x80) == 0) break;
312 d <<= 1;
313 }
314
315 if (i == -1) { *vptr = c; return 1; } /* ascii character */
316 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
317
318 /* i now has a value in the range 1-5 */
319
320 s = 6*i;
321 d = (c & utf8_table3[i]) << s;
322
323 for (j = 0; j < i; j++)
324 {
325 c = *utf8bytes++;
326 if ((c & 0xc0) != 0x80) return -(j+1);
327 s -= 6;
328 d |= (c & 0x3f) << s;
329 }
330
331 /* Check that encoding was the correct unique one */
332
333 for (j = 0; j < utf8_table1_size; j++)
334 if (d <= utf8_table1[j]) break;
335 if (j != i) return -(i+1);
336
337 /* Valid value */
338
339 *vptr = d;
340 return i+1;
341 }
342
343 #endif
344
345
346
347 /*************************************************
348 * Convert character value to UTF-8 *
349 *************************************************/
350
351 /* This function takes an integer value in the range 0 - 0x7fffffff
352 and encodes it as a UTF-8 character in 0 to 6 bytes.
353
354 Arguments:
355 cvalue the character value
356 utf8bytes pointer to buffer for result - at least 6 bytes long
357
358 Returns: number of characters placed in the buffer
359 */
360
361 #if !defined NOUTF8
362
363 static int
364 ord2utf8(int cvalue, uschar *utf8bytes)
365 {
366 register int i, j;
367 for (i = 0; i < utf8_table1_size; i++)
368 if (cvalue <= utf8_table1[i]) break;
369 utf8bytes += i;
370 for (j = i; j > 0; j--)
371 {
372 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
373 cvalue >>= 6;
374 }
375 *utf8bytes = utf8_table2[i] | cvalue;
376 return i + 1;
377 }
378
379 #endif
380
381
382
383 /*************************************************
384 * Print character string *
385 *************************************************/
386
387 /* Character string printing function. Must handle UTF-8 strings in utf8
388 mode. Yields number of characters printed. If handed a NULL file, just counts
389 chars without printing. */
390
391 static int pchars(unsigned char *p, int length, FILE *f)
392 {
393 int c = 0;
394 int yield = 0;
395
396 while (length-- > 0)
397 {
398 #if !defined NOUTF8
399 if (use_utf8)
400 {
401 int rc = utf82ord(p, &c);
402
403 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
404 {
405 length -= rc - 1;
406 p += rc;
407 if (PRINTHEX(c))
408 {
409 if (f != NULL) fprintf(f, "%c", c);
410 yield++;
411 }
412 else
413 {
414 int n = 4;
415 if (f != NULL) fprintf(f, "\\x{%02x}", c);
416 yield += (n <= 0x000000ff)? 2 :
417 (n <= 0x00000fff)? 3 :
418 (n <= 0x0000ffff)? 4 :
419 (n <= 0x000fffff)? 5 : 6;
420 }
421 continue;
422 }
423 }
424 #endif
425
426 /* Not UTF-8, or malformed UTF-8 */
427
428 c = *p++;
429 if (PRINTHEX(c))
430 {
431 if (f != NULL) fprintf(f, "%c", c);
432 yield++;
433 }
434 else
435 {
436 if (f != NULL) fprintf(f, "\\x%02x", c);
437 yield += 4;
438 }
439 }
440
441 return yield;
442 }
443
444
445
446 /*************************************************
447 * Callout function *
448 *************************************************/
449
450 /* Called from PCRE as a result of the (?C) item. We print out where we are in
451 the match. Yield zero unless more callouts than the fail count, or the callout
452 data is not zero. */
453
454 static int callout(pcre_callout_block *cb)
455 {
456 FILE *f = (first_callout | callout_extra)? outfile : NULL;
457 int i, pre_start, post_start, subject_length;
458
459 if (callout_extra)
460 {
461 fprintf(f, "Callout %d: last capture = %d\n",
462 cb->callout_number, cb->capture_last);
463
464 for (i = 0; i < cb->capture_top * 2; i += 2)
465 {
466 if (cb->offset_vector[i] < 0)
467 fprintf(f, "%2d: <unset>\n", i/2);
468 else
469 {
470 fprintf(f, "%2d: ", i/2);
471 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
472 cb->offset_vector[i+1] - cb->offset_vector[i], f);
473 fprintf(f, "\n");
474 }
475 }
476 }
477
478 /* Re-print the subject in canonical form, the first time or if giving full
479 datails. On subsequent calls in the same match, we use pchars just to find the
480 printed lengths of the substrings. */
481
482 if (f != NULL) fprintf(f, "--->");
483
484 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
485 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
486 cb->current_position - cb->start_match, f);
487
488 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
489
490 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
491 cb->subject_length - cb->current_position, f);
492
493 if (f != NULL) fprintf(f, "\n");
494
495 /* Always print appropriate indicators, with callout number if not already
496 shown. For automatic callouts, show the pattern offset. */
497
498 if (cb->callout_number == 255)
499 {
500 fprintf(outfile, "%+3d ", cb->pattern_position);
501 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
502 }
503 else
504 {
505 if (callout_extra) fprintf(outfile, " ");
506 else fprintf(outfile, "%3d ", cb->callout_number);
507 }
508
509 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
510 fprintf(outfile, "^");
511
512 if (post_start > 0)
513 {
514 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
515 fprintf(outfile, "^");
516 }
517
518 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
519 fprintf(outfile, " ");
520
521 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
522 pbuffer + cb->pattern_position);
523
524 fprintf(outfile, "\n");
525 first_callout = 0;
526
527 if (cb->callout_data != NULL)
528 {
529 int callout_data = *((int *)(cb->callout_data));
530 if (callout_data != 0)
531 {
532 fprintf(outfile, "Callout data = %d\n", callout_data);
533 return callout_data;
534 }
535 }
536
537 return (cb->callout_number != callout_fail_id)? 0 :
538 (++callout_count >= callout_fail_count)? 1 : 0;
539 }
540
541
542 /*************************************************
543 * Local malloc functions *
544 *************************************************/
545
546 /* Alternative malloc function, to test functionality and show the size of the
547 compiled re. */
548
549 static void *new_malloc(size_t size)
550 {
551 void *block = malloc(size);
552 gotten_store = size;
553 if (show_malloc)
554 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
555 return block;
556 }
557
558 static void new_free(void *block)
559 {
560 if (show_malloc)
561 fprintf(outfile, "free %p\n", block);
562 free(block);
563 }
564
565
566 /* For recursion malloc/free, to test stacking calls */
567
568 static void *stack_malloc(size_t size)
569 {
570 void *block = malloc(size);
571 if (show_malloc)
572 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
573 return block;
574 }
575
576 static void stack_free(void *block)
577 {
578 if (show_malloc)
579 fprintf(outfile, "stack_free %p\n", block);
580 free(block);
581 }
582
583
584 /*************************************************
585 * Call pcre_fullinfo() *
586 *************************************************/
587
588 /* Get one piece of information from the pcre_fullinfo() function */
589
590 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
591 {
592 int rc;
593 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
594 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
595 }
596
597
598
599 /*************************************************
600 * Byte flipping function *
601 *************************************************/
602
/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value      the value to flip
  n          2 to swap the two low-order bytes; any other value reverses the
               four low-order bytes

Returns:     the flipped value; bits above those flipped are zero
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int b0 = value & 0xff;
  unsigned long int b1 = (value >> 8) & 0xff;
  unsigned long int b2 = (value >> 16) & 0xff;
  unsigned long int b3 = (value >> 24) & 0xff;

  if (n == 2) {
    return (b0 << 8) | b1;
  }

  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
612
613
614
615
616 /*************************************************
617 * Check match or recursion limit *
618 *************************************************/
619
620 static int
621 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
622 int start_offset, int options, int *use_offsets, int use_size_offsets,
623 int flag, unsigned long int *limit, int errnumber, const char *msg)
624 {
625 int count;
626 int min = 0;
627 int mid = 64;
628 int max = -1;
629
630 extra->flags |= flag;
631
632 for (;;)
633 {
634 *limit = mid;
635
636 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
637 use_offsets, use_size_offsets);
638
639 if (count == errnumber)
640 {
641 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
642 min = mid;
643 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
644 }
645
646 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
647 count == PCRE_ERROR_PARTIAL)
648 {
649 if (mid == min + 1)
650 {
651 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
652 break;
653 }
654 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
655 max = mid;
656 mid = (min + mid)/2;
657 }
658 else break; /* Some other error */
659 }
660
661 extra->flags &= ~flag;
662 return count;
663 }
664
665
666
667 /*************************************************
668 * Case-independent strncmp() function *
669 *************************************************/
670
671 /*
672 Arguments:
673 s first string
674 t second string
675 n number of characters to compare
676
677 Returns: < 0, = 0, or > 0, according to the comparison
678 */
679
680 static int
681 strncmpic(uschar *s, uschar *t, int n)
682 {
683 while (n--)
684 {
685 int c = tolower(*s++) - tolower(*t++);
686 if (c) return c;
687 }
688 return 0;
689 }
690
691
692
693 /*************************************************
694 * Check newline indicator *
695 *************************************************/
696
697 /* This is used both at compile and run-time to check for <xxx> escapes, where
698 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
699 no match.
700
701 Arguments:
702 p points after the leading '<'
703 f file for error message
704
705 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
706 */
707
708 static int
709 check_newline(uschar *p, FILE *f)
710 {
711 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
712 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
713 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
714 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
715 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
716 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
717 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
718 fprintf(f, "Unknown newline type at: <%s\n", p);
719 return 0;
720 }
721
722
723
724 /*************************************************
725 * Usage function *
726 *************************************************/
727
/* Write the command line summary to stdout. The -dfa and -p lines are left
out when pcretest is built with NODFA or NOPOSIX respectively. */

static void
usage(void)
{
  static const char *const lines[] = {
    "Usage: pcretest [options] [<input> [<output>]]\n",
    " -b show compiled code (bytecode)\n",
    " -C show PCRE compile-time options and exit\n",
    " -d debug: show compiled code and information (-b and -i)\n",
#if !defined NODFA
    " -dfa force DFA matching for all subjects\n",
#endif
    " -help show usage information\n",
    " -i show information about compiled patterns\n",
    " -m output memory used information\n",
    " -o <n> set size of offsets vector to <n>\n",
#if !defined NOPOSIX
    " -p use POSIX interface\n",
#endif
    " -q quiet: do not output PCRE version number at start\n",
    " -S <n> set stack size to <n> megabytes\n",
    " -s output store (memory) used information\n",
    " -t time compilation and execution\n",
    " -t <n> time compilation and execution, repeating <n> times\n",
    " -tm time execution (matching) only\n",
    " -tm <n> time execution (matching) only, repeating <n> times\n"
  };
  int k;

  for (k = 0; k < (int)(sizeof(lines)/sizeof(lines[0])); k++) {
    fputs(lines[k], stdout);
  }
}
753
754
755
756 /*************************************************
757 * Main Program *
758 *************************************************/
759
760 /* Read lines from named file or stdin and write to named file or stdout; lines
761 consist of a regular expression, in delimiters and optionally followed by
762 options, followed by a set of test data, terminated by an empty line. */
763
764 int main(int argc, char **argv)
765 {
766 FILE *infile = stdin;
767 int options = 0;
768 int study_options = 0;
769 int op = 1;
770 int timeit = 0;
771 int timeitm = 0;
772 int showinfo = 0;
773 int showstore = 0;
774 int quiet = 0;
775 int size_offsets = 45;
776 int size_offsets_max;
777 int *offsets = NULL;
778 #if !defined NOPOSIX
779 int posix = 0;
780 #endif
781 int debug = 0;
782 int done = 0;
783 int all_use_dfa = 0;
784 int yield = 0;
785 int stack_size;
786
787 /* These vectors store, end-to-end, a list of captured substring names. Assume
788 that 1024 is plenty long enough for the few names we'll be testing. */
789
790 uschar copynames[1024];
791 uschar getnames[1024];
792
793 uschar *copynamesptr;
794 uschar *getnamesptr;
795
796 /* Get buffers from malloc() so that Electric Fence will check their misuse
797 when I am debugging. They grow automatically when very long lines are read. */
798
799 buffer = (unsigned char *)malloc(buffer_size);
800 dbuffer = (unsigned char *)malloc(buffer_size);
801 pbuffer = (unsigned char *)malloc(buffer_size);
802
803 /* The outfile variable is static so that new_malloc can use it. */
804
805 outfile = stdout;
806
807 /* The following _setmode() stuff is some Windows magic that tells its runtime
808 library to translate CRLF into a single LF character. At least, that's what
809 I've been told: never having used Windows I take this all on trust. Originally
810 it set 0x8000, but then I was advised that _O_BINARY was better. */
811
812 #if defined(_WIN32) || defined(WIN32)
813 _setmode( _fileno( stdout ), _O_BINARY );
814 #endif
815
816 /* Scan options */
817
818 while (argc > 1 && argv[op][0] == '-')
819 {
820 unsigned char *endptr;
821
822 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
823 showstore = 1;
824 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
825 else if (strcmp(argv[op], "-b") == 0) debug = 1;
826 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
827 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
828 #if !defined NODFA
829 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
830 #endif
831 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
832 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
833 *endptr == 0))
834 {
835 op++;
836 argc--;
837 }
838 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
839 {
840 int both = argv[op][2] == 0;
841 int temp;
842 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
843 *endptr == 0))
844 {
845 timeitm = temp;
846 op++;
847 argc--;
848 }
849 else timeitm = LOOPREPEAT;
850 if (both) timeit = timeitm;
851 }
852 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
853 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
854 *endptr == 0))
855 {
856 #if defined(_WIN32) || defined(WIN32)
857 printf("PCRE: -S not supported on this OS\n");
858 exit(1);
859 #else
860 int rc;
861 struct rlimit rlim;
862 getrlimit(RLIMIT_STACK, &rlim);
863 rlim.rlim_cur = stack_size * 1024 * 1024;
864 rc = setrlimit(RLIMIT_STACK, &rlim);
865 if (rc != 0)
866 {
867 printf("PCRE: setrlimit() failed with error %d\n", rc);
868 exit(1);
869 }
870 op++;
871 argc--;
872 #endif
873 }
874 #if !defined NOPOSIX
875 else if (strcmp(argv[op], "-p") == 0) posix = 1;
876 #endif
877 else if (strcmp(argv[op], "-C") == 0)
878 {
879 int rc;
880 printf("PCRE version %s\n", pcre_version());
881 printf("Compiled with\n");
882 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
883 printf(" %sUTF-8 support\n", rc? "" : "No ");
884 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
885 printf(" %sUnicode properties support\n", rc? "" : "No ");
886 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
887 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
888 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
889 (rc == -2)? "ANYCRLF" :
890 (rc == -1)? "ANY" : "???");
891 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
892 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
893 "all Unicode newlines");
894 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
895 printf(" Internal link size = %d\n", rc);
896 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
897 printf(" POSIX malloc threshold = %d\n", rc);
898 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
899 printf(" Default match limit = %d\n", rc);
900 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
901 printf(" Default recursion depth limit = %d\n", rc);
902 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
903 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
904 goto EXIT;
905 }
906 else if (strcmp(argv[op], "-help") == 0 ||
907 strcmp(argv[op], "--help") == 0)
908 {
909 usage();
910 goto EXIT;
911 }
912 else
913 {
914 printf("** Unknown or malformed option %s\n", argv[op]);
915 usage();
916 yield = 1;
917 goto EXIT;
918 }
919 op++;
920 argc--;
921 }
922
923 /* Get the store for the offsets vector, and remember what it was */
924
925 size_offsets_max = size_offsets;
926 offsets = (int *)malloc(size_offsets_max * sizeof(int));
927 if (offsets == NULL)
928 {
929 printf("** Failed to get %d bytes of memory for offsets vector\n",
930 (int)(size_offsets_max * sizeof(int)));
931 yield = 1;
932 goto EXIT;
933 }
934
935 /* Sort out the input and output files */
936
937 if (argc > 1)
938 {
939 infile = fopen(argv[op], INPUT_MODE);
940 if (infile == NULL)
941 {
942 printf("** Failed to open %s\n", argv[op]);
943 yield = 1;
944 goto EXIT;
945 }
946 }
947
948 if (argc > 2)
949 {
950 outfile = fopen(argv[op+1], OUTPUT_MODE);
951 if (outfile == NULL)
952 {
953 printf("** Failed to open %s\n", argv[op+1]);
954 yield = 1;
955 goto EXIT;
956 }
957 }
958
959 /* Set alternative malloc function */
960
961 pcre_malloc = new_malloc;
962 pcre_free = new_free;
963 pcre_stack_malloc = stack_malloc;
964 pcre_stack_free = stack_free;
965
966 /* Heading line unless quiet, then prompt for first regex if stdin */
967
968 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
969
970 /* Main loop */
971
972 while (!done)
973 {
974 pcre *re = NULL;
975 pcre_extra *extra = NULL;
976
977 #if !defined NOPOSIX /* There are still compilers that require no indent */
978 regex_t preg;
979 int do_posix = 0;
980 #endif
981
982 const char *error;
983 unsigned char *p, *pp, *ppp;
984 unsigned char *to_file = NULL;
985 const unsigned char *tables = NULL;
986 unsigned long int true_size, true_study_size = 0;
987 size_t size, regex_gotten_store;
988 int do_study = 0;
989 int do_debug = debug;
990 int do_G = 0;
991 int do_g = 0;
992 int do_showinfo = showinfo;
993 int do_showrest = 0;
994 int do_flip = 0;
995 int erroroffset, len, delimiter, poffset;
996
997 use_utf8 = 0;
998 debug_lengths = 1;
999
1000 if (infile == stdin) printf(" re> ");
1001 if (extend_inputline(infile, buffer) == NULL) break;
1002 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1003 fflush(outfile);
1004
1005 p = buffer;
1006 while (isspace(*p)) p++;
1007 if (*p == 0) continue;
1008
1009 /* See if the pattern is to be loaded pre-compiled from a file. */
1010
1011 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1012 {
1013 unsigned long int magic, get_options;
1014 uschar sbuf[8];
1015 FILE *f;
1016
1017 p++;
1018 pp = p + (int)strlen((char *)p);
1019 while (isspace(pp[-1])) pp--;
1020 *pp = 0;
1021
1022 f = fopen((char *)p, "rb");
1023 if (f == NULL)
1024 {
1025 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1026 continue;
1027 }
1028
1029 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1030
1031 true_size =
1032 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1033 true_study_size =
1034 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1035
1036 re = (real_pcre *)new_malloc(true_size);
1037 regex_gotten_store = gotten_store;
1038
1039 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1040
1041 magic = ((real_pcre *)re)->magic_number;
1042 if (magic != MAGIC_NUMBER)
1043 {
1044 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1045 {
1046 do_flip = 1;
1047 }
1048 else
1049 {
1050 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1051 fclose(f);
1052 continue;
1053 }
1054 }
1055
1056 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1057 do_flip? " (byte-inverted)" : "", p);
1058
1059 /* Need to know if UTF-8 for printing data strings */
1060
1061 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1062 use_utf8 = (get_options & PCRE_UTF8) != 0;
1063
1064 /* Now see if there is any following study data */
1065
1066 if (true_study_size != 0)
1067 {
1068 pcre_study_data *psd;
1069
1070 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1071 extra->flags = PCRE_EXTRA_STUDY_DATA;
1072
1073 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1074 extra->study_data = psd;
1075
1076 if (fread(psd, 1, true_study_size, f) != true_study_size)
1077 {
1078 FAIL_READ:
1079 fprintf(outfile, "Failed to read data from %s\n", p);
1080 if (extra != NULL) new_free(extra);
1081 if (re != NULL) new_free(re);
1082 fclose(f);
1083 continue;
1084 }
1085 fprintf(outfile, "Study data loaded from %s\n", p);
1086 do_study = 1; /* To get the data output if requested */
1087 }
1088 else fprintf(outfile, "No study data\n");
1089
1090 fclose(f);
1091 goto SHOW_INFO;
1092 }
1093
1094 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1095 the pattern; if it isn't complete, read more. */
1096
1097 delimiter = *p++;
1098
1099 if (isalnum(delimiter) || delimiter == '\\')
1100 {
1101 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1102 goto SKIP_DATA;
1103 }
1104
1105 pp = p;
1106 poffset = p - buffer;
1107
1108 for(;;)
1109 {
1110 while (*pp != 0)
1111 {
1112 if (*pp == '\\' && pp[1] != 0) pp++;
1113 else if (*pp == delimiter) break;
1114 pp++;
1115 }
1116 if (*pp != 0) break;
1117 if (infile == stdin) printf(" > ");
1118 if ((pp = extend_inputline(infile, pp)) == NULL)
1119 {
1120 fprintf(outfile, "** Unexpected EOF\n");
1121 done = 1;
1122 goto CONTINUE;
1123 }
1124 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1125 }
1126
1127 /* The buffer may have moved while being extended; reset the start of data
1128 pointer to the correct relative point in the buffer. */
1129
1130 p = buffer + poffset;
1131
1132 /* If the first character after the delimiter is backslash, make
1133 the pattern end with backslash. This is purely to provide a way
1134 of testing for the error message when a pattern ends with backslash. */
1135
1136 if (pp[1] == '\\') *pp++ = '\\';
1137
1138 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1139 for callouts. */
1140
1141 *pp++ = 0;
1142 strcpy((char *)pbuffer, (char *)p);
1143
1144 /* Look for options after final delimiter */
1145
1146 options = 0;
1147 study_options = 0;
1148 log_store = showstore; /* default from command line */
1149
1150 while (*pp != 0)
1151 {
1152 switch (*pp++)
1153 {
1154 case 'f': options |= PCRE_FIRSTLINE; break;
1155 case 'g': do_g = 1; break;
1156 case 'i': options |= PCRE_CASELESS; break;
1157 case 'm': options |= PCRE_MULTILINE; break;
1158 case 's': options |= PCRE_DOTALL; break;
1159 case 'x': options |= PCRE_EXTENDED; break;
1160
1161 case '+': do_showrest = 1; break;
1162 case 'A': options |= PCRE_ANCHORED; break;
1163 case 'B': do_debug = 1; break;
1164 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1165 case 'D': do_debug = do_showinfo = 1; break;
1166 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1167 case 'F': do_flip = 1; break;
1168 case 'G': do_G = 1; break;
1169 case 'I': do_showinfo = 1; break;
1170 case 'J': options |= PCRE_DUPNAMES; break;
1171 case 'M': log_store = 1; break;
1172 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1173
1174 #if !defined NOPOSIX
1175 case 'P': do_posix = 1; break;
1176 #endif
1177
1178 case 'S': do_study = 1; break;
1179 case 'U': options |= PCRE_UNGREEDY; break;
1180 case 'X': options |= PCRE_EXTRA; break;
1181 case 'Z': debug_lengths = 0; break;
1182 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1183 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1184
1185 case 'L':
1186 ppp = pp;
1187 /* The '\r' test here is so that it works on Windows. */
1188 /* The '0' test is just in case this is an unterminated line. */
1189 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1190 *ppp = 0;
1191 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1192 {
1193 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1194 goto SKIP_DATA;
1195 }
1196 locale_set = 1;
1197 tables = pcre_maketables();
1198 pp = ppp;
1199 break;
1200
1201 case '>':
1202 to_file = pp;
1203 while (*pp != 0) pp++;
1204 while (isspace(pp[-1])) pp--;
1205 *pp = 0;
1206 break;
1207
1208 case '<':
1209 {
1210 int x = check_newline(pp, outfile);
1211 if (x == 0) goto SKIP_DATA;
1212 options |= x;
1213 while (*pp++ != '>');
1214 }
1215 break;
1216
1217 case '\r': /* So that it works in Windows */
1218 case '\n':
1219 case ' ':
1220 break;
1221
1222 default:
1223 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1224 goto SKIP_DATA;
1225 }
1226 }
1227
1228 /* Handle compiling via the POSIX interface, which doesn't support the
1229 timing, showing, or debugging options, nor the ability to pass over
1230 local character tables. */
1231
1232 #if !defined NOPOSIX
1233 if (posix || do_posix)
1234 {
1235 int rc;
1236 int cflags = 0;
1237
1238 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1239 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1240 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1241 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1242 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1243
1244 rc = regcomp(&preg, (char *)p, cflags);
1245
1246 /* Compilation failed; go back for another re, skipping to blank line
1247 if non-interactive. */
1248
1249 if (rc != 0)
1250 {
1251 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1252 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1253 goto SKIP_DATA;
1254 }
1255 }
1256
1257 /* Handle compiling via the native interface */
1258
1259 else
1260 #endif /* !defined NOPOSIX */
1261
1262 {
1263 if (timeit > 0)
1264 {
1265 register int i;
1266 clock_t time_taken;
1267 clock_t start_time = clock();
1268 for (i = 0; i < timeit; i++)
1269 {
1270 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1271 if (re != NULL) free(re);
1272 }
1273 time_taken = clock() - start_time;
1274 fprintf(outfile, "Compile time %.4f milliseconds\n",
1275 (((double)time_taken * 1000.0) / (double)timeit) /
1276 (double)CLOCKS_PER_SEC);
1277 }
1278
1279 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1280
1281 /* Compilation failed; go back for another re, skipping to blank line
1282 if non-interactive. */
1283
1284 if (re == NULL)
1285 {
1286 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1287 SKIP_DATA:
1288 if (infile != stdin)
1289 {
1290 for (;;)
1291 {
1292 if (extend_inputline(infile, buffer) == NULL)
1293 {
1294 done = 1;
1295 goto CONTINUE;
1296 }
1297 len = (int)strlen((char *)buffer);
1298 while (len > 0 && isspace(buffer[len-1])) len--;
1299 if (len == 0) break;
1300 }
1301 fprintf(outfile, "\n");
1302 }
1303 goto CONTINUE;
1304 }
1305
1306 /* Compilation succeeded; print data if required. There are now two
1307 info-returning functions. The old one has a limited interface and
1308 returns only limited data. Check that it agrees with the newer one. */
1309
1310 if (log_store)
1311 fprintf(outfile, "Memory allocation (code space): %d\n",
1312 (int)(gotten_store -
1313 sizeof(real_pcre) -
1314 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1315
1316 /* Extract the size for possible writing before possibly flipping it,
1317 and remember the store that was got. */
1318
1319 true_size = ((real_pcre *)re)->size;
1320 regex_gotten_store = gotten_store;
1321
1322 /* If /S was present, study the regexp to generate additional info to
1323 help with the matching. */
1324
1325 if (do_study)
1326 {
1327 if (timeit > 0)
1328 {
1329 register int i;
1330 clock_t time_taken;
1331 clock_t start_time = clock();
1332 for (i = 0; i < timeit; i++)
1333 extra = pcre_study(re, study_options, &error);
1334 time_taken = clock() - start_time;
1335 if (extra != NULL) free(extra);
1336 fprintf(outfile, " Study time %.4f milliseconds\n",
1337 (((double)time_taken * 1000.0) / (double)timeit) /
1338 (double)CLOCKS_PER_SEC);
1339 }
1340 extra = pcre_study(re, study_options, &error);
1341 if (error != NULL)
1342 fprintf(outfile, "Failed to study: %s\n", error);
1343 else if (extra != NULL)
1344 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1345 }
1346
1347 /* If the 'F' option was present, we flip the bytes of all the integer
1348 fields in the regex data block and the study block. This is to make it
1349 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1350 compiled on a different architecture. */
1351
1352 if (do_flip)
1353 {
1354 real_pcre *rre = (real_pcre *)re;
1355 rre->magic_number =
1356 byteflip(rre->magic_number, sizeof(rre->magic_number));
1357 rre->size = byteflip(rre->size, sizeof(rre->size));
1358 rre->options = byteflip(rre->options, sizeof(rre->options));
1359 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1360 rre->top_bracket =
1361 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1362 rre->top_backref =
1363 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1364 rre->first_byte =
1365 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1366 rre->req_byte =
1367 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1368 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1369 sizeof(rre->name_table_offset));
1370 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1371 sizeof(rre->name_entry_size));
1372 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1373 sizeof(rre->name_count));
1374
1375 if (extra != NULL)
1376 {
1377 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1378 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1379 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1380 }
1381 }
1382
1383 /* Extract information from the compiled data if required */
1384
1385 SHOW_INFO:
1386
1387 if (do_debug)
1388 {
1389 fprintf(outfile, "------------------------------------------------------------------\n");
1390 pcre_printint(re, outfile, debug_lengths);
1391 }
1392
1393 if (do_showinfo)
1394 {
1395 unsigned long int get_options, all_options;
1396 #if !defined NOINFOCHECK
1397 int old_first_char, old_options, old_count;
1398 #endif
1399 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1400 hascrorlf;
1401 int nameentrysize, namecount;
1402 const uschar *nametable;
1403
1404 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1405 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1406 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1407 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1408 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1409 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1410 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1411 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1412 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1413 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1414 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1415 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1416
1417 #if !defined NOINFOCHECK
1418 old_count = pcre_info(re, &old_options, &old_first_char);
1419 if (count < 0) fprintf(outfile,
1420 "Error %d from pcre_info()\n", count);
1421 else
1422 {
1423 if (old_count != count) fprintf(outfile,
1424 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1425 old_count);
1426
1427 if (old_first_char != first_char) fprintf(outfile,
1428 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1429 first_char, old_first_char);
1430
1431 if (old_options != (int)get_options) fprintf(outfile,
1432 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1433 get_options, old_options);
1434 }
1435 #endif
1436
1437 if (size != regex_gotten_store) fprintf(outfile,
1438 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1439 (int)size, (int)regex_gotten_store);
1440
1441 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1442 if (backrefmax > 0)
1443 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1444
1445 if (namecount > 0)
1446 {
1447 fprintf(outfile, "Named capturing subpatterns:\n");
1448 while (namecount-- > 0)
1449 {
1450 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1451 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1452 GET2(nametable, 0));
1453 nametable += nameentrysize;
1454 }
1455 }
1456
1457 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1458 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1459
1460 all_options = ((real_pcre *)re)->options;
1461 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1462
1463 if (get_options == 0) fprintf(outfile, "No options\n");
1464 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1465 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1466 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1467 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1468 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1469 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1470 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1471 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1472 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1473 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1474 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1475 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1476 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1477 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1478 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1479 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1480
1481 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1482
1483 switch (get_options & PCRE_NEWLINE_BITS)
1484 {
1485 case PCRE_NEWLINE_CR:
1486 fprintf(outfile, "Forced newline sequence: CR\n");
1487 break;
1488
1489 case PCRE_NEWLINE_LF:
1490 fprintf(outfile, "Forced newline sequence: LF\n");
1491 break;
1492
1493 case PCRE_NEWLINE_CRLF:
1494 fprintf(outfile, "Forced newline sequence: CRLF\n");
1495 break;
1496
1497 case PCRE_NEWLINE_ANYCRLF:
1498 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1499 break;
1500
1501 case PCRE_NEWLINE_ANY:
1502 fprintf(outfile, "Forced newline sequence: ANY\n");
1503 break;
1504
1505 default:
1506 break;
1507 }
1508
1509 if (first_char == -1)
1510 {
1511 fprintf(outfile, "First char at start or follows newline\n");
1512 }
1513 else if (first_char < 0)
1514 {
1515 fprintf(outfile, "No first char\n");
1516 }
1517 else
1518 {
1519 int ch = first_char & 255;
1520 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1521 "" : " (caseless)";
1522 if (PRINTHEX(ch))
1523 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1524 else
1525 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1526 }
1527
1528 if (need_char < 0)
1529 {
1530 fprintf(outfile, "No need char\n");
1531 }
1532 else
1533 {
1534 int ch = need_char & 255;
1535 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1536 "" : " (caseless)";
1537 if (PRINTHEX(ch))
1538 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1539 else
1540 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1541 }
1542
1543 /* Don't output study size; at present it is in any case a fixed
1544 value, but it varies, depending on the computer architecture, and
1545 so messes up the test suite. (And with the /F option, it might be
1546 flipped.) */
1547
1548 if (do_study)
1549 {
1550 if (extra == NULL)
1551 fprintf(outfile, "Study returned NULL\n");
1552 else
1553 {
1554 uschar *start_bits = NULL;
1555 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1556
1557 if (start_bits == NULL)
1558 fprintf(outfile, "No starting byte set\n");
1559 else
1560 {
1561 int i;
1562 int c = 24;
1563 fprintf(outfile, "Starting byte set: ");
1564 for (i = 0; i < 256; i++)
1565 {
1566 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1567 {
1568 if (c > 75)
1569 {
1570 fprintf(outfile, "\n ");
1571 c = 2;
1572 }
1573 if (PRINTHEX(i) && i != ' ')
1574 {
1575 fprintf(outfile, "%c ", i);
1576 c += 2;
1577 }
1578 else
1579 {
1580 fprintf(outfile, "\\x%02x ", i);
1581 c += 5;
1582 }
1583 }
1584 }
1585 fprintf(outfile, "\n");
1586 }
1587 }
1588 }
1589 }
1590
1591 /* If the '>' option was present, we write out the regex to a file, and
1592 that is all. The first 8 bytes of the file are the regex length and then
1593 the study length, in big-endian order. */
1594
1595 if (to_file != NULL)
1596 {
1597 FILE *f = fopen((char *)to_file, "wb");
1598 if (f == NULL)
1599 {
1600 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1601 }
1602 else
1603 {
1604 uschar sbuf[8];
1605 sbuf[0] = (uschar)((true_size >> 24) & 255);
1606 sbuf[1] = (uschar)((true_size >> 16) & 255);
1607 sbuf[2] = (uschar)((true_size >> 8) & 255);
1608 sbuf[3] = (uschar)((true_size) & 255);
1609
1610 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1611 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1612 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1613 sbuf[7] = (uschar)((true_study_size) & 255);
1614
1615 if (fwrite(sbuf, 1, 8, f) < 8 ||
1616 fwrite(re, 1, true_size, f) < true_size)
1617 {
1618 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1619 }
1620 else
1621 {
1622 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1623 if (extra != NULL)
1624 {
1625 if (fwrite(extra->study_data, 1, true_study_size, f) <
1626 true_study_size)
1627 {
1628 fprintf(outfile, "Write error on %s: %s\n", to_file,
1629 strerror(errno));
1630 }
1631 else fprintf(outfile, "Study data written to %s\n", to_file);
1632
1633 }
1634 }
1635 fclose(f);
1636 }
1637
1638 new_free(re);
1639 if (extra != NULL) new_free(extra);
1640 if (tables != NULL) new_free((void *)tables);
1641 continue; /* With next regex */
1642 }
1643 } /* End of non-POSIX compile */
1644
1645 /* Read data lines and test them */
1646
1647 for (;;)
1648 {
1649 uschar *q;
1650 uschar *bptr;
1651 int *use_offsets = offsets;
1652 int use_size_offsets = size_offsets;
1653 int callout_data = 0;
1654 int callout_data_set = 0;
1655 int count, c;
1656 int copystrings = 0;
1657 int find_match_limit = 0;
1658 int getstrings = 0;
1659 int getlist = 0;
1660 int gmatched = 0;
1661 int start_offset = 0;
1662 int g_notempty = 0;
1663 int use_dfa = 0;
1664
1665 options = 0;
1666
1667 *copynames = 0;
1668 *getnames = 0;
1669
1670 copynamesptr = copynames;
1671 getnamesptr = getnames;
1672
1673 pcre_callout = callout;
1674 first_callout = 1;
1675 callout_extra = 0;
1676 callout_count = 0;
1677 callout_fail_count = 999999;
1678 callout_fail_id = -1;
1679 show_malloc = 0;
1680
1681 if (extra != NULL) extra->flags &=
1682 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1683
1684 len = 0;
1685 for (;;)
1686 {
1687 if (infile == stdin) printf("data> ");
1688 if (extend_inputline(infile, buffer + len) == NULL)
1689 {
1690 if (len > 0) break;
1691 done = 1;
1692 goto CONTINUE;
1693 }
1694 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1695 len = (int)strlen((char *)buffer);
1696 if (buffer[len-1] == '\n') break;
1697 }
1698
1699 while (len > 0 && isspace(buffer[len-1])) len--;
1700 buffer[len] = 0;
1701 if (len == 0) break;
1702
1703 p = buffer;
1704 while (isspace(*p)) p++;
1705
1706 bptr = q = dbuffer;
1707 while ((c = *p++) != 0)
1708 {
1709 int i = 0;
1710 int n = 0;
1711
1712 if (c == '\\') switch ((c = *p++))
1713 {
1714 case 'a': c = 7; break;
1715 case 'b': c = '\b'; break;
1716 case 'e': c = 27; break;
1717 case 'f': c = '\f'; break;
1718 case 'n': c = '\n'; break;
1719 case 'r': c = '\r'; break;
1720 case 't': c = '\t'; break;
1721 case 'v': c = '\v'; break;
1722
1723 case '0': case '1': case '2': case '3':
1724 case '4': case '5': case '6': case '7':
1725 c -= '0';
1726 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1727 c = c * 8 + *p++ - '0';
1728
1729 #if !defined NOUTF8
1730 if (use_utf8 && c > 255)
1731 {
1732 unsigned char buff8[8];
1733 int ii, utn;
1734 utn = ord2utf8(c, buff8);
1735 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1736 c = buff8[ii]; /* Last byte */
1737 }
1738 #endif
1739 break;
1740
1741 case 'x':
1742
1743 /* Handle \x{..} specially - new Perl thing for utf8 */
1744
1745 #if !defined NOUTF8
1746 if (*p == '{')
1747 {
1748 unsigned char *pt = p;
1749 c = 0;
1750 while (isxdigit(*(++pt)))
1751 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1752 if (*pt == '}')
1753 {
1754 unsigned char buff8[8];
1755 int ii, utn;
1756 utn = ord2utf8(c, buff8);
1757 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1758 c = buff8[ii]; /* Last byte */
1759 p = pt + 1;
1760 break;
1761 }
1762 /* Not correct form; fall through */
1763 }
1764 #endif
1765
1766 /* Ordinary \x */
1767
1768 c = 0;
1769 while (i++ < 2 && isxdigit(*p))
1770 {
1771 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1772 p++;
1773 }
1774 break;
1775
1776 case 0: /* \ followed by EOF allows for an empty line */
1777 p--;
1778 continue;
1779
1780 case '>':
1781 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1782 continue;
1783
1784 case 'A': /* Option setting */
1785 options |= PCRE_ANCHORED;
1786 continue;
1787
1788 case 'B':
1789 options |= PCRE_NOTBOL;
1790 continue;
1791
1792 case 'C':
1793 if (isdigit(*p)) /* Set copy string */
1794 {
1795 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1796 copystrings |= 1 << n;
1797 }
1798 else if (isalnum(*p))
1799 {
1800 uschar *npp = copynamesptr;
1801 while (isalnum(*p)) *npp++ = *p++;
1802 *npp++ = 0;
1803 *npp = 0;
1804 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1805 if (n < 0)
1806 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1807 copynamesptr = npp;
1808 }
1809 else if (*p == '+')
1810 {
1811 callout_extra = 1;
1812 p++;
1813 }
1814 else if (*p == '-')
1815 {
1816 pcre_callout = NULL;
1817 p++;
1818 }
1819 else if (*p == '!')
1820 {
1821 callout_fail_id = 0;
1822 p++;
1823 while(isdigit(*p))
1824 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1825 callout_fail_count = 0;
1826 if (*p == '!')
1827 {
1828 p++;
1829 while(isdigit(*p))
1830 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1831 }
1832 }
1833 else if (*p == '*')
1834 {
1835 int sign = 1;
1836 callout_data = 0;
1837 if (*(++p) == '-') { sign = -1; p++; }
1838 while(isdigit(*p))
1839 callout_data = callout_data * 10 + *p++ - '0';
1840 callout_data *= sign;
1841 callout_data_set = 1;
1842 }
1843 continue;
1844
1845 #if !defined NODFA
1846 case 'D':
1847 #if !defined NOPOSIX
1848 if (posix || do_posix)
1849 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1850 else
1851 #endif
1852 use_dfa = 1;
1853 continue;
1854
1855 case 'F':
1856 options |= PCRE_DFA_SHORTEST;
1857 continue;
1858 #endif
1859
1860 case 'G':
1861 if (isdigit(*p))
1862 {
1863 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1864 getstrings |= 1 << n;
1865 }
1866 else if (isalnum(*p))
1867 {
1868 uschar *npp = getnamesptr;
1869 while (isalnum(*p)) *npp++ = *p++;
1870 *npp++ = 0;
1871 *npp = 0;
1872 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1873 if (n < 0)
1874 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1875 getnamesptr = npp;
1876 }
1877 continue;
1878
1879 case 'L':
1880 getlist = 1;
1881 continue;
1882
1883 case 'M':
1884 find_match_limit = 1;
1885 continue;
1886
1887 case 'N':
1888 options |= PCRE_NOTEMPTY;
1889 continue;
1890
1891 case 'O':
1892 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1893 if (n > size_offsets_max)
1894 {
1895 size_offsets_max = n;
1896 free(offsets);
1897 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1898 if (offsets == NULL)
1899 {
1900 printf("** Failed to get %d bytes of memory for offsets vector\n",
1901 (int)(size_offsets_max * sizeof(int)));
1902 yield = 1;
1903 goto EXIT;
1904 }
1905 }
1906 use_size_offsets = n;
1907 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1908 continue;
1909
1910 case 'P':
1911 options |= PCRE_PARTIAL;
1912 continue;
1913
1914 case 'Q':
1915 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1916 if (extra == NULL)
1917 {
1918 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1919 extra->flags = 0;
1920 }
1921 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1922 extra->match_limit_recursion = n;
1923 continue;
1924
1925 case 'q':
1926 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1927 if (extra == NULL)
1928 {
1929 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1930 extra->flags = 0;
1931 }
1932 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1933 extra->match_limit = n;
1934 continue;
1935
1936 #if !defined NODFA
1937 case 'R':
1938 options |= PCRE_DFA_RESTART;
1939 continue;
1940 #endif
1941
1942 case 'S':
1943 show_malloc = 1;
1944 continue;
1945
1946 case 'Z':
1947 options |= PCRE_NOTEOL;
1948 continue;
1949
1950 case '?':
1951 options |= PCRE_NO_UTF8_CHECK;
1952 continue;
1953
1954 case '<':
1955 {
1956 int x = check_newline(p, outfile);
1957 if (x == 0) goto NEXT_DATA;
1958 options |= x;
1959 while (*p++ != '>');
1960 }
1961 continue;
1962 }
1963 *q++ = c;
1964 }
1965 *q = 0;
1966 len = q - dbuffer;
1967
1968 if ((all_use_dfa || use_dfa) && find_match_limit)
1969 {
1970 printf("**Match limit not relevant for DFA matching: ignored\n");
1971 find_match_limit = 0;
1972 }
1973
1974 /* Handle matching via the POSIX interface, which does not
1975 support timing or playing with the match limit or callout data. */
1976
1977 #if !defined NOPOSIX
1978 if (posix || do_posix)
1979 {
1980 int rc;
1981 int eflags = 0;
1982 regmatch_t *pmatch = NULL;
1983 if (use_size_offsets > 0)
1984 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1985 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1986 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1987
1988 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1989
1990 if (rc != 0)
1991 {
1992 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1993 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1994 }
1995 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1996 != 0)
1997 {
1998 fprintf(outfile, "Matched with REG_NOSUB\n");
1999 }
2000 else
2001 {
2002 size_t i;
2003 for (i = 0; i < (size_t)use_size_offsets; i++)
2004 {
2005 if (pmatch[i].rm_so >= 0)
2006 {
2007 fprintf(outfile, "%2d: ", (int)i);
2008 (void)pchars(dbuffer + pmatch[i].rm_so,
2009 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2010 fprintf(outfile, "\n");
2011 if (i == 0 && do_showrest)
2012 {
2013 fprintf(outfile, " 0+ ");
2014 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2015 outfile);
2016 fprintf(outfile, "\n");
2017 }
2018 }
2019 }
2020 }
2021 free(pmatch);
2022 }
2023
2024 /* Handle matching via the native interface - repeats for /g and /G */
2025
2026 else
2027 #endif /* !defined NOPOSIX */
2028
2029 for (;; gmatched++) /* Loop for /g or /G */
2030 {
2031 if (timeitm > 0)
2032 {
2033 register int i;
2034 clock_t time_taken;
2035 clock_t start_time = clock();
2036
2037 #if !defined NODFA
2038 if (all_use_dfa || use_dfa)
2039 {
2040 int workspace[1000];
2041 for (i = 0; i < timeitm; i++)
2042 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2043 options | g_notempty, use_offsets, use_size_offsets, workspace,
2044 sizeof(workspace)/sizeof(int));
2045 }
2046 else
2047 #endif
2048
2049 for (i = 0; i < timeitm; i++)
2050 count = pcre_exec(re, extra, (char *)bptr, len,
2051 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2052
2053 time_taken = clock() - start_time;
2054 fprintf(outfile, "Execute time %.4f milliseconds\n",
2055 (((double)time_taken * 1000.0) / (double)timeitm) /
2056 (double)CLOCKS_PER_SEC);
2057 }
2058
2059 /* If find_match_limit is set, we want to do repeated matches with
2060 varying limits in order to find the minimum value for the match limit and
2061 for the recursion limit. */
2062
2063 if (find_match_limit)
2064 {
2065 if (extra == NULL)
2066 {
2067 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2068 extra->flags = 0;
2069 }
2070
2071 (void)check_match_limit(re, extra, bptr, len, start_offset,
2072 options|g_notempty, use_offsets, use_size_offsets,
2073 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2074 PCRE_ERROR_MATCHLIMIT, "match()");
2075
2076 count = check_match_limit(re, extra, bptr, len, start_offset,
2077 options|g_notempty, use_offsets, use_size_offsets,
2078 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2079 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2080 }
2081
2082 /* If callout_data is set, use the interface with additional data */
2083
2084 else if (callout_data_set)
2085 {
2086 if (extra == NULL)
2087 {
2088 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2089 extra->flags = 0;
2090 }
2091 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2092 extra->callout_data = &callout_data;
2093 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2094 options | g_notempty, use_offsets, use_size_offsets);
2095 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2096 }
2097
2098 /* The normal case is just to do the match once, with the default
2099 value of match_limit. */
2100
2101 #if !defined NODFA
2102 else if (all_use_dfa || use_dfa)
2103 {
2104 int workspace[1000];
2105 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2106 options | g_notempty, use_offsets, use_size_offsets, workspace,
2107 sizeof(workspace)/sizeof(int));
2108 if (count == 0)
2109 {
2110 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2111 count = use_size_offsets/2;
2112 }
2113 }
2114 #endif
2115
2116 else
2117 {
2118 count = pcre_exec(re, extra, (char *)bptr, len,
2119 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2120 if (count == 0)
2121 {
2122 fprintf(outfile, "Matched, but too many substrings\n");
2123 count = use_size_offsets/3;
2124 }
2125 }
2126
2127 /* Matched */
2128
2129 if (count >= 0)
2130 {
2131 int i, maxcount;
2132
2133 #if !defined NODFA
2134 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2135 #endif
2136 maxcount = use_size_offsets/3;
2137
2138 /* This is a check against a lunatic return value. */
2139
2140 if (count > maxcount)
2141 {
2142 fprintf(outfile,
2143 "** PCRE error: returned count %d is too big for offset size %d\n",
2144 count, use_size_offsets);
2145 count = use_size_offsets/3;
2146 if (do_g || do_G)
2147 {
2148 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2149 do_g = do_G = FALSE; /* Break g/G loop */
2150 }
2151 }
2152
2153 for (i = 0; i < count * 2; i += 2)
2154 {
2155 if (use_offsets[i] < 0)
2156 fprintf(outfile, "%2d: <unset>\n", i/2);
2157 else
2158 {
2159 fprintf(outfile, "%2d: ", i/2);
2160 (void)pchars(bptr + use_offsets[i],
2161 use_offsets[i+1] - use_offsets[i], outfile);
2162 fprintf(outfile, "\n");
2163 if (i == 0)
2164 {
2165 if (do_showrest)
2166 {
2167 fprintf(outfile, " 0+ ");
2168 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2169 outfile);
2170 fprintf(outfile, "\n");
2171 }
2172 }
2173 }
2174 }
2175
2176 for (i = 0; i < 32; i++)
2177 {
2178 if ((copystrings & (1 << i)) != 0)
2179 {
2180 char copybuffer[256];
2181 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2182 i, copybuffer, sizeof(copybuffer));
2183 if (rc < 0)
2184 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2185 else
2186 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2187 }
2188 }
2189
2190 for (copynamesptr = copynames;
2191 *copynamesptr != 0;
2192 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2193 {
2194 char copybuffer[256];
2195 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2196 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2197 if (rc < 0)
2198 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2199 else
2200 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2201 }
2202
2203 for (i = 0; i < 32; i++)
2204 {
2205 if ((getstrings & (1 << i)) != 0)
2206 {
2207 const char *substring;
2208 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2209 i, &substring);
2210 if (rc < 0)
2211 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2212 else
2213 {
2214 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2215 pcre_free_substring(substring);
2216 }
2217 }
2218 }
2219
2220 for (getnamesptr = getnames;
2221 *getnamesptr != 0;
2222 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2223 {
2224 const char *substring;
2225 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2226 count, (char *)getnamesptr, &substring);
2227 if (rc < 0)
2228 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2229 else
2230 {
2231 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2232 pcre_free_substring(substring);
2233 }
2234 }
2235
2236 if (getlist)
2237 {
2238 const char **stringlist;
2239 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2240 &stringlist);
2241 if (rc < 0)
2242 fprintf(outfile, "get substring list failed %d\n", rc);
2243 else
2244 {
2245 for (i = 0; i < count; i++)
2246 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2247 if (stringlist[i] != NULL)
2248 fprintf(outfile, "string list not terminated by NULL\n");
2249 /* free((void *)stringlist); */
2250 pcre_free_substring_list(stringlist);
2251 }
2252 }
2253 }
2254
2255 /* There was a partial match */
2256
2257 else if (count == PCRE_ERROR_PARTIAL)
2258 {
2259 fprintf(outfile, "Partial match");
2260 #if !defined NODFA
2261 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2262 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2263 bptr + use_offsets[0]);
2264 #endif
2265 fprintf(outfile, "\n");
2266 break; /* Out of the /g loop */
2267 }
2268
2269 /* Failed to match. If this is a /g or /G loop and we previously set
2270 g_notempty after a null match, this is not necessarily the end. We want
2271 to advance the start offset, and continue. We won't be at the end of the
2272 string - that was checked before setting g_notempty.
2273
2274 Complication arises in the case when the newline option is "any" or
2275 "anycrlf". If the previous match was at the end of a line terminated by
2276 CRLF, an advance of one character just passes the \r, whereas we should
2277 prefer the longer newline sequence, as does the code in pcre_exec().
2278 Fudge the offset value to achieve this.
2279
2280 Otherwise, in the case of UTF-8 matching, the advance must be one
2281 character, not one byte. */
2282
2283 else
2284 {
2285 if (g_notempty != 0)
2286 {
2287 int onechar = 1;
2288 unsigned int obits = ((real_pcre *)re)->options;
2289 use_offsets[0] = start_offset;
2290 if ((obits & PCRE_NEWLINE_BITS) == 0)
2291 {
2292 int d;
2293 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2294 obits = (d == '\r')? PCRE_NEWLINE_CR :
2295 (d == '\n')? PCRE_NEWLINE_LF :
2296 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2297 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2298 (d == -1)? PCRE_NEWLINE_ANY : 0;
2299 }
2300 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2301 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2302 &&
2303 start_offset < len - 1 &&
2304 bptr[start_offset] == '\r' &&
2305 bptr[start_offset+1] == '\n')
2306 onechar++;
2307 else if (use_utf8)
2308 {
2309 while (start_offset + onechar < len)
2310 {
2311 int tb = bptr[start_offset+onechar];
2312 if (tb <= 127) break;
2313 tb &= 0xc0;
2314 if (tb != 0 && tb != 0xc0) onechar++;
2315 }
2316 }
2317 use_offsets[1] = start_offset + onechar;
2318 }
2319 else
2320 {
2321 if (count == PCRE_ERROR_NOMATCH)
2322 {
2323 if (gmatched == 0) fprintf(outfile, "No match\n");
2324 }
2325 else fprintf(outfile, "Error %d\n", count);
2326 break; /* Out of the /g loop */
2327 }
2328 }
2329
2330 /* If not /g or /G we are done */
2331
2332 if (!do_g && !do_G) break;
2333
2334 /* If we have matched an empty string, first check to see if we are at
2335 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2336 what Perl's /g option does. This turns out to be rather cunning. First
2337 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2338 same point. If this fails (picked up above) we advance to the next
2339 character. */
2340
2341 g_notempty = 0;
2342
2343 if (use_offsets[0] == use_offsets[1])
2344 {
2345 if (use_offsets[0] == len) break;
2346 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2347 }
2348
2349 /* For /g, update the start offset, leaving the rest alone */
2350
2351 if (do_g) start_offset = use_offsets[1];
2352
2353 /* For /G, update the pointer and length */
2354
2355 else
2356 {
2357 bptr += use_offsets[1];
2358 len -= use_offsets[1];
2359 }
2360 } /* End of loop for /g and /G */
2361
2362 NEXT_DATA: continue;
2363 } /* End of loop for data lines */
2364
2365 CONTINUE:
2366
2367 #if !defined NOPOSIX
2368 if (posix || do_posix) regfree(&preg);
2369 #endif
2370
2371 if (re != NULL) new_free(re);
2372 if (extra != NULL) new_free(extra);
2373 if (tables != NULL)
2374 {
2375 new_free((void *)tables);
2376 setlocale(LC_CTYPE, "C");
2377 locale_set = 0;
2378 }
2379 }
2380
2381 if (infile == stdin) fprintf(outfile, "\n");
2382
2383 EXIT:
2384
2385 if (infile != NULL && infile != stdin) fclose(infile);
2386 if (outfile != NULL && outfile != stdout) fclose(outfile);
2387
2388 free(buffer);
2389 free(dbuffer);
2390 free(pbuffer);
2391 free(offsets);
2392
2393 return yield;
2394 }
2395
2396 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12