/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 336 - (show annotations) (download)
Sat Apr 12 15:59:03 2008 UTC (6 years, 4 months ago) by ph10
File MIME type: text/plain
File size: 71441 byte(s)
Added PCRE_JAVASCRIPT_COMPAT option.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <locale.h>
49 #include <errno.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #include <unistd.h>
53 #include <readline/readline.h>
54 #include <readline/history.h>
55 #endif
56
57
58 /* A number of things vary for Windows builds. Originally, pcretest opened its
59 input and output without "b"; then I was told that "b" was needed in some
60 environments, so it was added for release 5.0 to both the input and output. (It
61 makes no difference on Unix-like systems.) Later I was told that it is wrong
62 for the input on Windows. I've now abstracted the modes into two macros that
63 are set here, to make it easier to fiddle with them, and removed "b" from the
64 input mode under Windows. */
65
66 #if defined(_WIN32) || defined(WIN32)
67 #include <io.h> /* For _setmode() */
68 #include <fcntl.h> /* For _O_BINARY */
69 #define INPUT_MODE "r"
70 #define OUTPUT_MODE "wb"
71
72 #else
73 #include <sys/time.h> /* These two includes are needed */
74 #include <sys/resource.h> /* for setrlimit(). */
75 #define INPUT_MODE "rb"
76 #define OUTPUT_MODE "wb"
77 #endif
78
79
80 /* We have to include pcre_internal.h because we need the internal info for
81 displaying the results of pcre_study() and we also need to know about the
82 internal macros, structures, and other internal data values; pcretest has
83 "inside information" compared to a program that strictly follows the PCRE API.
84
85 Although pcre_internal.h does itself include pcre.h, we explicitly include it
86 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
87 appropriately for an application, not for building PCRE. */
88
89 #include "pcre.h"
90 #include "pcre_internal.h"
91
92 /* We need access to the data tables that PCRE uses. So as not to have to keep
93 two copies, we include the source file here, changing the names of the external
94 symbols to prevent clashes. */
95
96 #define _pcre_utf8_table1 utf8_table1
97 #define _pcre_utf8_table1_size utf8_table1_size
98 #define _pcre_utf8_table2 utf8_table2
99 #define _pcre_utf8_table3 utf8_table3
100 #define _pcre_utf8_table4 utf8_table4
101 #define _pcre_utt utt
102 #define _pcre_utt_size utt_size
103 #define _pcre_utt_names utt_names
104 #define _pcre_OP_lengths OP_lengths
105
106 #include "pcre_tables.c"
107
108 /* We also need the pcre_printint() function for printing out compiled
109 patterns. This function is in a separate file so that it can be included in
110 pcre_compile.c when that module is compiled with debugging enabled.
111
112 The definition of the macro PRINTABLE, which determines whether to print an
113 output character as-is or as a hex value when showing compiled patterns, is
114 contained in this file. We use it here also, in cases when the locale has not
115 been explicitly changed, so as to get consistent output from systems that
116 differ in their output from isprint() even in the "C" locale. */
117
118 #include "pcre_printint.src"
119
120 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
121
122
123 /* It is possible to compile this test program without including support for
124 testing the POSIX interface, though this is not available via the standard
125 Makefile. */
126
127 #if !defined NOPOSIX
128 #include "pcreposix.h"
129 #endif
130
131 /* It is also possible, for the benefit of the version currently imported into
132 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
133 interface to the DFA matcher (NODFA), and without the doublecheck of the old
134 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
135 UTF8 support if PCRE is built without it. */
136
137 #ifndef SUPPORT_UTF8
138 #ifndef NOUTF8
139 #define NOUTF8
140 #endif
141 #endif
142
143
144 /* Other parameters */
145
146 #ifndef CLOCKS_PER_SEC
147 #ifdef CLK_TCK
148 #define CLOCKS_PER_SEC CLK_TCK
149 #else
150 #define CLOCKS_PER_SEC 100
151 #endif
152 #endif
153
154 /* This is the default loop count for timing. */
155
156 #define LOOPREPEAT 500000
157
158 /* Static variables */
159
160 static FILE *outfile;
161 static int log_store = 0;
162 static int callout_count;
163 static int callout_extra;
164 static int callout_fail_count;
165 static int callout_fail_id;
166 static int debug_lengths;
167 static int first_callout;
168 static int locale_set = 0;
169 static int show_malloc;
170 static int use_utf8;
171 static size_t gotten_store;
172
173 /* The buffers grow automatically if very long input lines are encountered. */
174
175 static int buffer_size = 50000;
176 static uschar *buffer = NULL;
177 static uschar *dbuffer = NULL;
178 static uschar *pbuffer = NULL;
179
180
181
182 /*************************************************
183 * Read or extend an input line *
184 *************************************************/
185
186 /* Input lines are read into buffer, but both patterns and data lines can be
187 continued over multiple input lines. In addition, if the buffer fills up, we
188 want to automatically expand it so as to be able to handle extremely large
189 lines that are needed for certain stress tests. When the input buffer is
190 expanded, the other two buffers must also be expanded likewise, and the
191 contents of pbuffer, which are a copy of the input for callouts, must be
192 preserved (for when expansion happens for a data line). This is not the most
193 optimal way of handling this, but hey, this is just a test program!
194
195 Arguments:
196 f the file to read
197 start where in buffer to start (this *must* be within buffer)
198 prompt for stdin or readline()
199
200 Returns: pointer to the start of new data
201 could be a copy of start, or could be moved
202 NULL if no data read and EOF reached
203 */
204
205 static uschar *
206 extend_inputline(FILE *f, uschar *start, const char *prompt)
207 {
208 uschar *here = start;
209
210 for (;;)
211 {
212 int rlen = buffer_size - (here - buffer);
213
214 if (rlen > 1000)
215 {
216 int dlen;
217
218 /* If libreadline support is required, use readline() to read a line if the
219 input is a terminal. Note that readline() removes the trailing newline, so
220 we must put it back again, to be compatible with fgets(). */
221
222 #ifdef SUPPORT_LIBREADLINE
223 if (isatty(fileno(f)))
224 {
225 size_t len;
226 char *s = readline(prompt);
227 if (s == NULL) return (here == start)? NULL : start;
228 len = strlen(s);
229 if (len > 0) add_history(s);
230 if (len > rlen - 1) len = rlen - 1;
231 memcpy(here, s, len);
232 here[len] = '\n';
233 here[len+1] = 0;
234 free(s);
235 }
236 else
237 #endif
238
239 /* Read the next line by normal means, prompting if the file is stdin. */
240
241 {
242 if (f == stdin) printf(prompt);
243 if (fgets((char *)here, rlen, f) == NULL)
244 return (here == start)? NULL : start;
245 }
246
247 dlen = (int)strlen((char *)here);
248 if (dlen > 0 && here[dlen - 1] == '\n') return start;
249 here += dlen;
250 }
251
252 else
253 {
254 int new_buffer_size = 2*buffer_size;
255 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
256 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
257 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
258
259 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
260 {
261 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
262 exit(1);
263 }
264
265 memcpy(new_buffer, buffer, buffer_size);
266 memcpy(new_pbuffer, pbuffer, buffer_size);
267
268 buffer_size = new_buffer_size;
269
270 start = new_buffer + (start - buffer);
271 here = new_buffer + (here - buffer);
272
273 free(buffer);
274 free(dbuffer);
275 free(pbuffer);
276
277 buffer = new_buffer;
278 dbuffer = new_dbuffer;
279 pbuffer = new_pbuffer;
280 }
281 }
282
283 return NULL; /* Control never gets here */
284 }
285
286
287
288
289
290
291
292 /*************************************************
293 * Read number from string *
294 *************************************************/
295
296 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
297 around with conditional compilation, just do the job by hand. It is only used
298 for unpicking arguments, so just keep it simple.
299
300 Arguments:
301 str string to be converted
302 endptr where to put the end pointer
303
304 Returns: the converted value, as an int
305 */
306
/* Convert an optionally space-prefixed run of decimal digits to an int.

Arguments:
  str      the text to convert
  endptr   receives a pointer to the first character after the digits (or
             after the leading white space if there are no digits)

Returns:   the accumulated value; 0 if no digits were found
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  unsigned char *cursor = str;
  int total = 0;

  /* Step over leading white space, stopping at the terminating zero. */

  for (; *cursor != 0 && isspace(*cursor); cursor++)
    ;

  /* Accumulate the digits, most significant first. */

  for (; isdigit(*cursor); cursor++)
    total = total * 10 + (int)(*cursor - '0');

  *endptr = cursor;
  return total;
}
316
317
318
319
320 /*************************************************
321 * Convert UTF-8 string to value *
322 *************************************************/
323
324 /* This function takes one or more bytes that represents a UTF-8 character,
325 and returns the value of the character.
326
327 Argument:
328 utf8bytes a pointer to the byte vector
329 vptr a pointer to an int to receive the value
330
331 Returns: > 0 => the number of bytes consumed
332 -6 to 0 => malformed UTF-8 character at offset = (-return)
333 */
334
335 #if !defined NOUTF8
336
337 static int
338 utf82ord(unsigned char *utf8bytes, int *vptr)
339 {
340 int c = *utf8bytes++;
341 int d = c;
342 int i, j, s;
343
344 for (i = -1; i < 6; i++) /* i is number of additional bytes */
345 {
346 if ((d & 0x80) == 0) break;
347 d <<= 1;
348 }
349
350 if (i == -1) { *vptr = c; return 1; } /* ascii character */
351 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
352
353 /* i now has a value in the range 1-5 */
354
355 s = 6*i;
356 d = (c & utf8_table3[i]) << s;
357
358 for (j = 0; j < i; j++)
359 {
360 c = *utf8bytes++;
361 if ((c & 0xc0) != 0x80) return -(j+1);
362 s -= 6;
363 d |= (c & 0x3f) << s;
364 }
365
366 /* Check that encoding was the correct unique one */
367
368 for (j = 0; j < utf8_table1_size; j++)
369 if (d <= utf8_table1[j]) break;
370 if (j != i) return -(i+1);
371
372 /* Valid value */
373
374 *vptr = d;
375 return i+1;
376 }
377
378 #endif
379
380
381
382 /*************************************************
383 * Convert character value to UTF-8 *
384 *************************************************/
385
386 /* This function takes an integer value in the range 0 - 0x7fffffff
387 and encodes it as a UTF-8 character in 1 to 6 bytes.
388
389 Arguments:
390 cvalue the character value
391 utf8bytes pointer to buffer for result - at least 6 bytes long
392
393 Returns: number of bytes placed in the buffer
394 */
395
396 #if !defined NOUTF8
397
398 static int
399 ord2utf8(int cvalue, uschar *utf8bytes)
400 {
401 register int i, j;
402 for (i = 0; i < utf8_table1_size; i++)
403 if (cvalue <= utf8_table1[i]) break;
404 utf8bytes += i;
405 for (j = i; j > 0; j--)
406 {
407 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
408 cvalue >>= 6;
409 }
410 *utf8bytes = utf8_table2[i] | cvalue;
411 return i + 1;
412 }
413
414 #endif
415
416
417
418 /*************************************************
419 * Print character string *
420 *************************************************/
421
422 /* Character string printing function. Must handle UTF-8 strings in utf8
423 mode. Yields number of characters printed. If handed a NULL file, just counts
424 chars without printing. */
425
426 static int pchars(unsigned char *p, int length, FILE *f)
427 {
428 int c = 0;
429 int yield = 0;
430
431 while (length-- > 0)
432 {
433 #if !defined NOUTF8
434 if (use_utf8)
435 {
436 int rc = utf82ord(p, &c);
437
438 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
439 {
440 length -= rc - 1;
441 p += rc;
442 if (PRINTHEX(c))
443 {
444 if (f != NULL) fprintf(f, "%c", c);
445 yield++;
446 }
447 else
448 {
449 int n = 4;
450 if (f != NULL) fprintf(f, "\\x{%02x}", c);
451 yield += (n <= 0x000000ff)? 2 :
452 (n <= 0x00000fff)? 3 :
453 (n <= 0x0000ffff)? 4 :
454 (n <= 0x000fffff)? 5 : 6;
455 }
456 continue;
457 }
458 }
459 #endif
460
461 /* Not UTF-8, or malformed UTF-8 */
462
463 c = *p++;
464 if (PRINTHEX(c))
465 {
466 if (f != NULL) fprintf(f, "%c", c);
467 yield++;
468 }
469 else
470 {
471 if (f != NULL) fprintf(f, "\\x%02x", c);
472 yield += 4;
473 }
474 }
475
476 return yield;
477 }
478
479
480
481 /*************************************************
482 * Callout function *
483 *************************************************/
484
485 /* Called from PCRE as a result of the (?C) item. We print out where we are in
486 the match. Yield zero unless more callouts than the fail count, or the callout
487 data is not zero. */
488
489 static int callout(pcre_callout_block *cb)
490 {
491 FILE *f = (first_callout | callout_extra)? outfile : NULL;
492 int i, pre_start, post_start, subject_length;
493
494 if (callout_extra)
495 {
496 fprintf(f, "Callout %d: last capture = %d\n",
497 cb->callout_number, cb->capture_last);
498
499 for (i = 0; i < cb->capture_top * 2; i += 2)
500 {
501 if (cb->offset_vector[i] < 0)
502 fprintf(f, "%2d: <unset>\n", i/2);
503 else
504 {
505 fprintf(f, "%2d: ", i/2);
506 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
507 cb->offset_vector[i+1] - cb->offset_vector[i], f);
508 fprintf(f, "\n");
509 }
510 }
511 }
512
513 /* Re-print the subject in canonical form, the first time or if giving full
514 datails. On subsequent calls in the same match, we use pchars just to find the
515 printed lengths of the substrings. */
516
517 if (f != NULL) fprintf(f, "--->");
518
519 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
520 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
521 cb->current_position - cb->start_match, f);
522
523 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
524
525 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
526 cb->subject_length - cb->current_position, f);
527
528 if (f != NULL) fprintf(f, "\n");
529
530 /* Always print appropriate indicators, with callout number if not already
531 shown. For automatic callouts, show the pattern offset. */
532
533 if (cb->callout_number == 255)
534 {
535 fprintf(outfile, "%+3d ", cb->pattern_position);
536 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
537 }
538 else
539 {
540 if (callout_extra) fprintf(outfile, " ");
541 else fprintf(outfile, "%3d ", cb->callout_number);
542 }
543
544 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
545 fprintf(outfile, "^");
546
547 if (post_start > 0)
548 {
549 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
550 fprintf(outfile, "^");
551 }
552
553 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
554 fprintf(outfile, " ");
555
556 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
557 pbuffer + cb->pattern_position);
558
559 fprintf(outfile, "\n");
560 first_callout = 0;
561
562 if (cb->callout_data != NULL)
563 {
564 int callout_data = *((int *)(cb->callout_data));
565 if (callout_data != 0)
566 {
567 fprintf(outfile, "Callout data = %d\n", callout_data);
568 return callout_data;
569 }
570 }
571
572 return (cb->callout_number != callout_fail_id)? 0 :
573 (++callout_count >= callout_fail_count)? 1 : 0;
574 }
575
576
577 /*************************************************
578 * Local malloc functions *
579 *************************************************/
580
581 /* Alternative malloc function, to test functionality and show the size of the
582 compiled re. */
583
584 static void *new_malloc(size_t size)
585 {
586 void *block = malloc(size);
587 gotten_store = size;
588 if (show_malloc)
589 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
590 return block;
591 }
592
593 static void new_free(void *block)
594 {
595 if (show_malloc)
596 fprintf(outfile, "free %p\n", block);
597 free(block);
598 }
599
600
601 /* For recursion malloc/free, to test stacking calls */
602
603 static void *stack_malloc(size_t size)
604 {
605 void *block = malloc(size);
606 if (show_malloc)
607 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
608 return block;
609 }
610
611 static void stack_free(void *block)
612 {
613 if (show_malloc)
614 fprintf(outfile, "stack_free %p\n", block);
615 free(block);
616 }
617
618
619 /*************************************************
620 * Call pcre_fullinfo() *
621 *************************************************/
622
623 /* Get one piece of information from the pcre_fullinfo() function */
624
625 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
626 {
627 int rc;
628 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
629 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
630 }
631
632
633
634 /*************************************************
635 * Byte flipping function *
636 *************************************************/
637
/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value   the value to flip
  n       2 to swap the two low-order bytes; any other value reverses the
            four low-order bytes

Returns:  the flipped value; bits above the bytes involved are discarded
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  /* Pick the four low-order bytes apart, least significant first. */

  unsigned long int b0 = value & 0xffUL;
  unsigned long int b1 = (value >> 8) & 0xffUL;
  unsigned long int b2 = (value >> 16) & 0xffUL;
  unsigned long int b3 = (value >> 24) & 0xffUL;

  if (n == 2)
    {
    return (b0 << 8) | b1;
    }

  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
647
648
649
650
651 /*************************************************
652 * Check match or recursion limit *
653 *************************************************/
654
655 static int
656 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
657 int start_offset, int options, int *use_offsets, int use_size_offsets,
658 int flag, unsigned long int *limit, int errnumber, const char *msg)
659 {
660 int count;
661 int min = 0;
662 int mid = 64;
663 int max = -1;
664
665 extra->flags |= flag;
666
667 for (;;)
668 {
669 *limit = mid;
670
671 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
672 use_offsets, use_size_offsets);
673
674 if (count == errnumber)
675 {
676 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
677 min = mid;
678 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
679 }
680
681 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
682 count == PCRE_ERROR_PARTIAL)
683 {
684 if (mid == min + 1)
685 {
686 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
687 break;
688 }
689 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
690 max = mid;
691 mid = (min + mid)/2;
692 }
693 else break; /* Some other error */
694 }
695
696 extra->flags &= ~flag;
697 return count;
698 }
699
700
701
702 /*************************************************
703 * Case-independent strncmp() function *
704 *************************************************/
705
706 /*
707 Arguments:
708 s first string
709 t second string
710 n number of characters to compare
711
712 Returns: < 0, = 0, or > 0, according to the comparison
713 */
714
715 static int
716 strncmpic(uschar *s, uschar *t, int n)
717 {
718 while (n--)
719 {
720 int c = tolower(*s++) - tolower(*t++);
721 if (c) return c;
722 }
723 return 0;
724 }
725
726
727
728 /*************************************************
729 * Check newline indicator *
730 *************************************************/
731
732 /* This is used both at compile and run-time to check for <xxx> escapes, where
733 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
734 no match.
735
736 Arguments:
737 p points after the leading '<'
738 f file for error message
739
740 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
741 */
742
743 static int
744 check_newline(uschar *p, FILE *f)
745 {
746 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
747 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
748 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
749 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
750 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
751 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
752 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
753 fprintf(f, "Unknown newline type at: <%s\n", p);
754 return 0;
755 }
756
757
758
759 /*************************************************
760 * Usage function *
761 *************************************************/
762
/* Write the command-line help text to the standard output. Which lines appear
depends on whether readline, DFA and POSIX support are compiled in. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
  fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

  fputs("\nOptions:\n"
        " -b show compiled code (bytecode)\n"
        " -C show PCRE compile-time options and exit\n"
        " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif

  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s output store (memory) used information\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
795
796
797
798 /*************************************************
799 * Main Program *
800 *************************************************/
801
802 /* Read lines from named file or stdin and write to named file or stdout; lines
803 consist of a regular expression, in delimiters and optionally followed by
804 options, followed by a set of test data, terminated by an empty line. */
805
806 int main(int argc, char **argv)
807 {
808 FILE *infile = stdin;
809 int options = 0;
810 int study_options = 0;
811 int op = 1;
812 int timeit = 0;
813 int timeitm = 0;
814 int showinfo = 0;
815 int showstore = 0;
816 int quiet = 0;
817 int size_offsets = 45;
818 int size_offsets_max;
819 int *offsets = NULL;
820 #if !defined NOPOSIX
821 int posix = 0;
822 #endif
823 int debug = 0;
824 int done = 0;
825 int all_use_dfa = 0;
826 int yield = 0;
827 int stack_size;
828
829 /* These vectors store, end-to-end, a list of captured substring names. Assume
830 that 1024 is plenty long enough for the few names we'll be testing. */
831
832 uschar copynames[1024];
833 uschar getnames[1024];
834
835 uschar *copynamesptr;
836 uschar *getnamesptr;
837
838 /* Get buffers from malloc() so that Electric Fence will check their misuse
839 when I am debugging. They grow automatically when very long lines are read. */
840
841 buffer = (unsigned char *)malloc(buffer_size);
842 dbuffer = (unsigned char *)malloc(buffer_size);
843 pbuffer = (unsigned char *)malloc(buffer_size);
844
845 /* The outfile variable is static so that new_malloc can use it. */
846
847 outfile = stdout;
848
849 /* The following _setmode() stuff is some Windows magic that tells its runtime
850 library to translate CRLF into a single LF character. At least, that's what
851 I've been told: never having used Windows I take this all on trust. Originally
852 it set 0x8000, but then I was advised that _O_BINARY was better. */
853
854 #if defined(_WIN32) || defined(WIN32)
855 _setmode( _fileno( stdout ), _O_BINARY );
856 #endif
857
858 /* Scan options */
859
860 while (argc > 1 && argv[op][0] == '-')
861 {
862 unsigned char *endptr;
863
864 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
865 showstore = 1;
866 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
867 else if (strcmp(argv[op], "-b") == 0) debug = 1;
868 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
869 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
870 #if !defined NODFA
871 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
872 #endif
873 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
874 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
875 *endptr == 0))
876 {
877 op++;
878 argc--;
879 }
880 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
881 {
882 int both = argv[op][2] == 0;
883 int temp;
884 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
885 *endptr == 0))
886 {
887 timeitm = temp;
888 op++;
889 argc--;
890 }
891 else timeitm = LOOPREPEAT;
892 if (both) timeit = timeitm;
893 }
894 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
895 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
896 *endptr == 0))
897 {
898 #if defined(_WIN32) || defined(WIN32)
899 printf("PCRE: -S not supported on this OS\n");
900 exit(1);
901 #else
902 int rc;
903 struct rlimit rlim;
904 getrlimit(RLIMIT_STACK, &rlim);
905 rlim.rlim_cur = stack_size * 1024 * 1024;
906 rc = setrlimit(RLIMIT_STACK, &rlim);
907 if (rc != 0)
908 {
909 printf("PCRE: setrlimit() failed with error %d\n", rc);
910 exit(1);
911 }
912 op++;
913 argc--;
914 #endif
915 }
916 #if !defined NOPOSIX
917 else if (strcmp(argv[op], "-p") == 0) posix = 1;
918 #endif
919 else if (strcmp(argv[op], "-C") == 0)
920 {
921 int rc;
922 printf("PCRE version %s\n", pcre_version());
923 printf("Compiled with\n");
924 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
925 printf(" %sUTF-8 support\n", rc? "" : "No ");
926 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
927 printf(" %sUnicode properties support\n", rc? "" : "No ");
928 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
929 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
930 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
931 (rc == -2)? "ANYCRLF" :
932 (rc == -1)? "ANY" : "???");
933 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
934 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
935 "all Unicode newlines");
936 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
937 printf(" Internal link size = %d\n", rc);
938 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
939 printf(" POSIX malloc threshold = %d\n", rc);
940 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
941 printf(" Default match limit = %d\n", rc);
942 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
943 printf(" Default recursion depth limit = %d\n", rc);
944 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
945 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
946 goto EXIT;
947 }
948 else if (strcmp(argv[op], "-help") == 0 ||
949 strcmp(argv[op], "--help") == 0)
950 {
951 usage();
952 goto EXIT;
953 }
954 else
955 {
956 printf("** Unknown or malformed option %s\n", argv[op]);
957 usage();
958 yield = 1;
959 goto EXIT;
960 }
961 op++;
962 argc--;
963 }
964
965 /* Get the store for the offsets vector, and remember what it was */
966
967 size_offsets_max = size_offsets;
968 offsets = (int *)malloc(size_offsets_max * sizeof(int));
969 if (offsets == NULL)
970 {
971 printf("** Failed to get %d bytes of memory for offsets vector\n",
972 (int)(size_offsets_max * sizeof(int)));
973 yield = 1;
974 goto EXIT;
975 }
976
977 /* Sort out the input and output files */
978
979 if (argc > 1)
980 {
981 infile = fopen(argv[op], INPUT_MODE);
982 if (infile == NULL)
983 {
984 printf("** Failed to open %s\n", argv[op]);
985 yield = 1;
986 goto EXIT;
987 }
988 }
989
990 if (argc > 2)
991 {
992 outfile = fopen(argv[op+1], OUTPUT_MODE);
993 if (outfile == NULL)
994 {
995 printf("** Failed to open %s\n", argv[op+1]);
996 yield = 1;
997 goto EXIT;
998 }
999 }
1000
1001 /* Set alternative malloc function */
1002
1003 pcre_malloc = new_malloc;
1004 pcre_free = new_free;
1005 pcre_stack_malloc = stack_malloc;
1006 pcre_stack_free = stack_free;
1007
1008 /* Heading line unless quiet, then prompt for first regex if stdin */
1009
1010 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1011
1012 /* Main loop */
1013
1014 while (!done)
1015 {
1016 pcre *re = NULL;
1017 pcre_extra *extra = NULL;
1018
1019 #if !defined NOPOSIX /* There are still compilers that require no indent */
1020 regex_t preg;
1021 int do_posix = 0;
1022 #endif
1023
1024 const char *error;
1025 unsigned char *p, *pp, *ppp;
1026 unsigned char *to_file = NULL;
1027 const unsigned char *tables = NULL;
1028 unsigned long int true_size, true_study_size = 0;
1029 size_t size, regex_gotten_store;
1030 int do_study = 0;
1031 int do_debug = debug;
1032 int do_G = 0;
1033 int do_g = 0;
1034 int do_showinfo = showinfo;
1035 int do_showrest = 0;
1036 int do_flip = 0;
1037 int erroroffset, len, delimiter, poffset;
1038
1039 use_utf8 = 0;
1040 debug_lengths = 1;
1041
1042 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1043 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1044 fflush(outfile);
1045
1046 p = buffer;
1047 while (isspace(*p)) p++;
1048 if (*p == 0) continue;
1049
1050 /* See if the pattern is to be loaded pre-compiled from a file. */
1051
1052 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1053 {
1054 unsigned long int magic, get_options;
1055 uschar sbuf[8];
1056 FILE *f;
1057
1058 p++;
1059 pp = p + (int)strlen((char *)p);
1060 while (isspace(pp[-1])) pp--;
1061 *pp = 0;
1062
1063 f = fopen((char *)p, "rb");
1064 if (f == NULL)
1065 {
1066 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1067 continue;
1068 }
1069
1070 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1071
1072 true_size =
1073 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1074 true_study_size =
1075 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1076
1077 re = (real_pcre *)new_malloc(true_size);
1078 regex_gotten_store = gotten_store;
1079
1080 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1081
1082 magic = ((real_pcre *)re)->magic_number;
1083 if (magic != MAGIC_NUMBER)
1084 {
1085 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1086 {
1087 do_flip = 1;
1088 }
1089 else
1090 {
1091 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1092 fclose(f);
1093 continue;
1094 }
1095 }
1096
1097 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1098 do_flip? " (byte-inverted)" : "", p);
1099
1100 /* Need to know if UTF-8 for printing data strings */
1101
1102 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1103 use_utf8 = (get_options & PCRE_UTF8) != 0;
1104
1105 /* Now see if there is any following study data */
1106
1107 if (true_study_size != 0)
1108 {
1109 pcre_study_data *psd;
1110
1111 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1112 extra->flags = PCRE_EXTRA_STUDY_DATA;
1113
1114 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1115 extra->study_data = psd;
1116
1117 if (fread(psd, 1, true_study_size, f) != true_study_size)
1118 {
1119 FAIL_READ:
1120 fprintf(outfile, "Failed to read data from %s\n", p);
1121 if (extra != NULL) new_free(extra);
1122 if (re != NULL) new_free(re);
1123 fclose(f);
1124 continue;
1125 }
1126 fprintf(outfile, "Study data loaded from %s\n", p);
1127 do_study = 1; /* To get the data output if requested */
1128 }
1129 else fprintf(outfile, "No study data\n");
1130
1131 fclose(f);
1132 goto SHOW_INFO;
1133 }
1134
1135 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1136 the pattern; if it isn't complete, read more. */
1137
1138 delimiter = *p++;
1139
1140 if (isalnum(delimiter) || delimiter == '\\')
1141 {
1142 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1143 goto SKIP_DATA;
1144 }
1145
1146 pp = p;
1147 poffset = p - buffer;
1148
1149 for(;;)
1150 {
1151 while (*pp != 0)
1152 {
1153 if (*pp == '\\' && pp[1] != 0) pp++;
1154 else if (*pp == delimiter) break;
1155 pp++;
1156 }
1157 if (*pp != 0) break;
1158 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1159 {
1160 fprintf(outfile, "** Unexpected EOF\n");
1161 done = 1;
1162 goto CONTINUE;
1163 }
1164 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1165 }
1166
1167 /* The buffer may have moved while being extended; reset the start of data
1168 pointer to the correct relative point in the buffer. */
1169
1170 p = buffer + poffset;
1171
1172 /* If the first character after the delimiter is backslash, make
1173 the pattern end with backslash. This is purely to provide a way
1174 of testing for the error message when a pattern ends with backslash. */
1175
1176 if (pp[1] == '\\') *pp++ = '\\';
1177
1178 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1179 for callouts. */
1180
1181 *pp++ = 0;
1182 strcpy((char *)pbuffer, (char *)p);
1183
1184 /* Look for options after final delimiter */
1185
1186 options = 0;
1187 study_options = 0;
1188 log_store = showstore; /* default from command line */
1189
1190 while (*pp != 0)
1191 {
1192 switch (*pp++)
1193 {
1194 case 'f': options |= PCRE_FIRSTLINE; break;
1195 case 'g': do_g = 1; break;
1196 case 'i': options |= PCRE_CASELESS; break;
1197 case 'm': options |= PCRE_MULTILINE; break;
1198 case 's': options |= PCRE_DOTALL; break;
1199 case 'x': options |= PCRE_EXTENDED; break;
1200
1201 case '+': do_showrest = 1; break;
1202 case 'A': options |= PCRE_ANCHORED; break;
1203 case 'B': do_debug = 1; break;
1204 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1205 case 'D': do_debug = do_showinfo = 1; break;
1206 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1207 case 'F': do_flip = 1; break;
1208 case 'G': do_G = 1; break;
1209 case 'I': do_showinfo = 1; break;
1210 case 'J': options |= PCRE_DUPNAMES; break;
1211 case 'M': log_store = 1; break;
1212 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1213
1214 #if !defined NOPOSIX
1215 case 'P': do_posix = 1; break;
1216 #endif
1217
1218 case 'S': do_study = 1; break;
1219 case 'U': options |= PCRE_UNGREEDY; break;
1220 case 'X': options |= PCRE_EXTRA; break;
1221 case 'Z': debug_lengths = 0; break;
1222 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1223 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1224
1225 case 'L':
1226 ppp = pp;
1227 /* The '\r' test here is so that it works on Windows. */
1228 /* The '0' test is just in case this is an unterminated line. */
1229 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1230 *ppp = 0;
1231 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1232 {
1233 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1234 goto SKIP_DATA;
1235 }
1236 locale_set = 1;
1237 tables = pcre_maketables();
1238 pp = ppp;
1239 break;
1240
1241 case '>':
1242 to_file = pp;
1243 while (*pp != 0) pp++;
1244 while (isspace(pp[-1])) pp--;
1245 *pp = 0;
1246 break;
1247
1248 case '<':
1249 {
1250 if (strncmp((char *)pp, "JS>", 3) == 0)
1251 {
1252 options |= PCRE_JAVASCRIPT_COMPAT;
1253 pp += 3;
1254 }
1255 else
1256 {
1257 int x = check_newline(pp, outfile);
1258 if (x == 0) goto SKIP_DATA;
1259 options |= x;
1260 while (*pp++ != '>');
1261 }
1262 }
1263 break;
1264
1265 case '\r': /* So that it works in Windows */
1266 case '\n':
1267 case ' ':
1268 break;
1269
1270 default:
1271 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1272 goto SKIP_DATA;
1273 }
1274 }
1275
1276 /* Handle compiling via the POSIX interface, which doesn't support the
1277 timing, showing, or debugging options, nor the ability to pass over
1278 local character tables. */
1279
1280 #if !defined NOPOSIX
1281 if (posix || do_posix)
1282 {
1283 int rc;
1284 int cflags = 0;
1285
1286 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1287 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1288 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1289 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1290 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1291
1292 rc = regcomp(&preg, (char *)p, cflags);
1293
1294 /* Compilation failed; go back for another re, skipping to blank line
1295 if non-interactive. */
1296
1297 if (rc != 0)
1298 {
1299 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1300 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1301 goto SKIP_DATA;
1302 }
1303 }
1304
1305 /* Handle compiling via the native interface */
1306
1307 else
1308 #endif /* !defined NOPOSIX */
1309
1310 {
1311 if (timeit > 0)
1312 {
1313 register int i;
1314 clock_t time_taken;
1315 clock_t start_time = clock();
1316 for (i = 0; i < timeit; i++)
1317 {
1318 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1319 if (re != NULL) free(re);
1320 }
1321 time_taken = clock() - start_time;
1322 fprintf(outfile, "Compile time %.4f milliseconds\n",
1323 (((double)time_taken * 1000.0) / (double)timeit) /
1324 (double)CLOCKS_PER_SEC);
1325 }
1326
1327 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1328
1329 /* Compilation failed; go back for another re, skipping to blank line
1330 if non-interactive. */
1331
1332 if (re == NULL)
1333 {
1334 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1335 SKIP_DATA:
1336 if (infile != stdin)
1337 {
1338 for (;;)
1339 {
1340 if (extend_inputline(infile, buffer, NULL) == NULL)
1341 {
1342 done = 1;
1343 goto CONTINUE;
1344 }
1345 len = (int)strlen((char *)buffer);
1346 while (len > 0 && isspace(buffer[len-1])) len--;
1347 if (len == 0) break;
1348 }
1349 fprintf(outfile, "\n");
1350 }
1351 goto CONTINUE;
1352 }
1353
1354 /* Compilation succeeded; print data if required. There are now two
1355 info-returning functions. The old one has a limited interface and
1356 returns only limited data. Check that it agrees with the newer one. */
1357
1358 if (log_store)
1359 fprintf(outfile, "Memory allocation (code space): %d\n",
1360 (int)(gotten_store -
1361 sizeof(real_pcre) -
1362 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1363
1364 /* Extract the size for possible writing before possibly flipping it,
1365 and remember the store that was got. */
1366
1367 true_size = ((real_pcre *)re)->size;
1368 regex_gotten_store = gotten_store;
1369
1370 /* If /S was present, study the regexp to generate additional info to
1371 help with the matching. */
1372
1373 if (do_study)
1374 {
1375 if (timeit > 0)
1376 {
1377 register int i;
1378 clock_t time_taken;
1379 clock_t start_time = clock();
1380 for (i = 0; i < timeit; i++)
1381 extra = pcre_study(re, study_options, &error);
1382 time_taken = clock() - start_time;
1383 if (extra != NULL) free(extra);
1384 fprintf(outfile, " Study time %.4f milliseconds\n",
1385 (((double)time_taken * 1000.0) / (double)timeit) /
1386 (double)CLOCKS_PER_SEC);
1387 }
1388 extra = pcre_study(re, study_options, &error);
1389 if (error != NULL)
1390 fprintf(outfile, "Failed to study: %s\n", error);
1391 else if (extra != NULL)
1392 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1393 }
1394
1395 /* If the 'F' option was present, we flip the bytes of all the integer
1396 fields in the regex data block and the study block. This is to make it
1397 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1398 compiled on a different architecture. */
1399
1400 if (do_flip)
1401 {
1402 real_pcre *rre = (real_pcre *)re;
1403 rre->magic_number =
1404 byteflip(rre->magic_number, sizeof(rre->magic_number));
1405 rre->size = byteflip(rre->size, sizeof(rre->size));
1406 rre->options = byteflip(rre->options, sizeof(rre->options));
1407 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1408 rre->top_bracket =
1409 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1410 rre->top_backref =
1411 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1412 rre->first_byte =
1413 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1414 rre->req_byte =
1415 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1416 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1417 sizeof(rre->name_table_offset));
1418 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1419 sizeof(rre->name_entry_size));
1420 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1421 sizeof(rre->name_count));
1422
1423 if (extra != NULL)
1424 {
1425 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1426 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1427 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1428 }
1429 }
1430
1431 /* Extract information from the compiled data if required */
1432
1433 SHOW_INFO:
1434
1435 if (do_debug)
1436 {
1437 fprintf(outfile, "------------------------------------------------------------------\n");
1438 pcre_printint(re, outfile, debug_lengths);
1439 }
1440
1441 if (do_showinfo)
1442 {
1443 unsigned long int get_options, all_options;
1444 #if !defined NOINFOCHECK
1445 int old_first_char, old_options, old_count;
1446 #endif
1447 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1448 hascrorlf;
1449 int nameentrysize, namecount;
1450 const uschar *nametable;
1451
1452 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1453 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1454 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1455 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1456 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1457 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1458 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1459 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1460 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1461 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1462 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1463 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1464
1465 #if !defined NOINFOCHECK
1466 old_count = pcre_info(re, &old_options, &old_first_char);
1467 if (count < 0) fprintf(outfile,
1468 "Error %d from pcre_info()\n", count);
1469 else
1470 {
1471 if (old_count != count) fprintf(outfile,
1472 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1473 old_count);
1474
1475 if (old_first_char != first_char) fprintf(outfile,
1476 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1477 first_char, old_first_char);
1478
1479 if (old_options != (int)get_options) fprintf(outfile,
1480 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1481 get_options, old_options);
1482 }
1483 #endif
1484
1485 if (size != regex_gotten_store) fprintf(outfile,
1486 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1487 (int)size, (int)regex_gotten_store);
1488
1489 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1490 if (backrefmax > 0)
1491 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1492
1493 if (namecount > 0)
1494 {
1495 fprintf(outfile, "Named capturing subpatterns:\n");
1496 while (namecount-- > 0)
1497 {
1498 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1499 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1500 GET2(nametable, 0));
1501 nametable += nameentrysize;
1502 }
1503 }
1504
1505 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1506 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1507
1508 all_options = ((real_pcre *)re)->options;
1509 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1510
1511 if (get_options == 0) fprintf(outfile, "No options\n");
1512 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1513 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1514 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1515 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1516 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1517 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1518 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1519 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1520 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1521 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1522 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1523 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1524 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1525 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1526 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1527 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1528
1529 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1530
1531 switch (get_options & PCRE_NEWLINE_BITS)
1532 {
1533 case PCRE_NEWLINE_CR:
1534 fprintf(outfile, "Forced newline sequence: CR\n");
1535 break;
1536
1537 case PCRE_NEWLINE_LF:
1538 fprintf(outfile, "Forced newline sequence: LF\n");
1539 break;
1540
1541 case PCRE_NEWLINE_CRLF:
1542 fprintf(outfile, "Forced newline sequence: CRLF\n");
1543 break;
1544
1545 case PCRE_NEWLINE_ANYCRLF:
1546 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1547 break;
1548
1549 case PCRE_NEWLINE_ANY:
1550 fprintf(outfile, "Forced newline sequence: ANY\n");
1551 break;
1552
1553 default:
1554 break;
1555 }
1556
1557 if (first_char == -1)
1558 {
1559 fprintf(outfile, "First char at start or follows newline\n");
1560 }
1561 else if (first_char < 0)
1562 {
1563 fprintf(outfile, "No first char\n");
1564 }
1565 else
1566 {
1567 int ch = first_char & 255;
1568 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1569 "" : " (caseless)";
1570 if (PRINTHEX(ch))
1571 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1572 else
1573 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1574 }
1575
1576 if (need_char < 0)
1577 {
1578 fprintf(outfile, "No need char\n");
1579 }
1580 else
1581 {
1582 int ch = need_char & 255;
1583 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1584 "" : " (caseless)";
1585 if (PRINTHEX(ch))
1586 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1587 else
1588 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1589 }
1590
1591 /* Don't output study size; at present it is in any case a fixed
1592 value, but it varies, depending on the computer architecture, and
1593 so messes up the test suite. (And with the /F option, it might be
1594 flipped.) */
1595
1596 if (do_study)
1597 {
1598 if (extra == NULL)
1599 fprintf(outfile, "Study returned NULL\n");
1600 else
1601 {
1602 uschar *start_bits = NULL;
1603 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1604
1605 if (start_bits == NULL)
1606 fprintf(outfile, "No starting byte set\n");
1607 else
1608 {
1609 int i;
1610 int c = 24;
1611 fprintf(outfile, "Starting byte set: ");
1612 for (i = 0; i < 256; i++)
1613 {
1614 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1615 {
1616 if (c > 75)
1617 {
1618 fprintf(outfile, "\n ");
1619 c = 2;
1620 }
1621 if (PRINTHEX(i) && i != ' ')
1622 {
1623 fprintf(outfile, "%c ", i);
1624 c += 2;
1625 }
1626 else
1627 {
1628 fprintf(outfile, "\\x%02x ", i);
1629 c += 5;
1630 }
1631 }
1632 }
1633 fprintf(outfile, "\n");
1634 }
1635 }
1636 }
1637 }
1638
1639 /* If the '>' option was present, we write out the regex to a file, and
1640 that is all. The first 8 bytes of the file are the regex length and then
1641 the study length, in big-endian order. */
1642
1643 if (to_file != NULL)
1644 {
1645 FILE *f = fopen((char *)to_file, "wb");
1646 if (f == NULL)
1647 {
1648 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1649 }
1650 else
1651 {
1652 uschar sbuf[8];
1653 sbuf[0] = (uschar)((true_size >> 24) & 255);
1654 sbuf[1] = (uschar)((true_size >> 16) & 255);
1655 sbuf[2] = (uschar)((true_size >> 8) & 255);
1656 sbuf[3] = (uschar)((true_size) & 255);
1657
1658 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1659 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1660 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1661 sbuf[7] = (uschar)((true_study_size) & 255);
1662
1663 if (fwrite(sbuf, 1, 8, f) < 8 ||
1664 fwrite(re, 1, true_size, f) < true_size)
1665 {
1666 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1667 }
1668 else
1669 {
1670 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1671 if (extra != NULL)
1672 {
1673 if (fwrite(extra->study_data, 1, true_study_size, f) <
1674 true_study_size)
1675 {
1676 fprintf(outfile, "Write error on %s: %s\n", to_file,
1677 strerror(errno));
1678 }
1679 else fprintf(outfile, "Study data written to %s\n", to_file);
1680
1681 }
1682 }
1683 fclose(f);
1684 }
1685
1686 new_free(re);
1687 if (extra != NULL) new_free(extra);
1688 if (tables != NULL) new_free((void *)tables);
1689 continue; /* With next regex */
1690 }
1691 } /* End of non-POSIX compile */
1692
1693 /* Read data lines and test them */
1694
1695 for (;;)
1696 {
1697 uschar *q;
1698 uschar *bptr;
1699 int *use_offsets = offsets;
1700 int use_size_offsets = size_offsets;
1701 int callout_data = 0;
1702 int callout_data_set = 0;
1703 int count, c;
1704 int copystrings = 0;
1705 int find_match_limit = 0;
1706 int getstrings = 0;
1707 int getlist = 0;
1708 int gmatched = 0;
1709 int start_offset = 0;
1710 int g_notempty = 0;
1711 int use_dfa = 0;
1712
1713 options = 0;
1714
1715 *copynames = 0;
1716 *getnames = 0;
1717
1718 copynamesptr = copynames;
1719 getnamesptr = getnames;
1720
1721 pcre_callout = callout;
1722 first_callout = 1;
1723 callout_extra = 0;
1724 callout_count = 0;
1725 callout_fail_count = 999999;
1726 callout_fail_id = -1;
1727 show_malloc = 0;
1728
1729 if (extra != NULL) extra->flags &=
1730 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1731
1732 len = 0;
1733 for (;;)
1734 {
1735 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1736 {
1737 if (len > 0) break;
1738 done = 1;
1739 goto CONTINUE;
1740 }
1741 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1742 len = (int)strlen((char *)buffer);
1743 if (buffer[len-1] == '\n') break;
1744 }
1745
1746 while (len > 0 && isspace(buffer[len-1])) len--;
1747 buffer[len] = 0;
1748 if (len == 0) break;
1749
1750 p = buffer;
1751 while (isspace(*p)) p++;
1752
1753 bptr = q = dbuffer;
1754 while ((c = *p++) != 0)
1755 {
1756 int i = 0;
1757 int n = 0;
1758
1759 if (c == '\\') switch ((c = *p++))
1760 {
1761 case 'a': c = 7; break;
1762 case 'b': c = '\b'; break;
1763 case 'e': c = 27; break;
1764 case 'f': c = '\f'; break;
1765 case 'n': c = '\n'; break;
1766 case 'r': c = '\r'; break;
1767 case 't': c = '\t'; break;
1768 case 'v': c = '\v'; break;
1769
1770 case '0': case '1': case '2': case '3':
1771 case '4': case '5': case '6': case '7':
1772 c -= '0';
1773 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1774 c = c * 8 + *p++ - '0';
1775
1776 #if !defined NOUTF8
1777 if (use_utf8 && c > 255)
1778 {
1779 unsigned char buff8[8];
1780 int ii, utn;
1781 utn = ord2utf8(c, buff8);
1782 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1783 c = buff8[ii]; /* Last byte */
1784 }
1785 #endif
1786 break;
1787
1788 case 'x':
1789
1790 /* Handle \x{..} specially - new Perl thing for utf8 */
1791
1792 #if !defined NOUTF8
1793 if (*p == '{')
1794 {
1795 unsigned char *pt = p;
1796 c = 0;
1797 while (isxdigit(*(++pt)))
1798 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1799 if (*pt == '}')
1800 {
1801 unsigned char buff8[8];
1802 int ii, utn;
1803 utn = ord2utf8(c, buff8);
1804 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1805 c = buff8[ii]; /* Last byte */
1806 p = pt + 1;
1807 break;
1808 }
1809 /* Not correct form; fall through */
1810 }
1811 #endif
1812
1813 /* Ordinary \x */
1814
1815 c = 0;
1816 while (i++ < 2 && isxdigit(*p))
1817 {
1818 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1819 p++;
1820 }
1821 break;
1822
1823 case 0: /* \ followed by EOF allows for an empty line */
1824 p--;
1825 continue;
1826
1827 case '>':
1828 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1829 continue;
1830
1831 case 'A': /* Option setting */
1832 options |= PCRE_ANCHORED;
1833 continue;
1834
1835 case 'B':
1836 options |= PCRE_NOTBOL;
1837 continue;
1838
1839 case 'C':
1840 if (isdigit(*p)) /* Set copy string */
1841 {
1842 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1843 copystrings |= 1 << n;
1844 }
1845 else if (isalnum(*p))
1846 {
1847 uschar *npp = copynamesptr;
1848 while (isalnum(*p)) *npp++ = *p++;
1849 *npp++ = 0;
1850 *npp = 0;
1851 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1852 if (n < 0)
1853 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1854 copynamesptr = npp;
1855 }
1856 else if (*p == '+')
1857 {
1858 callout_extra = 1;
1859 p++;
1860 }
1861 else if (*p == '-')
1862 {
1863 pcre_callout = NULL;
1864 p++;
1865 }
1866 else if (*p == '!')
1867 {
1868 callout_fail_id = 0;
1869 p++;
1870 while(isdigit(*p))
1871 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1872 callout_fail_count = 0;
1873 if (*p == '!')
1874 {
1875 p++;
1876 while(isdigit(*p))
1877 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1878 }
1879 }
1880 else if (*p == '*')
1881 {
1882 int sign = 1;
1883 callout_data = 0;
1884 if (*(++p) == '-') { sign = -1; p++; }
1885 while(isdigit(*p))
1886 callout_data = callout_data * 10 + *p++ - '0';
1887 callout_data *= sign;
1888 callout_data_set = 1;
1889 }
1890 continue;
1891
1892 #if !defined NODFA
1893 case 'D':
1894 #if !defined NOPOSIX
1895 if (posix || do_posix)
1896 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1897 else
1898 #endif
1899 use_dfa = 1;
1900 continue;
1901
1902 case 'F':
1903 options |= PCRE_DFA_SHORTEST;
1904 continue;
1905 #endif
1906
1907 case 'G':
1908 if (isdigit(*p))
1909 {
1910 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1911 getstrings |= 1 << n;
1912 }
1913 else if (isalnum(*p))
1914 {
1915 uschar *npp = getnamesptr;
1916 while (isalnum(*p)) *npp++ = *p++;
1917 *npp++ = 0;
1918 *npp = 0;
1919 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1920 if (n < 0)
1921 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1922 getnamesptr = npp;
1923 }
1924 continue;
1925
1926 case 'L':
1927 getlist = 1;
1928 continue;
1929
1930 case 'M':
1931 find_match_limit = 1;
1932 continue;
1933
1934 case 'N':
1935 options |= PCRE_NOTEMPTY;
1936 continue;
1937
1938 case 'O':
1939 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1940 if (n > size_offsets_max)
1941 {
1942 size_offsets_max = n;
1943 free(offsets);
1944 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1945 if (offsets == NULL)
1946 {
1947 printf("** Failed to get %d bytes of memory for offsets vector\n",
1948 (int)(size_offsets_max * sizeof(int)));
1949 yield = 1;
1950 goto EXIT;
1951 }
1952 }
1953 use_size_offsets = n;
1954 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1955 continue;
1956
1957 case 'P':
1958 options |= PCRE_PARTIAL;
1959 continue;
1960
1961 case 'Q':
1962 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1963 if (extra == NULL)
1964 {
1965 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1966 extra->flags = 0;
1967 }
1968 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1969 extra->match_limit_recursion = n;
1970 continue;
1971
1972 case 'q':
1973 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1974 if (extra == NULL)
1975 {
1976 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1977 extra->flags = 0;
1978 }
1979 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1980 extra->match_limit = n;
1981 continue;
1982
1983 #if !defined NODFA
1984 case 'R':
1985 options |= PCRE_DFA_RESTART;
1986 continue;
1987 #endif
1988
1989 case 'S':
1990 show_malloc = 1;
1991 continue;
1992
1993 case 'Z':
1994 options |= PCRE_NOTEOL;
1995 continue;
1996
1997 case '?':
1998 options |= PCRE_NO_UTF8_CHECK;
1999 continue;
2000
2001 case '<':
2002 {
2003 int x = check_newline(p, outfile);
2004 if (x == 0) goto NEXT_DATA;
2005 options |= x;
2006 while (*p++ != '>');
2007 }
2008 continue;
2009 }
2010 *q++ = c;
2011 }
2012 *q = 0;
2013 len = q - dbuffer;
2014
2015 if ((all_use_dfa || use_dfa) && find_match_limit)
2016 {
2017 printf("**Match limit not relevant for DFA matching: ignored\n");
2018 find_match_limit = 0;
2019 }
2020
2021 /* Handle matching via the POSIX interface, which does not
2022 support timing or playing with the match limit or callout data. */
2023
2024 #if !defined NOPOSIX
2025 if (posix || do_posix)
2026 {
2027 int rc;
2028 int eflags = 0;
2029 regmatch_t *pmatch = NULL;
2030 if (use_size_offsets > 0)
2031 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2032 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2033 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2034
2035 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2036
2037 if (rc != 0)
2038 {
2039 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2040 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2041 }
2042 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2043 != 0)
2044 {
2045 fprintf(outfile, "Matched with REG_NOSUB\n");
2046 }
2047 else
2048 {
2049 size_t i;
2050 for (i = 0; i < (size_t)use_size_offsets; i++)
2051 {
2052 if (pmatch[i].rm_so >= 0)
2053 {
2054 fprintf(outfile, "%2d: ", (int)i);
2055 (void)pchars(dbuffer + pmatch[i].rm_so,
2056 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2057 fprintf(outfile, "\n");
2058 if (i == 0 && do_showrest)
2059 {
2060 fprintf(outfile, " 0+ ");
2061 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2062 outfile);
2063 fprintf(outfile, "\n");
2064 }
2065 }
2066 }
2067 }
2068 free(pmatch);
2069 }
2070
2071 /* Handle matching via the native interface - repeats for /g and /G */
2072
2073 else
2074 #endif /* !defined NOPOSIX */
2075
2076 for (;; gmatched++) /* Loop for /g or /G */
2077 {
2078 if (timeitm > 0)
2079 {
2080 register int i;
2081 clock_t time_taken;
2082 clock_t start_time = clock();
2083
2084 #if !defined NODFA
2085 if (all_use_dfa || use_dfa)
2086 {
2087 int workspace[1000];
2088 for (i = 0; i < timeitm; i++)
2089 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2090 options | g_notempty, use_offsets, use_size_offsets, workspace,
2091 sizeof(workspace)/sizeof(int));
2092 }
2093 else
2094 #endif
2095
2096 for (i = 0; i < timeitm; i++)
2097 count = pcre_exec(re, extra, (char *)bptr, len,
2098 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2099
2100 time_taken = clock() - start_time;
2101 fprintf(outfile, "Execute time %.4f milliseconds\n",
2102 (((double)time_taken * 1000.0) / (double)timeitm) /
2103 (double)CLOCKS_PER_SEC);
2104 }
2105
2106 /* If find_match_limit is set, we want to do repeated matches with
2107 varying limits in order to find the minimum value for the match limit and
2108 for the recursion limit. */
2109
2110 if (find_match_limit)
2111 {
2112 if (extra == NULL)
2113 {
2114 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2115 extra->flags = 0;
2116 }
2117
2118 (void)check_match_limit(re, extra, bptr, len, start_offset,
2119 options|g_notempty, use_offsets, use_size_offsets,
2120 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2121 PCRE_ERROR_MATCHLIMIT, "match()");
2122
2123 count = check_match_limit(re, extra, bptr, len, start_offset,
2124 options|g_notempty, use_offsets, use_size_offsets,
2125 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2126 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2127 }
2128
2129 /* If callout_data is set, use the interface with additional data */
2130
2131 else if (callout_data_set)
2132 {
2133 if (extra == NULL)
2134 {
2135 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2136 extra->flags = 0;
2137 }
2138 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2139 extra->callout_data = &callout_data;
2140 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2141 options | g_notempty, use_offsets, use_size_offsets);
2142 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2143 }
2144
2145 /* The normal case is just to do the match once, with the default
2146 value of match_limit. */
2147
2148 #if !defined NODFA
2149 else if (all_use_dfa || use_dfa)
2150 {
2151 int workspace[1000];
2152 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2153 options | g_notempty, use_offsets, use_size_offsets, workspace,
2154 sizeof(workspace)/sizeof(int));
2155 if (count == 0)
2156 {
2157 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2158 count = use_size_offsets/2;
2159 }
2160 }
2161 #endif
2162
2163 else
2164 {
2165 count = pcre_exec(re, extra, (char *)bptr, len,
2166 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2167 if (count == 0)
2168 {
2169 fprintf(outfile, "Matched, but too many substrings\n");
2170 count = use_size_offsets/3;
2171 }
2172 }
2173
2174 /* Matched */
2175
2176 if (count >= 0)
2177 {
2178 int i, maxcount;
2179
2180 #if !defined NODFA
2181 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2182 #endif
2183 maxcount = use_size_offsets/3;
2184
2185 /* This is a check against a lunatic return value. */
2186
2187 if (count > maxcount)
2188 {
2189 fprintf(outfile,
2190 "** PCRE error: returned count %d is too big for offset size %d\n",
2191 count, use_size_offsets);
2192 count = use_size_offsets/3;
2193 if (do_g || do_G)
2194 {
2195 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2196 do_g = do_G = FALSE; /* Break g/G loop */
2197 }
2198 }
2199
2200 for (i = 0; i < count * 2; i += 2)
2201 {
2202 if (use_offsets[i] < 0)
2203 fprintf(outfile, "%2d: <unset>\n", i/2);
2204 else
2205 {
2206 fprintf(outfile, "%2d: ", i/2);
2207 (void)pchars(bptr + use_offsets[i],
2208 use_offsets[i+1] - use_offsets[i], outfile);
2209 fprintf(outfile, "\n");
2210 if (i == 0)
2211 {
2212 if (do_showrest)
2213 {
2214 fprintf(outfile, " 0+ ");
2215 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2216 outfile);
2217 fprintf(outfile, "\n");
2218 }
2219 }
2220 }
2221 }
2222
2223 for (i = 0; i < 32; i++)
2224 {
2225 if ((copystrings & (1 << i)) != 0)
2226 {
2227 char copybuffer[256];
2228 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2229 i, copybuffer, sizeof(copybuffer));
2230 if (rc < 0)
2231 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2232 else
2233 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2234 }
2235 }
2236
2237 for (copynamesptr = copynames;
2238 *copynamesptr != 0;
2239 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2240 {
2241 char copybuffer[256];
2242 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2243 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2244 if (rc < 0)
2245 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2246 else
2247 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2248 }
2249
2250 for (i = 0; i < 32; i++)
2251 {
2252 if ((getstrings & (1 << i)) != 0)
2253 {
2254 const char *substring;
2255 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2256 i, &substring);
2257 if (rc < 0)
2258 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2259 else
2260 {
2261 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2262 pcre_free_substring(substring);
2263 }
2264 }
2265 }
2266
2267 for (getnamesptr = getnames;
2268 *getnamesptr != 0;
2269 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2270 {
2271 const char *substring;
2272 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2273 count, (char *)getnamesptr, &substring);
2274 if (rc < 0)
2275 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2276 else
2277 {
2278 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2279 pcre_free_substring(substring);
2280 }
2281 }
2282
2283 if (getlist)
2284 {
2285 const char **stringlist;
2286 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2287 &stringlist);
2288 if (rc < 0)
2289 fprintf(outfile, "get substring list failed %d\n", rc);
2290 else
2291 {
2292 for (i = 0; i < count; i++)
2293 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2294 if (stringlist[i] != NULL)
2295 fprintf(outfile, "string list not terminated by NULL\n");
2296 /* free((void *)stringlist); */
2297 pcre_free_substring_list(stringlist);
2298 }
2299 }
2300 }
2301
2302 /* There was a partial match */
2303
2304 else if (count == PCRE_ERROR_PARTIAL)
2305 {
2306 fprintf(outfile, "Partial match");
2307 #if !defined NODFA
2308 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2309 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2310 bptr + use_offsets[0]);
2311 #endif
2312 fprintf(outfile, "\n");
2313 break; /* Out of the /g loop */
2314 }
2315
2316 /* Failed to match. If this is a /g or /G loop and we previously set
2317 g_notempty after a null match, this is not necessarily the end. We want
2318 to advance the start offset, and continue. We won't be at the end of the
2319 string - that was checked before setting g_notempty.
2320
2321 Complication arises in the case when the newline option is "any" or
2322 "anycrlf". If the previous match was at the end of a line terminated by
2323 CRLF, an advance of one character just passes the \r, whereas we should
2324 prefer the longer newline sequence, as does the code in pcre_exec().
2325 Fudge the offset value to achieve this.
2326
2327 Otherwise, in the case of UTF-8 matching, the advance must be one
2328 character, not one byte. */
2329
2330 else
2331 {
2332 if (g_notempty != 0)
2333 {
2334 int onechar = 1;
2335 unsigned int obits = ((real_pcre *)re)->options;
2336 use_offsets[0] = start_offset;
2337 if ((obits & PCRE_NEWLINE_BITS) == 0)
2338 {
2339 int d;
2340 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2341 obits = (d == '\r')? PCRE_NEWLINE_CR :
2342 (d == '\n')? PCRE_NEWLINE_LF :
2343 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2344 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2345 (d == -1)? PCRE_NEWLINE_ANY : 0;
2346 }
2347 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2348 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2349 &&
2350 start_offset < len - 1 &&
2351 bptr[start_offset] == '\r' &&
2352 bptr[start_offset+1] == '\n')
2353 onechar++;
2354 else if (use_utf8)
2355 {
2356 while (start_offset + onechar < len)
2357 {
2358 int tb = bptr[start_offset+onechar];
2359 if (tb <= 127) break;
2360 tb &= 0xc0;
2361 if (tb != 0 && tb != 0xc0) onechar++;
2362 }
2363 }
2364 use_offsets[1] = start_offset + onechar;
2365 }
2366 else
2367 {
2368 if (count == PCRE_ERROR_NOMATCH)
2369 {
2370 if (gmatched == 0) fprintf(outfile, "No match\n");
2371 }
2372 else fprintf(outfile, "Error %d\n", count);
2373 break; /* Out of the /g loop */
2374 }
2375 }
2376
2377 /* If not /g or /G we are done */
2378
2379 if (!do_g && !do_G) break;
2380
2381 /* If we have matched an empty string, first check to see if we are at
2382 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2383 what Perl's /g option does. This turns out to be rather cunning. First
2384 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2385 same point. If this fails (picked up above) we advance to the next
2386 character. */
2387
2388 g_notempty = 0;
2389
2390 if (use_offsets[0] == use_offsets[1])
2391 {
2392 if (use_offsets[0] == len) break;
2393 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2394 }
2395
2396 /* For /g, update the start offset, leaving the rest alone */
2397
2398 if (do_g) start_offset = use_offsets[1];
2399
2400 /* For /G, update the pointer and length */
2401
2402 else
2403 {
2404 bptr += use_offsets[1];
2405 len -= use_offsets[1];
2406 }
2407 } /* End of loop for /g and /G */
2408
2409 NEXT_DATA: continue;
2410 } /* End of loop for data lines */
2411
2412 CONTINUE:
2413
2414 #if !defined NOPOSIX
2415 if (posix || do_posix) regfree(&preg);
2416 #endif
2417
2418 if (re != NULL) new_free(re);
2419 if (extra != NULL) new_free(extra);
2420 if (tables != NULL)
2421 {
2422 new_free((void *)tables);
2423 setlocale(LC_CTYPE, "C");
2424 locale_set = 0;
2425 }
2426 }
2427
2428 if (infile == stdin) fprintf(outfile, "\n");
2429
2430 EXIT:
2431
2432 if (infile != NULL && infile != stdin) fclose(infile);
2433 if (outfile != NULL && outfile != stdout) fclose(outfile);
2434
2435 free(buffer);
2436 free(dbuffer);
2437 free(pbuffer);
2438 free(offsets);
2439
2440 return yield;
2441 }
2442
2443 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12