/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 376 - (show annotations) (download)
Sun Mar 1 12:00:59 2009 UTC (5 years, 1 month ago) by ph10
File MIME type: text/plain
File size: 72504 byte(s)
Bug fixes: forgot to commit these individually.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <locale.h>
49 #include <errno.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #define isatty _isatty /* This is what Windows calls them, I'm told */
75 #define fileno _fileno
76
77 #else
78 #include <sys/time.h> /* These two includes are needed */
79 #include <sys/resource.h> /* for setrlimit(). */
80 #define INPUT_MODE "rb"
81 #define OUTPUT_MODE "wb"
82 #endif
83
84
85 /* We have to include pcre_internal.h because we need the internal info for
86 displaying the results of pcre_study() and we also need to know about the
87 internal macros, structures, and other internal data values; pcretest has
88 "inside information" compared to a program that strictly follows the PCRE API.
89
90 Although pcre_internal.h does itself include pcre.h, we explicitly include it
91 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
92 appropriately for an application, not for building PCRE. */
93
94 #include "pcre.h"
95 #include "pcre_internal.h"
96
97 /* We need access to some of the data tables that PCRE uses. So as not to have
98 to keep two copies, we include the source file here, changing the names of the
99 external symbols to prevent clashes. */
100
101 #define _pcre_ucp_gentype ucp_gentype
102 #define _pcre_utf8_table1 utf8_table1
103 #define _pcre_utf8_table1_size utf8_table1_size
104 #define _pcre_utf8_table2 utf8_table2
105 #define _pcre_utf8_table3 utf8_table3
106 #define _pcre_utf8_table4 utf8_table4
107 #define _pcre_utt utt
108 #define _pcre_utt_size utt_size
109 #define _pcre_utt_names utt_names
110 #define _pcre_OP_lengths OP_lengths
111
112 #include "pcre_tables.c"
113
114 /* We also need the pcre_printint() function for printing out compiled
115 patterns. This function is in a separate file so that it can be included in
116 pcre_compile.c when that module is compiled with debugging enabled.
117
118 The definition of the macro PRINTABLE, which determines whether to print an
119 output character as-is or as a hex value when showing compiled patterns, is
120 contained in this file. We use it here also, in cases when the locale has not
121 been explicitly changed, so as to get consistent output from systems that
122 differ in their output from isprint() even in the "C" locale. */
123
124 #include "pcre_printint.src"
125
126 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
127
128
129 /* It is possible to compile this test program without including support for
130 testing the POSIX interface, though this is not available via the standard
131 Makefile. */
132
133 #if !defined NOPOSIX
134 #include "pcreposix.h"
135 #endif
136
137 /* It is also possible, for the benefit of the version currently imported into
138 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
139 interface to the DFA matcher (NODFA), and without the doublecheck of the old
140 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
141 UTF8 support if PCRE is built without it. */
142
143 #ifndef SUPPORT_UTF8
144 #ifndef NOUTF8
145 #define NOUTF8
146 #endif
147 #endif
148
149
150 /* Other parameters */
151
152 #ifndef CLOCKS_PER_SEC
153 #ifdef CLK_TCK
154 #define CLOCKS_PER_SEC CLK_TCK
155 #else
156 #define CLOCKS_PER_SEC 100
157 #endif
158 #endif
159
160 /* This is the default loop count for timing. */
161
162 #define LOOPREPEAT 500000
163
164 /* Static variables */
165
166 static FILE *outfile; /* Where results are written; static so that new_malloc() can use it */
167 static int log_store = 0; /* NOTE(review): not referenced in the visible part of this file */
168 static int callout_count; /* Incremented by callout() for callouts numbered callout_fail_id */
169 static int callout_extra; /* Non-zero => callout() also lists the captured substrings */
170 static int callout_fail_count; /* callout() yields 1 once callout_count reaches this value */
171 static int callout_fail_id; /* The callout number that callout() counts */
172 static int debug_lengths; /* Set to 1 by main() before each pattern is read */
173 static int first_callout; /* Non-zero => callout() re-prints the subject; cleared by callout() */
174 static int locale_set = 0; /* Non-zero => PRINTHEX uses isprint() rather than PRINTABLE */
175 static int show_malloc; /* Non-zero => the malloc/free wrappers log each call to outfile */
176 static int use_utf8; /* Non-zero => pchars() decodes its input as UTF-8 */
177 static size_t gotten_store; /* Size passed to the most recent new_malloc() call */
178
179 /* The buffers grow automatically if very long input lines are encountered. */
180
181 static int buffer_size = 50000; /* Current size of each buffer; doubled by extend_inputline() */
182 static uschar *buffer = NULL; /* Input lines are read into this buffer */
183 static uschar *dbuffer = NULL; /* Resized with buffer; contents are not copied on growth */
184 static uschar *pbuffer = NULL; /* Copy of the input for callouts; contents kept on growth */
185
186
187
188 /*************************************************
189 * Read or extend an input line *
190 *************************************************/
191
192 /* Input lines are read into buffer, but both patterns and data lines can be
193 continued over multiple input lines. In addition, if the buffer fills up, we
194 want to automatically expand it so as to be able to handle extremely large
195 lines that are needed for certain stress tests. When the input buffer is
196 expanded, the other two buffers must also be expanded likewise, and the
197 contents of pbuffer, which are a copy of the input for callouts, must be
198 preserved (for when expansion happens for a data line). This is not the most
199 optimal way of handling this, but hey, this is just a test program!
200
201 Arguments:
202 f the file to read
203 start where in buffer to start (this *must* be within buffer)
204 prompt for stdin or readline()
205
206 Returns: pointer to the start of new data
207 could be a copy of start, or could be moved
208 NULL if no data read and EOF reached
209 */
210
211 static uschar *
212 extend_inputline(FILE *f, uschar *start, const char *prompt)
213 {
214 uschar *here = start;
215
216 for (;;)
217 {
218 int rlen = buffer_size - (here - buffer);
219
220 if (rlen > 1000)
221 {
222 int dlen;
223
224 /* If libreadline support is required, use readline() to read a line if the
225 input is a terminal. Note that readline() removes the trailing newline, so
226 we must put it back again, to be compatible with fgets(). */
227
228 #ifdef SUPPORT_LIBREADLINE
229 if (isatty(fileno(f)))
230 {
231 size_t len;
232 char *s = readline(prompt);
233 if (s == NULL) return (here == start)? NULL : start;
234 len = strlen(s);
235 if (len > 0) add_history(s);
236 if (len > rlen - 1) len = rlen - 1;
237 memcpy(here, s, len);
238 here[len] = '\n';
239 here[len+1] = 0;
240 free(s);
241 }
242 else
243 #endif
244
245 /* Read the next line by normal means, prompting if the file is stdin. */
246
247 {
248 if (f == stdin) printf(prompt);
249 if (fgets((char *)here, rlen, f) == NULL)
250 return (here == start)? NULL : start;
251 }
252
253 dlen = (int)strlen((char *)here);
254 if (dlen > 0 && here[dlen - 1] == '\n') return start;
255 here += dlen;
256 }
257
258 else
259 {
260 int new_buffer_size = 2*buffer_size;
261 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
262 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
263 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
264
265 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
266 {
267 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
268 exit(1);
269 }
270
271 memcpy(new_buffer, buffer, buffer_size);
272 memcpy(new_pbuffer, pbuffer, buffer_size);
273
274 buffer_size = new_buffer_size;
275
276 start = new_buffer + (start - buffer);
277 here = new_buffer + (here - buffer);
278
279 free(buffer);
280 free(dbuffer);
281 free(pbuffer);
282
283 buffer = new_buffer;
284 dbuffer = new_dbuffer;
285 pbuffer = new_pbuffer;
286 }
287 }
288
289 return NULL; /* Control never gets here */
290 }
291
292
293
294
295
296
297
298 /*************************************************
299 * Read number from string *
300 *************************************************/
301
/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, the conversion is done by hand. Leading
white space is skipped, then a run of decimal digits is accumulated. There is
no sign handling and no overflow check; it is only used for unpicking
arguments, so it is kept simple.

Arguments:
  str        string to be converted
  endptr     where to put the end pointer (the first character not consumed)

Returns:     the value as an int; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
unsigned char *scan = str;
int total = 0;

for (; *scan != 0 && isspace(*scan); scan++) { /* skip leading white space */ }

for (; isdigit(*scan); scan++)
  total = total * 10 + (int)(*scan - '0');

*endptr = scan;
return total;
}
322
323
324
325
326 /*************************************************
327 * Convert UTF-8 string to value *
328 *************************************************/
329
330 /* This function takes one or more bytes that represents a UTF-8 character,
331 and returns the value of the character.
332
333 Argument:
334 utf8bytes a pointer to the byte vector
335 vptr a pointer to an int to receive the value
336
337 Returns: > 0 => the number of bytes consumed
338 -6 to 0 => malformed UTF-8 character at offset = (-return)
339 */
340
341 #if !defined NOUTF8
342
343 static int
344 utf82ord(unsigned char *utf8bytes, int *vptr)
345 {
346 int c = *utf8bytes++;
347 int d = c;
348 int i, j, s;
349
350 for (i = -1; i < 6; i++) /* i is number of additional bytes */
351 {
352 if ((d & 0x80) == 0) break;
353 d <<= 1;
354 }
355
356 if (i == -1) { *vptr = c; return 1; } /* ascii character */
357 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
358
359 /* i now has a value in the range 1-5 */
360
361 s = 6*i;
362 d = (c & utf8_table3[i]) << s;
363
364 for (j = 0; j < i; j++)
365 {
366 c = *utf8bytes++;
367 if ((c & 0xc0) != 0x80) return -(j+1);
368 s -= 6;
369 d |= (c & 0x3f) << s;
370 }
371
372 /* Check that encoding was the correct unique one */
373
374 for (j = 0; j < utf8_table1_size; j++)
375 if (d <= utf8_table1[j]) break;
376 if (j != i) return -(i+1);
377
378 /* Valid value */
379
380 *vptr = d;
381 return i+1;
382 }
383
384 #endif
385
386
387
388 /*************************************************
389 * Convert character value to UTF-8 *
390 *************************************************/
391
392 /* This function takes an integer value in the range 0 - 0x7fffffff
393 and encodes it as a UTF-8 character in 0 to 6 bytes.
394
395 Arguments:
396 cvalue the character value
397 utf8bytes pointer to buffer for result - at least 6 bytes long
398
399 Returns: number of characters placed in the buffer
400 */
401
402 #if !defined NOUTF8
403
404 static int
405 ord2utf8(int cvalue, uschar *utf8bytes)
406 {
407 register int i, j;
408 for (i = 0; i < utf8_table1_size; i++)
409 if (cvalue <= utf8_table1[i]) break;
410 utf8bytes += i;
411 for (j = i; j > 0; j--)
412 {
413 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
414 cvalue >>= 6;
415 }
416 *utf8bytes = utf8_table2[i] | cvalue;
417 return i + 1;
418 }
419
420 #endif
421
422
423
424 /*************************************************
425 * Print character string *
426 *************************************************/
427
428 /* Character string printing function. Must handle UTF-8 strings in utf8
429 mode. Yields number of characters printed. If handed a NULL file, just counts
430 chars without printing. */
431
432 static int pchars(unsigned char *p, int length, FILE *f)
433 {
434 int c = 0;
435 int yield = 0;
436
437 while (length-- > 0)
438 {
439 #if !defined NOUTF8
440 if (use_utf8)
441 {
442 int rc = utf82ord(p, &c);
443
444 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
445 {
446 length -= rc - 1;
447 p += rc;
448 if (PRINTHEX(c))
449 {
450 if (f != NULL) fprintf(f, "%c", c);
451 yield++;
452 }
453 else
454 {
455 int n = 4;
456 if (f != NULL) fprintf(f, "\\x{%02x}", c);
457 yield += (n <= 0x000000ff)? 2 :
458 (n <= 0x00000fff)? 3 :
459 (n <= 0x0000ffff)? 4 :
460 (n <= 0x000fffff)? 5 : 6;
461 }
462 continue;
463 }
464 }
465 #endif
466
467 /* Not UTF-8, or malformed UTF-8 */
468
469 c = *p++;
470 if (PRINTHEX(c))
471 {
472 if (f != NULL) fprintf(f, "%c", c);
473 yield++;
474 }
475 else
476 {
477 if (f != NULL) fprintf(f, "\\x%02x", c);
478 yield += 4;
479 }
480 }
481
482 return yield;
483 }
484
485
486
487 /*************************************************
488 * Callout function *
489 *************************************************/
490
491 /* Called from PCRE as a result of the (?C) item. We print out where we are in
492 the match. Yield zero unless more callouts than the fail count, or the callout
493 data is not zero. */
494
495 static int callout(pcre_callout_block *cb)
496 {
497 FILE *f = (first_callout | callout_extra)? outfile : NULL;
498 int i, pre_start, post_start, subject_length;
499
500 if (callout_extra)
501 {
502 fprintf(f, "Callout %d: last capture = %d\n",
503 cb->callout_number, cb->capture_last);
504
505 for (i = 0; i < cb->capture_top * 2; i += 2)
506 {
507 if (cb->offset_vector[i] < 0)
508 fprintf(f, "%2d: <unset>\n", i/2);
509 else
510 {
511 fprintf(f, "%2d: ", i/2);
512 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
513 cb->offset_vector[i+1] - cb->offset_vector[i], f);
514 fprintf(f, "\n");
515 }
516 }
517 }
518
519 /* Re-print the subject in canonical form, the first time or if giving full
520 datails. On subsequent calls in the same match, we use pchars just to find the
521 printed lengths of the substrings. */
522
523 if (f != NULL) fprintf(f, "--->");
524
525 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
526 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
527 cb->current_position - cb->start_match, f);
528
529 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
530
531 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
532 cb->subject_length - cb->current_position, f);
533
534 if (f != NULL) fprintf(f, "\n");
535
536 /* Always print appropriate indicators, with callout number if not already
537 shown. For automatic callouts, show the pattern offset. */
538
539 if (cb->callout_number == 255)
540 {
541 fprintf(outfile, "%+3d ", cb->pattern_position);
542 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
543 }
544 else
545 {
546 if (callout_extra) fprintf(outfile, " ");
547 else fprintf(outfile, "%3d ", cb->callout_number);
548 }
549
550 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
551 fprintf(outfile, "^");
552
553 if (post_start > 0)
554 {
555 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
556 fprintf(outfile, "^");
557 }
558
559 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
560 fprintf(outfile, " ");
561
562 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
563 pbuffer + cb->pattern_position);
564
565 fprintf(outfile, "\n");
566 first_callout = 0;
567
568 if (cb->callout_data != NULL)
569 {
570 int callout_data = *((int *)(cb->callout_data));
571 if (callout_data != 0)
572 {
573 fprintf(outfile, "Callout data = %d\n", callout_data);
574 return callout_data;
575 }
576 }
577
578 return (cb->callout_number != callout_fail_id)? 0 :
579 (++callout_count >= callout_fail_count)? 1 : 0;
580 }
581
582
583 /*************************************************
584 * Local malloc functions *
585 *************************************************/
586
587 /* Alternative malloc function, to test functionality and show the size of the
588 compiled re. */
589
590 static void *new_malloc(size_t size)
591 {
592 void *block = malloc(size);
593 gotten_store = size;
594 if (show_malloc)
595 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
596 return block;
597 }
598
599 static void new_free(void *block)
600 {
601 if (show_malloc)
602 fprintf(outfile, "free %p\n", block);
603 free(block);
604 }
605
606
607 /* For recursion malloc/free, to test stacking calls */
608
609 static void *stack_malloc(size_t size)
610 {
611 void *block = malloc(size);
612 if (show_malloc)
613 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
614 return block;
615 }
616
617 static void stack_free(void *block)
618 {
619 if (show_malloc)
620 fprintf(outfile, "stack_free %p\n", block);
621 free(block);
622 }
623
624
625 /*************************************************
626 * Call pcre_fullinfo() *
627 *************************************************/
628
629 /* Get one piece of information from the pcre_fullinfo() function */
630
631 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
632 {
633 int rc;
634 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
635 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
636 }
637
638
639
640 /*************************************************
641 * Byte flipping function *
642 *************************************************/
643
/* Reverse the byte order of a 2-byte or a 4-byte quantity.

Arguments:
  value      the value to flip
  n          2 to swap the two low-order bytes; any other value reverses the
               four low-order bytes

Returns:     the flipped value; bytes of value above those that take part in
               the flip do not appear in the result
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;

return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
653
654
655
656
657 /*************************************************
658 * Check match or recursion limit *
659 *************************************************/
660
661 static int
662 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
663 int start_offset, int options, int *use_offsets, int use_size_offsets,
664 int flag, unsigned long int *limit, int errnumber, const char *msg)
665 {
666 int count;
667 int min = 0;
668 int mid = 64;
669 int max = -1;
670
671 extra->flags |= flag;
672
673 for (;;)
674 {
675 *limit = mid;
676
677 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
678 use_offsets, use_size_offsets);
679
680 if (count == errnumber)
681 {
682 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
683 min = mid;
684 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
685 }
686
687 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
688 count == PCRE_ERROR_PARTIAL)
689 {
690 if (mid == min + 1)
691 {
692 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
693 break;
694 }
695 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
696 max = mid;
697 mid = (min + mid)/2;
698 }
699 else break; /* Some other error */
700 }
701
702 extra->flags &= ~flag;
703 return count;
704 }
705
706
707
708 /*************************************************
709 * Case-independent strncmp() function *
710 *************************************************/
711
712 /*
713 Arguments:
714 s first string
715 t second string
716 n number of characters to compare
717
718 Returns: < 0, = 0, or > 0, according to the comparison
719 */
720
721 static int
722 strncmpic(uschar *s, uschar *t, int n)
723 {
724 while (n--)
725 {
726 int c = tolower(*s++) - tolower(*t++);
727 if (c) return c;
728 }
729 return 0;
730 }
731
732
733
734 /*************************************************
735 * Check newline indicator *
736 *************************************************/
737
738 /* This is used both at compile and run-time to check for <xxx> escapes, where
739 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
740 no match.
741
742 Arguments:
743 p points after the leading '<'
744 f file for error message
745
746 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
747 */
748
749 static int
750 check_newline(uschar *p, FILE *f)
751 {
752 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
753 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
754 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
755 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
756 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
757 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
758 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
759 fprintf(f, "Unknown newline type at: <%s\n", p);
760 return 0;
761 }
762
763
764
765 /*************************************************
766 * Usage function *
767 *************************************************/
768
/* Write the command-line summary to stdout. Which lines appear depends on
whether SUPPORT_LIBREADLINE, NODFA and NOPOSIX are defined. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s output store (memory) used information\n"
      " -t time compilation and execution\n", stdout);
fputs(" -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
801
802
803
804 /*************************************************
805 * Main Program *
806 *************************************************/
807
808 /* Read lines from named file or stdin and write to named file or stdout; lines
809 consist of a regular expression, in delimiters and optionally followed by
810 options, followed by a set of test data, terminated by an empty line. */
811
812 int main(int argc, char **argv)
813 {
814 FILE *infile = stdin;
815 int options = 0;
816 int study_options = 0;
817 int op = 1;
818 int timeit = 0;
819 int timeitm = 0;
820 int showinfo = 0;
821 int showstore = 0;
822 int quiet = 0;
823 int size_offsets = 45;
824 int size_offsets_max;
825 int *offsets = NULL;
826 #if !defined NOPOSIX
827 int posix = 0;
828 #endif
829 int debug = 0;
830 int done = 0;
831 int all_use_dfa = 0;
832 int yield = 0;
833 int stack_size;
834
835 /* These vectors store, end-to-end, a list of captured substring names. Assume
836 that 1024 is plenty long enough for the few names we'll be testing. */
837
838 uschar copynames[1024];
839 uschar getnames[1024];
840
841 uschar *copynamesptr;
842 uschar *getnamesptr;
843
844 /* Get buffers from malloc() so that Electric Fence will check their misuse
845 when I am debugging. They grow automatically when very long lines are read. */
846
847 buffer = (unsigned char *)malloc(buffer_size);
848 dbuffer = (unsigned char *)malloc(buffer_size);
849 pbuffer = (unsigned char *)malloc(buffer_size);
850
851 /* The outfile variable is static so that new_malloc can use it. */
852
853 outfile = stdout;
854
855 /* The following _setmode() stuff is some Windows magic that tells its runtime
856 library to translate CRLF into a single LF character. At least, that's what
857 I've been told: never having used Windows I take this all on trust. Originally
858 it set 0x8000, but then I was advised that _O_BINARY was better. */
859
860 #if defined(_WIN32) || defined(WIN32)
861 _setmode( _fileno( stdout ), _O_BINARY );
862 #endif
863
864 /* Scan options */
865
866 while (argc > 1 && argv[op][0] == '-')
867 {
868 unsigned char *endptr;
869
870 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
871 showstore = 1;
872 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
873 else if (strcmp(argv[op], "-b") == 0) debug = 1;
874 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
875 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
876 #if !defined NODFA
877 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
878 #endif
879 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
880 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
881 *endptr == 0))
882 {
883 op++;
884 argc--;
885 }
886 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
887 {
888 int both = argv[op][2] == 0;
889 int temp;
890 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
891 *endptr == 0))
892 {
893 timeitm = temp;
894 op++;
895 argc--;
896 }
897 else timeitm = LOOPREPEAT;
898 if (both) timeit = timeitm;
899 }
900 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
901 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
902 *endptr == 0))
903 {
904 #if defined(_WIN32) || defined(WIN32)
905 printf("PCRE: -S not supported on this OS\n");
906 exit(1);
907 #else
908 int rc;
909 struct rlimit rlim;
910 getrlimit(RLIMIT_STACK, &rlim);
911 rlim.rlim_cur = stack_size * 1024 * 1024;
912 rc = setrlimit(RLIMIT_STACK, &rlim);
913 if (rc != 0)
914 {
915 printf("PCRE: setrlimit() failed with error %d\n", rc);
916 exit(1);
917 }
918 op++;
919 argc--;
920 #endif
921 }
922 #if !defined NOPOSIX
923 else if (strcmp(argv[op], "-p") == 0) posix = 1;
924 #endif
925 else if (strcmp(argv[op], "-C") == 0)
926 {
927 int rc;
928 unsigned long int lrc;
929 printf("PCRE version %s\n", pcre_version());
930 printf("Compiled with\n");
931 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
932 printf(" %sUTF-8 support\n", rc? "" : "No ");
933 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
934 printf(" %sUnicode properties support\n", rc? "" : "No ");
935 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
936 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
937 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
938 (rc == -2)? "ANYCRLF" :
939 (rc == -1)? "ANY" : "???");
940 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
941 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
942 "all Unicode newlines");
943 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
944 printf(" Internal link size = %d\n", rc);
945 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
946 printf(" POSIX malloc threshold = %d\n", rc);
947 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
948 printf(" Default match limit = %ld\n", lrc);
949 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
950 printf(" Default recursion depth limit = %ld\n", lrc);
951 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
952 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
953 goto EXIT;
954 }
955 else if (strcmp(argv[op], "-help") == 0 ||
956 strcmp(argv[op], "--help") == 0)
957 {
958 usage();
959 goto EXIT;
960 }
961 else
962 {
963 printf("** Unknown or malformed option %s\n", argv[op]);
964 usage();
965 yield = 1;
966 goto EXIT;
967 }
968 op++;
969 argc--;
970 }
971
972 /* Get the store for the offsets vector, and remember what it was */
973
974 size_offsets_max = size_offsets;
975 offsets = (int *)malloc(size_offsets_max * sizeof(int));
976 if (offsets == NULL)
977 {
978 printf("** Failed to get %d bytes of memory for offsets vector\n",
979 (int)(size_offsets_max * sizeof(int)));
980 yield = 1;
981 goto EXIT;
982 }
983
984 /* Sort out the input and output files */
985
986 if (argc > 1)
987 {
988 infile = fopen(argv[op], INPUT_MODE);
989 if (infile == NULL)
990 {
991 printf("** Failed to open %s\n", argv[op]);
992 yield = 1;
993 goto EXIT;
994 }
995 }
996
997 if (argc > 2)
998 {
999 outfile = fopen(argv[op+1], OUTPUT_MODE);
1000 if (outfile == NULL)
1001 {
1002 printf("** Failed to open %s\n", argv[op+1]);
1003 yield = 1;
1004 goto EXIT;
1005 }
1006 }
1007
1008 /* Set alternative malloc function */
1009
1010 pcre_malloc = new_malloc;
1011 pcre_free = new_free;
1012 pcre_stack_malloc = stack_malloc;
1013 pcre_stack_free = stack_free;
1014
1015 /* Heading line unless quiet, then prompt for first regex if stdin */
1016
1017 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1018
1019 /* Main loop */
1020
1021 while (!done)
1022 {
1023 pcre *re = NULL;
1024 pcre_extra *extra = NULL;
1025
1026 #if !defined NOPOSIX /* There are still compilers that require no indent */
1027 regex_t preg;
1028 int do_posix = 0;
1029 #endif
1030
1031 const char *error;
1032 unsigned char *p, *pp, *ppp;
1033 unsigned char *to_file = NULL;
1034 const unsigned char *tables = NULL;
1035 unsigned long int true_size, true_study_size = 0;
1036 size_t size, regex_gotten_store;
1037 int do_study = 0;
1038 int do_debug = debug;
1039 int do_G = 0;
1040 int do_g = 0;
1041 int do_showinfo = showinfo;
1042 int do_showrest = 0;
1043 int do_flip = 0;
1044 int erroroffset, len, delimiter, poffset;
1045
1046 use_utf8 = 0;
1047 debug_lengths = 1;
1048
1049 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1050 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1051 fflush(outfile);
1052
1053 p = buffer;
1054 while (isspace(*p)) p++;
1055 if (*p == 0) continue;
1056
1057 /* See if the pattern is to be loaded pre-compiled from a file. */
1058
1059 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1060 {
1061 unsigned long int magic, get_options;
1062 uschar sbuf[8];
1063 FILE *f;
1064
1065 p++;
1066 pp = p + (int)strlen((char *)p);
1067 while (isspace(pp[-1])) pp--;
1068 *pp = 0;
1069
1070 f = fopen((char *)p, "rb");
1071 if (f == NULL)
1072 {
1073 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1074 continue;
1075 }
1076
1077 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1078
1079 true_size =
1080 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1081 true_study_size =
1082 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1083
1084 re = (real_pcre *)new_malloc(true_size);
1085 regex_gotten_store = gotten_store;
1086
1087 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1088
1089 magic = ((real_pcre *)re)->magic_number;
1090 if (magic != MAGIC_NUMBER)
1091 {
1092 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1093 {
1094 do_flip = 1;
1095 }
1096 else
1097 {
1098 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1099 fclose(f);
1100 continue;
1101 }
1102 }
1103
1104 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1105 do_flip? " (byte-inverted)" : "", p);
1106
1107 /* Need to know if UTF-8 for printing data strings */
1108
1109 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1110 use_utf8 = (get_options & PCRE_UTF8) != 0;
1111
1112 /* Now see if there is any following study data */
1113
1114 if (true_study_size != 0)
1115 {
1116 pcre_study_data *psd;
1117
1118 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1119 extra->flags = PCRE_EXTRA_STUDY_DATA;
1120
1121 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1122 extra->study_data = psd;
1123
1124 if (fread(psd, 1, true_study_size, f) != true_study_size)
1125 {
1126 FAIL_READ:
1127 fprintf(outfile, "Failed to read data from %s\n", p);
1128 if (extra != NULL) new_free(extra);
1129 if (re != NULL) new_free(re);
1130 fclose(f);
1131 continue;
1132 }
1133 fprintf(outfile, "Study data loaded from %s\n", p);
1134 do_study = 1; /* To get the data output if requested */
1135 }
1136 else fprintf(outfile, "No study data\n");
1137
1138 fclose(f);
1139 goto SHOW_INFO;
1140 }
1141
1142 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1143 the pattern; if it isn't complete, read more. */
1144
1145 delimiter = *p++;
1146
1147 if (isalnum(delimiter) || delimiter == '\\')
1148 {
1149 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1150 goto SKIP_DATA;
1151 }
1152
1153 pp = p;
1154 poffset = p - buffer;
1155
1156 for(;;)
1157 {
1158 while (*pp != 0)
1159 {
1160 if (*pp == '\\' && pp[1] != 0) pp++;
1161 else if (*pp == delimiter) break;
1162 pp++;
1163 }
1164 if (*pp != 0) break;
1165 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1166 {
1167 fprintf(outfile, "** Unexpected EOF\n");
1168 done = 1;
1169 goto CONTINUE;
1170 }
1171 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1172 }
1173
1174 /* The buffer may have moved while being extended; reset the start of data
1175 pointer to the correct relative point in the buffer. */
1176
1177 p = buffer + poffset;
1178
1179 /* If the first character after the delimiter is backslash, make
1180 the pattern end with backslash. This is purely to provide a way
1181 of testing for the error message when a pattern ends with backslash. */
1182
1183 if (pp[1] == '\\') *pp++ = '\\';
1184
1185 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1186 for callouts. */
1187
1188 *pp++ = 0;
1189 strcpy((char *)pbuffer, (char *)p);
1190
1191 /* Look for options after final delimiter */
1192
1193 options = 0;
1194 study_options = 0;
1195 log_store = showstore; /* default from command line */
1196
1197 while (*pp != 0)
1198 {
1199 switch (*pp++)
1200 {
1201 case 'f': options |= PCRE_FIRSTLINE; break;
1202 case 'g': do_g = 1; break;
1203 case 'i': options |= PCRE_CASELESS; break;
1204 case 'm': options |= PCRE_MULTILINE; break;
1205 case 's': options |= PCRE_DOTALL; break;
1206 case 'x': options |= PCRE_EXTENDED; break;
1207
1208 case '+': do_showrest = 1; break;
1209 case 'A': options |= PCRE_ANCHORED; break;
1210 case 'B': do_debug = 1; break;
1211 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1212 case 'D': do_debug = do_showinfo = 1; break;
1213 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1214 case 'F': do_flip = 1; break;
1215 case 'G': do_G = 1; break;
1216 case 'I': do_showinfo = 1; break;
1217 case 'J': options |= PCRE_DUPNAMES; break;
1218 case 'M': log_store = 1; break;
1219 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1220
1221 #if !defined NOPOSIX
1222 case 'P': do_posix = 1; break;
1223 #endif
1224
1225 case 'S': do_study = 1; break;
1226 case 'U': options |= PCRE_UNGREEDY; break;
1227 case 'X': options |= PCRE_EXTRA; break;
1228 case 'Z': debug_lengths = 0; break;
1229 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1230 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1231
1232 case 'L':
1233 ppp = pp;
1234 /* The '\r' test here is so that it works on Windows. */
1235 /* The '0' test is just in case this is an unterminated line. */
1236 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1237 *ppp = 0;
1238 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1239 {
1240 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1241 goto SKIP_DATA;
1242 }
1243 locale_set = 1;
1244 tables = pcre_maketables();
1245 pp = ppp;
1246 break;
1247
1248 case '>':
1249 to_file = pp;
1250 while (*pp != 0) pp++;
1251 while (isspace(pp[-1])) pp--;
1252 *pp = 0;
1253 break;
1254
1255 case '<':
1256 {
1257 if (strncmp((char *)pp, "JS>", 3) == 0)
1258 {
1259 options |= PCRE_JAVASCRIPT_COMPAT;
1260 pp += 3;
1261 }
1262 else
1263 {
1264 int x = check_newline(pp, outfile);
1265 if (x == 0) goto SKIP_DATA;
1266 options |= x;
1267 while (*pp++ != '>');
1268 }
1269 }
1270 break;
1271
1272 case '\r': /* So that it works in Windows */
1273 case '\n':
1274 case ' ':
1275 break;
1276
1277 default:
1278 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1279 goto SKIP_DATA;
1280 }
1281 }
1282
1283 /* Handle compiling via the POSIX interface, which doesn't support the
1284 timing, showing, or debugging options, nor the ability to pass over
1285 local character tables. */
1286
1287 #if !defined NOPOSIX
1288 if (posix || do_posix)
1289 {
1290 int rc;
1291 int cflags = 0;
1292
1293 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1294 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1295 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1296 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1297 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1298
1299 rc = regcomp(&preg, (char *)p, cflags);
1300
1301 /* Compilation failed; go back for another re, skipping to blank line
1302 if non-interactive. */
1303
1304 if (rc != 0)
1305 {
1306 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1307 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1308 goto SKIP_DATA;
1309 }
1310 }
1311
1312 /* Handle compiling via the native interface */
1313
1314 else
1315 #endif /* !defined NOPOSIX */
1316
1317 {
1318 if (timeit > 0)
1319 {
1320 register int i;
1321 clock_t time_taken;
1322 clock_t start_time = clock();
1323 for (i = 0; i < timeit; i++)
1324 {
1325 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1326 if (re != NULL) free(re);
1327 }
1328 time_taken = clock() - start_time;
1329 fprintf(outfile, "Compile time %.4f milliseconds\n",
1330 (((double)time_taken * 1000.0) / (double)timeit) /
1331 (double)CLOCKS_PER_SEC);
1332 }
1333
1334 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1335
1336 /* Compilation failed; go back for another re, skipping to blank line
1337 if non-interactive. */
1338
1339 if (re == NULL)
1340 {
1341 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1342 SKIP_DATA:
1343 if (infile != stdin)
1344 {
1345 for (;;)
1346 {
1347 if (extend_inputline(infile, buffer, NULL) == NULL)
1348 {
1349 done = 1;
1350 goto CONTINUE;
1351 }
1352 len = (int)strlen((char *)buffer);
1353 while (len > 0 && isspace(buffer[len-1])) len--;
1354 if (len == 0) break;
1355 }
1356 fprintf(outfile, "\n");
1357 }
1358 goto CONTINUE;
1359 }
1360
1361 /* Compilation succeeded; print data if required. There are now two
1362 info-returning functions. The old one has a limited interface and
1363 returns only limited data. Check that it agrees with the newer one. */
1364
1365 if (log_store)
1366 fprintf(outfile, "Memory allocation (code space): %d\n",
1367 (int)(gotten_store -
1368 sizeof(real_pcre) -
1369 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1370
1371 /* Extract the size for possible writing before possibly flipping it,
1372 and remember the store that was got. */
1373
1374 true_size = ((real_pcre *)re)->size;
1375 regex_gotten_store = gotten_store;
1376
1377 /* If /S was present, study the regexp to generate additional info to
1378 help with the matching. */
1379
1380 if (do_study)
1381 {
1382 if (timeit > 0)
1383 {
1384 register int i;
1385 clock_t time_taken;
1386 clock_t start_time = clock();
1387 for (i = 0; i < timeit; i++)
1388 extra = pcre_study(re, study_options, &error);
1389 time_taken = clock() - start_time;
1390 if (extra != NULL) free(extra);
1391 fprintf(outfile, " Study time %.4f milliseconds\n",
1392 (((double)time_taken * 1000.0) / (double)timeit) /
1393 (double)CLOCKS_PER_SEC);
1394 }
1395 extra = pcre_study(re, study_options, &error);
1396 if (error != NULL)
1397 fprintf(outfile, "Failed to study: %s\n", error);
1398 else if (extra != NULL)
1399 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1400 }
1401
1402 /* If the 'F' option was present, we flip the bytes of all the integer
1403 fields in the regex data block and the study block. This is to make it
1404 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1405 compiled on a different architecture. */
1406
1407 if (do_flip)
1408 {
1409 real_pcre *rre = (real_pcre *)re;
1410 rre->magic_number =
1411 byteflip(rre->magic_number, sizeof(rre->magic_number));
1412 rre->size = byteflip(rre->size, sizeof(rre->size));
1413 rre->options = byteflip(rre->options, sizeof(rre->options));
1414 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1415 rre->top_bracket =
1416 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1417 rre->top_backref =
1418 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1419 rre->first_byte =
1420 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1421 rre->req_byte =
1422 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1423 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1424 sizeof(rre->name_table_offset));
1425 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1426 sizeof(rre->name_entry_size));
1427 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1428 sizeof(rre->name_count));
1429
1430 if (extra != NULL)
1431 {
1432 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1433 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1434 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1435 }
1436 }
1437
1438 /* Extract information from the compiled data if required */
1439
1440 SHOW_INFO:
1441
1442 if (do_debug)
1443 {
1444 fprintf(outfile, "------------------------------------------------------------------\n");
1445 pcre_printint(re, outfile, debug_lengths);
1446 }
1447
1448 if (do_showinfo)
1449 {
1450 unsigned long int get_options, all_options;
1451 #if !defined NOINFOCHECK
1452 int old_first_char, old_options, old_count;
1453 #endif
1454 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1455 hascrorlf;
1456 int nameentrysize, namecount;
1457 const uschar *nametable;
1458
1459 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1460 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1461 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1462 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1463 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1464 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1465 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1466 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1467 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1468 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1469 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1470 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1471
1472 #if !defined NOINFOCHECK
1473 old_count = pcre_info(re, &old_options, &old_first_char);
1474 if (count < 0) fprintf(outfile,
1475 "Error %d from pcre_info()\n", count);
1476 else
1477 {
1478 if (old_count != count) fprintf(outfile,
1479 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1480 old_count);
1481
1482 if (old_first_char != first_char) fprintf(outfile,
1483 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1484 first_char, old_first_char);
1485
1486 if (old_options != (int)get_options) fprintf(outfile,
1487 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1488 get_options, old_options);
1489 }
1490 #endif
1491
1492 if (size != regex_gotten_store) fprintf(outfile,
1493 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1494 (int)size, (int)regex_gotten_store);
1495
1496 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1497 if (backrefmax > 0)
1498 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1499
1500 if (namecount > 0)
1501 {
1502 fprintf(outfile, "Named capturing subpatterns:\n");
1503 while (namecount-- > 0)
1504 {
1505 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1506 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1507 GET2(nametable, 0));
1508 nametable += nameentrysize;
1509 }
1510 }
1511
1512 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1513 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1514
1515 all_options = ((real_pcre *)re)->options;
1516 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1517
1518 if (get_options == 0) fprintf(outfile, "No options\n");
1519 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1520 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1521 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1522 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1523 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1524 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1525 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1526 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1527 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1528 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1529 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1530 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1531 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1532 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1533 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1534 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1535
1536 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1537
1538 switch (get_options & PCRE_NEWLINE_BITS)
1539 {
1540 case PCRE_NEWLINE_CR:
1541 fprintf(outfile, "Forced newline sequence: CR\n");
1542 break;
1543
1544 case PCRE_NEWLINE_LF:
1545 fprintf(outfile, "Forced newline sequence: LF\n");
1546 break;
1547
1548 case PCRE_NEWLINE_CRLF:
1549 fprintf(outfile, "Forced newline sequence: CRLF\n");
1550 break;
1551
1552 case PCRE_NEWLINE_ANYCRLF:
1553 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1554 break;
1555
1556 case PCRE_NEWLINE_ANY:
1557 fprintf(outfile, "Forced newline sequence: ANY\n");
1558 break;
1559
1560 default:
1561 break;
1562 }
1563
1564 if (first_char == -1)
1565 {
1566 fprintf(outfile, "First char at start or follows newline\n");
1567 }
1568 else if (first_char < 0)
1569 {
1570 fprintf(outfile, "No first char\n");
1571 }
1572 else
1573 {
1574 int ch = first_char & 255;
1575 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1576 "" : " (caseless)";
1577 if (PRINTHEX(ch))
1578 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1579 else
1580 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1581 }
1582
1583 if (need_char < 0)
1584 {
1585 fprintf(outfile, "No need char\n");
1586 }
1587 else
1588 {
1589 int ch = need_char & 255;
1590 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1591 "" : " (caseless)";
1592 if (PRINTHEX(ch))
1593 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1594 else
1595 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1596 }
1597
1598 /* Don't output study size; at present it is in any case a fixed
1599 value, but it varies, depending on the computer architecture, and
1600 so messes up the test suite. (And with the /F option, it might be
1601 flipped.) */
1602
1603 if (do_study)
1604 {
1605 if (extra == NULL)
1606 fprintf(outfile, "Study returned NULL\n");
1607 else
1608 {
1609 uschar *start_bits = NULL;
1610 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1611
1612 if (start_bits == NULL)
1613 fprintf(outfile, "No starting byte set\n");
1614 else
1615 {
1616 int i;
1617 int c = 24;
1618 fprintf(outfile, "Starting byte set: ");
1619 for (i = 0; i < 256; i++)
1620 {
1621 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1622 {
1623 if (c > 75)
1624 {
1625 fprintf(outfile, "\n ");
1626 c = 2;
1627 }
1628 if (PRINTHEX(i) && i != ' ')
1629 {
1630 fprintf(outfile, "%c ", i);
1631 c += 2;
1632 }
1633 else
1634 {
1635 fprintf(outfile, "\\x%02x ", i);
1636 c += 5;
1637 }
1638 }
1639 }
1640 fprintf(outfile, "\n");
1641 }
1642 }
1643 }
1644 }
1645
1646 /* If the '>' option was present, we write out the regex to a file, and
1647 that is all. The first 8 bytes of the file are the regex length and then
1648 the study length, in big-endian order. */
1649
1650 if (to_file != NULL)
1651 {
1652 FILE *f = fopen((char *)to_file, "wb");
1653 if (f == NULL)
1654 {
1655 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1656 }
1657 else
1658 {
1659 uschar sbuf[8];
1660 sbuf[0] = (uschar)((true_size >> 24) & 255);
1661 sbuf[1] = (uschar)((true_size >> 16) & 255);
1662 sbuf[2] = (uschar)((true_size >> 8) & 255);
1663 sbuf[3] = (uschar)((true_size) & 255);
1664
1665 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1666 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1667 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1668 sbuf[7] = (uschar)((true_study_size) & 255);
1669
1670 if (fwrite(sbuf, 1, 8, f) < 8 ||
1671 fwrite(re, 1, true_size, f) < true_size)
1672 {
1673 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1674 }
1675 else
1676 {
1677 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1678 if (extra != NULL)
1679 {
1680 if (fwrite(extra->study_data, 1, true_study_size, f) <
1681 true_study_size)
1682 {
1683 fprintf(outfile, "Write error on %s: %s\n", to_file,
1684 strerror(errno));
1685 }
1686 else fprintf(outfile, "Study data written to %s\n", to_file);
1687
1688 }
1689 }
1690 fclose(f);
1691 }
1692
1693 new_free(re);
1694 if (extra != NULL) new_free(extra);
1695 if (tables != NULL) new_free((void *)tables);
1696 continue; /* With next regex */
1697 }
1698 } /* End of non-POSIX compile */
1699
1700 /* Read data lines and test them */
1701
1702 for (;;)
1703 {
1704 uschar *q;
1705 uschar *bptr;
1706 int *use_offsets = offsets;
1707 int use_size_offsets = size_offsets;
1708 int callout_data = 0;
1709 int callout_data_set = 0;
1710 int count, c;
1711 int copystrings = 0;
1712 int find_match_limit = 0;
1713 int getstrings = 0;
1714 int getlist = 0;
1715 int gmatched = 0;
1716 int start_offset = 0;
1717 int g_notempty = 0;
1718 int use_dfa = 0;
1719
1720 options = 0;
1721
1722 *copynames = 0;
1723 *getnames = 0;
1724
1725 copynamesptr = copynames;
1726 getnamesptr = getnames;
1727
1728 pcre_callout = callout;
1729 first_callout = 1;
1730 callout_extra = 0;
1731 callout_count = 0;
1732 callout_fail_count = 999999;
1733 callout_fail_id = -1;
1734 show_malloc = 0;
1735
1736 if (extra != NULL) extra->flags &=
1737 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1738
1739 len = 0;
1740 for (;;)
1741 {
1742 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1743 {
1744 if (len > 0) break;
1745 done = 1;
1746 goto CONTINUE;
1747 }
1748 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1749 len = (int)strlen((char *)buffer);
1750 if (buffer[len-1] == '\n') break;
1751 }
1752
1753 while (len > 0 && isspace(buffer[len-1])) len--;
1754 buffer[len] = 0;
1755 if (len == 0) break;
1756
1757 p = buffer;
1758 while (isspace(*p)) p++;
1759
1760 bptr = q = dbuffer;
1761 while ((c = *p++) != 0)
1762 {
1763 int i = 0;
1764 int n = 0;
1765
1766 if (c == '\\') switch ((c = *p++))
1767 {
1768 case 'a': c = 7; break;
1769 case 'b': c = '\b'; break;
1770 case 'e': c = 27; break;
1771 case 'f': c = '\f'; break;
1772 case 'n': c = '\n'; break;
1773 case 'r': c = '\r'; break;
1774 case 't': c = '\t'; break;
1775 case 'v': c = '\v'; break;
1776
1777 case '0': case '1': case '2': case '3':
1778 case '4': case '5': case '6': case '7':
1779 c -= '0';
1780 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1781 c = c * 8 + *p++ - '0';
1782
1783 #if !defined NOUTF8
1784 if (use_utf8 && c > 255)
1785 {
1786 unsigned char buff8[8];
1787 int ii, utn;
1788 utn = ord2utf8(c, buff8);
1789 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1790 c = buff8[ii]; /* Last byte */
1791 }
1792 #endif
1793 break;
1794
1795 case 'x':
1796
1797 /* Handle \x{..} specially - new Perl thing for utf8 */
1798
1799 #if !defined NOUTF8
1800 if (*p == '{')
1801 {
1802 unsigned char *pt = p;
1803 c = 0;
1804 while (isxdigit(*(++pt)))
1805 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1806 if (*pt == '}')
1807 {
1808 unsigned char buff8[8];
1809 int ii, utn;
1810 if (use_utf8)
1811 {
1812 utn = ord2utf8(c, buff8);
1813 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1814 c = buff8[ii]; /* Last byte */
1815 }
1816 else
1817 {
1818 if (c > 255)
1819 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1820 "UTF-8 mode is not enabled.\n"
1821 "** Truncation will probably give the wrong result.\n", c);
1822 }
1823 p = pt + 1;
1824 break;
1825 }
1826 /* Not correct form; fall through */
1827 }
1828 #endif
1829
1830 /* Ordinary \x */
1831
1832 c = 0;
1833 while (i++ < 2 && isxdigit(*p))
1834 {
1835 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1836 p++;
1837 }
1838 break;
1839
1840 case 0: /* \ followed by EOF allows for an empty line */
1841 p--;
1842 continue;
1843
1844 case '>':
1845 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1846 continue;
1847
1848 case 'A': /* Option setting */
1849 options |= PCRE_ANCHORED;
1850 continue;
1851
1852 case 'B':
1853 options |= PCRE_NOTBOL;
1854 continue;
1855
1856 case 'C':
1857 if (isdigit(*p)) /* Set copy string */
1858 {
1859 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1860 copystrings |= 1 << n;
1861 }
1862 else if (isalnum(*p))
1863 {
1864 uschar *npp = copynamesptr;
1865 while (isalnum(*p)) *npp++ = *p++;
1866 *npp++ = 0;
1867 *npp = 0;
1868 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1869 if (n < 0)
1870 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1871 copynamesptr = npp;
1872 }
1873 else if (*p == '+')
1874 {
1875 callout_extra = 1;
1876 p++;
1877 }
1878 else if (*p == '-')
1879 {
1880 pcre_callout = NULL;
1881 p++;
1882 }
1883 else if (*p == '!')
1884 {
1885 callout_fail_id = 0;
1886 p++;
1887 while(isdigit(*p))
1888 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1889 callout_fail_count = 0;
1890 if (*p == '!')
1891 {
1892 p++;
1893 while(isdigit(*p))
1894 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1895 }
1896 }
1897 else if (*p == '*')
1898 {
1899 int sign = 1;
1900 callout_data = 0;
1901 if (*(++p) == '-') { sign = -1; p++; }
1902 while(isdigit(*p))
1903 callout_data = callout_data * 10 + *p++ - '0';
1904 callout_data *= sign;
1905 callout_data_set = 1;
1906 }
1907 continue;
1908
1909 #if !defined NODFA
1910 case 'D':
1911 #if !defined NOPOSIX
1912 if (posix || do_posix)
1913 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1914 else
1915 #endif
1916 use_dfa = 1;
1917 continue;
1918
1919 case 'F':
1920 options |= PCRE_DFA_SHORTEST;
1921 continue;
1922 #endif
1923
1924 case 'G':
1925 if (isdigit(*p))
1926 {
1927 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1928 getstrings |= 1 << n;
1929 }
1930 else if (isalnum(*p))
1931 {
1932 uschar *npp = getnamesptr;
1933 while (isalnum(*p)) *npp++ = *p++;
1934 *npp++ = 0;
1935 *npp = 0;
1936 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1937 if (n < 0)
1938 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1939 getnamesptr = npp;
1940 }
1941 continue;
1942
1943 case 'L':
1944 getlist = 1;
1945 continue;
1946
1947 case 'M':
1948 find_match_limit = 1;
1949 continue;
1950
1951 case 'N':
1952 options |= PCRE_NOTEMPTY;
1953 continue;
1954
1955 case 'O':
1956 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1957 if (n > size_offsets_max)
1958 {
1959 size_offsets_max = n;
1960 free(offsets);
1961 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1962 if (offsets == NULL)
1963 {
1964 printf("** Failed to get %d bytes of memory for offsets vector\n",
1965 (int)(size_offsets_max * sizeof(int)));
1966 yield = 1;
1967 goto EXIT;
1968 }
1969 }
1970 use_size_offsets = n;
1971 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1972 continue;
1973
1974 case 'P':
1975 options |= PCRE_PARTIAL;
1976 continue;
1977
1978 case 'Q':
1979 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1980 if (extra == NULL)
1981 {
1982 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1983 extra->flags = 0;
1984 }
1985 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1986 extra->match_limit_recursion = n;
1987 continue;
1988
1989 case 'q':
1990 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1991 if (extra == NULL)
1992 {
1993 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1994 extra->flags = 0;
1995 }
1996 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1997 extra->match_limit = n;
1998 continue;
1999
2000 #if !defined NODFA
2001 case 'R':
2002 options |= PCRE_DFA_RESTART;
2003 continue;
2004 #endif
2005
2006 case 'S':
2007 show_malloc = 1;
2008 continue;
2009
2010 case 'Z':
2011 options |= PCRE_NOTEOL;
2012 continue;
2013
2014 case '?':
2015 options |= PCRE_NO_UTF8_CHECK;
2016 continue;
2017
2018 case '<':
2019 {
2020 int x = check_newline(p, outfile);
2021 if (x == 0) goto NEXT_DATA;
2022 options |= x;
2023 while (*p++ != '>');
2024 }
2025 continue;
2026 }
2027 *q++ = c;
2028 }
2029 *q = 0;
2030 len = q - dbuffer;
2031
2032 /* Move the data to the end of the buffer so that a read over the end of
2033 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2034 we are using the POSIX interface, we must include the terminating zero. */
2035
2036 #if !defined NOPOSIX
2037 if (posix || do_posix)
2038 {
2039 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2040 bptr += buffer_size - len - 1;
2041 }
2042 else
2043 #endif
2044 {
2045 memmove(bptr + buffer_size - len, bptr, len);
2046 bptr += buffer_size - len;
2047 }
2048
2049 if ((all_use_dfa || use_dfa) && find_match_limit)
2050 {
2051 printf("**Match limit not relevant for DFA matching: ignored\n");
2052 find_match_limit = 0;
2053 }
2054
2055 /* Handle matching via the POSIX interface, which does not
2056 support timing or playing with the match limit or callout data. */
2057
2058 #if !defined NOPOSIX
2059 if (posix || do_posix)
2060 {
2061 int rc;
2062 int eflags = 0;
2063 regmatch_t *pmatch = NULL;
2064 if (use_size_offsets > 0)
2065 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2066 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2067 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2068
2069 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2070
2071 if (rc != 0)
2072 {
2073 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2074 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2075 }
2076 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2077 != 0)
2078 {
2079 fprintf(outfile, "Matched with REG_NOSUB\n");
2080 }
2081 else
2082 {
2083 size_t i;
2084 for (i = 0; i < (size_t)use_size_offsets; i++)
2085 {
2086 if (pmatch[i].rm_so >= 0)
2087 {
2088 fprintf(outfile, "%2d: ", (int)i);
2089 (void)pchars(dbuffer + pmatch[i].rm_so,
2090 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2091 fprintf(outfile, "\n");
2092 if (i == 0 && do_showrest)
2093 {
2094 fprintf(outfile, " 0+ ");
2095 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2096 outfile);
2097 fprintf(outfile, "\n");
2098 }
2099 }
2100 }
2101 }
2102 free(pmatch);
2103 }
2104
2105 /* Handle matching via the native interface - repeats for /g and /G */
2106
2107 else
2108 #endif /* !defined NOPOSIX */
2109
2110 for (;; gmatched++) /* Loop for /g or /G */
2111 {
2112 if (timeitm > 0)
2113 {
2114 register int i;
2115 clock_t time_taken;
2116 clock_t start_time = clock();
2117
2118 #if !defined NODFA
2119 if (all_use_dfa || use_dfa)
2120 {
2121 int workspace[1000];
2122 for (i = 0; i < timeitm; i++)
2123 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2124 options | g_notempty, use_offsets, use_size_offsets, workspace,
2125 sizeof(workspace)/sizeof(int));
2126 }
2127 else
2128 #endif
2129
2130 for (i = 0; i < timeitm; i++)
2131 count = pcre_exec(re, extra, (char *)bptr, len,
2132 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2133
2134 time_taken = clock() - start_time;
2135 fprintf(outfile, "Execute time %.4f milliseconds\n",
2136 (((double)time_taken * 1000.0) / (double)timeitm) /
2137 (double)CLOCKS_PER_SEC);
2138 }
2139
2140 /* If find_match_limit is set, we want to do repeated matches with
2141 varying limits in order to find the minimum value for the match limit and
2142 for the recursion limit. */
2143
2144 if (find_match_limit)
2145 {
2146 if (extra == NULL)
2147 {
2148 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2149 extra->flags = 0;
2150 }
2151
2152 (void)check_match_limit(re, extra, bptr, len, start_offset,
2153 options|g_notempty, use_offsets, use_size_offsets,
2154 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2155 PCRE_ERROR_MATCHLIMIT, "match()");
2156
2157 count = check_match_limit(re, extra, bptr, len, start_offset,
2158 options|g_notempty, use_offsets, use_size_offsets,
2159 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2160 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2161 }
2162
2163 /* If callout_data is set, use the interface with additional data */
2164
2165 else if (callout_data_set)
2166 {
2167 if (extra == NULL)
2168 {
2169 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2170 extra->flags = 0;
2171 }
2172 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2173 extra->callout_data = &callout_data;
2174 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2175 options | g_notempty, use_offsets, use_size_offsets);
2176 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2177 }
2178
2179 /* The normal case is just to do the match once, with the default
2180 value of match_limit. */
2181
2182 #if !defined NODFA
2183 else if (all_use_dfa || use_dfa)
2184 {
2185 int workspace[1000];
2186 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2187 options | g_notempty, use_offsets, use_size_offsets, workspace,
2188 sizeof(workspace)/sizeof(int));
2189 if (count == 0)
2190 {
2191 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2192 count = use_size_offsets/2;
2193 }
2194 }
2195 #endif
2196
2197 else
2198 {
2199 count = pcre_exec(re, extra, (char *)bptr, len,
2200 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2201 if (count == 0)
2202 {
2203 fprintf(outfile, "Matched, but too many substrings\n");
2204 count = use_size_offsets/3;
2205 }
2206 }
2207
2208 /* Matched */
2209
2210 if (count >= 0)
2211 {
2212 int i, maxcount;
2213
2214 #if !defined NODFA
2215 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2216 #endif
2217 maxcount = use_size_offsets/3;
2218
2219 /* This is a check against a lunatic return value. */
2220
2221 if (count > maxcount)
2222 {
2223 fprintf(outfile,
2224 "** PCRE error: returned count %d is too big for offset size %d\n",
2225 count, use_size_offsets);
2226 count = use_size_offsets/3;
2227 if (do_g || do_G)
2228 {
2229 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2230 do_g = do_G = FALSE; /* Break g/G loop */
2231 }
2232 }
2233
2234 for (i = 0; i < count * 2; i += 2)
2235 {
2236 if (use_offsets[i] < 0)
2237 fprintf(outfile, "%2d: <unset>\n", i/2);
2238 else
2239 {
2240 fprintf(outfile, "%2d: ", i/2);
2241 (void)pchars(bptr + use_offsets[i],
2242 use_offsets[i+1] - use_offsets[i], outfile);
2243 fprintf(outfile, "\n");
2244 if (i == 0)
2245 {
2246 if (do_showrest)
2247 {
2248 fprintf(outfile, " 0+ ");
2249 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2250 outfile);
2251 fprintf(outfile, "\n");
2252 }
2253 }
2254 }
2255 }
2256
2257 for (i = 0; i < 32; i++)
2258 {
2259 if ((copystrings & (1 << i)) != 0)
2260 {
2261 char copybuffer[256];
2262 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2263 i, copybuffer, sizeof(copybuffer));
2264 if (rc < 0)
2265 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2266 else
2267 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2268 }
2269 }
2270
2271 for (copynamesptr = copynames;
2272 *copynamesptr != 0;
2273 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2274 {
2275 char copybuffer[256];
2276 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2277 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2278 if (rc < 0)
2279 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2280 else
2281 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2282 }
2283
2284 for (i = 0; i < 32; i++)
2285 {
2286 if ((getstrings & (1 << i)) != 0)
2287 {
2288 const char *substring;
2289 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2290 i, &substring);
2291 if (rc < 0)
2292 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2293 else
2294 {
2295 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2296 pcre_free_substring(substring);
2297 }
2298 }
2299 }
2300
2301 for (getnamesptr = getnames;
2302 *getnamesptr != 0;
2303 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2304 {
2305 const char *substring;
2306 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2307 count, (char *)getnamesptr, &substring);
2308 if (rc < 0)
2309 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2310 else
2311 {
2312 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2313 pcre_free_substring(substring);
2314 }
2315 }
2316
2317 if (getlist)
2318 {
2319 const char **stringlist;
2320 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2321 &stringlist);
2322 if (rc < 0)
2323 fprintf(outfile, "get substring list failed %d\n", rc);
2324 else
2325 {
2326 for (i = 0; i < count; i++)
2327 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2328 if (stringlist[i] != NULL)
2329 fprintf(outfile, "string list not terminated by NULL\n");
2330 /* free((void *)stringlist); */
2331 pcre_free_substring_list(stringlist);
2332 }
2333 }
2334 }
2335
2336 /* There was a partial match */
2337
2338 else if (count == PCRE_ERROR_PARTIAL)
2339 {
2340 fprintf(outfile, "Partial match");
2341 #if !defined NODFA
2342 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2343 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2344 bptr + use_offsets[0]);
2345 #endif
2346 fprintf(outfile, "\n");
2347 break; /* Out of the /g loop */
2348 }
2349
2350 /* Failed to match. If this is a /g or /G loop and we previously set
2351 g_notempty after a null match, this is not necessarily the end. We want
2352 to advance the start offset, and continue. We won't be at the end of the
2353 string - that was checked before setting g_notempty.
2354
2355 Complication arises in the case when the newline option is "any" or
2356 "anycrlf". If the previous match was at the end of a line terminated by
2357 CRLF, an advance of one character just passes the \r, whereas we should
2358 prefer the longer newline sequence, as does the code in pcre_exec().
2359 Fudge the offset value to achieve this.
2360
2361 Otherwise, in the case of UTF-8 matching, the advance must be one
2362 character, not one byte. */
2363
2364 else
2365 {
2366 if (g_notempty != 0)
2367 {
2368 int onechar = 1;
2369 unsigned int obits = ((real_pcre *)re)->options;
2370 use_offsets[0] = start_offset;
2371 if ((obits & PCRE_NEWLINE_BITS) == 0)
2372 {
2373 int d;
2374 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2375 obits = (d == '\r')? PCRE_NEWLINE_CR :
2376 (d == '\n')? PCRE_NEWLINE_LF :
2377 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2378 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2379 (d == -1)? PCRE_NEWLINE_ANY : 0;
2380 }
2381 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2382 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2383 &&
2384 start_offset < len - 1 &&
2385 bptr[start_offset] == '\r' &&
2386 bptr[start_offset+1] == '\n')
2387 onechar++;
2388 else if (use_utf8)
2389 {
2390 while (start_offset + onechar < len)
2391 {
2392 int tb = bptr[start_offset+onechar];
2393 if (tb <= 127) break;
2394 tb &= 0xc0;
2395 if (tb != 0 && tb != 0xc0) onechar++;
2396 }
2397 }
2398 use_offsets[1] = start_offset + onechar;
2399 }
2400 else
2401 {
2402 if (count == PCRE_ERROR_NOMATCH)
2403 {
2404 if (gmatched == 0) fprintf(outfile, "No match\n");
2405 }
2406 else fprintf(outfile, "Error %d\n", count);
2407 break; /* Out of the /g loop */
2408 }
2409 }
2410
2411 /* If not /g or /G we are done */
2412
2413 if (!do_g && !do_G) break;
2414
2415 /* If we have matched an empty string, first check to see if we are at
2416 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2417 what Perl's /g option does. This turns out to be rather cunning. First
2418 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2419 same point. If this fails (picked up above) we advance to the next
2420 character. */
2421
2422 g_notempty = 0;
2423
2424 if (use_offsets[0] == use_offsets[1])
2425 {
2426 if (use_offsets[0] == len) break;
2427 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2428 }
2429
2430 /* For /g, update the start offset, leaving the rest alone */
2431
2432 if (do_g) start_offset = use_offsets[1];
2433
2434 /* For /G, update the pointer and length */
2435
2436 else
2437 {
2438 bptr += use_offsets[1];
2439 len -= use_offsets[1];
2440 }
2441 } /* End of loop for /g and /G */
2442
2443 NEXT_DATA: continue;
2444 } /* End of loop for data lines */
2445
2446 CONTINUE:
2447
2448 #if !defined NOPOSIX
2449 if (posix || do_posix) regfree(&preg);
2450 #endif
2451
2452 if (re != NULL) new_free(re);
2453 if (extra != NULL) new_free(extra);
2454 if (tables != NULL)
2455 {
2456 new_free((void *)tables);
2457 setlocale(LC_CTYPE, "C");
2458 locale_set = 0;
2459 }
2460 }
2461
2462 if (infile == stdin) fprintf(outfile, "\n");
2463
2464 EXIT:
2465
2466 if (infile != NULL && infile != stdin) fclose(infile);
2467 if (outfile != NULL && outfile != stdout) fclose(outfile);
2468
2469 free(buffer);
2470 free(dbuffer);
2471 free(pbuffer);
2472 free(offsets);
2473
2474 return yield;
2475 }
2476
2477 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12