/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Contents of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 351 - (show annotations) (download)
Fri Jul 4 18:27:16 2008 UTC (6 years, 4 months ago) by ph10
Original Path: code/trunk/pcretest.c
File MIME type: text/plain
File size: 71614 byte(s)
Final tidies for new Unicode property code; upgrade to Unicode 5.1.0.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #define isatty _isatty /* This is what Windows calls them, I'm told */
75 #define fileno _fileno
76
77 #else
78 #include <sys/time.h> /* These two includes are needed */
79 #include <sys/resource.h> /* for setrlimit(). */
80 #define INPUT_MODE "rb"
81 #define OUTPUT_MODE "wb"
82 #endif
83
84
85 /* We have to include pcre_internal.h because we need the internal info for
86 displaying the results of pcre_study() and we also need to know about the
87 internal macros, structures, and other internal data values; pcretest has
88 "inside information" compared to a program that strictly follows the PCRE API.
89
90 Although pcre_internal.h does itself include pcre.h, we explicitly include it
91 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
92 appropriately for an application, not for building PCRE. */
93
94 #include "pcre.h"
95 #include "pcre_internal.h"
96
97 /* We need access to some of the data tables that PCRE uses. So as not to have
98 to keep two copies, we include the source file here, changing the names of the
99 external symbols to prevent clashes. */
100
101 #define _pcre_ucp_gentype ucp_gentype
102 #define _pcre_utf8_table1 utf8_table1
103 #define _pcre_utf8_table1_size utf8_table1_size
104 #define _pcre_utf8_table2 utf8_table2
105 #define _pcre_utf8_table3 utf8_table3
106 #define _pcre_utf8_table4 utf8_table4
107 #define _pcre_utt utt
108 #define _pcre_utt_size utt_size
109 #define _pcre_utt_names utt_names
110 #define _pcre_OP_lengths OP_lengths
111
112 #include "pcre_tables.c"
113
114 /* We also need the pcre_printint() function for printing out compiled
115 patterns. This function is in a separate file so that it can be included in
116 pcre_compile.c when that module is compiled with debugging enabled.
117
118 The definition of the macro PRINTABLE, which determines whether to print an
119 output character as-is or as a hex value when showing compiled patterns, is
120 contained in this file. We use it here also, in cases when the locale has not
121 been explicitly changed, so as to get consistent output from systems that
122 differ in their output from isprint() even in the "C" locale. */
123
124 #include "pcre_printint.src"
125
126 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
127
128
129 /* It is possible to compile this test program without including support for
130 testing the POSIX interface, though this is not available via the standard
131 Makefile. */
132
133 #if !defined NOPOSIX
134 #include "pcreposix.h"
135 #endif
136
137 /* It is also possible, for the benefit of the version currently imported into
138 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
139 interface to the DFA matcher (NODFA), and without the doublecheck of the old
140 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
141 UTF8 support if PCRE is built without it. */
142
143 #ifndef SUPPORT_UTF8
144 #ifndef NOUTF8
145 #define NOUTF8
146 #endif
147 #endif
148
149
150 /* Other parameters */
151
152 #ifndef CLOCKS_PER_SEC
153 #ifdef CLK_TCK
154 #define CLOCKS_PER_SEC CLK_TCK
155 #else
156 #define CLOCKS_PER_SEC 100
157 #endif
158 #endif
159
160 /* This is the default loop count for timing. */
161
162 #define LOOPREPEAT 500000
163
164 /* Static variables */
165
166 static FILE *outfile;
167 static int log_store = 0;
168 static int callout_count;
169 static int callout_extra;
170 static int callout_fail_count;
171 static int callout_fail_id;
172 static int debug_lengths;
173 static int first_callout;
174 static int locale_set = 0;
175 static int show_malloc;
176 static int use_utf8;
177 static size_t gotten_store;
178
179 /* The buffers grow automatically if very long input lines are encountered. */
180
181 static int buffer_size = 50000;
182 static uschar *buffer = NULL;
183 static uschar *dbuffer = NULL;
184 static uschar *pbuffer = NULL;
185
186
187
188 /*************************************************
189 * Read or extend an input line *
190 *************************************************/
191
192 /* Input lines are read into buffer, but both patterns and data lines can be
193 continued over multiple input lines. In addition, if the buffer fills up, we
194 want to automatically expand it so as to be able to handle extremely large
195 lines that are needed for certain stress tests. When the input buffer is
196 expanded, the other two buffers must also be expanded likewise, and the
197 contents of pbuffer, which are a copy of the input for callouts, must be
198 preserved (for when expansion happens for a data line). This is not the most
199 optimal way of handling this, but hey, this is just a test program!
200
201 Arguments:
202 f the file to read
203 start where in buffer to start (this *must* be within buffer)
204 prompt for stdin or readline()
205
206 Returns: pointer to the start of new data
207 could be a copy of start, or could be moved
208 NULL if no data read and EOF reached
209 */
210
211 static uschar *
212 extend_inputline(FILE *f, uschar *start, const char *prompt)
213 {
214 uschar *here = start;
215
216 for (;;)
217 {
218 int rlen = buffer_size - (here - buffer);
219
220 if (rlen > 1000)
221 {
222 int dlen;
223
224 /* If libreadline support is required, use readline() to read a line if the
225 input is a terminal. Note that readline() removes the trailing newline, so
226 we must put it back again, to be compatible with fgets(). */
227
228 #ifdef SUPPORT_LIBREADLINE
229 if (isatty(fileno(f)))
230 {
231 size_t len;
232 char *s = readline(prompt);
233 if (s == NULL) return (here == start)? NULL : start;
234 len = strlen(s);
235 if (len > 0) add_history(s);
236 if (len > rlen - 1) len = rlen - 1;
237 memcpy(here, s, len);
238 here[len] = '\n';
239 here[len+1] = 0;
240 free(s);
241 }
242 else
243 #endif
244
245 /* Read the next line by normal means, prompting if the file is stdin. */
246
247 {
248 if (f == stdin) printf(prompt);
249 if (fgets((char *)here, rlen, f) == NULL)
250 return (here == start)? NULL : start;
251 }
252
253 dlen = (int)strlen((char *)here);
254 if (dlen > 0 && here[dlen - 1] == '\n') return start;
255 here += dlen;
256 }
257
258 else
259 {
260 int new_buffer_size = 2*buffer_size;
261 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
262 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
263 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
264
265 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
266 {
267 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
268 exit(1);
269 }
270
271 memcpy(new_buffer, buffer, buffer_size);
272 memcpy(new_pbuffer, pbuffer, buffer_size);
273
274 buffer_size = new_buffer_size;
275
276 start = new_buffer + (start - buffer);
277 here = new_buffer + (here - buffer);
278
279 free(buffer);
280 free(dbuffer);
281 free(pbuffer);
282
283 buffer = new_buffer;
284 dbuffer = new_dbuffer;
285 pbuffer = new_pbuffer;
286 }
287 }
288
289 return NULL; /* Control never gets here */
290 }
291
292
293
294
295
296
297
/*************************************************
*        Read number from string                 *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then a run of decimal digits is converted. If
there are no digits the result is 0. A value too large for an int is clamped
to INT_MAX instead of overflowing; the remaining digits are still consumed.

Arguments:
  str          string to be converted
  endptr       where to put the end pointer (first character not consumed)

Returns:       the value, as a non-negative int
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  while (isdigit(*str))
  {
    int digit = (int)(*str++ - '0');

    /* result * 10 + digit would exceed INT_MAX: saturate. Once saturated the
    test stays true, so the value remains INT_MAX for any further digits. */

    if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
  }

  *endptr = str;
  return result;
}
322
323
324
325
326 /*************************************************
327 * Convert UTF-8 string to value *
328 *************************************************/
329
330 /* This function takes one or more bytes that represents a UTF-8 character,
331 and returns the value of the character.
332
333 Argument:
334 utf8bytes a pointer to the byte vector
335 vptr a pointer to an int to receive the value
336
337 Returns: > 0 => the number of bytes consumed
338 -6 to 0 => malformed UTF-8 character at offset = (-return)
339 */
340
341 #if !defined NOUTF8
342
343 static int
344 utf82ord(unsigned char *utf8bytes, int *vptr)
345 {
346 int c = *utf8bytes++;
347 int d = c;
348 int i, j, s;
349
350 for (i = -1; i < 6; i++) /* i is number of additional bytes */
351 {
352 if ((d & 0x80) == 0) break;
353 d <<= 1;
354 }
355
356 if (i == -1) { *vptr = c; return 1; } /* ascii character */
357 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
358
359 /* i now has a value in the range 1-5 */
360
361 s = 6*i;
362 d = (c & utf8_table3[i]) << s;
363
364 for (j = 0; j < i; j++)
365 {
366 c = *utf8bytes++;
367 if ((c & 0xc0) != 0x80) return -(j+1);
368 s -= 6;
369 d |= (c & 0x3f) << s;
370 }
371
372 /* Check that encoding was the correct unique one */
373
374 for (j = 0; j < utf8_table1_size; j++)
375 if (d <= utf8_table1[j]) break;
376 if (j != i) return -(i+1);
377
378 /* Valid value */
379
380 *vptr = d;
381 return i+1;
382 }
383
384 #endif
385
386
387
388 /*************************************************
389 * Convert character value to UTF-8 *
390 *************************************************/
391
392 /* This function takes an integer value in the range 0 - 0x7fffffff
393 and encodes it as a UTF-8 character in 0 to 6 bytes.
394
395 Arguments:
396 cvalue the character value
397 utf8bytes pointer to buffer for result - at least 6 bytes long
398
399 Returns: number of characters placed in the buffer
400 */
401
402 #if !defined NOUTF8
403
404 static int
405 ord2utf8(int cvalue, uschar *utf8bytes)
406 {
407 register int i, j;
408 for (i = 0; i < utf8_table1_size; i++)
409 if (cvalue <= utf8_table1[i]) break;
410 utf8bytes += i;
411 for (j = i; j > 0; j--)
412 {
413 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
414 cvalue >>= 6;
415 }
416 *utf8bytes = utf8_table2[i] | cvalue;
417 return i + 1;
418 }
419
420 #endif
421
422
423
424 /*************************************************
425 * Print character string *
426 *************************************************/
427
428 /* Character string printing function. Must handle UTF-8 strings in utf8
429 mode. Yields number of characters printed. If handed a NULL file, just counts
430 chars without printing. */
431
432 static int pchars(unsigned char *p, int length, FILE *f)
433 {
434 int c = 0;
435 int yield = 0;
436
437 while (length-- > 0)
438 {
439 #if !defined NOUTF8
440 if (use_utf8)
441 {
442 int rc = utf82ord(p, &c);
443
444 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
445 {
446 length -= rc - 1;
447 p += rc;
448 if (PRINTHEX(c))
449 {
450 if (f != NULL) fprintf(f, "%c", c);
451 yield++;
452 }
453 else
454 {
455 int n = 4;
456 if (f != NULL) fprintf(f, "\\x{%02x}", c);
457 yield += (n <= 0x000000ff)? 2 :
458 (n <= 0x00000fff)? 3 :
459 (n <= 0x0000ffff)? 4 :
460 (n <= 0x000fffff)? 5 : 6;
461 }
462 continue;
463 }
464 }
465 #endif
466
467 /* Not UTF-8, or malformed UTF-8 */
468
469 c = *p++;
470 if (PRINTHEX(c))
471 {
472 if (f != NULL) fprintf(f, "%c", c);
473 yield++;
474 }
475 else
476 {
477 if (f != NULL) fprintf(f, "\\x%02x", c);
478 yield += 4;
479 }
480 }
481
482 return yield;
483 }
484
485
486
487 /*************************************************
488 * Callout function *
489 *************************************************/
490
491 /* Called from PCRE as a result of the (?C) item. We print out where we are in
492 the match. Yield zero unless more callouts than the fail count, or the callout
493 data is not zero. */
494
495 static int callout(pcre_callout_block *cb)
496 {
497 FILE *f = (first_callout | callout_extra)? outfile : NULL;
498 int i, pre_start, post_start, subject_length;
499
500 if (callout_extra)
501 {
502 fprintf(f, "Callout %d: last capture = %d\n",
503 cb->callout_number, cb->capture_last);
504
505 for (i = 0; i < cb->capture_top * 2; i += 2)
506 {
507 if (cb->offset_vector[i] < 0)
508 fprintf(f, "%2d: <unset>\n", i/2);
509 else
510 {
511 fprintf(f, "%2d: ", i/2);
512 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
513 cb->offset_vector[i+1] - cb->offset_vector[i], f);
514 fprintf(f, "\n");
515 }
516 }
517 }
518
519 /* Re-print the subject in canonical form, the first time or if giving full
520 datails. On subsequent calls in the same match, we use pchars just to find the
521 printed lengths of the substrings. */
522
523 if (f != NULL) fprintf(f, "--->");
524
525 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
526 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
527 cb->current_position - cb->start_match, f);
528
529 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
530
531 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
532 cb->subject_length - cb->current_position, f);
533
534 if (f != NULL) fprintf(f, "\n");
535
536 /* Always print appropriate indicators, with callout number if not already
537 shown. For automatic callouts, show the pattern offset. */
538
539 if (cb->callout_number == 255)
540 {
541 fprintf(outfile, "%+3d ", cb->pattern_position);
542 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
543 }
544 else
545 {
546 if (callout_extra) fprintf(outfile, " ");
547 else fprintf(outfile, "%3d ", cb->callout_number);
548 }
549
550 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
551 fprintf(outfile, "^");
552
553 if (post_start > 0)
554 {
555 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
556 fprintf(outfile, "^");
557 }
558
559 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
560 fprintf(outfile, " ");
561
562 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
563 pbuffer + cb->pattern_position);
564
565 fprintf(outfile, "\n");
566 first_callout = 0;
567
568 if (cb->callout_data != NULL)
569 {
570 int callout_data = *((int *)(cb->callout_data));
571 if (callout_data != 0)
572 {
573 fprintf(outfile, "Callout data = %d\n", callout_data);
574 return callout_data;
575 }
576 }
577
578 return (cb->callout_number != callout_fail_id)? 0 :
579 (++callout_count >= callout_fail_count)? 1 : 0;
580 }
581
582
583 /*************************************************
584 * Local malloc functions *
585 *************************************************/
586
587 /* Alternative malloc function, to test functionality and show the size of the
588 compiled re. */
589
590 static void *new_malloc(size_t size)
591 {
592 void *block = malloc(size);
593 gotten_store = size;
594 if (show_malloc)
595 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
596 return block;
597 }
598
599 static void new_free(void *block)
600 {
601 if (show_malloc)
602 fprintf(outfile, "free %p\n", block);
603 free(block);
604 }
605
606
607 /* For recursion malloc/free, to test stacking calls */
608
609 static void *stack_malloc(size_t size)
610 {
611 void *block = malloc(size);
612 if (show_malloc)
613 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
614 return block;
615 }
616
617 static void stack_free(void *block)
618 {
619 if (show_malloc)
620 fprintf(outfile, "stack_free %p\n", block);
621 free(block);
622 }
623
624
625 /*************************************************
626 * Call pcre_fullinfo() *
627 *************************************************/
628
629 /* Get one piece of information from the pcre_fullinfo() function */
630
631 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
632 {
633 int rc;
634 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
635 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
636 }
637
638
639
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverse the byte order of a value.

Arguments:
  value      the value to flip
  n          2 to swap the two low-order bytes; any other value reverses
               the four low-order bytes

Returns:     the flipped value; bits above those taking part are dropped
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int b0 = value & 0xff;
  unsigned long int b1 = (value >> 8) & 0xff;
  unsigned long int b2;
  unsigned long int b3;

  if (n == 2)
  {
    return (b0 << 8) | b1;
  }

  b2 = (value >> 16) & 0xff;
  b3 = (value >> 24) & 0xff;

  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
653
654
655
656
657 /*************************************************
658 * Check match or recursion limit *
659 *************************************************/
660
661 static int
662 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
663 int start_offset, int options, int *use_offsets, int use_size_offsets,
664 int flag, unsigned long int *limit, int errnumber, const char *msg)
665 {
666 int count;
667 int min = 0;
668 int mid = 64;
669 int max = -1;
670
671 extra->flags |= flag;
672
673 for (;;)
674 {
675 *limit = mid;
676
677 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
678 use_offsets, use_size_offsets);
679
680 if (count == errnumber)
681 {
682 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
683 min = mid;
684 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
685 }
686
687 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
688 count == PCRE_ERROR_PARTIAL)
689 {
690 if (mid == min + 1)
691 {
692 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
693 break;
694 }
695 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
696 max = mid;
697 mid = (min + mid)/2;
698 }
699 else break; /* Some other error */
700 }
701
702 extra->flags &= ~flag;
703 return count;
704 }
705
706
707
708 /*************************************************
709 * Case-independent strncmp() function *
710 *************************************************/
711
712 /*
713 Arguments:
714 s first string
715 t second string
716 n number of characters to compare
717
718 Returns: < 0, = 0, or > 0, according to the comparison
719 */
720
721 static int
722 strncmpic(uschar *s, uschar *t, int n)
723 {
724 while (n--)
725 {
726 int c = tolower(*s++) - tolower(*t++);
727 if (c) return c;
728 }
729 return 0;
730 }
731
732
733
734 /*************************************************
735 * Check newline indicator *
736 *************************************************/
737
738 /* This is used both at compile and run-time to check for <xxx> escapes, where
739 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
740 no match.
741
742 Arguments:
743 p points after the leading '<'
744 f file for error message
745
746 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
747 */
748
749 static int
750 check_newline(uschar *p, FILE *f)
751 {
752 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
753 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
754 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
755 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
756 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
757 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
758 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
759 fprintf(f, "Unknown newline type at: <%s\n", p);
760 return 0;
761 }
762
763
764
/*************************************************
*             Usage function                     *
*************************************************/

/* Write the command line summary to stdout. Which lines appear depends on
how the program was built: the readline note reflects SUPPORT_LIBREADLINE, and
the -dfa and -p options are listed only when NODFA and NOPOSIX respectively
are not defined. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n", stdout);
  fputs("Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
  fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

  fputs("\nOptions:\n", stdout);
  fputs(" -b show compiled code (bytecode)\n", stdout);
  fputs(" -C show PCRE compile-time options and exit\n", stdout);
  fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

  fputs(" -help show usage information\n", stdout);
  fputs(" -i show information about compiled patterns\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif

  fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
  fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
  fputs(" -s output store (memory) used information\n"
        " -t time compilation and execution\n", stdout);
  fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
  fputs(" -tm time execution (matching) only\n", stdout);
  fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
801
802
803
804 /*************************************************
805 * Main Program *
806 *************************************************/
807
808 /* Read lines from named file or stdin and write to named file or stdout; lines
809 consist of a regular expression, in delimiters and optionally followed by
810 options, followed by a set of test data, terminated by an empty line. */
811
812 int main(int argc, char **argv)
813 {
814 FILE *infile = stdin;
815 int options = 0;
816 int study_options = 0;
817 int op = 1;
818 int timeit = 0;
819 int timeitm = 0;
820 int showinfo = 0;
821 int showstore = 0;
822 int quiet = 0;
823 int size_offsets = 45;
824 int size_offsets_max;
825 int *offsets = NULL;
826 #if !defined NOPOSIX
827 int posix = 0;
828 #endif
829 int debug = 0;
830 int done = 0;
831 int all_use_dfa = 0;
832 int yield = 0;
833 int stack_size;
834
835 /* These vectors store, end-to-end, a list of captured substring names. Assume
836 that 1024 is plenty long enough for the few names we'll be testing. */
837
838 uschar copynames[1024];
839 uschar getnames[1024];
840
841 uschar *copynamesptr;
842 uschar *getnamesptr;
843
844 /* Get buffers from malloc() so that Electric Fence will check their misuse
845 when I am debugging. They grow automatically when very long lines are read. */
846
847 buffer = (unsigned char *)malloc(buffer_size);
848 dbuffer = (unsigned char *)malloc(buffer_size);
849 pbuffer = (unsigned char *)malloc(buffer_size);
850
851 /* The outfile variable is static so that new_malloc can use it. */
852
853 outfile = stdout;
854
855 /* The following _setmode() stuff is some Windows magic that tells its runtime
856 library to translate CRLF into a single LF character. At least, that's what
857 I've been told: never having used Windows I take this all on trust. Originally
858 it set 0x8000, but then I was advised that _O_BINARY was better. */
859
860 #if defined(_WIN32) || defined(WIN32)
861 _setmode( _fileno( stdout ), _O_BINARY );
862 #endif
863
864 /* Scan options */
865
866 while (argc > 1 && argv[op][0] == '-')
867 {
868 unsigned char *endptr;
869
870 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
871 showstore = 1;
872 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
873 else if (strcmp(argv[op], "-b") == 0) debug = 1;
874 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
875 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
876 #if !defined NODFA
877 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
878 #endif
879 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
880 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
881 *endptr == 0))
882 {
883 op++;
884 argc--;
885 }
886 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
887 {
888 int both = argv[op][2] == 0;
889 int temp;
890 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
891 *endptr == 0))
892 {
893 timeitm = temp;
894 op++;
895 argc--;
896 }
897 else timeitm = LOOPREPEAT;
898 if (both) timeit = timeitm;
899 }
900 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
901 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
902 *endptr == 0))
903 {
904 #if defined(_WIN32) || defined(WIN32)
905 printf("PCRE: -S not supported on this OS\n");
906 exit(1);
907 #else
908 int rc;
909 struct rlimit rlim;
910 getrlimit(RLIMIT_STACK, &rlim);
911 rlim.rlim_cur = stack_size * 1024 * 1024;
912 rc = setrlimit(RLIMIT_STACK, &rlim);
913 if (rc != 0)
914 {
915 printf("PCRE: setrlimit() failed with error %d\n", rc);
916 exit(1);
917 }
918 op++;
919 argc--;
920 #endif
921 }
922 #if !defined NOPOSIX
923 else if (strcmp(argv[op], "-p") == 0) posix = 1;
924 #endif
925 else if (strcmp(argv[op], "-C") == 0)
926 {
927 int rc;
928 printf("PCRE version %s\n", pcre_version());
929 printf("Compiled with\n");
930 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
931 printf(" %sUTF-8 support\n", rc? "" : "No ");
932 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
933 printf(" %sUnicode properties support\n", rc? "" : "No ");
934 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
935 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
936 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
937 (rc == -2)? "ANYCRLF" :
938 (rc == -1)? "ANY" : "???");
939 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
940 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
941 "all Unicode newlines");
942 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
943 printf(" Internal link size = %d\n", rc);
944 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
945 printf(" POSIX malloc threshold = %d\n", rc);
946 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
947 printf(" Default match limit = %d\n", rc);
948 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
949 printf(" Default recursion depth limit = %d\n", rc);
950 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
951 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
952 goto EXIT;
953 }
954 else if (strcmp(argv[op], "-help") == 0 ||
955 strcmp(argv[op], "--help") == 0)
956 {
957 usage();
958 goto EXIT;
959 }
960 else
961 {
962 printf("** Unknown or malformed option %s\n", argv[op]);
963 usage();
964 yield = 1;
965 goto EXIT;
966 }
967 op++;
968 argc--;
969 }
970
971 /* Get the store for the offsets vector, and remember what it was */
972
973 size_offsets_max = size_offsets;
974 offsets = (int *)malloc(size_offsets_max * sizeof(int));
975 if (offsets == NULL)
976 {
977 printf("** Failed to get %d bytes of memory for offsets vector\n",
978 (int)(size_offsets_max * sizeof(int)));
979 yield = 1;
980 goto EXIT;
981 }
982
983 /* Sort out the input and output files */
984
985 if (argc > 1)
986 {
987 infile = fopen(argv[op], INPUT_MODE);
988 if (infile == NULL)
989 {
990 printf("** Failed to open %s\n", argv[op]);
991 yield = 1;
992 goto EXIT;
993 }
994 }
995
996 if (argc > 2)
997 {
998 outfile = fopen(argv[op+1], OUTPUT_MODE);
999 if (outfile == NULL)
1000 {
1001 printf("** Failed to open %s\n", argv[op+1]);
1002 yield = 1;
1003 goto EXIT;
1004 }
1005 }
1006
1007 /* Set alternative malloc function */
1008
1009 pcre_malloc = new_malloc;
1010 pcre_free = new_free;
1011 pcre_stack_malloc = stack_malloc;
1012 pcre_stack_free = stack_free;
1013
1014 /* Heading line unless quiet, then prompt for first regex if stdin */
1015
1016 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1017
1018 /* Main loop */
1019
1020 while (!done)
1021 {
1022 pcre *re = NULL;
1023 pcre_extra *extra = NULL;
1024
1025 #if !defined NOPOSIX /* There are still compilers that require no indent */
1026 regex_t preg;
1027 int do_posix = 0;
1028 #endif
1029
1030 const char *error;
1031 unsigned char *p, *pp, *ppp;
1032 unsigned char *to_file = NULL;
1033 const unsigned char *tables = NULL;
1034 unsigned long int true_size, true_study_size = 0;
1035 size_t size, regex_gotten_store;
1036 int do_study = 0;
1037 int do_debug = debug;
1038 int do_G = 0;
1039 int do_g = 0;
1040 int do_showinfo = showinfo;
1041 int do_showrest = 0;
1042 int do_flip = 0;
1043 int erroroffset, len, delimiter, poffset;
1044
1045 use_utf8 = 0;
1046 debug_lengths = 1;
1047
1048 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1049 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1050 fflush(outfile);
1051
1052 p = buffer;
1053 while (isspace(*p)) p++;
1054 if (*p == 0) continue;
1055
1056 /* See if the pattern is to be loaded pre-compiled from a file. */
1057
1058 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1059 {
1060 unsigned long int magic, get_options;
1061 uschar sbuf[8];
1062 FILE *f;
1063
1064 p++;
1065 pp = p + (int)strlen((char *)p);
1066 while (isspace(pp[-1])) pp--;
1067 *pp = 0;
1068
1069 f = fopen((char *)p, "rb");
1070 if (f == NULL)
1071 {
1072 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1073 continue;
1074 }
1075
1076 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1077
1078 true_size =
1079 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1080 true_study_size =
1081 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1082
1083 re = (real_pcre *)new_malloc(true_size);
1084 regex_gotten_store = gotten_store;
1085
1086 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1087
1088 magic = ((real_pcre *)re)->magic_number;
1089 if (magic != MAGIC_NUMBER)
1090 {
1091 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1092 {
1093 do_flip = 1;
1094 }
1095 else
1096 {
1097 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1098 fclose(f);
1099 continue;
1100 }
1101 }
1102
1103 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1104 do_flip? " (byte-inverted)" : "", p);
1105
1106 /* Need to know if UTF-8 for printing data strings */
1107
1108 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1109 use_utf8 = (get_options & PCRE_UTF8) != 0;
1110
1111 /* Now see if there is any following study data */
1112
1113 if (true_study_size != 0)
1114 {
1115 pcre_study_data *psd;
1116
1117 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1118 extra->flags = PCRE_EXTRA_STUDY_DATA;
1119
1120 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1121 extra->study_data = psd;
1122
1123 if (fread(psd, 1, true_study_size, f) != true_study_size)
1124 {
1125 FAIL_READ:
1126 fprintf(outfile, "Failed to read data from %s\n", p);
1127 if (extra != NULL) new_free(extra);
1128 if (re != NULL) new_free(re);
1129 fclose(f);
1130 continue;
1131 }
1132 fprintf(outfile, "Study data loaded from %s\n", p);
1133 do_study = 1; /* To get the data output if requested */
1134 }
1135 else fprintf(outfile, "No study data\n");
1136
1137 fclose(f);
1138 goto SHOW_INFO;
1139 }
1140
1141 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1142 the pattern; if it isn't complete, read more. */
1143
1144 delimiter = *p++;
1145
1146 if (isalnum(delimiter) || delimiter == '\\')
1147 {
1148 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1149 goto SKIP_DATA;
1150 }
1151
1152 pp = p;
1153 poffset = p - buffer;
1154
1155 for(;;)
1156 {
1157 while (*pp != 0)
1158 {
1159 if (*pp == '\\' && pp[1] != 0) pp++;
1160 else if (*pp == delimiter) break;
1161 pp++;
1162 }
1163 if (*pp != 0) break;
1164 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1165 {
1166 fprintf(outfile, "** Unexpected EOF\n");
1167 done = 1;
1168 goto CONTINUE;
1169 }
1170 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1171 }
1172
1173 /* The buffer may have moved while being extended; reset the start of data
1174 pointer to the correct relative point in the buffer. */
1175
1176 p = buffer + poffset;
1177
1178 /* If the first character after the delimiter is backslash, make
1179 the pattern end with backslash. This is purely to provide a way
1180 of testing for the error message when a pattern ends with backslash. */
1181
1182 if (pp[1] == '\\') *pp++ = '\\';
1183
1184 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1185 for callouts. */
1186
1187 *pp++ = 0;
1188 strcpy((char *)pbuffer, (char *)p);
1189
1190 /* Look for options after final delimiter */
1191
1192 options = 0;
1193 study_options = 0;
1194 log_store = showstore; /* default from command line */
1195
1196 while (*pp != 0)
1197 {
1198 switch (*pp++)
1199 {
1200 case 'f': options |= PCRE_FIRSTLINE; break;
1201 case 'g': do_g = 1; break;
1202 case 'i': options |= PCRE_CASELESS; break;
1203 case 'm': options |= PCRE_MULTILINE; break;
1204 case 's': options |= PCRE_DOTALL; break;
1205 case 'x': options |= PCRE_EXTENDED; break;
1206
1207 case '+': do_showrest = 1; break;
1208 case 'A': options |= PCRE_ANCHORED; break;
1209 case 'B': do_debug = 1; break;
1210 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1211 case 'D': do_debug = do_showinfo = 1; break;
1212 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1213 case 'F': do_flip = 1; break;
1214 case 'G': do_G = 1; break;
1215 case 'I': do_showinfo = 1; break;
1216 case 'J': options |= PCRE_DUPNAMES; break;
1217 case 'M': log_store = 1; break;
1218 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1219
1220 #if !defined NOPOSIX
1221 case 'P': do_posix = 1; break;
1222 #endif
1223
1224 case 'S': do_study = 1; break;
1225 case 'U': options |= PCRE_UNGREEDY; break;
1226 case 'X': options |= PCRE_EXTRA; break;
1227 case 'Z': debug_lengths = 0; break;
1228 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1229 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1230
1231 case 'L':
1232 ppp = pp;
1233 /* The '\r' test here is so that it works on Windows. */
1234 /* The 0 (NUL) test is just in case this is an unterminated line. */
1235 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1236 *ppp = 0;
1237 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1238 {
1239 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1240 goto SKIP_DATA;
1241 }
1242 locale_set = 1;
1243 tables = pcre_maketables();
1244 pp = ppp;
1245 break;
1246
1247 case '>':
1248 to_file = pp;
1249 while (*pp != 0) pp++;
1250 while (isspace(pp[-1])) pp--;
1251 *pp = 0;
1252 break;
1253
1254 case '<':
1255 {
1256 if (strncmp((char *)pp, "JS>", 3) == 0)
1257 {
1258 options |= PCRE_JAVASCRIPT_COMPAT;
1259 pp += 3;
1260 }
1261 else
1262 {
1263 int x = check_newline(pp, outfile);
1264 if (x == 0) goto SKIP_DATA;
1265 options |= x;
1266 while (*pp++ != '>');
1267 }
1268 }
1269 break;
1270
1271 case '\r': /* So that it works in Windows */
1272 case '\n':
1273 case ' ':
1274 break;
1275
1276 default:
1277 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1278 goto SKIP_DATA;
1279 }
1280 }
1281
1282 /* Handle compiling via the POSIX interface, which doesn't support the
1283 timing, showing, or debugging options, nor the ability to pass over
1284 local character tables. */
1285
1286 #if !defined NOPOSIX
1287 if (posix || do_posix)
1288 {
1289 int rc;
1290 int cflags = 0;
1291
1292 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1293 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1294 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1295 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1296 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1297
1298 rc = regcomp(&preg, (char *)p, cflags);
1299
1300 /* Compilation failed; go back for another re, skipping to blank line
1301 if non-interactive. */
1302
1303 if (rc != 0)
1304 {
1305 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1306 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1307 goto SKIP_DATA;
1308 }
1309 }
1310
1311 /* Handle compiling via the native interface */
1312
1313 else
1314 #endif /* !defined NOPOSIX */
1315
1316 {
1317 if (timeit > 0)
1318 {
1319 register int i;
1320 clock_t time_taken;
1321 clock_t start_time = clock();
1322 for (i = 0; i < timeit; i++)
1323 {
1324 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1325 if (re != NULL) free(re);
1326 }
1327 time_taken = clock() - start_time;
1328 fprintf(outfile, "Compile time %.4f milliseconds\n",
1329 (((double)time_taken * 1000.0) / (double)timeit) /
1330 (double)CLOCKS_PER_SEC);
1331 }
1332
1333 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1334
1335 /* Compilation failed; go back for another re, skipping to blank line
1336 if non-interactive. */
1337
1338 if (re == NULL)
1339 {
1340 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1341 SKIP_DATA:
1342 if (infile != stdin)
1343 {
1344 for (;;)
1345 {
1346 if (extend_inputline(infile, buffer, NULL) == NULL)
1347 {
1348 done = 1;
1349 goto CONTINUE;
1350 }
1351 len = (int)strlen((char *)buffer);
1352 while (len > 0 && isspace(buffer[len-1])) len--;
1353 if (len == 0) break;
1354 }
1355 fprintf(outfile, "\n");
1356 }
1357 goto CONTINUE;
1358 }
1359
1360 /* Compilation succeeded; print data if required. There are now two
1361 info-returning functions. The old one has a limited interface and
1362 returns only limited data. Check that it agrees with the newer one. */
1363
1364 if (log_store)
1365 fprintf(outfile, "Memory allocation (code space): %d\n",
1366 (int)(gotten_store -
1367 sizeof(real_pcre) -
1368 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1369
1370 /* Extract the size for possible writing before possibly flipping it,
1371 and remember the store that was got. */
1372
1373 true_size = ((real_pcre *)re)->size;
1374 regex_gotten_store = gotten_store;
1375
1376 /* If /S was present, study the regexp to generate additional info to
1377 help with the matching. */
1378
1379 if (do_study)
1380 {
1381 if (timeit > 0)
1382 {
1383 register int i;
1384 clock_t time_taken;
1385 clock_t start_time = clock();
1386 for (i = 0; i < timeit; i++)
1387 extra = pcre_study(re, study_options, &error);
1388 time_taken = clock() - start_time;
1389 if (extra != NULL) free(extra);
1390 fprintf(outfile, " Study time %.4f milliseconds\n",
1391 (((double)time_taken * 1000.0) / (double)timeit) /
1392 (double)CLOCKS_PER_SEC);
1393 }
1394 extra = pcre_study(re, study_options, &error);
1395 if (error != NULL)
1396 fprintf(outfile, "Failed to study: %s\n", error);
1397 else if (extra != NULL)
1398 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1399 }
1400
1401 /* If the 'F' option was present, we flip the bytes of all the integer
1402 fields in the regex data block and the study block. This is to make it
1403 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1404 compiled on a different architecture. */
1405
1406 if (do_flip)
1407 {
1408 real_pcre *rre = (real_pcre *)re;
1409 rre->magic_number =
1410 byteflip(rre->magic_number, sizeof(rre->magic_number));
1411 rre->size = byteflip(rre->size, sizeof(rre->size));
1412 rre->options = byteflip(rre->options, sizeof(rre->options));
1413 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1414 rre->top_bracket =
1415 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1416 rre->top_backref =
1417 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1418 rre->first_byte =
1419 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1420 rre->req_byte =
1421 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1422 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1423 sizeof(rre->name_table_offset));
1424 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1425 sizeof(rre->name_entry_size));
1426 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1427 sizeof(rre->name_count));
1428
1429 if (extra != NULL)
1430 {
1431 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1432 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1433 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1434 }
1435 }
1436
1437 /* Extract information from the compiled data if required */
1438
1439 SHOW_INFO:
1440
1441 if (do_debug)
1442 {
1443 fprintf(outfile, "------------------------------------------------------------------\n");
1444 pcre_printint(re, outfile, debug_lengths);
1445 }
1446
1447 if (do_showinfo)
1448 {
1449 unsigned long int get_options, all_options;
1450 #if !defined NOINFOCHECK
1451 int old_first_char, old_options, old_count;
1452 #endif
1453 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1454 hascrorlf;
1455 int nameentrysize, namecount;
1456 const uschar *nametable;
1457
1458 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1459 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1460 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1461 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1462 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1463 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1464 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1465 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1466 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1467 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1468 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1469 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1470
1471 #if !defined NOINFOCHECK
1472 old_count = pcre_info(re, &old_options, &old_first_char);
1473 if (count < 0) fprintf(outfile,
1474 "Error %d from pcre_info()\n", count);
1475 else
1476 {
1477 if (old_count != count) fprintf(outfile,
1478 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1479 old_count);
1480
1481 if (old_first_char != first_char) fprintf(outfile,
1482 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1483 first_char, old_first_char);
1484
1485 if (old_options != (int)get_options) fprintf(outfile,
1486 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1487 get_options, old_options);
1488 }
1489 #endif
1490
1491 if (size != regex_gotten_store) fprintf(outfile,
1492 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1493 (int)size, (int)regex_gotten_store);
1494
1495 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1496 if (backrefmax > 0)
1497 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1498
1499 if (namecount > 0)
1500 {
1501 fprintf(outfile, "Named capturing subpatterns:\n");
1502 while (namecount-- > 0)
1503 {
1504 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1505 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1506 GET2(nametable, 0));
1507 nametable += nameentrysize;
1508 }
1509 }
1510
1511 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1512 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1513
1514 all_options = ((real_pcre *)re)->options;
1515 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1516
1517 if (get_options == 0) fprintf(outfile, "No options\n");
1518 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1519 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1520 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1521 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1522 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1523 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1524 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1525 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1526 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1527 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1528 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1529 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1530 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1531 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1532 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1533 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1534
1535 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1536
1537 switch (get_options & PCRE_NEWLINE_BITS)
1538 {
1539 case PCRE_NEWLINE_CR:
1540 fprintf(outfile, "Forced newline sequence: CR\n");
1541 break;
1542
1543 case PCRE_NEWLINE_LF:
1544 fprintf(outfile, "Forced newline sequence: LF\n");
1545 break;
1546
1547 case PCRE_NEWLINE_CRLF:
1548 fprintf(outfile, "Forced newline sequence: CRLF\n");
1549 break;
1550
1551 case PCRE_NEWLINE_ANYCRLF:
1552 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1553 break;
1554
1555 case PCRE_NEWLINE_ANY:
1556 fprintf(outfile, "Forced newline sequence: ANY\n");
1557 break;
1558
1559 default:
1560 break;
1561 }
1562
1563 if (first_char == -1)
1564 {
1565 fprintf(outfile, "First char at start or follows newline\n");
1566 }
1567 else if (first_char < 0)
1568 {
1569 fprintf(outfile, "No first char\n");
1570 }
1571 else
1572 {
1573 int ch = first_char & 255;
1574 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1575 "" : " (caseless)";
1576 if (PRINTHEX(ch))
1577 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1578 else
1579 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1580 }
1581
1582 if (need_char < 0)
1583 {
1584 fprintf(outfile, "No need char\n");
1585 }
1586 else
1587 {
1588 int ch = need_char & 255;
1589 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1590 "" : " (caseless)";
1591 if (PRINTHEX(ch))
1592 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1593 else
1594 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1595 }
1596
1597 /* Don't output study size; at present it is in any case a fixed
1598 value, but it varies, depending on the computer architecture, and
1599 so messes up the test suite. (And with the /F option, it might be
1600 flipped.) */
1601
1602 if (do_study)
1603 {
1604 if (extra == NULL)
1605 fprintf(outfile, "Study returned NULL\n");
1606 else
1607 {
1608 uschar *start_bits = NULL;
1609 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1610
1611 if (start_bits == NULL)
1612 fprintf(outfile, "No starting byte set\n");
1613 else
1614 {
1615 int i;
1616 int c = 24;
1617 fprintf(outfile, "Starting byte set: ");
1618 for (i = 0; i < 256; i++)
1619 {
1620 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1621 {
1622 if (c > 75)
1623 {
1624 fprintf(outfile, "\n ");
1625 c = 2;
1626 }
1627 if (PRINTHEX(i) && i != ' ')
1628 {
1629 fprintf(outfile, "%c ", i);
1630 c += 2;
1631 }
1632 else
1633 {
1634 fprintf(outfile, "\\x%02x ", i);
1635 c += 5;
1636 }
1637 }
1638 }
1639 fprintf(outfile, "\n");
1640 }
1641 }
1642 }
1643 }
1644
1645 /* If the '>' option was present, we write out the regex to a file, and
1646 that is all. The first 8 bytes of the file are the regex length and then
1647 the study length, in big-endian order. */
1648
1649 if (to_file != NULL)
1650 {
1651 FILE *f = fopen((char *)to_file, "wb");
1652 if (f == NULL)
1653 {
1654 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1655 }
1656 else
1657 {
1658 uschar sbuf[8];
1659 sbuf[0] = (uschar)((true_size >> 24) & 255);
1660 sbuf[1] = (uschar)((true_size >> 16) & 255);
1661 sbuf[2] = (uschar)((true_size >> 8) & 255);
1662 sbuf[3] = (uschar)((true_size) & 255);
1663
1664 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1665 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1666 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1667 sbuf[7] = (uschar)((true_study_size) & 255);
1668
1669 if (fwrite(sbuf, 1, 8, f) < 8 ||
1670 fwrite(re, 1, true_size, f) < true_size)
1671 {
1672 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1673 }
1674 else
1675 {
1676 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1677 if (extra != NULL)
1678 {
1679 if (fwrite(extra->study_data, 1, true_study_size, f) <
1680 true_study_size)
1681 {
1682 fprintf(outfile, "Write error on %s: %s\n", to_file,
1683 strerror(errno));
1684 }
1685 else fprintf(outfile, "Study data written to %s\n", to_file);
1686
1687 }
1688 }
1689 fclose(f);
1690 }
1691
1692 new_free(re);
1693 if (extra != NULL) new_free(extra);
1694 if (tables != NULL) new_free((void *)tables);
1695 continue; /* With next regex */
1696 }
1697 } /* End of non-POSIX compile */
1698
1699 /* Read data lines and test them */
1700
1701 for (;;)
1702 {
1703 uschar *q;
1704 uschar *bptr;
1705 int *use_offsets = offsets;
1706 int use_size_offsets = size_offsets;
1707 int callout_data = 0;
1708 int callout_data_set = 0;
1709 int count, c;
1710 int copystrings = 0;
1711 int find_match_limit = 0;
1712 int getstrings = 0;
1713 int getlist = 0;
1714 int gmatched = 0;
1715 int start_offset = 0;
1716 int g_notempty = 0;
1717 int use_dfa = 0;
1718
1719 options = 0;
1720
1721 *copynames = 0;
1722 *getnames = 0;
1723
1724 copynamesptr = copynames;
1725 getnamesptr = getnames;
1726
1727 pcre_callout = callout;
1728 first_callout = 1;
1729 callout_extra = 0;
1730 callout_count = 0;
1731 callout_fail_count = 999999;
1732 callout_fail_id = -1;
1733 show_malloc = 0;
1734
1735 if (extra != NULL) extra->flags &=
1736 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1737
1738 len = 0;
1739 for (;;)
1740 {
1741 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1742 {
1743 if (len > 0) break;
1744 done = 1;
1745 goto CONTINUE;
1746 }
1747 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1748 len = (int)strlen((char *)buffer);
1749 if (buffer[len-1] == '\n') break;
1750 }
1751
1752 while (len > 0 && isspace(buffer[len-1])) len--;
1753 buffer[len] = 0;
1754 if (len == 0) break;
1755
1756 p = buffer;
1757 while (isspace(*p)) p++;
1758
1759 bptr = q = dbuffer;
1760 while ((c = *p++) != 0)
1761 {
1762 int i = 0;
1763 int n = 0;
1764
1765 if (c == '\\') switch ((c = *p++))
1766 {
1767 case 'a': c = 7; break;
1768 case 'b': c = '\b'; break;
1769 case 'e': c = 27; break;
1770 case 'f': c = '\f'; break;
1771 case 'n': c = '\n'; break;
1772 case 'r': c = '\r'; break;
1773 case 't': c = '\t'; break;
1774 case 'v': c = '\v'; break;
1775
1776 case '0': case '1': case '2': case '3':
1777 case '4': case '5': case '6': case '7':
1778 c -= '0';
1779 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1780 c = c * 8 + *p++ - '0';
1781
1782 #if !defined NOUTF8
1783 if (use_utf8 && c > 255)
1784 {
1785 unsigned char buff8[8];
1786 int ii, utn;
1787 utn = ord2utf8(c, buff8);
1788 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1789 c = buff8[ii]; /* Last byte */
1790 }
1791 #endif
1792 break;
1793
1794 case 'x':
1795
1796 /* Handle \x{..} specially - new Perl thing for utf8 */
1797
1798 #if !defined NOUTF8
1799 if (*p == '{')
1800 {
1801 unsigned char *pt = p;
1802 c = 0;
1803 while (isxdigit(*(++pt)))
1804 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1805 if (*pt == '}')
1806 {
1807 unsigned char buff8[8];
1808 int ii, utn;
1809 utn = ord2utf8(c, buff8);
1810 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1811 c = buff8[ii]; /* Last byte */
1812 p = pt + 1;
1813 break;
1814 }
1815 /* Not correct form; fall through */
1816 }
1817 #endif
1818
1819 /* Ordinary \x */
1820
1821 c = 0;
1822 while (i++ < 2 && isxdigit(*p))
1823 {
1824 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1825 p++;
1826 }
1827 break;
1828
1829 case 0: /* \ followed by EOF allows for an empty line */
1830 p--;
1831 continue;
1832
1833 case '>':
1834 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1835 continue;
1836
1837 case 'A': /* Option setting */
1838 options |= PCRE_ANCHORED;
1839 continue;
1840
1841 case 'B':
1842 options |= PCRE_NOTBOL;
1843 continue;
1844
1845 case 'C':
1846 if (isdigit(*p)) /* Set copy string */
1847 {
1848 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1849 copystrings |= 1 << n;
1850 }
1851 else if (isalnum(*p))
1852 {
1853 uschar *npp = copynamesptr;
1854 while (isalnum(*p)) *npp++ = *p++;
1855 *npp++ = 0;
1856 *npp = 0;
1857 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1858 if (n < 0)
1859 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1860 copynamesptr = npp;
1861 }
1862 else if (*p == '+')
1863 {
1864 callout_extra = 1;
1865 p++;
1866 }
1867 else if (*p == '-')
1868 {
1869 pcre_callout = NULL;
1870 p++;
1871 }
1872 else if (*p == '!')
1873 {
1874 callout_fail_id = 0;
1875 p++;
1876 while(isdigit(*p))
1877 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1878 callout_fail_count = 0;
1879 if (*p == '!')
1880 {
1881 p++;
1882 while(isdigit(*p))
1883 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1884 }
1885 }
1886 else if (*p == '*')
1887 {
1888 int sign = 1;
1889 callout_data = 0;
1890 if (*(++p) == '-') { sign = -1; p++; }
1891 while(isdigit(*p))
1892 callout_data = callout_data * 10 + *p++ - '0';
1893 callout_data *= sign;
1894 callout_data_set = 1;
1895 }
1896 continue;
1897
1898 #if !defined NODFA
1899 case 'D':
1900 #if !defined NOPOSIX
1901 if (posix || do_posix)
1902 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1903 else
1904 #endif
1905 use_dfa = 1;
1906 continue;
1907
1908 case 'F':
1909 options |= PCRE_DFA_SHORTEST;
1910 continue;
1911 #endif
1912
1913 case 'G':
1914 if (isdigit(*p))
1915 {
1916 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1917 getstrings |= 1 << n;
1918 }
1919 else if (isalnum(*p))
1920 {
1921 uschar *npp = getnamesptr;
1922 while (isalnum(*p)) *npp++ = *p++;
1923 *npp++ = 0;
1924 *npp = 0;
1925 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1926 if (n < 0)
1927 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1928 getnamesptr = npp;
1929 }
1930 continue;
1931
1932 case 'L':
1933 getlist = 1;
1934 continue;
1935
1936 case 'M':
1937 find_match_limit = 1;
1938 continue;
1939
1940 case 'N':
1941 options |= PCRE_NOTEMPTY;
1942 continue;
1943
1944 case 'O':
1945 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1946 if (n > size_offsets_max)
1947 {
1948 size_offsets_max = n;
1949 free(offsets);
1950 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1951 if (offsets == NULL)
1952 {
1953 printf("** Failed to get %d bytes of memory for offsets vector\n",
1954 (int)(size_offsets_max * sizeof(int)));
1955 yield = 1;
1956 goto EXIT;
1957 }
1958 }
1959 use_size_offsets = n;
1960 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1961 continue;
1962
1963 case 'P':
1964 options |= PCRE_PARTIAL;
1965 continue;
1966
1967 case 'Q':
1968 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1969 if (extra == NULL)
1970 {
1971 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1972 extra->flags = 0;
1973 }
1974 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1975 extra->match_limit_recursion = n;
1976 continue;
1977
1978 case 'q':
1979 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1980 if (extra == NULL)
1981 {
1982 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1983 extra->flags = 0;
1984 }
1985 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1986 extra->match_limit = n;
1987 continue;
1988
1989 #if !defined NODFA
1990 case 'R':
1991 options |= PCRE_DFA_RESTART;
1992 continue;
1993 #endif
1994
1995 case 'S':
1996 show_malloc = 1;
1997 continue;
1998
1999 case 'Z':
2000 options |= PCRE_NOTEOL;
2001 continue;
2002
2003 case '?':
2004 options |= PCRE_NO_UTF8_CHECK;
2005 continue;
2006
2007 case '<':
2008 {
2009 int x = check_newline(p, outfile);
2010 if (x == 0) goto NEXT_DATA;
2011 options |= x;
2012 while (*p++ != '>');
2013 }
2014 continue;
2015 }
2016 *q++ = c;
2017 }
2018 *q = 0;
2019 len = q - dbuffer;
2020
2021 if ((all_use_dfa || use_dfa) && find_match_limit)
2022 {
2023 printf("**Match limit not relevant for DFA matching: ignored\n");
2024 find_match_limit = 0;
2025 }
2026
2027 /* Handle matching via the POSIX interface, which does not
2028 support timing or playing with the match limit or callout data. */
2029
2030 #if !defined NOPOSIX
2031 if (posix || do_posix)
2032 {
2033 int rc;
2034 int eflags = 0;
2035 regmatch_t *pmatch = NULL;
2036 if (use_size_offsets > 0)
2037 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2038 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2039 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2040
2041 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2042
2043 if (rc != 0)
2044 {
2045 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2046 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2047 }
2048 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2049 != 0)
2050 {
2051 fprintf(outfile, "Matched with REG_NOSUB\n");
2052 }
2053 else
2054 {
2055 size_t i;
2056 for (i = 0; i < (size_t)use_size_offsets; i++)
2057 {
2058 if (pmatch[i].rm_so >= 0)
2059 {
2060 fprintf(outfile, "%2d: ", (int)i);
2061 (void)pchars(dbuffer + pmatch[i].rm_so,
2062 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2063 fprintf(outfile, "\n");
2064 if (i == 0 && do_showrest)
2065 {
2066 fprintf(outfile, " 0+ ");
2067 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2068 outfile);
2069 fprintf(outfile, "\n");
2070 }
2071 }
2072 }
2073 }
2074 free(pmatch);
2075 }
2076
2077 /* Handle matching via the native interface - repeats for /g and /G */
2078
2079 else
2080 #endif /* !defined NOPOSIX */
2081
2082 for (;; gmatched++) /* Loop for /g or /G */
2083 {
2084 if (timeitm > 0)
2085 {
2086 register int i;
2087 clock_t time_taken;
2088 clock_t start_time = clock();
2089
2090 #if !defined NODFA
2091 if (all_use_dfa || use_dfa)
2092 {
2093 int workspace[1000];
2094 for (i = 0; i < timeitm; i++)
2095 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2096 options | g_notempty, use_offsets, use_size_offsets, workspace,
2097 sizeof(workspace)/sizeof(int));
2098 }
2099 else
2100 #endif
2101
2102 for (i = 0; i < timeitm; i++)
2103 count = pcre_exec(re, extra, (char *)bptr, len,
2104 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2105
2106 time_taken = clock() - start_time;
2107 fprintf(outfile, "Execute time %.4f milliseconds\n",
2108 (((double)time_taken * 1000.0) / (double)timeitm) /
2109 (double)CLOCKS_PER_SEC);
2110 }
2111
2112 /* If find_match_limit is set, we want to do repeated matches with
2113 varying limits in order to find the minimum value for the match limit and
2114 for the recursion limit. */
2115
2116 if (find_match_limit)
2117 {
2118 if (extra == NULL)
2119 {
2120 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2121 extra->flags = 0;
2122 }
2123
2124 (void)check_match_limit(re, extra, bptr, len, start_offset,
2125 options|g_notempty, use_offsets, use_size_offsets,
2126 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2127 PCRE_ERROR_MATCHLIMIT, "match()");
2128
2129 count = check_match_limit(re, extra, bptr, len, start_offset,
2130 options|g_notempty, use_offsets, use_size_offsets,
2131 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2132 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2133 }
2134
2135 /* If callout_data is set, use the interface with additional data */
2136
2137 else if (callout_data_set)
2138 {
2139 if (extra == NULL)
2140 {
2141 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2142 extra->flags = 0;
2143 }
2144 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2145 extra->callout_data = &callout_data;
2146 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2147 options | g_notempty, use_offsets, use_size_offsets);
2148 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2149 }
2150
2151 /* The normal case is just to do the match once, with the default
2152 value of match_limit. */
2153
2154 #if !defined NODFA
2155 else if (all_use_dfa || use_dfa)
2156 {
2157 int workspace[1000];
2158 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2159 options | g_notempty, use_offsets, use_size_offsets, workspace,
2160 sizeof(workspace)/sizeof(int));
2161 if (count == 0)
2162 {
2163 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2164 count = use_size_offsets/2;
2165 }
2166 }
2167 #endif
2168
2169 else
2170 {
2171 count = pcre_exec(re, extra, (char *)bptr, len,
2172 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2173 if (count == 0)
2174 {
2175 fprintf(outfile, "Matched, but too many substrings\n");
2176 count = use_size_offsets/3;
2177 }
2178 }
2179
2180 /* Matched */
2181
2182 if (count >= 0)
2183 {
2184 int i, maxcount;
2185
2186 #if !defined NODFA
2187 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2188 #endif
2189 maxcount = use_size_offsets/3;
2190
2191 /* This is a check against a lunatic return value. */
2192
2193 if (count > maxcount)
2194 {
2195 fprintf(outfile,
2196 "** PCRE error: returned count %d is too big for offset size %d\n",
2197 count, use_size_offsets);
2198 count = use_size_offsets/3;
2199 if (do_g || do_G)
2200 {
2201 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2202 do_g = do_G = FALSE; /* Break g/G loop */
2203 }
2204 }
2205
2206 for (i = 0; i < count * 2; i += 2)
2207 {
2208 if (use_offsets[i] < 0)
2209 fprintf(outfile, "%2d: <unset>\n", i/2);
2210 else
2211 {
2212 fprintf(outfile, "%2d: ", i/2);
2213 (void)pchars(bptr + use_offsets[i],
2214 use_offsets[i+1] - use_offsets[i], outfile);
2215 fprintf(outfile, "\n");
2216 if (i == 0)
2217 {
2218 if (do_showrest)
2219 {
2220 fprintf(outfile, " 0+ ");
2221 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2222 outfile);
2223 fprintf(outfile, "\n");
2224 }
2225 }
2226 }
2227 }
2228
2229 for (i = 0; i < 32; i++)
2230 {
2231 if ((copystrings & (1 << i)) != 0)
2232 {
2233 char copybuffer[256];
2234 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2235 i, copybuffer, sizeof(copybuffer));
2236 if (rc < 0)
2237 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2238 else
2239 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2240 }
2241 }
2242
2243 for (copynamesptr = copynames;
2244 *copynamesptr != 0;
2245 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2246 {
2247 char copybuffer[256];
2248 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2249 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2250 if (rc < 0)
2251 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2252 else
2253 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2254 }
2255
2256 for (i = 0; i < 32; i++)
2257 {
2258 if ((getstrings & (1 << i)) != 0)
2259 {
2260 const char *substring;
2261 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2262 i, &substring);
2263 if (rc < 0)
2264 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2265 else
2266 {
2267 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2268 pcre_free_substring(substring);
2269 }
2270 }
2271 }
2272
2273 for (getnamesptr = getnames;
2274 *getnamesptr != 0;
2275 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2276 {
2277 const char *substring;
2278 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2279 count, (char *)getnamesptr, &substring);
2280 if (rc < 0)
2281 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2282 else
2283 {
2284 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2285 pcre_free_substring(substring);
2286 }
2287 }
2288
2289 if (getlist)
2290 {
2291 const char **stringlist;
2292 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2293 &stringlist);
2294 if (rc < 0)
2295 fprintf(outfile, "get substring list failed %d\n", rc);
2296 else
2297 {
2298 for (i = 0; i < count; i++)
2299 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2300 if (stringlist[i] != NULL)
2301 fprintf(outfile, "string list not terminated by NULL\n");
2302 /* free((void *)stringlist); */
2303 pcre_free_substring_list(stringlist);
2304 }
2305 }
2306 }
2307
2308 /* There was a partial match */
2309
2310 else if (count == PCRE_ERROR_PARTIAL)
2311 {
2312 fprintf(outfile, "Partial match");
2313 #if !defined NODFA
2314 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2315 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2316 bptr + use_offsets[0]);
2317 #endif
2318 fprintf(outfile, "\n");
2319 break; /* Out of the /g loop */
2320 }
2321
2322 /* Failed to match. If this is a /g or /G loop and we previously set
2323 g_notempty after a null match, this is not necessarily the end. We want
2324 to advance the start offset, and continue. We won't be at the end of the
2325 string - that was checked before setting g_notempty.
2326
2327 Complication arises in the case when the newline option is "any" or
2328 "anycrlf". If the previous match was at the end of a line terminated by
2329 CRLF, an advance of one character just passes the \r, whereas we should
2330 prefer the longer newline sequence, as does the code in pcre_exec().
2331 Fudge the offset value to achieve this.
2332
2333 Otherwise, in the case of UTF-8 matching, the advance must be one
2334 character, not one byte. */
2335
2336 else
2337 {
2338 if (g_notempty != 0)
2339 {
2340 int onechar = 1;
2341 unsigned int obits = ((real_pcre *)re)->options;
2342 use_offsets[0] = start_offset;
2343 if ((obits & PCRE_NEWLINE_BITS) == 0)
2344 {
2345 int d;
2346 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2347 obits = (d == '\r')? PCRE_NEWLINE_CR :
2348 (d == '\n')? PCRE_NEWLINE_LF :
2349 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2350 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2351 (d == -1)? PCRE_NEWLINE_ANY : 0;
2352 }
2353 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2354 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2355 &&
2356 start_offset < len - 1 &&
2357 bptr[start_offset] == '\r' &&
2358 bptr[start_offset+1] == '\n')
2359 onechar++;
2360 else if (use_utf8)
2361 {
2362 while (start_offset + onechar < len)
2363 {
2364 int tb = bptr[start_offset+onechar];
2365 if (tb <= 127) break;
2366 tb &= 0xc0;
2367 if (tb != 0 && tb != 0xc0) onechar++;
2368 }
2369 }
2370 use_offsets[1] = start_offset + onechar;
2371 }
2372 else
2373 {
2374 if (count == PCRE_ERROR_NOMATCH)
2375 {
2376 if (gmatched == 0) fprintf(outfile, "No match\n");
2377 }
2378 else fprintf(outfile, "Error %d\n", count);
2379 break; /* Out of the /g loop */
2380 }
2381 }
2382
2383 /* If not /g or /G we are done */
2384
2385 if (!do_g && !do_G) break;
2386
2387 /* If we have matched an empty string, first check to see if we are at
2388 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2389 what Perl's /g option does. This turns out to be rather cunning. First
2390 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2391 same point. If this fails (picked up above) we advance to the next
2392 character. */
2393
2394 g_notempty = 0;
2395
2396 if (use_offsets[0] == use_offsets[1])
2397 {
2398 if (use_offsets[0] == len) break;
2399 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2400 }
2401
2402 /* For /g, update the start offset, leaving the rest alone */
2403
2404 if (do_g) start_offset = use_offsets[1];
2405
2406 /* For /G, update the pointer and length */
2407
2408 else
2409 {
2410 bptr += use_offsets[1];
2411 len -= use_offsets[1];
2412 }
2413 } /* End of loop for /g and /G */
2414
2415 NEXT_DATA: continue;
2416 } /* End of loop for data lines */
2417
2418 CONTINUE:
2419
2420 #if !defined NOPOSIX
2421 if (posix || do_posix) regfree(&preg);
2422 #endif
2423
2424 if (re != NULL) new_free(re);
2425 if (extra != NULL) new_free(extra);
2426 if (tables != NULL)
2427 {
2428 new_free((void *)tables);
2429 setlocale(LC_CTYPE, "C");
2430 locale_set = 0;
2431 }
2432 }
2433
2434 if (infile == stdin) fprintf(outfile, "\n");
2435
2436 EXIT:
2437
2438 if (infile != NULL && infile != stdin) fclose(infile);
2439 if (outfile != NULL && outfile != stdout) fclose(outfile);
2440
2441 free(buffer);
2442 free(dbuffer);
2443 free(pbuffer);
2444 free(offsets);
2445
2446 return yield;
2447 }
2448
2449 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12