/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 371 - (show annotations) (download)
Mon Aug 25 18:28:05 2008 UTC (6 years, 2 months ago) by ph10
File MIME type: text/plain
File size: 72470 byte(s)
Source tidies for 7.8-RC1 

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #define isatty _isatty /* This is what Windows calls them, I'm told */
75 #define fileno _fileno
76
77 #else
78 #include <sys/time.h> /* These two includes are needed */
79 #include <sys/resource.h> /* for setrlimit(). */
80 #define INPUT_MODE "rb"
81 #define OUTPUT_MODE "wb"
82 #endif
83
84
85 /* We have to include pcre_internal.h because we need the internal info for
86 displaying the results of pcre_study() and we also need to know about the
87 internal macros, structures, and other internal data values; pcretest has
88 "inside information" compared to a program that strictly follows the PCRE API.
89
90 Although pcre_internal.h does itself include pcre.h, we explicitly include it
91 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
92 appropriately for an application, not for building PCRE. */
93
94 #include "pcre.h"
95 #include "pcre_internal.h"
96
97 /* We need access to some of the data tables that PCRE uses. So as not to have
98 to keep two copies, we include the source file here, changing the names of the
99 external symbols to prevent clashes. */
100
101 #define _pcre_ucp_gentype ucp_gentype
102 #define _pcre_utf8_table1 utf8_table1
103 #define _pcre_utf8_table1_size utf8_table1_size
104 #define _pcre_utf8_table2 utf8_table2
105 #define _pcre_utf8_table3 utf8_table3
106 #define _pcre_utf8_table4 utf8_table4
107 #define _pcre_utt utt
108 #define _pcre_utt_size utt_size
109 #define _pcre_utt_names utt_names
110 #define _pcre_OP_lengths OP_lengths
111
112 #include "pcre_tables.c"
113
114 /* We also need the pcre_printint() function for printing out compiled
115 patterns. This function is in a separate file so that it can be included in
116 pcre_compile.c when that module is compiled with debugging enabled.
117
118 The definition of the macro PRINTABLE, which determines whether to print an
119 output character as-is or as a hex value when showing compiled patterns, is
120 contained in this file. We use it here also, in cases when the locale has not
121 been explicitly changed, so as to get consistent output from systems that
122 differ in their output from isprint() even in the "C" locale. */
123
124 #include "pcre_printint.src"
125
126 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
127
128
129 /* It is possible to compile this test program without including support for
130 testing the POSIX interface, though this is not available via the standard
131 Makefile. */
132
133 #if !defined NOPOSIX
134 #include "pcreposix.h"
135 #endif
136
137 /* It is also possible, for the benefit of the version currently imported into
138 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
139 interface to the DFA matcher (NODFA), and without the doublecheck of the old
140 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
141 UTF8 support if PCRE is built without it. */
142
143 #ifndef SUPPORT_UTF8
144 #ifndef NOUTF8
145 #define NOUTF8
146 #endif
147 #endif
148
149
150 /* Other parameters */
151
152 #ifndef CLOCKS_PER_SEC
153 #ifdef CLK_TCK
154 #define CLOCKS_PER_SEC CLK_TCK
155 #else
156 #define CLOCKS_PER_SEC 100
157 #endif
158 #endif
159
160 /* This is the default loop count for timing. */
161
162 #define LOOPREPEAT 500000
163
164 /* Static variables */
165
166 static FILE *outfile; /* stream that test output is written to; set to stdout at the start of main() */
167 static int log_store = 0; /* NOTE(review): not referenced in the visible part of the file - confirm use */
168 static int callout_count; /* incremented by callout() for each callout whose number is callout_fail_id */
169 static int callout_extra; /* non-zero makes callout() print the captured substrings on every call */
170 static int callout_fail_count; /* callout() yields 1 once callout_count reaches this value */
171 static int callout_fail_id; /* the callout number that is counted towards callout_fail_count */
172 static int debug_lengths; /* set to 1 for each pattern in main(); NOTE(review): use not visible here */
173 static int first_callout; /* non-zero until callout() has run once; cleared by callout() */
174 static int locale_set = 0; /* selects isprint() rather than PRINTABLE() in the PRINTHEX macro */
175 static int show_malloc; /* non-zero makes the malloc/free wrappers log each call to outfile */
176 static int use_utf8; /* non-zero makes pchars() decode its input as UTF-8 */
177 static size_t gotten_store; /* size passed to the most recent new_malloc() call */
178
179 /* The buffers grow automatically if very long input lines are encountered. */
180
181 static int buffer_size = 50000; /* current size of each of the three buffers; doubled by extend_inputline() */
182 static uschar *buffer = NULL; /* the input line buffer that extend_inputline() reads into */
183 static uschar *dbuffer = NULL; /* resized alongside buffer; NOTE(review): use not visible in this part of the file */
184 static uschar *pbuffer = NULL; /* copy of the input for callouts; contents are preserved when the buffers grow */
185
186
187
188 /*************************************************
189 * Read or extend an input line *
190 *************************************************/
191
192 /* Input lines are read into buffer, but both patterns and data lines can be
193 continued over multiple input lines. In addition, if the buffer fills up, we
194 want to automatically expand it so as to be able to handle extremely large
195 lines that are needed for certain stress tests. When the input buffer is
196 expanded, the other two buffers must also be expanded likewise, and the
197 contents of pbuffer, which are a copy of the input for callouts, must be
198 preserved (for when expansion happens for a data line). This is not the most
199 optimal way of handling this, but hey, this is just a test program!
200
201 Arguments:
202 f the file to read
203 start where in buffer to start (this *must* be within buffer)
204 prompt for stdin or readline()
205
206 Returns: pointer to the start of new data
207 could be a copy of start, or could be moved
208 NULL if no data read and EOF reached
209 */
210
211 static uschar *
212 extend_inputline(FILE *f, uschar *start, const char *prompt)
213 {
214 uschar *here = start;
215
216 for (;;)
217 {
218 int rlen = buffer_size - (here - buffer);
219
220 if (rlen > 1000)
221 {
222 int dlen;
223
224 /* If libreadline support is required, use readline() to read a line if the
225 input is a terminal. Note that readline() removes the trailing newline, so
226 we must put it back again, to be compatible with fgets(). */
227
228 #ifdef SUPPORT_LIBREADLINE
229 if (isatty(fileno(f)))
230 {
231 size_t len;
232 char *s = readline(prompt);
233 if (s == NULL) return (here == start)? NULL : start;
234 len = strlen(s);
235 if (len > 0) add_history(s);
236 if (len > rlen - 1) len = rlen - 1;
237 memcpy(here, s, len);
238 here[len] = '\n';
239 here[len+1] = 0;
240 free(s);
241 }
242 else
243 #endif
244
245 /* Read the next line by normal means, prompting if the file is stdin. */
246
247 {
248 if (f == stdin) printf(prompt);
249 if (fgets((char *)here, rlen, f) == NULL)
250 return (here == start)? NULL : start;
251 }
252
253 dlen = (int)strlen((char *)here);
254 if (dlen > 0 && here[dlen - 1] == '\n') return start;
255 here += dlen;
256 }
257
258 else
259 {
260 int new_buffer_size = 2*buffer_size;
261 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
262 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
263 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
264
265 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
266 {
267 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
268 exit(1);
269 }
270
271 memcpy(new_buffer, buffer, buffer_size);
272 memcpy(new_pbuffer, pbuffer, buffer_size);
273
274 buffer_size = new_buffer_size;
275
276 start = new_buffer + (start - buffer);
277 here = new_buffer + (here - buffer);
278
279 free(buffer);
280 free(dbuffer);
281 free(pbuffer);
282
283 buffer = new_buffer;
284 dbuffer = new_dbuffer;
285 pbuffer = new_pbuffer;
286 }
287 }
288
289 return NULL; /* Control never gets here */
290 }
291
292
293
294
295
296
297
/*************************************************
*        Read number from string                 *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple. Leading white space is
skipped, then decimal digits are accumulated. A digit string whose value would
not fit in an int yields INT_MAX instead of overflowing; all the digits are
still consumed.

Arguments:
  str        string to be converted
  endptr     where to put the end pointer (first character not consumed)

Returns:     the value as an int, at most INT_MAX; zero if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (*str != 0 && isspace(*str)) str++;
while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');
  if (result > (INT_MAX - digit) / 10) result = INT_MAX;   /* Saturate */
    else result = result * 10 + digit;
  }
*endptr = str;
return result;
}
322
323
324
325
326 /*************************************************
327 * Convert UTF-8 string to value *
328 *************************************************/
329
330 /* This function takes one or more bytes that represents a UTF-8 character,
331 and returns the value of the character.
332
333 Argument:
334 utf8bytes a pointer to the byte vector
335 vptr a pointer to an int to receive the value
336
337 Returns: > 0 => the number of bytes consumed
338 -6 to 0 => malformed UTF-8 character at offset = (-return)
339 */
340
341 #if !defined NOUTF8
342
343 static int
344 utf82ord(unsigned char *utf8bytes, int *vptr)
345 {
346 int c = *utf8bytes++;
347 int d = c;
348 int i, j, s;
349
350 for (i = -1; i < 6; i++) /* i is number of additional bytes */
351 {
352 if ((d & 0x80) == 0) break;
353 d <<= 1;
354 }
355
356 if (i == -1) { *vptr = c; return 1; } /* ascii character */
357 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
358
359 /* i now has a value in the range 1-5 */
360
361 s = 6*i;
362 d = (c & utf8_table3[i]) << s;
363
364 for (j = 0; j < i; j++)
365 {
366 c = *utf8bytes++;
367 if ((c & 0xc0) != 0x80) return -(j+1);
368 s -= 6;
369 d |= (c & 0x3f) << s;
370 }
371
372 /* Check that encoding was the correct unique one */
373
374 for (j = 0; j < utf8_table1_size; j++)
375 if (d <= utf8_table1[j]) break;
376 if (j != i) return -(i+1);
377
378 /* Valid value */
379
380 *vptr = d;
381 return i+1;
382 }
383
384 #endif
385
386
387
388 /*************************************************
389 * Convert character value to UTF-8 *
390 *************************************************/
391
392 /* This function takes an integer value in the range 0 - 0x7fffffff
393 and encodes it as a UTF-8 character in 0 to 6 bytes.
394
395 Arguments:
396 cvalue the character value
397 utf8bytes pointer to buffer for result - at least 6 bytes long
398
399 Returns: number of characters placed in the buffer
400 */
401
402 #if !defined NOUTF8
403
404 static int
405 ord2utf8(int cvalue, uschar *utf8bytes)
406 {
407 register int i, j;
408 for (i = 0; i < utf8_table1_size; i++)
409 if (cvalue <= utf8_table1[i]) break;
410 utf8bytes += i;
411 for (j = i; j > 0; j--)
412 {
413 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
414 cvalue >>= 6;
415 }
416 *utf8bytes = utf8_table2[i] | cvalue;
417 return i + 1;
418 }
419
420 #endif
421
422
423
424 /*************************************************
425 * Print character string *
426 *************************************************/
427
428 /* Character string printing function. Must handle UTF-8 strings in utf8
429 mode. Yields number of characters printed. If handed a NULL file, just counts
430 chars without printing. */
431
432 static int pchars(unsigned char *p, int length, FILE *f)
433 {
434 int c = 0;
435 int yield = 0;
436
437 while (length-- > 0)
438 {
439 #if !defined NOUTF8
440 if (use_utf8)
441 {
442 int rc = utf82ord(p, &c);
443
444 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
445 {
446 length -= rc - 1;
447 p += rc;
448 if (PRINTHEX(c))
449 {
450 if (f != NULL) fprintf(f, "%c", c);
451 yield++;
452 }
453 else
454 {
455 int n = 4;
456 if (f != NULL) fprintf(f, "\\x{%02x}", c);
457 yield += (n <= 0x000000ff)? 2 :
458 (n <= 0x00000fff)? 3 :
459 (n <= 0x0000ffff)? 4 :
460 (n <= 0x000fffff)? 5 : 6;
461 }
462 continue;
463 }
464 }
465 #endif
466
467 /* Not UTF-8, or malformed UTF-8 */
468
469 c = *p++;
470 if (PRINTHEX(c))
471 {
472 if (f != NULL) fprintf(f, "%c", c);
473 yield++;
474 }
475 else
476 {
477 if (f != NULL) fprintf(f, "\\x%02x", c);
478 yield += 4;
479 }
480 }
481
482 return yield;
483 }
484
485
486
487 /*************************************************
488 * Callout function *
489 *************************************************/
490
491 /* Called from PCRE as a result of the (?C) item. We print out where we are in
492 the match. Yield zero unless more callouts than the fail count, or the callout
493 data is not zero. */
494
495 static int callout(pcre_callout_block *cb)
496 {
497 FILE *f = (first_callout | callout_extra)? outfile : NULL;
498 int i, pre_start, post_start, subject_length;
499
500 if (callout_extra)
501 {
502 fprintf(f, "Callout %d: last capture = %d\n",
503 cb->callout_number, cb->capture_last);
504
505 for (i = 0; i < cb->capture_top * 2; i += 2)
506 {
507 if (cb->offset_vector[i] < 0)
508 fprintf(f, "%2d: <unset>\n", i/2);
509 else
510 {
511 fprintf(f, "%2d: ", i/2);
512 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
513 cb->offset_vector[i+1] - cb->offset_vector[i], f);
514 fprintf(f, "\n");
515 }
516 }
517 }
518
519 /* Re-print the subject in canonical form, the first time or if giving full
520 datails. On subsequent calls in the same match, we use pchars just to find the
521 printed lengths of the substrings. */
522
523 if (f != NULL) fprintf(f, "--->");
524
525 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
526 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
527 cb->current_position - cb->start_match, f);
528
529 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
530
531 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
532 cb->subject_length - cb->current_position, f);
533
534 if (f != NULL) fprintf(f, "\n");
535
536 /* Always print appropriate indicators, with callout number if not already
537 shown. For automatic callouts, show the pattern offset. */
538
539 if (cb->callout_number == 255)
540 {
541 fprintf(outfile, "%+3d ", cb->pattern_position);
542 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
543 }
544 else
545 {
546 if (callout_extra) fprintf(outfile, " ");
547 else fprintf(outfile, "%3d ", cb->callout_number);
548 }
549
550 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
551 fprintf(outfile, "^");
552
553 if (post_start > 0)
554 {
555 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
556 fprintf(outfile, "^");
557 }
558
559 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
560 fprintf(outfile, " ");
561
562 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
563 pbuffer + cb->pattern_position);
564
565 fprintf(outfile, "\n");
566 first_callout = 0;
567
568 if (cb->callout_data != NULL)
569 {
570 int callout_data = *((int *)(cb->callout_data));
571 if (callout_data != 0)
572 {
573 fprintf(outfile, "Callout data = %d\n", callout_data);
574 return callout_data;
575 }
576 }
577
578 return (cb->callout_number != callout_fail_id)? 0 :
579 (++callout_count >= callout_fail_count)? 1 : 0;
580 }
581
582
583 /*************************************************
584 * Local malloc functions *
585 *************************************************/
586
587 /* Alternative malloc function, to test functionality and show the size of the
588 compiled re. */
589
590 static void *new_malloc(size_t size)
591 {
592 void *block = malloc(size);
593 gotten_store = size;
594 if (show_malloc)
595 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
596 return block;
597 }
598
599 static void new_free(void *block)
600 {
601 if (show_malloc)
602 fprintf(outfile, "free %p\n", block);
603 free(block);
604 }
605
606
607 /* For recursion malloc/free, to test stacking calls */
608
609 static void *stack_malloc(size_t size)
610 {
611 void *block = malloc(size);
612 if (show_malloc)
613 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
614 return block;
615 }
616
617 static void stack_free(void *block)
618 {
619 if (show_malloc)
620 fprintf(outfile, "stack_free %p\n", block);
621 free(block);
622 }
623
624
625 /*************************************************
626 * Call pcre_fullinfo() *
627 *************************************************/
628
629 /* Get one piece of information from the pcre_fullinfo() function */
630
631 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
632 {
633 int rc;
634 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
635 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
636 }
637
638
639
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverse the order of the low-order bytes of a value.

Arguments:
  value     the value to flip
  n         2 to swap the two low-order bytes; any other value reverses the
              four low-order bytes

Returns:    the flipped value; bytes above those involved are discarded
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
653
654
655
656
657 /*************************************************
658 * Check match or recursion limit *
659 *************************************************/
660
661 static int
662 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
663 int start_offset, int options, int *use_offsets, int use_size_offsets,
664 int flag, unsigned long int *limit, int errnumber, const char *msg)
665 {
666 int count;
667 int min = 0;
668 int mid = 64;
669 int max = -1;
670
671 extra->flags |= flag;
672
673 for (;;)
674 {
675 *limit = mid;
676
677 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
678 use_offsets, use_size_offsets);
679
680 if (count == errnumber)
681 {
682 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
683 min = mid;
684 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
685 }
686
687 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
688 count == PCRE_ERROR_PARTIAL)
689 {
690 if (mid == min + 1)
691 {
692 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
693 break;
694 }
695 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
696 max = mid;
697 mid = (min + mid)/2;
698 }
699 else break; /* Some other error */
700 }
701
702 extra->flags &= ~flag;
703 return count;
704 }
705
706
707
708 /*************************************************
709 * Case-independent strncmp() function *
710 *************************************************/
711
712 /*
713 Arguments:
714 s first string
715 t second string
716 n number of characters to compare
717
718 Returns: < 0, = 0, or > 0, according to the comparison
719 */
720
721 static int
722 strncmpic(uschar *s, uschar *t, int n)
723 {
724 while (n--)
725 {
726 int c = tolower(*s++) - tolower(*t++);
727 if (c) return c;
728 }
729 return 0;
730 }
731
732
733
734 /*************************************************
735 * Check newline indicator *
736 *************************************************/
737
738 /* This is used both at compile and run-time to check for <xxx> escapes, where
739 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
740 no match.
741
742 Arguments:
743 p points after the leading '<'
744 f file for error message
745
746 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
747 */
748
749 static int
750 check_newline(uschar *p, FILE *f)
751 {
752 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
753 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
754 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
755 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
756 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
757 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
758 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
759 fprintf(f, "Unknown newline type at: <%s\n", p);
760 return 0;
761 }
762
763
764
/*************************************************
*                Usage function                  *
*************************************************/

/* Write a summary of the command line to the standard output. The lines for
the DFA and POSIX options are left out when pcretest is built without them,
and the remark about readline() depends on whether it is linked in. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s output store (memory) used information\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
801
802
803
804 /*************************************************
805 * Main Program *
806 *************************************************/
807
808 /* Read lines from named file or stdin and write to named file or stdout; lines
809 consist of a regular expression, in delimiters and optionally followed by
810 options, followed by a set of test data, terminated by an empty line. */
811
812 int main(int argc, char **argv)
813 {
814 FILE *infile = stdin;
815 int options = 0;
816 int study_options = 0;
817 int op = 1;
818 int timeit = 0;
819 int timeitm = 0;
820 int showinfo = 0;
821 int showstore = 0;
822 int quiet = 0;
823 int size_offsets = 45;
824 int size_offsets_max;
825 int *offsets = NULL;
826 #if !defined NOPOSIX
827 int posix = 0;
828 #endif
829 int debug = 0;
830 int done = 0;
831 int all_use_dfa = 0;
832 int yield = 0;
833 int stack_size;
834
835 /* These vectors store, end-to-end, a list of captured substring names. Assume
836 that 1024 is plenty long enough for the few names we'll be testing. */
837
838 uschar copynames[1024];
839 uschar getnames[1024];
840
841 uschar *copynamesptr;
842 uschar *getnamesptr;
843
844 /* Get buffers from malloc() so that Electric Fence will check their misuse
845 when I am debugging. They grow automatically when very long lines are read. */
846
847 buffer = (unsigned char *)malloc(buffer_size);
848 dbuffer = (unsigned char *)malloc(buffer_size);
849 pbuffer = (unsigned char *)malloc(buffer_size);
850
851 /* The outfile variable is static so that new_malloc can use it. */
852
853 outfile = stdout;
854
855 /* The following _setmode() stuff is some Windows magic that tells its runtime
856 library to translate CRLF into a single LF character. At least, that's what
857 I've been told: never having used Windows I take this all on trust. Originally
858 it set 0x8000, but then I was advised that _O_BINARY was better. */
859
860 #if defined(_WIN32) || defined(WIN32)
861 _setmode( _fileno( stdout ), _O_BINARY );
862 #endif
863
864 /* Scan options */
865
866 while (argc > 1 && argv[op][0] == '-')
867 {
868 unsigned char *endptr;
869
870 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
871 showstore = 1;
872 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
873 else if (strcmp(argv[op], "-b") == 0) debug = 1;
874 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
875 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
876 #if !defined NODFA
877 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
878 #endif
879 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
880 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
881 *endptr == 0))
882 {
883 op++;
884 argc--;
885 }
886 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
887 {
888 int both = argv[op][2] == 0;
889 int temp;
890 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
891 *endptr == 0))
892 {
893 timeitm = temp;
894 op++;
895 argc--;
896 }
897 else timeitm = LOOPREPEAT;
898 if (both) timeit = timeitm;
899 }
900 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
901 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
902 *endptr == 0))
903 {
904 #if defined(_WIN32) || defined(WIN32)
905 printf("PCRE: -S not supported on this OS\n");
906 exit(1);
907 #else
908 int rc;
909 struct rlimit rlim;
910 getrlimit(RLIMIT_STACK, &rlim);
911 rlim.rlim_cur = stack_size * 1024 * 1024;
912 rc = setrlimit(RLIMIT_STACK, &rlim);
913 if (rc != 0)
914 {
915 printf("PCRE: setrlimit() failed with error %d\n", rc);
916 exit(1);
917 }
918 op++;
919 argc--;
920 #endif
921 }
922 #if !defined NOPOSIX
923 else if (strcmp(argv[op], "-p") == 0) posix = 1;
924 #endif
925 else if (strcmp(argv[op], "-C") == 0)
926 {
927 int rc;
928 printf("PCRE version %s\n", pcre_version());
929 printf("Compiled with\n");
930 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
931 printf(" %sUTF-8 support\n", rc? "" : "No ");
932 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
933 printf(" %sUnicode properties support\n", rc? "" : "No ");
934 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
935 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
936 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
937 (rc == -2)? "ANYCRLF" :
938 (rc == -1)? "ANY" : "???");
939 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
940 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
941 "all Unicode newlines");
942 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
943 printf(" Internal link size = %d\n", rc);
944 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
945 printf(" POSIX malloc threshold = %d\n", rc);
946 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
947 printf(" Default match limit = %d\n", rc);
948 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
949 printf(" Default recursion depth limit = %d\n", rc);
950 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
951 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
952 goto EXIT;
953 }
954 else if (strcmp(argv[op], "-help") == 0 ||
955 strcmp(argv[op], "--help") == 0)
956 {
957 usage();
958 goto EXIT;
959 }
960 else
961 {
962 printf("** Unknown or malformed option %s\n", argv[op]);
963 usage();
964 yield = 1;
965 goto EXIT;
966 }
967 op++;
968 argc--;
969 }
970
971 /* Get the store for the offsets vector, and remember what it was */
972
973 size_offsets_max = size_offsets;
974 offsets = (int *)malloc(size_offsets_max * sizeof(int));
975 if (offsets == NULL)
976 {
977 printf("** Failed to get %d bytes of memory for offsets vector\n",
978 (int)(size_offsets_max * sizeof(int)));
979 yield = 1;
980 goto EXIT;
981 }
982
983 /* Sort out the input and output files */
984
985 if (argc > 1)
986 {
987 infile = fopen(argv[op], INPUT_MODE);
988 if (infile == NULL)
989 {
990 printf("** Failed to open %s\n", argv[op]);
991 yield = 1;
992 goto EXIT;
993 }
994 }
995
996 if (argc > 2)
997 {
998 outfile = fopen(argv[op+1], OUTPUT_MODE);
999 if (outfile == NULL)
1000 {
1001 printf("** Failed to open %s\n", argv[op+1]);
1002 yield = 1;
1003 goto EXIT;
1004 }
1005 }
1006
1007 /* Set alternative malloc function */
1008
1009 pcre_malloc = new_malloc;
1010 pcre_free = new_free;
1011 pcre_stack_malloc = stack_malloc;
1012 pcre_stack_free = stack_free;
1013
1014 /* Heading line unless quiet, then prompt for first regex if stdin */
1015
1016 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1017
1018 /* Main loop */
1019
1020 while (!done)
1021 {
1022 pcre *re = NULL;
1023 pcre_extra *extra = NULL;
1024
1025 #if !defined NOPOSIX /* There are still compilers that require no indent */
1026 regex_t preg;
1027 int do_posix = 0;
1028 #endif
1029
1030 const char *error;
1031 unsigned char *p, *pp, *ppp;
1032 unsigned char *to_file = NULL;
1033 const unsigned char *tables = NULL;
1034 unsigned long int true_size, true_study_size = 0;
1035 size_t size, regex_gotten_store;
1036 int do_study = 0;
1037 int do_debug = debug;
1038 int do_G = 0;
1039 int do_g = 0;
1040 int do_showinfo = showinfo;
1041 int do_showrest = 0;
1042 int do_flip = 0;
1043 int erroroffset, len, delimiter, poffset;
1044
1045 use_utf8 = 0;
1046 debug_lengths = 1;
1047
1048 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1049 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1050 fflush(outfile);
1051
1052 p = buffer;
1053 while (isspace(*p)) p++;
1054 if (*p == 0) continue;
1055
1056 /* See if the pattern is to be loaded pre-compiled from a file. */
1057
1058 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1059 {
1060 unsigned long int magic, get_options;
1061 uschar sbuf[8];
1062 FILE *f;
1063
1064 p++;
1065 pp = p + (int)strlen((char *)p);
1066 while (isspace(pp[-1])) pp--;
1067 *pp = 0;
1068
1069 f = fopen((char *)p, "rb");
1070 if (f == NULL)
1071 {
1072 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1073 continue;
1074 }
1075
1076 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1077
1078 true_size =
1079 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1080 true_study_size =
1081 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1082
1083 re = (real_pcre *)new_malloc(true_size);
1084 regex_gotten_store = gotten_store;
1085
1086 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1087
1088 magic = ((real_pcre *)re)->magic_number;
1089 if (magic != MAGIC_NUMBER)
1090 {
1091 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1092 {
1093 do_flip = 1;
1094 }
1095 else
1096 {
1097 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1098 fclose(f);
1099 continue;
1100 }
1101 }
1102
1103 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1104 do_flip? " (byte-inverted)" : "", p);
1105
1106 /* Need to know if UTF-8 for printing data strings */
1107
1108 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1109 use_utf8 = (get_options & PCRE_UTF8) != 0;
1110
1111 /* Now see if there is any following study data */
1112
1113 if (true_study_size != 0)
1114 {
1115 pcre_study_data *psd;
1116
1117 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1118 extra->flags = PCRE_EXTRA_STUDY_DATA;
1119
1120 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1121 extra->study_data = psd;
1122
1123 if (fread(psd, 1, true_study_size, f) != true_study_size)
1124 {
1125 FAIL_READ:
1126 fprintf(outfile, "Failed to read data from %s\n", p);
1127 if (extra != NULL) new_free(extra);
1128 if (re != NULL) new_free(re);
1129 fclose(f);
1130 continue;
1131 }
1132 fprintf(outfile, "Study data loaded from %s\n", p);
1133 do_study = 1; /* To get the data output if requested */
1134 }
1135 else fprintf(outfile, "No study data\n");
1136
1137 fclose(f);
1138 goto SHOW_INFO;
1139 }
1140
1141 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1142 the pattern; if it isn't complete, read more. */
1143
1144 delimiter = *p++;
1145
1146 if (isalnum(delimiter) || delimiter == '\\')
1147 {
1148 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1149 goto SKIP_DATA;
1150 }
1151
1152 pp = p;
1153 poffset = p - buffer;
1154
1155 for(;;)
1156 {
1157 while (*pp != 0)
1158 {
1159 if (*pp == '\\' && pp[1] != 0) pp++;
1160 else if (*pp == delimiter) break;
1161 pp++;
1162 }
1163 if (*pp != 0) break;
1164 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1165 {
1166 fprintf(outfile, "** Unexpected EOF\n");
1167 done = 1;
1168 goto CONTINUE;
1169 }
1170 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1171 }
1172
1173 /* The buffer may have moved while being extended; reset the start of data
1174 pointer to the correct relative point in the buffer. */
1175
1176 p = buffer + poffset;
1177
1178 /* If the first character after the delimiter is backslash, make
1179 the pattern end with backslash. This is purely to provide a way
1180 of testing for the error message when a pattern ends with backslash. */
1181
1182 if (pp[1] == '\\') *pp++ = '\\';
1183
1184 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1185 for callouts. */
1186
1187 *pp++ = 0;
1188 strcpy((char *)pbuffer, (char *)p);
1189
1190 /* Look for options after final delimiter */
1191
1192 options = 0;
1193 study_options = 0;
1194 log_store = showstore; /* default from command line */
1195
1196 while (*pp != 0)
1197 {
1198 switch (*pp++)
1199 {
1200 case 'f': options |= PCRE_FIRSTLINE; break;
1201 case 'g': do_g = 1; break;
1202 case 'i': options |= PCRE_CASELESS; break;
1203 case 'm': options |= PCRE_MULTILINE; break;
1204 case 's': options |= PCRE_DOTALL; break;
1205 case 'x': options |= PCRE_EXTENDED; break;
1206
1207 case '+': do_showrest = 1; break;
1208 case 'A': options |= PCRE_ANCHORED; break;
1209 case 'B': do_debug = 1; break;
1210 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1211 case 'D': do_debug = do_showinfo = 1; break;
1212 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1213 case 'F': do_flip = 1; break;
1214 case 'G': do_G = 1; break;
1215 case 'I': do_showinfo = 1; break;
1216 case 'J': options |= PCRE_DUPNAMES; break;
1217 case 'M': log_store = 1; break;
1218 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1219
1220 #if !defined NOPOSIX
1221 case 'P': do_posix = 1; break;
1222 #endif
1223
1224 case 'S': do_study = 1; break;
1225 case 'U': options |= PCRE_UNGREEDY; break;
1226 case 'X': options |= PCRE_EXTRA; break;
1227 case 'Z': debug_lengths = 0; break;
1228 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1229 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1230
1231 case 'L':
1232 ppp = pp;
1233 /* The '\r' test here is so that it works on Windows. */
1234 /* The zero-byte test is just in case this is an unterminated line. */
1235 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1236 *ppp = 0;
1237 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1238 {
1239 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1240 goto SKIP_DATA;
1241 }
1242 locale_set = 1;
1243 tables = pcre_maketables();
1244 pp = ppp;
1245 break;
1246
1247 case '>':
1248 to_file = pp;
1249 while (*pp != 0) pp++;
1250 while (isspace(pp[-1])) pp--;
1251 *pp = 0;
1252 break;
1253
1254 case '<':
1255 {
1256 if (strncmp((char *)pp, "JS>", 3) == 0)
1257 {
1258 options |= PCRE_JAVASCRIPT_COMPAT;
1259 pp += 3;
1260 }
1261 else
1262 {
1263 int x = check_newline(pp, outfile);
1264 if (x == 0) goto SKIP_DATA;
1265 options |= x;
1266 while (*pp++ != '>');
1267 }
1268 }
1269 break;
1270
1271 case '\r': /* So that it works in Windows */
1272 case '\n':
1273 case ' ':
1274 break;
1275
1276 default:
1277 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1278 goto SKIP_DATA;
1279 }
1280 }
1281
1282 /* Handle compiling via the POSIX interface, which doesn't support the
1283 timing, showing, or debugging options, nor the ability to pass over
1284 local character tables. */
1285
1286 #if !defined NOPOSIX
1287 if (posix || do_posix)
1288 {
1289 int rc;
1290 int cflags = 0;
1291
1292 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1293 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1294 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1295 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1296 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1297
1298 rc = regcomp(&preg, (char *)p, cflags);
1299
1300 /* Compilation failed; go back for another re, skipping to blank line
1301 if non-interactive. */
1302
1303 if (rc != 0)
1304 {
1305 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1306 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1307 goto SKIP_DATA;
1308 }
1309 }
1310
1311 /* Handle compiling via the native interface */
1312
1313 else
1314 #endif /* !defined NOPOSIX */
1315
1316 {
1317 if (timeit > 0)
1318 {
1319 register int i;
1320 clock_t time_taken;
1321 clock_t start_time = clock();
1322 for (i = 0; i < timeit; i++)
1323 {
1324 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1325 if (re != NULL) free(re);
1326 }
1327 time_taken = clock() - start_time;
1328 fprintf(outfile, "Compile time %.4f milliseconds\n",
1329 (((double)time_taken * 1000.0) / (double)timeit) /
1330 (double)CLOCKS_PER_SEC);
1331 }
1332
1333 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1334
1335 /* Compilation failed; go back for another re, skipping to blank line
1336 if non-interactive. */
1337
1338 if (re == NULL)
1339 {
1340 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1341 SKIP_DATA:
1342 if (infile != stdin)
1343 {
1344 for (;;)
1345 {
1346 if (extend_inputline(infile, buffer, NULL) == NULL)
1347 {
1348 done = 1;
1349 goto CONTINUE;
1350 }
1351 len = (int)strlen((char *)buffer);
1352 while (len > 0 && isspace(buffer[len-1])) len--;
1353 if (len == 0) break;
1354 }
1355 fprintf(outfile, "\n");
1356 }
1357 goto CONTINUE;
1358 }
1359
1360 /* Compilation succeeded; print data if required. There are now two
1361 info-returning functions. The old one has a limited interface and
1362 returns only limited data. Check that it agrees with the newer one. */
1363
1364 if (log_store)
1365 fprintf(outfile, "Memory allocation (code space): %d\n",
1366 (int)(gotten_store -
1367 sizeof(real_pcre) -
1368 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1369
1370 /* Extract the size for possible writing before possibly flipping it,
1371 and remember the store that was got. */
1372
1373 true_size = ((real_pcre *)re)->size;
1374 regex_gotten_store = gotten_store;
1375
1376 /* If /S was present, study the regexp to generate additional info to
1377 help with the matching. */
1378
1379 if (do_study)
1380 {
1381 if (timeit > 0)
1382 {
1383 register int i;
1384 clock_t time_taken;
1385 clock_t start_time = clock();
1386 for (i = 0; i < timeit; i++)
1387 extra = pcre_study(re, study_options, &error);
1388 time_taken = clock() - start_time;
1389 if (extra != NULL) free(extra);
1390 fprintf(outfile, " Study time %.4f milliseconds\n",
1391 (((double)time_taken * 1000.0) / (double)timeit) /
1392 (double)CLOCKS_PER_SEC);
1393 }
1394 extra = pcre_study(re, study_options, &error);
1395 if (error != NULL)
1396 fprintf(outfile, "Failed to study: %s\n", error);
1397 else if (extra != NULL)
1398 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1399 }
1400
1401 /* If the 'F' option was present, we flip the bytes of all the integer
1402 fields in the regex data block and the study block. This is to make it
1403 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1404 compiled on a different architecture. */
1405
1406 if (do_flip)
1407 {
1408 real_pcre *rre = (real_pcre *)re;
1409 rre->magic_number =
1410 byteflip(rre->magic_number, sizeof(rre->magic_number));
1411 rre->size = byteflip(rre->size, sizeof(rre->size));
1412 rre->options = byteflip(rre->options, sizeof(rre->options));
1413 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1414 rre->top_bracket =
1415 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1416 rre->top_backref =
1417 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1418 rre->first_byte =
1419 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1420 rre->req_byte =
1421 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1422 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1423 sizeof(rre->name_table_offset));
1424 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1425 sizeof(rre->name_entry_size));
1426 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1427 sizeof(rre->name_count));
1428
1429 if (extra != NULL)
1430 {
1431 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1432 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1433 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1434 }
1435 }
1436
1437 /* Extract information from the compiled data if required */
1438
1439 SHOW_INFO:
1440
1441 if (do_debug)
1442 {
1443 fprintf(outfile, "------------------------------------------------------------------\n");
1444 pcre_printint(re, outfile, debug_lengths);
1445 }
1446
1447 if (do_showinfo)
1448 {
1449 unsigned long int get_options, all_options;
1450 #if !defined NOINFOCHECK
1451 int old_first_char, old_options, old_count;
1452 #endif
1453 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1454 hascrorlf;
1455 int nameentrysize, namecount;
1456 const uschar *nametable;
1457
1458 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1459 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1460 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1461 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1462 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1463 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1464 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1465 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1466 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1467 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1468 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1469 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1470
1471 #if !defined NOINFOCHECK
1472 old_count = pcre_info(re, &old_options, &old_first_char);
1473 if (count < 0) fprintf(outfile,
1474 "Error %d from pcre_info()\n", count);
1475 else
1476 {
1477 if (old_count != count) fprintf(outfile,
1478 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1479 old_count);
1480
1481 if (old_first_char != first_char) fprintf(outfile,
1482 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1483 first_char, old_first_char);
1484
1485 if (old_options != (int)get_options) fprintf(outfile,
1486 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1487 get_options, old_options);
1488 }
1489 #endif
1490
1491 if (size != regex_gotten_store) fprintf(outfile,
1492 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1493 (int)size, (int)regex_gotten_store);
1494
1495 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1496 if (backrefmax > 0)
1497 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1498
1499 if (namecount > 0)
1500 {
1501 fprintf(outfile, "Named capturing subpatterns:\n");
1502 while (namecount-- > 0)
1503 {
1504 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1505 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1506 GET2(nametable, 0));
1507 nametable += nameentrysize;
1508 }
1509 }
1510
1511 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1512 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1513
1514 all_options = ((real_pcre *)re)->options;
1515 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1516
1517 if (get_options == 0) fprintf(outfile, "No options\n");
1518 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1519 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1520 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1521 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1522 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1523 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1524 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1525 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1526 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1527 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1528 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1529 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1530 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1531 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1532 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1533 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1534
1535 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1536
1537 switch (get_options & PCRE_NEWLINE_BITS)
1538 {
1539 case PCRE_NEWLINE_CR:
1540 fprintf(outfile, "Forced newline sequence: CR\n");
1541 break;
1542
1543 case PCRE_NEWLINE_LF:
1544 fprintf(outfile, "Forced newline sequence: LF\n");
1545 break;
1546
1547 case PCRE_NEWLINE_CRLF:
1548 fprintf(outfile, "Forced newline sequence: CRLF\n");
1549 break;
1550
1551 case PCRE_NEWLINE_ANYCRLF:
1552 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1553 break;
1554
1555 case PCRE_NEWLINE_ANY:
1556 fprintf(outfile, "Forced newline sequence: ANY\n");
1557 break;
1558
1559 default:
1560 break;
1561 }
1562
1563 if (first_char == -1)
1564 {
1565 fprintf(outfile, "First char at start or follows newline\n");
1566 }
1567 else if (first_char < 0)
1568 {
1569 fprintf(outfile, "No first char\n");
1570 }
1571 else
1572 {
1573 int ch = first_char & 255;
1574 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1575 "" : " (caseless)";
1576 if (PRINTHEX(ch))
1577 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1578 else
1579 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1580 }
1581
1582 if (need_char < 0)
1583 {
1584 fprintf(outfile, "No need char\n");
1585 }
1586 else
1587 {
1588 int ch = need_char & 255;
1589 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1590 "" : " (caseless)";
1591 if (PRINTHEX(ch))
1592 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1593 else
1594 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1595 }
1596
1597 /* Don't output study size; at present it is in any case a fixed
1598 value, but it varies, depending on the computer architecture, and
1599 so messes up the test suite. (And with the /F option, it might be
1600 flipped.) */
1601
1602 if (do_study)
1603 {
1604 if (extra == NULL)
1605 fprintf(outfile, "Study returned NULL\n");
1606 else
1607 {
1608 uschar *start_bits = NULL;
1609 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1610
1611 if (start_bits == NULL)
1612 fprintf(outfile, "No starting byte set\n");
1613 else
1614 {
1615 int i;
1616 int c = 24;
1617 fprintf(outfile, "Starting byte set: ");
1618 for (i = 0; i < 256; i++)
1619 {
1620 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1621 {
1622 if (c > 75)
1623 {
1624 fprintf(outfile, "\n ");
1625 c = 2;
1626 }
1627 if (PRINTHEX(i) && i != ' ')
1628 {
1629 fprintf(outfile, "%c ", i);
1630 c += 2;
1631 }
1632 else
1633 {
1634 fprintf(outfile, "\\x%02x ", i);
1635 c += 5;
1636 }
1637 }
1638 }
1639 fprintf(outfile, "\n");
1640 }
1641 }
1642 }
1643 }
1644
1645 /* If the '>' option was present, we write out the regex to a file, and
1646 that is all. The first 8 bytes of the file are the regex length and then
1647 the study length, in big-endian order. */
1648
1649 if (to_file != NULL)
1650 {
1651 FILE *f = fopen((char *)to_file, "wb");
1652 if (f == NULL)
1653 {
1654 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1655 }
1656 else
1657 {
1658 uschar sbuf[8];
1659 sbuf[0] = (uschar)((true_size >> 24) & 255);
1660 sbuf[1] = (uschar)((true_size >> 16) & 255);
1661 sbuf[2] = (uschar)((true_size >> 8) & 255);
1662 sbuf[3] = (uschar)((true_size) & 255);
1663
1664 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1665 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1666 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1667 sbuf[7] = (uschar)((true_study_size) & 255);
1668
1669 if (fwrite(sbuf, 1, 8, f) < 8 ||
1670 fwrite(re, 1, true_size, f) < true_size)
1671 {
1672 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1673 }
1674 else
1675 {
1676 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1677 if (extra != NULL)
1678 {
1679 if (fwrite(extra->study_data, 1, true_study_size, f) <
1680 true_study_size)
1681 {
1682 fprintf(outfile, "Write error on %s: %s\n", to_file,
1683 strerror(errno));
1684 }
1685 else fprintf(outfile, "Study data written to %s\n", to_file);
1686
1687 }
1688 }
1689 fclose(f);
1690 }
1691
1692 new_free(re);
1693 if (extra != NULL) new_free(extra);
1694 if (tables != NULL) new_free((void *)tables);
1695 continue; /* With next regex */
1696 }
1697 } /* End of non-POSIX compile */
1698
1699 /* Read data lines and test them */
1700
1701 for (;;)
1702 {
1703 uschar *q;
1704 uschar *bptr;
1705 int *use_offsets = offsets;
1706 int use_size_offsets = size_offsets;
1707 int callout_data = 0;
1708 int callout_data_set = 0;
1709 int count, c;
1710 int copystrings = 0;
1711 int find_match_limit = 0;
1712 int getstrings = 0;
1713 int getlist = 0;
1714 int gmatched = 0;
1715 int start_offset = 0;
1716 int g_notempty = 0;
1717 int use_dfa = 0;
1718
1719 options = 0;
1720
1721 *copynames = 0;
1722 *getnames = 0;
1723
1724 copynamesptr = copynames;
1725 getnamesptr = getnames;
1726
1727 pcre_callout = callout;
1728 first_callout = 1;
1729 callout_extra = 0;
1730 callout_count = 0;
1731 callout_fail_count = 999999;
1732 callout_fail_id = -1;
1733 show_malloc = 0;
1734
1735 if (extra != NULL) extra->flags &=
1736 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1737
1738 len = 0;
1739 for (;;)
1740 {
1741 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1742 {
1743 if (len > 0) break;
1744 done = 1;
1745 goto CONTINUE;
1746 }
1747 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1748 len = (int)strlen((char *)buffer);
1749 if (buffer[len-1] == '\n') break;
1750 }
1751
1752 while (len > 0 && isspace(buffer[len-1])) len--;
1753 buffer[len] = 0;
1754 if (len == 0) break;
1755
1756 p = buffer;
1757 while (isspace(*p)) p++;
1758
1759 bptr = q = dbuffer;
1760 while ((c = *p++) != 0)
1761 {
1762 int i = 0;
1763 int n = 0;
1764
1765 if (c == '\\') switch ((c = *p++))
1766 {
1767 case 'a': c = 7; break;
1768 case 'b': c = '\b'; break;
1769 case 'e': c = 27; break;
1770 case 'f': c = '\f'; break;
1771 case 'n': c = '\n'; break;
1772 case 'r': c = '\r'; break;
1773 case 't': c = '\t'; break;
1774 case 'v': c = '\v'; break;
1775
1776 case '0': case '1': case '2': case '3':
1777 case '4': case '5': case '6': case '7':
1778 c -= '0';
1779 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1780 c = c * 8 + *p++ - '0';
1781
1782 #if !defined NOUTF8
1783 if (use_utf8 && c > 255)
1784 {
1785 unsigned char buff8[8];
1786 int ii, utn;
1787 utn = ord2utf8(c, buff8);
1788 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1789 c = buff8[ii]; /* Last byte */
1790 }
1791 #endif
1792 break;
1793
1794 case 'x':
1795
1796 /* Handle \x{..} specially - new Perl thing for utf8 */
1797
1798 #if !defined NOUTF8
1799 if (*p == '{')
1800 {
1801 unsigned char *pt = p;
1802 c = 0;
1803 while (isxdigit(*(++pt)))
1804 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1805 if (*pt == '}')
1806 {
1807 unsigned char buff8[8];
1808 int ii, utn;
1809 if (use_utf8)
1810 {
1811 utn = ord2utf8(c, buff8);
1812 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1813 c = buff8[ii]; /* Last byte */
1814 }
1815 else
1816 {
1817 if (c > 255)
1818 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1819 "UTF-8 mode is not enabled.\n"
1820 "** Truncation will probably give the wrong result.\n", c);
1821 }
1822 p = pt + 1;
1823 break;
1824 }
1825 /* Not correct form; fall through */
1826 }
1827 #endif
1828
1829 /* Ordinary \x */
1830
1831 c = 0;
1832 while (i++ < 2 && isxdigit(*p))
1833 {
1834 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1835 p++;
1836 }
1837 break;
1838
1839 case 0: /* \ followed by EOF allows for an empty line */
1840 p--;
1841 continue;
1842
1843 case '>':
1844 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1845 continue;
1846
1847 case 'A': /* Option setting */
1848 options |= PCRE_ANCHORED;
1849 continue;
1850
1851 case 'B':
1852 options |= PCRE_NOTBOL;
1853 continue;
1854
1855 case 'C':
1856 if (isdigit(*p)) /* Set copy string */
1857 {
1858 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1859 copystrings |= 1 << n;
1860 }
1861 else if (isalnum(*p))
1862 {
1863 uschar *npp = copynamesptr;
1864 while (isalnum(*p)) *npp++ = *p++;
1865 *npp++ = 0;
1866 *npp = 0;
1867 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1868 if (n < 0)
1869 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1870 copynamesptr = npp;
1871 }
1872 else if (*p == '+')
1873 {
1874 callout_extra = 1;
1875 p++;
1876 }
1877 else if (*p == '-')
1878 {
1879 pcre_callout = NULL;
1880 p++;
1881 }
1882 else if (*p == '!')
1883 {
1884 callout_fail_id = 0;
1885 p++;
1886 while(isdigit(*p))
1887 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1888 callout_fail_count = 0;
1889 if (*p == '!')
1890 {
1891 p++;
1892 while(isdigit(*p))
1893 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1894 }
1895 }
1896 else if (*p == '*')
1897 {
1898 int sign = 1;
1899 callout_data = 0;
1900 if (*(++p) == '-') { sign = -1; p++; }
1901 while(isdigit(*p))
1902 callout_data = callout_data * 10 + *p++ - '0';
1903 callout_data *= sign;
1904 callout_data_set = 1;
1905 }
1906 continue;
1907
1908 #if !defined NODFA
1909 case 'D':
1910 #if !defined NOPOSIX
1911 if (posix || do_posix)
1912 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1913 else
1914 #endif
1915 use_dfa = 1;
1916 continue;
1917
1918 case 'F':
1919 options |= PCRE_DFA_SHORTEST;
1920 continue;
1921 #endif
1922
1923 case 'G':
1924 if (isdigit(*p))
1925 {
1926 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1927 getstrings |= 1 << n;
1928 }
1929 else if (isalnum(*p))
1930 {
1931 uschar *npp = getnamesptr;
1932 while (isalnum(*p)) *npp++ = *p++;
1933 *npp++ = 0;
1934 *npp = 0;
1935 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1936 if (n < 0)
1937 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1938 getnamesptr = npp;
1939 }
1940 continue;
1941
1942 case 'L':
1943 getlist = 1;
1944 continue;
1945
1946 case 'M':
1947 find_match_limit = 1;
1948 continue;
1949
1950 case 'N':
1951 options |= PCRE_NOTEMPTY;
1952 continue;
1953
1954 case 'O':
1955 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1956 if (n > size_offsets_max)
1957 {
1958 size_offsets_max = n;
1959 free(offsets);
1960 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1961 if (offsets == NULL)
1962 {
1963 printf("** Failed to get %d bytes of memory for offsets vector\n",
1964 (int)(size_offsets_max * sizeof(int)));
1965 yield = 1;
1966 goto EXIT;
1967 }
1968 }
1969 use_size_offsets = n;
1970 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1971 continue;
1972
1973 case 'P':
1974 options |= PCRE_PARTIAL;
1975 continue;
1976
1977 case 'Q':
1978 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1979 if (extra == NULL)
1980 {
1981 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1982 extra->flags = 0;
1983 }
1984 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1985 extra->match_limit_recursion = n;
1986 continue;
1987
1988 case 'q':
1989 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1990 if (extra == NULL)
1991 {
1992 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1993 extra->flags = 0;
1994 }
1995 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1996 extra->match_limit = n;
1997 continue;
1998
1999 #if !defined NODFA
2000 case 'R':
2001 options |= PCRE_DFA_RESTART;
2002 continue;
2003 #endif
2004
2005 case 'S':
2006 show_malloc = 1;
2007 continue;
2008
2009 case 'Z':
2010 options |= PCRE_NOTEOL;
2011 continue;
2012
2013 case '?':
2014 options |= PCRE_NO_UTF8_CHECK;
2015 continue;
2016
2017 case '<':
2018 {
2019 int x = check_newline(p, outfile);
2020 if (x == 0) goto NEXT_DATA;
2021 options |= x;
2022 while (*p++ != '>');
2023 }
2024 continue;
2025 }
2026 *q++ = c;
2027 }
2028 *q = 0;
2029 len = q - dbuffer;
2030
2031 /* Move the data to the end of the buffer so that a read over the end of
2032 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2033 we are using the POSIX interface, we must include the terminating zero. */
2034
2035 #if !defined NOPOSIX
2036 if (posix || do_posix)
2037 {
2038 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2039 bptr += buffer_size - len - 1;
2040 }
2041 else
2042 #endif
2043 {
2044 memmove(bptr + buffer_size - len, bptr, len);
2045 bptr += buffer_size - len;
2046 }
2047
2048 if ((all_use_dfa || use_dfa) && find_match_limit)
2049 {
2050 printf("**Match limit not relevant for DFA matching: ignored\n");
2051 find_match_limit = 0;
2052 }
2053
2054 /* Handle matching via the POSIX interface, which does not
2055 support timing or playing with the match limit or callout data. */
2056
2057 #if !defined NOPOSIX
2058 if (posix || do_posix)
2059 {
2060 int rc;
2061 int eflags = 0;
2062 regmatch_t *pmatch = NULL;
2063 if (use_size_offsets > 0)
2064 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2065 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2066 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2067
2068 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2069
2070 if (rc != 0)
2071 {
2072 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2073 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2074 }
2075 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2076 != 0)
2077 {
2078 fprintf(outfile, "Matched with REG_NOSUB\n");
2079 }
2080 else
2081 {
2082 size_t i;
2083 for (i = 0; i < (size_t)use_size_offsets; i++)
2084 {
2085 if (pmatch[i].rm_so >= 0)
2086 {
2087 fprintf(outfile, "%2d: ", (int)i);
2088 (void)pchars(dbuffer + pmatch[i].rm_so,
2089 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2090 fprintf(outfile, "\n");
2091 if (i == 0 && do_showrest)
2092 {
2093 fprintf(outfile, " 0+ ");
2094 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2095 outfile);
2096 fprintf(outfile, "\n");
2097 }
2098 }
2099 }
2100 }
2101 free(pmatch);
2102 }
2103
2104 /* Handle matching via the native interface - repeats for /g and /G */
2105
2106 else
2107 #endif /* !defined NOPOSIX */
2108
2109 for (;; gmatched++) /* Loop for /g or /G */
2110 {
2111 if (timeitm > 0)
2112 {
2113 register int i;
2114 clock_t time_taken;
2115 clock_t start_time = clock();
2116
2117 #if !defined NODFA
2118 if (all_use_dfa || use_dfa)
2119 {
2120 int workspace[1000];
2121 for (i = 0; i < timeitm; i++)
2122 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2123 options | g_notempty, use_offsets, use_size_offsets, workspace,
2124 sizeof(workspace)/sizeof(int));
2125 }
2126 else
2127 #endif
2128
2129 for (i = 0; i < timeitm; i++)
2130 count = pcre_exec(re, extra, (char *)bptr, len,
2131 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2132
2133 time_taken = clock() - start_time;
2134 fprintf(outfile, "Execute time %.4f milliseconds\n",
2135 (((double)time_taken * 1000.0) / (double)timeitm) /
2136 (double)CLOCKS_PER_SEC);
2137 }
2138
2139 /* If find_match_limit is set, we want to do repeated matches with
2140 varying limits in order to find the minimum value for the match limit and
2141 for the recursion limit. */
2142
2143 if (find_match_limit)
2144 {
2145 if (extra == NULL)
2146 {
2147 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2148 extra->flags = 0;
2149 }
2150
2151 (void)check_match_limit(re, extra, bptr, len, start_offset,
2152 options|g_notempty, use_offsets, use_size_offsets,
2153 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2154 PCRE_ERROR_MATCHLIMIT, "match()");
2155
2156 count = check_match_limit(re, extra, bptr, len, start_offset,
2157 options|g_notempty, use_offsets, use_size_offsets,
2158 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2159 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2160 }
2161
2162 /* If callout_data is set, use the interface with additional data */
2163
2164 else if (callout_data_set)
2165 {
2166 if (extra == NULL)
2167 {
2168 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2169 extra->flags = 0;
2170 }
2171 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2172 extra->callout_data = &callout_data;
2173 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2174 options | g_notempty, use_offsets, use_size_offsets);
2175 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2176 }
2177
2178 /* The normal case is just to do the match once, with the default
2179 value of match_limit. */
2180
2181 #if !defined NODFA
2182 else if (all_use_dfa || use_dfa)
2183 {
2184 int workspace[1000];
2185 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2186 options | g_notempty, use_offsets, use_size_offsets, workspace,
2187 sizeof(workspace)/sizeof(int));
2188 if (count == 0)
2189 {
2190 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2191 count = use_size_offsets/2;
2192 }
2193 }
2194 #endif
2195
2196 else
2197 {
2198 count = pcre_exec(re, extra, (char *)bptr, len,
2199 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2200 if (count == 0)
2201 {
2202 fprintf(outfile, "Matched, but too many substrings\n");
2203 count = use_size_offsets/3;
2204 }
2205 }
2206
2207 /* Matched */
2208
2209 if (count >= 0)
2210 {
2211 int i, maxcount;
2212
2213 #if !defined NODFA
2214 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2215 #endif
2216 maxcount = use_size_offsets/3;
2217
2218 /* This is a check against a lunatic return value. */
2219
2220 if (count > maxcount)
2221 {
2222 fprintf(outfile,
2223 "** PCRE error: returned count %d is too big for offset size %d\n",
2224 count, use_size_offsets);
2225 count = use_size_offsets/3;
2226 if (do_g || do_G)
2227 {
2228 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2229 do_g = do_G = FALSE; /* Break g/G loop */
2230 }
2231 }
2232
2233 for (i = 0; i < count * 2; i += 2)
2234 {
2235 if (use_offsets[i] < 0)
2236 fprintf(outfile, "%2d: <unset>\n", i/2);
2237 else
2238 {
2239 fprintf(outfile, "%2d: ", i/2);
2240 (void)pchars(bptr + use_offsets[i],
2241 use_offsets[i+1] - use_offsets[i], outfile);
2242 fprintf(outfile, "\n");
2243 if (i == 0)
2244 {
2245 if (do_showrest)
2246 {
2247 fprintf(outfile, " 0+ ");
2248 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2249 outfile);
2250 fprintf(outfile, "\n");
2251 }
2252 }
2253 }
2254 }
2255
2256 for (i = 0; i < 32; i++)
2257 {
2258 if ((copystrings & (1 << i)) != 0)
2259 {
2260 char copybuffer[256];
2261 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2262 i, copybuffer, sizeof(copybuffer));
2263 if (rc < 0)
2264 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2265 else
2266 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2267 }
2268 }
2269
2270 for (copynamesptr = copynames;
2271 *copynamesptr != 0;
2272 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2273 {
2274 char copybuffer[256];
2275 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2276 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2277 if (rc < 0)
2278 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2279 else
2280 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2281 }
2282
2283 for (i = 0; i < 32; i++)
2284 {
2285 if ((getstrings & (1 << i)) != 0)
2286 {
2287 const char *substring;
2288 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2289 i, &substring);
2290 if (rc < 0)
2291 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2292 else
2293 {
2294 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2295 pcre_free_substring(substring);
2296 }
2297 }
2298 }
2299
2300 for (getnamesptr = getnames;
2301 *getnamesptr != 0;
2302 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2303 {
2304 const char *substring;
2305 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2306 count, (char *)getnamesptr, &substring);
2307 if (rc < 0)
2308 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2309 else
2310 {
2311 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2312 pcre_free_substring(substring);
2313 }
2314 }
2315
2316 if (getlist)
2317 {
2318 const char **stringlist;
2319 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2320 &stringlist);
2321 if (rc < 0)
2322 fprintf(outfile, "get substring list failed %d\n", rc);
2323 else
2324 {
2325 for (i = 0; i < count; i++)
2326 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2327 if (stringlist[i] != NULL)
2328 fprintf(outfile, "string list not terminated by NULL\n");
2329 /* free((void *)stringlist); */
2330 pcre_free_substring_list(stringlist);
2331 }
2332 }
2333 }
2334
2335 /* There was a partial match */
2336
2337 else if (count == PCRE_ERROR_PARTIAL)
2338 {
2339 fprintf(outfile, "Partial match");
2340 #if !defined NODFA
2341 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2342 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2343 bptr + use_offsets[0]);
2344 #endif
2345 fprintf(outfile, "\n");
2346 break; /* Out of the /g loop */
2347 }
2348
2349 /* Failed to match. If this is a /g or /G loop and we previously set
2350 g_notempty after a null match, this is not necessarily the end. We want
2351 to advance the start offset, and continue. We won't be at the end of the
2352 string - that was checked before setting g_notempty.
2353
2354 Complication arises in the case when the newline option is "any" or
2355 "anycrlf". If the previous match was at the end of a line terminated by
2356 CRLF, an advance of one character just passes the \r, whereas we should
2357 prefer the longer newline sequence, as does the code in pcre_exec().
2358 Fudge the offset value to achieve this.
2359
2360 Otherwise, in the case of UTF-8 matching, the advance must be one
2361 character, not one byte. */
2362
2363 else
2364 {
2365 if (g_notempty != 0)
2366 {
2367 int onechar = 1;
2368 unsigned int obits = ((real_pcre *)re)->options;
2369 use_offsets[0] = start_offset;
2370 if ((obits & PCRE_NEWLINE_BITS) == 0)
2371 {
2372 int d;
2373 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2374 obits = (d == '\r')? PCRE_NEWLINE_CR :
2375 (d == '\n')? PCRE_NEWLINE_LF :
2376 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2377 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2378 (d == -1)? PCRE_NEWLINE_ANY : 0;
2379 }
2380 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2381 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2382 &&
2383 start_offset < len - 1 &&
2384 bptr[start_offset] == '\r' &&
2385 bptr[start_offset+1] == '\n')
2386 onechar++;
2387 else if (use_utf8)
2388 {
2389 while (start_offset + onechar < len)
2390 {
2391 int tb = bptr[start_offset+onechar];
2392 if (tb <= 127) break;
2393 tb &= 0xc0;
2394 if (tb != 0 && tb != 0xc0) onechar++;
2395 }
2396 }
2397 use_offsets[1] = start_offset + onechar;
2398 }
2399 else
2400 {
2401 if (count == PCRE_ERROR_NOMATCH)
2402 {
2403 if (gmatched == 0) fprintf(outfile, "No match\n");
2404 }
2405 else fprintf(outfile, "Error %d\n", count);
2406 break; /* Out of the /g loop */
2407 }
2408 }
2409
2410 /* If not /g or /G we are done */
2411
2412 if (!do_g && !do_G) break;
2413
2414 /* If we have matched an empty string, first check to see if we are at
2415 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2416 what Perl's /g option does. This turns out to be rather cunning. First
2417 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2418 same point. If this fails (picked up above) we advance to the next
2419 character. */
2420
2421 g_notempty = 0;
2422
2423 if (use_offsets[0] == use_offsets[1])
2424 {
2425 if (use_offsets[0] == len) break;
2426 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2427 }
2428
2429 /* For /g, update the start offset, leaving the rest alone */
2430
2431 if (do_g) start_offset = use_offsets[1];
2432
2433 /* For /G, update the pointer and length */
2434
2435 else
2436 {
2437 bptr += use_offsets[1];
2438 len -= use_offsets[1];
2439 }
2440 } /* End of loop for /g and /G */
2441
2442 NEXT_DATA: continue;
2443 } /* End of loop for data lines */
2444
2445 CONTINUE:
2446
2447 #if !defined NOPOSIX
2448 if (posix || do_posix) regfree(&preg);
2449 #endif
2450
2451 if (re != NULL) new_free(re);
2452 if (extra != NULL) new_free(extra);
2453 if (tables != NULL)
2454 {
2455 new_free((void *)tables);
2456 setlocale(LC_CTYPE, "C");
2457 locale_set = 0;
2458 }
2459 }
2460
2461 if (infile == stdin) fprintf(outfile, "\n");
2462
2463 EXIT:
2464
2465 if (infile != NULL && infile != stdin) fclose(infile);
2466 if (outfile != NULL && outfile != stdout) fclose(outfile);
2467
2468 free(buffer);
2469 free(dbuffer);
2470 free(pbuffer);
2471 free(offsets);
2472
2473 return yield;
2474 }
2475
2476 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12