/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 389 - (show annotations) (download)
Sun Mar 15 18:24:05 2009 UTC (5 years, 4 months ago) by ph10
File MIME type: text/plain
File size: 72861 byte(s)
Add PCRE_NO_START_OPTIMIZE

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <locale.h>
49 #include <errno.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #define isatty _isatty /* This is what Windows calls them, I'm told */
75 #define fileno _fileno
76
77 #else
78 #include <sys/time.h> /* These two includes are needed */
79 #include <sys/resource.h> /* for setrlimit(). */
80 #define INPUT_MODE "rb"
81 #define OUTPUT_MODE "wb"
82 #endif
83
84
85 /* We have to include pcre_internal.h because we need the internal info for
86 displaying the results of pcre_study() and we also need to know about the
87 internal macros, structures, and other internal data values; pcretest has
88 "inside information" compared to a program that strictly follows the PCRE API.
89
90 Although pcre_internal.h does itself include pcre.h, we explicitly include it
91 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
92 appropriately for an application, not for building PCRE. */
93
94 #include "pcre.h"
95 #include "pcre_internal.h"
96
97 /* We need access to some of the data tables that PCRE uses. So as not to have
98 to keep two copies, we include the source file here, changing the names of the
99 external symbols to prevent clashes. */
100
101 #define _pcre_ucp_gentype ucp_gentype
102 #define _pcre_utf8_table1 utf8_table1
103 #define _pcre_utf8_table1_size utf8_table1_size
104 #define _pcre_utf8_table2 utf8_table2
105 #define _pcre_utf8_table3 utf8_table3
106 #define _pcre_utf8_table4 utf8_table4
107 #define _pcre_utt utt
108 #define _pcre_utt_size utt_size
109 #define _pcre_utt_names utt_names
110 #define _pcre_OP_lengths OP_lengths
111
112 #include "pcre_tables.c"
113
114 /* We also need the pcre_printint() function for printing out compiled
115 patterns. This function is in a separate file so that it can be included in
116 pcre_compile.c when that module is compiled with debugging enabled.
117
118 The definition of the macro PRINTABLE, which determines whether to print an
119 output character as-is or as a hex value when showing compiled patterns, is
120 contained in this file. We use it here also, in cases when the locale has not
121 been explicitly changed, so as to get consistent output from systems that
122 differ in their output from isprint() even in the "C" locale. */
123
124 #include "pcre_printint.src"
125
126 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
127
128
129 /* It is possible to compile this test program without including support for
130 testing the POSIX interface, though this is not available via the standard
131 Makefile. */
132
133 #if !defined NOPOSIX
134 #include "pcreposix.h"
135 #endif
136
137 /* It is also possible, for the benefit of the version currently imported into
138 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
139 interface to the DFA matcher (NODFA), and without the doublecheck of the old
140 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
141 UTF8 support if PCRE is built without it. */
142
143 #ifndef SUPPORT_UTF8
144 #ifndef NOUTF8
145 #define NOUTF8
146 #endif
147 #endif
148
149
150 /* Other parameters */
151
152 #ifndef CLOCKS_PER_SEC
153 #ifdef CLK_TCK
154 #define CLOCKS_PER_SEC CLK_TCK
155 #else
156 #define CLOCKS_PER_SEC 100
157 #endif
158 #endif
159
160 /* This is the default loop count for timing. */
161
162 #define LOOPREPEAT 500000
163
164 /* Static variables */
165
166 static FILE *outfile; /* Where results are written; file-scope so that helpers such as new_malloc() can use it */
167 static int log_store = 0; /* NOTE(review): not referenced in the visible part of the file - confirm its use */
168 static int callout_count; /* Incremented by callout() each time the callout number equals callout_fail_id */
169 static int callout_extra; /* Non-zero: callout() lists the captures and reprints the subject on every call */
170 static int callout_fail_count; /* callout() returns 1 once callout_count has reached this value */
171 static int callout_fail_id; /* The callout number whose invocations are counted in callout_count */
172 static int debug_lengths; /* Set to 1 for each pattern in main(); consumer not visible here - confirm */
173 static int first_callout; /* Non-zero makes callout() print the subject line; callout() then clears it */
174 static int locale_set = 0; /* Non-zero makes PRINTHEX use isprint() instead of PRINTABLE() */
175 static int show_malloc; /* Non-zero makes the malloc/free wrappers log each call to outfile */
176 static int use_utf8; /* Non-zero makes pchars() decode UTF-8; reset to 0 for each pattern in main() */
177 static size_t gotten_store; /* Size passed to the most recent new_malloc() call */
178
179 /* The buffers grow automatically if very long input lines are encountered. */
180
181 static int buffer_size = 50000; /* Current size of each of the three buffers; doubled by extend_inputline() */
182 static uschar *buffer = NULL; /* Input lines are read into this buffer */
183 static uschar *dbuffer = NULL; /* Grown with buffer, contents not preserved; use not visible here - confirm */
184 static uschar *pbuffer = NULL; /* Copy of the input for callouts; callout() prints pattern text from it */
185
186
187
188 /*************************************************
189 * Read or extend an input line *
190 *************************************************/
191
192 /* Input lines are read into buffer, but both patterns and data lines can be
193 continued over multiple input lines. In addition, if the buffer fills up, we
194 want to automatically expand it so as to be able to handle extremely large
195 lines that are needed for certain stress tests. When the input buffer is
196 expanded, the other two buffers must also be expanded likewise, and the
197 contents of pbuffer, which are a copy of the input for callouts, must be
198 preserved (for when expansion happens for a data line). This is not the most
199 optimal way of handling this, but hey, this is just a test program!
200
201 Arguments:
202 f the file to read
203 start where in buffer to start (this *must* be within buffer)
204 prompt for stdin or readline()
205
206 Returns: pointer to the start of new data
207 could be a copy of start, or could be moved
208 NULL if no data read and EOF reached
209 */
210
211 static uschar *
212 extend_inputline(FILE *f, uschar *start, const char *prompt)
213 {
214 uschar *here = start;
215
216 for (;;)
217 {
218 int rlen = buffer_size - (here - buffer);
219
220 if (rlen > 1000)
221 {
222 int dlen;
223
224 /* If libreadline support is required, use readline() to read a line if the
225 input is a terminal. Note that readline() removes the trailing newline, so
226 we must put it back again, to be compatible with fgets(). */
227
228 #ifdef SUPPORT_LIBREADLINE
229 if (isatty(fileno(f)))
230 {
231 size_t len;
232 char *s = readline(prompt);
233 if (s == NULL) return (here == start)? NULL : start;
234 len = strlen(s);
235 if (len > 0) add_history(s);
236 if (len > rlen - 1) len = rlen - 1;
237 memcpy(here, s, len);
238 here[len] = '\n';
239 here[len+1] = 0;
240 free(s);
241 }
242 else
243 #endif
244
245 /* Read the next line by normal means, prompting if the file is stdin. */
246
247 {
248 if (f == stdin) printf(prompt);
249 if (fgets((char *)here, rlen, f) == NULL)
250 return (here == start)? NULL : start;
251 }
252
253 dlen = (int)strlen((char *)here);
254 if (dlen > 0 && here[dlen - 1] == '\n') return start;
255 here += dlen;
256 }
257
258 else
259 {
260 int new_buffer_size = 2*buffer_size;
261 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
262 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
263 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
264
265 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
266 {
267 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
268 exit(1);
269 }
270
271 memcpy(new_buffer, buffer, buffer_size);
272 memcpy(new_pbuffer, pbuffer, buffer_size);
273
274 buffer_size = new_buffer_size;
275
276 start = new_buffer + (start - buffer);
277 here = new_buffer + (here - buffer);
278
279 free(buffer);
280 free(dbuffer);
281 free(pbuffer);
282
283 buffer = new_buffer;
284 dbuffer = new_dbuffer;
285 pbuffer = new_pbuffer;
286 }
287 }
288
289 return NULL; /* Control never gets here */
290 }
291
292
293
294
295
296
297
/*************************************************
*         Read number from string                *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then decimal digits are accumulated. A value
that is too large for an int is clamped to the largest int instead of
overflowing. All the digits are consumed even when the value has been clamped.

Arguments:
  str        string to be converted
  endptr     where to put the end pointer (first byte after the digits)

Returns:     the value as a non-negative int; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
/* Largest value that is accumulated. This equals INT_MAX on ordinary
implementations and is never larger, so the arithmetic below cannot overflow. */

const int max_value = (int)(~0U >> 1);
int result = 0;

while (*str != 0 && isspace(*str)) str++;

while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');
  if (result > (max_value - digit)/10) result = max_value;
    else result = result * 10 + digit;
  }

*endptr = str;
return(result);
}
322
323
324
325
326 /*************************************************
327 * Convert UTF-8 string to value *
328 *************************************************/
329
330 /* This function takes one or more bytes that represents a UTF-8 character,
331 and returns the value of the character.
332
333 Argument:
334 utf8bytes a pointer to the byte vector
335 vptr a pointer to an int to receive the value
336
337 Returns: > 0 => the number of bytes consumed
338 -6 to 0 => malformed UTF-8 character at offset = (-return)
339 */
340
341 #if !defined NOUTF8
342
343 static int
344 utf82ord(unsigned char *utf8bytes, int *vptr)
345 {
346 int c = *utf8bytes++;
347 int d = c;
348 int i, j, s;
349
350 for (i = -1; i < 6; i++) /* i is number of additional bytes */
351 {
352 if ((d & 0x80) == 0) break;
353 d <<= 1;
354 }
355
356 if (i == -1) { *vptr = c; return 1; } /* ascii character */
357 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
358
359 /* i now has a value in the range 1-5 */
360
361 s = 6*i;
362 d = (c & utf8_table3[i]) << s;
363
364 for (j = 0; j < i; j++)
365 {
366 c = *utf8bytes++;
367 if ((c & 0xc0) != 0x80) return -(j+1);
368 s -= 6;
369 d |= (c & 0x3f) << s;
370 }
371
372 /* Check that encoding was the correct unique one */
373
374 for (j = 0; j < utf8_table1_size; j++)
375 if (d <= utf8_table1[j]) break;
376 if (j != i) return -(i+1);
377
378 /* Valid value */
379
380 *vptr = d;
381 return i+1;
382 }
383
384 #endif
385
386
387
388 /*************************************************
389 * Convert character value to UTF-8 *
390 *************************************************/
391
392 /* This function takes an integer value in the range 0 - 0x7fffffff
393 and encodes it as a UTF-8 character in 0 to 6 bytes.
394
395 Arguments:
396 cvalue the character value
397 utf8bytes pointer to buffer for result - at least 6 bytes long
398
399 Returns: number of characters placed in the buffer
400 */
401
402 #if !defined NOUTF8
403
404 static int
405 ord2utf8(int cvalue, uschar *utf8bytes)
406 {
407 register int i, j;
408 for (i = 0; i < utf8_table1_size; i++)
409 if (cvalue <= utf8_table1[i]) break;
410 utf8bytes += i;
411 for (j = i; j > 0; j--)
412 {
413 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
414 cvalue >>= 6;
415 }
416 *utf8bytes = utf8_table2[i] | cvalue;
417 return i + 1;
418 }
419
420 #endif
421
422
423
424 /*************************************************
425 * Print character string *
426 *************************************************/
427
428 /* Character string printing function. Must handle UTF-8 strings in utf8
429 mode. Yields number of characters printed. If handed a NULL file, just counts
430 chars without printing. */
431
432 static int pchars(unsigned char *p, int length, FILE *f)
433 {
434 int c = 0;
435 int yield = 0;
436
437 while (length-- > 0)
438 {
439 #if !defined NOUTF8
440 if (use_utf8)
441 {
442 int rc = utf82ord(p, &c);
443
444 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
445 {
446 length -= rc - 1;
447 p += rc;
448 if (PRINTHEX(c))
449 {
450 if (f != NULL) fprintf(f, "%c", c);
451 yield++;
452 }
453 else
454 {
455 int n = 4;
456 if (f != NULL) fprintf(f, "\\x{%02x}", c);
457 yield += (n <= 0x000000ff)? 2 :
458 (n <= 0x00000fff)? 3 :
459 (n <= 0x0000ffff)? 4 :
460 (n <= 0x000fffff)? 5 : 6;
461 }
462 continue;
463 }
464 }
465 #endif
466
467 /* Not UTF-8, or malformed UTF-8 */
468
469 c = *p++;
470 if (PRINTHEX(c))
471 {
472 if (f != NULL) fprintf(f, "%c", c);
473 yield++;
474 }
475 else
476 {
477 if (f != NULL) fprintf(f, "\\x%02x", c);
478 yield += 4;
479 }
480 }
481
482 return yield;
483 }
484
485
486
487 /*************************************************
488 * Callout function *
489 *************************************************/
490
491 /* Called from PCRE as a result of the (?C) item. We print out where we are in
492 the match. Yield zero unless more callouts than the fail count, or the callout
493 data is not zero. */
494
495 static int callout(pcre_callout_block *cb)
496 {
497 FILE *f = (first_callout | callout_extra)? outfile : NULL;
498 int i, pre_start, post_start, subject_length;
499
500 if (callout_extra)
501 {
502 fprintf(f, "Callout %d: last capture = %d\n",
503 cb->callout_number, cb->capture_last);
504
505 for (i = 0; i < cb->capture_top * 2; i += 2)
506 {
507 if (cb->offset_vector[i] < 0)
508 fprintf(f, "%2d: <unset>\n", i/2);
509 else
510 {
511 fprintf(f, "%2d: ", i/2);
512 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
513 cb->offset_vector[i+1] - cb->offset_vector[i], f);
514 fprintf(f, "\n");
515 }
516 }
517 }
518
519 /* Re-print the subject in canonical form, the first time or if giving full
520 datails. On subsequent calls in the same match, we use pchars just to find the
521 printed lengths of the substrings. */
522
523 if (f != NULL) fprintf(f, "--->");
524
525 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
526 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
527 cb->current_position - cb->start_match, f);
528
529 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
530
531 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
532 cb->subject_length - cb->current_position, f);
533
534 if (f != NULL) fprintf(f, "\n");
535
536 /* Always print appropriate indicators, with callout number if not already
537 shown. For automatic callouts, show the pattern offset. */
538
539 if (cb->callout_number == 255)
540 {
541 fprintf(outfile, "%+3d ", cb->pattern_position);
542 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
543 }
544 else
545 {
546 if (callout_extra) fprintf(outfile, " ");
547 else fprintf(outfile, "%3d ", cb->callout_number);
548 }
549
550 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
551 fprintf(outfile, "^");
552
553 if (post_start > 0)
554 {
555 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
556 fprintf(outfile, "^");
557 }
558
559 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
560 fprintf(outfile, " ");
561
562 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
563 pbuffer + cb->pattern_position);
564
565 fprintf(outfile, "\n");
566 first_callout = 0;
567
568 if (cb->callout_data != NULL)
569 {
570 int callout_data = *((int *)(cb->callout_data));
571 if (callout_data != 0)
572 {
573 fprintf(outfile, "Callout data = %d\n", callout_data);
574 return callout_data;
575 }
576 }
577
578 return (cb->callout_number != callout_fail_id)? 0 :
579 (++callout_count >= callout_fail_count)? 1 : 0;
580 }
581
582
583 /*************************************************
584 * Local malloc functions *
585 *************************************************/
586
587 /* Alternative malloc function, to test functionality and show the size of the
588 compiled re. */
589
590 static void *new_malloc(size_t size)
591 {
592 void *block = malloc(size);
593 gotten_store = size;
594 if (show_malloc)
595 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
596 return block;
597 }
598
599 static void new_free(void *block)
600 {
601 if (show_malloc)
602 fprintf(outfile, "free %p\n", block);
603 free(block);
604 }
605
606
607 /* For recursion malloc/free, to test stacking calls */
608
609 static void *stack_malloc(size_t size)
610 {
611 void *block = malloc(size);
612 if (show_malloc)
613 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
614 return block;
615 }
616
617 static void stack_free(void *block)
618 {
619 if (show_malloc)
620 fprintf(outfile, "stack_free %p\n", block);
621 free(block);
622 }
623
624
625 /*************************************************
626 * Call pcre_fullinfo() *
627 *************************************************/
628
629 /* Get one piece of information from the pcre_fullinfo() function */
630
631 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
632 {
633 int rc;
634 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
635 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
636 }
637
638
639
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverses the byte order of a 2-byte or a 4-byte quantity. Any bits above
the quantity being flipped are discarded.

Arguments:
  value      the value to flip
  n          2 to flip the low-order two bytes; any other value flips the
               low-order four bytes

Returns:     the flipped value
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
653
654
655
656
657 /*************************************************
658 * Check match or recursion limit *
659 *************************************************/
660
661 static int
662 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
663 int start_offset, int options, int *use_offsets, int use_size_offsets,
664 int flag, unsigned long int *limit, int errnumber, const char *msg)
665 {
666 int count;
667 int min = 0;
668 int mid = 64;
669 int max = -1;
670
671 extra->flags |= flag;
672
673 for (;;)
674 {
675 *limit = mid;
676
677 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
678 use_offsets, use_size_offsets);
679
680 if (count == errnumber)
681 {
682 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
683 min = mid;
684 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
685 }
686
687 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
688 count == PCRE_ERROR_PARTIAL)
689 {
690 if (mid == min + 1)
691 {
692 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
693 break;
694 }
695 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
696 max = mid;
697 mid = (min + mid)/2;
698 }
699 else break; /* Some other error */
700 }
701
702 extra->flags &= ~flag;
703 return count;
704 }
705
706
707
708 /*************************************************
709 * Case-independent strncmp() function *
710 *************************************************/
711
712 /*
713 Arguments:
714 s first string
715 t second string
716 n number of characters to compare
717
718 Returns: < 0, = 0, or > 0, according to the comparison
719 */
720
721 static int
722 strncmpic(uschar *s, uschar *t, int n)
723 {
724 while (n--)
725 {
726 int c = tolower(*s++) - tolower(*t++);
727 if (c) return c;
728 }
729 return 0;
730 }
731
732
733
734 /*************************************************
735 * Check newline indicator *
736 *************************************************/
737
738 /* This is used both at compile and run-time to check for <xxx> escapes, where
739 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
740 no match.
741
742 Arguments:
743 p points after the leading '<'
744 f file for error message
745
746 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
747 */
748
749 static int
750 check_newline(uschar *p, FILE *f)
751 {
752 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
753 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
754 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
755 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
756 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
757 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
758 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
759 fprintf(f, "Unknown newline type at: <%s\n", p);
760 return 0;
761 }
762
763
764
/*************************************************
*             Usage function                     *
*************************************************/

/* Writes the command line summary to stdout. The lines that describe
readline(), -dfa, and -p depend on how the program was built. None of the text
contains a conversion specification, so it is written with fputs(). */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s output store (memory) used information\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
802
803
804
805 /*************************************************
806 * Main Program *
807 *************************************************/
808
809 /* Read lines from named file or stdin and write to named file or stdout; lines
810 consist of a regular expression, in delimiters and optionally followed by
811 options, followed by a set of test data, terminated by an empty line. */
812
813 int main(int argc, char **argv)
814 {
815 FILE *infile = stdin;
816 int options = 0;
817 int study_options = 0;
818 int default_find_match_limit = FALSE;
819 int op = 1;
820 int timeit = 0;
821 int timeitm = 0;
822 int showinfo = 0;
823 int showstore = 0;
824 int quiet = 0;
825 int size_offsets = 45;
826 int size_offsets_max;
827 int *offsets = NULL;
828 #if !defined NOPOSIX
829 int posix = 0;
830 #endif
831 int debug = 0;
832 int done = 0;
833 int all_use_dfa = 0;
834 int yield = 0;
835 int stack_size;
836
837 /* These vectors store, end-to-end, a list of captured substring names. Assume
838 that 1024 is plenty long enough for the few names we'll be testing. */
839
840 uschar copynames[1024];
841 uschar getnames[1024];
842
843 uschar *copynamesptr;
844 uschar *getnamesptr;
845
846 /* Get buffers from malloc() so that Electric Fence will check their misuse
847 when I am debugging. They grow automatically when very long lines are read. */
848
849 buffer = (unsigned char *)malloc(buffer_size);
850 dbuffer = (unsigned char *)malloc(buffer_size);
851 pbuffer = (unsigned char *)malloc(buffer_size);
852
853 /* The outfile variable is static so that new_malloc can use it. */
854
855 outfile = stdout;
856
857 /* The following _setmode() stuff is some Windows magic that tells its runtime
858 library to translate CRLF into a single LF character. At least, that's what
859 I've been told: never having used Windows I take this all on trust. Originally
860 it set 0x8000, but then I was advised that _O_BINARY was better. */
861
862 #if defined(_WIN32) || defined(WIN32)
863 _setmode( _fileno( stdout ), _O_BINARY );
864 #endif
865
866 /* Scan options */
867
868 while (argc > 1 && argv[op][0] == '-')
869 {
870 unsigned char *endptr;
871
872 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
873 showstore = 1;
874 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
875 else if (strcmp(argv[op], "-b") == 0) debug = 1;
876 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
877 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
878 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
879 #if !defined NODFA
880 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
881 #endif
882 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
883 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
884 *endptr == 0))
885 {
886 op++;
887 argc--;
888 }
889 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
890 {
891 int both = argv[op][2] == 0;
892 int temp;
893 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
894 *endptr == 0))
895 {
896 timeitm = temp;
897 op++;
898 argc--;
899 }
900 else timeitm = LOOPREPEAT;
901 if (both) timeit = timeitm;
902 }
903 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
904 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
905 *endptr == 0))
906 {
907 #if defined(_WIN32) || defined(WIN32)
908 printf("PCRE: -S not supported on this OS\n");
909 exit(1);
910 #else
911 int rc;
912 struct rlimit rlim;
913 getrlimit(RLIMIT_STACK, &rlim);
914 rlim.rlim_cur = stack_size * 1024 * 1024;
915 rc = setrlimit(RLIMIT_STACK, &rlim);
916 if (rc != 0)
917 {
918 printf("PCRE: setrlimit() failed with error %d\n", rc);
919 exit(1);
920 }
921 op++;
922 argc--;
923 #endif
924 }
925 #if !defined NOPOSIX
926 else if (strcmp(argv[op], "-p") == 0) posix = 1;
927 #endif
928 else if (strcmp(argv[op], "-C") == 0)
929 {
930 int rc;
931 unsigned long int lrc;
932 printf("PCRE version %s\n", pcre_version());
933 printf("Compiled with\n");
934 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
935 printf(" %sUTF-8 support\n", rc? "" : "No ");
936 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
937 printf(" %sUnicode properties support\n", rc? "" : "No ");
938 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
939 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
940 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
941 (rc == -2)? "ANYCRLF" :
942 (rc == -1)? "ANY" : "???");
943 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
944 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
945 "all Unicode newlines");
946 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
947 printf(" Internal link size = %d\n", rc);
948 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
949 printf(" POSIX malloc threshold = %d\n", rc);
950 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
951 printf(" Default match limit = %ld\n", lrc);
952 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
953 printf(" Default recursion depth limit = %ld\n", lrc);
954 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
955 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
956 goto EXIT;
957 }
958 else if (strcmp(argv[op], "-help") == 0 ||
959 strcmp(argv[op], "--help") == 0)
960 {
961 usage();
962 goto EXIT;
963 }
964 else
965 {
966 printf("** Unknown or malformed option %s\n", argv[op]);
967 usage();
968 yield = 1;
969 goto EXIT;
970 }
971 op++;
972 argc--;
973 }
974
975 /* Get the store for the offsets vector, and remember what it was */
976
977 size_offsets_max = size_offsets;
978 offsets = (int *)malloc(size_offsets_max * sizeof(int));
979 if (offsets == NULL)
980 {
981 printf("** Failed to get %d bytes of memory for offsets vector\n",
982 (int)(size_offsets_max * sizeof(int)));
983 yield = 1;
984 goto EXIT;
985 }
986
987 /* Sort out the input and output files */
988
989 if (argc > 1)
990 {
991 infile = fopen(argv[op], INPUT_MODE);
992 if (infile == NULL)
993 {
994 printf("** Failed to open %s\n", argv[op]);
995 yield = 1;
996 goto EXIT;
997 }
998 }
999
1000 if (argc > 2)
1001 {
1002 outfile = fopen(argv[op+1], OUTPUT_MODE);
1003 if (outfile == NULL)
1004 {
1005 printf("** Failed to open %s\n", argv[op+1]);
1006 yield = 1;
1007 goto EXIT;
1008 }
1009 }
1010
1011 /* Set alternative malloc function */
1012
1013 pcre_malloc = new_malloc;
1014 pcre_free = new_free;
1015 pcre_stack_malloc = stack_malloc;
1016 pcre_stack_free = stack_free;
1017
1018 /* Heading line unless quiet, then prompt for first regex if stdin */
1019
1020 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1021
1022 /* Main loop */
1023
1024 while (!done)
1025 {
1026 pcre *re = NULL;
1027 pcre_extra *extra = NULL;
1028
1029 #if !defined NOPOSIX /* There are still compilers that require no indent */
1030 regex_t preg;
1031 int do_posix = 0;
1032 #endif
1033
1034 const char *error;
1035 unsigned char *p, *pp, *ppp;
1036 unsigned char *to_file = NULL;
1037 const unsigned char *tables = NULL;
1038 unsigned long int true_size, true_study_size = 0;
1039 size_t size, regex_gotten_store;
1040 int do_study = 0;
1041 int do_debug = debug;
1042 int do_G = 0;
1043 int do_g = 0;
1044 int do_showinfo = showinfo;
1045 int do_showrest = 0;
1046 int do_flip = 0;
1047 int erroroffset, len, delimiter, poffset;
1048
1049 use_utf8 = 0;
1050 debug_lengths = 1;
1051
1052 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1053 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1054 fflush(outfile);
1055
1056 p = buffer;
1057 while (isspace(*p)) p++;
1058 if (*p == 0) continue;
1059
1060 /* See if the pattern is to be loaded pre-compiled from a file. */
1061
1062 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1063 {
1064 unsigned long int magic, get_options;
1065 uschar sbuf[8];
1066 FILE *f;
1067
1068 p++;
1069 pp = p + (int)strlen((char *)p);
1070 while (isspace(pp[-1])) pp--;
1071 *pp = 0;
1072
1073 f = fopen((char *)p, "rb");
1074 if (f == NULL)
1075 {
1076 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1077 continue;
1078 }
1079
1080 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1081
1082 true_size =
1083 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1084 true_study_size =
1085 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1086
1087 re = (real_pcre *)new_malloc(true_size);
1088 regex_gotten_store = gotten_store;
1089
1090 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1091
1092 magic = ((real_pcre *)re)->magic_number;
1093 if (magic != MAGIC_NUMBER)
1094 {
1095 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1096 {
1097 do_flip = 1;
1098 }
1099 else
1100 {
1101 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1102 fclose(f);
1103 continue;
1104 }
1105 }
1106
1107 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1108 do_flip? " (byte-inverted)" : "", p);
1109
1110 /* Need to know if UTF-8 for printing data strings */
1111
1112 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1113 use_utf8 = (get_options & PCRE_UTF8) != 0;
1114
1115 /* Now see if there is any following study data */
1116
1117 if (true_study_size != 0)
1118 {
1119 pcre_study_data *psd;
1120
1121 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1122 extra->flags = PCRE_EXTRA_STUDY_DATA;
1123
1124 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1125 extra->study_data = psd;
1126
1127 if (fread(psd, 1, true_study_size, f) != true_study_size)
1128 {
1129 FAIL_READ:
1130 fprintf(outfile, "Failed to read data from %s\n", p);
1131 if (extra != NULL) new_free(extra);
1132 if (re != NULL) new_free(re);
1133 fclose(f);
1134 continue;
1135 }
1136 fprintf(outfile, "Study data loaded from %s\n", p);
1137 do_study = 1; /* To get the data output if requested */
1138 }
1139 else fprintf(outfile, "No study data\n");
1140
1141 fclose(f);
1142 goto SHOW_INFO;
1143 }
1144
1145 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1146 the pattern; if it isn't complete, read more. */
1147
1148 delimiter = *p++;
1149
1150 if (isalnum(delimiter) || delimiter == '\\')
1151 {
1152 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1153 goto SKIP_DATA;
1154 }
1155
1156 pp = p;
1157 poffset = p - buffer;
1158
1159 for(;;)
1160 {
1161 while (*pp != 0)
1162 {
1163 if (*pp == '\\' && pp[1] != 0) pp++;
1164 else if (*pp == delimiter) break;
1165 pp++;
1166 }
1167 if (*pp != 0) break;
1168 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1169 {
1170 fprintf(outfile, "** Unexpected EOF\n");
1171 done = 1;
1172 goto CONTINUE;
1173 }
1174 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1175 }
1176
1177 /* The buffer may have moved while being extended; reset the start of data
1178 pointer to the correct relative point in the buffer. */
1179
1180 p = buffer + poffset;
1181
1182 /* If the first character after the delimiter is backslash, make
1183 the pattern end with backslash. This is purely to provide a way
1184 of testing for the error message when a pattern ends with backslash. */
1185
1186 if (pp[1] == '\\') *pp++ = '\\';
1187
1188 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1189 for callouts. */
1190
1191 *pp++ = 0;
1192 strcpy((char *)pbuffer, (char *)p);
1193
1194 /* Look for options after final delimiter */
1195
1196 options = 0;
1197 study_options = 0;
1198 log_store = showstore; /* default from command line */
1199
1200 while (*pp != 0)
1201 {
1202 switch (*pp++)
1203 {
1204 case 'f': options |= PCRE_FIRSTLINE; break;
1205 case 'g': do_g = 1; break;
1206 case 'i': options |= PCRE_CASELESS; break;
1207 case 'm': options |= PCRE_MULTILINE; break;
1208 case 's': options |= PCRE_DOTALL; break;
1209 case 'x': options |= PCRE_EXTENDED; break;
1210
1211 case '+': do_showrest = 1; break;
1212 case 'A': options |= PCRE_ANCHORED; break;
1213 case 'B': do_debug = 1; break;
1214 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1215 case 'D': do_debug = do_showinfo = 1; break;
1216 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1217 case 'F': do_flip = 1; break;
1218 case 'G': do_G = 1; break;
1219 case 'I': do_showinfo = 1; break;
1220 case 'J': options |= PCRE_DUPNAMES; break;
1221 case 'M': log_store = 1; break;
1222 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1223
1224 #if !defined NOPOSIX
1225 case 'P': do_posix = 1; break;
1226 #endif
1227
1228 case 'S': do_study = 1; break;
1229 case 'U': options |= PCRE_UNGREEDY; break;
1230 case 'X': options |= PCRE_EXTRA; break;
1231 case 'Z': debug_lengths = 0; break;
1232 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1233 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1234
1235 case 'L':
1236 ppp = pp;
1237 /* The '\r' test here is so that it works on Windows. */
1238 /* The '0' test is just in case this is an unterminated line. */
1239 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1240 *ppp = 0;
1241 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1242 {
1243 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1244 goto SKIP_DATA;
1245 }
1246 locale_set = 1;
1247 tables = pcre_maketables();
1248 pp = ppp;
1249 break;
1250
1251 case '>':
1252 to_file = pp;
1253 while (*pp != 0) pp++;
1254 while (isspace(pp[-1])) pp--;
1255 *pp = 0;
1256 break;
1257
1258 case '<':
1259 {
1260 if (strncmp((char *)pp, "JS>", 3) == 0)
1261 {
1262 options |= PCRE_JAVASCRIPT_COMPAT;
1263 pp += 3;
1264 }
1265 else
1266 {
1267 int x = check_newline(pp, outfile);
1268 if (x == 0) goto SKIP_DATA;
1269 options |= x;
1270 while (*pp++ != '>');
1271 }
1272 }
1273 break;
1274
1275 case '\r': /* So that it works in Windows */
1276 case '\n':
1277 case ' ':
1278 break;
1279
1280 default:
1281 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1282 goto SKIP_DATA;
1283 }
1284 }
1285
1286 /* Handle compiling via the POSIX interface, which doesn't support the
1287 timing, showing, or debugging options, nor the ability to pass over
1288 local character tables. */
1289
1290 #if !defined NOPOSIX
1291 if (posix || do_posix)
1292 {
1293 int rc;
1294 int cflags = 0;
1295
1296 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1297 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1298 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1299 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1300 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1301
1302 rc = regcomp(&preg, (char *)p, cflags);
1303
1304 /* Compilation failed; go back for another re, skipping to blank line
1305 if non-interactive. */
1306
1307 if (rc != 0)
1308 {
1309 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1310 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1311 goto SKIP_DATA;
1312 }
1313 }
1314
1315 /* Handle compiling via the native interface */
1316
1317 else
1318 #endif /* !defined NOPOSIX */
1319
1320 {
1321 if (timeit > 0)
1322 {
1323 register int i;
1324 clock_t time_taken;
1325 clock_t start_time = clock();
1326 for (i = 0; i < timeit; i++)
1327 {
1328 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1329 if (re != NULL) free(re);
1330 }
1331 time_taken = clock() - start_time;
1332 fprintf(outfile, "Compile time %.4f milliseconds\n",
1333 (((double)time_taken * 1000.0) / (double)timeit) /
1334 (double)CLOCKS_PER_SEC);
1335 }
1336
1337 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1338
1339 /* Compilation failed; go back for another re, skipping to blank line
1340 if non-interactive. */
1341
1342 if (re == NULL)
1343 {
1344 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1345 SKIP_DATA:
1346 if (infile != stdin)
1347 {
1348 for (;;)
1349 {
1350 if (extend_inputline(infile, buffer, NULL) == NULL)
1351 {
1352 done = 1;
1353 goto CONTINUE;
1354 }
1355 len = (int)strlen((char *)buffer);
1356 while (len > 0 && isspace(buffer[len-1])) len--;
1357 if (len == 0) break;
1358 }
1359 fprintf(outfile, "\n");
1360 }
1361 goto CONTINUE;
1362 }
1363
1364 /* Compilation succeeded; print data if required. There are now two
1365 info-returning functions. The old one has a limited interface and
1366 returns only limited data. Check that it agrees with the newer one. */
1367
1368 if (log_store)
1369 fprintf(outfile, "Memory allocation (code space): %d\n",
1370 (int)(gotten_store -
1371 sizeof(real_pcre) -
1372 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1373
1374 /* Extract the size for possible writing before possibly flipping it,
1375 and remember the store that was got. */
1376
1377 true_size = ((real_pcre *)re)->size;
1378 regex_gotten_store = gotten_store;
1379
1380 /* If /S was present, study the regexp to generate additional info to
1381 help with the matching. */
1382
1383 if (do_study)
1384 {
1385 if (timeit > 0)
1386 {
1387 register int i;
1388 clock_t time_taken;
1389 clock_t start_time = clock();
1390 for (i = 0; i < timeit; i++)
1391 extra = pcre_study(re, study_options, &error);
1392 time_taken = clock() - start_time;
1393 if (extra != NULL) free(extra);
1394 fprintf(outfile, " Study time %.4f milliseconds\n",
1395 (((double)time_taken * 1000.0) / (double)timeit) /
1396 (double)CLOCKS_PER_SEC);
1397 }
1398 extra = pcre_study(re, study_options, &error);
1399 if (error != NULL)
1400 fprintf(outfile, "Failed to study: %s\n", error);
1401 else if (extra != NULL)
1402 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1403 }
1404
1405 /* If the 'F' option was present, we flip the bytes of all the integer
1406 fields in the regex data block and the study block. This is to make it
1407 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1408 compiled on a different architecture. */
1409
1410 if (do_flip)
1411 {
1412 real_pcre *rre = (real_pcre *)re;
1413 rre->magic_number =
1414 byteflip(rre->magic_number, sizeof(rre->magic_number));
1415 rre->size = byteflip(rre->size, sizeof(rre->size));
1416 rre->options = byteflip(rre->options, sizeof(rre->options));
1417 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1418 rre->top_bracket =
1419 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1420 rre->top_backref =
1421 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1422 rre->first_byte =
1423 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1424 rre->req_byte =
1425 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1426 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1427 sizeof(rre->name_table_offset));
1428 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1429 sizeof(rre->name_entry_size));
1430 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1431 sizeof(rre->name_count));
1432
1433 if (extra != NULL)
1434 {
1435 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1436 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1437 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1438 }
1439 }
1440
1441 /* Extract information from the compiled data if required */
1442
1443 SHOW_INFO:
1444
1445 if (do_debug)
1446 {
1447 fprintf(outfile, "------------------------------------------------------------------\n");
1448 pcre_printint(re, outfile, debug_lengths);
1449 }
1450
1451 if (do_showinfo)
1452 {
1453 unsigned long int get_options, all_options;
1454 #if !defined NOINFOCHECK
1455 int old_first_char, old_options, old_count;
1456 #endif
1457 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1458 hascrorlf;
1459 int nameentrysize, namecount;
1460 const uschar *nametable;
1461
1462 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1463 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1464 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1465 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1466 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1467 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1468 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1469 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1470 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1471 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1472 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1473 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1474
1475 #if !defined NOINFOCHECK
1476 old_count = pcre_info(re, &old_options, &old_first_char);
1477 if (count < 0) fprintf(outfile,
1478 "Error %d from pcre_info()\n", count);
1479 else
1480 {
1481 if (old_count != count) fprintf(outfile,
1482 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1483 old_count);
1484
1485 if (old_first_char != first_char) fprintf(outfile,
1486 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1487 first_char, old_first_char);
1488
1489 if (old_options != (int)get_options) fprintf(outfile,
1490 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1491 get_options, old_options);
1492 }
1493 #endif
1494
1495 if (size != regex_gotten_store) fprintf(outfile,
1496 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1497 (int)size, (int)regex_gotten_store);
1498
1499 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1500 if (backrefmax > 0)
1501 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1502
1503 if (namecount > 0)
1504 {
1505 fprintf(outfile, "Named capturing subpatterns:\n");
1506 while (namecount-- > 0)
1507 {
1508 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1509 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1510 GET2(nametable, 0));
1511 nametable += nameentrysize;
1512 }
1513 }
1514
1515 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1516 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1517
1518 all_options = ((real_pcre *)re)->options;
1519 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1520
1521 if (get_options == 0) fprintf(outfile, "No options\n");
1522 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1523 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1524 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1525 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1526 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1527 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1528 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1529 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1530 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1531 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1532 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1533 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1534 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1535 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1536 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1537 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1538
1539 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1540
1541 switch (get_options & PCRE_NEWLINE_BITS)
1542 {
1543 case PCRE_NEWLINE_CR:
1544 fprintf(outfile, "Forced newline sequence: CR\n");
1545 break;
1546
1547 case PCRE_NEWLINE_LF:
1548 fprintf(outfile, "Forced newline sequence: LF\n");
1549 break;
1550
1551 case PCRE_NEWLINE_CRLF:
1552 fprintf(outfile, "Forced newline sequence: CRLF\n");
1553 break;
1554
1555 case PCRE_NEWLINE_ANYCRLF:
1556 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1557 break;
1558
1559 case PCRE_NEWLINE_ANY:
1560 fprintf(outfile, "Forced newline sequence: ANY\n");
1561 break;
1562
1563 default:
1564 break;
1565 }
1566
1567 if (first_char == -1)
1568 {
1569 fprintf(outfile, "First char at start or follows newline\n");
1570 }
1571 else if (first_char < 0)
1572 {
1573 fprintf(outfile, "No first char\n");
1574 }
1575 else
1576 {
1577 int ch = first_char & 255;
1578 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1579 "" : " (caseless)";
1580 if (PRINTHEX(ch))
1581 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1582 else
1583 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1584 }
1585
1586 if (need_char < 0)
1587 {
1588 fprintf(outfile, "No need char\n");
1589 }
1590 else
1591 {
1592 int ch = need_char & 255;
1593 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1594 "" : " (caseless)";
1595 if (PRINTHEX(ch))
1596 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1597 else
1598 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1599 }
1600
1601 /* Don't output study size; at present it is in any case a fixed
1602 value, but it varies, depending on the computer architecture, and
1603 so messes up the test suite. (And with the /F option, it might be
1604 flipped.) */
1605
1606 if (do_study)
1607 {
1608 if (extra == NULL)
1609 fprintf(outfile, "Study returned NULL\n");
1610 else
1611 {
1612 uschar *start_bits = NULL;
1613 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1614
1615 if (start_bits == NULL)
1616 fprintf(outfile, "No starting byte set\n");
1617 else
1618 {
1619 int i;
1620 int c = 24;
1621 fprintf(outfile, "Starting byte set: ");
1622 for (i = 0; i < 256; i++)
1623 {
1624 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1625 {
1626 if (c > 75)
1627 {
1628 fprintf(outfile, "\n ");
1629 c = 2;
1630 }
1631 if (PRINTHEX(i) && i != ' ')
1632 {
1633 fprintf(outfile, "%c ", i);
1634 c += 2;
1635 }
1636 else
1637 {
1638 fprintf(outfile, "\\x%02x ", i);
1639 c += 5;
1640 }
1641 }
1642 }
1643 fprintf(outfile, "\n");
1644 }
1645 }
1646 }
1647 }
1648
1649 /* If the '>' option was present, we write out the regex to a file, and
1650 that is all. The first 8 bytes of the file are the regex length and then
1651 the study length, in big-endian order. */
1652
1653 if (to_file != NULL)
1654 {
1655 FILE *f = fopen((char *)to_file, "wb");
1656 if (f == NULL)
1657 {
1658 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1659 }
1660 else
1661 {
1662 uschar sbuf[8];
1663 sbuf[0] = (uschar)((true_size >> 24) & 255);
1664 sbuf[1] = (uschar)((true_size >> 16) & 255);
1665 sbuf[2] = (uschar)((true_size >> 8) & 255);
1666 sbuf[3] = (uschar)((true_size) & 255);
1667
1668 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1669 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1670 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1671 sbuf[7] = (uschar)((true_study_size) & 255);
1672
1673 if (fwrite(sbuf, 1, 8, f) < 8 ||
1674 fwrite(re, 1, true_size, f) < true_size)
1675 {
1676 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1677 }
1678 else
1679 {
1680 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1681 if (extra != NULL)
1682 {
1683 if (fwrite(extra->study_data, 1, true_study_size, f) <
1684 true_study_size)
1685 {
1686 fprintf(outfile, "Write error on %s: %s\n", to_file,
1687 strerror(errno));
1688 }
1689 else fprintf(outfile, "Study data written to %s\n", to_file);
1690
1691 }
1692 }
1693 fclose(f);
1694 }
1695
1696 new_free(re);
1697 if (extra != NULL) new_free(extra);
1698 if (tables != NULL) new_free((void *)tables);
1699 continue; /* With next regex */
1700 }
1701 } /* End of non-POSIX compile */
1702
1703 /* Read data lines and test them */
1704
1705 for (;;)
1706 {
1707 uschar *q;
1708 uschar *bptr;
1709 int *use_offsets = offsets;
1710 int use_size_offsets = size_offsets;
1711 int callout_data = 0;
1712 int callout_data_set = 0;
1713 int count, c;
1714 int copystrings = 0;
1715 int find_match_limit = default_find_match_limit;
1716 int getstrings = 0;
1717 int getlist = 0;
1718 int gmatched = 0;
1719 int start_offset = 0;
1720 int g_notempty = 0;
1721 int use_dfa = 0;
1722
1723 options = 0;
1724
1725 *copynames = 0;
1726 *getnames = 0;
1727
1728 copynamesptr = copynames;
1729 getnamesptr = getnames;
1730
1731 pcre_callout = callout;
1732 first_callout = 1;
1733 callout_extra = 0;
1734 callout_count = 0;
1735 callout_fail_count = 999999;
1736 callout_fail_id = -1;
1737 show_malloc = 0;
1738
1739 if (extra != NULL) extra->flags &=
1740 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1741
1742 len = 0;
1743 for (;;)
1744 {
1745 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1746 {
1747 if (len > 0) break;
1748 done = 1;
1749 goto CONTINUE;
1750 }
1751 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1752 len = (int)strlen((char *)buffer);
1753 if (buffer[len-1] == '\n') break;
1754 }
1755
1756 while (len > 0 && isspace(buffer[len-1])) len--;
1757 buffer[len] = 0;
1758 if (len == 0) break;
1759
1760 p = buffer;
1761 while (isspace(*p)) p++;
1762
1763 bptr = q = dbuffer;
1764 while ((c = *p++) != 0)
1765 {
1766 int i = 0;
1767 int n = 0;
1768
1769 if (c == '\\') switch ((c = *p++))
1770 {
1771 case 'a': c = 7; break;
1772 case 'b': c = '\b'; break;
1773 case 'e': c = 27; break;
1774 case 'f': c = '\f'; break;
1775 case 'n': c = '\n'; break;
1776 case 'r': c = '\r'; break;
1777 case 't': c = '\t'; break;
1778 case 'v': c = '\v'; break;
1779
1780 case '0': case '1': case '2': case '3':
1781 case '4': case '5': case '6': case '7':
1782 c -= '0';
1783 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1784 c = c * 8 + *p++ - '0';
1785
1786 #if !defined NOUTF8
1787 if (use_utf8 && c > 255)
1788 {
1789 unsigned char buff8[8];
1790 int ii, utn;
1791 utn = ord2utf8(c, buff8);
1792 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1793 c = buff8[ii]; /* Last byte */
1794 }
1795 #endif
1796 break;
1797
1798 case 'x':
1799
1800 /* Handle \x{..} specially - new Perl thing for utf8 */
1801
1802 #if !defined NOUTF8
1803 if (*p == '{')
1804 {
1805 unsigned char *pt = p;
1806 c = 0;
1807 while (isxdigit(*(++pt)))
1808 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1809 if (*pt == '}')
1810 {
1811 unsigned char buff8[8];
1812 int ii, utn;
1813 if (use_utf8)
1814 {
1815 utn = ord2utf8(c, buff8);
1816 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1817 c = buff8[ii]; /* Last byte */
1818 }
1819 else
1820 {
1821 if (c > 255)
1822 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1823 "UTF-8 mode is not enabled.\n"
1824 "** Truncation will probably give the wrong result.\n", c);
1825 }
1826 p = pt + 1;
1827 break;
1828 }
1829 /* Not correct form; fall through */
1830 }
1831 #endif
1832
1833 /* Ordinary \x */
1834
1835 c = 0;
1836 while (i++ < 2 && isxdigit(*p))
1837 {
1838 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1839 p++;
1840 }
1841 break;
1842
1843 case 0: /* \ followed by EOF allows for an empty line */
1844 p--;
1845 continue;
1846
1847 case '>':
1848 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1849 continue;
1850
1851 case 'A': /* Option setting */
1852 options |= PCRE_ANCHORED;
1853 continue;
1854
1855 case 'B':
1856 options |= PCRE_NOTBOL;
1857 continue;
1858
1859 case 'C':
1860 if (isdigit(*p)) /* Set copy string */
1861 {
1862 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1863 copystrings |= 1 << n;
1864 }
1865 else if (isalnum(*p))
1866 {
1867 uschar *npp = copynamesptr;
1868 while (isalnum(*p)) *npp++ = *p++;
1869 *npp++ = 0;
1870 *npp = 0;
1871 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1872 if (n < 0)
1873 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1874 copynamesptr = npp;
1875 }
1876 else if (*p == '+')
1877 {
1878 callout_extra = 1;
1879 p++;
1880 }
1881 else if (*p == '-')
1882 {
1883 pcre_callout = NULL;
1884 p++;
1885 }
1886 else if (*p == '!')
1887 {
1888 callout_fail_id = 0;
1889 p++;
1890 while(isdigit(*p))
1891 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1892 callout_fail_count = 0;
1893 if (*p == '!')
1894 {
1895 p++;
1896 while(isdigit(*p))
1897 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1898 }
1899 }
1900 else if (*p == '*')
1901 {
1902 int sign = 1;
1903 callout_data = 0;
1904 if (*(++p) == '-') { sign = -1; p++; }
1905 while(isdigit(*p))
1906 callout_data = callout_data * 10 + *p++ - '0';
1907 callout_data *= sign;
1908 callout_data_set = 1;
1909 }
1910 continue;
1911
1912 #if !defined NODFA
1913 case 'D':
1914 #if !defined NOPOSIX
1915 if (posix || do_posix)
1916 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1917 else
1918 #endif
1919 use_dfa = 1;
1920 continue;
1921
1922 case 'F':
1923 options |= PCRE_DFA_SHORTEST;
1924 continue;
1925 #endif
1926
1927 case 'G':
1928 if (isdigit(*p))
1929 {
1930 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1931 getstrings |= 1 << n;
1932 }
1933 else if (isalnum(*p))
1934 {
1935 uschar *npp = getnamesptr;
1936 while (isalnum(*p)) *npp++ = *p++;
1937 *npp++ = 0;
1938 *npp = 0;
1939 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1940 if (n < 0)
1941 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1942 getnamesptr = npp;
1943 }
1944 continue;
1945
1946 case 'L':
1947 getlist = 1;
1948 continue;
1949
1950 case 'M':
1951 find_match_limit = 1;
1952 continue;
1953
1954 case 'N':
1955 options |= PCRE_NOTEMPTY;
1956 continue;
1957
1958 case 'O':
1959 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1960 if (n > size_offsets_max)
1961 {
1962 size_offsets_max = n;
1963 free(offsets);
1964 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1965 if (offsets == NULL)
1966 {
1967 printf("** Failed to get %d bytes of memory for offsets vector\n",
1968 (int)(size_offsets_max * sizeof(int)));
1969 yield = 1;
1970 goto EXIT;
1971 }
1972 }
1973 use_size_offsets = n;
1974 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1975 continue;
1976
1977 case 'P':
1978 options |= PCRE_PARTIAL;
1979 continue;
1980
1981 case 'Q':
1982 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1983 if (extra == NULL)
1984 {
1985 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1986 extra->flags = 0;
1987 }
1988 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1989 extra->match_limit_recursion = n;
1990 continue;
1991
1992 case 'q':
1993 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1994 if (extra == NULL)
1995 {
1996 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1997 extra->flags = 0;
1998 }
1999 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2000 extra->match_limit = n;
2001 continue;
2002
2003 #if !defined NODFA
2004 case 'R':
2005 options |= PCRE_DFA_RESTART;
2006 continue;
2007 #endif
2008
2009 case 'S':
2010 show_malloc = 1;
2011 continue;
2012
2013 case 'Y':
2014 options |= PCRE_NO_START_OPTIMIZE;
2015 continue;
2016
2017 case 'Z':
2018 options |= PCRE_NOTEOL;
2019 continue;
2020
2021 case '?':
2022 options |= PCRE_NO_UTF8_CHECK;
2023 continue;
2024
2025 case '<':
2026 {
2027 int x = check_newline(p, outfile);
2028 if (x == 0) goto NEXT_DATA;
2029 options |= x;
2030 while (*p++ != '>');
2031 }
2032 continue;
2033 }
2034 *q++ = c;
2035 }
2036 *q = 0;
2037 len = q - dbuffer;
2038
2039 /* Move the data to the end of the buffer so that a read over the end of
2040 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2041 we are using the POSIX interface, we must include the terminating zero. */
2042
2043 #if !defined NOPOSIX
2044 if (posix || do_posix)
2045 {
2046 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2047 bptr += buffer_size - len - 1;
2048 }
2049 else
2050 #endif
2051 {
2052 memmove(bptr + buffer_size - len, bptr, len);
2053 bptr += buffer_size - len;
2054 }
2055
2056 if ((all_use_dfa || use_dfa) && find_match_limit)
2057 {
2058 printf("**Match limit not relevant for DFA matching: ignored\n");
2059 find_match_limit = 0;
2060 }
2061
2062 /* Handle matching via the POSIX interface, which does not
2063 support timing or playing with the match limit or callout data. */
2064
2065 #if !defined NOPOSIX
2066 if (posix || do_posix)
2067 {
2068 int rc;
2069 int eflags = 0;
2070 regmatch_t *pmatch = NULL;
2071 if (use_size_offsets > 0)
2072 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2073 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2074 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2075 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2076
2077 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2078
2079 if (rc != 0)
2080 {
2081 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2082 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2083 }
2084 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2085 != 0)
2086 {
2087 fprintf(outfile, "Matched with REG_NOSUB\n");
2088 }
2089 else
2090 {
2091 size_t i;
2092 for (i = 0; i < (size_t)use_size_offsets; i++)
2093 {
2094 if (pmatch[i].rm_so >= 0)
2095 {
2096 fprintf(outfile, "%2d: ", (int)i);
2097 (void)pchars(dbuffer + pmatch[i].rm_so,
2098 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2099 fprintf(outfile, "\n");
2100 if (i == 0 && do_showrest)
2101 {
2102 fprintf(outfile, " 0+ ");
2103 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2104 outfile);
2105 fprintf(outfile, "\n");
2106 }
2107 }
2108 }
2109 }
2110 free(pmatch);
2111 }
2112
2113 /* Handle matching via the native interface - repeats for /g and /G */
2114
2115 else
2116 #endif /* !defined NOPOSIX */
2117
2118 for (;; gmatched++) /* Loop for /g or /G */
2119 {
2120 if (timeitm > 0)
2121 {
2122 register int i;
2123 clock_t time_taken;
2124 clock_t start_time = clock();
2125
2126 #if !defined NODFA
2127 if (all_use_dfa || use_dfa)
2128 {
2129 int workspace[1000];
2130 for (i = 0; i < timeitm; i++)
2131 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2132 options | g_notempty, use_offsets, use_size_offsets, workspace,
2133 sizeof(workspace)/sizeof(int));
2134 }
2135 else
2136 #endif
2137
2138 for (i = 0; i < timeitm; i++)
2139 count = pcre_exec(re, extra, (char *)bptr, len,
2140 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2141
2142 time_taken = clock() - start_time;
2143 fprintf(outfile, "Execute time %.4f milliseconds\n",
2144 (((double)time_taken * 1000.0) / (double)timeitm) /
2145 (double)CLOCKS_PER_SEC);
2146 }
2147
2148 /* If find_match_limit is set, we want to do repeated matches with
2149 varying limits in order to find the minimum value for the match limit and
2150 for the recursion limit. */
2151
2152 if (find_match_limit)
2153 {
2154 if (extra == NULL)
2155 {
2156 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2157 extra->flags = 0;
2158 }
2159
2160 (void)check_match_limit(re, extra, bptr, len, start_offset,
2161 options|g_notempty, use_offsets, use_size_offsets,
2162 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2163 PCRE_ERROR_MATCHLIMIT, "match()");
2164
2165 count = check_match_limit(re, extra, bptr, len, start_offset,
2166 options|g_notempty, use_offsets, use_size_offsets,
2167 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2168 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2169 }
2170
2171 /* If callout_data is set, use the interface with additional data */
2172
2173 else if (callout_data_set)
2174 {
2175 if (extra == NULL)
2176 {
2177 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2178 extra->flags = 0;
2179 }
2180 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2181 extra->callout_data = &callout_data;
2182 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2183 options | g_notempty, use_offsets, use_size_offsets);
2184 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2185 }
2186
2187 /* The normal case is just to do the match once, with the default
2188 value of match_limit. */
2189
2190 #if !defined NODFA
2191 else if (all_use_dfa || use_dfa)
2192 {
2193 int workspace[1000];
2194 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2195 options | g_notempty, use_offsets, use_size_offsets, workspace,
2196 sizeof(workspace)/sizeof(int));
2197 if (count == 0)
2198 {
2199 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2200 count = use_size_offsets/2;
2201 }
2202 }
2203 #endif
2204
2205 else
2206 {
2207 count = pcre_exec(re, extra, (char *)bptr, len,
2208 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2209 if (count == 0)
2210 {
2211 fprintf(outfile, "Matched, but too many substrings\n");
2212 count = use_size_offsets/3;
2213 }
2214 }
2215
2216 /* Matched */
2217
2218 if (count >= 0)
2219 {
2220 int i, maxcount;
2221
2222 #if !defined NODFA
2223 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2224 #endif
2225 maxcount = use_size_offsets/3;
2226
2227 /* This is a check against a lunatic return value. */
2228
2229 if (count > maxcount)
2230 {
2231 fprintf(outfile,
2232 "** PCRE error: returned count %d is too big for offset size %d\n",
2233 count, use_size_offsets);
2234 count = use_size_offsets/3;
2235 if (do_g || do_G)
2236 {
2237 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2238 do_g = do_G = FALSE; /* Break g/G loop */
2239 }
2240 }
2241
2242 for (i = 0; i < count * 2; i += 2)
2243 {
2244 if (use_offsets[i] < 0)
2245 fprintf(outfile, "%2d: <unset>\n", i/2);
2246 else
2247 {
2248 fprintf(outfile, "%2d: ", i/2);
2249 (void)pchars(bptr + use_offsets[i],
2250 use_offsets[i+1] - use_offsets[i], outfile);
2251 fprintf(outfile, "\n");
2252 if (i == 0)
2253 {
2254 if (do_showrest)
2255 {
2256 fprintf(outfile, " 0+ ");
2257 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2258 outfile);
2259 fprintf(outfile, "\n");
2260 }
2261 }
2262 }
2263 }
2264
2265 for (i = 0; i < 32; i++)
2266 {
2267 if ((copystrings & (1 << i)) != 0)
2268 {
2269 char copybuffer[256];
2270 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2271 i, copybuffer, sizeof(copybuffer));
2272 if (rc < 0)
2273 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2274 else
2275 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2276 }
2277 }
2278
2279 for (copynamesptr = copynames;
2280 *copynamesptr != 0;
2281 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2282 {
2283 char copybuffer[256];
2284 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2285 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2286 if (rc < 0)
2287 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2288 else
2289 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2290 }
2291
2292 for (i = 0; i < 32; i++)
2293 {
2294 if ((getstrings & (1 << i)) != 0)
2295 {
2296 const char *substring;
2297 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2298 i, &substring);
2299 if (rc < 0)
2300 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2301 else
2302 {
2303 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2304 pcre_free_substring(substring);
2305 }
2306 }
2307 }
2308
2309 for (getnamesptr = getnames;
2310 *getnamesptr != 0;
2311 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2312 {
2313 const char *substring;
2314 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2315 count, (char *)getnamesptr, &substring);
2316 if (rc < 0)
2317 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2318 else
2319 {
2320 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2321 pcre_free_substring(substring);
2322 }
2323 }
2324
2325 if (getlist)
2326 {
2327 const char **stringlist;
2328 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2329 &stringlist);
2330 if (rc < 0)
2331 fprintf(outfile, "get substring list failed %d\n", rc);
2332 else
2333 {
2334 for (i = 0; i < count; i++)
2335 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2336 if (stringlist[i] != NULL)
2337 fprintf(outfile, "string list not terminated by NULL\n");
2338 /* free((void *)stringlist); */
2339 pcre_free_substring_list(stringlist);
2340 }
2341 }
2342 }
2343
2344 /* There was a partial match */
2345
2346 else if (count == PCRE_ERROR_PARTIAL)
2347 {
2348 fprintf(outfile, "Partial match");
2349 #if !defined NODFA
2350 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2351 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2352 bptr + use_offsets[0]);
2353 #endif
2354 fprintf(outfile, "\n");
2355 break; /* Out of the /g loop */
2356 }
2357
2358 /* Failed to match. If this is a /g or /G loop and we previously set
2359 g_notempty after a null match, this is not necessarily the end. We want
2360 to advance the start offset, and continue. We won't be at the end of the
2361 string - that was checked before setting g_notempty.
2362
2363 Complication arises in the case when the newline option is "any" or
2364 "anycrlf". If the previous match was at the end of a line terminated by
2365 CRLF, an advance of one character just passes the \r, whereas we should
2366 prefer the longer newline sequence, as does the code in pcre_exec().
2367 Fudge the offset value to achieve this.
2368
2369 Otherwise, in the case of UTF-8 matching, the advance must be one
2370 character, not one byte. */
2371
2372 else
2373 {
2374 if (g_notempty != 0)
2375 {
2376 int onechar = 1;
2377 unsigned int obits = ((real_pcre *)re)->options;
2378 use_offsets[0] = start_offset;
2379 if ((obits & PCRE_NEWLINE_BITS) == 0)
2380 {
2381 int d;
2382 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2383 obits = (d == '\r')? PCRE_NEWLINE_CR :
2384 (d == '\n')? PCRE_NEWLINE_LF :
2385 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2386 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2387 (d == -1)? PCRE_NEWLINE_ANY : 0;
2388 }
2389 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2390 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2391 &&
2392 start_offset < len - 1 &&
2393 bptr[start_offset] == '\r' &&
2394 bptr[start_offset+1] == '\n')
2395 onechar++;
2396 else if (use_utf8)
2397 {
2398 while (start_offset + onechar < len)
2399 {
2400 int tb = bptr[start_offset+onechar];
2401 if (tb <= 127) break;
2402 tb &= 0xc0;
2403 if (tb != 0 && tb != 0xc0) onechar++;
2404 }
2405 }
2406 use_offsets[1] = start_offset + onechar;
2407 }
2408 else
2409 {
2410 if (count == PCRE_ERROR_NOMATCH)
2411 {
2412 if (gmatched == 0) fprintf(outfile, "No match\n");
2413 }
2414 else fprintf(outfile, "Error %d\n", count);
2415 break; /* Out of the /g loop */
2416 }
2417 }
2418
2419 /* If not /g or /G we are done */
2420
2421 if (!do_g && !do_G) break;
2422
2423 /* If we have matched an empty string, first check to see if we are at
2424 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2425 what Perl's /g option does. This turns out to be rather cunning. First
2426 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2427 same point. If this fails (picked up above) we advance to the next
2428 character. */
2429
2430 g_notempty = 0;
2431
2432 if (use_offsets[0] == use_offsets[1])
2433 {
2434 if (use_offsets[0] == len) break;
2435 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2436 }
2437
2438 /* For /g, update the start offset, leaving the rest alone */
2439
2440 if (do_g) start_offset = use_offsets[1];
2441
2442 /* For /G, update the pointer and length */
2443
2444 else
2445 {
2446 bptr += use_offsets[1];
2447 len -= use_offsets[1];
2448 }
2449 } /* End of loop for /g and /G */
2450
2451 NEXT_DATA: continue;
2452 } /* End of loop for data lines */
2453
2454 CONTINUE:
2455
2456 #if !defined NOPOSIX
2457 if (posix || do_posix) regfree(&preg);
2458 #endif
2459
2460 if (re != NULL) new_free(re);
2461 if (extra != NULL) new_free(extra);
2462 if (tables != NULL)
2463 {
2464 new_free((void *)tables);
2465 setlocale(LC_CTYPE, "C");
2466 locale_set = 0;
2467 }
2468 }
2469
2470 if (infile == stdin) fprintf(outfile, "\n");
2471
2472 EXIT:
2473
2474 if (infile != NULL && infile != stdin) fclose(infile);
2475 if (outfile != NULL && outfile != stdout) fclose(outfile);
2476
2477 free(buffer);
2478 free(dbuffer);
2479 free(pbuffer);
2480 free(offsets);
2481
2482 return yield;
2483 }
2484
2485 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12