/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 386 - (show annotations) (download)
Tue Mar 10 11:48:33 2009 UTC (5 years, 9 months ago) by ph10
File MIME type: text/plain
File size: 72703 byte(s)
Add the -M option to pcretest.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #define isatty _isatty /* This is what Windows calls them, I'm told */
75 #define fileno _fileno
76
77 #else
78 #include <sys/time.h> /* These two includes are needed */
79 #include <sys/resource.h> /* for setrlimit(). */
80 #define INPUT_MODE "rb"
81 #define OUTPUT_MODE "wb"
82 #endif
83
84
85 /* We have to include pcre_internal.h because we need the internal info for
86 displaying the results of pcre_study() and we also need to know about the
87 internal macros, structures, and other internal data values; pcretest has
88 "inside information" compared to a program that strictly follows the PCRE API.
89
90 Although pcre_internal.h does itself include pcre.h, we explicitly include it
91 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
92 appropriately for an application, not for building PCRE. */
93
94 #include "pcre.h"
95 #include "pcre_internal.h"
96
97 /* We need access to some of the data tables that PCRE uses. So as not to have
98 to keep two copies, we include the source file here, changing the names of the
99 external symbols to prevent clashes. */
100
101 #define _pcre_ucp_gentype ucp_gentype
102 #define _pcre_utf8_table1 utf8_table1
103 #define _pcre_utf8_table1_size utf8_table1_size
104 #define _pcre_utf8_table2 utf8_table2
105 #define _pcre_utf8_table3 utf8_table3
106 #define _pcre_utf8_table4 utf8_table4
107 #define _pcre_utt utt
108 #define _pcre_utt_size utt_size
109 #define _pcre_utt_names utt_names
110 #define _pcre_OP_lengths OP_lengths
111
112 #include "pcre_tables.c"
113
114 /* We also need the pcre_printint() function for printing out compiled
115 patterns. This function is in a separate file so that it can be included in
116 pcre_compile.c when that module is compiled with debugging enabled.
117
118 The definition of the macro PRINTABLE, which determines whether to print an
119 output character as-is or as a hex value when showing compiled patterns, is
120 contained in this file. We use it here also, in cases when the locale has not
121 been explicitly changed, so as to get consistent output from systems that
122 differ in their output from isprint() even in the "C" locale. */
123
124 #include "pcre_printint.src"
125
/* Decide whether the character value c can be output as-is. When the locale
has been explicitly set, the decision is delegated to isprint(), but only for
values in the range 0..255: isprint() is undefined for an argument that is
neither an unsigned char value nor EOF, and in UTF-8 mode pchars() passes
decoded code points that can be much larger. Such values are reported as not
printable. When the locale has not been changed, PRINTABLE() is used instead so
that the output is the same on all systems. The argument is evaluated more than
once, so it must not have side effects. */

#define PRINTHEX(c) \
  (locale_set? ((c) >= 0 && (c) <= 255 && isprint(c)) : PRINTABLE(c))
127
128
129 /* It is possible to compile this test program without including support for
130 testing the POSIX interface, though this is not available via the standard
131 Makefile. */
132
133 #if !defined NOPOSIX
134 #include "pcreposix.h"
135 #endif
136
137 /* It is also possible, for the benefit of the version currently imported into
138 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
139 interface to the DFA matcher (NODFA), and without the doublecheck of the old
140 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
141 UTF8 support if PCRE is built without it. */
142
143 #ifndef SUPPORT_UTF8
144 #ifndef NOUTF8
145 #define NOUTF8
146 #endif
147 #endif
148
149
150 /* Other parameters */
151
152 #ifndef CLOCKS_PER_SEC
153 #ifdef CLK_TCK
154 #define CLOCKS_PER_SEC CLK_TCK
155 #else
156 #define CLOCKS_PER_SEC 100
157 #endif
158 #endif
159
160 /* This is the default loop count for timing. */
161
162 #define LOOPREPEAT 500000
163
164 /* Static variables */
165
166 static FILE *outfile;
167 static int log_store = 0;
168 static int callout_count;
169 static int callout_extra;
170 static int callout_fail_count;
171 static int callout_fail_id;
172 static int debug_lengths;
173 static int first_callout;
174 static int locale_set = 0;
175 static int show_malloc;
176 static int use_utf8;
177 static size_t gotten_store;
178
179 /* The buffers grow automatically if very long input lines are encountered. */
180
181 static int buffer_size = 50000;
182 static uschar *buffer = NULL;
183 static uschar *dbuffer = NULL;
184 static uschar *pbuffer = NULL;
185
186
187
188 /*************************************************
189 * Read or extend an input line *
190 *************************************************/
191
192 /* Input lines are read into buffer, but both patterns and data lines can be
193 continued over multiple input lines. In addition, if the buffer fills up, we
194 want to automatically expand it so as to be able to handle extremely large
195 lines that are needed for certain stress tests. When the input buffer is
196 expanded, the other two buffers must also be expanded likewise, and the
197 contents of pbuffer, which are a copy of the input for callouts, must be
198 preserved (for when expansion happens for a data line). This is not the most
199 optimal way of handling this, but hey, this is just a test program!
200
201 Arguments:
202 f the file to read
203 start where in buffer to start (this *must* be within buffer)
204 prompt for stdin or readline()
205
206 Returns: pointer to the start of new data
207 could be a copy of start, or could be moved
208 NULL if no data read and EOF reached
209 */
210
211 static uschar *
212 extend_inputline(FILE *f, uschar *start, const char *prompt)
213 {
214 uschar *here = start;
215
216 for (;;)
217 {
218 int rlen = buffer_size - (here - buffer);
219
220 if (rlen > 1000)
221 {
222 int dlen;
223
224 /* If libreadline support is required, use readline() to read a line if the
225 input is a terminal. Note that readline() removes the trailing newline, so
226 we must put it back again, to be compatible with fgets(). */
227
228 #ifdef SUPPORT_LIBREADLINE
229 if (isatty(fileno(f)))
230 {
231 size_t len;
232 char *s = readline(prompt);
233 if (s == NULL) return (here == start)? NULL : start;
234 len = strlen(s);
235 if (len > 0) add_history(s);
236 if (len > rlen - 1) len = rlen - 1;
237 memcpy(here, s, len);
238 here[len] = '\n';
239 here[len+1] = 0;
240 free(s);
241 }
242 else
243 #endif
244
245 /* Read the next line by normal means, prompting if the file is stdin. */
246
247 {
248 if (f == stdin) printf(prompt);
249 if (fgets((char *)here, rlen, f) == NULL)
250 return (here == start)? NULL : start;
251 }
252
253 dlen = (int)strlen((char *)here);
254 if (dlen > 0 && here[dlen - 1] == '\n') return start;
255 here += dlen;
256 }
257
258 else
259 {
260 int new_buffer_size = 2*buffer_size;
261 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
262 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
263 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
264
265 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
266 {
267 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
268 exit(1);
269 }
270
271 memcpy(new_buffer, buffer, buffer_size);
272 memcpy(new_pbuffer, pbuffer, buffer_size);
273
274 buffer_size = new_buffer_size;
275
276 start = new_buffer + (start - buffer);
277 here = new_buffer + (here - buffer);
278
279 free(buffer);
280 free(dbuffer);
281 free(pbuffer);
282
283 buffer = new_buffer;
284 dbuffer = new_dbuffer;
285 pbuffer = new_pbuffer;
286 }
287 }
288
289 return NULL; /* Control never gets here */
290 }
291
292
293
294
295
296
297
298 /*************************************************
299 * Read number from string *
300 *************************************************/
301
302 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
303 around with conditional compilation, just do the job by hand. It is only used
304 for unpicking arguments, so just keep it simple.
305
306 Arguments:
307 str string to be converted
308 endptr where to put the end pointer
309
310 Returns: the value, as an int
311 */
312
/* Convert an unsigned decimal number at the start of a string. Leading white
space is skipped, then as many decimal digits as are present are consumed. If
there are no digits the result is 0. A number that does not fit in an int
yields INT_MAX instead of overflowing; the remaining digits are still consumed
so that the end pointer is the same as for an in-range number.

Arguments:
  str       string to be converted
  endptr    receives a pointer to the first character after the digits

Returns:    the value, in the range 0 to INT_MAX
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  for (; isdigit(*str); str++)
    {
    int digit = *str - '0';

    /* result*10 + digit fits in an int only if result <= (INT_MAX-digit)/10 */

    if (result > (INT_MAX - digit) / 10)
      result = INT_MAX;
    else
      result = result * 10 + digit;
    }

  *endptr = str;
  return result;
}
322
323
324
325
326 /*************************************************
327 * Convert UTF-8 string to value *
328 *************************************************/
329
330 /* This function takes one or more bytes that represents a UTF-8 character,
331 and returns the value of the character.
332
333 Argument:
334 utf8bytes a pointer to the byte vector
335 vptr a pointer to an int to receive the value
336
337 Returns: > 0 => the number of bytes consumed
338 -6 to 0 => malformed UTF-8 character at offset = (-return)
339 */
340
341 #if !defined NOUTF8
342
343 static int
344 utf82ord(unsigned char *utf8bytes, int *vptr)
345 {
346 int c = *utf8bytes++;
347 int d = c;
348 int i, j, s;
349
350 for (i = -1; i < 6; i++) /* i is number of additional bytes */
351 {
352 if ((d & 0x80) == 0) break;
353 d <<= 1;
354 }
355
356 if (i == -1) { *vptr = c; return 1; } /* ascii character */
357 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
358
359 /* i now has a value in the range 1-5 */
360
361 s = 6*i;
362 d = (c & utf8_table3[i]) << s;
363
364 for (j = 0; j < i; j++)
365 {
366 c = *utf8bytes++;
367 if ((c & 0xc0) != 0x80) return -(j+1);
368 s -= 6;
369 d |= (c & 0x3f) << s;
370 }
371
372 /* Check that encoding was the correct unique one */
373
374 for (j = 0; j < utf8_table1_size; j++)
375 if (d <= utf8_table1[j]) break;
376 if (j != i) return -(i+1);
377
378 /* Valid value */
379
380 *vptr = d;
381 return i+1;
382 }
383
384 #endif
385
386
387
388 /*************************************************
389 * Convert character value to UTF-8 *
390 *************************************************/
391
392 /* This function takes an integer value in the range 0 - 0x7fffffff
393 and encodes it as a UTF-8 character in 1 to 6 bytes.
394
395 Arguments:
396 cvalue the character value
397 utf8bytes pointer to buffer for result - at least 6 bytes long
398
399 Returns: number of bytes placed in the buffer
400 */
401
402 #if !defined NOUTF8
403
404 static int
405 ord2utf8(int cvalue, uschar *utf8bytes)
406 {
407 register int i, j;
408 for (i = 0; i < utf8_table1_size; i++)
409 if (cvalue <= utf8_table1[i]) break;
410 utf8bytes += i;
411 for (j = i; j > 0; j--)
412 {
413 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
414 cvalue >>= 6;
415 }
416 *utf8bytes = utf8_table2[i] | cvalue;
417 return i + 1;
418 }
419
420 #endif
421
422
423
424 /*************************************************
425 * Print character string *
426 *************************************************/
427
428 /* Character string printing function. Must handle UTF-8 strings in utf8
429 mode. Yields number of characters printed. If handed a NULL file, just counts
430 chars without printing. */
431
432 static int pchars(unsigned char *p, int length, FILE *f)
433 {
434 int c = 0;
435 int yield = 0;
436
437 while (length-- > 0)
438 {
439 #if !defined NOUTF8
440 if (use_utf8)
441 {
442 int rc = utf82ord(p, &c);
443
444 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
445 {
446 length -= rc - 1;
447 p += rc;
448 if (PRINTHEX(c))
449 {
450 if (f != NULL) fprintf(f, "%c", c);
451 yield++;
452 }
453 else
454 {
455 int n = 4;
456 if (f != NULL) fprintf(f, "\\x{%02x}", c);
457 yield += (n <= 0x000000ff)? 2 :
458 (n <= 0x00000fff)? 3 :
459 (n <= 0x0000ffff)? 4 :
460 (n <= 0x000fffff)? 5 : 6;
461 }
462 continue;
463 }
464 }
465 #endif
466
467 /* Not UTF-8, or malformed UTF-8 */
468
469 c = *p++;
470 if (PRINTHEX(c))
471 {
472 if (f != NULL) fprintf(f, "%c", c);
473 yield++;
474 }
475 else
476 {
477 if (f != NULL) fprintf(f, "\\x%02x", c);
478 yield += 4;
479 }
480 }
481
482 return yield;
483 }
484
485
486
487 /*************************************************
488 * Callout function *
489 *************************************************/
490
491 /* Called from PCRE as a result of the (?C) item. We print out where we are in
492 the match. Yield zero unless more callouts than the fail count, or the callout
493 data is not zero. */
494
495 static int callout(pcre_callout_block *cb)
496 {
497 FILE *f = (first_callout | callout_extra)? outfile : NULL;
498 int i, pre_start, post_start, subject_length;
499
500 if (callout_extra)
501 {
502 fprintf(f, "Callout %d: last capture = %d\n",
503 cb->callout_number, cb->capture_last);
504
505 for (i = 0; i < cb->capture_top * 2; i += 2)
506 {
507 if (cb->offset_vector[i] < 0)
508 fprintf(f, "%2d: <unset>\n", i/2);
509 else
510 {
511 fprintf(f, "%2d: ", i/2);
512 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
513 cb->offset_vector[i+1] - cb->offset_vector[i], f);
514 fprintf(f, "\n");
515 }
516 }
517 }
518
519 /* Re-print the subject in canonical form, the first time or if giving full
520 datails. On subsequent calls in the same match, we use pchars just to find the
521 printed lengths of the substrings. */
522
523 if (f != NULL) fprintf(f, "--->");
524
525 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
526 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
527 cb->current_position - cb->start_match, f);
528
529 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
530
531 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
532 cb->subject_length - cb->current_position, f);
533
534 if (f != NULL) fprintf(f, "\n");
535
536 /* Always print appropriate indicators, with callout number if not already
537 shown. For automatic callouts, show the pattern offset. */
538
539 if (cb->callout_number == 255)
540 {
541 fprintf(outfile, "%+3d ", cb->pattern_position);
542 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
543 }
544 else
545 {
546 if (callout_extra) fprintf(outfile, " ");
547 else fprintf(outfile, "%3d ", cb->callout_number);
548 }
549
550 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
551 fprintf(outfile, "^");
552
553 if (post_start > 0)
554 {
555 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
556 fprintf(outfile, "^");
557 }
558
559 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
560 fprintf(outfile, " ");
561
562 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
563 pbuffer + cb->pattern_position);
564
565 fprintf(outfile, "\n");
566 first_callout = 0;
567
568 if (cb->callout_data != NULL)
569 {
570 int callout_data = *((int *)(cb->callout_data));
571 if (callout_data != 0)
572 {
573 fprintf(outfile, "Callout data = %d\n", callout_data);
574 return callout_data;
575 }
576 }
577
578 return (cb->callout_number != callout_fail_id)? 0 :
579 (++callout_count >= callout_fail_count)? 1 : 0;
580 }
581
582
583 /*************************************************
584 * Local malloc functions *
585 *************************************************/
586
587 /* Alternative malloc function, to test functionality and show the size of the
588 compiled re. */
589
590 static void *new_malloc(size_t size)
591 {
592 void *block = malloc(size);
593 gotten_store = size;
594 if (show_malloc)
595 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
596 return block;
597 }
598
599 static void new_free(void *block)
600 {
601 if (show_malloc)
602 fprintf(outfile, "free %p\n", block);
603 free(block);
604 }
605
606
607 /* For recursion malloc/free, to test stacking calls */
608
609 static void *stack_malloc(size_t size)
610 {
611 void *block = malloc(size);
612 if (show_malloc)
613 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
614 return block;
615 }
616
617 static void stack_free(void *block)
618 {
619 if (show_malloc)
620 fprintf(outfile, "stack_free %p\n", block);
621 free(block);
622 }
623
624
625 /*************************************************
626 * Call pcre_fullinfo() *
627 *************************************************/
628
629 /* Get one piece of information from the pcre_fullinfo() function */
630
631 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
632 {
633 int rc;
634 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
635 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
636 }
637
638
639
640 /*************************************************
641 * Byte flipping function *
642 *************************************************/
643
/* Reverse the byte order of a value.

Arguments:
  value   the value to be flipped
  n       2 to swap the two low-order bytes; any other value reverses the
            four low-order bytes

Returns:  the flipped value; bytes of the argument above those that are
          flipped do not appear in the result
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int b0 = value & 0xff;
  unsigned long int b1 = (value >> 8) & 0xff;
  unsigned long int b2, b3;

  if (n == 2) return (b0 << 8) | b1;

  b2 = (value >> 16) & 0xff;
  b3 = (value >> 24) & 0xff;

  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
653
654
655
656
657 /*************************************************
658 * Check match or recursion limit *
659 *************************************************/
660
661 static int
662 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
663 int start_offset, int options, int *use_offsets, int use_size_offsets,
664 int flag, unsigned long int *limit, int errnumber, const char *msg)
665 {
666 int count;
667 int min = 0;
668 int mid = 64;
669 int max = -1;
670
671 extra->flags |= flag;
672
673 for (;;)
674 {
675 *limit = mid;
676
677 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
678 use_offsets, use_size_offsets);
679
680 if (count == errnumber)
681 {
682 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
683 min = mid;
684 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
685 }
686
687 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
688 count == PCRE_ERROR_PARTIAL)
689 {
690 if (mid == min + 1)
691 {
692 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
693 break;
694 }
695 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
696 max = mid;
697 mid = (min + mid)/2;
698 }
699 else break; /* Some other error */
700 }
701
702 extra->flags &= ~flag;
703 return count;
704 }
705
706
707
708 /*************************************************
709 * Case-independent strncmp() function *
710 *************************************************/
711
712 /*
713 Arguments:
714 s first string
715 t second string
716 n number of characters to compare
717
718 Returns: < 0, = 0, or > 0, according to the comparison
719 */
720
721 static int
722 strncmpic(uschar *s, uschar *t, int n)
723 {
724 while (n--)
725 {
726 int c = tolower(*s++) - tolower(*t++);
727 if (c) return c;
728 }
729 return 0;
730 }
731
732
733
734 /*************************************************
735 * Check newline indicator *
736 *************************************************/
737
738 /* This is used both at compile and run-time to check for <xxx> escapes, where
739 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
740 no match.
741
742 Arguments:
743 p points after the leading '<'
744 f file for error message
745
746 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
747 */
748
749 static int
750 check_newline(uschar *p, FILE *f)
751 {
752 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
753 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
754 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
755 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
756 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
757 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
758 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
759 fprintf(f, "Unknown newline type at: <%s\n", p);
760 return 0;
761 }
762
763
764
765 /*************************************************
766 * Usage function *
767 *************************************************/
768
/* Write the command-line help text to stdout. The lines about readline(), the
-dfa option and the -p option depend on SUPPORT_LIBREADLINE, NODFA and NOPOSIX
respectively. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
  fputs("If input is a terminal, readline() is used to read from it.\n",
    stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

  fputs("\nOptions:\n"
        " -b show compiled code (bytecode)\n"
        " -C show PCRE compile-time options and exit\n"
        " -d debug: show compiled code and information (-b and -i)\n",
        stdout);

#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -M find MATCH_LIMIT minimum for each subject\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif

  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s output store (memory) used information\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n",
        stdout);
}
802
803
804
805 /*************************************************
806 * Main Program *
807 *************************************************/
808
809 /* Read lines from named file or stdin and write to named file or stdout; lines
810 consist of a regular expression, in delimiters and optionally followed by
811 options, followed by a set of test data, terminated by an empty line. */
812
813 int main(int argc, char **argv)
814 {
815 FILE *infile = stdin;
816 int options = 0;
817 int study_options = 0;
818 int default_find_match_limit = FALSE;
819 int op = 1;
820 int timeit = 0;
821 int timeitm = 0;
822 int showinfo = 0;
823 int showstore = 0;
824 int quiet = 0;
825 int size_offsets = 45;
826 int size_offsets_max;
827 int *offsets = NULL;
828 #if !defined NOPOSIX
829 int posix = 0;
830 #endif
831 int debug = 0;
832 int done = 0;
833 int all_use_dfa = 0;
834 int yield = 0;
835 int stack_size;
836
837 /* These vectors store, end-to-end, a list of captured substring names. Assume
838 that 1024 is plenty long enough for the few names we'll be testing. */
839
840 uschar copynames[1024];
841 uschar getnames[1024];
842
843 uschar *copynamesptr;
844 uschar *getnamesptr;
845
846 /* Get buffers from malloc() so that Electric Fence will check their misuse
847 when I am debugging. They grow automatically when very long lines are read. */
848
849 buffer = (unsigned char *)malloc(buffer_size);
850 dbuffer = (unsigned char *)malloc(buffer_size);
851 pbuffer = (unsigned char *)malloc(buffer_size);
852
853 /* The outfile variable is static so that new_malloc can use it. */
854
855 outfile = stdout;
856
857 /* The following _setmode() stuff is some Windows magic that tells its runtime
858 library to translate CRLF into a single LF character. At least, that's what
859 I've been told: never having used Windows I take this all on trust. Originally
860 it set 0x8000, but then I was advised that _O_BINARY was better. */
861
862 #if defined(_WIN32) || defined(WIN32)
863 _setmode( _fileno( stdout ), _O_BINARY );
864 #endif
865
866 /* Scan options */
867
868 while (argc > 1 && argv[op][0] == '-')
869 {
870 unsigned char *endptr;
871
872 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
873 showstore = 1;
874 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
875 else if (strcmp(argv[op], "-b") == 0) debug = 1;
876 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
877 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
878 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
879 #if !defined NODFA
880 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
881 #endif
882 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
883 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
884 *endptr == 0))
885 {
886 op++;
887 argc--;
888 }
889 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
890 {
891 int both = argv[op][2] == 0;
892 int temp;
893 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
894 *endptr == 0))
895 {
896 timeitm = temp;
897 op++;
898 argc--;
899 }
900 else timeitm = LOOPREPEAT;
901 if (both) timeit = timeitm;
902 }
903 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
904 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
905 *endptr == 0))
906 {
907 #if defined(_WIN32) || defined(WIN32)
908 printf("PCRE: -S not supported on this OS\n");
909 exit(1);
910 #else
911 int rc;
912 struct rlimit rlim;
913 getrlimit(RLIMIT_STACK, &rlim);
914 rlim.rlim_cur = stack_size * 1024 * 1024;
915 rc = setrlimit(RLIMIT_STACK, &rlim);
916 if (rc != 0)
917 {
918 printf("PCRE: setrlimit() failed with error %d\n", rc);
919 exit(1);
920 }
921 op++;
922 argc--;
923 #endif
924 }
925 #if !defined NOPOSIX
926 else if (strcmp(argv[op], "-p") == 0) posix = 1;
927 #endif
928 else if (strcmp(argv[op], "-C") == 0)
929 {
930 int rc;
931 unsigned long int lrc;
932 printf("PCRE version %s\n", pcre_version());
933 printf("Compiled with\n");
934 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
935 printf(" %sUTF-8 support\n", rc? "" : "No ");
936 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
937 printf(" %sUnicode properties support\n", rc? "" : "No ");
938 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
939 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
940 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
941 (rc == -2)? "ANYCRLF" :
942 (rc == -1)? "ANY" : "???");
943 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
944 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
945 "all Unicode newlines");
946 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
947 printf(" Internal link size = %d\n", rc);
948 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
949 printf(" POSIX malloc threshold = %d\n", rc);
950 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
951 printf(" Default match limit = %ld\n", lrc);
952 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
953 printf(" Default recursion depth limit = %ld\n", lrc);
954 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
955 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
956 goto EXIT;
957 }
958 else if (strcmp(argv[op], "-help") == 0 ||
959 strcmp(argv[op], "--help") == 0)
960 {
961 usage();
962 goto EXIT;
963 }
964 else
965 {
966 printf("** Unknown or malformed option %s\n", argv[op]);
967 usage();
968 yield = 1;
969 goto EXIT;
970 }
971 op++;
972 argc--;
973 }
974
975 /* Get the store for the offsets vector, and remember what it was */
976
977 size_offsets_max = size_offsets;
978 offsets = (int *)malloc(size_offsets_max * sizeof(int));
979 if (offsets == NULL)
980 {
981 printf("** Failed to get %d bytes of memory for offsets vector\n",
982 (int)(size_offsets_max * sizeof(int)));
983 yield = 1;
984 goto EXIT;
985 }
986
987 /* Sort out the input and output files */
988
989 if (argc > 1)
990 {
991 infile = fopen(argv[op], INPUT_MODE);
992 if (infile == NULL)
993 {
994 printf("** Failed to open %s\n", argv[op]);
995 yield = 1;
996 goto EXIT;
997 }
998 }
999
1000 if (argc > 2)
1001 {
1002 outfile = fopen(argv[op+1], OUTPUT_MODE);
1003 if (outfile == NULL)
1004 {
1005 printf("** Failed to open %s\n", argv[op+1]);
1006 yield = 1;
1007 goto EXIT;
1008 }
1009 }
1010
1011 /* Set alternative malloc function */
1012
1013 pcre_malloc = new_malloc;
1014 pcre_free = new_free;
1015 pcre_stack_malloc = stack_malloc;
1016 pcre_stack_free = stack_free;
1017
1018 /* Heading line unless quiet, then prompt for first regex if stdin */
1019
1020 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1021
1022 /* Main loop */
1023
1024 while (!done)
1025 {
1026 pcre *re = NULL;
1027 pcre_extra *extra = NULL;
1028
1029 #if !defined NOPOSIX /* There are still compilers that require no indent */
1030 regex_t preg;
1031 int do_posix = 0;
1032 #endif
1033
1034 const char *error;
1035 unsigned char *p, *pp, *ppp;
1036 unsigned char *to_file = NULL;
1037 const unsigned char *tables = NULL;
1038 unsigned long int true_size, true_study_size = 0;
1039 size_t size, regex_gotten_store;
1040 int do_study = 0;
1041 int do_debug = debug;
1042 int do_G = 0;
1043 int do_g = 0;
1044 int do_showinfo = showinfo;
1045 int do_showrest = 0;
1046 int do_flip = 0;
1047 int erroroffset, len, delimiter, poffset;
1048
1049 use_utf8 = 0;
1050 debug_lengths = 1;
1051
1052 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1053 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1054 fflush(outfile);
1055
1056 p = buffer;
1057 while (isspace(*p)) p++;
1058 if (*p == 0) continue;
1059
1060 /* See if the pattern is to be loaded pre-compiled from a file. */
1061
1062 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1063 {
1064 unsigned long int magic, get_options;
1065 uschar sbuf[8];
1066 FILE *f;
1067
1068 p++;
1069 pp = p + (int)strlen((char *)p);
1070 while (isspace(pp[-1])) pp--;
1071 *pp = 0;
1072
1073 f = fopen((char *)p, "rb");
1074 if (f == NULL)
1075 {
1076 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1077 continue;
1078 }
1079
1080 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1081
1082 true_size =
1083 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1084 true_study_size =
1085 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1086
1087 re = (real_pcre *)new_malloc(true_size);
1088 regex_gotten_store = gotten_store;
1089
1090 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1091
1092 magic = ((real_pcre *)re)->magic_number;
1093 if (magic != MAGIC_NUMBER)
1094 {
1095 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1096 {
1097 do_flip = 1;
1098 }
1099 else
1100 {
1101 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1102 fclose(f);
1103 continue;
1104 }
1105 }
1106
1107 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1108 do_flip? " (byte-inverted)" : "", p);
1109
1110 /* Need to know if UTF-8 for printing data strings */
1111
1112 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1113 use_utf8 = (get_options & PCRE_UTF8) != 0;
1114
1115 /* Now see if there is any following study data */
1116
1117 if (true_study_size != 0)
1118 {
1119 pcre_study_data *psd;
1120
1121 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1122 extra->flags = PCRE_EXTRA_STUDY_DATA;
1123
1124 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1125 extra->study_data = psd;
1126
1127 if (fread(psd, 1, true_study_size, f) != true_study_size)
1128 {
1129 FAIL_READ:
1130 fprintf(outfile, "Failed to read data from %s\n", p);
1131 if (extra != NULL) new_free(extra);
1132 if (re != NULL) new_free(re);
1133 fclose(f);
1134 continue;
1135 }
1136 fprintf(outfile, "Study data loaded from %s\n", p);
1137 do_study = 1; /* To get the data output if requested */
1138 }
1139 else fprintf(outfile, "No study data\n");
1140
1141 fclose(f);
1142 goto SHOW_INFO;
1143 }
1144
1145 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1146 the pattern; if it isn't complete, read more. */
1147
1148 delimiter = *p++;
1149
1150 if (isalnum(delimiter) || delimiter == '\\')
1151 {
1152 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1153 goto SKIP_DATA;
1154 }
1155
1156 pp = p;
1157 poffset = p - buffer;
1158
1159 for(;;)
1160 {
1161 while (*pp != 0)
1162 {
1163 if (*pp == '\\' && pp[1] != 0) pp++;
1164 else if (*pp == delimiter) break;
1165 pp++;
1166 }
1167 if (*pp != 0) break;
1168 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1169 {
1170 fprintf(outfile, "** Unexpected EOF\n");
1171 done = 1;
1172 goto CONTINUE;
1173 }
1174 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1175 }
1176
1177 /* The buffer may have moved while being extended; reset the start of data
1178 pointer to the correct relative point in the buffer. */
1179
1180 p = buffer + poffset;
1181
1182 /* If the first character after the delimiter is backslash, make
1183 the pattern end with backslash. This is purely to provide a way
1184 of testing for the error message when a pattern ends with backslash. */
1185
1186 if (pp[1] == '\\') *pp++ = '\\';
1187
1188 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1189 for callouts. */
1190
1191 *pp++ = 0;
1192 strcpy((char *)pbuffer, (char *)p);
1193
1194 /* Look for options after final delimiter */
1195
1196 options = 0;
1197 study_options = 0;
1198 log_store = showstore; /* default from command line */
1199
1200 while (*pp != 0)
1201 {
1202 switch (*pp++)
1203 {
1204 case 'f': options |= PCRE_FIRSTLINE; break;
1205 case 'g': do_g = 1; break;
1206 case 'i': options |= PCRE_CASELESS; break;
1207 case 'm': options |= PCRE_MULTILINE; break;
1208 case 's': options |= PCRE_DOTALL; break;
1209 case 'x': options |= PCRE_EXTENDED; break;
1210
1211 case '+': do_showrest = 1; break;
1212 case 'A': options |= PCRE_ANCHORED; break;
1213 case 'B': do_debug = 1; break;
1214 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1215 case 'D': do_debug = do_showinfo = 1; break;
1216 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1217 case 'F': do_flip = 1; break;
1218 case 'G': do_G = 1; break;
1219 case 'I': do_showinfo = 1; break;
1220 case 'J': options |= PCRE_DUPNAMES; break;
1221 case 'M': log_store = 1; break;
1222 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1223
1224 #if !defined NOPOSIX
1225 case 'P': do_posix = 1; break;
1226 #endif
1227
1228 case 'S': do_study = 1; break;
1229 case 'U': options |= PCRE_UNGREEDY; break;
1230 case 'X': options |= PCRE_EXTRA; break;
1231 case 'Z': debug_lengths = 0; break;
1232 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1233 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1234
1235 case 'L':
1236 ppp = pp;
1237 /* The '\r' test here is so that it works on Windows. */
1238 /* The '0' test is just in case this is an unterminated line. */
1239 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1240 *ppp = 0;
1241 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1242 {
1243 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1244 goto SKIP_DATA;
1245 }
1246 locale_set = 1;
1247 tables = pcre_maketables();
1248 pp = ppp;
1249 break;
1250
1251 case '>':
1252 to_file = pp;
1253 while (*pp != 0) pp++;
1254 while (isspace(pp[-1])) pp--;
1255 *pp = 0;
1256 break;
1257
1258 case '<':
1259 {
1260 if (strncmp((char *)pp, "JS>", 3) == 0)
1261 {
1262 options |= PCRE_JAVASCRIPT_COMPAT;
1263 pp += 3;
1264 }
1265 else
1266 {
1267 int x = check_newline(pp, outfile);
1268 if (x == 0) goto SKIP_DATA;
1269 options |= x;
1270 while (*pp++ != '>');
1271 }
1272 }
1273 break;
1274
1275 case '\r': /* So that it works in Windows */
1276 case '\n':
1277 case ' ':
1278 break;
1279
1280 default:
1281 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1282 goto SKIP_DATA;
1283 }
1284 }
1285
1286 /* Handle compiling via the POSIX interface, which doesn't support the
1287 timing, showing, or debugging options, nor the ability to pass over
1288 local character tables. */
1289
1290 #if !defined NOPOSIX
1291 if (posix || do_posix)
1292 {
1293 int rc;
1294 int cflags = 0;
1295
1296 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1297 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1298 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1299 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1300 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1301
1302 rc = regcomp(&preg, (char *)p, cflags);
1303
1304 /* Compilation failed; go back for another re, skipping to blank line
1305 if non-interactive. */
1306
1307 if (rc != 0)
1308 {
1309 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1310 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1311 goto SKIP_DATA;
1312 }
1313 }
1314
1315 /* Handle compiling via the native interface */
1316
1317 else
1318 #endif /* !defined NOPOSIX */
1319
1320 {
1321 if (timeit > 0)
1322 {
1323 register int i;
1324 clock_t time_taken;
1325 clock_t start_time = clock();
1326 for (i = 0; i < timeit; i++)
1327 {
1328 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1329 if (re != NULL) free(re);
1330 }
1331 time_taken = clock() - start_time;
1332 fprintf(outfile, "Compile time %.4f milliseconds\n",
1333 (((double)time_taken * 1000.0) / (double)timeit) /
1334 (double)CLOCKS_PER_SEC);
1335 }
1336
1337 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1338
1339 /* Compilation failed; go back for another re, skipping to blank line
1340 if non-interactive. */
1341
1342 if (re == NULL)
1343 {
1344 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1345 SKIP_DATA:
1346 if (infile != stdin)
1347 {
1348 for (;;)
1349 {
1350 if (extend_inputline(infile, buffer, NULL) == NULL)
1351 {
1352 done = 1;
1353 goto CONTINUE;
1354 }
1355 len = (int)strlen((char *)buffer);
1356 while (len > 0 && isspace(buffer[len-1])) len--;
1357 if (len == 0) break;
1358 }
1359 fprintf(outfile, "\n");
1360 }
1361 goto CONTINUE;
1362 }
1363
1364 /* Compilation succeeded; print data if required. There are now two
1365 info-returning functions. The old one has a limited interface and
1366 returns only limited data. Check that it agrees with the newer one. */
1367
1368 if (log_store)
1369 fprintf(outfile, "Memory allocation (code space): %d\n",
1370 (int)(gotten_store -
1371 sizeof(real_pcre) -
1372 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1373
1374 /* Extract the size for possible writing before possibly flipping it,
1375 and remember the store that was got. */
1376
1377 true_size = ((real_pcre *)re)->size;
1378 regex_gotten_store = gotten_store;
1379
1380 /* If /S was present, study the regexp to generate additional info to
1381 help with the matching. */
1382
1383 if (do_study)
1384 {
1385 if (timeit > 0)
1386 {
1387 register int i;
1388 clock_t time_taken;
1389 clock_t start_time = clock();
1390 for (i = 0; i < timeit; i++)
1391 extra = pcre_study(re, study_options, &error);
1392 time_taken = clock() - start_time;
1393 if (extra != NULL) free(extra);
1394 fprintf(outfile, " Study time %.4f milliseconds\n",
1395 (((double)time_taken * 1000.0) / (double)timeit) /
1396 (double)CLOCKS_PER_SEC);
1397 }
1398 extra = pcre_study(re, study_options, &error);
1399 if (error != NULL)
1400 fprintf(outfile, "Failed to study: %s\n", error);
1401 else if (extra != NULL)
1402 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1403 }
1404
1405 /* If the 'F' option was present, we flip the bytes of all the integer
1406 fields in the regex data block and the study block. This is to make it
1407 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1408 compiled on a different architecture. */
1409
1410 if (do_flip)
1411 {
1412 real_pcre *rre = (real_pcre *)re;
1413 rre->magic_number =
1414 byteflip(rre->magic_number, sizeof(rre->magic_number));
1415 rre->size = byteflip(rre->size, sizeof(rre->size));
1416 rre->options = byteflip(rre->options, sizeof(rre->options));
1417 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1418 rre->top_bracket =
1419 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1420 rre->top_backref =
1421 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1422 rre->first_byte =
1423 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1424 rre->req_byte =
1425 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1426 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1427 sizeof(rre->name_table_offset));
1428 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1429 sizeof(rre->name_entry_size));
1430 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1431 sizeof(rre->name_count));
1432
1433 if (extra != NULL)
1434 {
1435 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1436 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1437 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1438 }
1439 }
1440
1441 /* Extract information from the compiled data if required */
1442
1443 SHOW_INFO:
1444
1445 if (do_debug)
1446 {
1447 fprintf(outfile, "------------------------------------------------------------------\n");
1448 pcre_printint(re, outfile, debug_lengths);
1449 }
1450
1451 if (do_showinfo)
1452 {
1453 unsigned long int get_options, all_options;
1454 #if !defined NOINFOCHECK
1455 int old_first_char, old_options, old_count;
1456 #endif
1457 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1458 hascrorlf;
1459 int nameentrysize, namecount;
1460 const uschar *nametable;
1461
1462 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1463 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1464 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1465 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1466 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1467 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1468 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1469 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1470 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1471 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1472 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1473 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1474
1475 #if !defined NOINFOCHECK
1476 old_count = pcre_info(re, &old_options, &old_first_char);
1477 if (count < 0) fprintf(outfile,
1478 "Error %d from pcre_info()\n", count);
1479 else
1480 {
1481 if (old_count != count) fprintf(outfile,
1482 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1483 old_count);
1484
1485 if (old_first_char != first_char) fprintf(outfile,
1486 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1487 first_char, old_first_char);
1488
1489 if (old_options != (int)get_options) fprintf(outfile,
1490 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1491 get_options, old_options);
1492 }
1493 #endif
1494
1495 if (size != regex_gotten_store) fprintf(outfile,
1496 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1497 (int)size, (int)regex_gotten_store);
1498
1499 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1500 if (backrefmax > 0)
1501 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1502
1503 if (namecount > 0)
1504 {
1505 fprintf(outfile, "Named capturing subpatterns:\n");
1506 while (namecount-- > 0)
1507 {
1508 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1509 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1510 GET2(nametable, 0));
1511 nametable += nameentrysize;
1512 }
1513 }
1514
1515 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1516 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1517
1518 all_options = ((real_pcre *)re)->options;
1519 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1520
1521 if (get_options == 0) fprintf(outfile, "No options\n");
1522 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1523 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1524 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1525 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1526 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1527 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1528 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1529 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1530 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1531 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1532 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1533 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1534 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1535 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1536 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1537 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1538
1539 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1540
1541 switch (get_options & PCRE_NEWLINE_BITS)
1542 {
1543 case PCRE_NEWLINE_CR:
1544 fprintf(outfile, "Forced newline sequence: CR\n");
1545 break;
1546
1547 case PCRE_NEWLINE_LF:
1548 fprintf(outfile, "Forced newline sequence: LF\n");
1549 break;
1550
1551 case PCRE_NEWLINE_CRLF:
1552 fprintf(outfile, "Forced newline sequence: CRLF\n");
1553 break;
1554
1555 case PCRE_NEWLINE_ANYCRLF:
1556 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1557 break;
1558
1559 case PCRE_NEWLINE_ANY:
1560 fprintf(outfile, "Forced newline sequence: ANY\n");
1561 break;
1562
1563 default:
1564 break;
1565 }
1566
1567 if (first_char == -1)
1568 {
1569 fprintf(outfile, "First char at start or follows newline\n");
1570 }
1571 else if (first_char < 0)
1572 {
1573 fprintf(outfile, "No first char\n");
1574 }
1575 else
1576 {
1577 int ch = first_char & 255;
1578 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1579 "" : " (caseless)";
1580 if (PRINTHEX(ch))
1581 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1582 else
1583 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1584 }
1585
1586 if (need_char < 0)
1587 {
1588 fprintf(outfile, "No need char\n");
1589 }
1590 else
1591 {
1592 int ch = need_char & 255;
1593 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1594 "" : " (caseless)";
1595 if (PRINTHEX(ch))
1596 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1597 else
1598 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1599 }
1600
1601 /* Don't output study size; at present it is in any case a fixed
1602 value, but it varies, depending on the computer architecture, and
1603 so messes up the test suite. (And with the /F option, it might be
1604 flipped.) */
1605
1606 if (do_study)
1607 {
1608 if (extra == NULL)
1609 fprintf(outfile, "Study returned NULL\n");
1610 else
1611 {
1612 uschar *start_bits = NULL;
1613 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1614
1615 if (start_bits == NULL)
1616 fprintf(outfile, "No starting byte set\n");
1617 else
1618 {
1619 int i;
1620 int c = 24;
1621 fprintf(outfile, "Starting byte set: ");
1622 for (i = 0; i < 256; i++)
1623 {
1624 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1625 {
1626 if (c > 75)
1627 {
1628 fprintf(outfile, "\n ");
1629 c = 2;
1630 }
1631 if (PRINTHEX(i) && i != ' ')
1632 {
1633 fprintf(outfile, "%c ", i);
1634 c += 2;
1635 }
1636 else
1637 {
1638 fprintf(outfile, "\\x%02x ", i);
1639 c += 5;
1640 }
1641 }
1642 }
1643 fprintf(outfile, "\n");
1644 }
1645 }
1646 }
1647 }
1648
1649 /* If the '>' option was present, we write out the regex to a file, and
1650 that is all. The first 8 bytes of the file are the regex length and then
1651 the study length, in big-endian order. */
1652
1653 if (to_file != NULL)
1654 {
1655 FILE *f = fopen((char *)to_file, "wb");
1656 if (f == NULL)
1657 {
1658 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1659 }
1660 else
1661 {
1662 uschar sbuf[8];
1663 sbuf[0] = (uschar)((true_size >> 24) & 255);
1664 sbuf[1] = (uschar)((true_size >> 16) & 255);
1665 sbuf[2] = (uschar)((true_size >> 8) & 255);
1666 sbuf[3] = (uschar)((true_size) & 255);
1667
1668 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1669 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1670 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1671 sbuf[7] = (uschar)((true_study_size) & 255);
1672
1673 if (fwrite(sbuf, 1, 8, f) < 8 ||
1674 fwrite(re, 1, true_size, f) < true_size)
1675 {
1676 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1677 }
1678 else
1679 {
1680 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1681 if (extra != NULL)
1682 {
1683 if (fwrite(extra->study_data, 1, true_study_size, f) <
1684 true_study_size)
1685 {
1686 fprintf(outfile, "Write error on %s: %s\n", to_file,
1687 strerror(errno));
1688 }
1689 else fprintf(outfile, "Study data written to %s\n", to_file);
1690
1691 }
1692 }
1693 fclose(f);
1694 }
1695
1696 new_free(re);
1697 if (extra != NULL) new_free(extra);
1698 if (tables != NULL) new_free((void *)tables);
1699 continue; /* With next regex */
1700 }
1701 } /* End of non-POSIX compile */
1702
1703 /* Read data lines and test them */
1704
1705 for (;;)
1706 {
1707 uschar *q;
1708 uschar *bptr;
1709 int *use_offsets = offsets;
1710 int use_size_offsets = size_offsets;
1711 int callout_data = 0;
1712 int callout_data_set = 0;
1713 int count, c;
1714 int copystrings = 0;
1715 int find_match_limit = default_find_match_limit;
1716 int getstrings = 0;
1717 int getlist = 0;
1718 int gmatched = 0;
1719 int start_offset = 0;
1720 int g_notempty = 0;
1721 int use_dfa = 0;
1722
1723 options = 0;
1724
1725 *copynames = 0;
1726 *getnames = 0;
1727
1728 copynamesptr = copynames;
1729 getnamesptr = getnames;
1730
1731 pcre_callout = callout;
1732 first_callout = 1;
1733 callout_extra = 0;
1734 callout_count = 0;
1735 callout_fail_count = 999999;
1736 callout_fail_id = -1;
1737 show_malloc = 0;
1738
1739 if (extra != NULL) extra->flags &=
1740 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1741
1742 len = 0;
1743 for (;;)
1744 {
1745 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1746 {
1747 if (len > 0) break;
1748 done = 1;
1749 goto CONTINUE;
1750 }
1751 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1752 len = (int)strlen((char *)buffer);
1753 if (buffer[len-1] == '\n') break;
1754 }
1755
1756 while (len > 0 && isspace(buffer[len-1])) len--;
1757 buffer[len] = 0;
1758 if (len == 0) break;
1759
1760 p = buffer;
1761 while (isspace(*p)) p++;
1762
1763 bptr = q = dbuffer;
1764 while ((c = *p++) != 0)
1765 {
1766 int i = 0;
1767 int n = 0;
1768
1769 if (c == '\\') switch ((c = *p++))
1770 {
1771 case 'a': c = 7; break;
1772 case 'b': c = '\b'; break;
1773 case 'e': c = 27; break;
1774 case 'f': c = '\f'; break;
1775 case 'n': c = '\n'; break;
1776 case 'r': c = '\r'; break;
1777 case 't': c = '\t'; break;
1778 case 'v': c = '\v'; break;
1779
1780 case '0': case '1': case '2': case '3':
1781 case '4': case '5': case '6': case '7':
1782 c -= '0';
1783 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1784 c = c * 8 + *p++ - '0';
1785
1786 #if !defined NOUTF8
1787 if (use_utf8 && c > 255)
1788 {
1789 unsigned char buff8[8];
1790 int ii, utn;
1791 utn = ord2utf8(c, buff8);
1792 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1793 c = buff8[ii]; /* Last byte */
1794 }
1795 #endif
1796 break;
1797
1798 case 'x':
1799
1800 /* Handle \x{..} specially - new Perl thing for utf8 */
1801
1802 #if !defined NOUTF8
1803 if (*p == '{')
1804 {
1805 unsigned char *pt = p;
1806 c = 0;
1807 while (isxdigit(*(++pt)))
1808 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1809 if (*pt == '}')
1810 {
1811 unsigned char buff8[8];
1812 int ii, utn;
1813 if (use_utf8)
1814 {
1815 utn = ord2utf8(c, buff8);
1816 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1817 c = buff8[ii]; /* Last byte */
1818 }
1819 else
1820 {
1821 if (c > 255)
1822 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1823 "UTF-8 mode is not enabled.\n"
1824 "** Truncation will probably give the wrong result.\n", c);
1825 }
1826 p = pt + 1;
1827 break;
1828 }
1829 /* Not correct form; fall through */
1830 }
1831 #endif
1832
1833 /* Ordinary \x */
1834
1835 c = 0;
1836 while (i++ < 2 && isxdigit(*p))
1837 {
1838 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1839 p++;
1840 }
1841 break;
1842
1843 case 0: /* \ followed by EOF allows for an empty line */
1844 p--;
1845 continue;
1846
1847 case '>':
1848 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1849 continue;
1850
1851 case 'A': /* Option setting */
1852 options |= PCRE_ANCHORED;
1853 continue;
1854
1855 case 'B':
1856 options |= PCRE_NOTBOL;
1857 continue;
1858
1859 case 'C':
1860 if (isdigit(*p)) /* Set copy string */
1861 {
1862 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1863 copystrings |= 1 << n;
1864 }
1865 else if (isalnum(*p))
1866 {
1867 uschar *npp = copynamesptr;
1868 while (isalnum(*p)) *npp++ = *p++;
1869 *npp++ = 0;
1870 *npp = 0;
1871 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1872 if (n < 0)
1873 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1874 copynamesptr = npp;
1875 }
1876 else if (*p == '+')
1877 {
1878 callout_extra = 1;
1879 p++;
1880 }
1881 else if (*p == '-')
1882 {
1883 pcre_callout = NULL;
1884 p++;
1885 }
1886 else if (*p == '!')
1887 {
1888 callout_fail_id = 0;
1889 p++;
1890 while(isdigit(*p))
1891 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1892 callout_fail_count = 0;
1893 if (*p == '!')
1894 {
1895 p++;
1896 while(isdigit(*p))
1897 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1898 }
1899 }
1900 else if (*p == '*')
1901 {
1902 int sign = 1;
1903 callout_data = 0;
1904 if (*(++p) == '-') { sign = -1; p++; }
1905 while(isdigit(*p))
1906 callout_data = callout_data * 10 + *p++ - '0';
1907 callout_data *= sign;
1908 callout_data_set = 1;
1909 }
1910 continue;
1911
1912 #if !defined NODFA
1913 case 'D':
1914 #if !defined NOPOSIX
1915 if (posix || do_posix)
1916 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1917 else
1918 #endif
1919 use_dfa = 1;
1920 continue;
1921
1922 case 'F':
1923 options |= PCRE_DFA_SHORTEST;
1924 continue;
1925 #endif
1926
1927 case 'G':
1928 if (isdigit(*p))
1929 {
1930 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1931 getstrings |= 1 << n;
1932 }
1933 else if (isalnum(*p))
1934 {
1935 uschar *npp = getnamesptr;
1936 while (isalnum(*p)) *npp++ = *p++;
1937 *npp++ = 0;
1938 *npp = 0;
1939 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1940 if (n < 0)
1941 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1942 getnamesptr = npp;
1943 }
1944 continue;
1945
1946 case 'L':
1947 getlist = 1;
1948 continue;
1949
1950 case 'M':
1951 find_match_limit = 1;
1952 continue;
1953
1954 case 'N':
1955 options |= PCRE_NOTEMPTY;
1956 continue;
1957
1958 case 'O':
1959 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1960 if (n > size_offsets_max)
1961 {
1962 size_offsets_max = n;
1963 free(offsets);
1964 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1965 if (offsets == NULL)
1966 {
1967 printf("** Failed to get %d bytes of memory for offsets vector\n",
1968 (int)(size_offsets_max * sizeof(int)));
1969 yield = 1;
1970 goto EXIT;
1971 }
1972 }
1973 use_size_offsets = n;
1974 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1975 continue;
1976
1977 case 'P':
1978 options |= PCRE_PARTIAL;
1979 continue;
1980
1981 case 'Q':
1982 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1983 if (extra == NULL)
1984 {
1985 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1986 extra->flags = 0;
1987 }
1988 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1989 extra->match_limit_recursion = n;
1990 continue;
1991
1992 case 'q':
1993 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1994 if (extra == NULL)
1995 {
1996 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1997 extra->flags = 0;
1998 }
1999 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2000 extra->match_limit = n;
2001 continue;
2002
2003 #if !defined NODFA
2004 case 'R':
2005 options |= PCRE_DFA_RESTART;
2006 continue;
2007 #endif
2008
2009 case 'S':
2010 show_malloc = 1;
2011 continue;
2012
2013 case 'Z':
2014 options |= PCRE_NOTEOL;
2015 continue;
2016
2017 case '?':
2018 options |= PCRE_NO_UTF8_CHECK;
2019 continue;
2020
2021 case '<':
2022 {
2023 int x = check_newline(p, outfile);
2024 if (x == 0) goto NEXT_DATA;
2025 options |= x;
2026 while (*p++ != '>');
2027 }
2028 continue;
2029 }
2030 *q++ = c;
2031 }
2032 *q = 0;
2033 len = q - dbuffer;
2034
2035 /* Move the data to the end of the buffer so that a read over the end of
2036 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2037 we are using the POSIX interface, we must include the terminating zero. */
2038
2039 #if !defined NOPOSIX
2040 if (posix || do_posix)
2041 {
2042 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2043 bptr += buffer_size - len - 1;
2044 }
2045 else
2046 #endif
2047 {
2048 memmove(bptr + buffer_size - len, bptr, len);
2049 bptr += buffer_size - len;
2050 }
2051
2052 if ((all_use_dfa || use_dfa) && find_match_limit)
2053 {
2054 printf("**Match limit not relevant for DFA matching: ignored\n");
2055 find_match_limit = 0;
2056 }
2057
2058 /* Handle matching via the POSIX interface, which does not
2059 support timing or playing with the match limit or callout data. */
2060
2061 #if !defined NOPOSIX
2062 if (posix || do_posix)
2063 {
2064 int rc;
2065 int eflags = 0;
2066 regmatch_t *pmatch = NULL;
2067 if (use_size_offsets > 0)
2068 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2069 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2070 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2071
2072 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2073
2074 if (rc != 0)
2075 {
2076 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2077 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2078 }
2079 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2080 != 0)
2081 {
2082 fprintf(outfile, "Matched with REG_NOSUB\n");
2083 }
2084 else
2085 {
2086 size_t i;
2087 for (i = 0; i < (size_t)use_size_offsets; i++)
2088 {
2089 if (pmatch[i].rm_so >= 0)
2090 {
2091 fprintf(outfile, "%2d: ", (int)i);
2092 (void)pchars(dbuffer + pmatch[i].rm_so,
2093 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2094 fprintf(outfile, "\n");
2095 if (i == 0 && do_showrest)
2096 {
2097 fprintf(outfile, " 0+ ");
2098 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2099 outfile);
2100 fprintf(outfile, "\n");
2101 }
2102 }
2103 }
2104 }
2105 free(pmatch);
2106 }
2107
2108 /* Handle matching via the native interface - repeats for /g and /G */
2109
2110 else
2111 #endif /* !defined NOPOSIX */
2112
2113 for (;; gmatched++) /* Loop for /g or /G */
2114 {
2115 if (timeitm > 0)
2116 {
2117 register int i;
2118 clock_t time_taken;
2119 clock_t start_time = clock();
2120
2121 #if !defined NODFA
2122 if (all_use_dfa || use_dfa)
2123 {
2124 int workspace[1000];
2125 for (i = 0; i < timeitm; i++)
2126 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2127 options | g_notempty, use_offsets, use_size_offsets, workspace,
2128 sizeof(workspace)/sizeof(int));
2129 }
2130 else
2131 #endif
2132
2133 for (i = 0; i < timeitm; i++)
2134 count = pcre_exec(re, extra, (char *)bptr, len,
2135 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2136
2137 time_taken = clock() - start_time;
2138 fprintf(outfile, "Execute time %.4f milliseconds\n",
2139 (((double)time_taken * 1000.0) / (double)timeitm) /
2140 (double)CLOCKS_PER_SEC);
2141 }
2142
2143 /* If find_match_limit is set, we want to do repeated matches with
2144 varying limits in order to find the minimum value for the match limit and
2145 for the recursion limit. */
2146
2147 if (find_match_limit)
2148 {
2149 if (extra == NULL)
2150 {
2151 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2152 extra->flags = 0;
2153 }
2154
2155 (void)check_match_limit(re, extra, bptr, len, start_offset,
2156 options|g_notempty, use_offsets, use_size_offsets,
2157 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2158 PCRE_ERROR_MATCHLIMIT, "match()");
2159
2160 count = check_match_limit(re, extra, bptr, len, start_offset,
2161 options|g_notempty, use_offsets, use_size_offsets,
2162 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2163 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2164 }
2165
2166 /* If callout_data is set, use the interface with additional data */
2167
2168 else if (callout_data_set)
2169 {
2170 if (extra == NULL)
2171 {
2172 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2173 extra->flags = 0;
2174 }
2175 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2176 extra->callout_data = &callout_data;
2177 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2178 options | g_notempty, use_offsets, use_size_offsets);
2179 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2180 }
2181
2182 /* The normal case is just to do the match once, with the default
2183 value of match_limit. */
2184
2185 #if !defined NODFA
2186 else if (all_use_dfa || use_dfa)
2187 {
2188 int workspace[1000];
2189 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2190 options | g_notempty, use_offsets, use_size_offsets, workspace,
2191 sizeof(workspace)/sizeof(int));
2192 if (count == 0)
2193 {
2194 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2195 count = use_size_offsets/2;
2196 }
2197 }
2198 #endif
2199
2200 else
2201 {
2202 count = pcre_exec(re, extra, (char *)bptr, len,
2203 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2204 if (count == 0)
2205 {
2206 fprintf(outfile, "Matched, but too many substrings\n");
2207 count = use_size_offsets/3;
2208 }
2209 }
2210
2211 /* Matched */
2212
2213 if (count >= 0)
2214 {
2215 int i, maxcount;
2216
2217 #if !defined NODFA
2218 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2219 #endif
2220 maxcount = use_size_offsets/3;
2221
2222 /* This is a check against a lunatic return value. */
2223
2224 if (count > maxcount)
2225 {
2226 fprintf(outfile,
2227 "** PCRE error: returned count %d is too big for offset size %d\n",
2228 count, use_size_offsets);
2229 count = use_size_offsets/3;
2230 if (do_g || do_G)
2231 {
2232 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2233 do_g = do_G = FALSE; /* Break g/G loop */
2234 }
2235 }
2236
2237 for (i = 0; i < count * 2; i += 2)
2238 {
2239 if (use_offsets[i] < 0)
2240 fprintf(outfile, "%2d: <unset>\n", i/2);
2241 else
2242 {
2243 fprintf(outfile, "%2d: ", i/2);
2244 (void)pchars(bptr + use_offsets[i],
2245 use_offsets[i+1] - use_offsets[i], outfile);
2246 fprintf(outfile, "\n");
2247 if (i == 0)
2248 {
2249 if (do_showrest)
2250 {
2251 fprintf(outfile, " 0+ ");
2252 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2253 outfile);
2254 fprintf(outfile, "\n");
2255 }
2256 }
2257 }
2258 }
2259
2260 for (i = 0; i < 32; i++)
2261 {
2262 if ((copystrings & (1 << i)) != 0)
2263 {
2264 char copybuffer[256];
2265 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2266 i, copybuffer, sizeof(copybuffer));
2267 if (rc < 0)
2268 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2269 else
2270 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2271 }
2272 }
2273
2274 for (copynamesptr = copynames;
2275 *copynamesptr != 0;
2276 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2277 {
2278 char copybuffer[256];
2279 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2280 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2281 if (rc < 0)
2282 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2283 else
2284 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2285 }
2286
2287 for (i = 0; i < 32; i++)
2288 {
2289 if ((getstrings & (1 << i)) != 0)
2290 {
2291 const char *substring;
2292 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2293 i, &substring);
2294 if (rc < 0)
2295 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2296 else
2297 {
2298 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2299 pcre_free_substring(substring);
2300 }
2301 }
2302 }
2303
2304 for (getnamesptr = getnames;
2305 *getnamesptr != 0;
2306 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2307 {
2308 const char *substring;
2309 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2310 count, (char *)getnamesptr, &substring);
2311 if (rc < 0)
2312 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2313 else
2314 {
2315 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2316 pcre_free_substring(substring);
2317 }
2318 }
2319
2320 if (getlist)
2321 {
2322 const char **stringlist;
2323 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2324 &stringlist);
2325 if (rc < 0)
2326 fprintf(outfile, "get substring list failed %d\n", rc);
2327 else
2328 {
2329 for (i = 0; i < count; i++)
2330 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2331 if (stringlist[i] != NULL)
2332 fprintf(outfile, "string list not terminated by NULL\n");
2333 /* free((void *)stringlist); */
2334 pcre_free_substring_list(stringlist);
2335 }
2336 }
2337 }
2338
2339 /* There was a partial match */
2340
2341 else if (count == PCRE_ERROR_PARTIAL)
2342 {
2343 fprintf(outfile, "Partial match");
2344 #if !defined NODFA
2345 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2346 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2347 bptr + use_offsets[0]);
2348 #endif
2349 fprintf(outfile, "\n");
2350 break; /* Out of the /g loop */
2351 }
2352
2353 /* Failed to match. If this is a /g or /G loop and we previously set
2354 g_notempty after a null match, this is not necessarily the end. We want
2355 to advance the start offset, and continue. We won't be at the end of the
2356 string - that was checked before setting g_notempty.
2357
2358 Complication arises in the case when the newline option is "any" or
2359 "anycrlf". If the previous match was at the end of a line terminated by
2360 CRLF, an advance of one character just passes the \r, whereas we should
2361 prefer the longer newline sequence, as does the code in pcre_exec().
2362 Fudge the offset value to achieve this.
2363
2364 Otherwise, in the case of UTF-8 matching, the advance must be one
2365 character, not one byte. */
2366
2367 else
2368 {
2369 if (g_notempty != 0)
2370 {
2371 int onechar = 1;
2372 unsigned int obits = ((real_pcre *)re)->options;
2373 use_offsets[0] = start_offset;
2374 if ((obits & PCRE_NEWLINE_BITS) == 0)
2375 {
2376 int d;
2377 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2378 obits = (d == '\r')? PCRE_NEWLINE_CR :
2379 (d == '\n')? PCRE_NEWLINE_LF :
2380 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2381 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2382 (d == -1)? PCRE_NEWLINE_ANY : 0;
2383 }
2384 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2385 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2386 &&
2387 start_offset < len - 1 &&
2388 bptr[start_offset] == '\r' &&
2389 bptr[start_offset+1] == '\n')
2390 onechar++;
2391 else if (use_utf8)
2392 {
2393 while (start_offset + onechar < len)
2394 {
2395 int tb = bptr[start_offset+onechar];
2396 if (tb <= 127) break;
2397 tb &= 0xc0;
2398 if (tb != 0 && tb != 0xc0) onechar++;
2399 }
2400 }
2401 use_offsets[1] = start_offset + onechar;
2402 }
2403 else
2404 {
2405 if (count == PCRE_ERROR_NOMATCH)
2406 {
2407 if (gmatched == 0) fprintf(outfile, "No match\n");
2408 }
2409 else fprintf(outfile, "Error %d\n", count);
2410 break; /* Out of the /g loop */
2411 }
2412 }
2413
2414 /* If not /g or /G we are done */
2415
2416 if (!do_g && !do_G) break;
2417
2418 /* If we have matched an empty string, first check to see if we are at
2419 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2420 what Perl's /g option does. This turns out to be rather cunning. First
2421 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2422 same point. If this fails (picked up above) we advance to the next
2423 character. */
2424
2425 g_notempty = 0;
2426
2427 if (use_offsets[0] == use_offsets[1])
2428 {
2429 if (use_offsets[0] == len) break;
2430 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2431 }
2432
2433 /* For /g, update the start offset, leaving the rest alone */
2434
2435 if (do_g) start_offset = use_offsets[1];
2436
2437 /* For /G, update the pointer and length */
2438
2439 else
2440 {
2441 bptr += use_offsets[1];
2442 len -= use_offsets[1];
2443 }
2444 } /* End of loop for /g and /G */
2445
2446 NEXT_DATA: continue;
2447 } /* End of loop for data lines */
2448
2449 CONTINUE:
2450
2451 #if !defined NOPOSIX
2452 if (posix || do_posix) regfree(&preg);
2453 #endif
2454
2455 if (re != NULL) new_free(re);
2456 if (extra != NULL) new_free(extra);
2457 if (tables != NULL)
2458 {
2459 new_free((void *)tables);
2460 setlocale(LC_CTYPE, "C");
2461 locale_set = 0;
2462 }
2463 }
2464
2465 if (infile == stdin) fprintf(outfile, "\n");
2466
2467 EXIT:
2468
2469 if (infile != NULL && infile != stdin) fclose(infile);
2470 if (outfile != NULL && outfile != stdout) fclose(outfile);
2471
2472 free(buffer);
2473 free(dbuffer);
2474 free(pbuffer);
2475 free(offsets);
2476
2477 return yield;
2478 }
2479
2480 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12