/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 391 - (show annotations) (download)
Tue Mar 17 21:16:01 2009 UTC (5 years, 1 month ago) by ph10
File MIME type: text/plain
File size: 73092 byte(s)
Add support for UTF-8 in EBCDIC environments.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <locale.h>
49 #include <errno.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #define isatty _isatty /* This is what Windows calls them, I'm told */
75 #define fileno _fileno
76
77 #else
78 #include <sys/time.h> /* These two includes are needed */
79 #include <sys/resource.h> /* for setrlimit(). */
80 #define INPUT_MODE "rb"
81 #define OUTPUT_MODE "wb"
82 #endif
83
84
85 /* We have to include pcre_internal.h because we need the internal info for
86 displaying the results of pcre_study() and we also need to know about the
87 internal macros, structures, and other internal data values; pcretest has
88 "inside information" compared to a program that strictly follows the PCRE API.
89
90 Although pcre_internal.h does itself include pcre.h, we explicitly include it
91 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
92 appropriately for an application, not for building PCRE. */
93
94 #include "pcre.h"
95 #include "pcre_internal.h"
96
97 /* We need access to some of the data tables that PCRE uses. So as not to have
98 to keep two copies, we include the source file here, changing the names of the
99 external symbols to prevent clashes. */
100
101 #define _pcre_ucp_gentype ucp_gentype
102 #define _pcre_utf8_table1 utf8_table1
103 #define _pcre_utf8_table1_size utf8_table1_size
104 #define _pcre_utf8_table2 utf8_table2
105 #define _pcre_utf8_table3 utf8_table3
106 #define _pcre_utf8_table4 utf8_table4
107 #define _pcre_utt utt
108 #define _pcre_utt_size utt_size
109 #define _pcre_utt_names utt_names
110 #define _pcre_OP_lengths OP_lengths
111
112 #include "pcre_tables.c"
113
114 /* We also need the pcre_printint() function for printing out compiled
115 patterns. This function is in a separate file so that it can be included in
116 pcre_compile.c when that module is compiled with debugging enabled.
117
118 The definition of the macro PRINTABLE, which determines whether to print an
119 output character as-is or as a hex value when showing compiled patterns, is
120 contained in this file. We use it here also, in cases when the locale has not
121 been explicitly changed, so as to get consistent output from systems that
122 differ in their output from isprint() even in the "C" locale. */
123
124 #include "pcre_printint.src"
125
126 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
127
128
129 /* It is possible to compile this test program without including support for
130 testing the POSIX interface, though this is not available via the standard
131 Makefile. */
132
133 #if !defined NOPOSIX
134 #include "pcreposix.h"
135 #endif
136
137 /* It is also possible, for the benefit of the version currently imported into
138 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
139 interface to the DFA matcher (NODFA), and without the doublecheck of the old
140 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
141 UTF8 support if PCRE is built without it. */
142
143 #ifndef SUPPORT_UTF8
144 #ifndef NOUTF8
145 #define NOUTF8
146 #endif
147 #endif
148
149
150 /* Other parameters */
151
152 #ifndef CLOCKS_PER_SEC
153 #ifdef CLK_TCK
154 #define CLOCKS_PER_SEC CLK_TCK
155 #else
156 #define CLOCKS_PER_SEC 100
157 #endif
158 #endif
159
160 /* This is the default loop count for timing. */
161
162 #define LOOPREPEAT 500000
163
164 /* Static variables */
165
166 static FILE *outfile;
167 static int log_store = 0;
168 static int callout_count;
169 static int callout_extra;
170 static int callout_fail_count;
171 static int callout_fail_id;
172 static int debug_lengths;
173 static int first_callout;
174 static int locale_set = 0;
175 static int show_malloc;
176 static int use_utf8;
177 static size_t gotten_store;
178
179 /* The buffers grow automatically if very long input lines are encountered. */
180
181 static int buffer_size = 50000;
182 static uschar *buffer = NULL;
183 static uschar *dbuffer = NULL;
184 static uschar *pbuffer = NULL;
185
186
187
188 /*************************************************
189 * Read or extend an input line *
190 *************************************************/
191
192 /* Input lines are read into buffer, but both patterns and data lines can be
193 continued over multiple input lines. In addition, if the buffer fills up, we
194 want to automatically expand it so as to be able to handle extremely large
195 lines that are needed for certain stress tests. When the input buffer is
196 expanded, the other two buffers must also be expanded likewise, and the
197 contents of pbuffer, which are a copy of the input for callouts, must be
198 preserved (for when expansion happens for a data line). This is not the most
199 optimal way of handling this, but hey, this is just a test program!
200
201 Arguments:
202 f the file to read
203 start where in buffer to start (this *must* be within buffer)
204 prompt for stdin or readline()
205
206 Returns: pointer to the start of new data
207 could be a copy of start, or could be moved
208 NULL if no data read and EOF reached
209 */
210
211 static uschar *
212 extend_inputline(FILE *f, uschar *start, const char *prompt)
213 {
214 uschar *here = start;
215
216 for (;;)
217 {
218 int rlen = buffer_size - (here - buffer);
219
220 if (rlen > 1000)
221 {
222 int dlen;
223
224 /* If libreadline support is required, use readline() to read a line if the
225 input is a terminal. Note that readline() removes the trailing newline, so
226 we must put it back again, to be compatible with fgets(). */
227
228 #ifdef SUPPORT_LIBREADLINE
229 if (isatty(fileno(f)))
230 {
231 size_t len;
232 char *s = readline(prompt);
233 if (s == NULL) return (here == start)? NULL : start;
234 len = strlen(s);
235 if (len > 0) add_history(s);
236 if (len > rlen - 1) len = rlen - 1;
237 memcpy(here, s, len);
238 here[len] = '\n';
239 here[len+1] = 0;
240 free(s);
241 }
242 else
243 #endif
244
245 /* Read the next line by normal means, prompting if the file is stdin. */
246
247 {
248 if (f == stdin) printf(prompt);
249 if (fgets((char *)here, rlen, f) == NULL)
250 return (here == start)? NULL : start;
251 }
252
253 dlen = (int)strlen((char *)here);
254 if (dlen > 0 && here[dlen - 1] == '\n') return start;
255 here += dlen;
256 }
257
258 else
259 {
260 int new_buffer_size = 2*buffer_size;
261 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
262 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
263 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
264
265 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
266 {
267 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
268 exit(1);
269 }
270
271 memcpy(new_buffer, buffer, buffer_size);
272 memcpy(new_pbuffer, pbuffer, buffer_size);
273
274 buffer_size = new_buffer_size;
275
276 start = new_buffer + (start - buffer);
277 here = new_buffer + (here - buffer);
278
279 free(buffer);
280 free(dbuffer);
281 free(pbuffer);
282
283 buffer = new_buffer;
284 dbuffer = new_dbuffer;
285 pbuffer = new_pbuffer;
286 }
287 }
288
289 return NULL; /* Control never gets here */
290 }
291
292
293
294
295
296
297
/*************************************************
*        Read number from string                 *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple. Leading white space is
skipped, then decimal digits are accumulated. If the number is too large for an
int, the result sticks at the largest int value instead of overflowing, but
the remaining digits are still consumed so that the end pointer is the same as
for a number that fits.

Arguments:
  str      string to be converted
  endptr   where to put the end pointer (first character not consumed)

Returns:   the value as a non-negative int; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  /* Largest int value, obtained without needing <limits.h>. */
  const int max_value = (int)(~0U >> 1);
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  while (isdigit(*str))
  {
    int digit = (int)(*str++ - '0');

    /* result*10 + digit fits exactly when result <= (max_value - digit)/10 */
    if (result > (max_value - digit) / 10)
      result = max_value;
    else
      result = result * 10 + digit;
  }

  *endptr = str;
  return result;
}
322
323
324
325
326 /*************************************************
327 * Convert UTF-8 string to value *
328 *************************************************/
329
330 /* This function takes one or more bytes that represents a UTF-8 character,
331 and returns the value of the character.
332
333 Argument:
334 utf8bytes a pointer to the byte vector
335 vptr a pointer to an int to receive the value
336
337 Returns: > 0 => the number of bytes consumed
338 -6 to 0 => malformed UTF-8 character at offset = (-return)
339 */
340
341 #if !defined NOUTF8
342
343 static int
344 utf82ord(unsigned char *utf8bytes, int *vptr)
345 {
346 int c = *utf8bytes++;
347 int d = c;
348 int i, j, s;
349
350 for (i = -1; i < 6; i++) /* i is number of additional bytes */
351 {
352 if ((d & 0x80) == 0) break;
353 d <<= 1;
354 }
355
356 if (i == -1) { *vptr = c; return 1; } /* ascii character */
357 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
358
359 /* i now has a value in the range 1-5 */
360
361 s = 6*i;
362 d = (c & utf8_table3[i]) << s;
363
364 for (j = 0; j < i; j++)
365 {
366 c = *utf8bytes++;
367 if ((c & 0xc0) != 0x80) return -(j+1);
368 s -= 6;
369 d |= (c & 0x3f) << s;
370 }
371
372 /* Check that encoding was the correct unique one */
373
374 for (j = 0; j < utf8_table1_size; j++)
375 if (d <= utf8_table1[j]) break;
376 if (j != i) return -(i+1);
377
378 /* Valid value */
379
380 *vptr = d;
381 return i+1;
382 }
383
384 #endif
385
386
387
388 /*************************************************
389 * Convert character value to UTF-8 *
390 *************************************************/
391
392 /* This function takes an integer value in the range 0 - 0x7fffffff
393 and encodes it as a UTF-8 character in 0 to 6 bytes.
394
395 Arguments:
396 cvalue the character value
397 utf8bytes pointer to buffer for result - at least 6 bytes long
398
399 Returns: number of characters placed in the buffer
400 */
401
402 #if !defined NOUTF8
403
404 static int
405 ord2utf8(int cvalue, uschar *utf8bytes)
406 {
407 register int i, j;
408 for (i = 0; i < utf8_table1_size; i++)
409 if (cvalue <= utf8_table1[i]) break;
410 utf8bytes += i;
411 for (j = i; j > 0; j--)
412 {
413 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
414 cvalue >>= 6;
415 }
416 *utf8bytes = utf8_table2[i] | cvalue;
417 return i + 1;
418 }
419
420 #endif
421
422
423
424 /*************************************************
425 * Print character string *
426 *************************************************/
427
428 /* Character string printing function. Must handle UTF-8 strings in utf8
429 mode. Yields number of characters printed. If handed a NULL file, just counts
430 chars without printing. */
431
432 static int pchars(unsigned char *p, int length, FILE *f)
433 {
434 int c = 0;
435 int yield = 0;
436
437 while (length-- > 0)
438 {
439 #if !defined NOUTF8
440 if (use_utf8)
441 {
442 int rc = utf82ord(p, &c);
443
444 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
445 {
446 length -= rc - 1;
447 p += rc;
448 if (PRINTHEX(c))
449 {
450 if (f != NULL) fprintf(f, "%c", c);
451 yield++;
452 }
453 else
454 {
455 int n = 4;
456 if (f != NULL) fprintf(f, "\\x{%02x}", c);
457 yield += (n <= 0x000000ff)? 2 :
458 (n <= 0x00000fff)? 3 :
459 (n <= 0x0000ffff)? 4 :
460 (n <= 0x000fffff)? 5 : 6;
461 }
462 continue;
463 }
464 }
465 #endif
466
467 /* Not UTF-8, or malformed UTF-8 */
468
469 c = *p++;
470 if (PRINTHEX(c))
471 {
472 if (f != NULL) fprintf(f, "%c", c);
473 yield++;
474 }
475 else
476 {
477 if (f != NULL) fprintf(f, "\\x%02x", c);
478 yield += 4;
479 }
480 }
481
482 return yield;
483 }
484
485
486
487 /*************************************************
488 * Callout function *
489 *************************************************/
490
491 /* Called from PCRE as a result of the (?C) item. We print out where we are in
492 the match. Yield zero unless more callouts than the fail count, or the callout
493 data is not zero. */
494
495 static int callout(pcre_callout_block *cb)
496 {
497 FILE *f = (first_callout | callout_extra)? outfile : NULL;
498 int i, pre_start, post_start, subject_length;
499
500 if (callout_extra)
501 {
502 fprintf(f, "Callout %d: last capture = %d\n",
503 cb->callout_number, cb->capture_last);
504
505 for (i = 0; i < cb->capture_top * 2; i += 2)
506 {
507 if (cb->offset_vector[i] < 0)
508 fprintf(f, "%2d: <unset>\n", i/2);
509 else
510 {
511 fprintf(f, "%2d: ", i/2);
512 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
513 cb->offset_vector[i+1] - cb->offset_vector[i], f);
514 fprintf(f, "\n");
515 }
516 }
517 }
518
519 /* Re-print the subject in canonical form, the first time or if giving full
520 datails. On subsequent calls in the same match, we use pchars just to find the
521 printed lengths of the substrings. */
522
523 if (f != NULL) fprintf(f, "--->");
524
525 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
526 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
527 cb->current_position - cb->start_match, f);
528
529 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
530
531 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
532 cb->subject_length - cb->current_position, f);
533
534 if (f != NULL) fprintf(f, "\n");
535
536 /* Always print appropriate indicators, with callout number if not already
537 shown. For automatic callouts, show the pattern offset. */
538
539 if (cb->callout_number == 255)
540 {
541 fprintf(outfile, "%+3d ", cb->pattern_position);
542 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
543 }
544 else
545 {
546 if (callout_extra) fprintf(outfile, " ");
547 else fprintf(outfile, "%3d ", cb->callout_number);
548 }
549
550 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
551 fprintf(outfile, "^");
552
553 if (post_start > 0)
554 {
555 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
556 fprintf(outfile, "^");
557 }
558
559 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
560 fprintf(outfile, " ");
561
562 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
563 pbuffer + cb->pattern_position);
564
565 fprintf(outfile, "\n");
566 first_callout = 0;
567
568 if (cb->callout_data != NULL)
569 {
570 int callout_data = *((int *)(cb->callout_data));
571 if (callout_data != 0)
572 {
573 fprintf(outfile, "Callout data = %d\n", callout_data);
574 return callout_data;
575 }
576 }
577
578 return (cb->callout_number != callout_fail_id)? 0 :
579 (++callout_count >= callout_fail_count)? 1 : 0;
580 }
581
582
583 /*************************************************
584 * Local malloc functions *
585 *************************************************/
586
587 /* Alternative malloc function, to test functionality and show the size of the
588 compiled re. */
589
590 static void *new_malloc(size_t size)
591 {
592 void *block = malloc(size);
593 gotten_store = size;
594 if (show_malloc)
595 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
596 return block;
597 }
598
599 static void new_free(void *block)
600 {
601 if (show_malloc)
602 fprintf(outfile, "free %p\n", block);
603 free(block);
604 }
605
606
607 /* For recursion malloc/free, to test stacking calls */
608
609 static void *stack_malloc(size_t size)
610 {
611 void *block = malloc(size);
612 if (show_malloc)
613 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
614 return block;
615 }
616
617 static void stack_free(void *block)
618 {
619 if (show_malloc)
620 fprintf(outfile, "stack_free %p\n", block);
621 free(block);
622 }
623
624
625 /*************************************************
626 * Call pcre_fullinfo() *
627 *************************************************/
628
629 /* Get one piece of information from the pcre_fullinfo() function */
630
631 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
632 {
633 int rc;
634 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
635 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
636 }
637
638
639
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverse the byte order of a value.

Arguments:
  value    the value to flip
  n        2 to swap the two low-order bytes; any other value reverses the
             four low-order bytes

Returns:   the flipped value; bytes above those taking part are discarded
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int byte0 = value & 0xff;
  unsigned long int byte1 = (value >> 8) & 0xff;
  unsigned long int byte2, byte3;

  if (n == 2)
  {
    return (byte0 << 8) | byte1;
  }

  byte2 = (value >> 16) & 0xff;
  byte3 = (value >> 24) & 0xff;

  return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
653
654
655
656
657 /*************************************************
658 * Check match or recursion limit *
659 *************************************************/
660
661 static int
662 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
663 int start_offset, int options, int *use_offsets, int use_size_offsets,
664 int flag, unsigned long int *limit, int errnumber, const char *msg)
665 {
666 int count;
667 int min = 0;
668 int mid = 64;
669 int max = -1;
670
671 extra->flags |= flag;
672
673 for (;;)
674 {
675 *limit = mid;
676
677 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
678 use_offsets, use_size_offsets);
679
680 if (count == errnumber)
681 {
682 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
683 min = mid;
684 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
685 }
686
687 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
688 count == PCRE_ERROR_PARTIAL)
689 {
690 if (mid == min + 1)
691 {
692 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
693 break;
694 }
695 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
696 max = mid;
697 mid = (min + mid)/2;
698 }
699 else break; /* Some other error */
700 }
701
702 extra->flags &= ~flag;
703 return count;
704 }
705
706
707
708 /*************************************************
709 * Case-independent strncmp() function *
710 *************************************************/
711
712 /*
713 Arguments:
714 s first string
715 t second string
716 n number of characters to compare
717
718 Returns: < 0, = 0, or > 0, according to the comparison
719 */
720
721 static int
722 strncmpic(uschar *s, uschar *t, int n)
723 {
724 while (n--)
725 {
726 int c = tolower(*s++) - tolower(*t++);
727 if (c) return c;
728 }
729 return 0;
730 }
731
732
733
734 /*************************************************
735 * Check newline indicator *
736 *************************************************/
737
738 /* This is used both at compile and run-time to check for <xxx> escapes, where
739 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
740 no match.
741
742 Arguments:
743 p points after the leading '<'
744 f file for error message
745
746 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
747 */
748
749 static int
750 check_newline(uschar *p, FILE *f)
751 {
752 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
753 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
754 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
755 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
756 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
757 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
758 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
759 fprintf(f, "Unknown newline type at: <%s\n", p);
760 return 0;
761 }
762
763
764
/*************************************************
*                Usage function                  *
*************************************************/

/* Write the command line summary to the standard output. The lines for the
-dfa and -p options are left out when NODFA or NOPOSIX is defined, and the
remark about readline() depends on SUPPORT_LIBREADLINE. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
  fputs("If input is a terminal, readline() is used to read from it.\n",
        stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

  fputs("\nOptions:\n"
        " -b show compiled code (bytecode)\n"
        " -C show PCRE compile-time options and exit\n"
        " -d debug: show compiled code and information (-b and -i)\n",
        stdout);

#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -M find MATCH_LIMIT minimum for each subject\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif

  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s output store (memory) used information\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n",
        stdout);
}
802
803
804
805 /*************************************************
806 * Main Program *
807 *************************************************/
808
809 /* Read lines from named file or stdin and write to named file or stdout; lines
810 consist of a regular expression, in delimiters and optionally followed by
811 options, followed by a set of test data, terminated by an empty line. */
812
813 int main(int argc, char **argv)
814 {
815 FILE *infile = stdin;
816 int options = 0;
817 int study_options = 0;
818 int default_find_match_limit = FALSE;
819 int op = 1;
820 int timeit = 0;
821 int timeitm = 0;
822 int showinfo = 0;
823 int showstore = 0;
824 int quiet = 0;
825 int size_offsets = 45;
826 int size_offsets_max;
827 int *offsets = NULL;
828 #if !defined NOPOSIX
829 int posix = 0;
830 #endif
831 int debug = 0;
832 int done = 0;
833 int all_use_dfa = 0;
834 int yield = 0;
835 int stack_size;
836
837 /* These vectors store, end-to-end, a list of captured substring names. Assume
838 that 1024 is plenty long enough for the few names we'll be testing. */
839
840 uschar copynames[1024];
841 uschar getnames[1024];
842
843 uschar *copynamesptr;
844 uschar *getnamesptr;
845
846 /* Get buffers from malloc() so that Electric Fence will check their misuse
847 when I am debugging. They grow automatically when very long lines are read. */
848
849 buffer = (unsigned char *)malloc(buffer_size);
850 dbuffer = (unsigned char *)malloc(buffer_size);
851 pbuffer = (unsigned char *)malloc(buffer_size);
852
853 /* The outfile variable is static so that new_malloc can use it. */
854
855 outfile = stdout;
856
857 /* The following _setmode() stuff is some Windows magic that tells its runtime
858 library to translate CRLF into a single LF character. At least, that's what
859 I've been told: never having used Windows I take this all on trust. Originally
860 it set 0x8000, but then I was advised that _O_BINARY was better. */
861
862 #if defined(_WIN32) || defined(WIN32)
863 _setmode( _fileno( stdout ), _O_BINARY );
864 #endif
865
866 /* Scan options */
867
868 while (argc > 1 && argv[op][0] == '-')
869 {
870 unsigned char *endptr;
871
872 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
873 showstore = 1;
874 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
875 else if (strcmp(argv[op], "-b") == 0) debug = 1;
876 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
877 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
878 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
879 #if !defined NODFA
880 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
881 #endif
882 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
883 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
884 *endptr == 0))
885 {
886 op++;
887 argc--;
888 }
889 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
890 {
891 int both = argv[op][2] == 0;
892 int temp;
893 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
894 *endptr == 0))
895 {
896 timeitm = temp;
897 op++;
898 argc--;
899 }
900 else timeitm = LOOPREPEAT;
901 if (both) timeit = timeitm;
902 }
903 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
904 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
905 *endptr == 0))
906 {
907 #if defined(_WIN32) || defined(WIN32)
908 printf("PCRE: -S not supported on this OS\n");
909 exit(1);
910 #else
911 int rc;
912 struct rlimit rlim;
913 getrlimit(RLIMIT_STACK, &rlim);
914 rlim.rlim_cur = stack_size * 1024 * 1024;
915 rc = setrlimit(RLIMIT_STACK, &rlim);
916 if (rc != 0)
917 {
918 printf("PCRE: setrlimit() failed with error %d\n", rc);
919 exit(1);
920 }
921 op++;
922 argc--;
923 #endif
924 }
925 #if !defined NOPOSIX
926 else if (strcmp(argv[op], "-p") == 0) posix = 1;
927 #endif
928 else if (strcmp(argv[op], "-C") == 0)
929 {
930 int rc;
931 unsigned long int lrc;
932 printf("PCRE version %s\n", pcre_version());
933 printf("Compiled with\n");
934 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
935 printf(" %sUTF-8 support\n", rc? "" : "No ");
936 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
937 printf(" %sUnicode properties support\n", rc? "" : "No ");
938 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
939 /* Note that these values are always the ASCII values, even
940 in EBCDIC environments. CR is 13 and NL is 10. */
941 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
942 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
943 (rc == -2)? "ANYCRLF" :
944 (rc == -1)? "ANY" : "???");
945 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
946 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
947 "all Unicode newlines");
948 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
949 printf(" Internal link size = %d\n", rc);
950 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
951 printf(" POSIX malloc threshold = %d\n", rc);
952 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
953 printf(" Default match limit = %ld\n", lrc);
954 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
955 printf(" Default recursion depth limit = %ld\n", lrc);
956 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
957 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
958 goto EXIT;
959 }
960 else if (strcmp(argv[op], "-help") == 0 ||
961 strcmp(argv[op], "--help") == 0)
962 {
963 usage();
964 goto EXIT;
965 }
966 else
967 {
968 printf("** Unknown or malformed option %s\n", argv[op]);
969 usage();
970 yield = 1;
971 goto EXIT;
972 }
973 op++;
974 argc--;
975 }
976
977 /* Get the store for the offsets vector, and remember what it was */
978
979 size_offsets_max = size_offsets;
980 offsets = (int *)malloc(size_offsets_max * sizeof(int));
981 if (offsets == NULL)
982 {
983 printf("** Failed to get %d bytes of memory for offsets vector\n",
984 (int)(size_offsets_max * sizeof(int)));
985 yield = 1;
986 goto EXIT;
987 }
988
989 /* Sort out the input and output files */
990
991 if (argc > 1)
992 {
993 infile = fopen(argv[op], INPUT_MODE);
994 if (infile == NULL)
995 {
996 printf("** Failed to open %s\n", argv[op]);
997 yield = 1;
998 goto EXIT;
999 }
1000 }
1001
1002 if (argc > 2)
1003 {
1004 outfile = fopen(argv[op+1], OUTPUT_MODE);
1005 if (outfile == NULL)
1006 {
1007 printf("** Failed to open %s\n", argv[op+1]);
1008 yield = 1;
1009 goto EXIT;
1010 }
1011 }
1012
1013 /* Set alternative malloc function */
1014
1015 pcre_malloc = new_malloc;
1016 pcre_free = new_free;
1017 pcre_stack_malloc = stack_malloc;
1018 pcre_stack_free = stack_free;
1019
1020 /* Heading line unless quiet, then prompt for first regex if stdin */
1021
1022 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1023
1024 /* Main loop */
1025
1026 while (!done)
1027 {
1028 pcre *re = NULL;
1029 pcre_extra *extra = NULL;
1030
1031 #if !defined NOPOSIX /* There are still compilers that require no indent */
1032 regex_t preg;
1033 int do_posix = 0;
1034 #endif
1035
1036 const char *error;
1037 unsigned char *p, *pp, *ppp;
1038 unsigned char *to_file = NULL;
1039 const unsigned char *tables = NULL;
1040 unsigned long int true_size, true_study_size = 0;
1041 size_t size, regex_gotten_store;
1042 int do_study = 0;
1043 int do_debug = debug;
1044 int do_G = 0;
1045 int do_g = 0;
1046 int do_showinfo = showinfo;
1047 int do_showrest = 0;
1048 int do_flip = 0;
1049 int erroroffset, len, delimiter, poffset;
1050
1051 use_utf8 = 0;
1052 debug_lengths = 1;
1053
1054 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1055 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1056 fflush(outfile);
1057
1058 p = buffer;
1059 while (isspace(*p)) p++;
1060 if (*p == 0) continue;
1061
1062 /* See if the pattern is to be loaded pre-compiled from a file. */
1063
1064 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1065 {
1066 unsigned long int magic, get_options;
1067 uschar sbuf[8];
1068 FILE *f;
1069
1070 p++;
1071 pp = p + (int)strlen((char *)p);
1072 while (isspace(pp[-1])) pp--;
1073 *pp = 0;
1074
1075 f = fopen((char *)p, "rb");
1076 if (f == NULL)
1077 {
1078 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1079 continue;
1080 }
1081
1082 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1083
1084 true_size =
1085 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1086 true_study_size =
1087 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1088
1089 re = (real_pcre *)new_malloc(true_size);
1090 regex_gotten_store = gotten_store;
1091
1092 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1093
1094 magic = ((real_pcre *)re)->magic_number;
1095 if (magic != MAGIC_NUMBER)
1096 {
1097 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1098 {
1099 do_flip = 1;
1100 }
1101 else
1102 {
1103 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1104 fclose(f);
1105 continue;
1106 }
1107 }
1108
1109 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1110 do_flip? " (byte-inverted)" : "", p);
1111
1112 /* Need to know if UTF-8 for printing data strings */
1113
1114 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1115 use_utf8 = (get_options & PCRE_UTF8) != 0;
1116
1117 /* Now see if there is any following study data */
1118
1119 if (true_study_size != 0)
1120 {
1121 pcre_study_data *psd;
1122
1123 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1124 extra->flags = PCRE_EXTRA_STUDY_DATA;
1125
1126 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1127 extra->study_data = psd;
1128
1129 if (fread(psd, 1, true_study_size, f) != true_study_size)
1130 {
1131 FAIL_READ:
1132 fprintf(outfile, "Failed to read data from %s\n", p);
1133 if (extra != NULL) new_free(extra);
1134 if (re != NULL) new_free(re);
1135 fclose(f);
1136 continue;
1137 }
1138 fprintf(outfile, "Study data loaded from %s\n", p);
1139 do_study = 1; /* To get the data output if requested */
1140 }
1141 else fprintf(outfile, "No study data\n");
1142
1143 fclose(f);
1144 goto SHOW_INFO;
1145 }
1146
1147 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1148 the pattern; if it isn't complete, read more. */
1149
1150 delimiter = *p++;
1151
1152 if (isalnum(delimiter) || delimiter == '\\')
1153 {
1154 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1155 goto SKIP_DATA;
1156 }
1157
1158 pp = p;
1159 poffset = p - buffer;
1160
1161 for(;;)
1162 {
1163 while (*pp != 0)
1164 {
1165 if (*pp == '\\' && pp[1] != 0) pp++;
1166 else if (*pp == delimiter) break;
1167 pp++;
1168 }
1169 if (*pp != 0) break;
1170 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1171 {
1172 fprintf(outfile, "** Unexpected EOF\n");
1173 done = 1;
1174 goto CONTINUE;
1175 }
1176 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1177 }
1178
1179 /* The buffer may have moved while being extended; reset the start of data
1180 pointer to the correct relative point in the buffer. */
1181
1182 p = buffer + poffset;
1183
1184 /* If the first character after the delimiter is backslash, make
1185 the pattern end with backslash. This is purely to provide a way
1186 of testing for the error message when a pattern ends with backslash. */
1187
1188 if (pp[1] == '\\') *pp++ = '\\';
1189
1190 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1191 for callouts. */
1192
1193 *pp++ = 0;
1194 strcpy((char *)pbuffer, (char *)p);
1195
1196 /* Look for options after final delimiter */
1197
1198 options = 0;
1199 study_options = 0;
1200 log_store = showstore; /* default from command line */
1201
1202 while (*pp != 0)
1203 {
1204 switch (*pp++)
1205 {
1206 case 'f': options |= PCRE_FIRSTLINE; break;
1207 case 'g': do_g = 1; break;
1208 case 'i': options |= PCRE_CASELESS; break;
1209 case 'm': options |= PCRE_MULTILINE; break;
1210 case 's': options |= PCRE_DOTALL; break;
1211 case 'x': options |= PCRE_EXTENDED; break;
1212
1213 case '+': do_showrest = 1; break;
1214 case 'A': options |= PCRE_ANCHORED; break;
1215 case 'B': do_debug = 1; break;
1216 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1217 case 'D': do_debug = do_showinfo = 1; break;
1218 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1219 case 'F': do_flip = 1; break;
1220 case 'G': do_G = 1; break;
1221 case 'I': do_showinfo = 1; break;
1222 case 'J': options |= PCRE_DUPNAMES; break;
1223 case 'M': log_store = 1; break;
1224 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1225
1226 #if !defined NOPOSIX
1227 case 'P': do_posix = 1; break;
1228 #endif
1229
1230 case 'S': do_study = 1; break;
1231 case 'U': options |= PCRE_UNGREEDY; break;
1232 case 'X': options |= PCRE_EXTRA; break;
1233 case 'Z': debug_lengths = 0; break;
1234 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1235 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1236
1237 case 'L':
1238 ppp = pp;
1239 /* The '\r' test here is so that it works on Windows. */
1240 /* The '0' test is just in case this is an unterminated line. */
1241 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1242 *ppp = 0;
1243 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1244 {
1245 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1246 goto SKIP_DATA;
1247 }
1248 locale_set = 1;
1249 tables = pcre_maketables();
1250 pp = ppp;
1251 break;
1252
1253 case '>':
1254 to_file = pp;
1255 while (*pp != 0) pp++;
1256 while (isspace(pp[-1])) pp--;
1257 *pp = 0;
1258 break;
1259
1260 case '<':
1261 {
1262 if (strncmp((char *)pp, "JS>", 3) == 0)
1263 {
1264 options |= PCRE_JAVASCRIPT_COMPAT;
1265 pp += 3;
1266 }
1267 else
1268 {
1269 int x = check_newline(pp, outfile);
1270 if (x == 0) goto SKIP_DATA;
1271 options |= x;
1272 while (*pp++ != '>');
1273 }
1274 }
1275 break;
1276
1277 case '\r': /* So that it works in Windows */
1278 case '\n':
1279 case ' ':
1280 break;
1281
1282 default:
1283 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1284 goto SKIP_DATA;
1285 }
1286 }
1287
1288 /* Handle compiling via the POSIX interface, which doesn't support the
1289 timing, showing, or debugging options, nor the ability to pass over
1290 local character tables. */
1291
1292 #if !defined NOPOSIX
1293 if (posix || do_posix)
1294 {
1295 int rc;
1296 int cflags = 0;
1297
1298 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1299 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1300 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1301 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1302 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1303
1304 rc = regcomp(&preg, (char *)p, cflags);
1305
1306 /* Compilation failed; go back for another re, skipping to blank line
1307 if non-interactive. */
1308
1309 if (rc != 0)
1310 {
1311 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1312 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1313 goto SKIP_DATA;
1314 }
1315 }
1316
1317 /* Handle compiling via the native interface */
1318
1319 else
1320 #endif /* !defined NOPOSIX */
1321
1322 {
1323 if (timeit > 0)
1324 {
1325 register int i;
1326 clock_t time_taken;
1327 clock_t start_time = clock();
1328 for (i = 0; i < timeit; i++)
1329 {
1330 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1331 if (re != NULL) free(re);
1332 }
1333 time_taken = clock() - start_time;
1334 fprintf(outfile, "Compile time %.4f milliseconds\n",
1335 (((double)time_taken * 1000.0) / (double)timeit) /
1336 (double)CLOCKS_PER_SEC);
1337 }
1338
1339 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1340
1341 /* Compilation failed; go back for another re, skipping to blank line
1342 if non-interactive. */
1343
1344 if (re == NULL)
1345 {
1346 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1347 SKIP_DATA:
1348 if (infile != stdin)
1349 {
1350 for (;;)
1351 {
1352 if (extend_inputline(infile, buffer, NULL) == NULL)
1353 {
1354 done = 1;
1355 goto CONTINUE;
1356 }
1357 len = (int)strlen((char *)buffer);
1358 while (len > 0 && isspace(buffer[len-1])) len--;
1359 if (len == 0) break;
1360 }
1361 fprintf(outfile, "\n");
1362 }
1363 goto CONTINUE;
1364 }
1365
1366 /* Compilation succeeded; print data if required. There are now two
1367 info-returning functions. The old one has a limited interface and
1368 returns only limited data. Check that it agrees with the newer one. */
1369
1370 if (log_store)
1371 fprintf(outfile, "Memory allocation (code space): %d\n",
1372 (int)(gotten_store -
1373 sizeof(real_pcre) -
1374 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1375
1376 /* Extract the size for possible writing before possibly flipping it,
1377 and remember the store that was got. */
1378
1379 true_size = ((real_pcre *)re)->size;
1380 regex_gotten_store = gotten_store;
1381
1382 /* If /S was present, study the regexp to generate additional info to
1383 help with the matching. */
1384
1385 if (do_study)
1386 {
1387 if (timeit > 0)
1388 {
1389 register int i;
1390 clock_t time_taken;
1391 clock_t start_time = clock();
1392 for (i = 0; i < timeit; i++)
1393 extra = pcre_study(re, study_options, &error);
1394 time_taken = clock() - start_time;
1395 if (extra != NULL) free(extra);
1396 fprintf(outfile, " Study time %.4f milliseconds\n",
1397 (((double)time_taken * 1000.0) / (double)timeit) /
1398 (double)CLOCKS_PER_SEC);
1399 }
1400 extra = pcre_study(re, study_options, &error);
1401 if (error != NULL)
1402 fprintf(outfile, "Failed to study: %s\n", error);
1403 else if (extra != NULL)
1404 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1405 }
1406
1407 /* If the 'F' option was present, we flip the bytes of all the integer
1408 fields in the regex data block and the study block. This is to make it
1409 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1410 compiled on a different architecture. */
1411
1412 if (do_flip)
1413 {
1414 real_pcre *rre = (real_pcre *)re;
1415 rre->magic_number =
1416 byteflip(rre->magic_number, sizeof(rre->magic_number));
1417 rre->size = byteflip(rre->size, sizeof(rre->size));
1418 rre->options = byteflip(rre->options, sizeof(rre->options));
1419 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1420 rre->top_bracket =
1421 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1422 rre->top_backref =
1423 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1424 rre->first_byte =
1425 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1426 rre->req_byte =
1427 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1428 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1429 sizeof(rre->name_table_offset));
1430 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1431 sizeof(rre->name_entry_size));
1432 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1433 sizeof(rre->name_count));
1434
1435 if (extra != NULL)
1436 {
1437 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1438 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1439 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1440 }
1441 }
1442
1443 /* Extract information from the compiled data if required */
1444
1445 SHOW_INFO:
1446
1447 if (do_debug)
1448 {
1449 fprintf(outfile, "------------------------------------------------------------------\n");
1450 pcre_printint(re, outfile, debug_lengths);
1451 }
1452
1453 if (do_showinfo)
1454 {
1455 unsigned long int get_options, all_options;
1456 #if !defined NOINFOCHECK
1457 int old_first_char, old_options, old_count;
1458 #endif
1459 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1460 hascrorlf;
1461 int nameentrysize, namecount;
1462 const uschar *nametable;
1463
1464 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1465 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1466 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1467 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1468 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1469 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1470 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1471 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1472 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1473 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1474 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1475 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1476
1477 #if !defined NOINFOCHECK
1478 old_count = pcre_info(re, &old_options, &old_first_char);
1479 if (count < 0) fprintf(outfile,
1480 "Error %d from pcre_info()\n", count);
1481 else
1482 {
1483 if (old_count != count) fprintf(outfile,
1484 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1485 old_count);
1486
1487 if (old_first_char != first_char) fprintf(outfile,
1488 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1489 first_char, old_first_char);
1490
1491 if (old_options != (int)get_options) fprintf(outfile,
1492 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1493 get_options, old_options);
1494 }
1495 #endif
1496
1497 if (size != regex_gotten_store) fprintf(outfile,
1498 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1499 (int)size, (int)regex_gotten_store);
1500
1501 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1502 if (backrefmax > 0)
1503 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1504
1505 if (namecount > 0)
1506 {
1507 fprintf(outfile, "Named capturing subpatterns:\n");
1508 while (namecount-- > 0)
1509 {
1510 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1511 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1512 GET2(nametable, 0));
1513 nametable += nameentrysize;
1514 }
1515 }
1516
1517 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1518 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1519
1520 all_options = ((real_pcre *)re)->options;
1521 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1522
1523 if (get_options == 0) fprintf(outfile, "No options\n");
1524 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1525 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1526 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1527 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1528 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1529 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1530 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1531 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1532 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1533 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1534 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1535 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1536 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1537 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1538 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1539 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1540
1541 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1542
1543 switch (get_options & PCRE_NEWLINE_BITS)
1544 {
1545 case PCRE_NEWLINE_CR:
1546 fprintf(outfile, "Forced newline sequence: CR\n");
1547 break;
1548
1549 case PCRE_NEWLINE_LF:
1550 fprintf(outfile, "Forced newline sequence: LF\n");
1551 break;
1552
1553 case PCRE_NEWLINE_CRLF:
1554 fprintf(outfile, "Forced newline sequence: CRLF\n");
1555 break;
1556
1557 case PCRE_NEWLINE_ANYCRLF:
1558 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1559 break;
1560
1561 case PCRE_NEWLINE_ANY:
1562 fprintf(outfile, "Forced newline sequence: ANY\n");
1563 break;
1564
1565 default:
1566 break;
1567 }
1568
1569 if (first_char == -1)
1570 {
1571 fprintf(outfile, "First char at start or follows newline\n");
1572 }
1573 else if (first_char < 0)
1574 {
1575 fprintf(outfile, "No first char\n");
1576 }
1577 else
1578 {
1579 int ch = first_char & 255;
1580 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1581 "" : " (caseless)";
1582 if (PRINTHEX(ch))
1583 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1584 else
1585 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1586 }
1587
1588 if (need_char < 0)
1589 {
1590 fprintf(outfile, "No need char\n");
1591 }
1592 else
1593 {
1594 int ch = need_char & 255;
1595 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1596 "" : " (caseless)";
1597 if (PRINTHEX(ch))
1598 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1599 else
1600 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1601 }
1602
1603 /* Don't output study size; at present it is in any case a fixed
1604 value, but it varies, depending on the computer architecture, and
1605 so messes up the test suite. (And with the /F option, it might be
1606 flipped.) */
1607
1608 if (do_study)
1609 {
1610 if (extra == NULL)
1611 fprintf(outfile, "Study returned NULL\n");
1612 else
1613 {
1614 uschar *start_bits = NULL;
1615 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1616
1617 if (start_bits == NULL)
1618 fprintf(outfile, "No starting byte set\n");
1619 else
1620 {
1621 int i;
1622 int c = 24;
1623 fprintf(outfile, "Starting byte set: ");
1624 for (i = 0; i < 256; i++)
1625 {
1626 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1627 {
1628 if (c > 75)
1629 {
1630 fprintf(outfile, "\n ");
1631 c = 2;
1632 }
1633 if (PRINTHEX(i) && i != ' ')
1634 {
1635 fprintf(outfile, "%c ", i);
1636 c += 2;
1637 }
1638 else
1639 {
1640 fprintf(outfile, "\\x%02x ", i);
1641 c += 5;
1642 }
1643 }
1644 }
1645 fprintf(outfile, "\n");
1646 }
1647 }
1648 }
1649 }
1650
1651 /* If the '>' option was present, we write out the regex to a file, and
1652 that is all. The first 8 bytes of the file are the regex length and then
1653 the study length, in big-endian order. */
1654
1655 if (to_file != NULL)
1656 {
1657 FILE *f = fopen((char *)to_file, "wb");
1658 if (f == NULL)
1659 {
1660 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1661 }
1662 else
1663 {
1664 uschar sbuf[8];
1665 sbuf[0] = (uschar)((true_size >> 24) & 255);
1666 sbuf[1] = (uschar)((true_size >> 16) & 255);
1667 sbuf[2] = (uschar)((true_size >> 8) & 255);
1668 sbuf[3] = (uschar)((true_size) & 255);
1669
1670 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1671 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1672 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1673 sbuf[7] = (uschar)((true_study_size) & 255);
1674
1675 if (fwrite(sbuf, 1, 8, f) < 8 ||
1676 fwrite(re, 1, true_size, f) < true_size)
1677 {
1678 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1679 }
1680 else
1681 {
1682 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1683 if (extra != NULL)
1684 {
1685 if (fwrite(extra->study_data, 1, true_study_size, f) <
1686 true_study_size)
1687 {
1688 fprintf(outfile, "Write error on %s: %s\n", to_file,
1689 strerror(errno));
1690 }
1691 else fprintf(outfile, "Study data written to %s\n", to_file);
1692
1693 }
1694 }
1695 fclose(f);
1696 }
1697
1698 new_free(re);
1699 if (extra != NULL) new_free(extra);
1700 if (tables != NULL) new_free((void *)tables);
1701 continue; /* With next regex */
1702 }
1703 } /* End of non-POSIX compile */
1704
1705 /* Read data lines and test them */
1706
1707 for (;;)
1708 {
1709 uschar *q;
1710 uschar *bptr;
1711 int *use_offsets = offsets;
1712 int use_size_offsets = size_offsets;
1713 int callout_data = 0;
1714 int callout_data_set = 0;
1715 int count, c;
1716 int copystrings = 0;
1717 int find_match_limit = default_find_match_limit;
1718 int getstrings = 0;
1719 int getlist = 0;
1720 int gmatched = 0;
1721 int start_offset = 0;
1722 int g_notempty = 0;
1723 int use_dfa = 0;
1724
1725 options = 0;
1726
1727 *copynames = 0;
1728 *getnames = 0;
1729
1730 copynamesptr = copynames;
1731 getnamesptr = getnames;
1732
1733 pcre_callout = callout;
1734 first_callout = 1;
1735 callout_extra = 0;
1736 callout_count = 0;
1737 callout_fail_count = 999999;
1738 callout_fail_id = -1;
1739 show_malloc = 0;
1740
1741 if (extra != NULL) extra->flags &=
1742 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1743
1744 len = 0;
1745 for (;;)
1746 {
1747 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1748 {
1749 if (len > 0) break;
1750 done = 1;
1751 goto CONTINUE;
1752 }
1753 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1754 len = (int)strlen((char *)buffer);
1755 if (buffer[len-1] == '\n') break;
1756 }
1757
1758 while (len > 0 && isspace(buffer[len-1])) len--;
1759 buffer[len] = 0;
1760 if (len == 0) break;
1761
1762 p = buffer;
1763 while (isspace(*p)) p++;
1764
1765 bptr = q = dbuffer;
1766 while ((c = *p++) != 0)
1767 {
1768 int i = 0;
1769 int n = 0;
1770
1771 if (c == '\\') switch ((c = *p++))
1772 {
1773 case 'a': c = 7; break;
1774 case 'b': c = '\b'; break;
1775 case 'e': c = 27; break;
1776 case 'f': c = '\f'; break;
1777 case 'n': c = '\n'; break;
1778 case 'r': c = '\r'; break;
1779 case 't': c = '\t'; break;
1780 case 'v': c = '\v'; break;
1781
1782 case '0': case '1': case '2': case '3':
1783 case '4': case '5': case '6': case '7':
1784 c -= '0';
1785 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1786 c = c * 8 + *p++ - '0';
1787
1788 #if !defined NOUTF8
1789 if (use_utf8 && c > 255)
1790 {
1791 unsigned char buff8[8];
1792 int ii, utn;
1793 utn = ord2utf8(c, buff8);
1794 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1795 c = buff8[ii]; /* Last byte */
1796 }
1797 #endif
1798 break;
1799
1800 case 'x':
1801
1802 /* Handle \x{..} specially - new Perl thing for utf8 */
1803
1804 #if !defined NOUTF8
1805 if (*p == '{')
1806 {
1807 unsigned char *pt = p;
1808 c = 0;
1809 while (isxdigit(*(++pt)))
1810 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1811 if (*pt == '}')
1812 {
1813 unsigned char buff8[8];
1814 int ii, utn;
1815 if (use_utf8)
1816 {
1817 utn = ord2utf8(c, buff8);
1818 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1819 c = buff8[ii]; /* Last byte */
1820 }
1821 else
1822 {
1823 if (c > 255)
1824 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1825 "UTF-8 mode is not enabled.\n"
1826 "** Truncation will probably give the wrong result.\n", c);
1827 }
1828 p = pt + 1;
1829 break;
1830 }
1831 /* Not correct form; fall through */
1832 }
1833 #endif
1834
1835 /* Ordinary \x */
1836
1837 c = 0;
1838 while (i++ < 2 && isxdigit(*p))
1839 {
1840 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1841 p++;
1842 }
1843 break;
1844
1845 case 0: /* \ followed by EOF allows for an empty line */
1846 p--;
1847 continue;
1848
1849 case '>':
1850 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1851 continue;
1852
1853 case 'A': /* Option setting */
1854 options |= PCRE_ANCHORED;
1855 continue;
1856
1857 case 'B':
1858 options |= PCRE_NOTBOL;
1859 continue;
1860
1861 case 'C':
1862 if (isdigit(*p)) /* Set copy string */
1863 {
1864 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1865 copystrings |= 1 << n;
1866 }
1867 else if (isalnum(*p))
1868 {
1869 uschar *npp = copynamesptr;
1870 while (isalnum(*p)) *npp++ = *p++;
1871 *npp++ = 0;
1872 *npp = 0;
1873 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1874 if (n < 0)
1875 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1876 copynamesptr = npp;
1877 }
1878 else if (*p == '+')
1879 {
1880 callout_extra = 1;
1881 p++;
1882 }
1883 else if (*p == '-')
1884 {
1885 pcre_callout = NULL;
1886 p++;
1887 }
1888 else if (*p == '!')
1889 {
1890 callout_fail_id = 0;
1891 p++;
1892 while(isdigit(*p))
1893 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1894 callout_fail_count = 0;
1895 if (*p == '!')
1896 {
1897 p++;
1898 while(isdigit(*p))
1899 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1900 }
1901 }
1902 else if (*p == '*')
1903 {
1904 int sign = 1;
1905 callout_data = 0;
1906 if (*(++p) == '-') { sign = -1; p++; }
1907 while(isdigit(*p))
1908 callout_data = callout_data * 10 + *p++ - '0';
1909 callout_data *= sign;
1910 callout_data_set = 1;
1911 }
1912 continue;
1913
1914 #if !defined NODFA
1915 case 'D':
1916 #if !defined NOPOSIX
1917 if (posix || do_posix)
1918 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1919 else
1920 #endif
1921 use_dfa = 1;
1922 continue;
1923
1924 case 'F':
1925 options |= PCRE_DFA_SHORTEST;
1926 continue;
1927 #endif
1928
1929 case 'G':
1930 if (isdigit(*p))
1931 {
1932 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1933 getstrings |= 1 << n;
1934 }
1935 else if (isalnum(*p))
1936 {
1937 uschar *npp = getnamesptr;
1938 while (isalnum(*p)) *npp++ = *p++;
1939 *npp++ = 0;
1940 *npp = 0;
1941 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1942 if (n < 0)
1943 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1944 getnamesptr = npp;
1945 }
1946 continue;
1947
1948 case 'L':
1949 getlist = 1;
1950 continue;
1951
1952 case 'M':
1953 find_match_limit = 1;
1954 continue;
1955
1956 case 'N':
1957 options |= PCRE_NOTEMPTY;
1958 continue;
1959
1960 case 'O':
1961 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1962 if (n > size_offsets_max)
1963 {
1964 size_offsets_max = n;
1965 free(offsets);
1966 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1967 if (offsets == NULL)
1968 {
1969 printf("** Failed to get %d bytes of memory for offsets vector\n",
1970 (int)(size_offsets_max * sizeof(int)));
1971 yield = 1;
1972 goto EXIT;
1973 }
1974 }
1975 use_size_offsets = n;
1976 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1977 continue;
1978
1979 case 'P':
1980 options |= PCRE_PARTIAL;
1981 continue;
1982
1983 case 'Q':
1984 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1985 if (extra == NULL)
1986 {
1987 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1988 extra->flags = 0;
1989 }
1990 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1991 extra->match_limit_recursion = n;
1992 continue;
1993
1994 case 'q':
1995 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1996 if (extra == NULL)
1997 {
1998 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1999 extra->flags = 0;
2000 }
2001 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2002 extra->match_limit = n;
2003 continue;
2004
2005 #if !defined NODFA
2006 case 'R':
2007 options |= PCRE_DFA_RESTART;
2008 continue;
2009 #endif
2010
2011 case 'S':
2012 show_malloc = 1;
2013 continue;
2014
2015 case 'Y':
2016 options |= PCRE_NO_START_OPTIMIZE;
2017 continue;
2018
2019 case 'Z':
2020 options |= PCRE_NOTEOL;
2021 continue;
2022
2023 case '?':
2024 options |= PCRE_NO_UTF8_CHECK;
2025 continue;
2026
2027 case '<':
2028 {
2029 int x = check_newline(p, outfile);
2030 if (x == 0) goto NEXT_DATA;
2031 options |= x;
2032 while (*p++ != '>');
2033 }
2034 continue;
2035 }
2036 *q++ = c;
2037 }
2038 *q = 0;
2039 len = q - dbuffer;
2040
2041 /* Move the data to the end of the buffer so that a read over the end of
2042 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2043 we are using the POSIX interface, we must include the terminating zero. */
2044
2045 #if !defined NOPOSIX
2046 if (posix || do_posix)
2047 {
2048 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2049 bptr += buffer_size - len - 1;
2050 }
2051 else
2052 #endif
2053 {
2054 memmove(bptr + buffer_size - len, bptr, len);
2055 bptr += buffer_size - len;
2056 }
2057
2058 if ((all_use_dfa || use_dfa) && find_match_limit)
2059 {
2060 printf("**Match limit not relevant for DFA matching: ignored\n");
2061 find_match_limit = 0;
2062 }
2063
2064 /* Handle matching via the POSIX interface, which does not
2065 support timing or playing with the match limit or callout data. */
2066
2067 #if !defined NOPOSIX
2068 if (posix || do_posix)
2069 {
2070 int rc;
2071 int eflags = 0;
2072 regmatch_t *pmatch = NULL;
2073 if (use_size_offsets > 0)
2074 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2075 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2076 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2077 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2078
2079 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2080
2081 if (rc != 0)
2082 {
2083 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2084 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2085 }
2086 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2087 != 0)
2088 {
2089 fprintf(outfile, "Matched with REG_NOSUB\n");
2090 }
2091 else
2092 {
2093 size_t i;
2094 for (i = 0; i < (size_t)use_size_offsets; i++)
2095 {
2096 if (pmatch[i].rm_so >= 0)
2097 {
2098 fprintf(outfile, "%2d: ", (int)i);
2099 (void)pchars(dbuffer + pmatch[i].rm_so,
2100 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2101 fprintf(outfile, "\n");
2102 if (i == 0 && do_showrest)
2103 {
2104 fprintf(outfile, " 0+ ");
2105 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2106 outfile);
2107 fprintf(outfile, "\n");
2108 }
2109 }
2110 }
2111 }
2112 free(pmatch);
2113 }
2114
2115 /* Handle matching via the native interface - repeats for /g and /G */
2116
2117 else
2118 #endif /* !defined NOPOSIX */
2119
2120 for (;; gmatched++) /* Loop for /g or /G */
2121 {
2122 if (timeitm > 0)
2123 {
2124 register int i;
2125 clock_t time_taken;
2126 clock_t start_time = clock();
2127
2128 #if !defined NODFA
2129 if (all_use_dfa || use_dfa)
2130 {
2131 int workspace[1000];
2132 for (i = 0; i < timeitm; i++)
2133 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2134 options | g_notempty, use_offsets, use_size_offsets, workspace,
2135 sizeof(workspace)/sizeof(int));
2136 }
2137 else
2138 #endif
2139
2140 for (i = 0; i < timeitm; i++)
2141 count = pcre_exec(re, extra, (char *)bptr, len,
2142 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2143
2144 time_taken = clock() - start_time;
2145 fprintf(outfile, "Execute time %.4f milliseconds\n",
2146 (((double)time_taken * 1000.0) / (double)timeitm) /
2147 (double)CLOCKS_PER_SEC);
2148 }
2149
2150 /* If find_match_limit is set, we want to do repeated matches with
2151 varying limits in order to find the minimum value for the match limit and
2152 for the recursion limit. */
2153
2154 if (find_match_limit)
2155 {
2156 if (extra == NULL)
2157 {
2158 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2159 extra->flags = 0;
2160 }
2161
2162 (void)check_match_limit(re, extra, bptr, len, start_offset,
2163 options|g_notempty, use_offsets, use_size_offsets,
2164 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2165 PCRE_ERROR_MATCHLIMIT, "match()");
2166
2167 count = check_match_limit(re, extra, bptr, len, start_offset,
2168 options|g_notempty, use_offsets, use_size_offsets,
2169 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2170 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2171 }
2172
2173 /* If callout_data is set, use the interface with additional data */
2174
2175 else if (callout_data_set)
2176 {
2177 if (extra == NULL)
2178 {
2179 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2180 extra->flags = 0;
2181 }
2182 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2183 extra->callout_data = &callout_data;
2184 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2185 options | g_notempty, use_offsets, use_size_offsets);
2186 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2187 }
2188
2189 /* The normal case is just to do the match once, with the default
2190 value of match_limit. */
2191
2192 #if !defined NODFA
2193 else if (all_use_dfa || use_dfa)
2194 {
2195 int workspace[1000];
2196 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2197 options | g_notempty, use_offsets, use_size_offsets, workspace,
2198 sizeof(workspace)/sizeof(int));
2199 if (count == 0)
2200 {
2201 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2202 count = use_size_offsets/2;
2203 }
2204 }
2205 #endif
2206
2207 else
2208 {
2209 count = pcre_exec(re, extra, (char *)bptr, len,
2210 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2211 if (count == 0)
2212 {
2213 fprintf(outfile, "Matched, but too many substrings\n");
2214 count = use_size_offsets/3;
2215 }
2216 }
2217
2218 /* Matched */
2219
2220 if (count >= 0)
2221 {
2222 int i, maxcount;
2223
2224 #if !defined NODFA
2225 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2226 #endif
2227 maxcount = use_size_offsets/3;
2228
2229 /* This is a check against a lunatic return value. */
2230
2231 if (count > maxcount)
2232 {
2233 fprintf(outfile,
2234 "** PCRE error: returned count %d is too big for offset size %d\n",
2235 count, use_size_offsets);
2236 count = use_size_offsets/3;
2237 if (do_g || do_G)
2238 {
2239 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2240 do_g = do_G = FALSE; /* Break g/G loop */
2241 }
2242 }
2243
2244 for (i = 0; i < count * 2; i += 2)
2245 {
2246 if (use_offsets[i] < 0)
2247 fprintf(outfile, "%2d: <unset>\n", i/2);
2248 else
2249 {
2250 fprintf(outfile, "%2d: ", i/2);
2251 (void)pchars(bptr + use_offsets[i],
2252 use_offsets[i+1] - use_offsets[i], outfile);
2253 fprintf(outfile, "\n");
2254 if (i == 0)
2255 {
2256 if (do_showrest)
2257 {
2258 fprintf(outfile, " 0+ ");
2259 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2260 outfile);
2261 fprintf(outfile, "\n");
2262 }
2263 }
2264 }
2265 }
2266
2267 for (i = 0; i < 32; i++)
2268 {
2269 if ((copystrings & (1 << i)) != 0)
2270 {
2271 char copybuffer[256];
2272 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2273 i, copybuffer, sizeof(copybuffer));
2274 if (rc < 0)
2275 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2276 else
2277 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2278 }
2279 }
2280
2281 for (copynamesptr = copynames;
2282 *copynamesptr != 0;
2283 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2284 {
2285 char copybuffer[256];
2286 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2287 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2288 if (rc < 0)
2289 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2290 else
2291 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2292 }
2293
2294 for (i = 0; i < 32; i++)
2295 {
2296 if ((getstrings & (1 << i)) != 0)
2297 {
2298 const char *substring;
2299 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2300 i, &substring);
2301 if (rc < 0)
2302 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2303 else
2304 {
2305 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2306 pcre_free_substring(substring);
2307 }
2308 }
2309 }
2310
2311 for (getnamesptr = getnames;
2312 *getnamesptr != 0;
2313 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2314 {
2315 const char *substring;
2316 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2317 count, (char *)getnamesptr, &substring);
2318 if (rc < 0)
2319 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2320 else
2321 {
2322 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2323 pcre_free_substring(substring);
2324 }
2325 }
2326
2327 if (getlist)
2328 {
2329 const char **stringlist;
2330 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2331 &stringlist);
2332 if (rc < 0)
2333 fprintf(outfile, "get substring list failed %d\n", rc);
2334 else
2335 {
2336 for (i = 0; i < count; i++)
2337 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2338 if (stringlist[i] != NULL)
2339 fprintf(outfile, "string list not terminated by NULL\n");
2340 /* free((void *)stringlist); */
2341 pcre_free_substring_list(stringlist);
2342 }
2343 }
2344 }
2345
2346 /* There was a partial match */
2347
2348 else if (count == PCRE_ERROR_PARTIAL)
2349 {
2350 fprintf(outfile, "Partial match");
2351 #if !defined NODFA
2352 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2353 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2354 bptr + use_offsets[0]);
2355 #endif
2356 fprintf(outfile, "\n");
2357 break; /* Out of the /g loop */
2358 }
2359
2360 /* Failed to match. If this is a /g or /G loop and we previously set
2361 g_notempty after a null match, this is not necessarily the end. We want
2362 to advance the start offset, and continue. We won't be at the end of the
2363 string - that was checked before setting g_notempty.
2364
2365 Complication arises in the case when the newline option is "any" or
2366 "anycrlf". If the previous match was at the end of a line terminated by
2367 CRLF, an advance of one character just passes the \r, whereas we should
2368 prefer the longer newline sequence, as does the code in pcre_exec().
2369 Fudge the offset value to achieve this.
2370
2371 Otherwise, in the case of UTF-8 matching, the advance must be one
2372 character, not one byte. */
2373
2374 else
2375 {
2376 if (g_notempty != 0)
2377 {
2378 int onechar = 1;
2379 unsigned int obits = ((real_pcre *)re)->options;
2380 use_offsets[0] = start_offset;
2381 if ((obits & PCRE_NEWLINE_BITS) == 0)
2382 {
2383 int d;
2384 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2385 /* Note that these values are always the ASCII ones, even in
2386 EBCDIC environments. CR = 13, NL = 10. */
2387 obits = (d == 13)? PCRE_NEWLINE_CR :
2388 (d == 10)? PCRE_NEWLINE_LF :
2389 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2390 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2391 (d == -1)? PCRE_NEWLINE_ANY : 0;
2392 }
2393 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2394 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2395 &&
2396 start_offset < len - 1 &&
2397 bptr[start_offset] == '\r' &&
2398 bptr[start_offset+1] == '\n')
2399 onechar++;
2400 else if (use_utf8)
2401 {
2402 while (start_offset + onechar < len)
2403 {
2404 int tb = bptr[start_offset+onechar];
2405 if (tb <= 127) break;
2406 tb &= 0xc0;
2407 if (tb != 0 && tb != 0xc0) onechar++;
2408 }
2409 }
2410 use_offsets[1] = start_offset + onechar;
2411 }
2412 else
2413 {
2414 if (count == PCRE_ERROR_NOMATCH)
2415 {
2416 if (gmatched == 0) fprintf(outfile, "No match\n");
2417 }
2418 else fprintf(outfile, "Error %d\n", count);
2419 break; /* Out of the /g loop */
2420 }
2421 }
2422
2423 /* If not /g or /G we are done */
2424
2425 if (!do_g && !do_G) break;
2426
2427 /* If we have matched an empty string, first check to see if we are at
2428 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2429 what Perl's /g option does. This turns out to be rather cunning. First
2430 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2431 same point. If this fails (picked up above) we advance to the next
2432 character. */
2433
2434 g_notempty = 0;
2435
2436 if (use_offsets[0] == use_offsets[1])
2437 {
2438 if (use_offsets[0] == len) break;
2439 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2440 }
2441
2442 /* For /g, update the start offset, leaving the rest alone */
2443
2444 if (do_g) start_offset = use_offsets[1];
2445
2446 /* For /G, update the pointer and length */
2447
2448 else
2449 {
2450 bptr += use_offsets[1];
2451 len -= use_offsets[1];
2452 }
2453 } /* End of loop for /g and /G */
2454
2455 NEXT_DATA: continue;
2456 } /* End of loop for data lines */
2457
2458 CONTINUE:
2459
2460 #if !defined NOPOSIX
2461 if (posix || do_posix) regfree(&preg);
2462 #endif
2463
2464 if (re != NULL) new_free(re);
2465 if (extra != NULL) new_free(extra);
2466 if (tables != NULL)
2467 {
2468 new_free((void *)tables);
2469 setlocale(LC_CTYPE, "C");
2470 locale_set = 0;
2471 }
2472 }
2473
2474 if (infile == stdin) fprintf(outfile, "\n");
2475
2476 EXIT:
2477
2478 if (infile != NULL && infile != stdin) fclose(infile);
2479 if (outfile != NULL && outfile != stdout) fclose(outfile);
2480
2481 free(buffer);
2482 free(dbuffer);
2483 free(pbuffer);
2484 free(offsets);
2485
2486 return yield;
2487 }
2488
2489 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12