/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 507 - (show annotations) (download)
Wed Mar 10 16:08:01 2010 UTC (4 years, 4 months ago) by ph10
File MIME type: text/plain
File size: 74179 byte(s)
Tidies for 8.02-RC1 release.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #ifndef isatty
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
78 #ifndef fileno
79 #define fileno _fileno
80 #endif
81
82 #else
83 #include <sys/time.h> /* These two includes are needed */
84 #include <sys/resource.h> /* for setrlimit(). */
85 #define INPUT_MODE "rb"
86 #define OUTPUT_MODE "wb"
87 #endif
88
89
90 /* We have to include pcre_internal.h because we need the internal info for
91 displaying the results of pcre_study() and we also need to know about the
92 internal macros, structures, and other internal data values; pcretest has
93 "inside information" compared to a program that strictly follows the PCRE API.
94
95 Although pcre_internal.h does itself include pcre.h, we explicitly include it
96 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
97 appropriately for an application, not for building PCRE. */
98
99 #include "pcre.h"
100 #include "pcre_internal.h"
101
102 /* We need access to some of the data tables that PCRE uses. So as not to have
103 to keep two copies, we include the source file here, changing the names of the
104 external symbols to prevent clashes. */
105
106 #define _pcre_ucp_gentype ucp_gentype
107 #define _pcre_utf8_table1 utf8_table1
108 #define _pcre_utf8_table1_size utf8_table1_size
109 #define _pcre_utf8_table2 utf8_table2
110 #define _pcre_utf8_table3 utf8_table3
111 #define _pcre_utf8_table4 utf8_table4
112 #define _pcre_utt utt
113 #define _pcre_utt_size utt_size
114 #define _pcre_utt_names utt_names
115 #define _pcre_OP_lengths OP_lengths
116
117 #include "pcre_tables.c"
118
119 /* We also need the pcre_printint() function for printing out compiled
120 patterns. This function is in a separate file so that it can be included in
121 pcre_compile.c when that module is compiled with debugging enabled. It needs to
122 know which case is being compiled. */
123
124 #define COMPILING_PCRETEST
125 #include "pcre_printint.src"
126
127 /* The definition of the macro PRINTABLE, which determines whether to print an
128 output character as-is or as a hex value when showing compiled patterns, is
129 contained in the printint.src file. We use it here also, in cases when the
130 locale has not been explicitly changed, so as to get consistent output from
131 systems that differ in their output from isprint() even in the "C" locale. */
132
133 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
134
135 /* It is possible to compile this test program without including support for
136 testing the POSIX interface, though this is not available via the standard
137 Makefile. */
138
139 #if !defined NOPOSIX
140 #include "pcreposix.h"
141 #endif
142
143 /* It is also possible, for the benefit of the version currently imported into
144 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
145 interface to the DFA matcher (NODFA), and without the doublecheck of the old
146 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
147 UTF8 support if PCRE is built without it. */
148
149 #ifndef SUPPORT_UTF8
150 #ifndef NOUTF8
151 #define NOUTF8
152 #endif
153 #endif
154
155
156 /* Other parameters */
157
158 #ifndef CLOCKS_PER_SEC
159 #ifdef CLK_TCK
160 #define CLOCKS_PER_SEC CLK_TCK
161 #else
162 #define CLOCKS_PER_SEC 100
163 #endif
164 #endif
165
166 /* This is the default loop count for timing. */
167
168 #define LOOPREPEAT 500000
169
170 /* Static variables */
171
172 static FILE *outfile;
173 static int log_store = 0;
174 static int callout_count;
175 static int callout_extra;
176 static int callout_fail_count;
177 static int callout_fail_id;
178 static int debug_lengths;
179 static int first_callout;
180 static int locale_set = 0;
181 static int show_malloc;
182 static int use_utf8;
183 static size_t gotten_store;
184
185 /* The buffers grow automatically if very long input lines are encountered. */
186
187 static int buffer_size = 50000;
188 static uschar *buffer = NULL;
189 static uschar *dbuffer = NULL;
190 static uschar *pbuffer = NULL;
191
192
193
194 /*************************************************
195 * Read or extend an input line *
196 *************************************************/
197
198 /* Input lines are read into buffer, but both patterns and data lines can be
199 continued over multiple input lines. In addition, if the buffer fills up, we
200 want to automatically expand it so as to be able to handle extremely large
201 lines that are needed for certain stress tests. When the input buffer is
202 expanded, the other two buffers must also be expanded likewise, and the
203 contents of pbuffer, which are a copy of the input for callouts, must be
204 preserved (for when expansion happens for a data line). This is not the most
205 optimal way of handling this, but hey, this is just a test program!
206
207 Arguments:
208 f the file to read
209 start where in buffer to start (this *must* be within buffer)
210 prompt for stdin or readline()
211
212 Returns: pointer to the start of new data
213 could be a copy of start, or could be moved
214 NULL if no data read and EOF reached
215 */
216
217 static uschar *
218 extend_inputline(FILE *f, uschar *start, const char *prompt)
219 {
220 uschar *here = start;
221
222 for (;;)
223 {
224 int rlen = buffer_size - (here - buffer);
225
226 if (rlen > 1000)
227 {
228 int dlen;
229
230 /* If libreadline support is required, use readline() to read a line if the
231 input is a terminal. Note that readline() removes the trailing newline, so
232 we must put it back again, to be compatible with fgets(). */
233
234 #ifdef SUPPORT_LIBREADLINE
235 if (isatty(fileno(f)))
236 {
237 size_t len;
238 char *s = readline(prompt);
239 if (s == NULL) return (here == start)? NULL : start;
240 len = strlen(s);
241 if (len > 0) add_history(s);
242 if (len > rlen - 1) len = rlen - 1;
243 memcpy(here, s, len);
244 here[len] = '\n';
245 here[len+1] = 0;
246 free(s);
247 }
248 else
249 #endif
250
251 /* Read the next line by normal means, prompting if the file is stdin. */
252
253 {
254 if (f == stdin) printf(prompt);
255 if (fgets((char *)here, rlen, f) == NULL)
256 return (here == start)? NULL : start;
257 }
258
259 dlen = (int)strlen((char *)here);
260 if (dlen > 0 && here[dlen - 1] == '\n') return start;
261 here += dlen;
262 }
263
264 else
265 {
266 int new_buffer_size = 2*buffer_size;
267 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
268 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
269 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
270
271 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
272 {
273 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
274 exit(1);
275 }
276
277 memcpy(new_buffer, buffer, buffer_size);
278 memcpy(new_pbuffer, pbuffer, buffer_size);
279
280 buffer_size = new_buffer_size;
281
282 start = new_buffer + (start - buffer);
283 here = new_buffer + (here - buffer);
284
285 free(buffer);
286 free(dbuffer);
287 free(pbuffer);
288
289 buffer = new_buffer;
290 dbuffer = new_dbuffer;
291 pbuffer = new_pbuffer;
292 }
293 }
294
295 return NULL; /* Control never gets here */
296 }
297
298
299
300
301
302
303
/*************************************************
*          Read number from string               *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then a run of decimal digits is converted. A
digit string that describes a value larger than INT_MAX yields INT_MAX instead
of overflowing the signed accumulator.

Arguments:
  str         string to be converted
  endptr      where to put the end pointer (the first character that was not
              consumed)

Returns:      the value as an int; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  while (isdigit(*str))
  {
    int digit = (int)(*str++ - '0');

    /* result * 10 + digit fits in an int exactly when this test is false. */

    if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
  }

  *endptr = str;
  return result;
}
328
329
330
331
332 /*************************************************
333 * Convert UTF-8 string to value *
334 *************************************************/
335
336 /* This function takes one or more bytes that represents a UTF-8 character,
337 and returns the value of the character.
338
339 Argument:
340 utf8bytes a pointer to the byte vector
341 vptr a pointer to an int to receive the value
342
343 Returns: > 0 => the number of bytes consumed
344 -6 to 0 => malformed UTF-8 character at offset = (-return)
345 */
346
347 #if !defined NOUTF8
348
349 static int
350 utf82ord(unsigned char *utf8bytes, int *vptr)
351 {
352 int c = *utf8bytes++;
353 int d = c;
354 int i, j, s;
355
356 for (i = -1; i < 6; i++) /* i is number of additional bytes */
357 {
358 if ((d & 0x80) == 0) break;
359 d <<= 1;
360 }
361
362 if (i == -1) { *vptr = c; return 1; } /* ascii character */
363 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
364
365 /* i now has a value in the range 1-5 */
366
367 s = 6*i;
368 d = (c & utf8_table3[i]) << s;
369
370 for (j = 0; j < i; j++)
371 {
372 c = *utf8bytes++;
373 if ((c & 0xc0) != 0x80) return -(j+1);
374 s -= 6;
375 d |= (c & 0x3f) << s;
376 }
377
378 /* Check that encoding was the correct unique one */
379
380 for (j = 0; j < utf8_table1_size; j++)
381 if (d <= utf8_table1[j]) break;
382 if (j != i) return -(i+1);
383
384 /* Valid value */
385
386 *vptr = d;
387 return i+1;
388 }
389
390 #endif
391
392
393
394 /*************************************************
395 * Convert character value to UTF-8 *
396 *************************************************/
397
398 /* This function takes an integer value in the range 0 - 0x7fffffff
399 and encodes it as a UTF-8 character in 0 to 6 bytes.
400
401 Arguments:
402 cvalue the character value
403 utf8bytes pointer to buffer for result - at least 6 bytes long
404
405 Returns: number of characters placed in the buffer
406 */
407
408 #if !defined NOUTF8
409
410 static int
411 ord2utf8(int cvalue, uschar *utf8bytes)
412 {
413 register int i, j;
414 for (i = 0; i < utf8_table1_size; i++)
415 if (cvalue <= utf8_table1[i]) break;
416 utf8bytes += i;
417 for (j = i; j > 0; j--)
418 {
419 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
420 cvalue >>= 6;
421 }
422 *utf8bytes = utf8_table2[i] | cvalue;
423 return i + 1;
424 }
425
426 #endif
427
428
429
430 /*************************************************
431 * Print character string *
432 *************************************************/
433
434 /* Character string printing function. Must handle UTF-8 strings in utf8
435 mode. Yields number of characters printed. If handed a NULL file, just counts
436 chars without printing. */
437
438 static int pchars(unsigned char *p, int length, FILE *f)
439 {
440 int c = 0;
441 int yield = 0;
442
443 while (length-- > 0)
444 {
445 #if !defined NOUTF8
446 if (use_utf8)
447 {
448 int rc = utf82ord(p, &c);
449
450 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
451 {
452 length -= rc - 1;
453 p += rc;
454 if (PRINTHEX(c))
455 {
456 if (f != NULL) fprintf(f, "%c", c);
457 yield++;
458 }
459 else
460 {
461 int n = 4;
462 if (f != NULL) fprintf(f, "\\x{%02x}", c);
463 yield += (n <= 0x000000ff)? 2 :
464 (n <= 0x00000fff)? 3 :
465 (n <= 0x0000ffff)? 4 :
466 (n <= 0x000fffff)? 5 : 6;
467 }
468 continue;
469 }
470 }
471 #endif
472
473 /* Not UTF-8, or malformed UTF-8 */
474
475 c = *p++;
476 if (PRINTHEX(c))
477 {
478 if (f != NULL) fprintf(f, "%c", c);
479 yield++;
480 }
481 else
482 {
483 if (f != NULL) fprintf(f, "\\x%02x", c);
484 yield += 4;
485 }
486 }
487
488 return yield;
489 }
490
491
492
493 /*************************************************
494 * Callout function *
495 *************************************************/
496
497 /* Called from PCRE as a result of the (?C) item. We print out where we are in
498 the match. Yield zero unless more callouts than the fail count, or the callout
499 data is not zero. */
500
501 static int callout(pcre_callout_block *cb)
502 {
503 FILE *f = (first_callout | callout_extra)? outfile : NULL;
504 int i, pre_start, post_start, subject_length;
505
506 if (callout_extra)
507 {
508 fprintf(f, "Callout %d: last capture = %d\n",
509 cb->callout_number, cb->capture_last);
510
511 for (i = 0; i < cb->capture_top * 2; i += 2)
512 {
513 if (cb->offset_vector[i] < 0)
514 fprintf(f, "%2d: <unset>\n", i/2);
515 else
516 {
517 fprintf(f, "%2d: ", i/2);
518 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
519 cb->offset_vector[i+1] - cb->offset_vector[i], f);
520 fprintf(f, "\n");
521 }
522 }
523 }
524
525 /* Re-print the subject in canonical form, the first time or if giving full
526 datails. On subsequent calls in the same match, we use pchars just to find the
527 printed lengths of the substrings. */
528
529 if (f != NULL) fprintf(f, "--->");
530
531 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
532 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
533 cb->current_position - cb->start_match, f);
534
535 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
536
537 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
538 cb->subject_length - cb->current_position, f);
539
540 if (f != NULL) fprintf(f, "\n");
541
542 /* Always print appropriate indicators, with callout number if not already
543 shown. For automatic callouts, show the pattern offset. */
544
545 if (cb->callout_number == 255)
546 {
547 fprintf(outfile, "%+3d ", cb->pattern_position);
548 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
549 }
550 else
551 {
552 if (callout_extra) fprintf(outfile, " ");
553 else fprintf(outfile, "%3d ", cb->callout_number);
554 }
555
556 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
557 fprintf(outfile, "^");
558
559 if (post_start > 0)
560 {
561 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
562 fprintf(outfile, "^");
563 }
564
565 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
566 fprintf(outfile, " ");
567
568 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
569 pbuffer + cb->pattern_position);
570
571 fprintf(outfile, "\n");
572 first_callout = 0;
573
574 if (cb->callout_data != NULL)
575 {
576 int callout_data = *((int *)(cb->callout_data));
577 if (callout_data != 0)
578 {
579 fprintf(outfile, "Callout data = %d\n", callout_data);
580 return callout_data;
581 }
582 }
583
584 return (cb->callout_number != callout_fail_id)? 0 :
585 (++callout_count >= callout_fail_count)? 1 : 0;
586 }
587
588
589 /*************************************************
590 * Local malloc functions *
591 *************************************************/
592
593 /* Alternative malloc function, to test functionality and show the size of the
594 compiled re. */
595
596 static void *new_malloc(size_t size)
597 {
598 void *block = malloc(size);
599 gotten_store = size;
600 if (show_malloc)
601 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
602 return block;
603 }
604
605 static void new_free(void *block)
606 {
607 if (show_malloc)
608 fprintf(outfile, "free %p\n", block);
609 free(block);
610 }
611
612
613 /* For recursion malloc/free, to test stacking calls */
614
615 static void *stack_malloc(size_t size)
616 {
617 void *block = malloc(size);
618 if (show_malloc)
619 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
620 return block;
621 }
622
623 static void stack_free(void *block)
624 {
625 if (show_malloc)
626 fprintf(outfile, "stack_free %p\n", block);
627 free(block);
628 }
629
630
631 /*************************************************
632 * Call pcre_fullinfo() *
633 *************************************************/
634
635 /* Get one piece of information from the pcre_fullinfo() function */
636
637 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
638 {
639 int rc;
640 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
641 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
642 }
643
644
645
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverse the byte order of a value.

Arguments:
  value     the value to be flipped
  n         2 to swap the two low-order bytes; any other number reverses the
            four low-order bytes

Returns:    the flipped value; bytes above those that take part are dropped
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int b0 = value & 0xff;
  unsigned long int b1 = (value >> 8) & 0xff;
  unsigned long int b2, b3;

  if (n == 2) return (b0 << 8) | b1;

  b2 = (value >> 16) & 0xff;
  b3 = (value >> 24) & 0xff;
  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
659
660
661
662
663 /*************************************************
664 * Check match or recursion limit *
665 *************************************************/
666
667 static int
668 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
669 int start_offset, int options, int *use_offsets, int use_size_offsets,
670 int flag, unsigned long int *limit, int errnumber, const char *msg)
671 {
672 int count;
673 int min = 0;
674 int mid = 64;
675 int max = -1;
676
677 extra->flags |= flag;
678
679 for (;;)
680 {
681 *limit = mid;
682
683 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
684 use_offsets, use_size_offsets);
685
686 if (count == errnumber)
687 {
688 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
689 min = mid;
690 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
691 }
692
693 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
694 count == PCRE_ERROR_PARTIAL)
695 {
696 if (mid == min + 1)
697 {
698 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
699 break;
700 }
701 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
702 max = mid;
703 mid = (min + mid)/2;
704 }
705 else break; /* Some other error */
706 }
707
708 extra->flags &= ~flag;
709 return count;
710 }
711
712
713
714 /*************************************************
715 * Case-independent strncmp() function *
716 *************************************************/
717
718 /*
719 Arguments:
720 s first string
721 t second string
722 n number of characters to compare
723
724 Returns: < 0, = 0, or > 0, according to the comparison
725 */
726
727 static int
728 strncmpic(uschar *s, uschar *t, int n)
729 {
730 while (n--)
731 {
732 int c = tolower(*s++) - tolower(*t++);
733 if (c) return c;
734 }
735 return 0;
736 }
737
738
739
740 /*************************************************
741 * Check newline indicator *
742 *************************************************/
743
744 /* This is used both at compile and run-time to check for <xxx> escapes, where
745 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
746 no match.
747
748 Arguments:
749 p points after the leading '<'
750 f file for error message
751
752 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
753 */
754
755 static int
756 check_newline(uschar *p, FILE *f)
757 {
758 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
759 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
760 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
761 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
762 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
763 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
764 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
765 fprintf(f, "Unknown newline type at: <%s\n", p);
766 return 0;
767 }
768
769
770
/*************************************************
*             Usage function                     *
*************************************************/

/* Write the command line summary to stdout. Which lines appear depends on
SUPPORT_LIBREADLINE, NODFA and NOPOSIX. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
  fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

  fputs("\nOptions:\n"
        " -b show compiled code (bytecode)\n"
        " -C show PCRE compile-time options and exit\n"
        " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -M find MATCH_LIMIT minimum for each subject\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif

  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s output store (memory) used information\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
808
809
810
811 /*************************************************
812 * Main Program *
813 *************************************************/
814
815 /* Read lines from named file or stdin and write to named file or stdout; lines
816 consist of a regular expression, in delimiters and optionally followed by
817 options, followed by a set of test data, terminated by an empty line. */
818
819 int main(int argc, char **argv)
820 {
821 FILE *infile = stdin;
822 int options = 0;
823 int study_options = 0;
824 int default_find_match_limit = FALSE;
825 int op = 1;
826 int timeit = 0;
827 int timeitm = 0;
828 int showinfo = 0;
829 int showstore = 0;
830 int quiet = 0;
831 int size_offsets = 45;
832 int size_offsets_max;
833 int *offsets = NULL;
834 #if !defined NOPOSIX
835 int posix = 0;
836 #endif
837 int debug = 0;
838 int done = 0;
839 int all_use_dfa = 0;
840 int yield = 0;
841 int stack_size;
842
843 /* These vectors store, end-to-end, a list of captured substring names. Assume
844 that 1024 is plenty long enough for the few names we'll be testing. */
845
846 uschar copynames[1024];
847 uschar getnames[1024];
848
849 uschar *copynamesptr;
850 uschar *getnamesptr;
851
852 /* Get buffers from malloc() so that Electric Fence will check their misuse
853 when I am debugging. They grow automatically when very long lines are read. */
854
855 buffer = (unsigned char *)malloc(buffer_size);
856 dbuffer = (unsigned char *)malloc(buffer_size);
857 pbuffer = (unsigned char *)malloc(buffer_size);
858
859 /* The outfile variable is static so that new_malloc can use it. */
860
861 outfile = stdout;
862
863 /* The following _setmode() stuff is some Windows magic that tells its runtime
864 library to translate CRLF into a single LF character. At least, that's what
865 I've been told: never having used Windows I take this all on trust. Originally
866 it set 0x8000, but then I was advised that _O_BINARY was better. */
867
868 #if defined(_WIN32) || defined(WIN32)
869 _setmode( _fileno( stdout ), _O_BINARY );
870 #endif
871
872 /* Scan options */
873
874 while (argc > 1 && argv[op][0] == '-')
875 {
876 unsigned char *endptr;
877
878 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
879 showstore = 1;
880 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
881 else if (strcmp(argv[op], "-b") == 0) debug = 1;
882 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
883 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
884 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
885 #if !defined NODFA
886 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
887 #endif
888 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
889 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
890 *endptr == 0))
891 {
892 op++;
893 argc--;
894 }
895 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
896 {
897 int both = argv[op][2] == 0;
898 int temp;
899 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
900 *endptr == 0))
901 {
902 timeitm = temp;
903 op++;
904 argc--;
905 }
906 else timeitm = LOOPREPEAT;
907 if (both) timeit = timeitm;
908 }
909 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
910 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
911 *endptr == 0))
912 {
913 #if defined(_WIN32) || defined(WIN32)
914 printf("PCRE: -S not supported on this OS\n");
915 exit(1);
916 #else
917 int rc;
918 struct rlimit rlim;
919 getrlimit(RLIMIT_STACK, &rlim);
920 rlim.rlim_cur = stack_size * 1024 * 1024;
921 rc = setrlimit(RLIMIT_STACK, &rlim);
922 if (rc != 0)
923 {
924 printf("PCRE: setrlimit() failed with error %d\n", rc);
925 exit(1);
926 }
927 op++;
928 argc--;
929 #endif
930 }
931 #if !defined NOPOSIX
932 else if (strcmp(argv[op], "-p") == 0) posix = 1;
933 #endif
934 else if (strcmp(argv[op], "-C") == 0)
935 {
936 int rc;
937 unsigned long int lrc;
938 printf("PCRE version %s\n", pcre_version());
939 printf("Compiled with\n");
940 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
941 printf(" %sUTF-8 support\n", rc? "" : "No ");
942 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
943 printf(" %sUnicode properties support\n", rc? "" : "No ");
944 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
945 /* Note that these values are always the ASCII values, even
946 in EBCDIC environments. CR is 13 and NL is 10. */
947 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
948 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
949 (rc == -2)? "ANYCRLF" :
950 (rc == -1)? "ANY" : "???");
951 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
952 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
953 "all Unicode newlines");
954 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
955 printf(" Internal link size = %d\n", rc);
956 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
957 printf(" POSIX malloc threshold = %d\n", rc);
958 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
959 printf(" Default match limit = %ld\n", lrc);
960 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
961 printf(" Default recursion depth limit = %ld\n", lrc);
962 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
963 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
964 goto EXIT;
965 }
966 else if (strcmp(argv[op], "-help") == 0 ||
967 strcmp(argv[op], "--help") == 0)
968 {
969 usage();
970 goto EXIT;
971 }
972 else
973 {
974 printf("** Unknown or malformed option %s\n", argv[op]);
975 usage();
976 yield = 1;
977 goto EXIT;
978 }
979 op++;
980 argc--;
981 }
982
983 /* Get the store for the offsets vector, and remember what it was */
984
985 size_offsets_max = size_offsets;
986 offsets = (int *)malloc(size_offsets_max * sizeof(int));
987 if (offsets == NULL)
988 {
989 printf("** Failed to get %d bytes of memory for offsets vector\n",
990 (int)(size_offsets_max * sizeof(int)));
991 yield = 1;
992 goto EXIT;
993 }
994
995 /* Sort out the input and output files */
996
997 if (argc > 1)
998 {
999 infile = fopen(argv[op], INPUT_MODE);
1000 if (infile == NULL)
1001 {
1002 printf("** Failed to open %s\n", argv[op]);
1003 yield = 1;
1004 goto EXIT;
1005 }
1006 }
1007
1008 if (argc > 2)
1009 {
1010 outfile = fopen(argv[op+1], OUTPUT_MODE);
1011 if (outfile == NULL)
1012 {
1013 printf("** Failed to open %s\n", argv[op+1]);
1014 yield = 1;
1015 goto EXIT;
1016 }
1017 }
1018
1019 /* Set alternative malloc function */
1020
1021 pcre_malloc = new_malloc;
1022 pcre_free = new_free;
1023 pcre_stack_malloc = stack_malloc;
1024 pcre_stack_free = stack_free;
1025
1026 /* Heading line unless quiet, then prompt for first regex if stdin */
1027
1028 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1029
1030 /* Main loop */
1031
1032 while (!done)
1033 {
1034 pcre *re = NULL;
1035 pcre_extra *extra = NULL;
1036
1037 #if !defined NOPOSIX /* There are still compilers that require no indent */
1038 regex_t preg;
1039 int do_posix = 0;
1040 #endif
1041
1042 const char *error;
1043 unsigned char *p, *pp, *ppp;
1044 unsigned char *to_file = NULL;
1045 const unsigned char *tables = NULL;
1046 unsigned long int true_size, true_study_size = 0;
1047 size_t size, regex_gotten_store;
1048 int do_study = 0;
1049 int do_debug = debug;
1050 int do_G = 0;
1051 int do_g = 0;
1052 int do_showinfo = showinfo;
1053 int do_showrest = 0;
1054 int do_flip = 0;
1055 int erroroffset, len, delimiter, poffset;
1056
1057 use_utf8 = 0;
1058 debug_lengths = 1;
1059
1060 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1061 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1062 fflush(outfile);
1063
1064 p = buffer;
1065 while (isspace(*p)) p++;
1066 if (*p == 0) continue;
1067
1068 /* See if the pattern is to be loaded pre-compiled from a file. */
1069
1070 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1071 {
1072 unsigned long int magic, get_options;
1073 uschar sbuf[8];
1074 FILE *f;
1075
1076 p++;
1077 pp = p + (int)strlen((char *)p);
1078 while (isspace(pp[-1])) pp--;
1079 *pp = 0;
1080
1081 f = fopen((char *)p, "rb");
1082 if (f == NULL)
1083 {
1084 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1085 continue;
1086 }
1087
1088 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1089
1090 true_size =
1091 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1092 true_study_size =
1093 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1094
1095 re = (real_pcre *)new_malloc(true_size);
1096 regex_gotten_store = gotten_store;
1097
1098 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1099
1100 magic = ((real_pcre *)re)->magic_number;
1101 if (magic != MAGIC_NUMBER)
1102 {
1103 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1104 {
1105 do_flip = 1;
1106 }
1107 else
1108 {
1109 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1110 fclose(f);
1111 continue;
1112 }
1113 }
1114
1115 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1116 do_flip? " (byte-inverted)" : "", p);
1117
1118 /* Need to know if UTF-8 for printing data strings */
1119
1120 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1121 use_utf8 = (get_options & PCRE_UTF8) != 0;
1122
1123 /* Now see if there is any following study data */
1124
1125 if (true_study_size != 0)
1126 {
1127 pcre_study_data *psd;
1128
1129 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1130 extra->flags = PCRE_EXTRA_STUDY_DATA;
1131
1132 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1133 extra->study_data = psd;
1134
1135 if (fread(psd, 1, true_study_size, f) != true_study_size)
1136 {
1137 FAIL_READ:
1138 fprintf(outfile, "Failed to read data from %s\n", p);
1139 if (extra != NULL) new_free(extra);
1140 if (re != NULL) new_free(re);
1141 fclose(f);
1142 continue;
1143 }
1144 fprintf(outfile, "Study data loaded from %s\n", p);
1145 do_study = 1; /* To get the data output if requested */
1146 }
1147 else fprintf(outfile, "No study data\n");
1148
1149 fclose(f);
1150 goto SHOW_INFO;
1151 }
1152
1153 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1154 the pattern; if it isn't complete, read more. */
1155
1156 delimiter = *p++;
1157
1158 if (isalnum(delimiter) || delimiter == '\\')
1159 {
1160 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1161 goto SKIP_DATA;
1162 }
1163
1164 pp = p;
1165 poffset = p - buffer;
1166
1167 for(;;)
1168 {
1169 while (*pp != 0)
1170 {
1171 if (*pp == '\\' && pp[1] != 0) pp++;
1172 else if (*pp == delimiter) break;
1173 pp++;
1174 }
1175 if (*pp != 0) break;
1176 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1177 {
1178 fprintf(outfile, "** Unexpected EOF\n");
1179 done = 1;
1180 goto CONTINUE;
1181 }
1182 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1183 }
1184
1185 /* The buffer may have moved while being extended; reset the start of data
1186 pointer to the correct relative point in the buffer. */
1187
1188 p = buffer + poffset;
1189
1190 /* If the first character after the delimiter is backslash, make
1191 the pattern end with backslash. This is purely to provide a way
1192 of testing for the error message when a pattern ends with backslash. */
1193
1194 if (pp[1] == '\\') *pp++ = '\\';
1195
1196 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1197 for callouts. */
1198
1199 *pp++ = 0;
1200 strcpy((char *)pbuffer, (char *)p);
1201
1202 /* Look for options after final delimiter */
1203
1204 options = 0;
1205 study_options = 0;
1206 log_store = showstore; /* default from command line */
1207
1208 while (*pp != 0)
1209 {
1210 switch (*pp++)
1211 {
1212 case 'f': options |= PCRE_FIRSTLINE; break;
1213 case 'g': do_g = 1; break;
1214 case 'i': options |= PCRE_CASELESS; break;
1215 case 'm': options |= PCRE_MULTILINE; break;
1216 case 's': options |= PCRE_DOTALL; break;
1217 case 'x': options |= PCRE_EXTENDED; break;
1218
1219 case '+': do_showrest = 1; break;
1220 case 'A': options |= PCRE_ANCHORED; break;
1221 case 'B': do_debug = 1; break;
1222 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1223 case 'D': do_debug = do_showinfo = 1; break;
1224 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1225 case 'F': do_flip = 1; break;
1226 case 'G': do_G = 1; break;
1227 case 'I': do_showinfo = 1; break;
1228 case 'J': options |= PCRE_DUPNAMES; break;
1229 case 'M': log_store = 1; break;
1230 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1231
1232 #if !defined NOPOSIX
1233 case 'P': do_posix = 1; break;
1234 #endif
1235
1236 case 'S': do_study = 1; break;
1237 case 'U': options |= PCRE_UNGREEDY; break;
1238 case 'X': options |= PCRE_EXTRA; break;
1239 case 'Z': debug_lengths = 0; break;
1240 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1241 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1242
1243 case 'L':
1244 ppp = pp;
1245 /* The '\r' test here is so that it works on Windows. */
1246 /* The zero-byte (NUL) test is just in case this is an unterminated line. */
1247 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1248 *ppp = 0;
1249 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1250 {
1251 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1252 goto SKIP_DATA;
1253 }
1254 locale_set = 1;
1255 tables = pcre_maketables();
1256 pp = ppp;
1257 break;
1258
1259 case '>':
1260 to_file = pp;
1261 while (*pp != 0) pp++;
1262 while (isspace(pp[-1])) pp--;
1263 *pp = 0;
1264 break;
1265
1266 case '<':
1267 {
1268 if (strncmp((char *)pp, "JS>", 3) == 0)
1269 {
1270 options |= PCRE_JAVASCRIPT_COMPAT;
1271 pp += 3;
1272 }
1273 else
1274 {
1275 int x = check_newline(pp, outfile);
1276 if (x == 0) goto SKIP_DATA;
1277 options |= x;
1278 while (*pp++ != '>');
1279 }
1280 }
1281 break;
1282
1283 case '\r': /* So that it works in Windows */
1284 case '\n':
1285 case ' ':
1286 break;
1287
1288 default:
1289 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1290 goto SKIP_DATA;
1291 }
1292 }
1293
1294 /* Handle compiling via the POSIX interface, which doesn't support the
1295 timing, showing, or debugging options, nor the ability to pass over
1296 local character tables. */
1297
1298 #if !defined NOPOSIX
1299 if (posix || do_posix)
1300 {
1301 int rc;
1302 int cflags = 0;
1303
1304 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1305 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1306 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1307 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1308 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1309 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1310
1311 rc = regcomp(&preg, (char *)p, cflags);
1312
1313 /* Compilation failed; go back for another re, skipping to blank line
1314 if non-interactive. */
1315
1316 if (rc != 0)
1317 {
1318 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1319 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1320 goto SKIP_DATA;
1321 }
1322 }
1323
1324 /* Handle compiling via the native interface */
1325
1326 else
1327 #endif /* !defined NOPOSIX */
1328
1329 {
1330 unsigned long int get_options;
1331
1332 if (timeit > 0)
1333 {
1334 register int i;
1335 clock_t time_taken;
1336 clock_t start_time = clock();
1337 for (i = 0; i < timeit; i++)
1338 {
1339 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1340 if (re != NULL) free(re);
1341 }
1342 time_taken = clock() - start_time;
1343 fprintf(outfile, "Compile time %.4f milliseconds\n",
1344 (((double)time_taken * 1000.0) / (double)timeit) /
1345 (double)CLOCKS_PER_SEC);
1346 }
1347
1348 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1349
1350 /* Compilation failed; go back for another re, skipping to blank line
1351 if non-interactive. */
1352
1353 if (re == NULL)
1354 {
1355 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1356 SKIP_DATA:
1357 if (infile != stdin)
1358 {
1359 for (;;)
1360 {
1361 if (extend_inputline(infile, buffer, NULL) == NULL)
1362 {
1363 done = 1;
1364 goto CONTINUE;
1365 }
1366 len = (int)strlen((char *)buffer);
1367 while (len > 0 && isspace(buffer[len-1])) len--;
1368 if (len == 0) break;
1369 }
1370 fprintf(outfile, "\n");
1371 }
1372 goto CONTINUE;
1373 }
1374
1375 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1376 within the regex; check for this so that we know how to process the data
1377 lines. */
1378
1379 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1380 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1381
1382 /* Print information if required. There are now two info-returning
1383 functions. The old one has a limited interface and returns only limited
1384 data. Check that it agrees with the newer one. */
1385
1386 if (log_store)
1387 fprintf(outfile, "Memory allocation (code space): %d\n",
1388 (int)(gotten_store -
1389 sizeof(real_pcre) -
1390 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1391
1392 /* Extract the size for possible writing before possibly flipping it,
1393 and remember the store that was got. */
1394
1395 true_size = ((real_pcre *)re)->size;
1396 regex_gotten_store = gotten_store;
1397
1398 /* If /S was present, study the regexp to generate additional info to
1399 help with the matching. */
1400
1401 if (do_study)
1402 {
1403 if (timeit > 0)
1404 {
1405 register int i;
1406 clock_t time_taken;
1407 clock_t start_time = clock();
1408 for (i = 0; i < timeit; i++)
1409 extra = pcre_study(re, study_options, &error);
1410 time_taken = clock() - start_time;
1411 if (extra != NULL) free(extra);
1412 fprintf(outfile, " Study time %.4f milliseconds\n",
1413 (((double)time_taken * 1000.0) / (double)timeit) /
1414 (double)CLOCKS_PER_SEC);
1415 }
1416 extra = pcre_study(re, study_options, &error);
1417 if (error != NULL)
1418 fprintf(outfile, "Failed to study: %s\n", error);
1419 else if (extra != NULL)
1420 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1421 }
1422
1423 /* If the 'F' option was present, we flip the bytes of all the integer
1424 fields in the regex data block and the study block. This is to make it
1425 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1426 compiled on a different architecture. */
1427
1428 if (do_flip)
1429 {
1430 real_pcre *rre = (real_pcre *)re;
1431 rre->magic_number =
1432 byteflip(rre->magic_number, sizeof(rre->magic_number));
1433 rre->size = byteflip(rre->size, sizeof(rre->size));
1434 rre->options = byteflip(rre->options, sizeof(rre->options));
1435 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1436 rre->top_bracket =
1437 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1438 rre->top_backref =
1439 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1440 rre->first_byte =
1441 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1442 rre->req_byte =
1443 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1444 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1445 sizeof(rre->name_table_offset));
1446 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1447 sizeof(rre->name_entry_size));
1448 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1449 sizeof(rre->name_count));
1450
1451 if (extra != NULL)
1452 {
1453 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1454 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1455 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1456 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1457 }
1458 }
1459
1460 /* Extract information from the compiled data if required */
1461
1462 SHOW_INFO:
1463
1464 if (do_debug)
1465 {
1466 fprintf(outfile, "------------------------------------------------------------------\n");
1467 pcre_printint(re, outfile, debug_lengths);
1468 }
1469
1470 /* We already have the options in get_options (see above) */
1471
1472 if (do_showinfo)
1473 {
1474 unsigned long int all_options;
1475 #if !defined NOINFOCHECK
1476 int old_first_char, old_options, old_count;
1477 #endif
1478 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1479 hascrorlf;
1480 int nameentrysize, namecount;
1481 const uschar *nametable;
1482
1483 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1484 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1485 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1486 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1487 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1488 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1489 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1490 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1491 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1492 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1493 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1494
1495 #if !defined NOINFOCHECK
1496 old_count = pcre_info(re, &old_options, &old_first_char);
1497 if (count < 0) fprintf(outfile,
1498 "Error %d from pcre_info()\n", count);
1499 else
1500 {
1501 if (old_count != count) fprintf(outfile,
1502 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1503 old_count);
1504
1505 if (old_first_char != first_char) fprintf(outfile,
1506 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1507 first_char, old_first_char);
1508
1509 if (old_options != (int)get_options) fprintf(outfile,
1510 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1511 get_options, old_options);
1512 }
1513 #endif
1514
1515 if (size != regex_gotten_store) fprintf(outfile,
1516 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1517 (int)size, (int)regex_gotten_store);
1518
1519 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1520 if (backrefmax > 0)
1521 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1522
1523 if (namecount > 0)
1524 {
1525 fprintf(outfile, "Named capturing subpatterns:\n");
1526 while (namecount-- > 0)
1527 {
1528 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1529 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1530 GET2(nametable, 0));
1531 nametable += nameentrysize;
1532 }
1533 }
1534
1535 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1536 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1537
1538 all_options = ((real_pcre *)re)->options;
1539 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1540
1541 if (get_options == 0) fprintf(outfile, "No options\n");
1542 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1543 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1544 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1545 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1546 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1547 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1548 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1549 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1550 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1551 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1552 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1553 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1554 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1555 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1556 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1557 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1558
1559 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1560
1561 switch (get_options & PCRE_NEWLINE_BITS)
1562 {
1563 case PCRE_NEWLINE_CR:
1564 fprintf(outfile, "Forced newline sequence: CR\n");
1565 break;
1566
1567 case PCRE_NEWLINE_LF:
1568 fprintf(outfile, "Forced newline sequence: LF\n");
1569 break;
1570
1571 case PCRE_NEWLINE_CRLF:
1572 fprintf(outfile, "Forced newline sequence: CRLF\n");
1573 break;
1574
1575 case PCRE_NEWLINE_ANYCRLF:
1576 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1577 break;
1578
1579 case PCRE_NEWLINE_ANY:
1580 fprintf(outfile, "Forced newline sequence: ANY\n");
1581 break;
1582
1583 default:
1584 break;
1585 }
1586
1587 if (first_char == -1)
1588 {
1589 fprintf(outfile, "First char at start or follows newline\n");
1590 }
1591 else if (first_char < 0)
1592 {
1593 fprintf(outfile, "No first char\n");
1594 }
1595 else
1596 {
1597 int ch = first_char & 255;
1598 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1599 "" : " (caseless)";
1600 if (PRINTHEX(ch))
1601 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1602 else
1603 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1604 }
1605
1606 if (need_char < 0)
1607 {
1608 fprintf(outfile, "No need char\n");
1609 }
1610 else
1611 {
1612 int ch = need_char & 255;
1613 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1614 "" : " (caseless)";
1615 if (PRINTHEX(ch))
1616 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1617 else
1618 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1619 }
1620
1621 /* Don't output study size; at present it is in any case a fixed
1622 value, but it varies, depending on the computer architecture, and
1623 so messes up the test suite. (And with the /F option, it might be
1624 flipped.) */
1625
1626 if (do_study)
1627 {
1628 if (extra == NULL)
1629 fprintf(outfile, "Study returned NULL\n");
1630 else
1631 {
1632 uschar *start_bits = NULL;
1633 int minlength;
1634
1635 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
1636 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1637
1638 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1639 if (start_bits == NULL)
1640 fprintf(outfile, "No set of starting bytes\n");
1641 else
1642 {
1643 int i;
1644 int c = 24;
1645 fprintf(outfile, "Starting byte set: ");
1646 for (i = 0; i < 256; i++)
1647 {
1648 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1649 {
1650 if (c > 75)
1651 {
1652 fprintf(outfile, "\n ");
1653 c = 2;
1654 }
1655 if (PRINTHEX(i) && i != ' ')
1656 {
1657 fprintf(outfile, "%c ", i);
1658 c += 2;
1659 }
1660 else
1661 {
1662 fprintf(outfile, "\\x%02x ", i);
1663 c += 5;
1664 }
1665 }
1666 }
1667 fprintf(outfile, "\n");
1668 }
1669 }
1670 }
1671 }
1672
1673 /* If the '>' option was present, we write out the regex to a file, and
1674 that is all. The first 8 bytes of the file are the regex length and then
1675 the study length, in big-endian order. */
1676
1677 if (to_file != NULL)
1678 {
1679 FILE *f = fopen((char *)to_file, "wb");
1680 if (f == NULL)
1681 {
1682 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1683 }
1684 else
1685 {
1686 uschar sbuf[8];
1687 sbuf[0] = (uschar)((true_size >> 24) & 255);
1688 sbuf[1] = (uschar)((true_size >> 16) & 255);
1689 sbuf[2] = (uschar)((true_size >> 8) & 255);
1690 sbuf[3] = (uschar)((true_size) & 255);
1691
1692 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1693 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1694 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1695 sbuf[7] = (uschar)((true_study_size) & 255);
1696
1697 if (fwrite(sbuf, 1, 8, f) < 8 ||
1698 fwrite(re, 1, true_size, f) < true_size)
1699 {
1700 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1701 }
1702 else
1703 {
1704 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1705 if (extra != NULL)
1706 {
1707 if (fwrite(extra->study_data, 1, true_study_size, f) <
1708 true_study_size)
1709 {
1710 fprintf(outfile, "Write error on %s: %s\n", to_file,
1711 strerror(errno));
1712 }
1713 else fprintf(outfile, "Study data written to %s\n", to_file);
1714
1715 }
1716 }
1717 fclose(f);
1718 }
1719
1720 new_free(re);
1721 if (extra != NULL) new_free(extra);
1722 if (tables != NULL) new_free((void *)tables);
1723 continue; /* With next regex */
1724 }
1725 } /* End of non-POSIX compile */
1726
1727 /* Read data lines and test them */
1728
1729 for (;;)
1730 {
1731 uschar *q;
1732 uschar *bptr;
1733 int *use_offsets = offsets;
1734 int use_size_offsets = size_offsets;
1735 int callout_data = 0;
1736 int callout_data_set = 0;
1737 int count, c;
1738 int copystrings = 0;
1739 int find_match_limit = default_find_match_limit;
1740 int getstrings = 0;
1741 int getlist = 0;
1742 int gmatched = 0;
1743 int start_offset = 0;
1744 int g_notempty = 0;
1745 int use_dfa = 0;
1746
1747 options = 0;
1748
1749 *copynames = 0;
1750 *getnames = 0;
1751
1752 copynamesptr = copynames;
1753 getnamesptr = getnames;
1754
1755 pcre_callout = callout;
1756 first_callout = 1;
1757 callout_extra = 0;
1758 callout_count = 0;
1759 callout_fail_count = 999999;
1760 callout_fail_id = -1;
1761 show_malloc = 0;
1762
1763 if (extra != NULL) extra->flags &=
1764 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1765
1766 len = 0;
1767 for (;;)
1768 {
1769 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1770 {
1771 if (len > 0) break;
1772 done = 1;
1773 goto CONTINUE;
1774 }
1775 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1776 len = (int)strlen((char *)buffer);
1777 if (buffer[len-1] == '\n') break;
1778 }
1779
1780 while (len > 0 && isspace(buffer[len-1])) len--;
1781 buffer[len] = 0;
1782 if (len == 0) break;
1783
1784 p = buffer;
1785 while (isspace(*p)) p++;
1786
1787 bptr = q = dbuffer;
1788 while ((c = *p++) != 0)
1789 {
1790 int i = 0;
1791 int n = 0;
1792
1793 if (c == '\\') switch ((c = *p++))
1794 {
1795 case 'a': c = 7; break;
1796 case 'b': c = '\b'; break;
1797 case 'e': c = 27; break;
1798 case 'f': c = '\f'; break;
1799 case 'n': c = '\n'; break;
1800 case 'r': c = '\r'; break;
1801 case 't': c = '\t'; break;
1802 case 'v': c = '\v'; break;
1803
1804 case '0': case '1': case '2': case '3':
1805 case '4': case '5': case '6': case '7':
1806 c -= '0';
1807 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1808 c = c * 8 + *p++ - '0';
1809
1810 #if !defined NOUTF8
1811 if (use_utf8 && c > 255)
1812 {
1813 unsigned char buff8[8];
1814 int ii, utn;
1815 utn = ord2utf8(c, buff8);
1816 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1817 c = buff8[ii]; /* Last byte */
1818 }
1819 #endif
1820 break;
1821
1822 case 'x':
1823
1824 /* Handle \x{..} specially - new Perl thing for utf8 */
1825
1826 #if !defined NOUTF8
1827 if (*p == '{')
1828 {
1829 unsigned char *pt = p;
1830 c = 0;
1831 while (isxdigit(*(++pt)))
1832 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1833 if (*pt == '}')
1834 {
1835 unsigned char buff8[8];
1836 int ii, utn;
1837 if (use_utf8)
1838 {
1839 utn = ord2utf8(c, buff8);
1840 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1841 c = buff8[ii]; /* Last byte */
1842 }
1843 else
1844 {
1845 if (c > 255)
1846 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1847 "UTF-8 mode is not enabled.\n"
1848 "** Truncation will probably give the wrong result.\n", c);
1849 }
1850 p = pt + 1;
1851 break;
1852 }
1853 /* Not correct form; fall through */
1854 }
1855 #endif
1856
1857 /* Ordinary \x */
1858
1859 c = 0;
1860 while (i++ < 2 && isxdigit(*p))
1861 {
1862 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1863 p++;
1864 }
1865 break;
1866
1867 case 0: /* \ followed by EOF allows for an empty line */
1868 p--;
1869 continue;
1870
1871 case '>':
1872 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1873 continue;
1874
1875 case 'A': /* Option setting */
1876 options |= PCRE_ANCHORED;
1877 continue;
1878
1879 case 'B':
1880 options |= PCRE_NOTBOL;
1881 continue;
1882
1883 case 'C':
1884 if (isdigit(*p)) /* Set copy string */
1885 {
1886 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1887 copystrings |= 1 << n;
1888 }
1889 else if (isalnum(*p))
1890 {
1891 uschar *npp = copynamesptr;
1892 while (isalnum(*p)) *npp++ = *p++;
1893 *npp++ = 0;
1894 *npp = 0;
1895 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1896 if (n < 0)
1897 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1898 copynamesptr = npp;
1899 }
1900 else if (*p == '+')
1901 {
1902 callout_extra = 1;
1903 p++;
1904 }
1905 else if (*p == '-')
1906 {
1907 pcre_callout = NULL;
1908 p++;
1909 }
1910 else if (*p == '!')
1911 {
1912 callout_fail_id = 0;
1913 p++;
1914 while(isdigit(*p))
1915 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1916 callout_fail_count = 0;
1917 if (*p == '!')
1918 {
1919 p++;
1920 while(isdigit(*p))
1921 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1922 }
1923 }
1924 else if (*p == '*')
1925 {
1926 int sign = 1;
1927 callout_data = 0;
1928 if (*(++p) == '-') { sign = -1; p++; }
1929 while(isdigit(*p))
1930 callout_data = callout_data * 10 + *p++ - '0';
1931 callout_data *= sign;
1932 callout_data_set = 1;
1933 }
1934 continue;
1935
1936 #if !defined NODFA
1937 case 'D':
1938 #if !defined NOPOSIX
1939 if (posix || do_posix)
1940 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1941 else
1942 #endif
1943 use_dfa = 1;
1944 continue;
1945
1946 case 'F':
1947 options |= PCRE_DFA_SHORTEST;
1948 continue;
1949 #endif
1950
1951 case 'G':
1952 if (isdigit(*p))
1953 {
1954 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1955 getstrings |= 1 << n;
1956 }
1957 else if (isalnum(*p))
1958 {
1959 uschar *npp = getnamesptr;
1960 while (isalnum(*p)) *npp++ = *p++;
1961 *npp++ = 0;
1962 *npp = 0;
1963 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1964 if (n < 0)
1965 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1966 getnamesptr = npp;
1967 }
1968 continue;
1969
1970 case 'L':
1971 getlist = 1;
1972 continue;
1973
1974 case 'M':
1975 find_match_limit = 1;
1976 continue;
1977
1978 case 'N':
1979 if ((options & PCRE_NOTEMPTY) != 0)
1980 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
1981 else
1982 options |= PCRE_NOTEMPTY;
1983 continue;
1984
1985 case 'O':
1986 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1987 if (n > size_offsets_max)
1988 {
1989 size_offsets_max = n;
1990 free(offsets);
1991 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1992 if (offsets == NULL)
1993 {
1994 printf("** Failed to get %d bytes of memory for offsets vector\n",
1995 (int)(size_offsets_max * sizeof(int)));
1996 yield = 1;
1997 goto EXIT;
1998 }
1999 }
2000 use_size_offsets = n;
2001 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2002 continue;
2003
2004 case 'P':
2005 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2006 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2007 continue;
2008
2009 case 'Q':
2010 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2011 if (extra == NULL)
2012 {
2013 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2014 extra->flags = 0;
2015 }
2016 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2017 extra->match_limit_recursion = n;
2018 continue;
2019
2020 case 'q':
2021 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2022 if (extra == NULL)
2023 {
2024 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2025 extra->flags = 0;
2026 }
2027 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2028 extra->match_limit = n;
2029 continue;
2030
2031 #if !defined NODFA
2032 case 'R':
2033 options |= PCRE_DFA_RESTART;
2034 continue;
2035 #endif
2036
2037 case 'S':
2038 show_malloc = 1;
2039 continue;
2040
2041 case 'Y':
2042 options |= PCRE_NO_START_OPTIMIZE;
2043 continue;
2044
2045 case 'Z':
2046 options |= PCRE_NOTEOL;
2047 continue;
2048
2049 case '?':
2050 options |= PCRE_NO_UTF8_CHECK;
2051 continue;
2052
2053 case '<':
2054 {
2055 int x = check_newline(p, outfile);
2056 if (x == 0) goto NEXT_DATA;
2057 options |= x;
2058 while (*p++ != '>');
2059 }
2060 continue;
2061 }
2062 *q++ = c;
2063 }
2064 *q = 0;
2065 len = q - dbuffer;
2066
2067 /* Move the data to the end of the buffer so that a read over the end of
2068 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2069 we are using the POSIX interface, we must include the terminating zero. */
2070
2071 #if !defined NOPOSIX
2072 if (posix || do_posix)
2073 {
2074 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2075 bptr += buffer_size - len - 1;
2076 }
2077 else
2078 #endif
2079 {
2080 memmove(bptr + buffer_size - len, bptr, len);
2081 bptr += buffer_size - len;
2082 }
2083
2084 if ((all_use_dfa || use_dfa) && find_match_limit)
2085 {
2086 printf("**Match limit not relevant for DFA matching: ignored\n");
2087 find_match_limit = 0;
2088 }
2089
2090 /* Handle matching via the POSIX interface, which does not
2091 support timing or playing with the match limit or callout data. */
2092
2093 #if !defined NOPOSIX
2094 if (posix || do_posix)
2095 {
2096 int rc;
2097 int eflags = 0;
2098 regmatch_t *pmatch = NULL;
2099 if (use_size_offsets > 0)
2100 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2101 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2102 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2103 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2104
2105 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2106
2107 if (rc != 0)
2108 {
2109 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2110 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2111 }
2112 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2113 != 0)
2114 {
2115 fprintf(outfile, "Matched with REG_NOSUB\n");
2116 }
2117 else
2118 {
2119 size_t i;
2120 for (i = 0; i < (size_t)use_size_offsets; i++)
2121 {
2122 if (pmatch[i].rm_so >= 0)
2123 {
2124 fprintf(outfile, "%2d: ", (int)i);
2125 (void)pchars(dbuffer + pmatch[i].rm_so,
2126 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2127 fprintf(outfile, "\n");
2128 if (i == 0 && do_showrest)
2129 {
2130 fprintf(outfile, " 0+ ");
2131 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2132 outfile);
2133 fprintf(outfile, "\n");
2134 }
2135 }
2136 }
2137 }
2138 free(pmatch);
2139 }
2140
2141 /* Handle matching via the native interface - repeats for /g and /G */
2142
2143 else
2144 #endif /* !defined NOPOSIX */
2145
2146 for (;; gmatched++) /* Loop for /g or /G */
2147 {
2148 if (timeitm > 0)
2149 {
2150 register int i;
2151 clock_t time_taken;
2152 clock_t start_time = clock();
2153
2154 #if !defined NODFA
2155 if (all_use_dfa || use_dfa)
2156 {
2157 int workspace[1000];
2158 for (i = 0; i < timeitm; i++)
2159 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2160 options | g_notempty, use_offsets, use_size_offsets, workspace,
2161 sizeof(workspace)/sizeof(int));
2162 }
2163 else
2164 #endif
2165
2166 for (i = 0; i < timeitm; i++)
2167 count = pcre_exec(re, extra, (char *)bptr, len,
2168 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2169
2170 time_taken = clock() - start_time;
2171 fprintf(outfile, "Execute time %.4f milliseconds\n",
2172 (((double)time_taken * 1000.0) / (double)timeitm) /
2173 (double)CLOCKS_PER_SEC);
2174 }
2175
2176 /* If find_match_limit is set, we want to do repeated matches with
2177 varying limits in order to find the minimum value for the match limit and
2178 for the recursion limit. */
2179
2180 if (find_match_limit)
2181 {
2182 if (extra == NULL)
2183 {
2184 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2185 extra->flags = 0;
2186 }
2187
2188 (void)check_match_limit(re, extra, bptr, len, start_offset,
2189 options|g_notempty, use_offsets, use_size_offsets,
2190 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2191 PCRE_ERROR_MATCHLIMIT, "match()");
2192
2193 count = check_match_limit(re, extra, bptr, len, start_offset,
2194 options|g_notempty, use_offsets, use_size_offsets,
2195 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2196 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2197 }
2198
2199 /* If callout_data is set, use the interface with additional data */
2200
2201 else if (callout_data_set)
2202 {
2203 if (extra == NULL)
2204 {
2205 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2206 extra->flags = 0;
2207 }
2208 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2209 extra->callout_data = &callout_data;
2210 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2211 options | g_notempty, use_offsets, use_size_offsets);
2212 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2213 }
2214
2215 /* The normal case is just to do the match once, with the default
2216 value of match_limit. */
2217
2218 #if !defined NODFA
2219 else if (all_use_dfa || use_dfa)
2220 {
2221 int workspace[1000];
2222 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2223 options | g_notempty, use_offsets, use_size_offsets, workspace,
2224 sizeof(workspace)/sizeof(int));
2225 if (count == 0)
2226 {
2227 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2228 count = use_size_offsets/2;
2229 }
2230 }
2231 #endif
2232
2233 else
2234 {
2235 count = pcre_exec(re, extra, (char *)bptr, len,
2236 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2237 if (count == 0)
2238 {
2239 fprintf(outfile, "Matched, but too many substrings\n");
2240 count = use_size_offsets/3;
2241 }
2242 }
2243
2244 /* Matched */
2245
2246 if (count >= 0)
2247 {
2248 int i, maxcount;
2249
2250 #if !defined NODFA
2251 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2252 #endif
2253 maxcount = use_size_offsets/3;
2254
2255 /* This is a check against a lunatic return value. */
2256
2257 if (count > maxcount)
2258 {
2259 fprintf(outfile,
2260 "** PCRE error: returned count %d is too big for offset size %d\n",
2261 count, use_size_offsets);
2262 count = use_size_offsets/3;
2263 if (do_g || do_G)
2264 {
2265 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2266 do_g = do_G = FALSE; /* Break g/G loop */
2267 }
2268 }
2269
2270 for (i = 0; i < count * 2; i += 2)
2271 {
2272 if (use_offsets[i] < 0)
2273 fprintf(outfile, "%2d: <unset>\n", i/2);
2274 else
2275 {
2276 fprintf(outfile, "%2d: ", i/2);
2277 (void)pchars(bptr + use_offsets[i],
2278 use_offsets[i+1] - use_offsets[i], outfile);
2279 fprintf(outfile, "\n");
2280 if (i == 0)
2281 {
2282 if (do_showrest)
2283 {
2284 fprintf(outfile, " 0+ ");
2285 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2286 outfile);
2287 fprintf(outfile, "\n");
2288 }
2289 }
2290 }
2291 }
2292
2293 for (i = 0; i < 32; i++)
2294 {
2295 if ((copystrings & (1 << i)) != 0)
2296 {
2297 char copybuffer[256];
2298 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2299 i, copybuffer, sizeof(copybuffer));
2300 if (rc < 0)
2301 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2302 else
2303 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2304 }
2305 }
2306
2307 for (copynamesptr = copynames;
2308 *copynamesptr != 0;
2309 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2310 {
2311 char copybuffer[256];
2312 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2313 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2314 if (rc < 0)
2315 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2316 else
2317 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2318 }
2319
2320 for (i = 0; i < 32; i++)
2321 {
2322 if ((getstrings & (1 << i)) != 0)
2323 {
2324 const char *substring;
2325 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2326 i, &substring);
2327 if (rc < 0)
2328 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2329 else
2330 {
2331 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2332 pcre_free_substring(substring);
2333 }
2334 }
2335 }
2336
2337 for (getnamesptr = getnames;
2338 *getnamesptr != 0;
2339 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2340 {
2341 const char *substring;
2342 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2343 count, (char *)getnamesptr, &substring);
2344 if (rc < 0)
2345 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2346 else
2347 {
2348 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2349 pcre_free_substring(substring);
2350 }
2351 }
2352
2353 if (getlist)
2354 {
2355 const char **stringlist;
2356 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2357 &stringlist);
2358 if (rc < 0)
2359 fprintf(outfile, "get substring list failed %d\n", rc);
2360 else
2361 {
2362 for (i = 0; i < count; i++)
2363 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2364 if (stringlist[i] != NULL)
2365 fprintf(outfile, "string list not terminated by NULL\n");
2366 /* free((void *)stringlist); */
2367 pcre_free_substring_list(stringlist);
2368 }
2369 }
2370 }
2371
2372 /* There was a partial match */
2373
2374 else if (count == PCRE_ERROR_PARTIAL)
2375 {
2376 fprintf(outfile, "Partial match");
2377 if (use_size_offsets > 1)
2378 {
2379 fprintf(outfile, ": ");
2380 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2381 outfile);
2382 }
2383 fprintf(outfile, "\n");
2384 break; /* Out of the /g loop */
2385 }
2386
2387 /* Failed to match. If this is a /g or /G loop and we previously set
2388 g_notempty after a null match, this is not necessarily the end. We want
2389 to advance the start offset, and continue. We won't be at the end of the
2390 string - that was checked before setting g_notempty.
2391
2392 Complication arises in the case when the newline option is "any" or
2393 "anycrlf". If the previous match was at the end of a line terminated by
2394 CRLF, an advance of one character just passes the \r, whereas we should
2395 prefer the longer newline sequence, as does the code in pcre_exec().
2396 Fudge the offset value to achieve this.
2397
2398 Otherwise, in the case of UTF-8 matching, the advance must be one
2399 character, not one byte. */
2400
2401 else
2402 {
2403 if (g_notempty != 0)
2404 {
2405 int onechar = 1;
2406 unsigned int obits = ((real_pcre *)re)->options;
2407 use_offsets[0] = start_offset;
2408 if ((obits & PCRE_NEWLINE_BITS) == 0)
2409 {
2410 int d;
2411 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2412 /* Note that these values are always the ASCII ones, even in
2413 EBCDIC environments. CR = 13, NL = 10. */
2414 obits = (d == 13)? PCRE_NEWLINE_CR :
2415 (d == 10)? PCRE_NEWLINE_LF :
2416 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2417 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2418 (d == -1)? PCRE_NEWLINE_ANY : 0;
2419 }
2420 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2421 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2422 &&
2423 start_offset < len - 1 &&
2424 bptr[start_offset] == '\r' &&
2425 bptr[start_offset+1] == '\n')
2426 onechar++;
2427 else if (use_utf8)
2428 {
2429 while (start_offset + onechar < len)
2430 {
2431 int tb = bptr[start_offset+onechar];
2432 if (tb <= 127) break;
2433 tb &= 0xc0;
2434 if (tb != 0 && tb != 0xc0) onechar++;
2435 }
2436 }
2437 use_offsets[1] = start_offset + onechar;
2438 }
2439 else
2440 {
2441 if (count == PCRE_ERROR_NOMATCH)
2442 {
2443 if (gmatched == 0) fprintf(outfile, "No match\n");
2444 }
2445 else fprintf(outfile, "Error %d\n", count);
2446 break; /* Out of the /g loop */
2447 }
2448 }
2449
2450 /* If not /g or /G we are done */
2451
2452 if (!do_g && !do_G) break;
2453
2454 /* If we have matched an empty string, first check to see if we are at
2455 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2456 Perl's /g option does. This turns out to be rather cunning. First we set
2457 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2458 same point. If this fails (picked up above) we advance to the next
2459 character. */
2460
2461 g_notempty = 0;
2462
2463 if (use_offsets[0] == use_offsets[1])
2464 {
2465 if (use_offsets[0] == len) break;
2466 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2467 }
2468
2469 /* For /g, update the start offset, leaving the rest alone */
2470
2471 if (do_g) start_offset = use_offsets[1];
2472
2473 /* For /G, update the pointer and length */
2474
2475 else
2476 {
2477 bptr += use_offsets[1];
2478 len -= use_offsets[1];
2479 }
2480 } /* End of loop for /g and /G */
2481
2482 NEXT_DATA: continue;
2483 } /* End of loop for data lines */
2484
2485 CONTINUE:
2486
2487 #if !defined NOPOSIX
2488 if (posix || do_posix) regfree(&preg);
2489 #endif
2490
2491 if (re != NULL) new_free(re);
2492 if (extra != NULL) new_free(extra);
2493 if (tables != NULL)
2494 {
2495 new_free((void *)tables);
2496 setlocale(LC_CTYPE, "C");
2497 locale_set = 0;
2498 }
2499 }
2500
2501 if (infile == stdin) fprintf(outfile, "\n");
2502
2503 EXIT:
2504
2505 if (infile != NULL && infile != stdin) fclose(infile);
2506 if (outfile != NULL && outfile != stdout) fclose(outfile);
2507
2508 free(buffer);
2509 free(dbuffer);
2510 free(pbuffer);
2511 free(offsets);
2512
2513 return yield;
2514 }
2515
2516 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12