/[pcre]/code/tags/pcre-8.01/pcretest.c
ViewVC logotype

Contents of /code/tags/pcre-8.01/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 490 - (show annotations) (download)
Tue Jan 19 16:45:59 2010 UTC (4 years, 11 months ago) by ph10
File MIME type: text/plain
File size: 74088 byte(s)
Tag release 8.01.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <locale.h>
49 #include <errno.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #ifndef isatty
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
78 #ifndef fileno
79 #define fileno _fileno
80 #endif
81
82 #else
83 #include <sys/time.h> /* These two includes are needed */
84 #include <sys/resource.h> /* for setrlimit(). */
85 #define INPUT_MODE "rb"
86 #define OUTPUT_MODE "wb"
87 #endif
88
89
90 /* We have to include pcre_internal.h because we need the internal info for
91 displaying the results of pcre_study() and we also need to know about the
92 internal macros, structures, and other internal data values; pcretest has
93 "inside information" compared to a program that strictly follows the PCRE API.
94
95 Although pcre_internal.h does itself include pcre.h, we explicitly include it
96 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
97 appropriately for an application, not for building PCRE. */
98
99 #include "pcre.h"
100 #include "pcre_internal.h"
101
102 /* We need access to some of the data tables that PCRE uses. So as not to have
103 to keep two copies, we include the source file here, changing the names of the
104 external symbols to prevent clashes. */
105
106 #define _pcre_ucp_gentype ucp_gentype
107 #define _pcre_utf8_table1 utf8_table1
108 #define _pcre_utf8_table1_size utf8_table1_size
109 #define _pcre_utf8_table2 utf8_table2
110 #define _pcre_utf8_table3 utf8_table3
111 #define _pcre_utf8_table4 utf8_table4
112 #define _pcre_utt utt
113 #define _pcre_utt_size utt_size
114 #define _pcre_utt_names utt_names
115 #define _pcre_OP_lengths OP_lengths
116
117 #include "pcre_tables.c"
118
119 /* We also need the pcre_printint() function for printing out compiled
120 patterns. This function is in a separate file so that it can be included in
121 pcre_compile.c when that module is compiled with debugging enabled.
122
123 The definition of the macro PRINTABLE, which determines whether to print an
124 output character as-is or as a hex value when showing compiled patterns, is
125 contained in this file. We use it here also, in cases when the locale has not
126 been explicitly changed, so as to get consistent output from systems that
127 differ in their output from isprint() even in the "C" locale. */
128
129 #include "pcre_printint.src"
130
131 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
132
133
134 /* It is possible to compile this test program without including support for
135 testing the POSIX interface, though this is not available via the standard
136 Makefile. */
137
138 #if !defined NOPOSIX
139 #include "pcreposix.h"
140 #endif
141
142 /* It is also possible, for the benefit of the version currently imported into
143 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
144 interface to the DFA matcher (NODFA), and without the doublecheck of the old
145 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
146 UTF8 support if PCRE is built without it. */
147
148 #ifndef SUPPORT_UTF8
149 #ifndef NOUTF8
150 #define NOUTF8
151 #endif
152 #endif
153
154
155 /* Other parameters */
156
157 #ifndef CLOCKS_PER_SEC
158 #ifdef CLK_TCK
159 #define CLOCKS_PER_SEC CLK_TCK
160 #else
161 #define CLOCKS_PER_SEC 100
162 #endif
163 #endif
164
165 /* This is the default loop count for timing. */
166
167 #define LOOPREPEAT 500000
168
169 /* Static variables */
170
171 static FILE *outfile;
172 static int log_store = 0;
173 static int callout_count;
174 static int callout_extra;
175 static int callout_fail_count;
176 static int callout_fail_id;
177 static int debug_lengths;
178 static int first_callout;
179 static int locale_set = 0;
180 static int show_malloc;
181 static int use_utf8;
182 static size_t gotten_store;
183
184 /* The buffers grow automatically if very long input lines are encountered. */
185
186 static int buffer_size = 50000;
187 static uschar *buffer = NULL;
188 static uschar *dbuffer = NULL;
189 static uschar *pbuffer = NULL;
190
191
192
193 /*************************************************
194 * Read or extend an input line *
195 *************************************************/
196
197 /* Input lines are read into buffer, but both patterns and data lines can be
198 continued over multiple input lines. In addition, if the buffer fills up, we
199 want to automatically expand it so as to be able to handle extremely large
200 lines that are needed for certain stress tests. When the input buffer is
201 expanded, the other two buffers must also be expanded likewise, and the
202 contents of pbuffer, which are a copy of the input for callouts, must be
203 preserved (for when expansion happens for a data line). This is not the most
204 optimal way of handling this, but hey, this is just a test program!
205
206 Arguments:
207 f the file to read
208 start where in buffer to start (this *must* be within buffer)
209 prompt for stdin or readline()
210
211 Returns: pointer to the start of new data
212 could be a copy of start, or could be moved
213 NULL if no data read and EOF reached
214 */
215
216 static uschar *
217 extend_inputline(FILE *f, uschar *start, const char *prompt)
218 {
219 uschar *here = start;
220
221 for (;;)
222 {
223 int rlen = buffer_size - (here - buffer);
224
225 if (rlen > 1000)
226 {
227 int dlen;
228
229 /* If libreadline support is required, use readline() to read a line if the
230 input is a terminal. Note that readline() removes the trailing newline, so
231 we must put it back again, to be compatible with fgets(). */
232
233 #ifdef SUPPORT_LIBREADLINE
234 if (isatty(fileno(f)))
235 {
236 size_t len;
237 char *s = readline(prompt);
238 if (s == NULL) return (here == start)? NULL : start;
239 len = strlen(s);
240 if (len > 0) add_history(s);
241 if (len > rlen - 1) len = rlen - 1;
242 memcpy(here, s, len);
243 here[len] = '\n';
244 here[len+1] = 0;
245 free(s);
246 }
247 else
248 #endif
249
250 /* Read the next line by normal means, prompting if the file is stdin. */
251
252 {
253 if (f == stdin) printf(prompt);
254 if (fgets((char *)here, rlen, f) == NULL)
255 return (here == start)? NULL : start;
256 }
257
258 dlen = (int)strlen((char *)here);
259 if (dlen > 0 && here[dlen - 1] == '\n') return start;
260 here += dlen;
261 }
262
263 else
264 {
265 int new_buffer_size = 2*buffer_size;
266 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
267 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
268 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
269
270 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
271 {
272 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
273 exit(1);
274 }
275
276 memcpy(new_buffer, buffer, buffer_size);
277 memcpy(new_pbuffer, pbuffer, buffer_size);
278
279 buffer_size = new_buffer_size;
280
281 start = new_buffer + (start - buffer);
282 here = new_buffer + (here - buffer);
283
284 free(buffer);
285 free(dbuffer);
286 free(pbuffer);
287
288 buffer = new_buffer;
289 dbuffer = new_dbuffer;
290 pbuffer = new_pbuffer;
291 }
292 }
293
294 return NULL; /* Control never gets here */
295 }
296
297
298
299
300
301
302
303 /*************************************************
304 * Read number from string *
305 *************************************************/
306
307 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
308 around with conditional compilation, just do the job by hand. It is only used
309 for unpicking arguments, so just keep it simple.
310
311 Arguments:
312 str string to be converted
313 endptr where to put the end pointer
314
315 Returns: the value of the number, as an int
316 */
317
static int
get_value(unsigned char *str, unsigned char **endptr)
{
/* Skips leading white space, then converts a run of decimal digits. The
result saturates at the largest int instead of overflowing (signed overflow
is undefined behaviour); all the digits are still consumed, so *endptr always
points at the first character after the number. */

/* Largest int obtained without <limits.h>; assumes int and unsigned int have
the same number of value bits apart from the sign bit. */

const int ceiling = (int)(~0u >> 1);
int result = 0;

while (isspace(*str)) str++;

while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');
  if (result > (ceiling - digit) / 10)
    result = ceiling;
  else
    result = result * 10 + digit;
  }

*endptr = str;
return result;
}
327
328
329
330
331 /*************************************************
332 * Convert UTF-8 string to value *
333 *************************************************/
334
335 /* This function takes one or more bytes that represents a UTF-8 character,
336 and returns the value of the character.
337
338 Argument:
339 utf8bytes a pointer to the byte vector
340 vptr a pointer to an int to receive the value
341
342 Returns: > 0 => the number of bytes consumed
343 -6 to 0 => malformed UTF-8 character at offset = (-return)
344 */
345
346 #if !defined NOUTF8
347
348 static int
349 utf82ord(unsigned char *utf8bytes, int *vptr)
350 {
351 int c = *utf8bytes++;
352 int d = c;
353 int i, j, s;
354
355 for (i = -1; i < 6; i++) /* i is number of additional bytes */
356 {
357 if ((d & 0x80) == 0) break;
358 d <<= 1;
359 }
360
361 if (i == -1) { *vptr = c; return 1; } /* ascii character */
362 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
363
364 /* i now has a value in the range 1-5 */
365
366 s = 6*i;
367 d = (c & utf8_table3[i]) << s;
368
369 for (j = 0; j < i; j++)
370 {
371 c = *utf8bytes++;
372 if ((c & 0xc0) != 0x80) return -(j+1);
373 s -= 6;
374 d |= (c & 0x3f) << s;
375 }
376
377 /* Check that encoding was the correct unique one */
378
379 for (j = 0; j < utf8_table1_size; j++)
380 if (d <= utf8_table1[j]) break;
381 if (j != i) return -(i+1);
382
383 /* Valid value */
384
385 *vptr = d;
386 return i+1;
387 }
388
389 #endif
390
391
392
393 /*************************************************
394 * Convert character value to UTF-8 *
395 *************************************************/
396
397 /* This function takes an integer value in the range 0 - 0x7fffffff
398 and encodes it as a UTF-8 character in 1 to 6 bytes.
399
400 Arguments:
401 cvalue the character value
402 utf8bytes pointer to buffer for result - at least 6 bytes long
403
404 Returns: number of characters placed in the buffer
405 */
406
407 #if !defined NOUTF8
408
409 static int
410 ord2utf8(int cvalue, uschar *utf8bytes)
411 {
412 register int i, j;
413 for (i = 0; i < utf8_table1_size; i++)
414 if (cvalue <= utf8_table1[i]) break;
415 utf8bytes += i;
416 for (j = i; j > 0; j--)
417 {
418 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
419 cvalue >>= 6;
420 }
421 *utf8bytes = utf8_table2[i] | cvalue;
422 return i + 1;
423 }
424
425 #endif
426
427
428
429 /*************************************************
430 * Print character string *
431 *************************************************/
432
433 /* Character string printing function. Must handle UTF-8 strings in utf8
434 mode. Yields number of characters printed. If handed a NULL file, just counts
435 chars without printing. */
436
437 static int pchars(unsigned char *p, int length, FILE *f)
438 {
439 int c = 0;
440 int yield = 0;
441
442 while (length-- > 0)
443 {
444 #if !defined NOUTF8
445 if (use_utf8)
446 {
447 int rc = utf82ord(p, &c);
448
449 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
450 {
451 length -= rc - 1;
452 p += rc;
453 if (PRINTHEX(c))
454 {
455 if (f != NULL) fprintf(f, "%c", c);
456 yield++;
457 }
458 else
459 {
460 int n = 4;
461 if (f != NULL) fprintf(f, "\\x{%02x}", c);
462 yield += (n <= 0x000000ff)? 2 :
463 (n <= 0x00000fff)? 3 :
464 (n <= 0x0000ffff)? 4 :
465 (n <= 0x000fffff)? 5 : 6;
466 }
467 continue;
468 }
469 }
470 #endif
471
472 /* Not UTF-8, or malformed UTF-8 */
473
474 c = *p++;
475 if (PRINTHEX(c))
476 {
477 if (f != NULL) fprintf(f, "%c", c);
478 yield++;
479 }
480 else
481 {
482 if (f != NULL) fprintf(f, "\\x%02x", c);
483 yield += 4;
484 }
485 }
486
487 return yield;
488 }
489
490
491
492 /*************************************************
493 * Callout function *
494 *************************************************/
495
496 /* Called from PCRE as a result of the (?C) item. We print out where we are in
497 the match. Yield zero unless more callouts than the fail count, or the callout
498 data is not zero. */
499
500 static int callout(pcre_callout_block *cb)
501 {
502 FILE *f = (first_callout | callout_extra)? outfile : NULL;
503 int i, pre_start, post_start, subject_length;
504
505 if (callout_extra)
506 {
507 fprintf(f, "Callout %d: last capture = %d\n",
508 cb->callout_number, cb->capture_last);
509
510 for (i = 0; i < cb->capture_top * 2; i += 2)
511 {
512 if (cb->offset_vector[i] < 0)
513 fprintf(f, "%2d: <unset>\n", i/2);
514 else
515 {
516 fprintf(f, "%2d: ", i/2);
517 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
518 cb->offset_vector[i+1] - cb->offset_vector[i], f);
519 fprintf(f, "\n");
520 }
521 }
522 }
523
524 /* Re-print the subject in canonical form, the first time or if giving full
525 datails. On subsequent calls in the same match, we use pchars just to find the
526 printed lengths of the substrings. */
527
528 if (f != NULL) fprintf(f, "--->");
529
530 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
531 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
532 cb->current_position - cb->start_match, f);
533
534 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
535
536 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
537 cb->subject_length - cb->current_position, f);
538
539 if (f != NULL) fprintf(f, "\n");
540
541 /* Always print appropriate indicators, with callout number if not already
542 shown. For automatic callouts, show the pattern offset. */
543
544 if (cb->callout_number == 255)
545 {
546 fprintf(outfile, "%+3d ", cb->pattern_position);
547 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
548 }
549 else
550 {
551 if (callout_extra) fprintf(outfile, " ");
552 else fprintf(outfile, "%3d ", cb->callout_number);
553 }
554
555 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
556 fprintf(outfile, "^");
557
558 if (post_start > 0)
559 {
560 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
561 fprintf(outfile, "^");
562 }
563
564 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
565 fprintf(outfile, " ");
566
567 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
568 pbuffer + cb->pattern_position);
569
570 fprintf(outfile, "\n");
571 first_callout = 0;
572
573 if (cb->callout_data != NULL)
574 {
575 int callout_data = *((int *)(cb->callout_data));
576 if (callout_data != 0)
577 {
578 fprintf(outfile, "Callout data = %d\n", callout_data);
579 return callout_data;
580 }
581 }
582
583 return (cb->callout_number != callout_fail_id)? 0 :
584 (++callout_count >= callout_fail_count)? 1 : 0;
585 }
586
587
588 /*************************************************
589 * Local malloc functions *
590 *************************************************/
591
592 /* Alternative malloc function, to test functionality and show the size of the
593 compiled re. */
594
595 static void *new_malloc(size_t size)
596 {
597 void *block = malloc(size);
598 gotten_store = size;
599 if (show_malloc)
600 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
601 return block;
602 }
603
604 static void new_free(void *block)
605 {
606 if (show_malloc)
607 fprintf(outfile, "free %p\n", block);
608 free(block);
609 }
610
611
612 /* For recursion malloc/free, to test stacking calls */
613
614 static void *stack_malloc(size_t size)
615 {
616 void *block = malloc(size);
617 if (show_malloc)
618 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
619 return block;
620 }
621
622 static void stack_free(void *block)
623 {
624 if (show_malloc)
625 fprintf(outfile, "stack_free %p\n", block);
626 free(block);
627 }
628
629
630 /*************************************************
631 * Call pcre_fullinfo() *
632 *************************************************/
633
634 /* Get one piece of information from the pcre_fullinfo() function */
635
636 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
637 {
638 int rc;
639 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
640 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
641 }
642
643
644
645 /*************************************************
646 * Byte flipping function *
647 *************************************************/
648
/* Reverse the byte order of the low-order bytes of a value. When n is 2 the
two lowest bytes are swapped; for any other n the four lowest bytes are
reversed. Bits above those bytes do not appear in the result. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;

return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
658
659
660
661
662 /*************************************************
663 * Check match or recursion limit *
664 *************************************************/
665
666 static int
667 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
668 int start_offset, int options, int *use_offsets, int use_size_offsets,
669 int flag, unsigned long int *limit, int errnumber, const char *msg)
670 {
671 int count;
672 int min = 0;
673 int mid = 64;
674 int max = -1;
675
676 extra->flags |= flag;
677
678 for (;;)
679 {
680 *limit = mid;
681
682 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
683 use_offsets, use_size_offsets);
684
685 if (count == errnumber)
686 {
687 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
688 min = mid;
689 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
690 }
691
692 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
693 count == PCRE_ERROR_PARTIAL)
694 {
695 if (mid == min + 1)
696 {
697 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
698 break;
699 }
700 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
701 max = mid;
702 mid = (min + mid)/2;
703 }
704 else break; /* Some other error */
705 }
706
707 extra->flags &= ~flag;
708 return count;
709 }
710
711
712
713 /*************************************************
714 * Case-independent strncmp() function *
715 *************************************************/
716
717 /*
718 Arguments:
719 s first string
720 t second string
721 n number of characters to compare
722
723 Returns: < 0, = 0, or > 0, according to the comparison
724 */
725
726 static int
727 strncmpic(uschar *s, uschar *t, int n)
728 {
729 while (n--)
730 {
731 int c = tolower(*s++) - tolower(*t++);
732 if (c) return c;
733 }
734 return 0;
735 }
736
737
738
739 /*************************************************
740 * Check newline indicator *
741 *************************************************/
742
743 /* This is used both at compile and run-time to check for <xxx> escapes, where
744 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
745 no match.
746
747 Arguments:
748 p points after the leading '<'
749 f file for error message
750
751 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
752 */
753
754 static int
755 check_newline(uschar *p, FILE *f)
756 {
757 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
758 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
759 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
760 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
761 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
762 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
763 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
764 fprintf(f, "Unknown newline type at: <%s\n", p);
765 return 0;
766 }
767
768
769
770 /*************************************************
771 * Usage function *
772 *************************************************/
773
static void
usage(void)
{
/* Writes the command-line summary to stdout. The lines for readline, DFA
and POSIX support depend on how the program was built. */

fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s output store (memory) used information\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
807
808
809
810 /*************************************************
811 * Main Program *
812 *************************************************/
813
814 /* Read lines from named file or stdin and write to named file or stdout; lines
815 consist of a regular expression, in delimiters and optionally followed by
816 options, followed by a set of test data, terminated by an empty line. */
817
818 int main(int argc, char **argv)
819 {
820 FILE *infile = stdin;
821 int options = 0;
822 int study_options = 0;
823 int default_find_match_limit = FALSE;
824 int op = 1;
825 int timeit = 0;
826 int timeitm = 0;
827 int showinfo = 0;
828 int showstore = 0;
829 int quiet = 0;
830 int size_offsets = 45;
831 int size_offsets_max;
832 int *offsets = NULL;
833 #if !defined NOPOSIX
834 int posix = 0;
835 #endif
836 int debug = 0;
837 int done = 0;
838 int all_use_dfa = 0;
839 int yield = 0;
840 int stack_size;
841
842 /* These vectors store, end-to-end, a list of captured substring names. Assume
843 that 1024 is plenty long enough for the few names we'll be testing. */
844
845 uschar copynames[1024];
846 uschar getnames[1024];
847
848 uschar *copynamesptr;
849 uschar *getnamesptr;
850
851 /* Get buffers from malloc() so that Electric Fence will check their misuse
852 when I am debugging. They grow automatically when very long lines are read. */
853
854 buffer = (unsigned char *)malloc(buffer_size);
855 dbuffer = (unsigned char *)malloc(buffer_size);
856 pbuffer = (unsigned char *)malloc(buffer_size);
857
858 /* The outfile variable is static so that new_malloc can use it. */
859
860 outfile = stdout;
861
862 /* The following _setmode() stuff is some Windows magic that tells its runtime
863 library to translate CRLF into a single LF character. At least, that's what
864 I've been told: never having used Windows I take this all on trust. Originally
865 it set 0x8000, but then I was advised that _O_BINARY was better. */
866
867 #if defined(_WIN32) || defined(WIN32)
868 _setmode( _fileno( stdout ), _O_BINARY );
869 #endif
870
871 /* Scan options */
872
873 while (argc > 1 && argv[op][0] == '-')
874 {
875 unsigned char *endptr;
876
877 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
878 showstore = 1;
879 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
880 else if (strcmp(argv[op], "-b") == 0) debug = 1;
881 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
882 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
883 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
884 #if !defined NODFA
885 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
886 #endif
887 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
888 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
889 *endptr == 0))
890 {
891 op++;
892 argc--;
893 }
894 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
895 {
896 int both = argv[op][2] == 0;
897 int temp;
898 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
899 *endptr == 0))
900 {
901 timeitm = temp;
902 op++;
903 argc--;
904 }
905 else timeitm = LOOPREPEAT;
906 if (both) timeit = timeitm;
907 }
908 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
909 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
910 *endptr == 0))
911 {
912 #if defined(_WIN32) || defined(WIN32)
913 printf("PCRE: -S not supported on this OS\n");
914 exit(1);
915 #else
916 int rc;
917 struct rlimit rlim;
918 getrlimit(RLIMIT_STACK, &rlim);
919 rlim.rlim_cur = stack_size * 1024 * 1024;
920 rc = setrlimit(RLIMIT_STACK, &rlim);
921 if (rc != 0)
922 {
923 printf("PCRE: setrlimit() failed with error %d\n", rc);
924 exit(1);
925 }
926 op++;
927 argc--;
928 #endif
929 }
930 #if !defined NOPOSIX
931 else if (strcmp(argv[op], "-p") == 0) posix = 1;
932 #endif
933 else if (strcmp(argv[op], "-C") == 0)
934 {
935 int rc;
936 unsigned long int lrc;
937 printf("PCRE version %s\n", pcre_version());
938 printf("Compiled with\n");
939 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
940 printf(" %sUTF-8 support\n", rc? "" : "No ");
941 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
942 printf(" %sUnicode properties support\n", rc? "" : "No ");
943 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
944 /* Note that these values are always the ASCII values, even
945 in EBCDIC environments. CR is 13 and NL is 10. */
946 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
947 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
948 (rc == -2)? "ANYCRLF" :
949 (rc == -1)? "ANY" : "???");
950 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
951 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
952 "all Unicode newlines");
953 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
954 printf(" Internal link size = %d\n", rc);
955 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
956 printf(" POSIX malloc threshold = %d\n", rc);
957 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
958 printf(" Default match limit = %ld\n", lrc);
959 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
960 printf(" Default recursion depth limit = %ld\n", lrc);
961 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
962 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
963 goto EXIT;
964 }
965 else if (strcmp(argv[op], "-help") == 0 ||
966 strcmp(argv[op], "--help") == 0)
967 {
968 usage();
969 goto EXIT;
970 }
971 else
972 {
973 printf("** Unknown or malformed option %s\n", argv[op]);
974 usage();
975 yield = 1;
976 goto EXIT;
977 }
978 op++;
979 argc--;
980 }
981
982 /* Get the store for the offsets vector, and remember what it was */
983
984 size_offsets_max = size_offsets;
985 offsets = (int *)malloc(size_offsets_max * sizeof(int));
986 if (offsets == NULL)
987 {
988 printf("** Failed to get %d bytes of memory for offsets vector\n",
989 (int)(size_offsets_max * sizeof(int)));
990 yield = 1;
991 goto EXIT;
992 }
993
994 /* Sort out the input and output files */
995
996 if (argc > 1)
997 {
998 infile = fopen(argv[op], INPUT_MODE);
999 if (infile == NULL)
1000 {
1001 printf("** Failed to open %s\n", argv[op]);
1002 yield = 1;
1003 goto EXIT;
1004 }
1005 }
1006
1007 if (argc > 2)
1008 {
1009 outfile = fopen(argv[op+1], OUTPUT_MODE);
1010 if (outfile == NULL)
1011 {
1012 printf("** Failed to open %s\n", argv[op+1]);
1013 yield = 1;
1014 goto EXIT;
1015 }
1016 }
1017
1018 /* Set alternative malloc function */
1019
1020 pcre_malloc = new_malloc;
1021 pcre_free = new_free;
1022 pcre_stack_malloc = stack_malloc;
1023 pcre_stack_free = stack_free;
1024
1025 /* Heading line unless quiet, then prompt for first regex if stdin */
1026
1027 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1028
1029 /* Main loop */
1030
1031 while (!done)
1032 {
1033 pcre *re = NULL;
1034 pcre_extra *extra = NULL;
1035
1036 #if !defined NOPOSIX /* There are still compilers that require no indent */
1037 regex_t preg;
1038 int do_posix = 0;
1039 #endif
1040
1041 const char *error;
1042 unsigned char *p, *pp, *ppp;
1043 unsigned char *to_file = NULL;
1044 const unsigned char *tables = NULL;
1045 unsigned long int true_size, true_study_size = 0;
1046 size_t size, regex_gotten_store;
1047 int do_study = 0;
1048 int do_debug = debug;
1049 int do_G = 0;
1050 int do_g = 0;
1051 int do_showinfo = showinfo;
1052 int do_showrest = 0;
1053 int do_flip = 0;
1054 int erroroffset, len, delimiter, poffset;
1055
1056 use_utf8 = 0;
1057 debug_lengths = 1;
1058
1059 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1060 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1061 fflush(outfile);
1062
1063 p = buffer;
1064 while (isspace(*p)) p++;
1065 if (*p == 0) continue;
1066
1067 /* See if the pattern is to be loaded pre-compiled from a file. */
1068
1069 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1070 {
1071 unsigned long int magic, get_options;
1072 uschar sbuf[8];
1073 FILE *f;
1074
1075 p++;
1076 pp = p + (int)strlen((char *)p);
1077 while (isspace(pp[-1])) pp--;
1078 *pp = 0;
1079
1080 f = fopen((char *)p, "rb");
1081 if (f == NULL)
1082 {
1083 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1084 continue;
1085 }
1086
1087 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1088
1089 true_size =
1090 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1091 true_study_size =
1092 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1093
1094 re = (real_pcre *)new_malloc(true_size);
1095 regex_gotten_store = gotten_store;
1096
1097 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1098
1099 magic = ((real_pcre *)re)->magic_number;
1100 if (magic != MAGIC_NUMBER)
1101 {
1102 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1103 {
1104 do_flip = 1;
1105 }
1106 else
1107 {
1108 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1109 fclose(f);
1110 continue;
1111 }
1112 }
1113
1114 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1115 do_flip? " (byte-inverted)" : "", p);
1116
1117 /* Need to know if UTF-8 for printing data strings */
1118
1119 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1120 use_utf8 = (get_options & PCRE_UTF8) != 0;
1121
1122 /* Now see if there is any following study data */
1123
1124 if (true_study_size != 0)
1125 {
1126 pcre_study_data *psd;
1127
1128 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1129 extra->flags = PCRE_EXTRA_STUDY_DATA;
1130
1131 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1132 extra->study_data = psd;
1133
1134 if (fread(psd, 1, true_study_size, f) != true_study_size)
1135 {
1136 FAIL_READ:
1137 fprintf(outfile, "Failed to read data from %s\n", p);
1138 if (extra != NULL) new_free(extra);
1139 if (re != NULL) new_free(re);
1140 fclose(f);
1141 continue;
1142 }
1143 fprintf(outfile, "Study data loaded from %s\n", p);
1144 do_study = 1; /* To get the data output if requested */
1145 }
1146 else fprintf(outfile, "No study data\n");
1147
1148 fclose(f);
1149 goto SHOW_INFO;
1150 }
1151
1152 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1153 the pattern; if it isn't complete, read more. */
1154
1155 delimiter = *p++;
1156
1157 if (isalnum(delimiter) || delimiter == '\\')
1158 {
1159 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1160 goto SKIP_DATA;
1161 }
1162
1163 pp = p;
1164 poffset = p - buffer;
1165
1166 for(;;)
1167 {
1168 while (*pp != 0)
1169 {
1170 if (*pp == '\\' && pp[1] != 0) pp++;
1171 else if (*pp == delimiter) break;
1172 pp++;
1173 }
1174 if (*pp != 0) break;
1175 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1176 {
1177 fprintf(outfile, "** Unexpected EOF\n");
1178 done = 1;
1179 goto CONTINUE;
1180 }
1181 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1182 }
1183
1184 /* The buffer may have moved while being extended; reset the start of data
1185 pointer to the correct relative point in the buffer. */
1186
1187 p = buffer + poffset;
1188
1189 /* If the first character after the delimiter is backslash, make
1190 the pattern end with backslash. This is purely to provide a way
1191 of testing for the error message when a pattern ends with backslash. */
1192
1193 if (pp[1] == '\\') *pp++ = '\\';
1194
1195 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1196 for callouts. */
1197
1198 *pp++ = 0;
1199 strcpy((char *)pbuffer, (char *)p);
1200
1201 /* Look for options after final delimiter */
1202
1203 options = 0;
1204 study_options = 0;
1205 log_store = showstore; /* default from command line */
1206
1207 while (*pp != 0)
1208 {
1209 switch (*pp++)
1210 {
1211 case 'f': options |= PCRE_FIRSTLINE; break;
1212 case 'g': do_g = 1; break;
1213 case 'i': options |= PCRE_CASELESS; break;
1214 case 'm': options |= PCRE_MULTILINE; break;
1215 case 's': options |= PCRE_DOTALL; break;
1216 case 'x': options |= PCRE_EXTENDED; break;
1217
1218 case '+': do_showrest = 1; break;
1219 case 'A': options |= PCRE_ANCHORED; break;
1220 case 'B': do_debug = 1; break;
1221 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1222 case 'D': do_debug = do_showinfo = 1; break;
1223 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1224 case 'F': do_flip = 1; break;
1225 case 'G': do_G = 1; break;
1226 case 'I': do_showinfo = 1; break;
1227 case 'J': options |= PCRE_DUPNAMES; break;
1228 case 'M': log_store = 1; break;
1229 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1230
1231 #if !defined NOPOSIX
1232 case 'P': do_posix = 1; break;
1233 #endif
1234
1235 case 'S': do_study = 1; break;
1236 case 'U': options |= PCRE_UNGREEDY; break;
1237 case 'X': options |= PCRE_EXTRA; break;
1238 case 'Z': debug_lengths = 0; break;
1239 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1240 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1241
1242 case 'L':
1243 ppp = pp;
1244 /* The '\r' test here is so that it works on Windows. */
1245 /* The zero-byte test is just in case this is an unterminated line. */
1246 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1247 *ppp = 0;
1248 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1249 {
1250 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1251 goto SKIP_DATA;
1252 }
1253 locale_set = 1;
1254 tables = pcre_maketables();
1255 pp = ppp;
1256 break;
1257
1258 case '>':
1259 to_file = pp;
1260 while (*pp != 0) pp++;
1261 while (isspace(pp[-1])) pp--;
1262 *pp = 0;
1263 break;
1264
1265 case '<':
1266 {
1267 if (strncmp((char *)pp, "JS>", 3) == 0)
1268 {
1269 options |= PCRE_JAVASCRIPT_COMPAT;
1270 pp += 3;
1271 }
1272 else
1273 {
1274 int x = check_newline(pp, outfile);
1275 if (x == 0) goto SKIP_DATA;
1276 options |= x;
1277 while (*pp++ != '>');
1278 }
1279 }
1280 break;
1281
1282 case '\r': /* So that it works in Windows */
1283 case '\n':
1284 case ' ':
1285 break;
1286
1287 default:
1288 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1289 goto SKIP_DATA;
1290 }
1291 }
1292
1293 /* Handle compiling via the POSIX interface, which doesn't support the
1294 timing, showing, or debugging options, nor the ability to pass over
1295 local character tables. */
1296
1297 #if !defined NOPOSIX
1298 if (posix || do_posix)
1299 {
1300 int rc;
1301 int cflags = 0;
1302
1303 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1304 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1305 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1306 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1307 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1308 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1309
1310 rc = regcomp(&preg, (char *)p, cflags);
1311
1312 /* Compilation failed; go back for another re, skipping to blank line
1313 if non-interactive. */
1314
1315 if (rc != 0)
1316 {
1317 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1318 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1319 goto SKIP_DATA;
1320 }
1321 }
1322
1323 /* Handle compiling via the native interface */
1324
1325 else
1326 #endif /* !defined NOPOSIX */
1327
1328 {
1329 unsigned long int get_options;
1330
1331 if (timeit > 0)
1332 {
1333 register int i;
1334 clock_t time_taken;
1335 clock_t start_time = clock();
1336 for (i = 0; i < timeit; i++)
1337 {
1338 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1339 if (re != NULL) free(re);
1340 }
1341 time_taken = clock() - start_time;
1342 fprintf(outfile, "Compile time %.4f milliseconds\n",
1343 (((double)time_taken * 1000.0) / (double)timeit) /
1344 (double)CLOCKS_PER_SEC);
1345 }
1346
1347 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1348
1349 /* Compilation failed; go back for another re, skipping to blank line
1350 if non-interactive. */
1351
1352 if (re == NULL)
1353 {
1354 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1355 SKIP_DATA:
1356 if (infile != stdin)
1357 {
1358 for (;;)
1359 {
1360 if (extend_inputline(infile, buffer, NULL) == NULL)
1361 {
1362 done = 1;
1363 goto CONTINUE;
1364 }
1365 len = (int)strlen((char *)buffer);
1366 while (len > 0 && isspace(buffer[len-1])) len--;
1367 if (len == 0) break;
1368 }
1369 fprintf(outfile, "\n");
1370 }
1371 goto CONTINUE;
1372 }
1373
1374 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1375 within the regex; check for this so that we know how to process the data
1376 lines. */
1377
1378 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1379 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1380
1381 /* Print information if required. There are now two info-returning
1382 functions. The old one has a limited interface and returns only limited
1383 data. Check that it agrees with the newer one. */
1384
1385 if (log_store)
1386 fprintf(outfile, "Memory allocation (code space): %d\n",
1387 (int)(gotten_store -
1388 sizeof(real_pcre) -
1389 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1390
1391 /* Extract the size for possible writing before possibly flipping it,
1392 and remember the store that was got. */
1393
1394 true_size = ((real_pcre *)re)->size;
1395 regex_gotten_store = gotten_store;
1396
1397 /* If /S was present, study the regexp to generate additional info to
1398 help with the matching. */
1399
1400 if (do_study)
1401 {
1402 if (timeit > 0)
1403 {
1404 register int i;
1405 clock_t time_taken;
1406 clock_t start_time = clock();
1407 for (i = 0; i < timeit; i++)
1408 extra = pcre_study(re, study_options, &error);
1409 time_taken = clock() - start_time;
1410 if (extra != NULL) free(extra);
1411 fprintf(outfile, " Study time %.4f milliseconds\n",
1412 (((double)time_taken * 1000.0) / (double)timeit) /
1413 (double)CLOCKS_PER_SEC);
1414 }
1415 extra = pcre_study(re, study_options, &error);
1416 if (error != NULL)
1417 fprintf(outfile, "Failed to study: %s\n", error);
1418 else if (extra != NULL)
1419 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1420 }
1421
1422 /* If the 'F' option was present, we flip the bytes of all the integer
1423 fields in the regex data block and the study block. This is to make it
1424 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1425 compiled on a different architecture. */
1426
1427 if (do_flip)
1428 {
1429 real_pcre *rre = (real_pcre *)re;
1430 rre->magic_number =
1431 byteflip(rre->magic_number, sizeof(rre->magic_number));
1432 rre->size = byteflip(rre->size, sizeof(rre->size));
1433 rre->options = byteflip(rre->options, sizeof(rre->options));
1434 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1435 rre->top_bracket =
1436 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1437 rre->top_backref =
1438 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1439 rre->first_byte =
1440 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1441 rre->req_byte =
1442 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1443 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1444 sizeof(rre->name_table_offset));
1445 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1446 sizeof(rre->name_entry_size));
1447 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1448 sizeof(rre->name_count));
1449
1450 if (extra != NULL)
1451 {
1452 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1453 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1454 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1455 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1456 }
1457 }
1458
1459 /* Extract information from the compiled data if required */
1460
1461 SHOW_INFO:
1462
1463 if (do_debug)
1464 {
1465 fprintf(outfile, "------------------------------------------------------------------\n");
1466 pcre_printint(re, outfile, debug_lengths);
1467 }
1468
1469 /* We already have the options in get_options (see above) */
1470
1471 if (do_showinfo)
1472 {
1473 unsigned long int all_options;
1474 #if !defined NOINFOCHECK
1475 int old_first_char, old_options, old_count;
1476 #endif
1477 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1478 hascrorlf;
1479 int nameentrysize, namecount;
1480 const uschar *nametable;
1481
1482 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1483 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1484 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1485 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1486 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1487 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1488 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1489 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1490 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1491 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1492 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1493
1494 #if !defined NOINFOCHECK
1495 old_count = pcre_info(re, &old_options, &old_first_char);
1496 if (count < 0) fprintf(outfile,
1497 "Error %d from pcre_info()\n", count);
1498 else
1499 {
1500 if (old_count != count) fprintf(outfile,
1501 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1502 old_count);
1503
1504 if (old_first_char != first_char) fprintf(outfile,
1505 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1506 first_char, old_first_char);
1507
1508 if (old_options != (int)get_options) fprintf(outfile,
1509 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1510 get_options, old_options);
1511 }
1512 #endif
1513
1514 if (size != regex_gotten_store) fprintf(outfile,
1515 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1516 (int)size, (int)regex_gotten_store);
1517
1518 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1519 if (backrefmax > 0)
1520 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1521
1522 if (namecount > 0)
1523 {
1524 fprintf(outfile, "Named capturing subpatterns:\n");
1525 while (namecount-- > 0)
1526 {
1527 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1528 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1529 GET2(nametable, 0));
1530 nametable += nameentrysize;
1531 }
1532 }
1533
1534 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1535 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1536
1537 all_options = ((real_pcre *)re)->options;
1538 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1539
1540 if (get_options == 0) fprintf(outfile, "No options\n");
1541 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1542 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1543 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1544 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1545 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1546 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1547 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1548 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1549 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1550 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1551 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1552 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1553 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1554 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1555 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1556 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1557
1558 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1559
1560 switch (get_options & PCRE_NEWLINE_BITS)
1561 {
1562 case PCRE_NEWLINE_CR:
1563 fprintf(outfile, "Forced newline sequence: CR\n");
1564 break;
1565
1566 case PCRE_NEWLINE_LF:
1567 fprintf(outfile, "Forced newline sequence: LF\n");
1568 break;
1569
1570 case PCRE_NEWLINE_CRLF:
1571 fprintf(outfile, "Forced newline sequence: CRLF\n");
1572 break;
1573
1574 case PCRE_NEWLINE_ANYCRLF:
1575 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1576 break;
1577
1578 case PCRE_NEWLINE_ANY:
1579 fprintf(outfile, "Forced newline sequence: ANY\n");
1580 break;
1581
1582 default:
1583 break;
1584 }
1585
1586 if (first_char == -1)
1587 {
1588 fprintf(outfile, "First char at start or follows newline\n");
1589 }
1590 else if (first_char < 0)
1591 {
1592 fprintf(outfile, "No first char\n");
1593 }
1594 else
1595 {
1596 int ch = first_char & 255;
1597 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1598 "" : " (caseless)";
1599 if (PRINTHEX(ch))
1600 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1601 else
1602 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1603 }
1604
1605 if (need_char < 0)
1606 {
1607 fprintf(outfile, "No need char\n");
1608 }
1609 else
1610 {
1611 int ch = need_char & 255;
1612 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1613 "" : " (caseless)";
1614 if (PRINTHEX(ch))
1615 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1616 else
1617 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1618 }
1619
1620 /* Don't output study size; at present it is in any case a fixed
1621 value, but it varies, depending on the computer architecture, and
1622 so messes up the test suite. (And with the /F option, it might be
1623 flipped.) */
1624
1625 if (do_study)
1626 {
1627 if (extra == NULL)
1628 fprintf(outfile, "Study returned NULL\n");
1629 else
1630 {
1631 uschar *start_bits = NULL;
1632 int minlength;
1633
1634 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
1635 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1636
1637 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1638 if (start_bits == NULL)
1639 fprintf(outfile, "No set of starting bytes\n");
1640 else
1641 {
1642 int i;
1643 int c = 24;
1644 fprintf(outfile, "Starting byte set: ");
1645 for (i = 0; i < 256; i++)
1646 {
1647 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1648 {
1649 if (c > 75)
1650 {
1651 fprintf(outfile, "\n ");
1652 c = 2;
1653 }
1654 if (PRINTHEX(i) && i != ' ')
1655 {
1656 fprintf(outfile, "%c ", i);
1657 c += 2;
1658 }
1659 else
1660 {
1661 fprintf(outfile, "\\x%02x ", i);
1662 c += 5;
1663 }
1664 }
1665 }
1666 fprintf(outfile, "\n");
1667 }
1668 }
1669 }
1670 }
1671
1672 /* If the '>' option was present, we write out the regex to a file, and
1673 that is all. The first 8 bytes of the file are the regex length and then
1674 the study length, in big-endian order. */
1675
1676 if (to_file != NULL)
1677 {
1678 FILE *f = fopen((char *)to_file, "wb");
1679 if (f == NULL)
1680 {
1681 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1682 }
1683 else
1684 {
1685 uschar sbuf[8];
1686 sbuf[0] = (uschar)((true_size >> 24) & 255);
1687 sbuf[1] = (uschar)((true_size >> 16) & 255);
1688 sbuf[2] = (uschar)((true_size >> 8) & 255);
1689 sbuf[3] = (uschar)((true_size) & 255);
1690
1691 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1692 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1693 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1694 sbuf[7] = (uschar)((true_study_size) & 255);
1695
1696 if (fwrite(sbuf, 1, 8, f) < 8 ||
1697 fwrite(re, 1, true_size, f) < true_size)
1698 {
1699 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1700 }
1701 else
1702 {
1703 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1704 if (extra != NULL)
1705 {
1706 if (fwrite(extra->study_data, 1, true_study_size, f) <
1707 true_study_size)
1708 {
1709 fprintf(outfile, "Write error on %s: %s\n", to_file,
1710 strerror(errno));
1711 }
1712 else fprintf(outfile, "Study data written to %s\n", to_file);
1713
1714 }
1715 }
1716 fclose(f);
1717 }
1718
1719 new_free(re);
1720 if (extra != NULL) new_free(extra);
1721 if (tables != NULL) new_free((void *)tables);
1722 continue; /* With next regex */
1723 }
1724 } /* End of non-POSIX compile */
1725
1726 /* Read data lines and test them */
1727
1728 for (;;)
1729 {
1730 uschar *q;
1731 uschar *bptr;
1732 int *use_offsets = offsets;
1733 int use_size_offsets = size_offsets;
1734 int callout_data = 0;
1735 int callout_data_set = 0;
1736 int count, c;
1737 int copystrings = 0;
1738 int find_match_limit = default_find_match_limit;
1739 int getstrings = 0;
1740 int getlist = 0;
1741 int gmatched = 0;
1742 int start_offset = 0;
1743 int g_notempty = 0;
1744 int use_dfa = 0;
1745
1746 options = 0;
1747
1748 *copynames = 0;
1749 *getnames = 0;
1750
1751 copynamesptr = copynames;
1752 getnamesptr = getnames;
1753
1754 pcre_callout = callout;
1755 first_callout = 1;
1756 callout_extra = 0;
1757 callout_count = 0;
1758 callout_fail_count = 999999;
1759 callout_fail_id = -1;
1760 show_malloc = 0;
1761
1762 if (extra != NULL) extra->flags &=
1763 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1764
1765 len = 0;
1766 for (;;)
1767 {
1768 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1769 {
1770 if (len > 0) break;
1771 done = 1;
1772 goto CONTINUE;
1773 }
1774 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1775 len = (int)strlen((char *)buffer);
1776 if (buffer[len-1] == '\n') break;
1777 }
1778
1779 while (len > 0 && isspace(buffer[len-1])) len--;
1780 buffer[len] = 0;
1781 if (len == 0) break;
1782
1783 p = buffer;
1784 while (isspace(*p)) p++;
1785
1786 bptr = q = dbuffer;
1787 while ((c = *p++) != 0)
1788 {
1789 int i = 0;
1790 int n = 0;
1791
1792 if (c == '\\') switch ((c = *p++))
1793 {
1794 case 'a': c = 7; break;
1795 case 'b': c = '\b'; break;
1796 case 'e': c = 27; break;
1797 case 'f': c = '\f'; break;
1798 case 'n': c = '\n'; break;
1799 case 'r': c = '\r'; break;
1800 case 't': c = '\t'; break;
1801 case 'v': c = '\v'; break;
1802
1803 case '0': case '1': case '2': case '3':
1804 case '4': case '5': case '6': case '7':
1805 c -= '0';
1806 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1807 c = c * 8 + *p++ - '0';
1808
1809 #if !defined NOUTF8
1810 if (use_utf8 && c > 255)
1811 {
1812 unsigned char buff8[8];
1813 int ii, utn;
1814 utn = ord2utf8(c, buff8);
1815 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1816 c = buff8[ii]; /* Last byte */
1817 }
1818 #endif
1819 break;
1820
1821 case 'x':
1822
1823 /* Handle \x{..} specially - new Perl thing for utf8 */
1824
1825 #if !defined NOUTF8
1826 if (*p == '{')
1827 {
1828 unsigned char *pt = p;
1829 c = 0;
1830 while (isxdigit(*(++pt)))
1831 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1832 if (*pt == '}')
1833 {
1834 unsigned char buff8[8];
1835 int ii, utn;
1836 if (use_utf8)
1837 {
1838 utn = ord2utf8(c, buff8);
1839 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1840 c = buff8[ii]; /* Last byte */
1841 }
1842 else
1843 {
1844 if (c > 255)
1845 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1846 "UTF-8 mode is not enabled.\n"
1847 "** Truncation will probably give the wrong result.\n", c);
1848 }
1849 p = pt + 1;
1850 break;
1851 }
1852 /* Not correct form; fall through */
1853 }
1854 #endif
1855
1856 /* Ordinary \x */
1857
1858 c = 0;
1859 while (i++ < 2 && isxdigit(*p))
1860 {
1861 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1862 p++;
1863 }
1864 break;
1865
1866 case 0: /* \ followed by EOF allows for an empty line */
1867 p--;
1868 continue;
1869
1870 case '>':
1871 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1872 continue;
1873
1874 case 'A': /* Option setting */
1875 options |= PCRE_ANCHORED;
1876 continue;
1877
1878 case 'B':
1879 options |= PCRE_NOTBOL;
1880 continue;
1881
1882 case 'C':
1883 if (isdigit(*p)) /* Set copy string */
1884 {
1885 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1886 copystrings |= 1 << n;
1887 }
1888 else if (isalnum(*p))
1889 {
1890 uschar *npp = copynamesptr;
1891 while (isalnum(*p)) *npp++ = *p++;
1892 *npp++ = 0;
1893 *npp = 0;
1894 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1895 if (n < 0)
1896 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1897 copynamesptr = npp;
1898 }
1899 else if (*p == '+')
1900 {
1901 callout_extra = 1;
1902 p++;
1903 }
1904 else if (*p == '-')
1905 {
1906 pcre_callout = NULL;
1907 p++;
1908 }
1909 else if (*p == '!')
1910 {
1911 callout_fail_id = 0;
1912 p++;
1913 while(isdigit(*p))
1914 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1915 callout_fail_count = 0;
1916 if (*p == '!')
1917 {
1918 p++;
1919 while(isdigit(*p))
1920 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1921 }
1922 }
1923 else if (*p == '*')
1924 {
1925 int sign = 1;
1926 callout_data = 0;
1927 if (*(++p) == '-') { sign = -1; p++; }
1928 while(isdigit(*p))
1929 callout_data = callout_data * 10 + *p++ - '0';
1930 callout_data *= sign;
1931 callout_data_set = 1;
1932 }
1933 continue;
1934
1935 #if !defined NODFA
1936 case 'D':
1937 #if !defined NOPOSIX
1938 if (posix || do_posix)
1939 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1940 else
1941 #endif
1942 use_dfa = 1;
1943 continue;
1944
1945 case 'F':
1946 options |= PCRE_DFA_SHORTEST;
1947 continue;
1948 #endif
1949
1950 case 'G':
1951 if (isdigit(*p))
1952 {
1953 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1954 getstrings |= 1 << n;
1955 }
1956 else if (isalnum(*p))
1957 {
1958 uschar *npp = getnamesptr;
1959 while (isalnum(*p)) *npp++ = *p++;
1960 *npp++ = 0;
1961 *npp = 0;
1962 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1963 if (n < 0)
1964 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1965 getnamesptr = npp;
1966 }
1967 continue;
1968
1969 case 'L':
1970 getlist = 1;
1971 continue;
1972
1973 case 'M':
1974 find_match_limit = 1;
1975 continue;
1976
1977 case 'N':
1978 if ((options & PCRE_NOTEMPTY) != 0)
1979 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
1980 else
1981 options |= PCRE_NOTEMPTY;
1982 continue;
1983
1984 case 'O':
1985 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1986 if (n > size_offsets_max)
1987 {
1988 size_offsets_max = n;
1989 free(offsets);
1990 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1991 if (offsets == NULL)
1992 {
1993 printf("** Failed to get %d bytes of memory for offsets vector\n",
1994 (int)(size_offsets_max * sizeof(int)));
1995 yield = 1;
1996 goto EXIT;
1997 }
1998 }
1999 use_size_offsets = n;
2000 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2001 continue;
2002
2003 case 'P':
2004 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2005 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2006 continue;
2007
2008 case 'Q':
2009 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2010 if (extra == NULL)
2011 {
2012 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2013 extra->flags = 0;
2014 }
2015 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2016 extra->match_limit_recursion = n;
2017 continue;
2018
2019 case 'q':
2020 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2021 if (extra == NULL)
2022 {
2023 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2024 extra->flags = 0;
2025 }
2026 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2027 extra->match_limit = n;
2028 continue;
2029
2030 #if !defined NODFA
2031 case 'R':
2032 options |= PCRE_DFA_RESTART;
2033 continue;
2034 #endif
2035
2036 case 'S':
2037 show_malloc = 1;
2038 continue;
2039
2040 case 'Y':
2041 options |= PCRE_NO_START_OPTIMIZE;
2042 continue;
2043
2044 case 'Z':
2045 options |= PCRE_NOTEOL;
2046 continue;
2047
2048 case '?':
2049 options |= PCRE_NO_UTF8_CHECK;
2050 continue;
2051
2052 case '<':
2053 {
2054 int x = check_newline(p, outfile);
2055 if (x == 0) goto NEXT_DATA;
2056 options |= x;
2057 while (*p++ != '>');
2058 }
2059 continue;
2060 }
2061 *q++ = c;
2062 }
2063 *q = 0;
2064 len = q - dbuffer;
2065
2066 /* Move the data to the end of the buffer so that a read over the end of
2067 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2068 we are using the POSIX interface, we must include the terminating zero. */
2069
2070 #if !defined NOPOSIX
2071 if (posix || do_posix)
2072 {
2073 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2074 bptr += buffer_size - len - 1;
2075 }
2076 else
2077 #endif
2078 {
2079 memmove(bptr + buffer_size - len, bptr, len);
2080 bptr += buffer_size - len;
2081 }
2082
2083 if ((all_use_dfa || use_dfa) && find_match_limit)
2084 {
2085 printf("**Match limit not relevant for DFA matching: ignored\n");
2086 find_match_limit = 0;
2087 }
2088
2089 /* Handle matching via the POSIX interface, which does not
2090 support timing or playing with the match limit or callout data. */
2091
2092 #if !defined NOPOSIX
2093 if (posix || do_posix)
2094 {
2095 int rc;
2096 int eflags = 0;
2097 regmatch_t *pmatch = NULL;
2098 if (use_size_offsets > 0)
2099 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2100 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2101 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2102 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2103
2104 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2105
2106 if (rc != 0)
2107 {
2108 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2109 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2110 }
2111 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2112 != 0)
2113 {
2114 fprintf(outfile, "Matched with REG_NOSUB\n");
2115 }
2116 else
2117 {
2118 size_t i;
2119 for (i = 0; i < (size_t)use_size_offsets; i++)
2120 {
2121 if (pmatch[i].rm_so >= 0)
2122 {
2123 fprintf(outfile, "%2d: ", (int)i);
2124 (void)pchars(dbuffer + pmatch[i].rm_so,
2125 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2126 fprintf(outfile, "\n");
2127 if (i == 0 && do_showrest)
2128 {
2129 fprintf(outfile, " 0+ ");
2130 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2131 outfile);
2132 fprintf(outfile, "\n");
2133 }
2134 }
2135 }
2136 }
2137 free(pmatch);
2138 }
2139
2140 /* Handle matching via the native interface - repeats for /g and /G */
2141
2142 else
2143 #endif /* !defined NOPOSIX */
2144
2145 for (;; gmatched++) /* Loop for /g or /G */
2146 {
2147 if (timeitm > 0)
2148 {
2149 register int i;
2150 clock_t time_taken;
2151 clock_t start_time = clock();
2152
2153 #if !defined NODFA
2154 if (all_use_dfa || use_dfa)
2155 {
2156 int workspace[1000];
2157 for (i = 0; i < timeitm; i++)
2158 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2159 options | g_notempty, use_offsets, use_size_offsets, workspace,
2160 sizeof(workspace)/sizeof(int));
2161 }
2162 else
2163 #endif
2164
2165 for (i = 0; i < timeitm; i++)
2166 count = pcre_exec(re, extra, (char *)bptr, len,
2167 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2168
2169 time_taken = clock() - start_time;
2170 fprintf(outfile, "Execute time %.4f milliseconds\n",
2171 (((double)time_taken * 1000.0) / (double)timeitm) /
2172 (double)CLOCKS_PER_SEC);
2173 }
2174
2175 /* If find_match_limit is set, we want to do repeated matches with
2176 varying limits in order to find the minimum value for the match limit and
2177 for the recursion limit. */
2178
2179 if (find_match_limit)
2180 {
2181 if (extra == NULL)
2182 {
2183 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2184 extra->flags = 0;
2185 }
2186
2187 (void)check_match_limit(re, extra, bptr, len, start_offset,
2188 options|g_notempty, use_offsets, use_size_offsets,
2189 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2190 PCRE_ERROR_MATCHLIMIT, "match()");
2191
2192 count = check_match_limit(re, extra, bptr, len, start_offset,
2193 options|g_notempty, use_offsets, use_size_offsets,
2194 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2195 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2196 }
2197
2198 /* If callout_data is set, use the interface with additional data */
2199
2200 else if (callout_data_set)
2201 {
2202 if (extra == NULL)
2203 {
2204 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2205 extra->flags = 0;
2206 }
2207 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2208 extra->callout_data = &callout_data;
2209 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2210 options | g_notempty, use_offsets, use_size_offsets);
2211 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2212 }
2213
2214 /* The normal case is just to do the match once, with the default
2215 value of match_limit. */
2216
2217 #if !defined NODFA
2218 else if (all_use_dfa || use_dfa)
2219 {
2220 int workspace[1000];
2221 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2222 options | g_notempty, use_offsets, use_size_offsets, workspace,
2223 sizeof(workspace)/sizeof(int));
2224 if (count == 0)
2225 {
2226 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2227 count = use_size_offsets/2;
2228 }
2229 }
2230 #endif
2231
2232 else
2233 {
2234 count = pcre_exec(re, extra, (char *)bptr, len,
2235 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2236 if (count == 0)
2237 {
2238 fprintf(outfile, "Matched, but too many substrings\n");
2239 count = use_size_offsets/3;
2240 }
2241 }
2242
2243 /* Matched */
2244
2245 if (count >= 0)
2246 {
2247 int i, maxcount;
2248
2249 #if !defined NODFA
2250 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2251 #endif
2252 maxcount = use_size_offsets/3;
2253
2254 /* This is a check against a lunatic return value. */
2255
2256 if (count > maxcount)
2257 {
2258 fprintf(outfile,
2259 "** PCRE error: returned count %d is too big for offset size %d\n",
2260 count, use_size_offsets);
2261 count = use_size_offsets/3;
2262 if (do_g || do_G)
2263 {
2264 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2265 do_g = do_G = FALSE; /* Break g/G loop */
2266 }
2267 }
2268
2269 for (i = 0; i < count * 2; i += 2)
2270 {
2271 if (use_offsets[i] < 0)
2272 fprintf(outfile, "%2d: <unset>\n", i/2);
2273 else
2274 {
2275 fprintf(outfile, "%2d: ", i/2);
2276 (void)pchars(bptr + use_offsets[i],
2277 use_offsets[i+1] - use_offsets[i], outfile);
2278 fprintf(outfile, "\n");
2279 if (i == 0)
2280 {
2281 if (do_showrest)
2282 {
2283 fprintf(outfile, " 0+ ");
2284 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2285 outfile);
2286 fprintf(outfile, "\n");
2287 }
2288 }
2289 }
2290 }
2291
2292 for (i = 0; i < 32; i++)
2293 {
2294 if ((copystrings & (1 << i)) != 0)
2295 {
2296 char copybuffer[256];
2297 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2298 i, copybuffer, sizeof(copybuffer));
2299 if (rc < 0)
2300 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2301 else
2302 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2303 }
2304 }
2305
2306 for (copynamesptr = copynames;
2307 *copynamesptr != 0;
2308 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2309 {
2310 char copybuffer[256];
2311 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2312 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2313 if (rc < 0)
2314 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2315 else
2316 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2317 }
2318
2319 for (i = 0; i < 32; i++)
2320 {
2321 if ((getstrings & (1 << i)) != 0)
2322 {
2323 const char *substring;
2324 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2325 i, &substring);
2326 if (rc < 0)
2327 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2328 else
2329 {
2330 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2331 pcre_free_substring(substring);
2332 }
2333 }
2334 }
2335
2336 for (getnamesptr = getnames;
2337 *getnamesptr != 0;
2338 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2339 {
2340 const char *substring;
2341 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2342 count, (char *)getnamesptr, &substring);
2343 if (rc < 0)
2344 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2345 else
2346 {
2347 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2348 pcre_free_substring(substring);
2349 }
2350 }
2351
2352 if (getlist)
2353 {
2354 const char **stringlist;
2355 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2356 &stringlist);
2357 if (rc < 0)
2358 fprintf(outfile, "get substring list failed %d\n", rc);
2359 else
2360 {
2361 for (i = 0; i < count; i++)
2362 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2363 if (stringlist[i] != NULL)
2364 fprintf(outfile, "string list not terminated by NULL\n");
2365 /* free((void *)stringlist); */
2366 pcre_free_substring_list(stringlist);
2367 }
2368 }
2369 }
2370
2371 /* There was a partial match */
2372
2373 else if (count == PCRE_ERROR_PARTIAL)
2374 {
2375 fprintf(outfile, "Partial match");
2376 if (use_size_offsets > 1)
2377 {
2378 fprintf(outfile, ": ");
2379 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2380 outfile);
2381 }
2382 fprintf(outfile, "\n");
2383 break; /* Out of the /g loop */
2384 }
2385
2386 /* Failed to match. If this is a /g or /G loop and we previously set
2387 g_notempty after a null match, this is not necessarily the end. We want
2388 to advance the start offset, and continue. We won't be at the end of the
2389 string - that was checked before setting g_notempty.
2390
2391 Complication arises in the case when the newline option is "any" or
2392 "anycrlf". If the previous match was at the end of a line terminated by
2393 CRLF, an advance of one character just passes the \r, whereas we should
2394 prefer the longer newline sequence, as does the code in pcre_exec().
2395 Fudge the offset value to achieve this.
2396
2397 Otherwise, in the case of UTF-8 matching, the advance must be one
2398 character, not one byte. */
2399
2400 else
2401 {
2402 if (g_notempty != 0)
2403 {
2404 int onechar = 1;
2405 unsigned int obits = ((real_pcre *)re)->options;
2406 use_offsets[0] = start_offset;
2407 if ((obits & PCRE_NEWLINE_BITS) == 0)
2408 {
2409 int d;
2410 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2411 /* Note that these values are always the ASCII ones, even in
2412 EBCDIC environments. CR = 13, NL = 10. */
2413 obits = (d == 13)? PCRE_NEWLINE_CR :
2414 (d == 10)? PCRE_NEWLINE_LF :
2415 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2416 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2417 (d == -1)? PCRE_NEWLINE_ANY : 0;
2418 }
2419 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2420 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2421 &&
2422 start_offset < len - 1 &&
2423 bptr[start_offset] == '\r' &&
2424 bptr[start_offset+1] == '\n')
2425 onechar++;
2426 else if (use_utf8)
2427 {
2428 while (start_offset + onechar < len)
2429 {
2430 int tb = bptr[start_offset+onechar];
2431 if (tb <= 127) break;
2432 tb &= 0xc0;
2433 if (tb != 0 && tb != 0xc0) onechar++;
2434 }
2435 }
2436 use_offsets[1] = start_offset + onechar;
2437 }
2438 else
2439 {
2440 if (count == PCRE_ERROR_NOMATCH)
2441 {
2442 if (gmatched == 0) fprintf(outfile, "No match\n");
2443 }
2444 else fprintf(outfile, "Error %d\n", count);
2445 break; /* Out of the /g loop */
2446 }
2447 }
2448
2449 /* If not /g or /G we are done */
2450
2451 if (!do_g && !do_G) break;
2452
2453 /* If we have matched an empty string, first check to see if we are at
2454 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2455 Perl's /g option does. This turns out to be rather cunning. First we set
2456 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2457 same point. If this fails (picked up above) we advance to the next
2458 character. */
2459
2460 g_notempty = 0;
2461
2462 if (use_offsets[0] == use_offsets[1])
2463 {
2464 if (use_offsets[0] == len) break;
2465 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2466 }
2467
2468 /* For /g, update the start offset, leaving the rest alone */
2469
2470 if (do_g) start_offset = use_offsets[1];
2471
2472 /* For /G, update the pointer and length */
2473
2474 else
2475 {
2476 bptr += use_offsets[1];
2477 len -= use_offsets[1];
2478 }
2479 } /* End of loop for /g and /G */
2480
2481 NEXT_DATA: continue;
2482 } /* End of loop for data lines */
2483
2484 CONTINUE:
2485
2486 #if !defined NOPOSIX
2487 if (posix || do_posix) regfree(&preg);
2488 #endif
2489
2490 if (re != NULL) new_free(re);
2491 if (extra != NULL) new_free(extra);
2492 if (tables != NULL)
2493 {
2494 new_free((void *)tables);
2495 setlocale(LC_CTYPE, "C");
2496 locale_set = 0;
2497 }
2498 }
2499
2500 if (infile == stdin) fprintf(outfile, "\n");
2501
2502 EXIT:
2503
2504 if (infile != NULL && infile != stdin) fclose(infile);
2505 if (outfile != NULL && outfile != stdout) fclose(outfile);
2506
2507 free(buffer);
2508 free(dbuffer);
2509 free(pbuffer);
2510 free(offsets);
2511
2512 return yield;
2513 }
2514
2515 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12