/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 345 - (show annotations) (download)
Mon Apr 28 15:10:02 2008 UTC (6 years, 7 months ago) by ph10
File MIME type: text/plain
File size: 71563 byte(s)
Tidies for the 7.7-RC1 distribution.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #define isatty _isatty /* This is what Windows calls them, I'm told */
75 #define fileno _fileno
76
77 #else
78 #include <sys/time.h> /* These two includes are needed */
79 #include <sys/resource.h> /* for setrlimit(). */
80 #define INPUT_MODE "rb"
81 #define OUTPUT_MODE "wb"
82 #endif
83
84
85 /* We have to include pcre_internal.h because we need the internal info for
86 displaying the results of pcre_study() and we also need to know about the
87 internal macros, structures, and other internal data values; pcretest has
88 "inside information" compared to a program that strictly follows the PCRE API.
89
90 Although pcre_internal.h does itself include pcre.h, we explicitly include it
91 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
92 appropriately for an application, not for building PCRE. */
93
94 #include "pcre.h"
95 #include "pcre_internal.h"
96
97 /* We need access to the data tables that PCRE uses. So as not to have to keep
98 two copies, we include the source file here, changing the names of the external
99 symbols to prevent clashes. */
100
101 #define _pcre_utf8_table1 utf8_table1
102 #define _pcre_utf8_table1_size utf8_table1_size
103 #define _pcre_utf8_table2 utf8_table2
104 #define _pcre_utf8_table3 utf8_table3
105 #define _pcre_utf8_table4 utf8_table4
106 #define _pcre_utt utt
107 #define _pcre_utt_size utt_size
108 #define _pcre_utt_names utt_names
109 #define _pcre_OP_lengths OP_lengths
110
111 #include "pcre_tables.c"
112
113 /* We also need the pcre_printint() function for printing out compiled
114 patterns. This function is in a separate file so that it can be included in
115 pcre_compile.c when that module is compiled with debugging enabled.
116
117 The definition of the macro PRINTABLE, which determines whether to print an
118 output character as-is or as a hex value when showing compiled patterns, is
119 contained in this file. We use it here also, in cases when the locale has not
120 been explicitly changed, so as to get consistent output from systems that
121 differ in their output from isprint() even in the "C" locale. */
122
123 #include "pcre_printint.src"
124
125 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
126
127
128 /* It is possible to compile this test program without including support for
129 testing the POSIX interface, though this is not available via the standard
130 Makefile. */
131
132 #if !defined NOPOSIX
133 #include "pcreposix.h"
134 #endif
135
136 /* It is also possible, for the benefit of the version currently imported into
137 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
138 interface to the DFA matcher (NODFA), and without the doublecheck of the old
139 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
140 UTF8 support if PCRE is built without it. */
141
142 #ifndef SUPPORT_UTF8
143 #ifndef NOUTF8
144 #define NOUTF8
145 #endif
146 #endif
147
148
149 /* Other parameters */
150
151 #ifndef CLOCKS_PER_SEC
152 #ifdef CLK_TCK
153 #define CLOCKS_PER_SEC CLK_TCK
154 #else
155 #define CLOCKS_PER_SEC 100
156 #endif
157 #endif
158
159 /* This is the default loop count for timing. */
160
161 #define LOOPREPEAT 500000
162
163 /* Static variables */
164
165 static FILE *outfile;
166 static int log_store = 0;
167 static int callout_count;
168 static int callout_extra;
169 static int callout_fail_count;
170 static int callout_fail_id;
171 static int debug_lengths;
172 static int first_callout;
173 static int locale_set = 0;
174 static int show_malloc;
175 static int use_utf8;
176 static size_t gotten_store;
177
178 /* The buffers grow automatically if very long input lines are encountered. */
179
180 static int buffer_size = 50000;
181 static uschar *buffer = NULL;
182 static uschar *dbuffer = NULL;
183 static uschar *pbuffer = NULL;
184
185
186
187 /*************************************************
188 * Read or extend an input line *
189 *************************************************/
190
191 /* Input lines are read into buffer, but both patterns and data lines can be
192 continued over multiple input lines. In addition, if the buffer fills up, we
193 want to automatically expand it so as to be able to handle extremely large
194 lines that are needed for certain stress tests. When the input buffer is
195 expanded, the other two buffers must also be expanded likewise, and the
196 contents of pbuffer, which are a copy of the input for callouts, must be
197 preserved (for when expansion happens for a data line). This is not the most
198 optimal way of handling this, but hey, this is just a test program!
199
200 Arguments:
201 f the file to read
202 start where in buffer to start (this *must* be within buffer)
203 prompt for stdin or readline()
204
205 Returns: pointer to the start of new data
206 could be a copy of start, or could be moved
207 NULL if no data read and EOF reached
208 */
209
210 static uschar *
211 extend_inputline(FILE *f, uschar *start, const char *prompt)
212 {
213 uschar *here = start;
214
215 for (;;)
216 {
217 int rlen = buffer_size - (here - buffer);
218
219 if (rlen > 1000)
220 {
221 int dlen;
222
223 /* If libreadline support is required, use readline() to read a line if the
224 input is a terminal. Note that readline() removes the trailing newline, so
225 we must put it back again, to be compatible with fgets(). */
226
227 #ifdef SUPPORT_LIBREADLINE
228 if (isatty(fileno(f)))
229 {
230 size_t len;
231 char *s = readline(prompt);
232 if (s == NULL) return (here == start)? NULL : start;
233 len = strlen(s);
234 if (len > 0) add_history(s);
235 if (len > rlen - 1) len = rlen - 1;
236 memcpy(here, s, len);
237 here[len] = '\n';
238 here[len+1] = 0;
239 free(s);
240 }
241 else
242 #endif
243
244 /* Read the next line by normal means, prompting if the file is stdin. */
245
246 {
247 if (f == stdin) printf(prompt);
248 if (fgets((char *)here, rlen, f) == NULL)
249 return (here == start)? NULL : start;
250 }
251
252 dlen = (int)strlen((char *)here);
253 if (dlen > 0 && here[dlen - 1] == '\n') return start;
254 here += dlen;
255 }
256
257 else
258 {
259 int new_buffer_size = 2*buffer_size;
260 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
261 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
262 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
263
264 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
265 {
266 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
267 exit(1);
268 }
269
270 memcpy(new_buffer, buffer, buffer_size);
271 memcpy(new_pbuffer, pbuffer, buffer_size);
272
273 buffer_size = new_buffer_size;
274
275 start = new_buffer + (start - buffer);
276 here = new_buffer + (here - buffer);
277
278 free(buffer);
279 free(dbuffer);
280 free(pbuffer);
281
282 buffer = new_buffer;
283 dbuffer = new_dbuffer;
284 pbuffer = new_pbuffer;
285 }
286 }
287
288 return NULL; /* Control never gets here */
289 }
290
291
292
293
294
295
296
/*************************************************
*         Read number from string                *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then a run of decimal digits is converted.
If the digits describe a value larger than INT_MAX, the result saturates at
INT_MAX instead of overflowing (signed overflow is undefined behaviour). All
the digits are still consumed in that case.

Arguments:
  str           string to be converted
  endptr        where to put the end pointer (first character not consumed)

Returns:        the value, as a non-negative int (0 if there are no digits)
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (*str != 0 && isspace(*str)) str++;
while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');
  if (result > (INT_MAX - digit) / 10)
    result = INT_MAX;                  /* Would overflow: saturate */
  else
    result = result * 10 + digit;
  }
*endptr = str;
return(result);
}
321
322
323
324
325 /*************************************************
326 * Convert UTF-8 string to value *
327 *************************************************/
328
329 /* This function takes one or more bytes that represents a UTF-8 character,
330 and returns the value of the character.
331
332 Argument:
333 utf8bytes a pointer to the byte vector
334 vptr a pointer to an int to receive the value
335
336 Returns: > 0 => the number of bytes consumed
337 -6 to 0 => malformed UTF-8 character at offset = (-return)
338 */
339
340 #if !defined NOUTF8
341
342 static int
343 utf82ord(unsigned char *utf8bytes, int *vptr)
344 {
345 int c = *utf8bytes++;
346 int d = c;
347 int i, j, s;
348
349 for (i = -1; i < 6; i++) /* i is number of additional bytes */
350 {
351 if ((d & 0x80) == 0) break;
352 d <<= 1;
353 }
354
355 if (i == -1) { *vptr = c; return 1; } /* ascii character */
356 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
357
358 /* i now has a value in the range 1-5 */
359
360 s = 6*i;
361 d = (c & utf8_table3[i]) << s;
362
363 for (j = 0; j < i; j++)
364 {
365 c = *utf8bytes++;
366 if ((c & 0xc0) != 0x80) return -(j+1);
367 s -= 6;
368 d |= (c & 0x3f) << s;
369 }
370
371 /* Check that encoding was the correct unique one */
372
373 for (j = 0; j < utf8_table1_size; j++)
374 if (d <= utf8_table1[j]) break;
375 if (j != i) return -(i+1);
376
377 /* Valid value */
378
379 *vptr = d;
380 return i+1;
381 }
382
383 #endif
384
385
386
387 /*************************************************
388 * Convert character value to UTF-8 *
389 *************************************************/
390
391 /* This function takes an integer value in the range 0 - 0x7fffffff
392 and encodes it as a UTF-8 character in 0 to 6 bytes.
393
394 Arguments:
395 cvalue the character value
396 utf8bytes pointer to buffer for result - at least 6 bytes long
397
398 Returns: number of characters placed in the buffer
399 */
400
401 #if !defined NOUTF8
402
403 static int
404 ord2utf8(int cvalue, uschar *utf8bytes)
405 {
406 register int i, j;
407 for (i = 0; i < utf8_table1_size; i++)
408 if (cvalue <= utf8_table1[i]) break;
409 utf8bytes += i;
410 for (j = i; j > 0; j--)
411 {
412 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
413 cvalue >>= 6;
414 }
415 *utf8bytes = utf8_table2[i] | cvalue;
416 return i + 1;
417 }
418
419 #endif
420
421
422
423 /*************************************************
424 * Print character string *
425 *************************************************/
426
427 /* Character string printing function. Must handle UTF-8 strings in utf8
428 mode. Yields number of characters printed. If handed a NULL file, just counts
429 chars without printing. */
430
431 static int pchars(unsigned char *p, int length, FILE *f)
432 {
433 int c = 0;
434 int yield = 0;
435
436 while (length-- > 0)
437 {
438 #if !defined NOUTF8
439 if (use_utf8)
440 {
441 int rc = utf82ord(p, &c);
442
443 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
444 {
445 length -= rc - 1;
446 p += rc;
447 if (PRINTHEX(c))
448 {
449 if (f != NULL) fprintf(f, "%c", c);
450 yield++;
451 }
452 else
453 {
454 int n = 4;
455 if (f != NULL) fprintf(f, "\\x{%02x}", c);
456 yield += (n <= 0x000000ff)? 2 :
457 (n <= 0x00000fff)? 3 :
458 (n <= 0x0000ffff)? 4 :
459 (n <= 0x000fffff)? 5 : 6;
460 }
461 continue;
462 }
463 }
464 #endif
465
466 /* Not UTF-8, or malformed UTF-8 */
467
468 c = *p++;
469 if (PRINTHEX(c))
470 {
471 if (f != NULL) fprintf(f, "%c", c);
472 yield++;
473 }
474 else
475 {
476 if (f != NULL) fprintf(f, "\\x%02x", c);
477 yield += 4;
478 }
479 }
480
481 return yield;
482 }
483
484
485
486 /*************************************************
487 * Callout function *
488 *************************************************/
489
490 /* Called from PCRE as a result of the (?C) item. We print out where we are in
491 the match. Yield zero unless more callouts than the fail count, or the callout
492 data is not zero. */
493
494 static int callout(pcre_callout_block *cb)
495 {
496 FILE *f = (first_callout | callout_extra)? outfile : NULL;
497 int i, pre_start, post_start, subject_length;
498
499 if (callout_extra)
500 {
501 fprintf(f, "Callout %d: last capture = %d\n",
502 cb->callout_number, cb->capture_last);
503
504 for (i = 0; i < cb->capture_top * 2; i += 2)
505 {
506 if (cb->offset_vector[i] < 0)
507 fprintf(f, "%2d: <unset>\n", i/2);
508 else
509 {
510 fprintf(f, "%2d: ", i/2);
511 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
512 cb->offset_vector[i+1] - cb->offset_vector[i], f);
513 fprintf(f, "\n");
514 }
515 }
516 }
517
518 /* Re-print the subject in canonical form, the first time or if giving full
519 datails. On subsequent calls in the same match, we use pchars just to find the
520 printed lengths of the substrings. */
521
522 if (f != NULL) fprintf(f, "--->");
523
524 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
525 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
526 cb->current_position - cb->start_match, f);
527
528 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
529
530 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
531 cb->subject_length - cb->current_position, f);
532
533 if (f != NULL) fprintf(f, "\n");
534
535 /* Always print appropriate indicators, with callout number if not already
536 shown. For automatic callouts, show the pattern offset. */
537
538 if (cb->callout_number == 255)
539 {
540 fprintf(outfile, "%+3d ", cb->pattern_position);
541 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
542 }
543 else
544 {
545 if (callout_extra) fprintf(outfile, " ");
546 else fprintf(outfile, "%3d ", cb->callout_number);
547 }
548
549 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
550 fprintf(outfile, "^");
551
552 if (post_start > 0)
553 {
554 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
555 fprintf(outfile, "^");
556 }
557
558 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
559 fprintf(outfile, " ");
560
561 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
562 pbuffer + cb->pattern_position);
563
564 fprintf(outfile, "\n");
565 first_callout = 0;
566
567 if (cb->callout_data != NULL)
568 {
569 int callout_data = *((int *)(cb->callout_data));
570 if (callout_data != 0)
571 {
572 fprintf(outfile, "Callout data = %d\n", callout_data);
573 return callout_data;
574 }
575 }
576
577 return (cb->callout_number != callout_fail_id)? 0 :
578 (++callout_count >= callout_fail_count)? 1 : 0;
579 }
580
581
582 /*************************************************
583 * Local malloc functions *
584 *************************************************/
585
586 /* Alternative malloc function, to test functionality and show the size of the
587 compiled re. */
588
589 static void *new_malloc(size_t size)
590 {
591 void *block = malloc(size);
592 gotten_store = size;
593 if (show_malloc)
594 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
595 return block;
596 }
597
598 static void new_free(void *block)
599 {
600 if (show_malloc)
601 fprintf(outfile, "free %p\n", block);
602 free(block);
603 }
604
605
606 /* For recursion malloc/free, to test stacking calls */
607
608 static void *stack_malloc(size_t size)
609 {
610 void *block = malloc(size);
611 if (show_malloc)
612 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
613 return block;
614 }
615
616 static void stack_free(void *block)
617 {
618 if (show_malloc)
619 fprintf(outfile, "stack_free %p\n", block);
620 free(block);
621 }
622
623
624 /*************************************************
625 * Call pcre_fullinfo() *
626 *************************************************/
627
628 /* Get one piece of information from the pcre_fullinfo() function */
629
630 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
631 {
632 int rc;
633 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
634 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
635 }
636
637
638
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value     the value to flip
  n         2 to swap the two low-order bytes; any other value reverses the
              four low-order bytes

Returns:    the flipped value; bits above those taking part are zero
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
652
653
654
655
656 /*************************************************
657 * Check match or recursion limit *
658 *************************************************/
659
660 static int
661 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
662 int start_offset, int options, int *use_offsets, int use_size_offsets,
663 int flag, unsigned long int *limit, int errnumber, const char *msg)
664 {
665 int count;
666 int min = 0;
667 int mid = 64;
668 int max = -1;
669
670 extra->flags |= flag;
671
672 for (;;)
673 {
674 *limit = mid;
675
676 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
677 use_offsets, use_size_offsets);
678
679 if (count == errnumber)
680 {
681 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
682 min = mid;
683 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
684 }
685
686 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
687 count == PCRE_ERROR_PARTIAL)
688 {
689 if (mid == min + 1)
690 {
691 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
692 break;
693 }
694 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
695 max = mid;
696 mid = (min + mid)/2;
697 }
698 else break; /* Some other error */
699 }
700
701 extra->flags &= ~flag;
702 return count;
703 }
704
705
706
707 /*************************************************
708 * Case-independent strncmp() function *
709 *************************************************/
710
711 /*
712 Arguments:
713 s first string
714 t second string
715 n number of characters to compare
716
717 Returns: < 0, = 0, or > 0, according to the comparison
718 */
719
720 static int
721 strncmpic(uschar *s, uschar *t, int n)
722 {
723 while (n--)
724 {
725 int c = tolower(*s++) - tolower(*t++);
726 if (c) return c;
727 }
728 return 0;
729 }
730
731
732
733 /*************************************************
734 * Check newline indicator *
735 *************************************************/
736
737 /* This is used both at compile and run-time to check for <xxx> escapes, where
738 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
739 no match.
740
741 Arguments:
742 p points after the leading '<'
743 f file for error message
744
745 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
746 */
747
748 static int
749 check_newline(uschar *p, FILE *f)
750 {
751 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
752 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
753 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
754 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
755 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
756 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
757 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
758 fprintf(f, "Unknown newline type at: <%s\n", p);
759 return 0;
760 }
761
762
763
/*************************************************
*             Usage function                     *
*************************************************/

/* Write the command-line summary to stdout. Which lines appear depends on
whether SUPPORT_LIBREADLINE, NODFA and NOPOSIX are defined. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n", stdout);
fputs("Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);
fputs(" -b show compiled code (bytecode)\n", stdout);
fputs(" -C show PCRE compile-time options and exit\n", stdout);
fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n", stdout);
fputs(" -i show information about compiled patterns\n", stdout);
fputs(" -m output memory used information\n", stdout);
fputs(" -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
fputs(" -s output store (memory) used information\n", stdout);
fputs(" -t time compilation and execution\n", stdout);
fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
fputs(" -tm time execution (matching) only\n", stdout);
fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
800
801
802
803 /*************************************************
804 * Main Program *
805 *************************************************/
806
807 /* Read lines from named file or stdin and write to named file or stdout; lines
808 consist of a regular expression, in delimiters and optionally followed by
809 options, followed by a set of test data, terminated by an empty line. */
810
811 int main(int argc, char **argv)
812 {
813 FILE *infile = stdin;
814 int options = 0;
815 int study_options = 0;
816 int op = 1;
817 int timeit = 0;
818 int timeitm = 0;
819 int showinfo = 0;
820 int showstore = 0;
821 int quiet = 0;
822 int size_offsets = 45;
823 int size_offsets_max;
824 int *offsets = NULL;
825 #if !defined NOPOSIX
826 int posix = 0;
827 #endif
828 int debug = 0;
829 int done = 0;
830 int all_use_dfa = 0;
831 int yield = 0;
832 int stack_size;
833
834 /* These vectors store, end-to-end, a list of captured substring names. Assume
835 that 1024 is plenty long enough for the few names we'll be testing. */
836
837 uschar copynames[1024];
838 uschar getnames[1024];
839
840 uschar *copynamesptr;
841 uschar *getnamesptr;
842
843 /* Get buffers from malloc() so that Electric Fence will check their misuse
844 when I am debugging. They grow automatically when very long lines are read. */
845
846 buffer = (unsigned char *)malloc(buffer_size);
847 dbuffer = (unsigned char *)malloc(buffer_size);
848 pbuffer = (unsigned char *)malloc(buffer_size);
849
850 /* The outfile variable is static so that new_malloc can use it. */
851
852 outfile = stdout;
853
854 /* The following _setmode() stuff is some Windows magic that tells its runtime
855 library to translate CRLF into a single LF character. At least, that's what
856 I've been told: never having used Windows I take this all on trust. Originally
857 it set 0x8000, but then I was advised that _O_BINARY was better. */
858
859 #if defined(_WIN32) || defined(WIN32)
860 _setmode( _fileno( stdout ), _O_BINARY );
861 #endif
862
863 /* Scan options */
864
865 while (argc > 1 && argv[op][0] == '-')
866 {
867 unsigned char *endptr;
868
869 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
870 showstore = 1;
871 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
872 else if (strcmp(argv[op], "-b") == 0) debug = 1;
873 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
874 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
875 #if !defined NODFA
876 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
877 #endif
878 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
879 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
880 *endptr == 0))
881 {
882 op++;
883 argc--;
884 }
885 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
886 {
887 int both = argv[op][2] == 0;
888 int temp;
889 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
890 *endptr == 0))
891 {
892 timeitm = temp;
893 op++;
894 argc--;
895 }
896 else timeitm = LOOPREPEAT;
897 if (both) timeit = timeitm;
898 }
899 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
900 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
901 *endptr == 0))
902 {
903 #if defined(_WIN32) || defined(WIN32)
904 printf("PCRE: -S not supported on this OS\n");
905 exit(1);
906 #else
907 int rc;
908 struct rlimit rlim;
909 getrlimit(RLIMIT_STACK, &rlim);
910 rlim.rlim_cur = stack_size * 1024 * 1024;
911 rc = setrlimit(RLIMIT_STACK, &rlim);
912 if (rc != 0)
913 {
914 printf("PCRE: setrlimit() failed with error %d\n", rc);
915 exit(1);
916 }
917 op++;
918 argc--;
919 #endif
920 }
921 #if !defined NOPOSIX
922 else if (strcmp(argv[op], "-p") == 0) posix = 1;
923 #endif
924 else if (strcmp(argv[op], "-C") == 0)
925 {
926 int rc;
927 printf("PCRE version %s\n", pcre_version());
928 printf("Compiled with\n");
929 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
930 printf(" %sUTF-8 support\n", rc? "" : "No ");
931 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
932 printf(" %sUnicode properties support\n", rc? "" : "No ");
933 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
934 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
935 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
936 (rc == -2)? "ANYCRLF" :
937 (rc == -1)? "ANY" : "???");
938 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
939 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
940 "all Unicode newlines");
941 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
942 printf(" Internal link size = %d\n", rc);
943 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
944 printf(" POSIX malloc threshold = %d\n", rc);
945 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
946 printf(" Default match limit = %d\n", rc);
947 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
948 printf(" Default recursion depth limit = %d\n", rc);
949 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
950 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
951 goto EXIT;
952 }
953 else if (strcmp(argv[op], "-help") == 0 ||
954 strcmp(argv[op], "--help") == 0)
955 {
956 usage();
957 goto EXIT;
958 }
959 else
960 {
961 printf("** Unknown or malformed option %s\n", argv[op]);
962 usage();
963 yield = 1;
964 goto EXIT;
965 }
966 op++;
967 argc--;
968 }
969
970 /* Get the store for the offsets vector, and remember what it was */
971
972 size_offsets_max = size_offsets;
973 offsets = (int *)malloc(size_offsets_max * sizeof(int));
974 if (offsets == NULL)
975 {
976 printf("** Failed to get %d bytes of memory for offsets vector\n",
977 (int)(size_offsets_max * sizeof(int)));
978 yield = 1;
979 goto EXIT;
980 }
981
982 /* Sort out the input and output files */
983
984 if (argc > 1)
985 {
986 infile = fopen(argv[op], INPUT_MODE);
987 if (infile == NULL)
988 {
989 printf("** Failed to open %s\n", argv[op]);
990 yield = 1;
991 goto EXIT;
992 }
993 }
994
995 if (argc > 2)
996 {
997 outfile = fopen(argv[op+1], OUTPUT_MODE);
998 if (outfile == NULL)
999 {
1000 printf("** Failed to open %s\n", argv[op+1]);
1001 yield = 1;
1002 goto EXIT;
1003 }
1004 }
1005
1006 /* Set alternative malloc function */
1007
1008 pcre_malloc = new_malloc;
1009 pcre_free = new_free;
1010 pcre_stack_malloc = stack_malloc;
1011 pcre_stack_free = stack_free;
1012
1013 /* Heading line unless quiet, then prompt for first regex if stdin */
1014
1015 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1016
1017 /* Main loop */
1018
1019 while (!done)
1020 {
1021 pcre *re = NULL;
1022 pcre_extra *extra = NULL;
1023
1024 #if !defined NOPOSIX /* There are still compilers that require no indent */
1025 regex_t preg;
1026 int do_posix = 0;
1027 #endif
1028
1029 const char *error;
1030 unsigned char *p, *pp, *ppp;
1031 unsigned char *to_file = NULL;
1032 const unsigned char *tables = NULL;
1033 unsigned long int true_size, true_study_size = 0;
1034 size_t size, regex_gotten_store;
1035 int do_study = 0;
1036 int do_debug = debug;
1037 int do_G = 0;
1038 int do_g = 0;
1039 int do_showinfo = showinfo;
1040 int do_showrest = 0;
1041 int do_flip = 0;
1042 int erroroffset, len, delimiter, poffset;
1043
1044 use_utf8 = 0;
1045 debug_lengths = 1;
1046
1047 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1048 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1049 fflush(outfile);
1050
1051 p = buffer;
1052 while (isspace(*p)) p++;
1053 if (*p == 0) continue;
1054
1055 /* See if the pattern is to be loaded pre-compiled from a file. */
1056
1057 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1058 {
1059 unsigned long int magic, get_options;
1060 uschar sbuf[8];
1061 FILE *f;
1062
1063 p++;
1064 pp = p + (int)strlen((char *)p);
1065 while (isspace(pp[-1])) pp--;
1066 *pp = 0;
1067
1068 f = fopen((char *)p, "rb");
1069 if (f == NULL)
1070 {
1071 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1072 continue;
1073 }
1074
1075 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1076
1077 true_size =
1078 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1079 true_study_size =
1080 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1081
1082 re = (real_pcre *)new_malloc(true_size);
1083 regex_gotten_store = gotten_store;
1084
1085 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1086
1087 magic = ((real_pcre *)re)->magic_number;
1088 if (magic != MAGIC_NUMBER)
1089 {
1090 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1091 {
1092 do_flip = 1;
1093 }
1094 else
1095 {
1096 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1097 fclose(f);
1098 continue;
1099 }
1100 }
1101
1102 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1103 do_flip? " (byte-inverted)" : "", p);
1104
1105 /* Need to know if UTF-8 for printing data strings */
1106
1107 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1108 use_utf8 = (get_options & PCRE_UTF8) != 0;
1109
1110 /* Now see if there is any following study data */
1111
1112 if (true_study_size != 0)
1113 {
1114 pcre_study_data *psd;
1115
1116 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1117 extra->flags = PCRE_EXTRA_STUDY_DATA;
1118
1119 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1120 extra->study_data = psd;
1121
1122 if (fread(psd, 1, true_study_size, f) != true_study_size)
1123 {
1124 FAIL_READ:
1125 fprintf(outfile, "Failed to read data from %s\n", p);
1126 if (extra != NULL) new_free(extra);
1127 if (re != NULL) new_free(re);
1128 fclose(f);
1129 continue;
1130 }
1131 fprintf(outfile, "Study data loaded from %s\n", p);
1132 do_study = 1; /* To get the data output if requested */
1133 }
1134 else fprintf(outfile, "No study data\n");
1135
1136 fclose(f);
1137 goto SHOW_INFO;
1138 }
1139
1140 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1141 the pattern; if it isn't complete, read more. */
1142
1143 delimiter = *p++;
1144
1145 if (isalnum(delimiter) || delimiter == '\\')
1146 {
1147 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1148 goto SKIP_DATA;
1149 }
1150
1151 pp = p;
1152 poffset = p - buffer;
1153
1154 for(;;)
1155 {
1156 while (*pp != 0)
1157 {
1158 if (*pp == '\\' && pp[1] != 0) pp++;
1159 else if (*pp == delimiter) break;
1160 pp++;
1161 }
1162 if (*pp != 0) break;
1163 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1164 {
1165 fprintf(outfile, "** Unexpected EOF\n");
1166 done = 1;
1167 goto CONTINUE;
1168 }
1169 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1170 }
1171
1172 /* The buffer may have moved while being extended; reset the start of data
1173 pointer to the correct relative point in the buffer. */
1174
1175 p = buffer + poffset;
1176
1177 /* If the first character after the delimiter is backslash, make
1178 the pattern end with backslash. This is purely to provide a way
1179 of testing for the error message when a pattern ends with backslash. */
1180
1181 if (pp[1] == '\\') *pp++ = '\\';
1182
1183 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1184 for callouts. */
1185
1186 *pp++ = 0;
1187 strcpy((char *)pbuffer, (char *)p);
1188
1189 /* Look for options after final delimiter */
1190
1191 options = 0;
1192 study_options = 0;
1193 log_store = showstore; /* default from command line */
1194
1195 while (*pp != 0)
1196 {
1197 switch (*pp++)
1198 {
1199 case 'f': options |= PCRE_FIRSTLINE; break;
1200 case 'g': do_g = 1; break;
1201 case 'i': options |= PCRE_CASELESS; break;
1202 case 'm': options |= PCRE_MULTILINE; break;
1203 case 's': options |= PCRE_DOTALL; break;
1204 case 'x': options |= PCRE_EXTENDED; break;
1205
1206 case '+': do_showrest = 1; break;
1207 case 'A': options |= PCRE_ANCHORED; break;
1208 case 'B': do_debug = 1; break;
1209 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1210 case 'D': do_debug = do_showinfo = 1; break;
1211 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1212 case 'F': do_flip = 1; break;
1213 case 'G': do_G = 1; break;
1214 case 'I': do_showinfo = 1; break;
1215 case 'J': options |= PCRE_DUPNAMES; break;
1216 case 'M': log_store = 1; break;
1217 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1218
1219 #if !defined NOPOSIX
1220 case 'P': do_posix = 1; break;
1221 #endif
1222
1223 case 'S': do_study = 1; break;
1224 case 'U': options |= PCRE_UNGREEDY; break;
1225 case 'X': options |= PCRE_EXTRA; break;
1226 case 'Z': debug_lengths = 0; break;
1227 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1228 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1229
1230 case 'L':
1231 ppp = pp;
1232 /* The '\r' test here is so that it works on Windows. */
1233 /* The test for 0 (NUL) is just in case this is an unterminated line. */
1234 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1235 *ppp = 0;
1236 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1237 {
1238 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1239 goto SKIP_DATA;
1240 }
1241 locale_set = 1;
1242 tables = pcre_maketables();
1243 pp = ppp;
1244 break;
1245
1246 case '>':
1247 to_file = pp;
1248 while (*pp != 0) pp++;
1249 while (isspace(pp[-1])) pp--;
1250 *pp = 0;
1251 break;
1252
1253 case '<':
1254 {
1255 if (strncmp((char *)pp, "JS>", 3) == 0)
1256 {
1257 options |= PCRE_JAVASCRIPT_COMPAT;
1258 pp += 3;
1259 }
1260 else
1261 {
1262 int x = check_newline(pp, outfile);
1263 if (x == 0) goto SKIP_DATA;
1264 options |= x;
1265 while (*pp++ != '>');
1266 }
1267 }
1268 break;
1269
1270 case '\r': /* So that it works in Windows */
1271 case '\n':
1272 case ' ':
1273 break;
1274
1275 default:
1276 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1277 goto SKIP_DATA;
1278 }
1279 }
1280
1281 /* Handle compiling via the POSIX interface, which doesn't support the
1282 timing, showing, or debugging options, nor the ability to pass over
1283 local character tables. */
1284
1285 #if !defined NOPOSIX
1286 if (posix || do_posix)
1287 {
1288 int rc;
1289 int cflags = 0;
1290
1291 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1292 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1293 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1294 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1295 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1296
1297 rc = regcomp(&preg, (char *)p, cflags);
1298
1299 /* Compilation failed; go back for another re, skipping to blank line
1300 if non-interactive. */
1301
1302 if (rc != 0)
1303 {
1304 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1305 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1306 goto SKIP_DATA;
1307 }
1308 }
1309
1310 /* Handle compiling via the native interface */
1311
1312 else
1313 #endif /* !defined NOPOSIX */
1314
1315 {
1316 if (timeit > 0)
1317 {
1318 register int i;
1319 clock_t time_taken;
1320 clock_t start_time = clock();
1321 for (i = 0; i < timeit; i++)
1322 {
1323 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1324 if (re != NULL) free(re);
1325 }
1326 time_taken = clock() - start_time;
1327 fprintf(outfile, "Compile time %.4f milliseconds\n",
1328 (((double)time_taken * 1000.0) / (double)timeit) /
1329 (double)CLOCKS_PER_SEC);
1330 }
1331
1332 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1333
1334 /* Compilation failed; go back for another re, skipping to blank line
1335 if non-interactive. */
1336
1337 if (re == NULL)
1338 {
1339 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1340 SKIP_DATA:
1341 if (infile != stdin)
1342 {
1343 for (;;)
1344 {
1345 if (extend_inputline(infile, buffer, NULL) == NULL)
1346 {
1347 done = 1;
1348 goto CONTINUE;
1349 }
1350 len = (int)strlen((char *)buffer);
1351 while (len > 0 && isspace(buffer[len-1])) len--;
1352 if (len == 0) break;
1353 }
1354 fprintf(outfile, "\n");
1355 }
1356 goto CONTINUE;
1357 }
1358
1359 /* Compilation succeeded; print data if required. There are now two
1360 info-returning functions. The old one has a limited interface and
1361 returns only limited data. Check that it agrees with the newer one. */
1362
1363 if (log_store)
1364 fprintf(outfile, "Memory allocation (code space): %d\n",
1365 (int)(gotten_store -
1366 sizeof(real_pcre) -
1367 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1368
1369 /* Extract the size for possible writing before possibly flipping it,
1370 and remember the store that was got. */
1371
1372 true_size = ((real_pcre *)re)->size;
1373 regex_gotten_store = gotten_store;
1374
1375 /* If /S was present, study the regexp to generate additional info to
1376 help with the matching. */
1377
1378 if (do_study)
1379 {
1380 if (timeit > 0)
1381 {
1382 register int i;
1383 clock_t time_taken;
1384 clock_t start_time = clock();
1385 for (i = 0; i < timeit; i++)
1386 extra = pcre_study(re, study_options, &error);
1387 time_taken = clock() - start_time;
1388 if (extra != NULL) free(extra);
1389 fprintf(outfile, " Study time %.4f milliseconds\n",
1390 (((double)time_taken * 1000.0) / (double)timeit) /
1391 (double)CLOCKS_PER_SEC);
1392 }
1393 extra = pcre_study(re, study_options, &error);
1394 if (error != NULL)
1395 fprintf(outfile, "Failed to study: %s\n", error);
1396 else if (extra != NULL)
1397 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1398 }
1399
1400 /* If the 'F' option was present, we flip the bytes of all the integer
1401 fields in the regex data block and the study block. This is to make it
1402 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1403 compiled on a different architecture. */
1404
1405 if (do_flip)
1406 {
1407 real_pcre *rre = (real_pcre *)re;
1408 rre->magic_number =
1409 byteflip(rre->magic_number, sizeof(rre->magic_number));
1410 rre->size = byteflip(rre->size, sizeof(rre->size));
1411 rre->options = byteflip(rre->options, sizeof(rre->options));
1412 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1413 rre->top_bracket =
1414 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1415 rre->top_backref =
1416 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1417 rre->first_byte =
1418 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1419 rre->req_byte =
1420 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1421 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1422 sizeof(rre->name_table_offset));
1423 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1424 sizeof(rre->name_entry_size));
1425 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1426 sizeof(rre->name_count));
1427
1428 if (extra != NULL)
1429 {
1430 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1431 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1432 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1433 }
1434 }
1435
1436 /* Extract information from the compiled data if required */
1437
1438 SHOW_INFO:
1439
1440 if (do_debug)
1441 {
1442 fprintf(outfile, "------------------------------------------------------------------\n");
1443 pcre_printint(re, outfile, debug_lengths);
1444 }
1445
1446 if (do_showinfo)
1447 {
1448 unsigned long int get_options, all_options;
1449 #if !defined NOINFOCHECK
1450 int old_first_char, old_options, old_count;
1451 #endif
1452 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1453 hascrorlf;
1454 int nameentrysize, namecount;
1455 const uschar *nametable;
1456
1457 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1458 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1459 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1460 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1461 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1462 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1463 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1464 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1465 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1466 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1467 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1468 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1469
1470 #if !defined NOINFOCHECK
1471 old_count = pcre_info(re, &old_options, &old_first_char);
1472 if (count < 0) fprintf(outfile,
1473 "Error %d from pcre_info()\n", count);
1474 else
1475 {
1476 if (old_count != count) fprintf(outfile,
1477 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1478 old_count);
1479
1480 if (old_first_char != first_char) fprintf(outfile,
1481 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1482 first_char, old_first_char);
1483
1484 if (old_options != (int)get_options) fprintf(outfile,
1485 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1486 get_options, old_options);
1487 }
1488 #endif
1489
1490 if (size != regex_gotten_store) fprintf(outfile,
1491 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1492 (int)size, (int)regex_gotten_store);
1493
1494 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1495 if (backrefmax > 0)
1496 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1497
1498 if (namecount > 0)
1499 {
1500 fprintf(outfile, "Named capturing subpatterns:\n");
1501 while (namecount-- > 0)
1502 {
1503 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1504 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1505 GET2(nametable, 0));
1506 nametable += nameentrysize;
1507 }
1508 }
1509
1510 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1511 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1512
1513 all_options = ((real_pcre *)re)->options;
1514 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1515
1516 if (get_options == 0) fprintf(outfile, "No options\n");
1517 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1518 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1519 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1520 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1521 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1522 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1523 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1524 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1525 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1526 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1527 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1528 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1529 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1530 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1531 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1532 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1533
1534 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1535
1536 switch (get_options & PCRE_NEWLINE_BITS)
1537 {
1538 case PCRE_NEWLINE_CR:
1539 fprintf(outfile, "Forced newline sequence: CR\n");
1540 break;
1541
1542 case PCRE_NEWLINE_LF:
1543 fprintf(outfile, "Forced newline sequence: LF\n");
1544 break;
1545
1546 case PCRE_NEWLINE_CRLF:
1547 fprintf(outfile, "Forced newline sequence: CRLF\n");
1548 break;
1549
1550 case PCRE_NEWLINE_ANYCRLF:
1551 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1552 break;
1553
1554 case PCRE_NEWLINE_ANY:
1555 fprintf(outfile, "Forced newline sequence: ANY\n");
1556 break;
1557
1558 default:
1559 break;
1560 }
1561
1562 if (first_char == -1)
1563 {
1564 fprintf(outfile, "First char at start or follows newline\n");
1565 }
1566 else if (first_char < 0)
1567 {
1568 fprintf(outfile, "No first char\n");
1569 }
1570 else
1571 {
1572 int ch = first_char & 255;
1573 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1574 "" : " (caseless)";
1575 if (PRINTHEX(ch))
1576 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1577 else
1578 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1579 }
1580
1581 if (need_char < 0)
1582 {
1583 fprintf(outfile, "No need char\n");
1584 }
1585 else
1586 {
1587 int ch = need_char & 255;
1588 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1589 "" : " (caseless)";
1590 if (PRINTHEX(ch))
1591 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1592 else
1593 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1594 }
1595
1596 /* Don't output study size; at present it is in any case a fixed
1597 value, but it varies, depending on the computer architecture, and
1598 so messes up the test suite. (And with the /F option, it might be
1599 flipped.) */
1600
1601 if (do_study)
1602 {
1603 if (extra == NULL)
1604 fprintf(outfile, "Study returned NULL\n");
1605 else
1606 {
1607 uschar *start_bits = NULL;
1608 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1609
1610 if (start_bits == NULL)
1611 fprintf(outfile, "No starting byte set\n");
1612 else
1613 {
1614 int i;
1615 int c = 24;
1616 fprintf(outfile, "Starting byte set: ");
1617 for (i = 0; i < 256; i++)
1618 {
1619 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1620 {
1621 if (c > 75)
1622 {
1623 fprintf(outfile, "\n ");
1624 c = 2;
1625 }
1626 if (PRINTHEX(i) && i != ' ')
1627 {
1628 fprintf(outfile, "%c ", i);
1629 c += 2;
1630 }
1631 else
1632 {
1633 fprintf(outfile, "\\x%02x ", i);
1634 c += 5;
1635 }
1636 }
1637 }
1638 fprintf(outfile, "\n");
1639 }
1640 }
1641 }
1642 }
1643
1644 /* If the '>' option was present, we write out the regex to a file, and
1645 that is all. The first 8 bytes of the file are the regex length and then
1646 the study length, in big-endian order. */
1647
1648 if (to_file != NULL)
1649 {
1650 FILE *f = fopen((char *)to_file, "wb");
1651 if (f == NULL)
1652 {
1653 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1654 }
1655 else
1656 {
1657 uschar sbuf[8];
1658 sbuf[0] = (uschar)((true_size >> 24) & 255);
1659 sbuf[1] = (uschar)((true_size >> 16) & 255);
1660 sbuf[2] = (uschar)((true_size >> 8) & 255);
1661 sbuf[3] = (uschar)((true_size) & 255);
1662
1663 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1664 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1665 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1666 sbuf[7] = (uschar)((true_study_size) & 255);
1667
1668 if (fwrite(sbuf, 1, 8, f) < 8 ||
1669 fwrite(re, 1, true_size, f) < true_size)
1670 {
1671 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1672 }
1673 else
1674 {
1675 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1676 if (extra != NULL)
1677 {
1678 if (fwrite(extra->study_data, 1, true_study_size, f) <
1679 true_study_size)
1680 {
1681 fprintf(outfile, "Write error on %s: %s\n", to_file,
1682 strerror(errno));
1683 }
1684 else fprintf(outfile, "Study data written to %s\n", to_file);
1685
1686 }
1687 }
1688 fclose(f);
1689 }
1690
1691 new_free(re);
1692 if (extra != NULL) new_free(extra);
1693 if (tables != NULL) new_free((void *)tables);
1694 continue; /* With next regex */
1695 }
1696 } /* End of non-POSIX compile */
1697
1698 /* Read data lines and test them */
1699
1700 for (;;)
1701 {
1702 uschar *q;
1703 uschar *bptr;
1704 int *use_offsets = offsets;
1705 int use_size_offsets = size_offsets;
1706 int callout_data = 0;
1707 int callout_data_set = 0;
1708 int count, c;
1709 int copystrings = 0;
1710 int find_match_limit = 0;
1711 int getstrings = 0;
1712 int getlist = 0;
1713 int gmatched = 0;
1714 int start_offset = 0;
1715 int g_notempty = 0;
1716 int use_dfa = 0;
1717
1718 options = 0;
1719
1720 *copynames = 0;
1721 *getnames = 0;
1722
1723 copynamesptr = copynames;
1724 getnamesptr = getnames;
1725
1726 pcre_callout = callout;
1727 first_callout = 1;
1728 callout_extra = 0;
1729 callout_count = 0;
1730 callout_fail_count = 999999;
1731 callout_fail_id = -1;
1732 show_malloc = 0;
1733
1734 if (extra != NULL) extra->flags &=
1735 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1736
1737 len = 0;
1738 for (;;)
1739 {
1740 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1741 {
1742 if (len > 0) break;
1743 done = 1;
1744 goto CONTINUE;
1745 }
1746 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1747 len = (int)strlen((char *)buffer);
1748 if (buffer[len-1] == '\n') break;
1749 }
1750
1751 while (len > 0 && isspace(buffer[len-1])) len--;
1752 buffer[len] = 0;
1753 if (len == 0) break;
1754
1755 p = buffer;
1756 while (isspace(*p)) p++;
1757
1758 bptr = q = dbuffer;
1759 while ((c = *p++) != 0)
1760 {
1761 int i = 0;
1762 int n = 0;
1763
1764 if (c == '\\') switch ((c = *p++))
1765 {
1766 case 'a': c = 7; break;
1767 case 'b': c = '\b'; break;
1768 case 'e': c = 27; break;
1769 case 'f': c = '\f'; break;
1770 case 'n': c = '\n'; break;
1771 case 'r': c = '\r'; break;
1772 case 't': c = '\t'; break;
1773 case 'v': c = '\v'; break;
1774
1775 case '0': case '1': case '2': case '3':
1776 case '4': case '5': case '6': case '7':
1777 c -= '0';
1778 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1779 c = c * 8 + *p++ - '0';
1780
1781 #if !defined NOUTF8
1782 if (use_utf8 && c > 255)
1783 {
1784 unsigned char buff8[8];
1785 int ii, utn;
1786 utn = ord2utf8(c, buff8);
1787 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1788 c = buff8[ii]; /* Last byte */
1789 }
1790 #endif
1791 break;
1792
1793 case 'x':
1794
1795 /* Handle \x{..} specially - new Perl thing for utf8 */
1796
1797 #if !defined NOUTF8
1798 if (*p == '{')
1799 {
1800 unsigned char *pt = p;
1801 c = 0;
1802 while (isxdigit(*(++pt)))
1803 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1804 if (*pt == '}')
1805 {
1806 unsigned char buff8[8];
1807 int ii, utn;
1808 utn = ord2utf8(c, buff8);
1809 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1810 c = buff8[ii]; /* Last byte */
1811 p = pt + 1;
1812 break;
1813 }
1814 /* Not correct form; fall through */
1815 }
1816 #endif
1817
1818 /* Ordinary \x */
1819
1820 c = 0;
1821 while (i++ < 2 && isxdigit(*p))
1822 {
1823 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1824 p++;
1825 }
1826 break;
1827
1828 case 0: /* \ followed by EOF allows for an empty line */
1829 p--;
1830 continue;
1831
1832 case '>':
1833 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1834 continue;
1835
1836 case 'A': /* Option setting */
1837 options |= PCRE_ANCHORED;
1838 continue;
1839
1840 case 'B':
1841 options |= PCRE_NOTBOL;
1842 continue;
1843
1844 case 'C':
1845 if (isdigit(*p)) /* Set copy string */
1846 {
1847 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1848 copystrings |= 1 << n;
1849 }
1850 else if (isalnum(*p))
1851 {
1852 uschar *npp = copynamesptr;
1853 while (isalnum(*p)) *npp++ = *p++;
1854 *npp++ = 0;
1855 *npp = 0;
1856 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1857 if (n < 0)
1858 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1859 copynamesptr = npp;
1860 }
1861 else if (*p == '+')
1862 {
1863 callout_extra = 1;
1864 p++;
1865 }
1866 else if (*p == '-')
1867 {
1868 pcre_callout = NULL;
1869 p++;
1870 }
1871 else if (*p == '!')
1872 {
1873 callout_fail_id = 0;
1874 p++;
1875 while(isdigit(*p))
1876 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1877 callout_fail_count = 0;
1878 if (*p == '!')
1879 {
1880 p++;
1881 while(isdigit(*p))
1882 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1883 }
1884 }
1885 else if (*p == '*')
1886 {
1887 int sign = 1;
1888 callout_data = 0;
1889 if (*(++p) == '-') { sign = -1; p++; }
1890 while(isdigit(*p))
1891 callout_data = callout_data * 10 + *p++ - '0';
1892 callout_data *= sign;
1893 callout_data_set = 1;
1894 }
1895 continue;
1896
1897 #if !defined NODFA
1898 case 'D':
1899 #if !defined NOPOSIX
1900 if (posix || do_posix)
1901 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1902 else
1903 #endif
1904 use_dfa = 1;
1905 continue;
1906
1907 case 'F':
1908 options |= PCRE_DFA_SHORTEST;
1909 continue;
1910 #endif
1911
1912 case 'G':
1913 if (isdigit(*p))
1914 {
1915 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1916 getstrings |= 1 << n;
1917 }
1918 else if (isalnum(*p))
1919 {
1920 uschar *npp = getnamesptr;
1921 while (isalnum(*p)) *npp++ = *p++;
1922 *npp++ = 0;
1923 *npp = 0;
1924 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1925 if (n < 0)
1926 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1927 getnamesptr = npp;
1928 }
1929 continue;
1930
1931 case 'L':
1932 getlist = 1;
1933 continue;
1934
1935 case 'M':
1936 find_match_limit = 1;
1937 continue;
1938
1939 case 'N':
1940 options |= PCRE_NOTEMPTY;
1941 continue;
1942
1943 case 'O':
1944 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1945 if (n > size_offsets_max)
1946 {
1947 size_offsets_max = n;
1948 free(offsets);
1949 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1950 if (offsets == NULL)
1951 {
1952 printf("** Failed to get %d bytes of memory for offsets vector\n",
1953 (int)(size_offsets_max * sizeof(int)));
1954 yield = 1;
1955 goto EXIT;
1956 }
1957 }
1958 use_size_offsets = n;
1959 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1960 continue;
1961
1962 case 'P':
1963 options |= PCRE_PARTIAL;
1964 continue;
1965
1966 case 'Q':
1967 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1968 if (extra == NULL)
1969 {
1970 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1971 extra->flags = 0;
1972 }
1973 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1974 extra->match_limit_recursion = n;
1975 continue;
1976
1977 case 'q':
1978 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1979 if (extra == NULL)
1980 {
1981 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1982 extra->flags = 0;
1983 }
1984 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1985 extra->match_limit = n;
1986 continue;
1987
1988 #if !defined NODFA
1989 case 'R':
1990 options |= PCRE_DFA_RESTART;
1991 continue;
1992 #endif
1993
1994 case 'S':
1995 show_malloc = 1;
1996 continue;
1997
1998 case 'Z':
1999 options |= PCRE_NOTEOL;
2000 continue;
2001
2002 case '?':
2003 options |= PCRE_NO_UTF8_CHECK;
2004 continue;
2005
2006 case '<':
2007 {
2008 int x = check_newline(p, outfile);
2009 if (x == 0) goto NEXT_DATA;
2010 options |= x;
2011 while (*p++ != '>');
2012 }
2013 continue;
2014 }
2015 *q++ = c;
2016 }
2017 *q = 0;
2018 len = q - dbuffer;
2019
2020 if ((all_use_dfa || use_dfa) && find_match_limit)
2021 {
2022 printf("**Match limit not relevant for DFA matching: ignored\n");
2023 find_match_limit = 0;
2024 }
2025
2026 /* Handle matching via the POSIX interface, which does not
2027 support timing or playing with the match limit or callout data. */
2028
2029 #if !defined NOPOSIX
2030 if (posix || do_posix)
2031 {
2032 int rc;
2033 int eflags = 0;
2034 regmatch_t *pmatch = NULL;
2035 if (use_size_offsets > 0)
2036 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2037 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2038 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2039
2040 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2041
2042 if (rc != 0)
2043 {
2044 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2045 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2046 }
2047 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2048 != 0)
2049 {
2050 fprintf(outfile, "Matched with REG_NOSUB\n");
2051 }
2052 else
2053 {
2054 size_t i;
2055 for (i = 0; i < (size_t)use_size_offsets; i++)
2056 {
2057 if (pmatch[i].rm_so >= 0)
2058 {
2059 fprintf(outfile, "%2d: ", (int)i);
2060 (void)pchars(dbuffer + pmatch[i].rm_so,
2061 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2062 fprintf(outfile, "\n");
2063 if (i == 0 && do_showrest)
2064 {
2065 fprintf(outfile, " 0+ ");
2066 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2067 outfile);
2068 fprintf(outfile, "\n");
2069 }
2070 }
2071 }
2072 }
2073 free(pmatch);
2074 }
2075
2076 /* Handle matching via the native interface - repeats for /g and /G */
2077
2078 else
2079 #endif /* !defined NOPOSIX */
2080
2081 for (;; gmatched++) /* Loop for /g or /G */
2082 {
2083 if (timeitm > 0)
2084 {
2085 register int i;
2086 clock_t time_taken;
2087 clock_t start_time = clock();
2088
2089 #if !defined NODFA
2090 if (all_use_dfa || use_dfa)
2091 {
2092 int workspace[1000];
2093 for (i = 0; i < timeitm; i++)
2094 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2095 options | g_notempty, use_offsets, use_size_offsets, workspace,
2096 sizeof(workspace)/sizeof(int));
2097 }
2098 else
2099 #endif
2100
2101 for (i = 0; i < timeitm; i++)
2102 count = pcre_exec(re, extra, (char *)bptr, len,
2103 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2104
2105 time_taken = clock() - start_time;
2106 fprintf(outfile, "Execute time %.4f milliseconds\n",
2107 (((double)time_taken * 1000.0) / (double)timeitm) /
2108 (double)CLOCKS_PER_SEC);
2109 }
2110
2111 /* If find_match_limit is set, we want to do repeated matches with
2112 varying limits in order to find the minimum value for the match limit and
2113 for the recursion limit. */
2114
2115 if (find_match_limit)
2116 {
2117 if (extra == NULL)
2118 {
2119 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2120 extra->flags = 0;
2121 }
2122
2123 (void)check_match_limit(re, extra, bptr, len, start_offset,
2124 options|g_notempty, use_offsets, use_size_offsets,
2125 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2126 PCRE_ERROR_MATCHLIMIT, "match()");
2127
2128 count = check_match_limit(re, extra, bptr, len, start_offset,
2129 options|g_notempty, use_offsets, use_size_offsets,
2130 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2131 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2132 }
2133
2134 /* If callout_data is set, use the interface with additional data */
2135
2136 else if (callout_data_set)
2137 {
2138 if (extra == NULL)
2139 {
2140 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2141 extra->flags = 0;
2142 }
2143 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2144 extra->callout_data = &callout_data;
2145 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2146 options | g_notempty, use_offsets, use_size_offsets);
2147 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2148 }
2149
2150 /* The normal case is just to do the match once, with the default
2151 value of match_limit. */
2152
2153 #if !defined NODFA
2154 else if (all_use_dfa || use_dfa)
2155 {
2156 int workspace[1000];
2157 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2158 options | g_notempty, use_offsets, use_size_offsets, workspace,
2159 sizeof(workspace)/sizeof(int));
2160 if (count == 0)
2161 {
2162 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2163 count = use_size_offsets/2;
2164 }
2165 }
2166 #endif
2167
2168 else
2169 {
2170 count = pcre_exec(re, extra, (char *)bptr, len,
2171 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2172 if (count == 0)
2173 {
2174 fprintf(outfile, "Matched, but too many substrings\n");
2175 count = use_size_offsets/3;
2176 }
2177 }
2178
2179 /* Matched */
2180
2181 if (count >= 0)
2182 {
2183 int i, maxcount;
2184
2185 #if !defined NODFA
2186 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2187 #endif
2188 maxcount = use_size_offsets/3;
2189
2190 /* This is a check against a lunatic return value. */
2191
2192 if (count > maxcount)
2193 {
2194 fprintf(outfile,
2195 "** PCRE error: returned count %d is too big for offset size %d\n",
2196 count, use_size_offsets);
2197 count = use_size_offsets/3;
2198 if (do_g || do_G)
2199 {
2200 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2201 do_g = do_G = FALSE; /* Break g/G loop */
2202 }
2203 }
2204
2205 for (i = 0; i < count * 2; i += 2)
2206 {
2207 if (use_offsets[i] < 0)
2208 fprintf(outfile, "%2d: <unset>\n", i/2);
2209 else
2210 {
2211 fprintf(outfile, "%2d: ", i/2);
2212 (void)pchars(bptr + use_offsets[i],
2213 use_offsets[i+1] - use_offsets[i], outfile);
2214 fprintf(outfile, "\n");
2215 if (i == 0)
2216 {
2217 if (do_showrest)
2218 {
2219 fprintf(outfile, " 0+ ");
2220 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2221 outfile);
2222 fprintf(outfile, "\n");
2223 }
2224 }
2225 }
2226 }
2227
2228 for (i = 0; i < 32; i++)
2229 {
2230 if ((copystrings & (1 << i)) != 0)
2231 {
2232 char copybuffer[256];
2233 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2234 i, copybuffer, sizeof(copybuffer));
2235 if (rc < 0)
2236 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2237 else
2238 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2239 }
2240 }
2241
2242 for (copynamesptr = copynames;
2243 *copynamesptr != 0;
2244 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2245 {
2246 char copybuffer[256];
2247 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2248 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2249 if (rc < 0)
2250 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2251 else
2252 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2253 }
2254
2255 for (i = 0; i < 32; i++)
2256 {
2257 if ((getstrings & (1 << i)) != 0)
2258 {
2259 const char *substring;
2260 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2261 i, &substring);
2262 if (rc < 0)
2263 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2264 else
2265 {
2266 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2267 pcre_free_substring(substring);
2268 }
2269 }
2270 }
2271
2272 for (getnamesptr = getnames;
2273 *getnamesptr != 0;
2274 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2275 {
2276 const char *substring;
2277 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2278 count, (char *)getnamesptr, &substring);
2279 if (rc < 0)
2280 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2281 else
2282 {
2283 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2284 pcre_free_substring(substring);
2285 }
2286 }
2287
2288 if (getlist)
2289 {
2290 const char **stringlist;
2291 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2292 &stringlist);
2293 if (rc < 0)
2294 fprintf(outfile, "get substring list failed %d\n", rc);
2295 else
2296 {
2297 for (i = 0; i < count; i++)
2298 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2299 if (stringlist[i] != NULL)
2300 fprintf(outfile, "string list not terminated by NULL\n");
2301 /* free((void *)stringlist); */
2302 pcre_free_substring_list(stringlist);
2303 }
2304 }
2305 }
2306
2307 /* There was a partial match */
2308
2309 else if (count == PCRE_ERROR_PARTIAL)
2310 {
2311 fprintf(outfile, "Partial match");
2312 #if !defined NODFA
2313 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2314 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2315 bptr + use_offsets[0]);
2316 #endif
2317 fprintf(outfile, "\n");
2318 break; /* Out of the /g loop */
2319 }
2320
2321 /* Failed to match. If this is a /g or /G loop and we previously set
2322 g_notempty after a null match, this is not necessarily the end. We want
2323 to advance the start offset, and continue. We won't be at the end of the
2324 string - that was checked before setting g_notempty.
2325
2326 Complication arises in the case when the newline option is "any" or
2327 "anycrlf". If the previous match was at the end of a line terminated by
2328 CRLF, an advance of one character just passes the \r, whereas we should
2329 prefer the longer newline sequence, as does the code in pcre_exec().
2330 Fudge the offset value to achieve this.
2331
2332 Otherwise, in the case of UTF-8 matching, the advance must be one
2333 character, not one byte. */
2334
2335 else
2336 {
2337 if (g_notempty != 0)
2338 {
2339 int onechar = 1;
2340 unsigned int obits = ((real_pcre *)re)->options;
2341 use_offsets[0] = start_offset;
2342 if ((obits & PCRE_NEWLINE_BITS) == 0)
2343 {
2344 int d;
2345 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2346 obits = (d == '\r')? PCRE_NEWLINE_CR :
2347 (d == '\n')? PCRE_NEWLINE_LF :
2348 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2349 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2350 (d == -1)? PCRE_NEWLINE_ANY : 0;
2351 }
2352 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2353 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2354 &&
2355 start_offset < len - 1 &&
2356 bptr[start_offset] == '\r' &&
2357 bptr[start_offset+1] == '\n')
2358 onechar++;
2359 else if (use_utf8)
2360 {
2361 while (start_offset + onechar < len)
2362 {
2363 int tb = bptr[start_offset+onechar];
2364 if (tb <= 127) break;
2365 tb &= 0xc0;
2366 if (tb != 0 && tb != 0xc0) onechar++;
2367 }
2368 }
2369 use_offsets[1] = start_offset + onechar;
2370 }
2371 else
2372 {
2373 if (count == PCRE_ERROR_NOMATCH)
2374 {
2375 if (gmatched == 0) fprintf(outfile, "No match\n");
2376 }
2377 else fprintf(outfile, "Error %d\n", count);
2378 break; /* Out of the /g loop */
2379 }
2380 }
2381
2382 /* If not /g or /G we are done */
2383
2384 if (!do_g && !do_G) break;
2385
2386 /* If we have matched an empty string, first check to see if we are at
2387 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2388 what Perl's /g option does. This turns out to be rather cunning. First
2389 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2390 same point. If this fails (picked up above) we advance to the next
2391 character. */
2392
2393 g_notempty = 0;
2394
2395 if (use_offsets[0] == use_offsets[1])
2396 {
2397 if (use_offsets[0] == len) break;
2398 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2399 }
2400
2401 /* For /g, update the start offset, leaving the rest alone */
2402
2403 if (do_g) start_offset = use_offsets[1];
2404
2405 /* For /G, update the pointer and length */
2406
2407 else
2408 {
2409 bptr += use_offsets[1];
2410 len -= use_offsets[1];
2411 }
2412 } /* End of loop for /g and /G */
2413
2414 NEXT_DATA: continue;
2415 } /* End of loop for data lines */
2416
2417 CONTINUE:
2418
2419 #if !defined NOPOSIX
2420 if (posix || do_posix) regfree(&preg);
2421 #endif
2422
2423 if (re != NULL) new_free(re);
2424 if (extra != NULL) new_free(extra);
2425 if (tables != NULL)
2426 {
2427 new_free((void *)tables);
2428 setlocale(LC_CTYPE, "C");
2429 locale_set = 0;
2430 }
2431 }
2432
2433 if (infile == stdin) fprintf(outfile, "\n");
2434
2435 EXIT:
2436
2437 if (infile != NULL && infile != stdin) fclose(infile);
2438 if (outfile != NULL && outfile != stdout) fclose(outfile);
2439
2440 free(buffer);
2441 free(dbuffer);
2442 free(pbuffer);
2443 free(offsets);
2444
2445 return yield;
2446 }
2447
2448 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12