/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 289 - (show annotations) (download)
Sun Dec 23 12:17:20 2007 UTC (6 years, 7 months ago) by ph10
File MIME type: text/plain
File size: 71251 byte(s)
Tidies for 7.5-RC1 release.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <locale.h>
49 #include <errno.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #include <unistd.h>
53 #include <readline/readline.h>
54 #include <readline/history.h>
55 #endif
56
57
58 /* A number of things vary for Windows builds. Originally, pcretest opened its
59 input and output without "b"; then I was told that "b" was needed in some
60 environments, so it was added for release 5.0 to both the input and output. (It
61 makes no difference on Unix-like systems.) Later I was told that it is wrong
62 for the input on Windows. I've now abstracted the modes into two macros that
63 are set here, to make it easier to fiddle with them, and removed "b" from the
64 input mode under Windows. */
65
66 #if defined(_WIN32) || defined(WIN32)
67 #include <io.h> /* For _setmode() */
68 #include <fcntl.h> /* For _O_BINARY */
69 #define INPUT_MODE "r"
70 #define OUTPUT_MODE "wb"
71
72 #else
73 #include <sys/time.h> /* These two includes are needed */
74 #include <sys/resource.h> /* for setrlimit(). */
75 #define INPUT_MODE "rb"
76 #define OUTPUT_MODE "wb"
77 #endif
78
79
80 /* We have to include pcre_internal.h because we need the internal info for
81 displaying the results of pcre_study() and we also need to know about the
82 internal macros, structures, and other internal data values; pcretest has
83 "inside information" compared to a program that strictly follows the PCRE API.
84
85 Although pcre_internal.h does itself include pcre.h, we explicitly include it
86 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
87 appropriately for an application, not for building PCRE. */
88
89 #include "pcre.h"
90 #include "pcre_internal.h"
91
92 /* We need access to the data tables that PCRE uses. So as not to have to keep
93 two copies, we include the source file here, changing the names of the external
94 symbols to prevent clashes. */
95
96 #define _pcre_utf8_table1 utf8_table1
97 #define _pcre_utf8_table1_size utf8_table1_size
98 #define _pcre_utf8_table2 utf8_table2
99 #define _pcre_utf8_table3 utf8_table3
100 #define _pcre_utf8_table4 utf8_table4
101 #define _pcre_utt utt
102 #define _pcre_utt_size utt_size
103 #define _pcre_utt_names utt_names
104 #define _pcre_OP_lengths OP_lengths
105
106 #include "pcre_tables.c"
107
108 /* We also need the pcre_printint() function for printing out compiled
109 patterns. This function is in a separate file so that it can be included in
110 pcre_compile.c when that module is compiled with debugging enabled.
111
112 The definition of the macro PRINTABLE, which determines whether to print an
113 output character as-is or as a hex value when showing compiled patterns, is
114 contained in this file. We use it here also, in cases when the locale has not
115 been explicitly changed, so as to get consistent output from systems that
116 differ in their output from isprint() even in the "C" locale. */
117
118 #include "pcre_printint.src"
119
120 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
121
122
123 /* It is possible to compile this test program without including support for
124 testing the POSIX interface, though this is not available via the standard
125 Makefile. */
126
127 #if !defined NOPOSIX
128 #include "pcreposix.h"
129 #endif
130
131 /* It is also possible, for the benefit of the version currently imported into
132 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
133 interface to the DFA matcher (NODFA), and without the doublecheck of the old
134 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
135 UTF8 support if PCRE is built without it. */
136
137 #ifndef SUPPORT_UTF8
138 #ifndef NOUTF8
139 #define NOUTF8
140 #endif
141 #endif
142
143
144 /* Other parameters */
145
146 #ifndef CLOCKS_PER_SEC
147 #ifdef CLK_TCK
148 #define CLOCKS_PER_SEC CLK_TCK
149 #else
150 #define CLOCKS_PER_SEC 100
151 #endif
152 #endif
153
154 /* This is the default loop count for timing. */
155
156 #define LOOPREPEAT 500000
157
158 /* Static variables */
159
160 static FILE *outfile; /* Where results are written; file-scope so that new_malloc() and friends can use it */
161 static int log_store = 0; /* NOTE(review): not referenced in the visible part of the file */
162 static int callout_count; /* Incremented by callout() for callouts whose number equals callout_fail_id */
163 static int callout_extra; /* Non-zero: callout() also prints the captured substrings */
164 static int callout_fail_count; /* callout() returns 1 once callout_count reaches this value */
165 static int callout_fail_id; /* The callout number that is counted towards failure */
166 static int debug_lengths; /* Set to 1 for each new pattern in main(); its use is not visible here */
167 static int first_callout; /* Non-zero: callout() re-prints the subject; callout() clears it */
168 static int locale_set = 0; /* Non-zero: PRINTHEX() uses isprint() instead of PRINTABLE() */
169 static int show_malloc; /* Non-zero: the malloc/free wrappers log each call to outfile */
170 static int use_utf8; /* Non-zero: pchars() decodes UTF-8; reset to 0 for each pattern in main() */
171 static size_t gotten_store; /* Size passed to the most recent new_malloc() call */
172
173 /* The buffers grow automatically if very long input lines are encountered. */
174
175 static int buffer_size = 50000; /* Current size of each buffer; doubled by extend_inputline() */
176 static uschar *buffer = NULL; /* Input lines are read into this buffer */
177 static uschar *dbuffer = NULL; /* Reallocated alongside buffer; contents are not kept on expansion */
178 static uschar *pbuffer = NULL; /* callout() prints pattern text from here; kept on expansion */
179
180
181
182 /*************************************************
183 * Read or extend an input line *
184 *************************************************/
185
186 /* Input lines are read into buffer, but both patterns and data lines can be
187 continued over multiple input lines. In addition, if the buffer fills up, we
188 want to automatically expand it so as to be able to handle extremely large
189 lines that are needed for certain stress tests. When the input buffer is
190 expanded, the other two buffers must also be expanded likewise, and the
191 contents of pbuffer, which are a copy of the input for callouts, must be
192 preserved (for when expansion happens for a data line). This is not the most
193 optimal way of handling this, but hey, this is just a test program!
194
195 Arguments:
196 f the file to read
197 start where in buffer to start (this *must* be within buffer)
198 prompt for stdin or readline()
199
200 Returns: pointer to the start of new data
201 could be a copy of start, or could be moved
202 NULL if no data read and EOF reached
203 */
204
205 static uschar *
206 extend_inputline(FILE *f, uschar *start, const char *prompt)
207 {
208 uschar *here = start;
209
210 for (;;)
211 {
212 int rlen = buffer_size - (here - buffer);
213
214 if (rlen > 1000)
215 {
216 int dlen;
217
218 /* If libreadline support is required, use readline() to read a line if the
219 input is a terminal. Note that readline() removes the trailing newline, so
220 we must put it back again, to be compatible with fgets(). */
221
222 #ifdef SUPPORT_LIBREADLINE
223 if (isatty(fileno(f)))
224 {
225 size_t len;
226 char *s = readline(prompt);
227 if (s == NULL) return (here == start)? NULL : start;
228 len = strlen(s);
229 if (len > 0) add_history(s);
230 if (len > rlen - 1) len = rlen - 1;
231 memcpy(here, s, len);
232 here[len] = '\n';
233 here[len+1] = 0;
234 free(s);
235 }
236 else
237 #endif
238
239 /* Read the next line by normal means, prompting if the file is stdin. */
240
241 {
242 if (f == stdin) printf(prompt);
243 if (fgets((char *)here, rlen, f) == NULL)
244 return (here == start)? NULL : start;
245 }
246
247 dlen = (int)strlen((char *)here);
248 if (dlen > 0 && here[dlen - 1] == '\n') return start;
249 here += dlen;
250 }
251
252 else
253 {
254 int new_buffer_size = 2*buffer_size;
255 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
256 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
257 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
258
259 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
260 {
261 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
262 exit(1);
263 }
264
265 memcpy(new_buffer, buffer, buffer_size);
266 memcpy(new_pbuffer, pbuffer, buffer_size);
267
268 buffer_size = new_buffer_size;
269
270 start = new_buffer + (start - buffer);
271 here = new_buffer + (here - buffer);
272
273 free(buffer);
274 free(dbuffer);
275 free(pbuffer);
276
277 buffer = new_buffer;
278 dbuffer = new_dbuffer;
279 pbuffer = new_pbuffer;
280 }
281 }
282
283 return NULL; /* Control never gets here */
284 }
285
286
287
288
289
290
291
292 /*************************************************
293 * Read number from string *
294 *************************************************/
295
/* A hand-rolled decimal parser, used only for unpicking command-line
arguments. strtoul() is deliberately avoided because SunOS4 lacks it.

Leading white space is skipped, then decimal digits are accumulated until the
first non-digit. No sign is recognised and no overflow check is made.

Arguments:
  str         string to be converted
  endptr      where to put the end pointer (first character not consumed)

Returns:      the value as an int; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  unsigned char *cursor = str;
  int total = 0;

  while (*cursor != 0 && isspace(*cursor))
    cursor++;

  for (; isdigit(*cursor); cursor++)
    total = total * 10 + (int)(*cursor - '0');

  *endptr = cursor;
  return total;
}
316
317
318
319
320 /*************************************************
321 * Convert UTF-8 string to value *
322 *************************************************/
323
324 /* This function takes one or more bytes that represents a UTF-8 character,
325 and returns the value of the character.
326
327 Argument:
328 utf8bytes a pointer to the byte vector
329 vptr a pointer to an int to receive the value
330
331 Returns: > 0 => the number of bytes consumed
332 -6 to 0 => malformed UTF-8 character at offset = (-return)
333 */
334
335 #if !defined NOUTF8
336
337 static int
338 utf82ord(unsigned char *utf8bytes, int *vptr)
339 {
340 int c = *utf8bytes++;
341 int d = c;
342 int i, j, s;
343
344 for (i = -1; i < 6; i++) /* i is number of additional bytes */
345 {
346 if ((d & 0x80) == 0) break;
347 d <<= 1;
348 }
349
350 if (i == -1) { *vptr = c; return 1; } /* ascii character */
351 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
352
353 /* i now has a value in the range 1-5 */
354
355 s = 6*i;
356 d = (c & utf8_table3[i]) << s;
357
358 for (j = 0; j < i; j++)
359 {
360 c = *utf8bytes++;
361 if ((c & 0xc0) != 0x80) return -(j+1);
362 s -= 6;
363 d |= (c & 0x3f) << s;
364 }
365
366 /* Check that encoding was the correct unique one */
367
368 for (j = 0; j < utf8_table1_size; j++)
369 if (d <= utf8_table1[j]) break;
370 if (j != i) return -(i+1);
371
372 /* Valid value */
373
374 *vptr = d;
375 return i+1;
376 }
377
378 #endif
379
380
381
382 /*************************************************
383 * Convert character value to UTF-8 *
384 *************************************************/
385
386 /* This function takes an integer value in the range 0 - 0x7fffffff
387 and encodes it as a UTF-8 character in 0 to 6 bytes.
388
389 Arguments:
390 cvalue the character value
391 utf8bytes pointer to buffer for result - at least 6 bytes long
392
393 Returns: number of characters placed in the buffer
394 */
395
396 #if !defined NOUTF8
397
398 static int
399 ord2utf8(int cvalue, uschar *utf8bytes)
400 {
401 register int i, j;
402 for (i = 0; i < utf8_table1_size; i++)
403 if (cvalue <= utf8_table1[i]) break;
404 utf8bytes += i;
405 for (j = i; j > 0; j--)
406 {
407 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
408 cvalue >>= 6;
409 }
410 *utf8bytes = utf8_table2[i] | cvalue;
411 return i + 1;
412 }
413
414 #endif
415
416
417
418 /*************************************************
419 * Print character string *
420 *************************************************/
421
422 /* Character string printing function. Must handle UTF-8 strings in utf8
423 mode. Yields number of characters printed. If handed a NULL file, just counts
424 chars without printing. */
425
426 static int pchars(unsigned char *p, int length, FILE *f)
427 {
428 int c = 0;
429 int yield = 0;
430
431 while (length-- > 0)
432 {
433 #if !defined NOUTF8
434 if (use_utf8)
435 {
436 int rc = utf82ord(p, &c);
437
438 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
439 {
440 length -= rc - 1;
441 p += rc;
442 if (PRINTHEX(c))
443 {
444 if (f != NULL) fprintf(f, "%c", c);
445 yield++;
446 }
447 else
448 {
449 int n = 4;
450 if (f != NULL) fprintf(f, "\\x{%02x}", c);
451 yield += (n <= 0x000000ff)? 2 :
452 (n <= 0x00000fff)? 3 :
453 (n <= 0x0000ffff)? 4 :
454 (n <= 0x000fffff)? 5 : 6;
455 }
456 continue;
457 }
458 }
459 #endif
460
461 /* Not UTF-8, or malformed UTF-8 */
462
463 c = *p++;
464 if (PRINTHEX(c))
465 {
466 if (f != NULL) fprintf(f, "%c", c);
467 yield++;
468 }
469 else
470 {
471 if (f != NULL) fprintf(f, "\\x%02x", c);
472 yield += 4;
473 }
474 }
475
476 return yield;
477 }
478
479
480
481 /*************************************************
482 * Callout function *
483 *************************************************/
484
485 /* Called from PCRE as a result of the (?C) item. We print out where we are in
486 the match. Yield zero unless more callouts than the fail count, or the callout
487 data is not zero. */
488
489 static int callout(pcre_callout_block *cb)
490 {
491 FILE *f = (first_callout | callout_extra)? outfile : NULL;
492 int i, pre_start, post_start, subject_length;
493
494 if (callout_extra)
495 {
496 fprintf(f, "Callout %d: last capture = %d\n",
497 cb->callout_number, cb->capture_last);
498
499 for (i = 0; i < cb->capture_top * 2; i += 2)
500 {
501 if (cb->offset_vector[i] < 0)
502 fprintf(f, "%2d: <unset>\n", i/2);
503 else
504 {
505 fprintf(f, "%2d: ", i/2);
506 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
507 cb->offset_vector[i+1] - cb->offset_vector[i], f);
508 fprintf(f, "\n");
509 }
510 }
511 }
512
513 /* Re-print the subject in canonical form, the first time or if giving full
514 datails. On subsequent calls in the same match, we use pchars just to find the
515 printed lengths of the substrings. */
516
517 if (f != NULL) fprintf(f, "--->");
518
519 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
520 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
521 cb->current_position - cb->start_match, f);
522
523 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
524
525 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
526 cb->subject_length - cb->current_position, f);
527
528 if (f != NULL) fprintf(f, "\n");
529
530 /* Always print appropriate indicators, with callout number if not already
531 shown. For automatic callouts, show the pattern offset. */
532
533 if (cb->callout_number == 255)
534 {
535 fprintf(outfile, "%+3d ", cb->pattern_position);
536 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
537 }
538 else
539 {
540 if (callout_extra) fprintf(outfile, " ");
541 else fprintf(outfile, "%3d ", cb->callout_number);
542 }
543
544 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
545 fprintf(outfile, "^");
546
547 if (post_start > 0)
548 {
549 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
550 fprintf(outfile, "^");
551 }
552
553 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
554 fprintf(outfile, " ");
555
556 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
557 pbuffer + cb->pattern_position);
558
559 fprintf(outfile, "\n");
560 first_callout = 0;
561
562 if (cb->callout_data != NULL)
563 {
564 int callout_data = *((int *)(cb->callout_data));
565 if (callout_data != 0)
566 {
567 fprintf(outfile, "Callout data = %d\n", callout_data);
568 return callout_data;
569 }
570 }
571
572 return (cb->callout_number != callout_fail_id)? 0 :
573 (++callout_count >= callout_fail_count)? 1 : 0;
574 }
575
576
577 /*************************************************
578 * Local malloc functions *
579 *************************************************/
580
581 /* Alternative malloc function, to test functionality and show the size of the
582 compiled re. */
583
584 static void *new_malloc(size_t size)
585 {
586 void *block = malloc(size);
587 gotten_store = size;
588 if (show_malloc)
589 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
590 return block;
591 }
592
593 static void new_free(void *block)
594 {
595 if (show_malloc)
596 fprintf(outfile, "free %p\n", block);
597 free(block);
598 }
599
600
601 /* For recursion malloc/free, to test stacking calls */
602
603 static void *stack_malloc(size_t size)
604 {
605 void *block = malloc(size);
606 if (show_malloc)
607 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
608 return block;
609 }
610
611 static void stack_free(void *block)
612 {
613 if (show_malloc)
614 fprintf(outfile, "stack_free %p\n", block);
615 free(block);
616 }
617
618
619 /*************************************************
620 * Call pcre_fullinfo() *
621 *************************************************/
622
623 /* Get one piece of information from the pcre_fullinfo() function */
624
625 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
626 {
627 int rc;
628 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
629 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
630 }
631
632
633
634 /*************************************************
635 * Byte flipping function *
636 *************************************************/
637
/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value       the value to flip
  n           2 to swap the two low-order bytes; any other value reverses
              the four low-order bytes

Returns:      the flipped value; bytes above those taken part in the swap
              are dropped
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int b0 = value & 0xff;
  unsigned long int b1 = (value >> 8) & 0xff;
  unsigned long int b2, b3;

  if (n == 2) return (b0 << 8) | b1;

  b2 = (value >> 16) & 0xff;
  b3 = (value >> 24) & 0xff;

  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
647
648
649
650
651 /*************************************************
652 * Check match or recursion limit *
653 *************************************************/
654
655 static int
656 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
657 int start_offset, int options, int *use_offsets, int use_size_offsets,
658 int flag, unsigned long int *limit, int errnumber, const char *msg)
659 {
660 int count;
661 int min = 0;
662 int mid = 64;
663 int max = -1;
664
665 extra->flags |= flag;
666
667 for (;;)
668 {
669 *limit = mid;
670
671 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
672 use_offsets, use_size_offsets);
673
674 if (count == errnumber)
675 {
676 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
677 min = mid;
678 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
679 }
680
681 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
682 count == PCRE_ERROR_PARTIAL)
683 {
684 if (mid == min + 1)
685 {
686 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
687 break;
688 }
689 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
690 max = mid;
691 mid = (min + mid)/2;
692 }
693 else break; /* Some other error */
694 }
695
696 extra->flags &= ~flag;
697 return count;
698 }
699
700
701
702 /*************************************************
703 * Case-independent strncmp() function *
704 *************************************************/
705
706 /*
707 Arguments:
708 s first string
709 t second string
710 n number of characters to compare
711
712 Returns: < 0, = 0, or > 0, according to the comparison
713 */
714
715 static int
716 strncmpic(uschar *s, uschar *t, int n)
717 {
718 while (n--)
719 {
720 int c = tolower(*s++) - tolower(*t++);
721 if (c) return c;
722 }
723 return 0;
724 }
725
726
727
728 /*************************************************
729 * Check newline indicator *
730 *************************************************/
731
732 /* This is used both at compile and run-time to check for <xxx> escapes, where
733 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
734 no match.
735
736 Arguments:
737 p points after the leading '<'
738 f file for error message
739
740 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
741 */
742
743 static int
744 check_newline(uschar *p, FILE *f)
745 {
746 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
747 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
748 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
749 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
750 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
751 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
752 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
753 fprintf(f, "Unknown newline type at: <%s\n", p);
754 return 0;
755 }
756
757
758
759 /*************************************************
760 * Usage function *
761 *************************************************/
762
/* Write the command-line summary to stdout. The lines about readline(), the
DFA matcher, and the POSIX interface depend on how the program was built. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
  fputs("If input is a terminal, readline() is used to read from it.\n",
        stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

  fputs("\nOptions:\n"
        " -b show compiled code (bytecode)\n"
        " -C show PCRE compile-time options and exit\n"
        " -d debug: show compiled code and information (-b and -i)\n",
        stdout);

#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif

  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s output store (memory) used information\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n",
        stdout);
}
795
796
797
798 /*************************************************
799 * Main Program *
800 *************************************************/
801
802 /* Read lines from named file or stdin and write to named file or stdout; lines
803 consist of a regular expression, in delimiters and optionally followed by
804 options, followed by a set of test data, terminated by an empty line. */
805
806 int main(int argc, char **argv)
807 {
808 FILE *infile = stdin;
809 int options = 0;
810 int study_options = 0;
811 int op = 1;
812 int timeit = 0;
813 int timeitm = 0;
814 int showinfo = 0;
815 int showstore = 0;
816 int quiet = 0;
817 int size_offsets = 45;
818 int size_offsets_max;
819 int *offsets = NULL;
820 #if !defined NOPOSIX
821 int posix = 0;
822 #endif
823 int debug = 0;
824 int done = 0;
825 int all_use_dfa = 0;
826 int yield = 0;
827 int stack_size;
828
829 /* These vectors store, end-to-end, a list of captured substring names. Assume
830 that 1024 is plenty long enough for the few names we'll be testing. */
831
832 uschar copynames[1024];
833 uschar getnames[1024];
834
835 uschar *copynamesptr;
836 uschar *getnamesptr;
837
838 /* Get buffers from malloc() so that Electric Fence will check their misuse
839 when I am debugging. They grow automatically when very long lines are read. */
840
841 buffer = (unsigned char *)malloc(buffer_size);
842 dbuffer = (unsigned char *)malloc(buffer_size);
843 pbuffer = (unsigned char *)malloc(buffer_size);
844
845 /* The outfile variable is static so that new_malloc can use it. */
846
847 outfile = stdout;
848
849 /* The following _setmode() stuff is some Windows magic that tells its runtime
850 library to translate CRLF into a single LF character. At least, that's what
851 I've been told: never having used Windows I take this all on trust. Originally
852 it set 0x8000, but then I was advised that _O_BINARY was better. */
853
854 #if defined(_WIN32) || defined(WIN32)
855 _setmode( _fileno( stdout ), _O_BINARY );
856 #endif
857
858 /* Scan options */
859
860 while (argc > 1 && argv[op][0] == '-')
861 {
862 unsigned char *endptr;
863
864 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
865 showstore = 1;
866 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
867 else if (strcmp(argv[op], "-b") == 0) debug = 1;
868 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
869 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
870 #if !defined NODFA
871 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
872 #endif
873 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
874 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
875 *endptr == 0))
876 {
877 op++;
878 argc--;
879 }
880 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
881 {
882 int both = argv[op][2] == 0;
883 int temp;
884 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
885 *endptr == 0))
886 {
887 timeitm = temp;
888 op++;
889 argc--;
890 }
891 else timeitm = LOOPREPEAT;
892 if (both) timeit = timeitm;
893 }
894 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
895 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
896 *endptr == 0))
897 {
898 #if defined(_WIN32) || defined(WIN32)
899 printf("PCRE: -S not supported on this OS\n");
900 exit(1);
901 #else
902 int rc;
903 struct rlimit rlim;
904 getrlimit(RLIMIT_STACK, &rlim);
905 rlim.rlim_cur = stack_size * 1024 * 1024;
906 rc = setrlimit(RLIMIT_STACK, &rlim);
907 if (rc != 0)
908 {
909 printf("PCRE: setrlimit() failed with error %d\n", rc);
910 exit(1);
911 }
912 op++;
913 argc--;
914 #endif
915 }
916 #if !defined NOPOSIX
917 else if (strcmp(argv[op], "-p") == 0) posix = 1;
918 #endif
919 else if (strcmp(argv[op], "-C") == 0)
920 {
921 int rc;
922 printf("PCRE version %s\n", pcre_version());
923 printf("Compiled with\n");
924 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
925 printf(" %sUTF-8 support\n", rc? "" : "No ");
926 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
927 printf(" %sUnicode properties support\n", rc? "" : "No ");
928 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
929 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
930 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
931 (rc == -2)? "ANYCRLF" :
932 (rc == -1)? "ANY" : "???");
933 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
934 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
935 "all Unicode newlines");
936 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
937 printf(" Internal link size = %d\n", rc);
938 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
939 printf(" POSIX malloc threshold = %d\n", rc);
940 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
941 printf(" Default match limit = %d\n", rc);
942 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
943 printf(" Default recursion depth limit = %d\n", rc);
944 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
945 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
946 goto EXIT;
947 }
948 else if (strcmp(argv[op], "-help") == 0 ||
949 strcmp(argv[op], "--help") == 0)
950 {
951 usage();
952 goto EXIT;
953 }
954 else
955 {
956 printf("** Unknown or malformed option %s\n", argv[op]);
957 usage();
958 yield = 1;
959 goto EXIT;
960 }
961 op++;
962 argc--;
963 }
964
965 /* Get the store for the offsets vector, and remember what it was */
966
967 size_offsets_max = size_offsets;
968 offsets = (int *)malloc(size_offsets_max * sizeof(int));
969 if (offsets == NULL)
970 {
971 printf("** Failed to get %d bytes of memory for offsets vector\n",
972 (int)(size_offsets_max * sizeof(int)));
973 yield = 1;
974 goto EXIT;
975 }
976
977 /* Sort out the input and output files */
978
979 if (argc > 1)
980 {
981 infile = fopen(argv[op], INPUT_MODE);
982 if (infile == NULL)
983 {
984 printf("** Failed to open %s\n", argv[op]);
985 yield = 1;
986 goto EXIT;
987 }
988 }
989
990 if (argc > 2)
991 {
992 outfile = fopen(argv[op+1], OUTPUT_MODE);
993 if (outfile == NULL)
994 {
995 printf("** Failed to open %s\n", argv[op+1]);
996 yield = 1;
997 goto EXIT;
998 }
999 }
1000
1001 /* Set alternative malloc function */
1002
1003 pcre_malloc = new_malloc;
1004 pcre_free = new_free;
1005 pcre_stack_malloc = stack_malloc;
1006 pcre_stack_free = stack_free;
1007
1008 /* Heading line unless quiet, then prompt for first regex if stdin */
1009
1010 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1011
1012 /* Main loop */
1013
1014 while (!done)
1015 {
1016 pcre *re = NULL;
1017 pcre_extra *extra = NULL;
1018
1019 #if !defined NOPOSIX /* There are still compilers that require no indent */
1020 regex_t preg;
1021 int do_posix = 0;
1022 #endif
1023
1024 const char *error;
1025 unsigned char *p, *pp, *ppp;
1026 unsigned char *to_file = NULL;
1027 const unsigned char *tables = NULL;
1028 unsigned long int true_size, true_study_size = 0;
1029 size_t size, regex_gotten_store;
1030 int do_study = 0;
1031 int do_debug = debug;
1032 int do_G = 0;
1033 int do_g = 0;
1034 int do_showinfo = showinfo;
1035 int do_showrest = 0;
1036 int do_flip = 0;
1037 int erroroffset, len, delimiter, poffset;
1038
1039 use_utf8 = 0;
1040 debug_lengths = 1;
1041
1042 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1043 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1044 fflush(outfile);
1045
1046 p = buffer;
1047 while (isspace(*p)) p++;
1048 if (*p == 0) continue;
1049
1050 /* See if the pattern is to be loaded pre-compiled from a file. */
1051
1052 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1053 {
1054 unsigned long int magic, get_options;
1055 uschar sbuf[8];
1056 FILE *f;
1057
1058 p++;
1059 pp = p + (int)strlen((char *)p);
1060 while (isspace(pp[-1])) pp--;
1061 *pp = 0;
1062
1063 f = fopen((char *)p, "rb");
1064 if (f == NULL)
1065 {
1066 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1067 continue;
1068 }
1069
1070 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1071
1072 true_size =
1073 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1074 true_study_size =
1075 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1076
1077 re = (real_pcre *)new_malloc(true_size);
1078 regex_gotten_store = gotten_store;
1079
1080 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1081
1082 magic = ((real_pcre *)re)->magic_number;
1083 if (magic != MAGIC_NUMBER)
1084 {
1085 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1086 {
1087 do_flip = 1;
1088 }
1089 else
1090 {
1091 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1092 fclose(f);
1093 continue;
1094 }
1095 }
1096
1097 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1098 do_flip? " (byte-inverted)" : "", p);
1099
1100 /* Need to know if UTF-8 for printing data strings */
1101
1102 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1103 use_utf8 = (get_options & PCRE_UTF8) != 0;
1104
1105 /* Now see if there is any following study data */
1106
1107 if (true_study_size != 0)
1108 {
1109 pcre_study_data *psd;
1110
1111 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1112 extra->flags = PCRE_EXTRA_STUDY_DATA;
1113
1114 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1115 extra->study_data = psd;
1116
1117 if (fread(psd, 1, true_study_size, f) != true_study_size)
1118 {
1119 FAIL_READ:
1120 fprintf(outfile, "Failed to read data from %s\n", p);
1121 if (extra != NULL) new_free(extra);
1122 if (re != NULL) new_free(re);
1123 fclose(f);
1124 continue;
1125 }
1126 fprintf(outfile, "Study data loaded from %s\n", p);
1127 do_study = 1; /* To get the data output if requested */
1128 }
1129 else fprintf(outfile, "No study data\n");
1130
1131 fclose(f);
1132 goto SHOW_INFO;
1133 }
1134
1135 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1136 the pattern; if it isn't complete, read more. */
1137
1138 delimiter = *p++;
1139
1140 if (isalnum(delimiter) || delimiter == '\\')
1141 {
1142 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1143 goto SKIP_DATA;
1144 }
1145
1146 pp = p;
1147 poffset = p - buffer;
1148
1149 for(;;)
1150 {
1151 while (*pp != 0)
1152 {
1153 if (*pp == '\\' && pp[1] != 0) pp++;
1154 else if (*pp == delimiter) break;
1155 pp++;
1156 }
1157 if (*pp != 0) break;
1158 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1159 {
1160 fprintf(outfile, "** Unexpected EOF\n");
1161 done = 1;
1162 goto CONTINUE;
1163 }
1164 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1165 }
1166
1167 /* The buffer may have moved while being extended; reset the start of data
1168 pointer to the correct relative point in the buffer. */
1169
1170 p = buffer + poffset;
1171
1172 /* If the first character after the delimiter is backslash, make
1173 the pattern end with backslash. This is purely to provide a way
1174 of testing for the error message when a pattern ends with backslash. */
1175
1176 if (pp[1] == '\\') *pp++ = '\\';
1177
1178 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1179 for callouts. */
1180
1181 *pp++ = 0;
1182 strcpy((char *)pbuffer, (char *)p);
1183
1184 /* Look for options after final delimiter */
1185
1186 options = 0;
1187 study_options = 0;
1188 log_store = showstore; /* default from command line */
1189
1190 while (*pp != 0)
1191 {
1192 switch (*pp++)
1193 {
1194 case 'f': options |= PCRE_FIRSTLINE; break;
1195 case 'g': do_g = 1; break;
1196 case 'i': options |= PCRE_CASELESS; break;
1197 case 'm': options |= PCRE_MULTILINE; break;
1198 case 's': options |= PCRE_DOTALL; break;
1199 case 'x': options |= PCRE_EXTENDED; break;
1200
1201 case '+': do_showrest = 1; break;
1202 case 'A': options |= PCRE_ANCHORED; break;
1203 case 'B': do_debug = 1; break;
1204 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1205 case 'D': do_debug = do_showinfo = 1; break;
1206 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1207 case 'F': do_flip = 1; break;
1208 case 'G': do_G = 1; break;
1209 case 'I': do_showinfo = 1; break;
1210 case 'J': options |= PCRE_DUPNAMES; break;
1211 case 'M': log_store = 1; break;
1212 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1213
1214 #if !defined NOPOSIX
1215 case 'P': do_posix = 1; break;
1216 #endif
1217
1218 case 'S': do_study = 1; break;
1219 case 'U': options |= PCRE_UNGREEDY; break;
1220 case 'X': options |= PCRE_EXTRA; break;
1221 case 'Z': debug_lengths = 0; break;
1222 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1223 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1224
1225 case 'L':
1226 ppp = pp;
1227 /* The '\r' test here is so that it works on Windows. */
1228 /* The '0' test is just in case this is an unterminated line. */
1229 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1230 *ppp = 0;
1231 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1232 {
1233 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1234 goto SKIP_DATA;
1235 }
1236 locale_set = 1;
1237 tables = pcre_maketables();
1238 pp = ppp;
1239 break;
1240
1241 case '>':
1242 to_file = pp;
1243 while (*pp != 0) pp++;
1244 while (isspace(pp[-1])) pp--;
1245 *pp = 0;
1246 break;
1247
1248 case '<':
1249 {
1250 int x = check_newline(pp, outfile);
1251 if (x == 0) goto SKIP_DATA;
1252 options |= x;
1253 while (*pp++ != '>');
1254 }
1255 break;
1256
1257 case '\r': /* So that it works in Windows */
1258 case '\n':
1259 case ' ':
1260 break;
1261
1262 default:
1263 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1264 goto SKIP_DATA;
1265 }
1266 }
1267
1268 /* Handle compiling via the POSIX interface, which doesn't support the
1269 timing, showing, or debugging options, nor the ability to pass over
1270 local character tables. */
1271
1272 #if !defined NOPOSIX
1273 if (posix || do_posix)
1274 {
1275 int rc;
1276 int cflags = 0;
1277
1278 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1279 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1280 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1281 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1282 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1283
1284 rc = regcomp(&preg, (char *)p, cflags);
1285
1286 /* Compilation failed; go back for another re, skipping to blank line
1287 if non-interactive. */
1288
1289 if (rc != 0)
1290 {
1291 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1292 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1293 goto SKIP_DATA;
1294 }
1295 }
1296
1297 /* Handle compiling via the native interface */
1298
1299 else
1300 #endif /* !defined NOPOSIX */
1301
1302 {
1303 if (timeit > 0)
1304 {
1305 register int i;
1306 clock_t time_taken;
1307 clock_t start_time = clock();
1308 for (i = 0; i < timeit; i++)
1309 {
1310 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1311 if (re != NULL) free(re);
1312 }
1313 time_taken = clock() - start_time;
1314 fprintf(outfile, "Compile time %.4f milliseconds\n",
1315 (((double)time_taken * 1000.0) / (double)timeit) /
1316 (double)CLOCKS_PER_SEC);
1317 }
1318
1319 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1320
1321 /* Compilation failed; go back for another re, skipping to blank line
1322 if non-interactive. */
1323
1324 if (re == NULL)
1325 {
1326 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1327 SKIP_DATA:
1328 if (infile != stdin)
1329 {
1330 for (;;)
1331 {
1332 if (extend_inputline(infile, buffer, NULL) == NULL)
1333 {
1334 done = 1;
1335 goto CONTINUE;
1336 }
1337 len = (int)strlen((char *)buffer);
1338 while (len > 0 && isspace(buffer[len-1])) len--;
1339 if (len == 0) break;
1340 }
1341 fprintf(outfile, "\n");
1342 }
1343 goto CONTINUE;
1344 }
1345
1346 /* Compilation succeeded; print data if required. There are now two
1347 info-returning functions. The old one has a limited interface and
1348 returns only limited data. Check that it agrees with the newer one. */
1349
1350 if (log_store)
1351 fprintf(outfile, "Memory allocation (code space): %d\n",
1352 (int)(gotten_store -
1353 sizeof(real_pcre) -
1354 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1355
1356 /* Extract the size for possible writing before possibly flipping it,
1357 and remember the store that was got. */
1358
1359 true_size = ((real_pcre *)re)->size;
1360 regex_gotten_store = gotten_store;
1361
1362 /* If /S was present, study the regexp to generate additional info to
1363 help with the matching. */
1364
1365 if (do_study)
1366 {
1367 if (timeit > 0)
1368 {
1369 register int i;
1370 clock_t time_taken;
1371 clock_t start_time = clock();
1372 for (i = 0; i < timeit; i++)
1373 extra = pcre_study(re, study_options, &error);
1374 time_taken = clock() - start_time;
1375 if (extra != NULL) free(extra);
1376 fprintf(outfile, " Study time %.4f milliseconds\n",
1377 (((double)time_taken * 1000.0) / (double)timeit) /
1378 (double)CLOCKS_PER_SEC);
1379 }
1380 extra = pcre_study(re, study_options, &error);
1381 if (error != NULL)
1382 fprintf(outfile, "Failed to study: %s\n", error);
1383 else if (extra != NULL)
1384 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1385 }
1386
1387 /* If the 'F' option was present, we flip the bytes of all the integer
1388 fields in the regex data block and the study block. This is to make it
1389 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1390 compiled on a different architecture. */
1391
1392 if (do_flip)
1393 {
1394 real_pcre *rre = (real_pcre *)re;
1395 rre->magic_number =
1396 byteflip(rre->magic_number, sizeof(rre->magic_number));
1397 rre->size = byteflip(rre->size, sizeof(rre->size));
1398 rre->options = byteflip(rre->options, sizeof(rre->options));
1399 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1400 rre->top_bracket =
1401 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1402 rre->top_backref =
1403 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1404 rre->first_byte =
1405 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1406 rre->req_byte =
1407 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1408 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1409 sizeof(rre->name_table_offset));
1410 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1411 sizeof(rre->name_entry_size));
1412 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1413 sizeof(rre->name_count));
1414
1415 if (extra != NULL)
1416 {
1417 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1418 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1419 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1420 }
1421 }
1422
1423 /* Extract information from the compiled data if required */
1424
1425 SHOW_INFO:
1426
1427 if (do_debug)
1428 {
1429 fprintf(outfile, "------------------------------------------------------------------\n");
1430 pcre_printint(re, outfile, debug_lengths);
1431 }
1432
1433 if (do_showinfo)
1434 {
1435 unsigned long int get_options, all_options;
1436 #if !defined NOINFOCHECK
1437 int old_first_char, old_options, old_count;
1438 #endif
1439 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1440 hascrorlf;
1441 int nameentrysize, namecount;
1442 const uschar *nametable;
1443
1444 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1445 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1446 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1447 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1448 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1449 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1450 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1451 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1452 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1453 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1454 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1455 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1456
1457 #if !defined NOINFOCHECK
1458 old_count = pcre_info(re, &old_options, &old_first_char);
1459 if (count < 0) fprintf(outfile,
1460 "Error %d from pcre_info()\n", count);
1461 else
1462 {
1463 if (old_count != count) fprintf(outfile,
1464 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1465 old_count);
1466
1467 if (old_first_char != first_char) fprintf(outfile,
1468 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1469 first_char, old_first_char);
1470
1471 if (old_options != (int)get_options) fprintf(outfile,
1472 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1473 get_options, old_options);
1474 }
1475 #endif
1476
1477 if (size != regex_gotten_store) fprintf(outfile,
1478 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1479 (int)size, (int)regex_gotten_store);
1480
1481 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1482 if (backrefmax > 0)
1483 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1484
1485 if (namecount > 0)
1486 {
1487 fprintf(outfile, "Named capturing subpatterns:\n");
1488 while (namecount-- > 0)
1489 {
1490 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1491 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1492 GET2(nametable, 0));
1493 nametable += nameentrysize;
1494 }
1495 }
1496
1497 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1498 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1499
1500 all_options = ((real_pcre *)re)->options;
1501 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1502
1503 if (get_options == 0) fprintf(outfile, "No options\n");
1504 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1505 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1506 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1507 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1508 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1509 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1510 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1511 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1512 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1513 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1514 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1515 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1516 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1517 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1518 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1519 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1520
1521 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1522
1523 switch (get_options & PCRE_NEWLINE_BITS)
1524 {
1525 case PCRE_NEWLINE_CR:
1526 fprintf(outfile, "Forced newline sequence: CR\n");
1527 break;
1528
1529 case PCRE_NEWLINE_LF:
1530 fprintf(outfile, "Forced newline sequence: LF\n");
1531 break;
1532
1533 case PCRE_NEWLINE_CRLF:
1534 fprintf(outfile, "Forced newline sequence: CRLF\n");
1535 break;
1536
1537 case PCRE_NEWLINE_ANYCRLF:
1538 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1539 break;
1540
1541 case PCRE_NEWLINE_ANY:
1542 fprintf(outfile, "Forced newline sequence: ANY\n");
1543 break;
1544
1545 default:
1546 break;
1547 }
1548
1549 if (first_char == -1)
1550 {
1551 fprintf(outfile, "First char at start or follows newline\n");
1552 }
1553 else if (first_char < 0)
1554 {
1555 fprintf(outfile, "No first char\n");
1556 }
1557 else
1558 {
1559 int ch = first_char & 255;
1560 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1561 "" : " (caseless)";
1562 if (PRINTHEX(ch))
1563 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1564 else
1565 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1566 }
1567
1568 if (need_char < 0)
1569 {
1570 fprintf(outfile, "No need char\n");
1571 }
1572 else
1573 {
1574 int ch = need_char & 255;
1575 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1576 "" : " (caseless)";
1577 if (PRINTHEX(ch))
1578 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1579 else
1580 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1581 }
1582
1583 /* Don't output study size; at present it is in any case a fixed
1584 value, but it varies, depending on the computer architecture, and
1585 so messes up the test suite. (And with the /F option, it might be
1586 flipped.) */
1587
1588 if (do_study)
1589 {
1590 if (extra == NULL)
1591 fprintf(outfile, "Study returned NULL\n");
1592 else
1593 {
1594 uschar *start_bits = NULL;
1595 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1596
1597 if (start_bits == NULL)
1598 fprintf(outfile, "No starting byte set\n");
1599 else
1600 {
1601 int i;
1602 int c = 24;
1603 fprintf(outfile, "Starting byte set: ");
1604 for (i = 0; i < 256; i++)
1605 {
1606 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1607 {
1608 if (c > 75)
1609 {
1610 fprintf(outfile, "\n ");
1611 c = 2;
1612 }
1613 if (PRINTHEX(i) && i != ' ')
1614 {
1615 fprintf(outfile, "%c ", i);
1616 c += 2;
1617 }
1618 else
1619 {
1620 fprintf(outfile, "\\x%02x ", i);
1621 c += 5;
1622 }
1623 }
1624 }
1625 fprintf(outfile, "\n");
1626 }
1627 }
1628 }
1629 }
1630
1631 /* If the '>' option was present, we write out the regex to a file, and
1632 that is all. The first 8 bytes of the file are the regex length and then
1633 the study length, in big-endian order. */
1634
1635 if (to_file != NULL)
1636 {
1637 FILE *f = fopen((char *)to_file, "wb");
1638 if (f == NULL)
1639 {
1640 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1641 }
1642 else
1643 {
1644 uschar sbuf[8];
1645 sbuf[0] = (uschar)((true_size >> 24) & 255);
1646 sbuf[1] = (uschar)((true_size >> 16) & 255);
1647 sbuf[2] = (uschar)((true_size >> 8) & 255);
1648 sbuf[3] = (uschar)((true_size) & 255);
1649
1650 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1651 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1652 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1653 sbuf[7] = (uschar)((true_study_size) & 255);
1654
1655 if (fwrite(sbuf, 1, 8, f) < 8 ||
1656 fwrite(re, 1, true_size, f) < true_size)
1657 {
1658 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1659 }
1660 else
1661 {
1662 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1663 if (extra != NULL)
1664 {
1665 if (fwrite(extra->study_data, 1, true_study_size, f) <
1666 true_study_size)
1667 {
1668 fprintf(outfile, "Write error on %s: %s\n", to_file,
1669 strerror(errno));
1670 }
1671 else fprintf(outfile, "Study data written to %s\n", to_file);
1672
1673 }
1674 }
1675 fclose(f);
1676 }
1677
1678 new_free(re);
1679 if (extra != NULL) new_free(extra);
1680 if (tables != NULL) new_free((void *)tables);
1681 continue; /* With next regex */
1682 }
1683 } /* End of non-POSIX compile */
1684
1685 /* Read data lines and test them */
1686
1687 for (;;)
1688 {
1689 uschar *q;
1690 uschar *bptr;
1691 int *use_offsets = offsets;
1692 int use_size_offsets = size_offsets;
1693 int callout_data = 0;
1694 int callout_data_set = 0;
1695 int count, c;
1696 int copystrings = 0;
1697 int find_match_limit = 0;
1698 int getstrings = 0;
1699 int getlist = 0;
1700 int gmatched = 0;
1701 int start_offset = 0;
1702 int g_notempty = 0;
1703 int use_dfa = 0;
1704
1705 options = 0;
1706
1707 *copynames = 0;
1708 *getnames = 0;
1709
1710 copynamesptr = copynames;
1711 getnamesptr = getnames;
1712
1713 pcre_callout = callout;
1714 first_callout = 1;
1715 callout_extra = 0;
1716 callout_count = 0;
1717 callout_fail_count = 999999;
1718 callout_fail_id = -1;
1719 show_malloc = 0;
1720
1721 if (extra != NULL) extra->flags &=
1722 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1723
1724 len = 0;
1725 for (;;)
1726 {
1727 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1728 {
1729 if (len > 0) break;
1730 done = 1;
1731 goto CONTINUE;
1732 }
1733 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1734 len = (int)strlen((char *)buffer);
1735 if (buffer[len-1] == '\n') break;
1736 }
1737
1738 while (len > 0 && isspace(buffer[len-1])) len--;
1739 buffer[len] = 0;
1740 if (len == 0) break;
1741
1742 p = buffer;
1743 while (isspace(*p)) p++;
1744
1745 bptr = q = dbuffer;
1746 while ((c = *p++) != 0)
1747 {
1748 int i = 0;
1749 int n = 0;
1750
1751 if (c == '\\') switch ((c = *p++))
1752 {
1753 case 'a': c = 7; break;
1754 case 'b': c = '\b'; break;
1755 case 'e': c = 27; break;
1756 case 'f': c = '\f'; break;
1757 case 'n': c = '\n'; break;
1758 case 'r': c = '\r'; break;
1759 case 't': c = '\t'; break;
1760 case 'v': c = '\v'; break;
1761
1762 case '0': case '1': case '2': case '3':
1763 case '4': case '5': case '6': case '7':
1764 c -= '0';
1765 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1766 c = c * 8 + *p++ - '0';
1767
1768 #if !defined NOUTF8
1769 if (use_utf8 && c > 255)
1770 {
1771 unsigned char buff8[8];
1772 int ii, utn;
1773 utn = ord2utf8(c, buff8);
1774 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1775 c = buff8[ii]; /* Last byte */
1776 }
1777 #endif
1778 break;
1779
1780 case 'x':
1781
1782 /* Handle \x{..} specially - new Perl thing for utf8 */
1783
1784 #if !defined NOUTF8
1785 if (*p == '{')
1786 {
1787 unsigned char *pt = p;
1788 c = 0;
1789 while (isxdigit(*(++pt)))
1790 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1791 if (*pt == '}')
1792 {
1793 unsigned char buff8[8];
1794 int ii, utn;
1795 utn = ord2utf8(c, buff8);
1796 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1797 c = buff8[ii]; /* Last byte */
1798 p = pt + 1;
1799 break;
1800 }
1801 /* Not correct form; fall through */
1802 }
1803 #endif
1804
1805 /* Ordinary \x */
1806
1807 c = 0;
1808 while (i++ < 2 && isxdigit(*p))
1809 {
1810 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1811 p++;
1812 }
1813 break;
1814
1815 case 0: /* \ followed by EOF allows for an empty line */
1816 p--;
1817 continue;
1818
1819 case '>':
1820 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1821 continue;
1822
1823 case 'A': /* Option setting */
1824 options |= PCRE_ANCHORED;
1825 continue;
1826
1827 case 'B':
1828 options |= PCRE_NOTBOL;
1829 continue;
1830
1831 case 'C':
1832 if (isdigit(*p)) /* Set copy string */
1833 {
1834 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1835 copystrings |= 1 << n;
1836 }
1837 else if (isalnum(*p))
1838 {
1839 uschar *npp = copynamesptr;
1840 while (isalnum(*p)) *npp++ = *p++;
1841 *npp++ = 0;
1842 *npp = 0;
1843 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1844 if (n < 0)
1845 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1846 copynamesptr = npp;
1847 }
1848 else if (*p == '+')
1849 {
1850 callout_extra = 1;
1851 p++;
1852 }
1853 else if (*p == '-')
1854 {
1855 pcre_callout = NULL;
1856 p++;
1857 }
1858 else if (*p == '!')
1859 {
1860 callout_fail_id = 0;
1861 p++;
1862 while(isdigit(*p))
1863 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1864 callout_fail_count = 0;
1865 if (*p == '!')
1866 {
1867 p++;
1868 while(isdigit(*p))
1869 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1870 }
1871 }
1872 else if (*p == '*')
1873 {
1874 int sign = 1;
1875 callout_data = 0;
1876 if (*(++p) == '-') { sign = -1; p++; }
1877 while(isdigit(*p))
1878 callout_data = callout_data * 10 + *p++ - '0';
1879 callout_data *= sign;
1880 callout_data_set = 1;
1881 }
1882 continue;
1883
1884 #if !defined NODFA
1885 case 'D':
1886 #if !defined NOPOSIX
1887 if (posix || do_posix)
1888 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1889 else
1890 #endif
1891 use_dfa = 1;
1892 continue;
1893
1894 case 'F':
1895 options |= PCRE_DFA_SHORTEST;
1896 continue;
1897 #endif
1898
1899 case 'G':
1900 if (isdigit(*p))
1901 {
1902 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1903 getstrings |= 1 << n;
1904 }
1905 else if (isalnum(*p))
1906 {
1907 uschar *npp = getnamesptr;
1908 while (isalnum(*p)) *npp++ = *p++;
1909 *npp++ = 0;
1910 *npp = 0;
1911 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1912 if (n < 0)
1913 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1914 getnamesptr = npp;
1915 }
1916 continue;
1917
1918 case 'L':
1919 getlist = 1;
1920 continue;
1921
1922 case 'M':
1923 find_match_limit = 1;
1924 continue;
1925
1926 case 'N':
1927 options |= PCRE_NOTEMPTY;
1928 continue;
1929
1930 case 'O':
1931 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1932 if (n > size_offsets_max)
1933 {
1934 size_offsets_max = n;
1935 free(offsets);
1936 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1937 if (offsets == NULL)
1938 {
1939 printf("** Failed to get %d bytes of memory for offsets vector\n",
1940 (int)(size_offsets_max * sizeof(int)));
1941 yield = 1;
1942 goto EXIT;
1943 }
1944 }
1945 use_size_offsets = n;
1946 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1947 continue;
1948
1949 case 'P':
1950 options |= PCRE_PARTIAL;
1951 continue;
1952
1953 case 'Q':
1954 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1955 if (extra == NULL)
1956 {
1957 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1958 extra->flags = 0;
1959 }
1960 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1961 extra->match_limit_recursion = n;
1962 continue;
1963
1964 case 'q':
1965 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1966 if (extra == NULL)
1967 {
1968 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1969 extra->flags = 0;
1970 }
1971 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1972 extra->match_limit = n;
1973 continue;
1974
1975 #if !defined NODFA
1976 case 'R':
1977 options |= PCRE_DFA_RESTART;
1978 continue;
1979 #endif
1980
1981 case 'S':
1982 show_malloc = 1;
1983 continue;
1984
1985 case 'Z':
1986 options |= PCRE_NOTEOL;
1987 continue;
1988
1989 case '?':
1990 options |= PCRE_NO_UTF8_CHECK;
1991 continue;
1992
1993 case '<':
1994 {
1995 int x = check_newline(p, outfile);
1996 if (x == 0) goto NEXT_DATA;
1997 options |= x;
1998 while (*p++ != '>');
1999 }
2000 continue;
2001 }
2002 *q++ = c;
2003 }
2004 *q = 0;
2005 len = q - dbuffer;
2006
2007 if ((all_use_dfa || use_dfa) && find_match_limit)
2008 {
2009 printf("**Match limit not relevant for DFA matching: ignored\n");
2010 find_match_limit = 0;
2011 }
2012
2013 /* Handle matching via the POSIX interface, which does not
2014 support timing or playing with the match limit or callout data. */
2015
2016 #if !defined NOPOSIX
2017 if (posix || do_posix)
2018 {
2019 int rc;
2020 int eflags = 0;
2021 regmatch_t *pmatch = NULL;
2022 if (use_size_offsets > 0)
2023 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2024 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2025 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2026
2027 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2028
2029 if (rc != 0)
2030 {
2031 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2032 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2033 }
2034 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2035 != 0)
2036 {
2037 fprintf(outfile, "Matched with REG_NOSUB\n");
2038 }
2039 else
2040 {
2041 size_t i;
2042 for (i = 0; i < (size_t)use_size_offsets; i++)
2043 {
2044 if (pmatch[i].rm_so >= 0)
2045 {
2046 fprintf(outfile, "%2d: ", (int)i);
2047 (void)pchars(dbuffer + pmatch[i].rm_so,
2048 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2049 fprintf(outfile, "\n");
2050 if (i == 0 && do_showrest)
2051 {
2052 fprintf(outfile, " 0+ ");
2053 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2054 outfile);
2055 fprintf(outfile, "\n");
2056 }
2057 }
2058 }
2059 }
2060 free(pmatch);
2061 }
2062
2063 /* Handle matching via the native interface - repeats for /g and /G */
2064
2065 else
2066 #endif /* !defined NOPOSIX */
2067
2068 for (;; gmatched++) /* Loop for /g or /G */
2069 {
2070 if (timeitm > 0)
2071 {
2072 register int i;
2073 clock_t time_taken;
2074 clock_t start_time = clock();
2075
2076 #if !defined NODFA
2077 if (all_use_dfa || use_dfa)
2078 {
2079 int workspace[1000];
2080 for (i = 0; i < timeitm; i++)
2081 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2082 options | g_notempty, use_offsets, use_size_offsets, workspace,
2083 sizeof(workspace)/sizeof(int));
2084 }
2085 else
2086 #endif
2087
2088 for (i = 0; i < timeitm; i++)
2089 count = pcre_exec(re, extra, (char *)bptr, len,
2090 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2091
2092 time_taken = clock() - start_time;
2093 fprintf(outfile, "Execute time %.4f milliseconds\n",
2094 (((double)time_taken * 1000.0) / (double)timeitm) /
2095 (double)CLOCKS_PER_SEC);
2096 }
2097
2098 /* If find_match_limit is set, we want to do repeated matches with
2099 varying limits in order to find the minimum value for the match limit and
2100 for the recursion limit. */
2101
2102 if (find_match_limit)
2103 {
2104 if (extra == NULL)
2105 {
2106 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2107 extra->flags = 0;
2108 }
2109
2110 (void)check_match_limit(re, extra, bptr, len, start_offset,
2111 options|g_notempty, use_offsets, use_size_offsets,
2112 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2113 PCRE_ERROR_MATCHLIMIT, "match()");
2114
2115 count = check_match_limit(re, extra, bptr, len, start_offset,
2116 options|g_notempty, use_offsets, use_size_offsets,
2117 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2118 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2119 }
2120
2121 /* If callout_data is set, use the interface with additional data */
2122
2123 else if (callout_data_set)
2124 {
2125 if (extra == NULL)
2126 {
2127 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2128 extra->flags = 0;
2129 }
2130 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2131 extra->callout_data = &callout_data;
2132 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2133 options | g_notempty, use_offsets, use_size_offsets);
2134 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2135 }
2136
2137 /* The normal case is just to do the match once, with the default
2138 value of match_limit. */
2139
2140 #if !defined NODFA
2141 else if (all_use_dfa || use_dfa)
2142 {
2143 int workspace[1000];
2144 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2145 options | g_notempty, use_offsets, use_size_offsets, workspace,
2146 sizeof(workspace)/sizeof(int));
2147 if (count == 0)
2148 {
2149 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2150 count = use_size_offsets/2;
2151 }
2152 }
2153 #endif
2154
2155 else
2156 {
2157 count = pcre_exec(re, extra, (char *)bptr, len,
2158 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2159 if (count == 0)
2160 {
2161 fprintf(outfile, "Matched, but too many substrings\n");
2162 count = use_size_offsets/3;
2163 }
2164 }
2165
2166 /* Matched */
2167
2168 if (count >= 0)
2169 {
2170 int i, maxcount;
2171
2172 #if !defined NODFA
2173 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2174 #endif
2175 maxcount = use_size_offsets/3;
2176
2177 /* This is a check against a lunatic return value. */
2178
2179 if (count > maxcount)
2180 {
2181 fprintf(outfile,
2182 "** PCRE error: returned count %d is too big for offset size %d\n",
2183 count, use_size_offsets);
2184 count = use_size_offsets/3;
2185 if (do_g || do_G)
2186 {
2187 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2188 do_g = do_G = FALSE; /* Break g/G loop */
2189 }
2190 }
2191
2192 for (i = 0; i < count * 2; i += 2)
2193 {
2194 if (use_offsets[i] < 0)
2195 fprintf(outfile, "%2d: <unset>\n", i/2);
2196 else
2197 {
2198 fprintf(outfile, "%2d: ", i/2);
2199 (void)pchars(bptr + use_offsets[i],
2200 use_offsets[i+1] - use_offsets[i], outfile);
2201 fprintf(outfile, "\n");
2202 if (i == 0)
2203 {
2204 if (do_showrest)
2205 {
2206 fprintf(outfile, " 0+ ");
2207 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2208 outfile);
2209 fprintf(outfile, "\n");
2210 }
2211 }
2212 }
2213 }
2214
2215 for (i = 0; i < 32; i++)
2216 {
2217 if ((copystrings & (1 << i)) != 0)
2218 {
2219 char copybuffer[256];
2220 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2221 i, copybuffer, sizeof(copybuffer));
2222 if (rc < 0)
2223 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2224 else
2225 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2226 }
2227 }
2228
2229 for (copynamesptr = copynames;
2230 *copynamesptr != 0;
2231 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2232 {
2233 char copybuffer[256];
2234 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2235 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2236 if (rc < 0)
2237 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2238 else
2239 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2240 }
2241
2242 for (i = 0; i < 32; i++)
2243 {
2244 if ((getstrings & (1 << i)) != 0)
2245 {
2246 const char *substring;
2247 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2248 i, &substring);
2249 if (rc < 0)
2250 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2251 else
2252 {
2253 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2254 pcre_free_substring(substring);
2255 }
2256 }
2257 }
2258
2259 for (getnamesptr = getnames;
2260 *getnamesptr != 0;
2261 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2262 {
2263 const char *substring;
2264 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2265 count, (char *)getnamesptr, &substring);
2266 if (rc < 0)
2267 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2268 else
2269 {
2270 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2271 pcre_free_substring(substring);
2272 }
2273 }
2274
2275 if (getlist)
2276 {
2277 const char **stringlist;
2278 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2279 &stringlist);
2280 if (rc < 0)
2281 fprintf(outfile, "get substring list failed %d\n", rc);
2282 else
2283 {
2284 for (i = 0; i < count; i++)
2285 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2286 if (stringlist[i] != NULL)
2287 fprintf(outfile, "string list not terminated by NULL\n");
2288 /* free((void *)stringlist); */
2289 pcre_free_substring_list(stringlist);
2290 }
2291 }
2292 }
2293
2294 /* There was a partial match */
2295
2296 else if (count == PCRE_ERROR_PARTIAL)
2297 {
2298 fprintf(outfile, "Partial match");
2299 #if !defined NODFA
2300 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2301 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2302 bptr + use_offsets[0]);
2303 #endif
2304 fprintf(outfile, "\n");
2305 break; /* Out of the /g loop */
2306 }
2307
2308 /* Failed to match. If this is a /g or /G loop and we previously set
2309 g_notempty after a null match, this is not necessarily the end. We want
2310 to advance the start offset, and continue. We won't be at the end of the
2311 string - that was checked before setting g_notempty.
2312
2313 Complication arises in the case when the newline option is "any" or
2314 "anycrlf". If the previous match was at the end of a line terminated by
2315 CRLF, an advance of one character just passes the \r, whereas we should
2316 prefer the longer newline sequence, as does the code in pcre_exec().
2317 Fudge the offset value to achieve this.
2318
2319 Otherwise, in the case of UTF-8 matching, the advance must be one
2320 character, not one byte. */
2321
2322 else
2323 {
2324 if (g_notempty != 0)
2325 {
2326 int onechar = 1;
2327 unsigned int obits = ((real_pcre *)re)->options;
2328 use_offsets[0] = start_offset;
2329 if ((obits & PCRE_NEWLINE_BITS) == 0)
2330 {
2331 int d;
2332 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2333 obits = (d == '\r')? PCRE_NEWLINE_CR :
2334 (d == '\n')? PCRE_NEWLINE_LF :
2335 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2336 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2337 (d == -1)? PCRE_NEWLINE_ANY : 0;
2338 }
2339 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2340 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2341 &&
2342 start_offset < len - 1 &&
2343 bptr[start_offset] == '\r' &&
2344 bptr[start_offset+1] == '\n')
2345 onechar++;
2346 else if (use_utf8)
2347 {
2348 while (start_offset + onechar < len)
2349 {
2350 int tb = bptr[start_offset+onechar];
2351 if (tb <= 127) break;
2352 tb &= 0xc0;
2353 if (tb != 0 && tb != 0xc0) onechar++;
2354 }
2355 }
2356 use_offsets[1] = start_offset + onechar;
2357 }
2358 else
2359 {
2360 if (count == PCRE_ERROR_NOMATCH)
2361 {
2362 if (gmatched == 0) fprintf(outfile, "No match\n");
2363 }
2364 else fprintf(outfile, "Error %d\n", count);
2365 break; /* Out of the /g loop */
2366 }
2367 }
2368
2369 /* If not /g or /G we are done */
2370
2371 if (!do_g && !do_G) break;
2372
2373 /* If we have matched an empty string, first check to see if we are at
2374 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2375 what Perl's /g option does. This turns out to be rather cunning. First
2376 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2377 same point. If this fails (picked up above) we advance to the next
2378 character. */
2379
2380 g_notempty = 0;
2381
2382 if (use_offsets[0] == use_offsets[1])
2383 {
2384 if (use_offsets[0] == len) break;
2385 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2386 }
2387
2388 /* For /g, update the start offset, leaving the rest alone */
2389
2390 if (do_g) start_offset = use_offsets[1];
2391
2392 /* For /G, update the pointer and length */
2393
2394 else
2395 {
2396 bptr += use_offsets[1];
2397 len -= use_offsets[1];
2398 }
2399 } /* End of loop for /g and /G */
2400
2401 NEXT_DATA: continue;
2402 } /* End of loop for data lines */
2403
2404 CONTINUE:
2405
2406 #if !defined NOPOSIX
2407 if (posix || do_posix) regfree(&preg);
2408 #endif
2409
2410 if (re != NULL) new_free(re);
2411 if (extra != NULL) new_free(extra);
2412 if (tables != NULL)
2413 {
2414 new_free((void *)tables);
2415 setlocale(LC_CTYPE, "C");
2416 locale_set = 0;
2417 }
2418 }
2419
2420 if (infile == stdin) fprintf(outfile, "\n");
2421
2422 EXIT:
2423
2424 if (infile != NULL && infile != stdin) fclose(infile);
2425 if (outfile != NULL && outfile != stdout) fclose(outfile);
2426
2427 free(buffer);
2428 free(dbuffer);
2429 free(pbuffer);
2430 free(offsets);
2431
2432 return yield;
2433 }
2434
2435 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12