/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 169 - (show annotations) (download)
Mon Jun 4 10:49:21 2007 UTC (7 years, 2 months ago) by ph10
File MIME type: text/plain
File size: 68640 byte(s)
Add PCRE_INFO_OKPARTIAL and PCRE_INFO_JCHANGED.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <limits.h>
#include <locale.h>
#include <errno.h>
46
47
48 /* A number of things vary for Windows builds. Originally, pcretest opened its
49 input and output without "b"; then I was told that "b" was needed in some
50 environments, so it was added for release 5.0 to both the input and output. (It
51 makes no difference on Unix-like systems.) Later I was told that it is wrong
52 for the input on Windows. I've now abstracted the modes into two macros that
53 are set here, to make it easier to fiddle with them, and removed "b" from the
54 input mode under Windows. */
55
56 #if defined(_WIN32) || defined(WIN32)
57 #include <io.h> /* For _setmode() */
58 #include <fcntl.h> /* For _O_BINARY */
59 #define INPUT_MODE "r"
60 #define OUTPUT_MODE "wb"
61
62 #else
63 #include <sys/time.h> /* These two includes are needed */
64 #include <sys/resource.h> /* for setrlimit(). */
65 #define INPUT_MODE "rb"
66 #define OUTPUT_MODE "wb"
67 #endif
68
69
70 /* We have to include pcre_internal.h because we need the internal info for
71 displaying the results of pcre_study() and we also need to know about the
72 internal macros, structures, and other internal data values; pcretest has
73 "inside information" compared to a program that strictly follows the PCRE API.
74
75 Although pcre_internal.h does itself include pcre.h, we explicitly include it
76 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
77 appropriately for an application, not for building PCRE. */
78
79 #include "pcre.h"
80 #include "pcre_internal.h"
81
82 /* We need access to the data tables that PCRE uses. So as not to have to keep
83 two copies, we include the source file here, changing the names of the external
84 symbols to prevent clashes. */
85
86 #define _pcre_utf8_table1 utf8_table1
87 #define _pcre_utf8_table1_size utf8_table1_size
88 #define _pcre_utf8_table2 utf8_table2
89 #define _pcre_utf8_table3 utf8_table3
90 #define _pcre_utf8_table4 utf8_table4
91 #define _pcre_utt utt
92 #define _pcre_utt_size utt_size
93 #define _pcre_OP_lengths OP_lengths
94
95 #include "pcre_tables.c"
96
97 /* We also need the pcre_printint() function for printing out compiled
98 patterns. This function is in a separate file so that it can be included in
99 pcre_compile.c when that module is compiled with debugging enabled.
100
101 The definition of the macro PRINTABLE, which determines whether to print an
102 output character as-is or as a hex value when showing compiled patterns, is
103 contained in this file. We use it here also, in cases when the locale has not
104 been explicitly changed, so as to get consistent output from systems that
105 differ in their output from isprint() even in the "C" locale. */
106
107 #include "pcre_printint.src"
108
109 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
110
111
112 /* It is possible to compile this test program without including support for
113 testing the POSIX interface, though this is not available via the standard
114 Makefile. */
115
116 #if !defined NOPOSIX
117 #include "pcreposix.h"
118 #endif
119
120 /* It is also possible, for the benefit of the version currently imported into
121 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
122 interface to the DFA matcher (NODFA), and without the doublecheck of the old
123 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
124 UTF8 support if PCRE is built without it. */
125
126 #ifndef SUPPORT_UTF8
127 #ifndef NOUTF8
128 #define NOUTF8
129 #endif
130 #endif
131
132
133 /* Other parameters */
134
135 #ifndef CLOCKS_PER_SEC
136 #ifdef CLK_TCK
137 #define CLOCKS_PER_SEC CLK_TCK
138 #else
139 #define CLOCKS_PER_SEC 100
140 #endif
141 #endif
142
143 /* This is the default loop count for timing. */
144
145 #define LOOPREPEAT 500000
146
147 /* Static variables */
148
149 static FILE *outfile;
150 static int log_store = 0;
151 static int callout_count;
152 static int callout_extra;
153 static int callout_fail_count;
154 static int callout_fail_id;
155 static int first_callout;
156 static int locale_set = 0;
157 static int show_malloc;
158 static int use_utf8;
159 static size_t gotten_store;
160
161 /* The buffers grow automatically if very long input lines are encountered. */
162
163 static int buffer_size = 50000;
164 static uschar *buffer = NULL;
165 static uschar *dbuffer = NULL;
166 static uschar *pbuffer = NULL;
167
168
169
170 /*************************************************
171 * Read or extend an input line *
172 *************************************************/
173
174 /* Input lines are read into buffer, but both patterns and data lines can be
175 continued over multiple input lines. In addition, if the buffer fills up, we
176 want to automatically expand it so as to be able to handle extremely large
177 lines that are needed for certain stress tests. When the input buffer is
178 expanded, the other two buffers must also be expanded likewise, and the
179 contents of pbuffer, which are a copy of the input for callouts, must be
180 preserved (for when expansion happens for a data line). This is not the most
181 optimal way of handling this, but hey, this is just a test program!
182
183 Arguments:
184 f the file to read
185 start where in buffer to start (this *must* be within buffer)
186
187 Returns: pointer to the start of new data
188 could be a copy of start, or could be moved
189 NULL if no data read and EOF reached
190 */
191
192 static uschar *
193 extend_inputline(FILE *f, uschar *start)
194 {
195 uschar *here = start;
196
197 for (;;)
198 {
199 int rlen = buffer_size - (here - buffer);
200
201 if (rlen > 1000)
202 {
203 int dlen;
204 if (fgets((char *)here, rlen, f) == NULL)
205 return (here == start)? NULL : start;
206 dlen = (int)strlen((char *)here);
207 if (dlen > 0 && here[dlen - 1] == '\n') return start;
208 here += dlen;
209 }
210
211 else
212 {
213 int new_buffer_size = 2*buffer_size;
214 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
215 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
216 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
217
218 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
219 {
220 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
221 exit(1);
222 }
223
224 memcpy(new_buffer, buffer, buffer_size);
225 memcpy(new_pbuffer, pbuffer, buffer_size);
226
227 buffer_size = new_buffer_size;
228
229 start = new_buffer + (start - buffer);
230 here = new_buffer + (here - buffer);
231
232 free(buffer);
233 free(dbuffer);
234 free(pbuffer);
235
236 buffer = new_buffer;
237 dbuffer = new_dbuffer;
238 pbuffer = new_pbuffer;
239 }
240 }
241
242 return NULL; /* Control never gets here */
243 }
244
245
246
247
248
249
250
251 /*************************************************
252 * Read number from string *
253 *************************************************/
254
255 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
256 around with conditional compilation, just do the job by hand. It is only used
257 for unpicking arguments, so just keep it simple.
258
259 Arguments:
260 str string to be converted
261 endptr where to put the end pointer
262
263 Returns: the value of the number, as an int
264 */
265
static int
get_value(unsigned char *str, unsigned char **endptr)
{
  /* Skips leading white space, then converts the run of decimal digits that
  follows. *endptr is set to the first character that is not part of the
  number, so a caller can check that the whole argument was consumed. If
  there are no digits the result is 0. A value too large for an int saturates
  at INT_MAX instead of overflowing, because signed overflow is undefined
  behaviour. */

  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  while (isdigit(*str))
    {
    int digit = (int)(*str++ - '0');

    /* result*10 + digit fits in an int only if result <= (INT_MAX-digit)/10.
    Once saturated, the value stays at INT_MAX while the remaining digits are
    still consumed. */
    if (result > (INT_MAX - digit) / 10)
      result = INT_MAX;
    else
      result = result * 10 + digit;
    }

  *endptr = str;
  return result;
}
275
276
277
278
279 /*************************************************
280 * Convert UTF-8 string to value *
281 *************************************************/
282
283 /* This function takes one or more bytes that represent a UTF-8 character,
284 and returns the value of the character.
285
286 Argument:
287 utf8bytes a pointer to the byte vector
288 vptr a pointer to an int to receive the value
289
290 Returns: > 0 => the number of bytes consumed
291 -6 to 0 => malformed UTF-8 character at offset = (-return)
292 */
293
294 #if !defined NOUTF8
295
296 static int
297 utf82ord(unsigned char *utf8bytes, int *vptr)
298 {
299 int c = *utf8bytes++;
300 int d = c;
301 int i, j, s;
302
303 for (i = -1; i < 6; i++) /* i is number of additional bytes */
304 {
305 if ((d & 0x80) == 0) break;
306 d <<= 1;
307 }
308
309 if (i == -1) { *vptr = c; return 1; } /* ascii character */
310 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
311
312 /* i now has a value in the range 1-5 */
313
314 s = 6*i;
315 d = (c & utf8_table3[i]) << s;
316
317 for (j = 0; j < i; j++)
318 {
319 c = *utf8bytes++;
320 if ((c & 0xc0) != 0x80) return -(j+1);
321 s -= 6;
322 d |= (c & 0x3f) << s;
323 }
324
325 /* Check that encoding was the correct unique one */
326
327 for (j = 0; j < utf8_table1_size; j++)
328 if (d <= utf8_table1[j]) break;
329 if (j != i) return -(i+1);
330
331 /* Valid value */
332
333 *vptr = d;
334 return i+1;
335 }
336
337 #endif
338
339
340
341 /*************************************************
342 * Convert character value to UTF-8 *
343 *************************************************/
344
345 /* This function takes an integer value in the range 0 - 0x7fffffff
346 and encodes it as a UTF-8 character in 1 to 6 bytes.
347
348 Arguments:
349 cvalue the character value
350 utf8bytes pointer to buffer for result - at least 6 bytes long
351
352 Returns: number of bytes placed in the buffer
353 */
354
355 #if !defined NOUTF8
356
357 static int
358 ord2utf8(int cvalue, uschar *utf8bytes)
359 {
360 register int i, j;
361 for (i = 0; i < utf8_table1_size; i++)
362 if (cvalue <= utf8_table1[i]) break;
363 utf8bytes += i;
364 for (j = i; j > 0; j--)
365 {
366 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
367 cvalue >>= 6;
368 }
369 *utf8bytes = utf8_table2[i] | cvalue;
370 return i + 1;
371 }
372
373 #endif
374
375
376
377 /*************************************************
378 * Print character string *
379 *************************************************/
380
381 /* Character string printing function. Must handle UTF-8 strings in utf8
382 mode. Yields number of characters printed. If handed a NULL file, just counts
383 chars without printing. */
384
385 static int pchars(unsigned char *p, int length, FILE *f)
386 {
387 int c = 0;
388 int yield = 0;
389
390 while (length-- > 0)
391 {
392 #if !defined NOUTF8
393 if (use_utf8)
394 {
395 int rc = utf82ord(p, &c);
396
397 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
398 {
399 length -= rc - 1;
400 p += rc;
401 if (PRINTHEX(c))
402 {
403 if (f != NULL) fprintf(f, "%c", c);
404 yield++;
405 }
406 else
407 {
408 int n = 4;
409 if (f != NULL) fprintf(f, "\\x{%02x}", c);
410 yield += (n <= 0x000000ff)? 2 :
411 (n <= 0x00000fff)? 3 :
412 (n <= 0x0000ffff)? 4 :
413 (n <= 0x000fffff)? 5 : 6;
414 }
415 continue;
416 }
417 }
418 #endif
419
420 /* Not UTF-8, or malformed UTF-8 */
421
422 c = *p++;
423 if (PRINTHEX(c))
424 {
425 if (f != NULL) fprintf(f, "%c", c);
426 yield++;
427 }
428 else
429 {
430 if (f != NULL) fprintf(f, "\\x%02x", c);
431 yield += 4;
432 }
433 }
434
435 return yield;
436 }
437
438
439
440 /*************************************************
441 * Callout function *
442 *************************************************/
443
444 /* Called from PCRE as a result of the (?C) item. We print out where we are in
445 the match. Yield zero unless more callouts than the fail count, or the callout
446 data is not zero. */
447
448 static int callout(pcre_callout_block *cb)
449 {
450 FILE *f = (first_callout | callout_extra)? outfile : NULL;
451 int i, pre_start, post_start, subject_length;
452
453 if (callout_extra)
454 {
455 fprintf(f, "Callout %d: last capture = %d\n",
456 cb->callout_number, cb->capture_last);
457
458 for (i = 0; i < cb->capture_top * 2; i += 2)
459 {
460 if (cb->offset_vector[i] < 0)
461 fprintf(f, "%2d: <unset>\n", i/2);
462 else
463 {
464 fprintf(f, "%2d: ", i/2);
465 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
466 cb->offset_vector[i+1] - cb->offset_vector[i], f);
467 fprintf(f, "\n");
468 }
469 }
470 }
471
472 /* Re-print the subject in canonical form, the first time or if giving full
473 datails. On subsequent calls in the same match, we use pchars just to find the
474 printed lengths of the substrings. */
475
476 if (f != NULL) fprintf(f, "--->");
477
478 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
479 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
480 cb->current_position - cb->start_match, f);
481
482 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
483
484 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
485 cb->subject_length - cb->current_position, f);
486
487 if (f != NULL) fprintf(f, "\n");
488
489 /* Always print appropriate indicators, with callout number if not already
490 shown. For automatic callouts, show the pattern offset. */
491
492 if (cb->callout_number == 255)
493 {
494 fprintf(outfile, "%+3d ", cb->pattern_position);
495 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
496 }
497 else
498 {
499 if (callout_extra) fprintf(outfile, " ");
500 else fprintf(outfile, "%3d ", cb->callout_number);
501 }
502
503 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
504 fprintf(outfile, "^");
505
506 if (post_start > 0)
507 {
508 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
509 fprintf(outfile, "^");
510 }
511
512 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
513 fprintf(outfile, " ");
514
515 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
516 pbuffer + cb->pattern_position);
517
518 fprintf(outfile, "\n");
519 first_callout = 0;
520
521 if (cb->callout_data != NULL)
522 {
523 int callout_data = *((int *)(cb->callout_data));
524 if (callout_data != 0)
525 {
526 fprintf(outfile, "Callout data = %d\n", callout_data);
527 return callout_data;
528 }
529 }
530
531 return (cb->callout_number != callout_fail_id)? 0 :
532 (++callout_count >= callout_fail_count)? 1 : 0;
533 }
534
535
536 /*************************************************
537 * Local malloc functions *
538 *************************************************/
539
540 /* Alternative malloc function, to test functionality and show the size of the
541 compiled re. */
542
543 static void *new_malloc(size_t size)
544 {
545 void *block = malloc(size);
546 gotten_store = size;
547 if (show_malloc)
548 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
549 return block;
550 }
551
552 static void new_free(void *block)
553 {
554 if (show_malloc)
555 fprintf(outfile, "free %p\n", block);
556 free(block);
557 }
558
559
560 /* For recursion malloc/free, to test stacking calls */
561
562 static void *stack_malloc(size_t size)
563 {
564 void *block = malloc(size);
565 if (show_malloc)
566 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
567 return block;
568 }
569
570 static void stack_free(void *block)
571 {
572 if (show_malloc)
573 fprintf(outfile, "stack_free %p\n", block);
574 free(block);
575 }
576
577
578 /*************************************************
579 * Call pcre_fullinfo() *
580 *************************************************/
581
582 /* Get one piece of information from the pcre_fullinfo() function */
583
584 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
585 {
586 int rc;
587 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
588 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
589 }
590
591
592
593 /*************************************************
594 * Byte flipping function *
595 *************************************************/
596
static unsigned long int
byteflip(unsigned long int value, int n)
{
  /* Reverses the byte order of the low two bytes of value when n is 2, and
  of the low four bytes for any other n. Bits above those bytes are dropped
  from the result. */

  unsigned long int byte0 = value & 0xff;
  unsigned long int byte1 = (value >> 8) & 0xff;
  unsigned long int byte2 = (value >> 16) & 0xff;
  unsigned long int byte3 = (value >> 24) & 0xff;

  if (n == 2)
    {
    return (byte0 << 8) | byte1;
    }

  return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
606
607
608
609
610 /*************************************************
611 * Check match or recursion limit *
612 *************************************************/
613
614 static int
615 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
616 int start_offset, int options, int *use_offsets, int use_size_offsets,
617 int flag, unsigned long int *limit, int errnumber, const char *msg)
618 {
619 int count;
620 int min = 0;
621 int mid = 64;
622 int max = -1;
623
624 extra->flags |= flag;
625
626 for (;;)
627 {
628 *limit = mid;
629
630 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
631 use_offsets, use_size_offsets);
632
633 if (count == errnumber)
634 {
635 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
636 min = mid;
637 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
638 }
639
640 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
641 count == PCRE_ERROR_PARTIAL)
642 {
643 if (mid == min + 1)
644 {
645 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
646 break;
647 }
648 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
649 max = mid;
650 mid = (min + mid)/2;
651 }
652 else break; /* Some other error */
653 }
654
655 extra->flags &= ~flag;
656 return count;
657 }
658
659
660
661 /*************************************************
662 * Check newline indicator *
663 *************************************************/
664
665 /* This is used both at compile and run-time to check for <xxx> escapes, where
666 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
667 no match.
668
669 Arguments:
670 p points after the leading '<'
671 f file for error message
672
673 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
674 */
675
676 static int
677 check_newline(uschar *p, FILE *f)
678 {
679 if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
680 if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
681 if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
682 if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
683 if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
684 fprintf(f, "Unknown newline type at: <%s\n", p);
685 return 0;
686 }
687
688
689
690 /*************************************************
691 * Usage function *
692 *************************************************/
693
static void
usage(void)
{
  /* Writes the command-line summary to stdout, one printf() per line of
  output. The -dfa and -p lines appear only when those features are compiled
  in. */

  printf("Usage: pcretest [options] [<input> [<output>]]\n");
  printf(" -b show compiled code (bytecode)\n");
  printf(" -C show PCRE compile-time options and exit\n");
  printf(" -d debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
  printf(" -dfa force DFA matching for all subjects\n");
#endif
  printf(" -help show usage information\n");
  printf(" -i show information about compiled patterns\n");
  printf(" -m output memory used information\n");
  printf(" -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
  printf(" -p use POSIX interface\n");
#endif
  printf(" -q quiet: do not output PCRE version number at start\n");
  printf(" -S <n> set stack size to <n> megabytes\n");
  printf(" -s output store (memory) used information\n");
  printf(" -t time compilation and execution\n");
  printf(" -t <n> time compilation and execution, repeating <n> times\n");
  printf(" -tm time execution (matching) only\n");
  printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
}
719
720
721
722 /*************************************************
723 * Main Program *
724 *************************************************/
725
726 /* Read lines from named file or stdin and write to named file or stdout; lines
727 consist of a regular expression, in delimiters and optionally followed by
728 options, followed by a set of test data, terminated by an empty line. */
729
730 int main(int argc, char **argv)
731 {
732 FILE *infile = stdin;
733 int options = 0;
734 int study_options = 0;
735 int op = 1;
736 int timeit = 0;
737 int timeitm = 0;
738 int showinfo = 0;
739 int showstore = 0;
740 int quiet = 0;
741 int size_offsets = 45;
742 int size_offsets_max;
743 int *offsets = NULL;
744 #if !defined NOPOSIX
745 int posix = 0;
746 #endif
747 int debug = 0;
748 int done = 0;
749 int all_use_dfa = 0;
750 int yield = 0;
751 int stack_size;
752
753 /* These vectors store, end-to-end, a list of captured substring names. Assume
754 that 1024 is plenty long enough for the few names we'll be testing. */
755
756 uschar copynames[1024];
757 uschar getnames[1024];
758
759 uschar *copynamesptr;
760 uschar *getnamesptr;
761
762 /* Get buffers from malloc() so that Electric Fence will check their misuse
763 when I am debugging. They grow automatically when very long lines are read. */
764
765 buffer = (unsigned char *)malloc(buffer_size);
766 dbuffer = (unsigned char *)malloc(buffer_size);
767 pbuffer = (unsigned char *)malloc(buffer_size);
768
769 /* The outfile variable is static so that new_malloc can use it. */
770
771 outfile = stdout;
772
773 /* The following _setmode() stuff is some Windows magic that tells its runtime
774 library to translate CRLF into a single LF character. At least, that's what
775 I've been told: never having used Windows I take this all on trust. Originally
776 it set 0x8000, but then I was advised that _O_BINARY was better. */
777
778 #if defined(_WIN32) || defined(WIN32)
779 _setmode( _fileno( stdout ), _O_BINARY );
780 #endif
781
782 /* Scan options */
783
784 while (argc > 1 && argv[op][0] == '-')
785 {
786 unsigned char *endptr;
787
788 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
789 showstore = 1;
790 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
791 else if (strcmp(argv[op], "-b") == 0) debug = 1;
792 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
793 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
794 #if !defined NODFA
795 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
796 #endif
797 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
798 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
799 *endptr == 0))
800 {
801 op++;
802 argc--;
803 }
804 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
805 {
806 int both = argv[op][2] == 0;
807 int temp;
808 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
809 *endptr == 0))
810 {
811 timeitm = temp;
812 op++;
813 argc--;
814 }
815 else timeitm = LOOPREPEAT;
816 if (both) timeit = timeitm;
817 }
818 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
819 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
820 *endptr == 0))
821 {
822 #if defined(_WIN32) || defined(WIN32)
823 printf("PCRE: -S not supported on this OS\n");
824 exit(1);
825 #else
826 int rc;
827 struct rlimit rlim;
828 getrlimit(RLIMIT_STACK, &rlim);
829 rlim.rlim_cur = stack_size * 1024 * 1024;
830 rc = setrlimit(RLIMIT_STACK, &rlim);
831 if (rc != 0)
832 {
833 printf("PCRE: setrlimit() failed with error %d\n", rc);
834 exit(1);
835 }
836 op++;
837 argc--;
838 #endif
839 }
840 #if !defined NOPOSIX
841 else if (strcmp(argv[op], "-p") == 0) posix = 1;
842 #endif
843 else if (strcmp(argv[op], "-C") == 0)
844 {
845 int rc;
846 printf("PCRE version %s\n", pcre_version());
847 printf("Compiled with\n");
848 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
849 printf(" %sUTF-8 support\n", rc? "" : "No ");
850 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
851 printf(" %sUnicode properties support\n", rc? "" : "No ");
852 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
853 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
854 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
855 (rc == -2)? "ANYCRLF" :
856 (rc == -1)? "ANY" : "???");
857 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
858 printf(" Internal link size = %d\n", rc);
859 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
860 printf(" POSIX malloc threshold = %d\n", rc);
861 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
862 printf(" Default match limit = %d\n", rc);
863 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
864 printf(" Default recursion depth limit = %d\n", rc);
865 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
866 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
867 goto EXIT;
868 }
869 else if (strcmp(argv[op], "-help") == 0 ||
870 strcmp(argv[op], "--help") == 0)
871 {
872 usage();
873 goto EXIT;
874 }
875 else
876 {
877 printf("** Unknown or malformed option %s\n", argv[op]);
878 usage();
879 yield = 1;
880 goto EXIT;
881 }
882 op++;
883 argc--;
884 }
885
886 /* Get the store for the offsets vector, and remember what it was */
887
888 size_offsets_max = size_offsets;
889 offsets = (int *)malloc(size_offsets_max * sizeof(int));
890 if (offsets == NULL)
891 {
892 printf("** Failed to get %d bytes of memory for offsets vector\n",
893 (int)(size_offsets_max * sizeof(int)));
894 yield = 1;
895 goto EXIT;
896 }
897
898 /* Sort out the input and output files */
899
900 if (argc > 1)
901 {
902 infile = fopen(argv[op], INPUT_MODE);
903 if (infile == NULL)
904 {
905 printf("** Failed to open %s\n", argv[op]);
906 yield = 1;
907 goto EXIT;
908 }
909 }
910
911 if (argc > 2)
912 {
913 outfile = fopen(argv[op+1], OUTPUT_MODE);
914 if (outfile == NULL)
915 {
916 printf("** Failed to open %s\n", argv[op+1]);
917 yield = 1;
918 goto EXIT;
919 }
920 }
921
922 /* Set alternative malloc function */
923
924 pcre_malloc = new_malloc;
925 pcre_free = new_free;
926 pcre_stack_malloc = stack_malloc;
927 pcre_stack_free = stack_free;
928
929 /* Heading line unless quiet, then prompt for first regex if stdin */
930
931 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
932
933 /* Main loop */
934
935 while (!done)
936 {
937 pcre *re = NULL;
938 pcre_extra *extra = NULL;
939
940 #if !defined NOPOSIX /* There are still compilers that require no indent */
941 regex_t preg;
942 int do_posix = 0;
943 #endif
944
945 const char *error;
946 unsigned char *p, *pp, *ppp;
947 unsigned char *to_file = NULL;
948 const unsigned char *tables = NULL;
949 unsigned long int true_size, true_study_size = 0;
950 size_t size, regex_gotten_store;
951 int do_study = 0;
952 int do_debug = debug;
953 int debug_lengths = 1;
954 int do_G = 0;
955 int do_g = 0;
956 int do_showinfo = showinfo;
957 int do_showrest = 0;
958 int do_flip = 0;
959 int erroroffset, len, delimiter, poffset;
960
961 use_utf8 = 0;
962
963 if (infile == stdin) printf(" re> ");
964 if (extend_inputline(infile, buffer) == NULL) break;
965 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
966 fflush(outfile);
967
968 p = buffer;
969 while (isspace(*p)) p++;
970 if (*p == 0) continue;
971
972 /* See if the pattern is to be loaded pre-compiled from a file. */
973
974 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
975 {
976 unsigned long int magic, get_options;
977 uschar sbuf[8];
978 FILE *f;
979
980 p++;
981 pp = p + (int)strlen((char *)p);
982 while (isspace(pp[-1])) pp--;
983 *pp = 0;
984
985 f = fopen((char *)p, "rb");
986 if (f == NULL)
987 {
988 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
989 continue;
990 }
991
992 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
993
994 true_size =
995 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
996 true_study_size =
997 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
998
999 re = (real_pcre *)new_malloc(true_size);
1000 regex_gotten_store = gotten_store;
1001
1002 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1003
1004 magic = ((real_pcre *)re)->magic_number;
1005 if (magic != MAGIC_NUMBER)
1006 {
1007 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1008 {
1009 do_flip = 1;
1010 }
1011 else
1012 {
1013 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1014 fclose(f);
1015 continue;
1016 }
1017 }
1018
1019 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1020 do_flip? " (byte-inverted)" : "", p);
1021
1022 /* Need to know if UTF-8 for printing data strings */
1023
1024 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1025 use_utf8 = (get_options & PCRE_UTF8) != 0;
1026
1027 /* Now see if there is any following study data */
1028
1029 if (true_study_size != 0)
1030 {
1031 pcre_study_data *psd;
1032
1033 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1034 extra->flags = PCRE_EXTRA_STUDY_DATA;
1035
1036 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1037 extra->study_data = psd;
1038
1039 if (fread(psd, 1, true_study_size, f) != true_study_size)
1040 {
1041 FAIL_READ:
1042 fprintf(outfile, "Failed to read data from %s\n", p);
1043 if (extra != NULL) new_free(extra);
1044 if (re != NULL) new_free(re);
1045 fclose(f);
1046 continue;
1047 }
1048 fprintf(outfile, "Study data loaded from %s\n", p);
1049 do_study = 1; /* To get the data output if requested */
1050 }
1051 else fprintf(outfile, "No study data\n");
1052
1053 fclose(f);
1054 goto SHOW_INFO;
1055 }
1056
1057 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1058 the pattern; if it isn't complete, read more. */
1059
1060 delimiter = *p++;
1061
1062 if (isalnum(delimiter) || delimiter == '\\')
1063 {
1064 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1065 goto SKIP_DATA;
1066 }
1067
1068 pp = p;
1069 poffset = p - buffer;
1070
1071 for(;;)
1072 {
1073 while (*pp != 0)
1074 {
1075 if (*pp == '\\' && pp[1] != 0) pp++;
1076 else if (*pp == delimiter) break;
1077 pp++;
1078 }
1079 if (*pp != 0) break;
1080 if (infile == stdin) printf(" > ");
1081 if ((pp = extend_inputline(infile, pp)) == NULL)
1082 {
1083 fprintf(outfile, "** Unexpected EOF\n");
1084 done = 1;
1085 goto CONTINUE;
1086 }
1087 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1088 }
1089
1090 /* The buffer may have moved while being extended; reset the start of data
1091 pointer to the correct relative point in the buffer. */
1092
1093 p = buffer + poffset;
1094
1095 /* If the first character after the delimiter is backslash, make
1096 the pattern end with backslash. This is purely to provide a way
1097 of testing for the error message when a pattern ends with backslash. */
1098
1099 if (pp[1] == '\\') *pp++ = '\\';
1100
1101 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1102 for callouts. */
1103
1104 *pp++ = 0;
1105 strcpy((char *)pbuffer, (char *)p);
1106
1107 /* Look for options after final delimiter */
1108
1109 options = 0;
1110 study_options = 0;
1111 log_store = showstore; /* default from command line */
1112
1113 while (*pp != 0)
1114 {
1115 switch (*pp++)
1116 {
1117 case 'f': options |= PCRE_FIRSTLINE; break;
1118 case 'g': do_g = 1; break;
1119 case 'i': options |= PCRE_CASELESS; break;
1120 case 'm': options |= PCRE_MULTILINE; break;
1121 case 's': options |= PCRE_DOTALL; break;
1122 case 'x': options |= PCRE_EXTENDED; break;
1123
1124 case '+': do_showrest = 1; break;
1125 case 'A': options |= PCRE_ANCHORED; break;
1126 case 'B': do_debug = 1; break;
1127 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1128 case 'D': do_debug = do_showinfo = 1; break;
1129 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1130 case 'F': do_flip = 1; break;
1131 case 'G': do_G = 1; break;
1132 case 'I': do_showinfo = 1; break;
1133 case 'J': options |= PCRE_DUPNAMES; break;
1134 case 'M': log_store = 1; break;
1135 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1136
1137 #if !defined NOPOSIX
1138 case 'P': do_posix = 1; break;
1139 #endif
1140
1141 case 'S': do_study = 1; break;
1142 case 'U': options |= PCRE_UNGREEDY; break;
1143 case 'X': options |= PCRE_EXTRA; break;
1144 case 'Z': debug_lengths = 0; break;
1145 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1146 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1147
1148 case 'L':
1149 ppp = pp;
1150 /* The '\r' test here is so that it works on Windows. */
1151 /* The '0' test is just in case this is an unterminated line. */
1152 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1153 *ppp = 0;
1154 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1155 {
1156 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1157 goto SKIP_DATA;
1158 }
1159 locale_set = 1;
1160 tables = pcre_maketables();
1161 pp = ppp;
1162 break;
1163
1164 case '>':
1165 to_file = pp;
1166 while (*pp != 0) pp++;
1167 while (isspace(pp[-1])) pp--;
1168 *pp = 0;
1169 break;
1170
1171 case '<':
1172 {
1173 int x = check_newline(pp, outfile);
1174 if (x == 0) goto SKIP_DATA;
1175 options |= x;
1176 while (*pp++ != '>');
1177 }
1178 break;
1179
1180 case '\r': /* So that it works in Windows */
1181 case '\n':
1182 case ' ':
1183 break;
1184
1185 default:
1186 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1187 goto SKIP_DATA;
1188 }
1189 }
1190
1191 /* Handle compiling via the POSIX interface, which doesn't support the
1192 timing, showing, or debugging options, nor the ability to pass over
1193 local character tables. */
1194
1195 #if !defined NOPOSIX
1196 if (posix || do_posix)
1197 {
1198 int rc;
1199 int cflags = 0;
1200
1201 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1202 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1203 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1204 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1205 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1206
1207 rc = regcomp(&preg, (char *)p, cflags);
1208
1209 /* Compilation failed; go back for another re, skipping to blank line
1210 if non-interactive. */
1211
1212 if (rc != 0)
1213 {
1214 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1215 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1216 goto SKIP_DATA;
1217 }
1218 }
1219
1220 /* Handle compiling via the native interface */
1221
1222 else
1223 #endif /* !defined NOPOSIX */
1224
1225 {
1226 if (timeit > 0)
1227 {
1228 register int i;
1229 clock_t time_taken;
1230 clock_t start_time = clock();
1231 for (i = 0; i < timeit; i++)
1232 {
1233 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1234 if (re != NULL) free(re);
1235 }
1236 time_taken = clock() - start_time;
1237 fprintf(outfile, "Compile time %.4f milliseconds\n",
1238 (((double)time_taken * 1000.0) / (double)timeit) /
1239 (double)CLOCKS_PER_SEC);
1240 }
1241
1242 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1243
1244 /* Compilation failed; go back for another re, skipping to blank line
1245 if non-interactive. */
1246
1247 if (re == NULL)
1248 {
1249 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1250 SKIP_DATA:
1251 if (infile != stdin)
1252 {
1253 for (;;)
1254 {
1255 if (extend_inputline(infile, buffer) == NULL)
1256 {
1257 done = 1;
1258 goto CONTINUE;
1259 }
1260 len = (int)strlen((char *)buffer);
1261 while (len > 0 && isspace(buffer[len-1])) len--;
1262 if (len == 0) break;
1263 }
1264 fprintf(outfile, "\n");
1265 }
1266 goto CONTINUE;
1267 }
1268
1269 /* Compilation succeeded; print data if required. There are now two
1270 info-returning functions. The old one has a limited interface and
1271 returns only limited data. Check that it agrees with the newer one. */
1272
1273 if (log_store)
1274 fprintf(outfile, "Memory allocation (code space): %d\n",
1275 (int)(gotten_store -
1276 sizeof(real_pcre) -
1277 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1278
1279 /* Extract the size for possible writing before possibly flipping it,
1280 and remember the store that was got. */
1281
1282 true_size = ((real_pcre *)re)->size;
1283 regex_gotten_store = gotten_store;
1284
1285 /* If /S was present, study the regexp to generate additional info to
1286 help with the matching. */
1287
1288 if (do_study)
1289 {
1290 if (timeit > 0)
1291 {
1292 register int i;
1293 clock_t time_taken;
1294 clock_t start_time = clock();
1295 for (i = 0; i < timeit; i++)
1296 extra = pcre_study(re, study_options, &error);
1297 time_taken = clock() - start_time;
1298 if (extra != NULL) free(extra);
1299 fprintf(outfile, " Study time %.4f milliseconds\n",
1300 (((double)time_taken * 1000.0) / (double)timeit) /
1301 (double)CLOCKS_PER_SEC);
1302 }
1303 extra = pcre_study(re, study_options, &error);
1304 if (error != NULL)
1305 fprintf(outfile, "Failed to study: %s\n", error);
1306 else if (extra != NULL)
1307 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1308 }
1309
1310 /* If the 'F' option was present, we flip the bytes of all the integer
1311 fields in the regex data block and the study block. This is to make it
1312 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1313 compiled on a different architecture. */
1314
1315 if (do_flip)
1316 {
1317 real_pcre *rre = (real_pcre *)re;
1318 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1319 rre->size = byteflip(rre->size, sizeof(rre->size));
1320 rre->options = byteflip(rre->options, sizeof(rre->options));
1321 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1322 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1323 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1324 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1325 rre->name_table_offset = byteflip(rre->name_table_offset,
1326 sizeof(rre->name_table_offset));
1327 rre->name_entry_size = byteflip(rre->name_entry_size,
1328 sizeof(rre->name_entry_size));
1329 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1330
1331 if (extra != NULL)
1332 {
1333 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1334 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1335 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1336 }
1337 }
1338
1339 /* Extract information from the compiled data if required */
1340
1341 SHOW_INFO:
1342
1343 if (do_debug)
1344 {
1345 fprintf(outfile, "------------------------------------------------------------------\n");
1346 pcre_printint(re, outfile, debug_lengths);
1347 }
1348
1349 if (do_showinfo)
1350 {
1351 unsigned long int get_options, all_options;
1352 #if !defined NOINFOCHECK
1353 int old_first_char, old_options, old_count;
1354 #endif
1355 int count, backrefmax, first_char, need_char, okpartial, jchanged;
1356 int nameentrysize, namecount;
1357 const uschar *nametable;
1358
1359 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1360 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1361 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1362 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1363 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1364 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1365 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1366 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1367 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1368 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1369 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1370
1371 #if !defined NOINFOCHECK
1372 old_count = pcre_info(re, &old_options, &old_first_char);
1373 if (count < 0) fprintf(outfile,
1374 "Error %d from pcre_info()\n", count);
1375 else
1376 {
1377 if (old_count != count) fprintf(outfile,
1378 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1379 old_count);
1380
1381 if (old_first_char != first_char) fprintf(outfile,
1382 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1383 first_char, old_first_char);
1384
1385 if (old_options != (int)get_options) fprintf(outfile,
1386 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1387 get_options, old_options);
1388 }
1389 #endif
1390
1391 if (size != regex_gotten_store) fprintf(outfile,
1392 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1393 (int)size, (int)regex_gotten_store);
1394
1395 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1396 if (backrefmax > 0)
1397 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1398
1399 if (namecount > 0)
1400 {
1401 fprintf(outfile, "Named capturing subpatterns:\n");
1402 while (namecount-- > 0)
1403 {
1404 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1405 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1406 GET2(nametable, 0));
1407 nametable += nameentrysize;
1408 }
1409 }
1410
1411 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1412
1413 all_options = ((real_pcre *)re)->options;
1414 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1415
1416 if (get_options == 0) fprintf(outfile, "No options\n");
1417 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1418 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1419 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1420 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1421 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1422 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1423 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1424 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1425 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1426 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1427 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1428 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1429 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1430 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1431
1432 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1433
1434 switch (get_options & PCRE_NEWLINE_BITS)
1435 {
1436 case PCRE_NEWLINE_CR:
1437 fprintf(outfile, "Forced newline sequence: CR\n");
1438 break;
1439
1440 case PCRE_NEWLINE_LF:
1441 fprintf(outfile, "Forced newline sequence: LF\n");
1442 break;
1443
1444 case PCRE_NEWLINE_CRLF:
1445 fprintf(outfile, "Forced newline sequence: CRLF\n");
1446 break;
1447
1448 case PCRE_NEWLINE_ANYCRLF:
1449 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1450 break;
1451
1452 case PCRE_NEWLINE_ANY:
1453 fprintf(outfile, "Forced newline sequence: ANY\n");
1454 break;
1455
1456 default:
1457 break;
1458 }
1459
1460 if (first_char == -1)
1461 {
1462 fprintf(outfile, "First char at start or follows newline\n");
1463 }
1464 else if (first_char < 0)
1465 {
1466 fprintf(outfile, "No first char\n");
1467 }
1468 else
1469 {
1470 int ch = first_char & 255;
1471 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1472 "" : " (caseless)";
1473 if (PRINTHEX(ch))
1474 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1475 else
1476 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1477 }
1478
1479 if (need_char < 0)
1480 {
1481 fprintf(outfile, "No need char\n");
1482 }
1483 else
1484 {
1485 int ch = need_char & 255;
1486 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1487 "" : " (caseless)";
1488 if (PRINTHEX(ch))
1489 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1490 else
1491 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1492 }
1493
1494 /* Don't output study size; at present it is in any case a fixed
1495 value, but it varies, depending on the computer architecture, and
1496 so messes up the test suite. (And with the /F option, it might be
1497 flipped.) */
1498
1499 if (do_study)
1500 {
1501 if (extra == NULL)
1502 fprintf(outfile, "Study returned NULL\n");
1503 else
1504 {
1505 uschar *start_bits = NULL;
1506 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1507
1508 if (start_bits == NULL)
1509 fprintf(outfile, "No starting byte set\n");
1510 else
1511 {
1512 int i;
1513 int c = 24;
1514 fprintf(outfile, "Starting byte set: ");
1515 for (i = 0; i < 256; i++)
1516 {
1517 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1518 {
1519 if (c > 75)
1520 {
1521 fprintf(outfile, "\n ");
1522 c = 2;
1523 }
1524 if (PRINTHEX(i) && i != ' ')
1525 {
1526 fprintf(outfile, "%c ", i);
1527 c += 2;
1528 }
1529 else
1530 {
1531 fprintf(outfile, "\\x%02x ", i);
1532 c += 5;
1533 }
1534 }
1535 }
1536 fprintf(outfile, "\n");
1537 }
1538 }
1539 }
1540 }
1541
1542 /* If the '>' option was present, we write out the regex to a file, and
1543 that is all. The first 8 bytes of the file are the regex length and then
1544 the study length, in big-endian order. */
1545
1546 if (to_file != NULL)
1547 {
1548 FILE *f = fopen((char *)to_file, "wb");
1549 if (f == NULL)
1550 {
1551 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1552 }
1553 else
1554 {
1555 uschar sbuf[8];
1556 sbuf[0] = (true_size >> 24) & 255;
1557 sbuf[1] = (true_size >> 16) & 255;
1558 sbuf[2] = (true_size >> 8) & 255;
1559 sbuf[3] = (true_size) & 255;
1560
1561 sbuf[4] = (true_study_size >> 24) & 255;
1562 sbuf[5] = (true_study_size >> 16) & 255;
1563 sbuf[6] = (true_study_size >> 8) & 255;
1564 sbuf[7] = (true_study_size) & 255;
1565
1566 if (fwrite(sbuf, 1, 8, f) < 8 ||
1567 fwrite(re, 1, true_size, f) < true_size)
1568 {
1569 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1570 }
1571 else
1572 {
1573 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1574 if (extra != NULL)
1575 {
1576 if (fwrite(extra->study_data, 1, true_study_size, f) <
1577 true_study_size)
1578 {
1579 fprintf(outfile, "Write error on %s: %s\n", to_file,
1580 strerror(errno));
1581 }
1582 else fprintf(outfile, "Study data written to %s\n", to_file);
1583
1584 }
1585 }
1586 fclose(f);
1587 }
1588
1589 new_free(re);
1590 if (extra != NULL) new_free(extra);
1591 if (tables != NULL) new_free((void *)tables);
1592 continue; /* With next regex */
1593 }
1594 } /* End of non-POSIX compile */
1595
1596 /* Read data lines and test them */
1597
1598 for (;;)
1599 {
1600 uschar *q;
1601 uschar *bptr;
1602 int *use_offsets = offsets;
1603 int use_size_offsets = size_offsets;
1604 int callout_data = 0;
1605 int callout_data_set = 0;
1606 int count, c;
1607 int copystrings = 0;
1608 int find_match_limit = 0;
1609 int getstrings = 0;
1610 int getlist = 0;
1611 int gmatched = 0;
1612 int start_offset = 0;
1613 int g_notempty = 0;
1614 int use_dfa = 0;
1615
1616 options = 0;
1617
1618 *copynames = 0;
1619 *getnames = 0;
1620
1621 copynamesptr = copynames;
1622 getnamesptr = getnames;
1623
1624 pcre_callout = callout;
1625 first_callout = 1;
1626 callout_extra = 0;
1627 callout_count = 0;
1628 callout_fail_count = 999999;
1629 callout_fail_id = -1;
1630 show_malloc = 0;
1631
1632 if (extra != NULL) extra->flags &=
1633 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1634
1635 len = 0;
1636 for (;;)
1637 {
1638 if (infile == stdin) printf("data> ");
1639 if (extend_inputline(infile, buffer + len) == NULL)
1640 {
1641 if (len > 0) break;
1642 done = 1;
1643 goto CONTINUE;
1644 }
1645 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1646 len = (int)strlen((char *)buffer);
1647 if (buffer[len-1] == '\n') break;
1648 }
1649
1650 while (len > 0 && isspace(buffer[len-1])) len--;
1651 buffer[len] = 0;
1652 if (len == 0) break;
1653
1654 p = buffer;
1655 while (isspace(*p)) p++;
1656
1657 bptr = q = dbuffer;
1658 while ((c = *p++) != 0)
1659 {
1660 int i = 0;
1661 int n = 0;
1662
1663 if (c == '\\') switch ((c = *p++))
1664 {
1665 case 'a': c = 7; break;
1666 case 'b': c = '\b'; break;
1667 case 'e': c = 27; break;
1668 case 'f': c = '\f'; break;
1669 case 'n': c = '\n'; break;
1670 case 'r': c = '\r'; break;
1671 case 't': c = '\t'; break;
1672 case 'v': c = '\v'; break;
1673
1674 case '0': case '1': case '2': case '3':
1675 case '4': case '5': case '6': case '7':
1676 c -= '0';
1677 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1678 c = c * 8 + *p++ - '0';
1679
1680 #if !defined NOUTF8
1681 if (use_utf8 && c > 255)
1682 {
1683 unsigned char buff8[8];
1684 int ii, utn;
1685 utn = ord2utf8(c, buff8);
1686 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1687 c = buff8[ii]; /* Last byte */
1688 }
1689 #endif
1690 break;
1691
1692 case 'x':
1693
1694 /* Handle \x{..} specially - new Perl thing for utf8 */
1695
1696 #if !defined NOUTF8
1697 if (*p == '{')
1698 {
1699 unsigned char *pt = p;
1700 c = 0;
1701 while (isxdigit(*(++pt)))
1702 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1703 if (*pt == '}')
1704 {
1705 unsigned char buff8[8];
1706 int ii, utn;
1707 utn = ord2utf8(c, buff8);
1708 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1709 c = buff8[ii]; /* Last byte */
1710 p = pt + 1;
1711 break;
1712 }
1713 /* Not correct form; fall through */
1714 }
1715 #endif
1716
1717 /* Ordinary \x */
1718
1719 c = 0;
1720 while (i++ < 2 && isxdigit(*p))
1721 {
1722 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1723 p++;
1724 }
1725 break;
1726
1727 case 0: /* \ followed by EOF allows for an empty line */
1728 p--;
1729 continue;
1730
1731 case '>':
1732 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1733 continue;
1734
1735 case 'A': /* Option setting */
1736 options |= PCRE_ANCHORED;
1737 continue;
1738
1739 case 'B':
1740 options |= PCRE_NOTBOL;
1741 continue;
1742
1743 case 'C':
1744 if (isdigit(*p)) /* Set copy string */
1745 {
1746 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1747 copystrings |= 1 << n;
1748 }
1749 else if (isalnum(*p))
1750 {
1751 uschar *npp = copynamesptr;
1752 while (isalnum(*p)) *npp++ = *p++;
1753 *npp++ = 0;
1754 *npp = 0;
1755 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1756 if (n < 0)
1757 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1758 copynamesptr = npp;
1759 }
1760 else if (*p == '+')
1761 {
1762 callout_extra = 1;
1763 p++;
1764 }
1765 else if (*p == '-')
1766 {
1767 pcre_callout = NULL;
1768 p++;
1769 }
1770 else if (*p == '!')
1771 {
1772 callout_fail_id = 0;
1773 p++;
1774 while(isdigit(*p))
1775 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1776 callout_fail_count = 0;
1777 if (*p == '!')
1778 {
1779 p++;
1780 while(isdigit(*p))
1781 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1782 }
1783 }
1784 else if (*p == '*')
1785 {
1786 int sign = 1;
1787 callout_data = 0;
1788 if (*(++p) == '-') { sign = -1; p++; }
1789 while(isdigit(*p))
1790 callout_data = callout_data * 10 + *p++ - '0';
1791 callout_data *= sign;
1792 callout_data_set = 1;
1793 }
1794 continue;
1795
1796 #if !defined NODFA
1797 case 'D':
1798 #if !defined NOPOSIX
1799 if (posix || do_posix)
1800 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1801 else
1802 #endif
1803 use_dfa = 1;
1804 continue;
1805
1806 case 'F':
1807 options |= PCRE_DFA_SHORTEST;
1808 continue;
1809 #endif
1810
1811 case 'G':
1812 if (isdigit(*p))
1813 {
1814 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1815 getstrings |= 1 << n;
1816 }
1817 else if (isalnum(*p))
1818 {
1819 uschar *npp = getnamesptr;
1820 while (isalnum(*p)) *npp++ = *p++;
1821 *npp++ = 0;
1822 *npp = 0;
1823 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1824 if (n < 0)
1825 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1826 getnamesptr = npp;
1827 }
1828 continue;
1829
1830 case 'L':
1831 getlist = 1;
1832 continue;
1833
1834 case 'M':
1835 find_match_limit = 1;
1836 continue;
1837
1838 case 'N':
1839 options |= PCRE_NOTEMPTY;
1840 continue;
1841
1842 case 'O':
1843 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1844 if (n > size_offsets_max)
1845 {
1846 size_offsets_max = n;
1847 free(offsets);
1848 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1849 if (offsets == NULL)
1850 {
1851 printf("** Failed to get %d bytes of memory for offsets vector\n",
1852 (int)(size_offsets_max * sizeof(int)));
1853 yield = 1;
1854 goto EXIT;
1855 }
1856 }
1857 use_size_offsets = n;
1858 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1859 continue;
1860
1861 case 'P':
1862 options |= PCRE_PARTIAL;
1863 continue;
1864
1865 case 'Q':
1866 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1867 if (extra == NULL)
1868 {
1869 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1870 extra->flags = 0;
1871 }
1872 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1873 extra->match_limit_recursion = n;
1874 continue;
1875
1876 case 'q':
1877 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1878 if (extra == NULL)
1879 {
1880 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1881 extra->flags = 0;
1882 }
1883 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1884 extra->match_limit = n;
1885 continue;
1886
1887 #if !defined NODFA
1888 case 'R':
1889 options |= PCRE_DFA_RESTART;
1890 continue;
1891 #endif
1892
1893 case 'S':
1894 show_malloc = 1;
1895 continue;
1896
1897 case 'Z':
1898 options |= PCRE_NOTEOL;
1899 continue;
1900
1901 case '?':
1902 options |= PCRE_NO_UTF8_CHECK;
1903 continue;
1904
1905 case '<':
1906 {
1907 int x = check_newline(p, outfile);
1908 if (x == 0) goto NEXT_DATA;
1909 options |= x;
1910 while (*p++ != '>');
1911 }
1912 continue;
1913 }
1914 *q++ = c;
1915 }
1916 *q = 0;
1917 len = q - dbuffer;
1918
1919 if ((all_use_dfa || use_dfa) && find_match_limit)
1920 {
1921 printf("**Match limit not relevant for DFA matching: ignored\n");
1922 find_match_limit = 0;
1923 }
1924
1925 /* Handle matching via the POSIX interface, which does not
1926 support timing or playing with the match limit or callout data. */
1927
1928 #if !defined NOPOSIX
1929 if (posix || do_posix)
1930 {
1931 int rc;
1932 int eflags = 0;
1933 regmatch_t *pmatch = NULL;
1934 if (use_size_offsets > 0)
1935 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1936 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1937 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1938
1939 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1940
1941 if (rc != 0)
1942 {
1943 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1944 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1945 }
1946 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1947 != 0)
1948 {
1949 fprintf(outfile, "Matched with REG_NOSUB\n");
1950 }
1951 else
1952 {
1953 size_t i;
1954 for (i = 0; i < (size_t)use_size_offsets; i++)
1955 {
1956 if (pmatch[i].rm_so >= 0)
1957 {
1958 fprintf(outfile, "%2d: ", (int)i);
1959 (void)pchars(dbuffer + pmatch[i].rm_so,
1960 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1961 fprintf(outfile, "\n");
1962 if (i == 0 && do_showrest)
1963 {
1964 fprintf(outfile, " 0+ ");
1965 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1966 outfile);
1967 fprintf(outfile, "\n");
1968 }
1969 }
1970 }
1971 }
1972 free(pmatch);
1973 }
1974
1975 /* Handle matching via the native interface - repeats for /g and /G */
1976
1977 else
1978 #endif /* !defined NOPOSIX */
1979
1980 for (;; gmatched++) /* Loop for /g or /G */
1981 {
1982 if (timeitm > 0)
1983 {
1984 register int i;
1985 clock_t time_taken;
1986 clock_t start_time = clock();
1987
1988 #if !defined NODFA
1989 if (all_use_dfa || use_dfa)
1990 {
1991 int workspace[1000];
1992 for (i = 0; i < timeitm; i++)
1993 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1994 options | g_notempty, use_offsets, use_size_offsets, workspace,
1995 sizeof(workspace)/sizeof(int));
1996 }
1997 else
1998 #endif
1999
2000 for (i = 0; i < timeitm; i++)
2001 count = pcre_exec(re, extra, (char *)bptr, len,
2002 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2003
2004 time_taken = clock() - start_time;
2005 fprintf(outfile, "Execute time %.4f milliseconds\n",
2006 (((double)time_taken * 1000.0) / (double)timeitm) /
2007 (double)CLOCKS_PER_SEC);
2008 }
2009
2010 /* If find_match_limit is set, we want to do repeated matches with
2011 varying limits in order to find the minimum value for the match limit and
2012 for the recursion limit. */
2013
2014 if (find_match_limit)
2015 {
2016 if (extra == NULL)
2017 {
2018 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2019 extra->flags = 0;
2020 }
2021
2022 (void)check_match_limit(re, extra, bptr, len, start_offset,
2023 options|g_notempty, use_offsets, use_size_offsets,
2024 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2025 PCRE_ERROR_MATCHLIMIT, "match()");
2026
2027 count = check_match_limit(re, extra, bptr, len, start_offset,
2028 options|g_notempty, use_offsets, use_size_offsets,
2029 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2030 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2031 }
2032
2033 /* If callout_data is set, use the interface with additional data */
2034
2035 else if (callout_data_set)
2036 {
2037 if (extra == NULL)
2038 {
2039 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2040 extra->flags = 0;
2041 }
2042 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2043 extra->callout_data = &callout_data;
2044 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2045 options | g_notempty, use_offsets, use_size_offsets);
2046 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2047 }
2048
2049 /* The normal case is just to do the match once, with the default
2050 value of match_limit. */
2051
2052 #if !defined NODFA
2053 else if (all_use_dfa || use_dfa)
2054 {
2055 int workspace[1000];
2056 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2057 options | g_notempty, use_offsets, use_size_offsets, workspace,
2058 sizeof(workspace)/sizeof(int));
2059 if (count == 0)
2060 {
2061 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2062 count = use_size_offsets/2;
2063 }
2064 }
2065 #endif
2066
2067 else
2068 {
2069 count = pcre_exec(re, extra, (char *)bptr, len,
2070 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2071 if (count == 0)
2072 {
2073 fprintf(outfile, "Matched, but too many substrings\n");
2074 count = use_size_offsets/3;
2075 }
2076 }
2077
2078 /* Matched */
2079
2080 if (count >= 0)
2081 {
2082 int i, maxcount;
2083
2084 #if !defined NODFA
2085 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2086 #endif
2087 maxcount = use_size_offsets/3;
2088
2089 /* This is a check against a lunatic return value. */
2090
2091 if (count > maxcount)
2092 {
2093 fprintf(outfile,
2094 "** PCRE error: returned count %d is too big for offset size %d\n",
2095 count, use_size_offsets);
2096 count = use_size_offsets/3;
2097 if (do_g || do_G)
2098 {
2099 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2100 do_g = do_G = FALSE; /* Break g/G loop */
2101 }
2102 }
2103
2104 for (i = 0; i < count * 2; i += 2)
2105 {
2106 if (use_offsets[i] < 0)
2107 fprintf(outfile, "%2d: <unset>\n", i/2);
2108 else
2109 {
2110 fprintf(outfile, "%2d: ", i/2);
2111 (void)pchars(bptr + use_offsets[i],
2112 use_offsets[i+1] - use_offsets[i], outfile);
2113 fprintf(outfile, "\n");
2114 if (i == 0)
2115 {
2116 if (do_showrest)
2117 {
2118 fprintf(outfile, " 0+ ");
2119 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2120 outfile);
2121 fprintf(outfile, "\n");
2122 }
2123 }
2124 }
2125 }
2126
2127 for (i = 0; i < 32; i++)
2128 {
2129 if ((copystrings & (1 << i)) != 0)
2130 {
2131 char copybuffer[256];
2132 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2133 i, copybuffer, sizeof(copybuffer));
2134 if (rc < 0)
2135 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2136 else
2137 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2138 }
2139 }
2140
2141 for (copynamesptr = copynames;
2142 *copynamesptr != 0;
2143 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2144 {
2145 char copybuffer[256];
2146 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2147 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2148 if (rc < 0)
2149 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2150 else
2151 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2152 }
2153
2154 for (i = 0; i < 32; i++)
2155 {
2156 if ((getstrings & (1 << i)) != 0)
2157 {
2158 const char *substring;
2159 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2160 i, &substring);
2161 if (rc < 0)
2162 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2163 else
2164 {
2165 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2166 pcre_free_substring(substring);
2167 }
2168 }
2169 }
2170
2171 for (getnamesptr = getnames;
2172 *getnamesptr != 0;
2173 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2174 {
2175 const char *substring;
2176 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2177 count, (char *)getnamesptr, &substring);
2178 if (rc < 0)
2179 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2180 else
2181 {
2182 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2183 pcre_free_substring(substring);
2184 }
2185 }
2186
2187 if (getlist)
2188 {
2189 const char **stringlist;
2190 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2191 &stringlist);
2192 if (rc < 0)
2193 fprintf(outfile, "get substring list failed %d\n", rc);
2194 else
2195 {
2196 for (i = 0; i < count; i++)
2197 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2198 if (stringlist[i] != NULL)
2199 fprintf(outfile, "string list not terminated by NULL\n");
2200 /* free((void *)stringlist); */
2201 pcre_free_substring_list(stringlist);
2202 }
2203 }
2204 }
2205
2206 /* There was a partial match */
2207
2208 else if (count == PCRE_ERROR_PARTIAL)
2209 {
2210 fprintf(outfile, "Partial match");
2211 #if !defined NODFA
2212 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2213 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2214 bptr + use_offsets[0]);
2215 #endif
2216 fprintf(outfile, "\n");
2217 break; /* Out of the /g loop */
2218 }
2219
2220 /* Failed to match. If this is a /g or /G loop and we previously set
2221 g_notempty after a null match, this is not necessarily the end. We want
2222 to advance the start offset, and continue. We won't be at the end of the
2223 string - that was checked before setting g_notempty.
2224
2225 Complication arises in the case when the newline option is "any" or
2226 "anycrlf". If the previous match was at the end of a line terminated by
2227 CRLF, an advance of one character just passes the \r, whereas we should
2228 prefer the longer newline sequence, as does the code in pcre_exec().
2229 Fudge the offset value to achieve this.
2230
2231 Otherwise, in the case of UTF-8 matching, the advance must be one
2232 character, not one byte. */
2233
2234 else
2235 {
2236 if (g_notempty != 0)
2237 {
2238 int onechar = 1;
2239 unsigned int obits = ((real_pcre *)re)->options;
2240 use_offsets[0] = start_offset;
2241 if ((obits & PCRE_NEWLINE_BITS) == 0)
2242 {
2243 int d;
2244 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2245 obits = (d == '\r')? PCRE_NEWLINE_CR :
2246 (d == '\n')? PCRE_NEWLINE_LF :
2247 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2248 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2249 (d == -1)? PCRE_NEWLINE_ANY : 0;
2250 }
2251 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2252 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2253 &&
2254 start_offset < len - 1 &&
2255 bptr[start_offset] == '\r' &&
2256 bptr[start_offset+1] == '\n')
2257 onechar++;
2258 else if (use_utf8)
2259 {
2260 while (start_offset + onechar < len)
2261 {
2262 int tb = bptr[start_offset+onechar];
2263 if (tb <= 127) break;
2264 tb &= 0xc0;
2265 if (tb != 0 && tb != 0xc0) onechar++;
2266 }
2267 }
2268 use_offsets[1] = start_offset + onechar;
2269 }
2270 else
2271 {
2272 if (count == PCRE_ERROR_NOMATCH)
2273 {
2274 if (gmatched == 0) fprintf(outfile, "No match\n");
2275 }
2276 else fprintf(outfile, "Error %d\n", count);
2277 break; /* Out of the /g loop */
2278 }
2279 }
2280
2281 /* If not /g or /G we are done */
2282
2283 if (!do_g && !do_G) break;
2284
2285 /* If we have matched an empty string, first check to see if we are at
2286 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2287 what Perl's /g option does. This turns out to be rather cunning. First
2288 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2289 same point. If this fails (picked up above) we advance to the next
2290 character. */
2291
2292 g_notempty = 0;
2293
2294 if (use_offsets[0] == use_offsets[1])
2295 {
2296 if (use_offsets[0] == len) break;
2297 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2298 }
2299
2300 /* For /g, update the start offset, leaving the rest alone */
2301
2302 if (do_g) start_offset = use_offsets[1];
2303
2304 /* For /G, update the pointer and length */
2305
2306 else
2307 {
2308 bptr += use_offsets[1];
2309 len -= use_offsets[1];
2310 }
2311 } /* End of loop for /g and /G */
2312
2313 NEXT_DATA: continue;
2314 } /* End of loop for data lines */
2315
2316 CONTINUE:
2317
2318 #if !defined NOPOSIX
2319 if (posix || do_posix) regfree(&preg);
2320 #endif
2321
2322 if (re != NULL) new_free(re);
2323 if (extra != NULL) new_free(extra);
2324 if (tables != NULL)
2325 {
2326 new_free((void *)tables);
2327 setlocale(LC_CTYPE, "C");
2328 locale_set = 0;
2329 }
2330 }
2331
2332 if (infile == stdin) fprintf(outfile, "\n");
2333
2334 EXIT:
2335
2336 if (infile != NULL && infile != stdin) fclose(infile);
2337 if (outfile != NULL && outfile != stdout) fclose(outfile);
2338
2339 free(buffer);
2340 free(dbuffer);
2341 free(pbuffer);
2342 free(offsets);
2343
2344 return yield;
2345 }
2346
2347 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12