/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 149 - (show annotations) (download)
Mon Apr 16 15:28:08 2007 UTC (7 years, 7 months ago) by ph10
File MIME type: text/plain
File size: 68575 byte(s)
Add PCRE_NEWLINE_ANYCRLF.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
46
47
48 /* A number of things vary for Windows builds. Originally, pcretest opened its
49 input and output without "b"; then I was told that "b" was needed in some
50 environments, so it was added for release 5.0 to both the input and output. (It
51 makes no difference on Unix-like systems.) Later I was told that it is wrong
52 for the input on Windows. I've now abstracted the modes into two macros that
53 are set here, to make it easier to fiddle with them, and removed "b" from the
54 input mode under Windows. */
55
56 #if defined(_WIN32) || defined(WIN32)
57 #include <io.h> /* For _setmode() */
58 #include <fcntl.h> /* For _O_BINARY */
59 #define INPUT_MODE "r"
60 #define OUTPUT_MODE "wb"
61
62 #else
63 #include <sys/time.h> /* These two includes are needed */
64 #include <sys/resource.h> /* for setrlimit(). */
65 #define INPUT_MODE "rb"
66 #define OUTPUT_MODE "wb"
67 #endif
68
69
70 /* We have to include pcre_internal.h because we need the internal info for
71 displaying the results of pcre_study() and we also need to know about the
72 internal macros, structures, and other internal data values; pcretest has
73 "inside information" compared to a program that strictly follows the PCRE API.
74
75 Although pcre_internal.h does itself include pcre.h, we explicitly include it
76 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
77 appropriately for an application, not for building PCRE. */
78
79 #include "pcre.h"
80 #include "pcre_internal.h"
81
82 /* We need access to the data tables that PCRE uses. So as not to have to keep
83 two copies, we include the source file here, changing the names of the external
84 symbols to prevent clashes. */
85
86 #define _pcre_utf8_table1 utf8_table1
87 #define _pcre_utf8_table1_size utf8_table1_size
88 #define _pcre_utf8_table2 utf8_table2
89 #define _pcre_utf8_table3 utf8_table3
90 #define _pcre_utf8_table4 utf8_table4
91 #define _pcre_utt utt
92 #define _pcre_utt_size utt_size
93 #define _pcre_OP_lengths OP_lengths
94
95 #include "pcre_tables.c"
96
97 /* We also need the pcre_printint() function for printing out compiled
98 patterns. This function is in a separate file so that it can be included in
99 pcre_compile.c when that module is compiled with debugging enabled.
100
101 The definition of the macro PRINTABLE, which determines whether to print an
102 output character as-is or as a hex value when showing compiled patterns, is
103 contained in this file. We use it here also, in cases when the locale has not
104 been explicitly changed, so as to get consistent output from systems that
105 differ in their output from isprint() even in the "C" locale. */
106
107 #include "pcre_printint.src"
108
109 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
110
111
112 /* It is possible to compile this test program without including support for
113 testing the POSIX interface, though this is not available via the standard
114 Makefile. */
115
116 #if !defined NOPOSIX
117 #include "pcreposix.h"
118 #endif
119
120 /* It is also possible, for the benefit of the version currently imported into
121 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
122 interface to the DFA matcher (NODFA), and without the doublecheck of the old
123 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
124 UTF8 support if PCRE is built without it. */
125
126 #ifndef SUPPORT_UTF8
127 #ifndef NOUTF8
128 #define NOUTF8
129 #endif
130 #endif
131
132
133 /* Other parameters */
134
135 #ifndef CLOCKS_PER_SEC
136 #ifdef CLK_TCK
137 #define CLOCKS_PER_SEC CLK_TCK
138 #else
139 #define CLOCKS_PER_SEC 100
140 #endif
141 #endif
142
143 /* This is the default loop count for timing. */
144
145 #define LOOPREPEAT 500000
146
147 /* Static variables */
148
149 static FILE *outfile; /* Output stream; file-scope so that helpers such as new_malloc() can write to it */
150 static int log_store = 0; /* Set per pattern in main(): command-line default, or 1 for the M modifier */
151 static int callout_count; /* Incremented by callout() each time the callout number equals callout_fail_id */
152 static int callout_extra; /* Non-zero makes callout() print the capture details on every call */
153 static int callout_fail_count; /* callout() yields 1 once callout_count reaches this value */
154 static int callout_fail_id; /* The callout number whose calls are counted by callout() */
155 static int first_callout; /* Non-zero makes callout() re-print the subject; cleared by callout() */
156 static int locale_set = 0; /* Selects isprint() rather than PRINTABLE() in the PRINTHEX macro */
157 static int show_malloc; /* Non-zero makes the malloc/free wrappers trace their calls to outfile */
158 static int use_utf8; /* Non-zero makes pchars() decode UTF-8; reset to 0 for each pattern in main() */
159 static size_t gotten_store; /* Size of the most recent new_malloc() request */
160
161 /* The buffers grow automatically if very long input lines are encountered. */
162
163 static int buffer_size = 50000; /* Current size of each buffer; doubled by extend_inputline() */
164 static uschar *buffer = NULL; /* Input lines are read into this buffer */
165 static uschar *dbuffer = NULL; /* Resized alongside buffer; its use is not visible in this part of the file */
166 static uschar *pbuffer = NULL; /* Copy of the pattern, used by callout() when printing */
167
168
169
170 /*************************************************
171 * Read or extend an input line *
172 *************************************************/
173
174 /* Input lines are read into buffer, but both patterns and data lines can be
175 continued over multiple input lines. In addition, if the buffer fills up, we
176 want to automatically expand it so as to be able to handle extremely large
177 lines that are needed for certain stress tests. When the input buffer is
178 expanded, the other two buffers must also be expanded likewise, and the
179 contents of pbuffer, which are a copy of the input for callouts, must be
180 preserved (for when expansion happens for a data line). This is not the most
181 optimal way of handling this, but hey, this is just a test program!
182
183 Arguments:
184 f the file to read
185 start where in buffer to start (this *must* be within buffer)
186
187 Returns: pointer to the start of new data
188 could be a copy of start, or could be moved
189 NULL if no data read and EOF reached
190 */
191
192 static uschar *
193 extend_inputline(FILE *f, uschar *start)
194 {
195 uschar *here = start;
196
197 for (;;)
198 {
199 int rlen = buffer_size - (here - buffer);
200
201 if (rlen > 1000)
202 {
203 int dlen;
204 if (fgets((char *)here, rlen, f) == NULL)
205 return (here == start)? NULL : start;
206 dlen = (int)strlen((char *)here);
207 if (dlen > 0 && here[dlen - 1] == '\n') return start;
208 here += dlen;
209 }
210
211 else
212 {
213 int new_buffer_size = 2*buffer_size;
214 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
215 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
216 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
217
218 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
219 {
220 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
221 exit(1);
222 }
223
224 memcpy(new_buffer, buffer, buffer_size);
225 memcpy(new_pbuffer, pbuffer, buffer_size);
226
227 buffer_size = new_buffer_size;
228
229 start = new_buffer + (start - buffer);
230 here = new_buffer + (here - buffer);
231
232 free(buffer);
233 free(dbuffer);
234 free(pbuffer);
235
236 buffer = new_buffer;
237 dbuffer = new_dbuffer;
238 pbuffer = new_pbuffer;
239 }
240 }
241
242 return NULL; /* Control never gets here */
243 }
244
245
246
247
248
249
250
251 /*************************************************
252 * Read number from string *
253 *************************************************/
254
255 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
256 around with conditional compilation, just do the job by hand. It is only used
257 for unpicking arguments, so just keep it simple.
258
259 Arguments:
260 str string to be converted
261 endptr where to put the end pointer
262
263 Returns: the converted value, as an int
264 */
265
/* Convert a decimal digit string, optionally preceded by white space, into an
int. Conversion stops at the first non-digit, and *endptr is set to point at
that character so that the caller can check for trailing junk. If the digits
describe a value that does not fit in an int, the result saturates at INT_MAX
instead of overflowing (signed overflow is undefined behaviour); all of the
digits are still consumed.

Arguments:
  str        string to be converted
  endptr     where to put the end pointer

Returns:     the value, in the range 0 to INT_MAX
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;

while (*str != 0 && isspace(*str)) str++;

while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* result*10 + digit fits in an int only if result <= (INT_MAX - digit)/10.
  Once saturated, the value stays at INT_MAX for any further digits. */

  if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
  }

*endptr = str;
return result;
}
275
276
277
278
279 /*************************************************
280 * Convert UTF-8 string to value *
281 *************************************************/
282
283 /* This function takes one or more bytes that represents a UTF-8 character,
284 and returns the value of the character.
285
286 Argument:
287 utf8bytes a pointer to the byte vector
288 vptr a pointer to an int to receive the value
289
290 Returns: > 0 => the number of bytes consumed
291 -6 to 0 => malformed UTF-8 character at offset = (-return)
292 */
293
294 #if !defined NOUTF8
295
296 static int
297 utf82ord(unsigned char *utf8bytes, int *vptr)
298 {
299 int c = *utf8bytes++;
300 int d = c;
301 int i, j, s;
302
303 for (i = -1; i < 6; i++) /* i is number of additional bytes */
304 {
305 if ((d & 0x80) == 0) break;
306 d <<= 1;
307 }
308
309 if (i == -1) { *vptr = c; return 1; } /* ascii character */
310 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
311
312 /* i now has a value in the range 1-5 */
313
314 s = 6*i;
315 d = (c & utf8_table3[i]) << s;
316
317 for (j = 0; j < i; j++)
318 {
319 c = *utf8bytes++;
320 if ((c & 0xc0) != 0x80) return -(j+1);
321 s -= 6;
322 d |= (c & 0x3f) << s;
323 }
324
325 /* Check that encoding was the correct unique one */
326
327 for (j = 0; j < utf8_table1_size; j++)
328 if (d <= utf8_table1[j]) break;
329 if (j != i) return -(i+1);
330
331 /* Valid value */
332
333 *vptr = d;
334 return i+1;
335 }
336
337 #endif
338
339
340
341 /*************************************************
342 * Convert character value to UTF-8 *
343 *************************************************/
344
345 /* This function takes an integer value in the range 0 - 0x7fffffff
346 and encodes it as a UTF-8 character in 1 to 6 bytes.
347
348 Arguments:
349 cvalue the character value
350 utf8bytes pointer to buffer for result - at least 6 bytes long
351
352 Returns: number of characters placed in the buffer
353 */
354
355 #if !defined NOUTF8
356
357 static int
358 ord2utf8(int cvalue, uschar *utf8bytes)
359 {
360 register int i, j;
361 for (i = 0; i < utf8_table1_size; i++)
362 if (cvalue <= utf8_table1[i]) break;
363 utf8bytes += i;
364 for (j = i; j > 0; j--)
365 {
366 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
367 cvalue >>= 6;
368 }
369 *utf8bytes = utf8_table2[i] | cvalue;
370 return i + 1;
371 }
372
373 #endif
374
375
376
377 /*************************************************
378 * Print character string *
379 *************************************************/
380
381 /* Character string printing function. Must handle UTF-8 strings in utf8
382 mode. Yields number of characters printed. If handed a NULL file, just counts
383 chars without printing. */
384
385 static int pchars(unsigned char *p, int length, FILE *f)
386 {
387 int c = 0;
388 int yield = 0;
389
390 while (length-- > 0)
391 {
392 #if !defined NOUTF8
393 if (use_utf8)
394 {
395 int rc = utf82ord(p, &c);
396
397 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
398 {
399 length -= rc - 1;
400 p += rc;
401 if (PRINTHEX(c))
402 {
403 if (f != NULL) fprintf(f, "%c", c);
404 yield++;
405 }
406 else
407 {
408 int n = 4;
409 if (f != NULL) fprintf(f, "\\x{%02x}", c);
410 yield += (n <= 0x000000ff)? 2 :
411 (n <= 0x00000fff)? 3 :
412 (n <= 0x0000ffff)? 4 :
413 (n <= 0x000fffff)? 5 : 6;
414 }
415 continue;
416 }
417 }
418 #endif
419
420 /* Not UTF-8, or malformed UTF-8 */
421
422 c = *p++;
423 if (PRINTHEX(c))
424 {
425 if (f != NULL) fprintf(f, "%c", c);
426 yield++;
427 }
428 else
429 {
430 if (f != NULL) fprintf(f, "\\x%02x", c);
431 yield += 4;
432 }
433 }
434
435 return yield;
436 }
437
438
439
440 /*************************************************
441 * Callout function *
442 *************************************************/
443
444 /* Called from PCRE as a result of the (?C) item. We print out where we are in
445 the match. Yield zero unless more callouts than the fail count, or the callout
446 data is not zero. */
447
448 static int callout(pcre_callout_block *cb)
449 {
450 FILE *f = (first_callout | callout_extra)? outfile : NULL;
451 int i, pre_start, post_start, subject_length;
452
453 if (callout_extra)
454 {
455 fprintf(f, "Callout %d: last capture = %d\n",
456 cb->callout_number, cb->capture_last);
457
458 for (i = 0; i < cb->capture_top * 2; i += 2)
459 {
460 if (cb->offset_vector[i] < 0)
461 fprintf(f, "%2d: <unset>\n", i/2);
462 else
463 {
464 fprintf(f, "%2d: ", i/2);
465 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
466 cb->offset_vector[i+1] - cb->offset_vector[i], f);
467 fprintf(f, "\n");
468 }
469 }
470 }
471
472 /* Re-print the subject in canonical form, the first time or if giving full
473 datails. On subsequent calls in the same match, we use pchars just to find the
474 printed lengths of the substrings. */
475
476 if (f != NULL) fprintf(f, "--->");
477
478 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
479 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
480 cb->current_position - cb->start_match, f);
481
482 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
483
484 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
485 cb->subject_length - cb->current_position, f);
486
487 if (f != NULL) fprintf(f, "\n");
488
489 /* Always print appropriate indicators, with callout number if not already
490 shown. For automatic callouts, show the pattern offset. */
491
492 if (cb->callout_number == 255)
493 {
494 fprintf(outfile, "%+3d ", cb->pattern_position);
495 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
496 }
497 else
498 {
499 if (callout_extra) fprintf(outfile, " ");
500 else fprintf(outfile, "%3d ", cb->callout_number);
501 }
502
503 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
504 fprintf(outfile, "^");
505
506 if (post_start > 0)
507 {
508 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
509 fprintf(outfile, "^");
510 }
511
512 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
513 fprintf(outfile, " ");
514
515 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
516 pbuffer + cb->pattern_position);
517
518 fprintf(outfile, "\n");
519 first_callout = 0;
520
521 if (cb->callout_data != NULL)
522 {
523 int callout_data = *((int *)(cb->callout_data));
524 if (callout_data != 0)
525 {
526 fprintf(outfile, "Callout data = %d\n", callout_data);
527 return callout_data;
528 }
529 }
530
531 return (cb->callout_number != callout_fail_id)? 0 :
532 (++callout_count >= callout_fail_count)? 1 : 0;
533 }
534
535
536 /*************************************************
537 * Local malloc functions *
538 *************************************************/
539
540 /* Alternative malloc function, to test functionality and show the size of the
541 compiled re. */
542
543 static void *new_malloc(size_t size)
544 {
545 void *block = malloc(size);
546 gotten_store = size;
547 if (show_malloc)
548 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
549 return block;
550 }
551
552 static void new_free(void *block)
553 {
554 if (show_malloc)
555 fprintf(outfile, "free %p\n", block);
556 free(block);
557 }
558
559
560 /* For recursion malloc/free, to test stacking calls */
561
562 static void *stack_malloc(size_t size)
563 {
564 void *block = malloc(size);
565 if (show_malloc)
566 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
567 return block;
568 }
569
570 static void stack_free(void *block)
571 {
572 if (show_malloc)
573 fprintf(outfile, "stack_free %p\n", block);
574 free(block);
575 }
576
577
578 /*************************************************
579 * Call pcre_fullinfo() *
580 *************************************************/
581
582 /* Get one piece of information from the pcre_fullinfo() function */
583
584 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
585 {
586 int rc;
587 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
588 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
589 }
590
591
592
593 /*************************************************
594 * Byte flipping function *
595 *************************************************/
596
/* Reverse the byte order of a value. When n is 2 the two low-order bytes are
swapped; for any other n the four low-order bytes are reversed. Bits above
those bytes do not appear in the result. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int byte0 = value & 0xff;
unsigned long int byte1 = (value >> 8) & 0xff;
unsigned long int byte2, byte3;

if (n == 2) return (byte0 << 8) | byte1;

byte2 = (value >> 16) & 0xff;
byte3 = (value >> 24) & 0xff;
return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
606
607
608
609
610 /*************************************************
611 * Check match or recursion limit *
612 *************************************************/
613
614 static int
615 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
616 int start_offset, int options, int *use_offsets, int use_size_offsets,
617 int flag, unsigned long int *limit, int errnumber, const char *msg)
618 {
619 int count;
620 int min = 0;
621 int mid = 64;
622 int max = -1;
623
624 extra->flags |= flag;
625
626 for (;;)
627 {
628 *limit = mid;
629
630 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
631 use_offsets, use_size_offsets);
632
633 if (count == errnumber)
634 {
635 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
636 min = mid;
637 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
638 }
639
640 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
641 count == PCRE_ERROR_PARTIAL)
642 {
643 if (mid == min + 1)
644 {
645 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
646 break;
647 }
648 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
649 max = mid;
650 mid = (min + mid)/2;
651 }
652 else break; /* Some other error */
653 }
654
655 extra->flags &= ~flag;
656 return count;
657 }
658
659
660
661 /*************************************************
662 * Check newline indicator *
663 *************************************************/
664
665 /* This is used both at compile and run-time to check for <xxx> escapes, where
666 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
667 no match.
668
669 Arguments:
670 p points after the leading '<'
671 f file for error message
672
673 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
674 */
675
676 static int
677 check_newline(uschar *p, FILE *f)
678 {
679 if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
680 if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
681 if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
682 if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
683 if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
684 fprintf(f, "Unknown newline type at: <%s\n", p);
685 return 0;
686 }
687
688
689
690 /*************************************************
691 * Usage function *
692 *************************************************/
693
/* Write the command-line summary to the standard output. The lines for -dfa
and -p are left out when the corresponding support is compiled out. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input> [<output>]]\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s output store (memory) used information\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
719
720
721
722 /*************************************************
723 * Main Program *
724 *************************************************/
725
726 /* Read lines from named file or stdin and write to named file or stdout; lines
727 consist of a regular expression, in delimiters and optionally followed by
728 options, followed by a set of test data, terminated by an empty line. */
729
730 int main(int argc, char **argv)
731 {
732 FILE *infile = stdin;
733 int options = 0;
734 int study_options = 0;
735 int op = 1;
736 int timeit = 0;
737 int timeitm = 0;
738 int showinfo = 0;
739 int showstore = 0;
740 int quiet = 0;
741 int size_offsets = 45;
742 int size_offsets_max;
743 int *offsets = NULL;
744 #if !defined NOPOSIX
745 int posix = 0;
746 #endif
747 int debug = 0;
748 int done = 0;
749 int all_use_dfa = 0;
750 int yield = 0;
751 int stack_size;
752
753 /* These vectors store, end-to-end, a list of captured substring names. Assume
754 that 1024 is plenty long enough for the few names we'll be testing. */
755
756 uschar copynames[1024];
757 uschar getnames[1024];
758
759 uschar *copynamesptr;
760 uschar *getnamesptr;
761
762 /* Get buffers from malloc() so that Electric Fence will check their misuse
763 when I am debugging. They grow automatically when very long lines are read. */
764
765 buffer = (unsigned char *)malloc(buffer_size);
766 dbuffer = (unsigned char *)malloc(buffer_size);
767 pbuffer = (unsigned char *)malloc(buffer_size);
768
769 /* The outfile variable is static so that new_malloc can use it. */
770
771 outfile = stdout;
772
773 /* The following _setmode() stuff is some Windows magic that tells its runtime
774 library to translate CRLF into a single LF character. At least, that's what
775 I've been told: never having used Windows I take this all on trust. Originally
776 it set 0x8000, but then I was advised that _O_BINARY was better. */
777
778 #if defined(_WIN32) || defined(WIN32)
779 _setmode( _fileno( stdout ), _O_BINARY );
780 #endif
781
782 /* Scan options */
783
784 while (argc > 1 && argv[op][0] == '-')
785 {
786 unsigned char *endptr;
787
788 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
789 showstore = 1;
790 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
791 else if (strcmp(argv[op], "-b") == 0) debug = 1;
792 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
793 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
794 #if !defined NODFA
795 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
796 #endif
797 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
798 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
799 *endptr == 0))
800 {
801 op++;
802 argc--;
803 }
804 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
805 {
806 int both = argv[op][2] == 0;
807 int temp;
808 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
809 *endptr == 0))
810 {
811 timeitm = temp;
812 op++;
813 argc--;
814 }
815 else timeitm = LOOPREPEAT;
816 if (both) timeit = timeitm;
817 }
818 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
819 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
820 *endptr == 0))
821 {
822 #if defined(_WIN32) || defined(WIN32)
823 printf("PCRE: -S not supported on this OS\n");
824 exit(1);
825 #else
826 int rc;
827 struct rlimit rlim;
828 getrlimit(RLIMIT_STACK, &rlim);
829 rlim.rlim_cur = stack_size * 1024 * 1024;
830 rc = setrlimit(RLIMIT_STACK, &rlim);
831 if (rc != 0)
832 {
833 printf("PCRE: setrlimit() failed with error %d\n", rc);
834 exit(1);
835 }
836 op++;
837 argc--;
838 #endif
839 }
840 #if !defined NOPOSIX
841 else if (strcmp(argv[op], "-p") == 0) posix = 1;
842 #endif
843 else if (strcmp(argv[op], "-C") == 0)
844 {
845 int rc;
846 printf("PCRE version %s\n", pcre_version());
847 printf("Compiled with\n");
848 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
849 printf(" %sUTF-8 support\n", rc? "" : "No ");
850 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
851 printf(" %sUnicode properties support\n", rc? "" : "No ");
852 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
853 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
854 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
855 (rc == -2)? "ANYCRLF" :
856 (rc == -1)? "ANY" : "???");
857 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
858 printf(" Internal link size = %d\n", rc);
859 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
860 printf(" POSIX malloc threshold = %d\n", rc);
861 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
862 printf(" Default match limit = %d\n", rc);
863 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
864 printf(" Default recursion depth limit = %d\n", rc);
865 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
866 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
867 goto EXIT;
868 }
869 else if (strcmp(argv[op], "-help") == 0 ||
870 strcmp(argv[op], "--help") == 0)
871 {
872 usage();
873 goto EXIT;
874 }
875 else
876 {
877 printf("** Unknown or malformed option %s\n", argv[op]);
878 usage();
879 yield = 1;
880 goto EXIT;
881 }
882 op++;
883 argc--;
884 }
885
886 /* Get the store for the offsets vector, and remember what it was */
887
888 size_offsets_max = size_offsets;
889 offsets = (int *)malloc(size_offsets_max * sizeof(int));
890 if (offsets == NULL)
891 {
892 printf("** Failed to get %d bytes of memory for offsets vector\n",
893 size_offsets_max * sizeof(int));
894 yield = 1;
895 goto EXIT;
896 }
897
898 /* Sort out the input and output files */
899
900 if (argc > 1)
901 {
902 infile = fopen(argv[op], INPUT_MODE);
903 if (infile == NULL)
904 {
905 printf("** Failed to open %s\n", argv[op]);
906 yield = 1;
907 goto EXIT;
908 }
909 }
910
911 if (argc > 2)
912 {
913 outfile = fopen(argv[op+1], OUTPUT_MODE);
914 if (outfile == NULL)
915 {
916 printf("** Failed to open %s\n", argv[op+1]);
917 yield = 1;
918 goto EXIT;
919 }
920 }
921
922 /* Set alternative malloc function */
923
924 pcre_malloc = new_malloc;
925 pcre_free = new_free;
926 pcre_stack_malloc = stack_malloc;
927 pcre_stack_free = stack_free;
928
929 /* Heading line unless quiet, then prompt for first regex if stdin */
930
931 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
932
933 /* Main loop */
934
935 while (!done)
936 {
937 pcre *re = NULL;
938 pcre_extra *extra = NULL;
939
940 #if !defined NOPOSIX /* There are still compilers that require no indent */
941 regex_t preg;
942 int do_posix = 0;
943 #endif
944
945 const char *error;
946 unsigned char *p, *pp, *ppp;
947 unsigned char *to_file = NULL;
948 const unsigned char *tables = NULL;
949 unsigned long int true_size, true_study_size = 0;
950 size_t size, regex_gotten_store;
951 int do_study = 0;
952 int do_debug = debug;
953 int debug_lengths = 1;
954 int do_G = 0;
955 int do_g = 0;
956 int do_showinfo = showinfo;
957 int do_showrest = 0;
958 int do_flip = 0;
959 int erroroffset, len, delimiter, poffset;
960
961 use_utf8 = 0;
962
963 if (infile == stdin) printf(" re> ");
964 if (extend_inputline(infile, buffer) == NULL) break;
965 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
966 fflush(outfile);
967
968 p = buffer;
969 while (isspace(*p)) p++;
970 if (*p == 0) continue;
971
972 /* See if the pattern is to be loaded pre-compiled from a file. */
973
974 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
975 {
976 unsigned long int magic, get_options;
977 uschar sbuf[8];
978 FILE *f;
979
980 p++;
981 pp = p + (int)strlen((char *)p);
982 while (isspace(pp[-1])) pp--;
983 *pp = 0;
984
985 f = fopen((char *)p, "rb");
986 if (f == NULL)
987 {
988 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
989 continue;
990 }
991
992 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
993
994 true_size =
995 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
996 true_study_size =
997 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
998
999 re = (real_pcre *)new_malloc(true_size);
1000 regex_gotten_store = gotten_store;
1001
1002 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1003
1004 magic = ((real_pcre *)re)->magic_number;
1005 if (magic != MAGIC_NUMBER)
1006 {
1007 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1008 {
1009 do_flip = 1;
1010 }
1011 else
1012 {
1013 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1014 fclose(f);
1015 continue;
1016 }
1017 }
1018
1019 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1020 do_flip? " (byte-inverted)" : "", p);
1021
1022 /* Need to know if UTF-8 for printing data strings */
1023
1024 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1025 use_utf8 = (get_options & PCRE_UTF8) != 0;
1026
1027 /* Now see if there is any following study data */
1028
1029 if (true_study_size != 0)
1030 {
1031 pcre_study_data *psd;
1032
1033 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1034 extra->flags = PCRE_EXTRA_STUDY_DATA;
1035
1036 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1037 extra->study_data = psd;
1038
1039 if (fread(psd, 1, true_study_size, f) != true_study_size)
1040 {
1041 FAIL_READ:
1042 fprintf(outfile, "Failed to read data from %s\n", p);
1043 if (extra != NULL) new_free(extra);
1044 if (re != NULL) new_free(re);
1045 fclose(f);
1046 continue;
1047 }
1048 fprintf(outfile, "Study data loaded from %s\n", p);
1049 do_study = 1; /* To get the data output if requested */
1050 }
1051 else fprintf(outfile, "No study data\n");
1052
1053 fclose(f);
1054 goto SHOW_INFO;
1055 }
1056
1057 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1058 the pattern; if it isn't complete, read more. */
1059
1060 delimiter = *p++;
1061
1062 if (isalnum(delimiter) || delimiter == '\\')
1063 {
1064 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1065 goto SKIP_DATA;
1066 }
1067
1068 pp = p;
1069 poffset = p - buffer;
1070
1071 for(;;)
1072 {
1073 while (*pp != 0)
1074 {
1075 if (*pp == '\\' && pp[1] != 0) pp++;
1076 else if (*pp == delimiter) break;
1077 pp++;
1078 }
1079 if (*pp != 0) break;
1080 if (infile == stdin) printf(" > ");
1081 if ((pp = extend_inputline(infile, pp)) == NULL)
1082 {
1083 fprintf(outfile, "** Unexpected EOF\n");
1084 done = 1;
1085 goto CONTINUE;
1086 }
1087 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1088 }
1089
1090 /* The buffer may have moved while being extended; reset the start of data
1091 pointer to the correct relative point in the buffer. */
1092
1093 p = buffer + poffset;
1094
1095 /* If the first character after the delimiter is backslash, make
1096 the pattern end with backslash. This is purely to provide a way
1097 of testing for the error message when a pattern ends with backslash. */
1098
1099 if (pp[1] == '\\') *pp++ = '\\';
1100
1101 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1102 for callouts. */
1103
1104 *pp++ = 0;
1105 strcpy((char *)pbuffer, (char *)p);
1106
1107 /* Look for options after final delimiter */
1108
1109 options = 0;
1110 study_options = 0;
1111 log_store = showstore; /* default from command line */
1112
1113 while (*pp != 0)
1114 {
1115 switch (*pp++)
1116 {
1117 case 'f': options |= PCRE_FIRSTLINE; break;
1118 case 'g': do_g = 1; break;
1119 case 'i': options |= PCRE_CASELESS; break;
1120 case 'm': options |= PCRE_MULTILINE; break;
1121 case 's': options |= PCRE_DOTALL; break;
1122 case 'x': options |= PCRE_EXTENDED; break;
1123
1124 case '+': do_showrest = 1; break;
1125 case 'A': options |= PCRE_ANCHORED; break;
1126 case 'B': do_debug = 1; break;
1127 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1128 case 'D': do_debug = do_showinfo = 1; break;
1129 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1130 case 'F': do_flip = 1; break;
1131 case 'G': do_G = 1; break;
1132 case 'I': do_showinfo = 1; break;
1133 case 'J': options |= PCRE_DUPNAMES; break;
1134 case 'M': log_store = 1; break;
1135 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1136
1137 #if !defined NOPOSIX
1138 case 'P': do_posix = 1; break;
1139 #endif
1140
1141 case 'S': do_study = 1; break;
1142 case 'U': options |= PCRE_UNGREEDY; break;
1143 case 'X': options |= PCRE_EXTRA; break;
1144 case 'Z': debug_lengths = 0; break;
1145 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1146 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1147
1148 case 'L':
1149 ppp = pp;
1150 /* The '\r' test here is so that it works on Windows. */
1151 /* The 0 (NUL terminator) test is just in case this is an unterminated line. */
1152 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1153 *ppp = 0;
1154 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1155 {
1156 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1157 goto SKIP_DATA;
1158 }
1159 locale_set = 1;
1160 tables = pcre_maketables();
1161 pp = ppp;
1162 break;
1163
1164 case '>':
1165 to_file = pp;
1166 while (*pp != 0) pp++;
1167 while (isspace(pp[-1])) pp--;
1168 *pp = 0;
1169 break;
1170
1171 case '<':
1172 {
1173 int x = check_newline(pp, outfile);
1174 if (x == 0) goto SKIP_DATA;
1175 options |= x;
1176 while (*pp++ != '>');
1177 }
1178 break;
1179
1180 case '\r': /* So that it works in Windows */
1181 case '\n':
1182 case ' ':
1183 break;
1184
1185 default:
1186 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1187 goto SKIP_DATA;
1188 }
1189 }
1190
1191 /* Handle compiling via the POSIX interface, which doesn't support the
1192 timing, showing, or debugging options, nor the ability to pass over
1193 local character tables. */
1194
1195 #if !defined NOPOSIX
1196 if (posix || do_posix)
1197 {
1198 int rc;
1199 int cflags = 0;
1200
1201 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1202 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1203 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1204 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1205 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1206
1207 rc = regcomp(&preg, (char *)p, cflags);
1208
1209 /* Compilation failed; go back for another re, skipping to blank line
1210 if non-interactive. */
1211
1212 if (rc != 0)
1213 {
1214 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1215 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1216 goto SKIP_DATA;
1217 }
1218 }
1219
1220 /* Handle compiling via the native interface */
1221
1222 else
1223 #endif /* !defined NOPOSIX */
1224
1225 {
1226 if (timeit > 0)
1227 {
1228 register int i;
1229 clock_t time_taken;
1230 clock_t start_time = clock();
1231 for (i = 0; i < timeit; i++)
1232 {
1233 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1234 if (re != NULL) free(re);
1235 }
1236 time_taken = clock() - start_time;
1237 fprintf(outfile, "Compile time %.4f milliseconds\n",
1238 (((double)time_taken * 1000.0) / (double)timeit) /
1239 (double)CLOCKS_PER_SEC);
1240 }
1241
1242 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1243
1244 /* Compilation failed; go back for another re, skipping to blank line
1245 if non-interactive. */
1246
1247 if (re == NULL)
1248 {
1249 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1250 SKIP_DATA:
1251 if (infile != stdin)
1252 {
1253 for (;;)
1254 {
1255 if (extend_inputline(infile, buffer) == NULL)
1256 {
1257 done = 1;
1258 goto CONTINUE;
1259 }
1260 len = (int)strlen((char *)buffer);
1261 while (len > 0 && isspace(buffer[len-1])) len--;
1262 if (len == 0) break;
1263 }
1264 fprintf(outfile, "\n");
1265 }
1266 goto CONTINUE;
1267 }
1268
1269 /* Compilation succeeded; print data if required. There are now two
1270 info-returning functions. The old one has a limited interface and
1271 returns only limited data. Check that it agrees with the newer one. */
1272
1273 if (log_store)
1274 fprintf(outfile, "Memory allocation (code space): %d\n",
1275 (int)(gotten_store -
1276 sizeof(real_pcre) -
1277 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1278
1279 /* Extract the size for possible writing before possibly flipping it,
1280 and remember the store that was got. */
1281
1282 true_size = ((real_pcre *)re)->size;
1283 regex_gotten_store = gotten_store;
1284
1285 /* If /S was present, study the regexp to generate additional info to
1286 help with the matching. */
1287
1288 if (do_study)
1289 {
1290 if (timeit > 0)
1291 {
1292 register int i;
1293 clock_t time_taken;
1294 clock_t start_time = clock();
1295 for (i = 0; i < timeit; i++)
1296 extra = pcre_study(re, study_options, &error);
1297 time_taken = clock() - start_time;
1298 if (extra != NULL) free(extra);
1299 fprintf(outfile, " Study time %.4f milliseconds\n",
1300 (((double)time_taken * 1000.0) / (double)timeit) /
1301 (double)CLOCKS_PER_SEC);
1302 }
1303 extra = pcre_study(re, study_options, &error);
1304 if (error != NULL)
1305 fprintf(outfile, "Failed to study: %s\n", error);
1306 else if (extra != NULL)
1307 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1308 }
1309
1310 /* If the 'F' option was present, we flip the bytes of all the integer
1311 fields in the regex data block and the study block. This is to make it
1312 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1313 compiled on a different architecture. */
1314
1315 if (do_flip)
1316 {
1317 real_pcre *rre = (real_pcre *)re;
1318 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1319 rre->size = byteflip(rre->size, sizeof(rre->size));
1320 rre->options = byteflip(rre->options, sizeof(rre->options));
1321 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1322 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1323 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1324 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1325 rre->name_table_offset = byteflip(rre->name_table_offset,
1326 sizeof(rre->name_table_offset));
1327 rre->name_entry_size = byteflip(rre->name_entry_size,
1328 sizeof(rre->name_entry_size));
1329 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1330
1331 if (extra != NULL)
1332 {
1333 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1334 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1335 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1336 }
1337 }
1338
1339 /* Extract information from the compiled data if required */
1340
1341 SHOW_INFO:
1342
1343 if (do_debug)
1344 {
1345 fprintf(outfile, "------------------------------------------------------------------\n");
1346 pcre_printint(re, outfile, debug_lengths);
1347 }
1348
1349 if (do_showinfo)
1350 {
1351 unsigned long int get_options, all_options;
1352 #if !defined NOINFOCHECK
1353 int old_first_char, old_options, old_count;
1354 #endif
1355 int count, backrefmax, first_char, need_char;
1356 int nameentrysize, namecount;
1357 const uschar *nametable;
1358
1359 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1360 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1361 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1362 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1363 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1364 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1365 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1366 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1367 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1368
1369 #if !defined NOINFOCHECK
1370 old_count = pcre_info(re, &old_options, &old_first_char);
1371 if (count < 0) fprintf(outfile,
1372 "Error %d from pcre_info()\n", count);
1373 else
1374 {
1375 if (old_count != count) fprintf(outfile,
1376 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1377 old_count);
1378
1379 if (old_first_char != first_char) fprintf(outfile,
1380 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1381 first_char, old_first_char);
1382
1383 if (old_options != (int)get_options) fprintf(outfile,
1384 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1385 get_options, old_options);
1386 }
1387 #endif
1388
1389 if (size != regex_gotten_store) fprintf(outfile,
1390 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1391 (int)size, (int)regex_gotten_store);
1392
1393 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1394 if (backrefmax > 0)
1395 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1396
1397 if (namecount > 0)
1398 {
1399 fprintf(outfile, "Named capturing subpatterns:\n");
1400 while (namecount-- > 0)
1401 {
1402 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1403 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1404 GET2(nametable, 0));
1405 nametable += nameentrysize;
1406 }
1407 }
1408
1409 /* The NOPARTIAL bit is a private bit in the options, so we have
1410 to fish it out via our back door */
1411
1412 all_options = ((real_pcre *)re)->options;
1413 if (do_flip)
1414 {
1415 all_options = byteflip(all_options, sizeof(all_options));
1416 }
1417
1418 if ((all_options & PCRE_NOPARTIAL) != 0)
1419 fprintf(outfile, "Partial matching not supported\n");
1420
1421 if (get_options == 0) fprintf(outfile, "No options\n");
1422 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1423 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1424 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1425 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1426 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1427 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1428 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1429 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1430 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1431 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1432 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1433 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1434 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1435 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1436
1437 switch (get_options & PCRE_NEWLINE_BITS)
1438 {
1439 case PCRE_NEWLINE_CR:
1440 fprintf(outfile, "Forced newline sequence: CR\n");
1441 break;
1442
1443 case PCRE_NEWLINE_LF:
1444 fprintf(outfile, "Forced newline sequence: LF\n");
1445 break;
1446
1447 case PCRE_NEWLINE_CRLF:
1448 fprintf(outfile, "Forced newline sequence: CRLF\n");
1449 break;
1450
1451 case PCRE_NEWLINE_ANYCRLF:
1452 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1453 break;
1454
1455 case PCRE_NEWLINE_ANY:
1456 fprintf(outfile, "Forced newline sequence: ANY\n");
1457 break;
1458
1459 default:
1460 break;
1461 }
1462
1463 if (first_char == -1)
1464 {
1465 fprintf(outfile, "First char at start or follows newline\n");
1466 }
1467 else if (first_char < 0)
1468 {
1469 fprintf(outfile, "No first char\n");
1470 }
1471 else
1472 {
1473 int ch = first_char & 255;
1474 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1475 "" : " (caseless)";
1476 if (PRINTHEX(ch))
1477 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1478 else
1479 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1480 }
1481
1482 if (need_char < 0)
1483 {
1484 fprintf(outfile, "No need char\n");
1485 }
1486 else
1487 {
1488 int ch = need_char & 255;
1489 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1490 "" : " (caseless)";
1491 if (PRINTHEX(ch))
1492 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1493 else
1494 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1495 }
1496
1497 /* Don't output study size; at present it is in any case a fixed
1498 value, but it varies, depending on the computer architecture, and
1499 so messes up the test suite. (And with the /F option, it might be
1500 flipped.) */
1501
1502 if (do_study)
1503 {
1504 if (extra == NULL)
1505 fprintf(outfile, "Study returned NULL\n");
1506 else
1507 {
1508 uschar *start_bits = NULL;
1509 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1510
1511 if (start_bits == NULL)
1512 fprintf(outfile, "No starting byte set\n");
1513 else
1514 {
1515 int i;
1516 int c = 24;
1517 fprintf(outfile, "Starting byte set: ");
1518 for (i = 0; i < 256; i++)
1519 {
1520 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1521 {
1522 if (c > 75)
1523 {
1524 fprintf(outfile, "\n ");
1525 c = 2;
1526 }
1527 if (PRINTHEX(i) && i != ' ')
1528 {
1529 fprintf(outfile, "%c ", i);
1530 c += 2;
1531 }
1532 else
1533 {
1534 fprintf(outfile, "\\x%02x ", i);
1535 c += 5;
1536 }
1537 }
1538 }
1539 fprintf(outfile, "\n");
1540 }
1541 }
1542 }
1543 }
1544
1545 /* If the '>' option was present, we write out the regex to a file, and
1546 that is all. The first 8 bytes of the file are the regex length and then
1547 the study length, in big-endian order. */
1548
1549 if (to_file != NULL)
1550 {
1551 FILE *f = fopen((char *)to_file, "wb");
1552 if (f == NULL)
1553 {
1554 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1555 }
1556 else
1557 {
1558 uschar sbuf[8];
1559 sbuf[0] = (true_size >> 24) & 255;
1560 sbuf[1] = (true_size >> 16) & 255;
1561 sbuf[2] = (true_size >> 8) & 255;
1562 sbuf[3] = (true_size) & 255;
1563
1564 sbuf[4] = (true_study_size >> 24) & 255;
1565 sbuf[5] = (true_study_size >> 16) & 255;
1566 sbuf[6] = (true_study_size >> 8) & 255;
1567 sbuf[7] = (true_study_size) & 255;
1568
1569 if (fwrite(sbuf, 1, 8, f) < 8 ||
1570 fwrite(re, 1, true_size, f) < true_size)
1571 {
1572 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1573 }
1574 else
1575 {
1576 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1577 if (extra != NULL)
1578 {
1579 if (fwrite(extra->study_data, 1, true_study_size, f) <
1580 true_study_size)
1581 {
1582 fprintf(outfile, "Write error on %s: %s\n", to_file,
1583 strerror(errno));
1584 }
1585 else fprintf(outfile, "Study data written to %s\n", to_file);
1586
1587 }
1588 }
1589 fclose(f);
1590 }
1591
1592 new_free(re);
1593 if (extra != NULL) new_free(extra);
1594 if (tables != NULL) new_free((void *)tables);
1595 continue; /* With next regex */
1596 }
1597 } /* End of non-POSIX compile */
1598
1599 /* Read data lines and test them */
1600
1601 for (;;)
1602 {
1603 uschar *q;
1604 uschar *bptr;
1605 int *use_offsets = offsets;
1606 int use_size_offsets = size_offsets;
1607 int callout_data = 0;
1608 int callout_data_set = 0;
1609 int count, c;
1610 int copystrings = 0;
1611 int find_match_limit = 0;
1612 int getstrings = 0;
1613 int getlist = 0;
1614 int gmatched = 0;
1615 int start_offset = 0;
1616 int g_notempty = 0;
1617 int use_dfa = 0;
1618
1619 options = 0;
1620
1621 *copynames = 0;
1622 *getnames = 0;
1623
1624 copynamesptr = copynames;
1625 getnamesptr = getnames;
1626
1627 pcre_callout = callout;
1628 first_callout = 1;
1629 callout_extra = 0;
1630 callout_count = 0;
1631 callout_fail_count = 999999;
1632 callout_fail_id = -1;
1633 show_malloc = 0;
1634
1635 if (extra != NULL) extra->flags &=
1636 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1637
1638 len = 0;
1639 for (;;)
1640 {
1641 if (infile == stdin) printf("data> ");
1642 if (extend_inputline(infile, buffer + len) == NULL)
1643 {
1644 if (len > 0) break;
1645 done = 1;
1646 goto CONTINUE;
1647 }
1648 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1649 len = (int)strlen((char *)buffer);
1650 if (buffer[len-1] == '\n') break;
1651 }
1652
1653 while (len > 0 && isspace(buffer[len-1])) len--;
1654 buffer[len] = 0;
1655 if (len == 0) break;
1656
1657 p = buffer;
1658 while (isspace(*p)) p++;
1659
1660 bptr = q = dbuffer;
1661 while ((c = *p++) != 0)
1662 {
1663 int i = 0;
1664 int n = 0;
1665
1666 if (c == '\\') switch ((c = *p++))
1667 {
1668 case 'a': c = 7; break;
1669 case 'b': c = '\b'; break;
1670 case 'e': c = 27; break;
1671 case 'f': c = '\f'; break;
1672 case 'n': c = '\n'; break;
1673 case 'r': c = '\r'; break;
1674 case 't': c = '\t'; break;
1675 case 'v': c = '\v'; break;
1676
1677 case '0': case '1': case '2': case '3':
1678 case '4': case '5': case '6': case '7':
1679 c -= '0';
1680 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1681 c = c * 8 + *p++ - '0';
1682
1683 #if !defined NOUTF8
1684 if (use_utf8 && c > 255)
1685 {
1686 unsigned char buff8[8];
1687 int ii, utn;
1688 utn = ord2utf8(c, buff8);
1689 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1690 c = buff8[ii]; /* Last byte */
1691 }
1692 #endif
1693 break;
1694
1695 case 'x':
1696
1697 /* Handle \x{..} specially - new Perl thing for utf8 */
1698
1699 #if !defined NOUTF8
1700 if (*p == '{')
1701 {
1702 unsigned char *pt = p;
1703 c = 0;
1704 while (isxdigit(*(++pt)))
1705 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1706 if (*pt == '}')
1707 {
1708 unsigned char buff8[8];
1709 int ii, utn;
1710 utn = ord2utf8(c, buff8);
1711 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1712 c = buff8[ii]; /* Last byte */
1713 p = pt + 1;
1714 break;
1715 }
1716 /* Not correct form; fall through */
1717 }
1718 #endif
1719
1720 /* Ordinary \x */
1721
1722 c = 0;
1723 while (i++ < 2 && isxdigit(*p))
1724 {
1725 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1726 p++;
1727 }
1728 break;
1729
1730 case 0: /* \ followed by EOF allows for an empty line */
1731 p--;
1732 continue;
1733
1734 case '>':
1735 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1736 continue;
1737
1738 case 'A': /* Option setting */
1739 options |= PCRE_ANCHORED;
1740 continue;
1741
1742 case 'B':
1743 options |= PCRE_NOTBOL;
1744 continue;
1745
1746 case 'C':
1747 if (isdigit(*p)) /* Set copy string */
1748 {
1749 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1750 copystrings |= 1 << n;
1751 }
1752 else if (isalnum(*p))
1753 {
1754 uschar *npp = copynamesptr;
1755 while (isalnum(*p)) *npp++ = *p++;
1756 *npp++ = 0;
1757 *npp = 0;
1758 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1759 if (n < 0)
1760 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1761 copynamesptr = npp;
1762 }
1763 else if (*p == '+')
1764 {
1765 callout_extra = 1;
1766 p++;
1767 }
1768 else if (*p == '-')
1769 {
1770 pcre_callout = NULL;
1771 p++;
1772 }
1773 else if (*p == '!')
1774 {
1775 callout_fail_id = 0;
1776 p++;
1777 while(isdigit(*p))
1778 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1779 callout_fail_count = 0;
1780 if (*p == '!')
1781 {
1782 p++;
1783 while(isdigit(*p))
1784 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1785 }
1786 }
1787 else if (*p == '*')
1788 {
1789 int sign = 1;
1790 callout_data = 0;
1791 if (*(++p) == '-') { sign = -1; p++; }
1792 while(isdigit(*p))
1793 callout_data = callout_data * 10 + *p++ - '0';
1794 callout_data *= sign;
1795 callout_data_set = 1;
1796 }
1797 continue;
1798
1799 #if !defined NODFA
1800 case 'D':
1801 #if !defined NOPOSIX
1802 if (posix || do_posix)
1803 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1804 else
1805 #endif
1806 use_dfa = 1;
1807 continue;
1808
1809 case 'F':
1810 options |= PCRE_DFA_SHORTEST;
1811 continue;
1812 #endif
1813
1814 case 'G':
1815 if (isdigit(*p))
1816 {
1817 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1818 getstrings |= 1 << n;
1819 }
1820 else if (isalnum(*p))
1821 {
1822 uschar *npp = getnamesptr;
1823 while (isalnum(*p)) *npp++ = *p++;
1824 *npp++ = 0;
1825 *npp = 0;
1826 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1827 if (n < 0)
1828 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1829 getnamesptr = npp;
1830 }
1831 continue;
1832
1833 case 'L':
1834 getlist = 1;
1835 continue;
1836
1837 case 'M':
1838 find_match_limit = 1;
1839 continue;
1840
1841 case 'N':
1842 options |= PCRE_NOTEMPTY;
1843 continue;
1844
1845 case 'O':
1846 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1847 if (n > size_offsets_max)
1848 {
1849 size_offsets_max = n;
1850 free(offsets);
1851 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1852 if (offsets == NULL)
1853 {
1854 printf("** Failed to get %d bytes of memory for offsets vector\n",
1855 size_offsets_max * sizeof(int));
1856 yield = 1;
1857 goto EXIT;
1858 }
1859 }
1860 use_size_offsets = n;
1861 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1862 continue;
1863
1864 case 'P':
1865 options |= PCRE_PARTIAL;
1866 continue;
1867
1868 case 'Q':
1869 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1870 if (extra == NULL)
1871 {
1872 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1873 extra->flags = 0;
1874 }
1875 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1876 extra->match_limit_recursion = n;
1877 continue;
1878
1879 case 'q':
1880 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1881 if (extra == NULL)
1882 {
1883 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1884 extra->flags = 0;
1885 }
1886 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1887 extra->match_limit = n;
1888 continue;
1889
1890 #if !defined NODFA
1891 case 'R':
1892 options |= PCRE_DFA_RESTART;
1893 continue;
1894 #endif
1895
1896 case 'S':
1897 show_malloc = 1;
1898 continue;
1899
1900 case 'Z':
1901 options |= PCRE_NOTEOL;
1902 continue;
1903
1904 case '?':
1905 options |= PCRE_NO_UTF8_CHECK;
1906 continue;
1907
1908 case '<':
1909 {
1910 int x = check_newline(p, outfile);
1911 if (x == 0) goto NEXT_DATA;
1912 options |= x;
1913 while (*p++ != '>');
1914 }
1915 continue;
1916 }
1917 *q++ = c;
1918 }
1919 *q = 0;
1920 len = q - dbuffer;
1921
1922 if ((all_use_dfa || use_dfa) && find_match_limit)
1923 {
1924 printf("**Match limit not relevant for DFA matching: ignored\n");
1925 find_match_limit = 0;
1926 }
1927
1928 /* Handle matching via the POSIX interface, which does not
1929 support timing or playing with the match limit or callout data. */
1930
1931 #if !defined NOPOSIX
1932 if (posix || do_posix)
1933 {
1934 int rc;
1935 int eflags = 0;
1936 regmatch_t *pmatch = NULL;
1937 if (use_size_offsets > 0)
1938 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1939 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1940 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1941
1942 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1943
1944 if (rc != 0)
1945 {
1946 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1947 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1948 }
1949 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1950 != 0)
1951 {
1952 fprintf(outfile, "Matched with REG_NOSUB\n");
1953 }
1954 else
1955 {
1956 size_t i;
1957 for (i = 0; i < (size_t)use_size_offsets; i++)
1958 {
1959 if (pmatch[i].rm_so >= 0)
1960 {
1961 fprintf(outfile, "%2d: ", (int)i);
1962 (void)pchars(dbuffer + pmatch[i].rm_so,
1963 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1964 fprintf(outfile, "\n");
1965 if (i == 0 && do_showrest)
1966 {
1967 fprintf(outfile, " 0+ ");
1968 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1969 outfile);
1970 fprintf(outfile, "\n");
1971 }
1972 }
1973 }
1974 }
1975 free(pmatch);
1976 }
1977
1978 /* Handle matching via the native interface - repeats for /g and /G */
1979
1980 else
1981 #endif /* !defined NOPOSIX */
1982
1983 for (;; gmatched++) /* Loop for /g or /G */
1984 {
1985 if (timeitm > 0)
1986 {
1987 register int i;
1988 clock_t time_taken;
1989 clock_t start_time = clock();
1990
1991 #if !defined NODFA
1992 if (all_use_dfa || use_dfa)
1993 {
1994 int workspace[1000];
1995 for (i = 0; i < timeitm; i++)
1996 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1997 options | g_notempty, use_offsets, use_size_offsets, workspace,
1998 sizeof(workspace)/sizeof(int));
1999 }
2000 else
2001 #endif
2002
2003 for (i = 0; i < timeitm; i++)
2004 count = pcre_exec(re, extra, (char *)bptr, len,
2005 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2006
2007 time_taken = clock() - start_time;
2008 fprintf(outfile, "Execute time %.4f milliseconds\n",
2009 (((double)time_taken * 1000.0) / (double)timeitm) /
2010 (double)CLOCKS_PER_SEC);
2011 }
2012
2013 /* If find_match_limit is set, we want to do repeated matches with
2014 varying limits in order to find the minimum value for the match limit and
2015 for the recursion limit. */
2016
2017 if (find_match_limit)
2018 {
2019 if (extra == NULL)
2020 {
2021 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2022 extra->flags = 0;
2023 }
2024
2025 (void)check_match_limit(re, extra, bptr, len, start_offset,
2026 options|g_notempty, use_offsets, use_size_offsets,
2027 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2028 PCRE_ERROR_MATCHLIMIT, "match()");
2029
2030 count = check_match_limit(re, extra, bptr, len, start_offset,
2031 options|g_notempty, use_offsets, use_size_offsets,
2032 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2033 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2034 }
2035
2036 /* If callout_data is set, use the interface with additional data */
2037
2038 else if (callout_data_set)
2039 {
2040 if (extra == NULL)
2041 {
2042 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2043 extra->flags = 0;
2044 }
2045 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2046 extra->callout_data = &callout_data;
2047 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2048 options | g_notempty, use_offsets, use_size_offsets);
2049 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2050 }
2051
2052 /* The normal case is just to do the match once, with the default
2053 value of match_limit. */
2054
2055 #if !defined NODFA
2056 else if (all_use_dfa || use_dfa)
2057 {
2058 int workspace[1000];
2059 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2060 options | g_notempty, use_offsets, use_size_offsets, workspace,
2061 sizeof(workspace)/sizeof(int));
2062 if (count == 0)
2063 {
2064 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2065 count = use_size_offsets/2;
2066 }
2067 }
2068 #endif
2069
2070 else
2071 {
2072 count = pcre_exec(re, extra, (char *)bptr, len,
2073 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2074 if (count == 0)
2075 {
2076 fprintf(outfile, "Matched, but too many substrings\n");
2077 count = use_size_offsets/3;
2078 }
2079 }
2080
2081 /* Matched */
2082
2083 if (count >= 0)
2084 {
2085 int i, maxcount;
2086
2087 #if !defined NODFA
2088 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2089 #endif
2090 maxcount = use_size_offsets/3;
2091
2092 /* This is a check against a lunatic return value. */
2093
2094 if (count > maxcount)
2095 {
2096 fprintf(outfile,
2097 "** PCRE error: returned count %d is too big for offset size %d\n",
2098 count, use_size_offsets);
2099 count = use_size_offsets/3;
2100 if (do_g || do_G)
2101 {
2102 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2103 do_g = do_G = FALSE; /* Break g/G loop */
2104 }
2105 }
2106
2107 for (i = 0; i < count * 2; i += 2)
2108 {
2109 if (use_offsets[i] < 0)
2110 fprintf(outfile, "%2d: <unset>\n", i/2);
2111 else
2112 {
2113 fprintf(outfile, "%2d: ", i/2);
2114 (void)pchars(bptr + use_offsets[i],
2115 use_offsets[i+1] - use_offsets[i], outfile);
2116 fprintf(outfile, "\n");
2117 if (i == 0)
2118 {
2119 if (do_showrest)
2120 {
2121 fprintf(outfile, " 0+ ");
2122 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2123 outfile);
2124 fprintf(outfile, "\n");
2125 }
2126 }
2127 }
2128 }
2129
2130 for (i = 0; i < 32; i++)
2131 {
2132 if ((copystrings & (1 << i)) != 0)
2133 {
2134 char copybuffer[256];
2135 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2136 i, copybuffer, sizeof(copybuffer));
2137 if (rc < 0)
2138 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2139 else
2140 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2141 }
2142 }
2143
2144 for (copynamesptr = copynames;
2145 *copynamesptr != 0;
2146 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2147 {
2148 char copybuffer[256];
2149 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2150 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2151 if (rc < 0)
2152 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2153 else
2154 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2155 }
2156
2157 for (i = 0; i < 32; i++)
2158 {
2159 if ((getstrings & (1 << i)) != 0)
2160 {
2161 const char *substring;
2162 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2163 i, &substring);
2164 if (rc < 0)
2165 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2166 else
2167 {
2168 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2169 pcre_free_substring(substring);
2170 }
2171 }
2172 }
2173
2174 for (getnamesptr = getnames;
2175 *getnamesptr != 0;
2176 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2177 {
2178 const char *substring;
2179 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2180 count, (char *)getnamesptr, &substring);
2181 if (rc < 0)
2182 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2183 else
2184 {
2185 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2186 pcre_free_substring(substring);
2187 }
2188 }
2189
2190 if (getlist)
2191 {
2192 const char **stringlist;
2193 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2194 &stringlist);
2195 if (rc < 0)
2196 fprintf(outfile, "get substring list failed %d\n", rc);
2197 else
2198 {
2199 for (i = 0; i < count; i++)
2200 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2201 if (stringlist[i] != NULL)
2202 fprintf(outfile, "string list not terminated by NULL\n");
2203 /* free((void *)stringlist); */
2204 pcre_free_substring_list(stringlist);
2205 }
2206 }
2207 }
2208
2209 /* There was a partial match */
2210
2211 else if (count == PCRE_ERROR_PARTIAL)
2212 {
2213 fprintf(outfile, "Partial match");
2214 #if !defined NODFA
2215 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2216 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2217 bptr + use_offsets[0]);
2218 #endif
2219 fprintf(outfile, "\n");
2220 break; /* Out of the /g loop */
2221 }
2222
2223 /* Failed to match. If this is a /g or /G loop and we previously set
2224 g_notempty after a null match, this is not necessarily the end. We want
2225 to advance the start offset, and continue. We won't be at the end of the
2226 string - that was checked before setting g_notempty.
2227
2228 Complication arises in the case when the newline option is "any" or
2229 "anycrlf". If the previous match was at the end of a line terminated by
2230 CRLF, an advance of one character just passes the \r, whereas we should
2231 prefer the longer newline sequence, as does the code in pcre_exec().
2232 Fudge the offset value to achieve this.
2233
2234 Otherwise, in the case of UTF-8 matching, the advance must be one
2235 character, not one byte. */
2236
2237 else
2238 {
2239 if (g_notempty != 0)
2240 {
2241 int onechar = 1;
2242 unsigned int obits = ((real_pcre *)re)->options;
2243 use_offsets[0] = start_offset;
2244 if ((obits & PCRE_NEWLINE_BITS) == 0)
2245 {
2246 int d;
2247 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2248 obits = (d == '\r')? PCRE_NEWLINE_CR :
2249 (d == '\n')? PCRE_NEWLINE_LF :
2250 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2251 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2252 (d == -1)? PCRE_NEWLINE_ANY : 0;
2253 }
2254 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2255 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2256 &&
2257 start_offset < len - 1 &&
2258 bptr[start_offset] == '\r' &&
2259 bptr[start_offset+1] == '\n')
2260 onechar++;
2261 else if (use_utf8)
2262 {
2263 while (start_offset + onechar < len)
2264 {
2265 int tb = bptr[start_offset+onechar];
2266 if (tb <= 127) break;
2267 tb &= 0xc0;
2268 if (tb != 0 && tb != 0xc0) onechar++;
2269 }
2270 }
2271 use_offsets[1] = start_offset + onechar;
2272 }
2273 else
2274 {
2275 if (count == PCRE_ERROR_NOMATCH)
2276 {
2277 if (gmatched == 0) fprintf(outfile, "No match\n");
2278 }
2279 else fprintf(outfile, "Error %d\n", count);
2280 break; /* Out of the /g loop */
2281 }
2282 }
2283
2284 /* If not /g or /G we are done */
2285
2286 if (!do_g && !do_G) break;
2287
2288 /* If we have matched an empty string, first check to see if we are at
2289 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2290 what Perl's /g option does. This turns out to be rather cunning. First
2291 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2292 same point. If this fails (picked up above) we advance to the next
2293 character. */
2294
2295 g_notempty = 0;
2296
2297 if (use_offsets[0] == use_offsets[1])
2298 {
2299 if (use_offsets[0] == len) break;
2300 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2301 }
2302
2303 /* For /g, update the start offset, leaving the rest alone */
2304
2305 if (do_g) start_offset = use_offsets[1];
2306
2307 /* For /G, update the pointer and length */
2308
2309 else
2310 {
2311 bptr += use_offsets[1];
2312 len -= use_offsets[1];
2313 }
2314 } /* End of loop for /g and /G */
2315
2316 NEXT_DATA: continue;
2317 } /* End of loop for data lines */
2318
2319 CONTINUE:
2320
2321 #if !defined NOPOSIX
2322 if (posix || do_posix) regfree(&preg);
2323 #endif
2324
2325 if (re != NULL) new_free(re);
2326 if (extra != NULL) new_free(extra);
2327 if (tables != NULL)
2328 {
2329 new_free((void *)tables);
2330 setlocale(LC_CTYPE, "C");
2331 locale_set = 0;
2332 }
2333 }
2334
2335 if (infile == stdin) fprintf(outfile, "\n");
2336
2337 EXIT:
2338
2339 if (infile != NULL && infile != stdin) fclose(infile);
2340 if (outfile != NULL && outfile != stdout) fclose(outfile);
2341
2342 free(buffer);
2343 free(dbuffer);
2344 free(pbuffer);
2345 free(offsets);
2346
2347 return yield;
2348 }
2349
2350 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12