/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 121 - (show annotations) (download)
Mon Mar 12 12:12:47 2007 UTC (7 years, 6 months ago) by ph10
File MIME type: text/plain
File size: 67069 byte(s)
Add some missing free() calls to pcretest and pcregrep.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #include <ctype.h>
40 #include <stdio.h>
41 #include <string.h>
42 #include <stdlib.h>
43 #include <time.h>
44 #include <locale.h>
45 #include <errno.h>
46
47
48 /* A number of things vary for Windows builds. Originally, pcretest opened its
49 input and output without "b"; then I was told that "b" was needed in some
50 environments, so it was added for release 5.0 to both the input and output. (It
51 makes no difference on Unix-like systems.) Later I was told that it is wrong
52 for the input on Windows. I've now abstracted the modes into two macros that
53 are set here, to make it easier to fiddle with them, and removed "b" from the
54 input mode under Windows. */
55
56 #if defined(_WIN32) || defined(WIN32)
57 #include <io.h> /* For _setmode() */
58 #include <fcntl.h> /* For _O_BINARY */
59 #define INPUT_MODE "r"
60 #define OUTPUT_MODE "wb"
61
62 #else
63 #include <sys/time.h> /* These two includes are needed */
64 #include <sys/resource.h> /* for setrlimit(). */
65 #define INPUT_MODE "rb"
66 #define OUTPUT_MODE "wb"
67 #endif
68
69
70 #define PCRE_SPY /* For Win32 build, import data, not export */
71
72 /* We include pcre_internal.h because we need the internal info for displaying
73 the results of pcre_study() and we also need to know about the internal
74 macros, structures, and other internal data values; pcretest has "inside
75 information" compared to a program that strictly follows the PCRE API. */
76
77 #include "pcre_internal.h"
78
79 /* We need access to the data tables that PCRE uses. So as not to have to keep
80 two copies, we include the source file here, changing the names of the external
81 symbols to prevent clashes. */
82
83 #define _pcre_utf8_table1 utf8_table1
84 #define _pcre_utf8_table1_size utf8_table1_size
85 #define _pcre_utf8_table2 utf8_table2
86 #define _pcre_utf8_table3 utf8_table3
87 #define _pcre_utf8_table4 utf8_table4
88 #define _pcre_utt utt
89 #define _pcre_utt_size utt_size
90 #define _pcre_OP_lengths OP_lengths
91
92 #include "pcre_tables.c"
93
94 /* We also need the pcre_printint() function for printing out compiled
95 patterns. This function is in a separate file so that it can be included in
96 pcre_compile.c when that module is compiled with debugging enabled.
97
98 The definition of the macro PRINTABLE, which determines whether to print an
99 output character as-is or as a hex value when showing compiled patterns, is
100 contained in this file. We use it here also, in cases when the locale has not
101 been explicitly changed, so as to get consistent output from systems that
102 differ in their output from isprint() even in the "C" locale. */
103
104 #include "pcre_printint.src"
105
106 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
107
108
109 /* It is possible to compile this test program without including support for
110 testing the POSIX interface, though this is not available via the standard
111 Makefile. */
112
113 #if !defined NOPOSIX
114 #include "pcreposix.h"
115 #endif
116
117 /* It is also possible, for the benefit of the version currently imported into
118 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
119 interface to the DFA matcher (NODFA), and without the doublecheck of the old
120 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
121 UTF8 support if PCRE is built without it. */
122
123 #ifndef SUPPORT_UTF8
124 #ifndef NOUTF8
125 #define NOUTF8
126 #endif
127 #endif
128
129
130 /* Other parameters */
131
132 #ifndef CLOCKS_PER_SEC
133 #ifdef CLK_TCK
134 #define CLOCKS_PER_SEC CLK_TCK
135 #else
136 #define CLOCKS_PER_SEC 100
137 #endif
138 #endif
139
140 /* This is the default loop count for timing. */
141
142 #define LOOPREPEAT 500000
143
144 /* Static variables */
145
146 static FILE *outfile;
147 static int log_store = 0;
148 static int callout_count;
149 static int callout_extra;
150 static int callout_fail_count;
151 static int callout_fail_id;
152 static int first_callout;
153 static int locale_set = 0;
154 static int show_malloc;
155 static int use_utf8;
156 static size_t gotten_store;
157
158 /* The buffers grow automatically if very long input lines are encountered. */
159
160 static int buffer_size = 50000;
161 static uschar *buffer = NULL;
162 static uschar *dbuffer = NULL;
163 static uschar *pbuffer = NULL;
164
165
166
167 /*************************************************
168 * Read or extend an input line *
169 *************************************************/
170
171 /* Input lines are read into buffer, but both patterns and data lines can be
172 continued over multiple input lines. In addition, if the buffer fills up, we
173 want to automatically expand it so as to be able to handle extremely large
174 lines that are needed for certain stress tests. When the input buffer is
175 expanded, the other two buffers must also be expanded likewise, and the
176 contents of pbuffer, which are a copy of the input for callouts, must be
177 preserved (for when expansion happens for a data line). This is not the most
178 optimal way of handling this, but hey, this is just a test program!
179
180 Arguments:
181 f the file to read
182 start where in buffer to start (this *must* be within buffer)
183
184 Returns: pointer to the start of new data
185 could be a copy of start, or could be moved
186 NULL if no data read and EOF reached
187 */
188
189 static uschar *
190 extend_inputline(FILE *f, uschar *start)
191 {
192 uschar *here = start;
193
194 for (;;)
195 {
196 int rlen = buffer_size - (here - buffer);
197
198 if (rlen > 1000)
199 {
200 int dlen;
201 if (fgets((char *)here, rlen, f) == NULL)
202 return (here == start)? NULL : start;
203 dlen = (int)strlen((char *)here);
204 if (dlen > 0 && here[dlen - 1] == '\n') return start;
205 here += dlen;
206 }
207
208 else
209 {
210 int new_buffer_size = 2*buffer_size;
211 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
212 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
213 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
214
215 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
216 {
217 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
218 exit(1);
219 }
220
221 memcpy(new_buffer, buffer, buffer_size);
222 memcpy(new_pbuffer, pbuffer, buffer_size);
223
224 buffer_size = new_buffer_size;
225
226 start = new_buffer + (start - buffer);
227 here = new_buffer + (here - buffer);
228
229 free(buffer);
230 free(dbuffer);
231 free(pbuffer);
232
233 buffer = new_buffer;
234 dbuffer = new_dbuffer;
235 pbuffer = new_pbuffer;
236 }
237 }
238
239 return NULL; /* Control never gets here */
240 }
241
242
243
244
245
246
247
248 /*************************************************
249 * Read number from string *
250 *************************************************/
251
252 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
253 around with conditional compilation, just do the job by hand. It is only used
254 for unpicking arguments, so just keep it simple.
255
256 Arguments:
257 str string to be converted
258 endptr where to put the end pointer
259
260 Returns: the converted value, as an int
261 */
262
static int
get_value(unsigned char *str, unsigned char **endptr)
{
int total = 0;
unsigned char *cursor = str;

/* Step over leading white space, stopping at the terminating zero. */

for (; *cursor != 0 && isspace(*cursor); cursor++) ;

/* Accumulate decimal digits; no sign and no overflow check. */

for (; isdigit(*cursor); cursor++)
  total = total * 10 + (int)(*cursor - '0');

/* Report the first character that was not consumed. */

*endptr = cursor;
return total;
}
272
273
274
275
276 /*************************************************
277 * Convert UTF-8 string to value *
278 *************************************************/
279
280 /* This function takes one or more bytes that represents a UTF-8 character,
281 and returns the value of the character.
282
283 Argument:
284 utf8bytes a pointer to the byte vector
285 vptr a pointer to an int to receive the value
286
287 Returns: > 0 => the number of bytes consumed
288 -6 to 0 => malformed UTF-8 character at offset = (-return)
289 */
290
291 #if !defined NOUTF8
292
293 static int
294 utf82ord(unsigned char *utf8bytes, int *vptr)
295 {
296 int c = *utf8bytes++;
297 int d = c;
298 int i, j, s;
299
300 for (i = -1; i < 6; i++) /* i is number of additional bytes */
301 {
302 if ((d & 0x80) == 0) break;
303 d <<= 1;
304 }
305
306 if (i == -1) { *vptr = c; return 1; } /* ascii character */
307 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
308
309 /* i now has a value in the range 1-5 */
310
311 s = 6*i;
312 d = (c & utf8_table3[i]) << s;
313
314 for (j = 0; j < i; j++)
315 {
316 c = *utf8bytes++;
317 if ((c & 0xc0) != 0x80) return -(j+1);
318 s -= 6;
319 d |= (c & 0x3f) << s;
320 }
321
322 /* Check that encoding was the correct unique one */
323
324 for (j = 0; j < utf8_table1_size; j++)
325 if (d <= utf8_table1[j]) break;
326 if (j != i) return -(i+1);
327
328 /* Valid value */
329
330 *vptr = d;
331 return i+1;
332 }
333
334 #endif
335
336
337
338 /*************************************************
339 * Convert character value to UTF-8 *
340 *************************************************/
341
342 /* This function takes an integer value in the range 0 - 0x7fffffff
343 and encodes it as a UTF-8 character in 1 to 6 bytes.
344
345 Arguments:
346 cvalue the character value
347 utf8bytes pointer to buffer for result - at least 6 bytes long
348
349 Returns: number of bytes placed in the buffer
350 */
351
352 #if !defined NOUTF8
353
354 static int
355 ord2utf8(int cvalue, uschar *utf8bytes)
356 {
357 register int i, j;
358 for (i = 0; i < utf8_table1_size; i++)
359 if (cvalue <= utf8_table1[i]) break;
360 utf8bytes += i;
361 for (j = i; j > 0; j--)
362 {
363 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
364 cvalue >>= 6;
365 }
366 *utf8bytes = utf8_table2[i] | cvalue;
367 return i + 1;
368 }
369
370 #endif
371
372
373
374 /*************************************************
375 * Print character string *
376 *************************************************/
377
378 /* Character string printing function. Must handle UTF-8 strings in utf8
379 mode. Yields number of characters printed. If handed a NULL file, just counts
380 chars without printing. */
381
382 static int pchars(unsigned char *p, int length, FILE *f)
383 {
384 int c = 0;
385 int yield = 0;
386
387 while (length-- > 0)
388 {
389 #if !defined NOUTF8
390 if (use_utf8)
391 {
392 int rc = utf82ord(p, &c);
393
394 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
395 {
396 length -= rc - 1;
397 p += rc;
398 if (PRINTHEX(c))
399 {
400 if (f != NULL) fprintf(f, "%c", c);
401 yield++;
402 }
403 else
404 {
405 int n = 4;
406 if (f != NULL) fprintf(f, "\\x{%02x}", c);
407 yield += (n <= 0x000000ff)? 2 :
408 (n <= 0x00000fff)? 3 :
409 (n <= 0x0000ffff)? 4 :
410 (n <= 0x000fffff)? 5 : 6;
411 }
412 continue;
413 }
414 }
415 #endif
416
417 /* Not UTF-8, or malformed UTF-8 */
418
419 c = *p++;
420 if (PRINTHEX(c))
421 {
422 if (f != NULL) fprintf(f, "%c", c);
423 yield++;
424 }
425 else
426 {
427 if (f != NULL) fprintf(f, "\\x%02x", c);
428 yield += 4;
429 }
430 }
431
432 return yield;
433 }
434
435
436
437 /*************************************************
438 * Callout function *
439 *************************************************/
440
441 /* Called from PCRE as a result of the (?C) item. We print out where we are in
442 the match. Yield zero unless more callouts than the fail count, or the callout
443 data is not zero. */
444
445 static int callout(pcre_callout_block *cb)
446 {
447 FILE *f = (first_callout | callout_extra)? outfile : NULL;
448 int i, pre_start, post_start, subject_length;
449
450 if (callout_extra)
451 {
452 fprintf(f, "Callout %d: last capture = %d\n",
453 cb->callout_number, cb->capture_last);
454
455 for (i = 0; i < cb->capture_top * 2; i += 2)
456 {
457 if (cb->offset_vector[i] < 0)
458 fprintf(f, "%2d: <unset>\n", i/2);
459 else
460 {
461 fprintf(f, "%2d: ", i/2);
462 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
463 cb->offset_vector[i+1] - cb->offset_vector[i], f);
464 fprintf(f, "\n");
465 }
466 }
467 }
468
469 /* Re-print the subject in canonical form, the first time or if giving full
470 datails. On subsequent calls in the same match, we use pchars just to find the
471 printed lengths of the substrings. */
472
473 if (f != NULL) fprintf(f, "--->");
474
475 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
476 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
477 cb->current_position - cb->start_match, f);
478
479 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
480
481 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
482 cb->subject_length - cb->current_position, f);
483
484 if (f != NULL) fprintf(f, "\n");
485
486 /* Always print appropriate indicators, with callout number if not already
487 shown. For automatic callouts, show the pattern offset. */
488
489 if (cb->callout_number == 255)
490 {
491 fprintf(outfile, "%+3d ", cb->pattern_position);
492 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
493 }
494 else
495 {
496 if (callout_extra) fprintf(outfile, " ");
497 else fprintf(outfile, "%3d ", cb->callout_number);
498 }
499
500 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
501 fprintf(outfile, "^");
502
503 if (post_start > 0)
504 {
505 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
506 fprintf(outfile, "^");
507 }
508
509 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
510 fprintf(outfile, " ");
511
512 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
513 pbuffer + cb->pattern_position);
514
515 fprintf(outfile, "\n");
516 first_callout = 0;
517
518 if (cb->callout_data != NULL)
519 {
520 int callout_data = *((int *)(cb->callout_data));
521 if (callout_data != 0)
522 {
523 fprintf(outfile, "Callout data = %d\n", callout_data);
524 return callout_data;
525 }
526 }
527
528 return (cb->callout_number != callout_fail_id)? 0 :
529 (++callout_count >= callout_fail_count)? 1 : 0;
530 }
531
532
533 /*************************************************
534 * Local malloc functions *
535 *************************************************/
536
537 /* Alternative malloc function, to test functionality and show the size of the
538 compiled re. */
539
540 static void *new_malloc(size_t size)
541 {
542 void *block = malloc(size);
543 gotten_store = size;
544 if (show_malloc)
545 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
546 return block;
547 }
548
549 static void new_free(void *block)
550 {
551 if (show_malloc)
552 fprintf(outfile, "free %p\n", block);
553 free(block);
554 }
555
556
557 /* For recursion malloc/free, to test stacking calls */
558
559 static void *stack_malloc(size_t size)
560 {
561 void *block = malloc(size);
562 if (show_malloc)
563 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
564 return block;
565 }
566
567 static void stack_free(void *block)
568 {
569 if (show_malloc)
570 fprintf(outfile, "stack_free %p\n", block);
571 free(block);
572 }
573
574
575 /*************************************************
576 * Call pcre_fullinfo() *
577 *************************************************/
578
579 /* Get one piece of information from the pcre_fullinfo() function */
580
581 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
582 {
583 int rc;
584 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
585 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
586 }
587
588
589
590 /*************************************************
591 * Byte flipping function *
592 *************************************************/
593
/* Reverse the byte order of the low 2 bytes (when n is 2) or the low 4 bytes
(for any other n) of value. Higher-order bits are discarded. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
603
604
605
606
607 /*************************************************
608 * Check match or recursion limit *
609 *************************************************/
610
/* Search for the smallest value of *limit at which pcre_exec() no longer
yields errnumber, and print it. The flag bit is set in extra->flags for the
duration of the search and cleared again afterwards.

Arguments:
  re, extra, bptr, len, start_offset, options, use_offsets,
  use_size_offsets    passed straight through to pcre_exec()
  flag                bit to set in extra->flags while searching
  limit               where each trial limit is stored; presumably this
                      points at the corresponding field of extra - confirm
                      against the callers
  errnumber           the pcre_exec() yield that means "limit too small"
  msg                 name of the limit, used in the output line

Returns:              the yield of the last pcre_exec() call
*/

static int
check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
  int start_offset, int options, int *use_offsets, int use_size_offsets,
  int flag, unsigned long int *limit, int errnumber, const char *msg)
{
int count;
int min = 0;      /* Largest limit seen to fail (0 before any has) */
int mid = 64;     /* The limit being tried */
int max = -1;     /* Smallest limit seen to work; -1 until one is found */

extra->flags |= flag;

for (;;)
  {
  *limit = mid;

  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
    use_offsets, use_size_offsets);

  /* Limit too small. Raise the lower bound; then keep doubling until some
  limit has worked, and bisect after that. When mid is just below max, go
  straight to max so that the other branch can finish the search. */

  if (count == errnumber)
    {
    /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
    min = mid;
    mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
    }

  /* The call ran to completion (match, no match, or partial match), so this
  limit is big enough. If it is exactly one more than the largest failing
  value it is the minimum; otherwise lower the upper bound and bisect. */

  else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
                         count == PCRE_ERROR_PARTIAL)
    {
    if (mid == min + 1)
      {
      fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
      break;
      }
    /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
    max = mid;
    mid = (min + mid)/2;
    }
  else break;    /* Some other error */
  }

extra->flags &= ~flag;
return count;
}
655
656
657
658 /*************************************************
659 * Check newline indicator *
660 *************************************************/
661
662 /* This is used both at compile and run-time to check for <xxx> escapes, where
663 xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.
664
665 Arguments:
666 p points after the leading '<'
667 f file for error message
668
669 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
670 */
671
672 static int
673 check_newline(uschar *p, FILE *f)
674 {
675 if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
676 if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
677 if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
678 if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
679 fprintf(f, "Unknown newline type at: <%s\n", p);
680 return 0;
681 }
682
683
684
685 /*************************************************
686 * Usage function *
687 *************************************************/
688
/* Write the command-line summary to the standard output. The -dfa and -p
lines appear only when the corresponding support is compiled in. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input> [<output>]]\n", stdout);
fputs(" -b show compiled code (bytecode)\n", stdout);
fputs(" -C show PCRE compile-time options and exit\n", stdout);
fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n", stdout);
fputs(" -i show information about compiled patterns\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
fputs(" -s output store (memory) used information\n"
      " -t time compilation and execution\n", stdout);
fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
fputs(" -tm time execution (matching) only\n", stdout);
fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
714
715
716
717 /*************************************************
718 * Main Program *
719 *************************************************/
720
721 /* Read lines from named file or stdin and write to named file or stdout; lines
722 consist of a regular expression, in delimiters and optionally followed by
723 options, followed by a set of test data, terminated by an empty line. */
724
725 int main(int argc, char **argv)
726 {
727 FILE *infile = stdin;
728 int options = 0;
729 int study_options = 0;
730 int op = 1;
731 int timeit = 0;
732 int timeitm = 0;
733 int showinfo = 0;
734 int showstore = 0;
735 int quiet = 0;
736 int size_offsets = 45;
737 int size_offsets_max;
738 int *offsets = NULL;
739 #if !defined NOPOSIX
740 int posix = 0;
741 #endif
742 int debug = 0;
743 int done = 0;
744 int all_use_dfa = 0;
745 int yield = 0;
746 int stack_size;
747
748 /* These vectors store, end-to-end, a list of captured substring names. Assume
749 that 1024 is plenty long enough for the few names we'll be testing. */
750
751 uschar copynames[1024];
752 uschar getnames[1024];
753
754 uschar *copynamesptr;
755 uschar *getnamesptr;
756
757 /* Get buffers from malloc() so that Electric Fence will check their misuse
758 when I am debugging. They grow automatically when very long lines are read. */
759
760 buffer = (unsigned char *)malloc(buffer_size);
761 dbuffer = (unsigned char *)malloc(buffer_size);
762 pbuffer = (unsigned char *)malloc(buffer_size);
763
764 /* The outfile variable is static so that new_malloc can use it. */
765
766 outfile = stdout;
767
768 /* The following _setmode() stuff is some Windows magic that tells its runtime
769 library to translate CRLF into a single LF character. At least, that's what
770 I've been told: never having used Windows I take this all on trust. Originally
771 it set 0x8000, but then I was advised that _O_BINARY was better. */
772
773 #if defined(_WIN32) || defined(WIN32)
774 _setmode( _fileno( stdout ), _O_BINARY );
775 #endif
776
777 /* Scan options */
778
779 while (argc > 1 && argv[op][0] == '-')
780 {
781 unsigned char *endptr;
782
783 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
784 showstore = 1;
785 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
786 else if (strcmp(argv[op], "-b") == 0) debug = 1;
787 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
788 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
789 #if !defined NODFA
790 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
791 #endif
792 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
793 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
794 *endptr == 0))
795 {
796 op++;
797 argc--;
798 }
799 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
800 {
801 int both = argv[op][2] == 0;
802 int temp;
803 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
804 *endptr == 0))
805 {
806 timeitm = temp;
807 op++;
808 argc--;
809 }
810 else timeitm = LOOPREPEAT;
811 if (both) timeit = timeitm;
812 }
813 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
814 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
815 *endptr == 0))
816 {
817 #if defined(_WIN32) || defined(WIN32)
818 printf("PCRE: -S not supported on this OS\n");
819 exit(1);
820 #else
821 int rc;
822 struct rlimit rlim;
823 getrlimit(RLIMIT_STACK, &rlim);
824 rlim.rlim_cur = stack_size * 1024 * 1024;
825 rc = setrlimit(RLIMIT_STACK, &rlim);
826 if (rc != 0)
827 {
828 printf("PCRE: setrlimit() failed with error %d\n", rc);
829 exit(1);
830 }
831 op++;
832 argc--;
833 #endif
834 }
835 #if !defined NOPOSIX
836 else if (strcmp(argv[op], "-p") == 0) posix = 1;
837 #endif
838 else if (strcmp(argv[op], "-C") == 0)
839 {
840 int rc;
841 printf("PCRE version %s\n", pcre_version());
842 printf("Compiled with\n");
843 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
844 printf(" %sUTF-8 support\n", rc? "" : "No ");
845 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
846 printf(" %sUnicode properties support\n", rc? "" : "No ");
847 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
848 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
849 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
850 (rc == -1)? "ANY" : "???");
851 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
852 printf(" Internal link size = %d\n", rc);
853 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
854 printf(" POSIX malloc threshold = %d\n", rc);
855 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
856 printf(" Default match limit = %d\n", rc);
857 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
858 printf(" Default recursion depth limit = %d\n", rc);
859 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
860 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
861 goto EXIT;
862 }
863 else if (strcmp(argv[op], "-help") == 0 ||
864 strcmp(argv[op], "--help") == 0)
865 {
866 usage();
867 goto EXIT;
868 }
869 else
870 {
871 printf("** Unknown or malformed option %s\n", argv[op]);
872 usage();
873 yield = 1;
874 goto EXIT;
875 }
876 op++;
877 argc--;
878 }
879
880 /* Get the store for the offsets vector, and remember what it was */
881
882 size_offsets_max = size_offsets;
883 offsets = (int *)malloc(size_offsets_max * sizeof(int));
884 if (offsets == NULL)
885 {
886 printf("** Failed to get %d bytes of memory for offsets vector\n",
887 size_offsets_max * sizeof(int));
888 yield = 1;
889 goto EXIT;
890 }
891
892 /* Sort out the input and output files */
893
894 if (argc > 1)
895 {
896 infile = fopen(argv[op], INPUT_MODE);
897 if (infile == NULL)
898 {
899 printf("** Failed to open %s\n", argv[op]);
900 yield = 1;
901 goto EXIT;
902 }
903 }
904
905 if (argc > 2)
906 {
907 outfile = fopen(argv[op+1], OUTPUT_MODE);
908 if (outfile == NULL)
909 {
910 printf("** Failed to open %s\n", argv[op+1]);
911 yield = 1;
912 goto EXIT;
913 }
914 }
915
916 /* Set alternative malloc function */
917
918 pcre_malloc = new_malloc;
919 pcre_free = new_free;
920 pcre_stack_malloc = stack_malloc;
921 pcre_stack_free = stack_free;
922
923 /* Heading line unless quiet, then prompt for first regex if stdin */
924
925 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
926
927 /* Main loop */
928
929 while (!done)
930 {
931 pcre *re = NULL;
932 pcre_extra *extra = NULL;
933
934 #if !defined NOPOSIX /* There are still compilers that require no indent */
935 regex_t preg;
936 int do_posix = 0;
937 #endif
938
939 const char *error;
940 unsigned char *p, *pp, *ppp;
941 unsigned char *to_file = NULL;
942 const unsigned char *tables = NULL;
943 unsigned long int true_size, true_study_size = 0;
944 size_t size, regex_gotten_store;
945 int do_study = 0;
946 int do_debug = debug;
947 int debug_lengths = 1;
948 int do_G = 0;
949 int do_g = 0;
950 int do_showinfo = showinfo;
951 int do_showrest = 0;
952 int do_flip = 0;
953 int erroroffset, len, delimiter, poffset;
954
955 use_utf8 = 0;
956
957 if (infile == stdin) printf(" re> ");
958 if (extend_inputline(infile, buffer) == NULL) break;
959 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
960 fflush(outfile);
961
962 p = buffer;
963 while (isspace(*p)) p++;
964 if (*p == 0) continue;
965
966 /* See if the pattern is to be loaded pre-compiled from a file. */
967
968 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
969 {
970 unsigned long int magic, get_options;
971 uschar sbuf[8];
972 FILE *f;
973
974 p++;
975 pp = p + (int)strlen((char *)p);
976 while (isspace(pp[-1])) pp--;
977 *pp = 0;
978
979 f = fopen((char *)p, "rb");
980 if (f == NULL)
981 {
982 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
983 continue;
984 }
985
986 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
987
988 true_size =
989 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
990 true_study_size =
991 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
992
993 re = (real_pcre *)new_malloc(true_size);
994 regex_gotten_store = gotten_store;
995
996 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
997
998 magic = ((real_pcre *)re)->magic_number;
999 if (magic != MAGIC_NUMBER)
1000 {
1001 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1002 {
1003 do_flip = 1;
1004 }
1005 else
1006 {
1007 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1008 fclose(f);
1009 continue;
1010 }
1011 }
1012
1013 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1014 do_flip? " (byte-inverted)" : "", p);
1015
1016 /* Need to know if UTF-8 for printing data strings */
1017
1018 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1019 use_utf8 = (get_options & PCRE_UTF8) != 0;
1020
1021 /* Now see if there is any following study data */
1022
1023 if (true_study_size != 0)
1024 {
1025 pcre_study_data *psd;
1026
1027 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1028 extra->flags = PCRE_EXTRA_STUDY_DATA;
1029
1030 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1031 extra->study_data = psd;
1032
1033 if (fread(psd, 1, true_study_size, f) != true_study_size)
1034 {
1035 FAIL_READ:
1036 fprintf(outfile, "Failed to read data from %s\n", p);
1037 if (extra != NULL) new_free(extra);
1038 if (re != NULL) new_free(re);
1039 fclose(f);
1040 continue;
1041 }
1042 fprintf(outfile, "Study data loaded from %s\n", p);
1043 do_study = 1; /* To get the data output if requested */
1044 }
1045 else fprintf(outfile, "No study data\n");
1046
1047 fclose(f);
1048 goto SHOW_INFO;
1049 }
1050
1051 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1052 the pattern; if is isn't complete, read more. */
1053
1054 delimiter = *p++;
1055
1056 if (isalnum(delimiter) || delimiter == '\\')
1057 {
1058 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1059 goto SKIP_DATA;
1060 }
1061
1062 pp = p;
1063 poffset = p - buffer;
1064
1065 for(;;)
1066 {
1067 while (*pp != 0)
1068 {
1069 if (*pp == '\\' && pp[1] != 0) pp++;
1070 else if (*pp == delimiter) break;
1071 pp++;
1072 }
1073 if (*pp != 0) break;
1074 if (infile == stdin) printf(" > ");
1075 if ((pp = extend_inputline(infile, pp)) == NULL)
1076 {
1077 fprintf(outfile, "** Unexpected EOF\n");
1078 done = 1;
1079 goto CONTINUE;
1080 }
1081 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1082 }
1083
1084 /* The buffer may have moved while being extended; reset the start of data
1085 pointer to the correct relative point in the buffer. */
1086
1087 p = buffer + poffset;
1088
1089 /* If the first character after the delimiter is backslash, make
1090 the pattern end with backslash. This is purely to provide a way
1091 of testing for the error message when a pattern ends with backslash. */
1092
1093 if (pp[1] == '\\') *pp++ = '\\';
1094
1095 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1096 for callouts. */
1097
1098 *pp++ = 0;
1099 strcpy((char *)pbuffer, (char *)p);
1100
1101 /* Look for options after final delimiter */
1102
1103 options = 0;
1104 study_options = 0;
1105 log_store = showstore; /* default from command line */
1106
1107 while (*pp != 0)
1108 {
1109 switch (*pp++)
1110 {
1111 case 'f': options |= PCRE_FIRSTLINE; break;
1112 case 'g': do_g = 1; break;
1113 case 'i': options |= PCRE_CASELESS; break;
1114 case 'm': options |= PCRE_MULTILINE; break;
1115 case 's': options |= PCRE_DOTALL; break;
1116 case 'x': options |= PCRE_EXTENDED; break;
1117
1118 case '+': do_showrest = 1; break;
1119 case 'A': options |= PCRE_ANCHORED; break;
1120 case 'B': do_debug = 1; break;
1121 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1122 case 'D': do_debug = do_showinfo = 1; break;
1123 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1124 case 'F': do_flip = 1; break;
1125 case 'G': do_G = 1; break;
1126 case 'I': do_showinfo = 1; break;
1127 case 'J': options |= PCRE_DUPNAMES; break;
1128 case 'M': log_store = 1; break;
1129 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1130
1131 #if !defined NOPOSIX
1132 case 'P': do_posix = 1; break;
1133 #endif
1134
1135 case 'S': do_study = 1; break;
1136 case 'U': options |= PCRE_UNGREEDY; break;
1137 case 'X': options |= PCRE_EXTRA; break;
1138 case 'Z': debug_lengths = 0;
1139 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1140 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1141
1142 case 'L':
1143 ppp = pp;
1144 /* The '\r' test here is so that it works on Windows. */
1145 /* The '0' test is just in case this is an unterminated line. */
1146 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1147 *ppp = 0;
1148 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1149 {
1150 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1151 goto SKIP_DATA;
1152 }
1153 locale_set = 1;
1154 tables = pcre_maketables();
1155 pp = ppp;
1156 break;
1157
1158 case '>':
1159 to_file = pp;
1160 while (*pp != 0) pp++;
1161 while (isspace(pp[-1])) pp--;
1162 *pp = 0;
1163 break;
1164
1165 case '<':
1166 {
1167 int x = check_newline(pp, outfile);
1168 if (x == 0) goto SKIP_DATA;
1169 options |= x;
1170 while (*pp++ != '>');
1171 }
1172 break;
1173
1174 case '\r': /* So that it works in Windows */
1175 case '\n':
1176 case ' ':
1177 break;
1178
1179 default:
1180 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1181 goto SKIP_DATA;
1182 }
1183 }
1184
1185 /* Handle compiling via the POSIX interface, which doesn't support the
1186 timing, showing, or debugging options, nor the ability to pass over
1187 local character tables. */
1188
1189 #if !defined NOPOSIX
1190 if (posix || do_posix)
1191 {
1192 int rc;
1193 int cflags = 0;
1194
1195 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1196 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1197 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1198 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1199 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1200
1201 rc = regcomp(&preg, (char *)p, cflags);
1202
1203 /* Compilation failed; go back for another re, skipping to blank line
1204 if non-interactive. */
1205
1206 if (rc != 0)
1207 {
1208 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1209 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1210 goto SKIP_DATA;
1211 }
1212 }
1213
1214 /* Handle compiling via the native interface */
1215
1216 else
1217 #endif /* !defined NOPOSIX */
1218
1219 {
1220 if (timeit > 0)
1221 {
1222 register int i;
1223 clock_t time_taken;
1224 clock_t start_time = clock();
1225 for (i = 0; i < timeit; i++)
1226 {
1227 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1228 if (re != NULL) free(re);
1229 }
1230 time_taken = clock() - start_time;
1231 fprintf(outfile, "Compile time %.4f milliseconds\n",
1232 (((double)time_taken * 1000.0) / (double)timeit) /
1233 (double)CLOCKS_PER_SEC);
1234 }
1235
1236 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1237
1238 /* Compilation failed; go back for another re, skipping to blank line
1239 if non-interactive. */
1240
1241 if (re == NULL)
1242 {
1243 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1244 SKIP_DATA:
1245 if (infile != stdin)
1246 {
1247 for (;;)
1248 {
1249 if (extend_inputline(infile, buffer) == NULL)
1250 {
1251 done = 1;
1252 goto CONTINUE;
1253 }
1254 len = (int)strlen((char *)buffer);
1255 while (len > 0 && isspace(buffer[len-1])) len--;
1256 if (len == 0) break;
1257 }
1258 fprintf(outfile, "\n");
1259 }
1260 goto CONTINUE;
1261 }
1262
1263 /* Compilation succeeded; print data if required. There are now two
1264 info-returning functions. The old one has a limited interface and
1265 returns only limited data. Check that it agrees with the newer one. */
1266
1267 if (log_store)
1268 fprintf(outfile, "Memory allocation (code space): %d\n",
1269 (int)(gotten_store -
1270 sizeof(real_pcre) -
1271 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1272
1273 /* Extract the size for possible writing before possibly flipping it,
1274 and remember the store that was got. */
1275
1276 true_size = ((real_pcre *)re)->size;
1277 regex_gotten_store = gotten_store;
1278
1279 /* If /S was present, study the regexp to generate additional info to
1280 help with the matching. */
1281
1282 if (do_study)
1283 {
1284 if (timeit > 0)
1285 {
1286 register int i;
1287 clock_t time_taken;
1288 clock_t start_time = clock();
1289 for (i = 0; i < timeit; i++)
1290 extra = pcre_study(re, study_options, &error);
1291 time_taken = clock() - start_time;
1292 if (extra != NULL) free(extra);
1293 fprintf(outfile, " Study time %.4f milliseconds\n",
1294 (((double)time_taken * 1000.0) / (double)timeit) /
1295 (double)CLOCKS_PER_SEC);
1296 }
1297 extra = pcre_study(re, study_options, &error);
1298 if (error != NULL)
1299 fprintf(outfile, "Failed to study: %s\n", error);
1300 else if (extra != NULL)
1301 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1302 }
1303
1304 /* If the 'F' option was present, we flip the bytes of all the integer
1305 fields in the regex data block and the study block. This is to make it
1306 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1307 compiled on a different architecture. */
1308
1309 if (do_flip)
1310 {
1311 real_pcre *rre = (real_pcre *)re;
1312 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1313 rre->size = byteflip(rre->size, sizeof(rre->size));
1314 rre->options = byteflip(rre->options, sizeof(rre->options));
1315 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1316 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1317 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1318 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1319 rre->name_table_offset = byteflip(rre->name_table_offset,
1320 sizeof(rre->name_table_offset));
1321 rre->name_entry_size = byteflip(rre->name_entry_size,
1322 sizeof(rre->name_entry_size));
1323 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1324
1325 if (extra != NULL)
1326 {
1327 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1328 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1329 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1330 }
1331 }
1332
1333 /* Extract information from the compiled data if required */
1334
1335 SHOW_INFO:
1336
1337 if (do_debug)
1338 {
1339 fprintf(outfile, "------------------------------------------------------------------\n");
1340 pcre_printint(re, outfile, debug_lengths);
1341 }
1342
1343 if (do_showinfo)
1344 {
1345 unsigned long int get_options, all_options;
1346 #if !defined NOINFOCHECK
1347 int old_first_char, old_options, old_count;
1348 #endif
1349 int count, backrefmax, first_char, need_char;
1350 int nameentrysize, namecount;
1351 const uschar *nametable;
1352
1353 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1354 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1355 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1356 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1357 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1358 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1359 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1360 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1361 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1362
1363 #if !defined NOINFOCHECK
1364 old_count = pcre_info(re, &old_options, &old_first_char);
1365 if (count < 0) fprintf(outfile,
1366 "Error %d from pcre_info()\n", count);
1367 else
1368 {
1369 if (old_count != count) fprintf(outfile,
1370 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1371 old_count);
1372
1373 if (old_first_char != first_char) fprintf(outfile,
1374 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1375 first_char, old_first_char);
1376
1377 if (old_options != (int)get_options) fprintf(outfile,
1378 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1379 get_options, old_options);
1380 }
1381 #endif
1382
1383 if (size != regex_gotten_store) fprintf(outfile,
1384 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1385 (int)size, (int)regex_gotten_store);
1386
1387 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1388 if (backrefmax > 0)
1389 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1390
1391 if (namecount > 0)
1392 {
1393 fprintf(outfile, "Named capturing subpatterns:\n");
1394 while (namecount-- > 0)
1395 {
1396 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1397 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1398 GET2(nametable, 0));
1399 nametable += nameentrysize;
1400 }
1401 }
1402
1403 /* The NOPARTIAL bit is a private bit in the options, so we have
1404 to fish it out via our back door */
1405
1406 all_options = ((real_pcre *)re)->options;
1407 if (do_flip)
1408 {
1409 all_options = byteflip(all_options, sizeof(all_options));
1410 }
1411
1412 if ((all_options & PCRE_NOPARTIAL) != 0)
1413 fprintf(outfile, "Partial matching not supported\n");
1414
1415 if (get_options == 0) fprintf(outfile, "No options\n");
1416 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1417 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1418 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1419 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1420 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1421 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1422 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1423 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1424 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1425 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1426 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1427 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1428 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1429 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1430
1431 switch (get_options & PCRE_NEWLINE_BITS)
1432 {
1433 case PCRE_NEWLINE_CR:
1434 fprintf(outfile, "Forced newline sequence: CR\n");
1435 break;
1436
1437 case PCRE_NEWLINE_LF:
1438 fprintf(outfile, "Forced newline sequence: LF\n");
1439 break;
1440
1441 case PCRE_NEWLINE_CRLF:
1442 fprintf(outfile, "Forced newline sequence: CRLF\n");
1443 break;
1444
1445 case PCRE_NEWLINE_ANY:
1446 fprintf(outfile, "Forced newline sequence: ANY\n");
1447 break;
1448
1449 default:
1450 break;
1451 }
1452
1453 if (first_char == -1)
1454 {
1455 fprintf(outfile, "First char at start or follows newline\n");
1456 }
1457 else if (first_char < 0)
1458 {
1459 fprintf(outfile, "No first char\n");
1460 }
1461 else
1462 {
1463 int ch = first_char & 255;
1464 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1465 "" : " (caseless)";
1466 if (PRINTHEX(ch))
1467 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1468 else
1469 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1470 }
1471
1472 if (need_char < 0)
1473 {
1474 fprintf(outfile, "No need char\n");
1475 }
1476 else
1477 {
1478 int ch = need_char & 255;
1479 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1480 "" : " (caseless)";
1481 if (PRINTHEX(ch))
1482 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1483 else
1484 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1485 }
1486
1487 /* Don't output study size; at present it is in any case a fixed
1488 value, but it varies, depending on the computer architecture, and
1489 so messes up the test suite. (And with the /F option, it might be
1490 flipped.) */
1491
1492 if (do_study)
1493 {
1494 if (extra == NULL)
1495 fprintf(outfile, "Study returned NULL\n");
1496 else
1497 {
1498 uschar *start_bits = NULL;
1499 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1500
1501 if (start_bits == NULL)
1502 fprintf(outfile, "No starting byte set\n");
1503 else
1504 {
1505 int i;
1506 int c = 24;
1507 fprintf(outfile, "Starting byte set: ");
1508 for (i = 0; i < 256; i++)
1509 {
1510 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1511 {
1512 if (c > 75)
1513 {
1514 fprintf(outfile, "\n ");
1515 c = 2;
1516 }
1517 if (PRINTHEX(i) && i != ' ')
1518 {
1519 fprintf(outfile, "%c ", i);
1520 c += 2;
1521 }
1522 else
1523 {
1524 fprintf(outfile, "\\x%02x ", i);
1525 c += 5;
1526 }
1527 }
1528 }
1529 fprintf(outfile, "\n");
1530 }
1531 }
1532 }
1533 }
1534
1535 /* If the '>' option was present, we write out the regex to a file, and
1536 that is all. The first 8 bytes of the file are the regex length and then
1537 the study length, in big-endian order. */
1538
1539 if (to_file != NULL)
1540 {
1541 FILE *f = fopen((char *)to_file, "wb");
1542 if (f == NULL)
1543 {
1544 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1545 }
1546 else
1547 {
1548 uschar sbuf[8];
1549 sbuf[0] = (true_size >> 24) & 255;
1550 sbuf[1] = (true_size >> 16) & 255;
1551 sbuf[2] = (true_size >> 8) & 255;
1552 sbuf[3] = (true_size) & 255;
1553
1554 sbuf[4] = (true_study_size >> 24) & 255;
1555 sbuf[5] = (true_study_size >> 16) & 255;
1556 sbuf[6] = (true_study_size >> 8) & 255;
1557 sbuf[7] = (true_study_size) & 255;
1558
1559 if (fwrite(sbuf, 1, 8, f) < 8 ||
1560 fwrite(re, 1, true_size, f) < true_size)
1561 {
1562 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1563 }
1564 else
1565 {
1566 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1567 if (extra != NULL)
1568 {
1569 if (fwrite(extra->study_data, 1, true_study_size, f) <
1570 true_study_size)
1571 {
1572 fprintf(outfile, "Write error on %s: %s\n", to_file,
1573 strerror(errno));
1574 }
1575 else fprintf(outfile, "Study data written to %s\n", to_file);
1576
1577 }
1578 }
1579 fclose(f);
1580 }
1581
1582 new_free(re);
1583 if (extra != NULL) new_free(extra);
1584 if (tables != NULL) new_free((void *)tables);
1585 continue; /* With next regex */
1586 }
1587 } /* End of non-POSIX compile */
1588
1589 /* Read data lines and test them */
1590
1591 for (;;)
1592 {
1593 uschar *q;
1594 uschar *bptr = dbuffer;
1595 int *use_offsets = offsets;
1596 int use_size_offsets = size_offsets;
1597 int callout_data = 0;
1598 int callout_data_set = 0;
1599 int count, c;
1600 int copystrings = 0;
1601 int find_match_limit = 0;
1602 int getstrings = 0;
1603 int getlist = 0;
1604 int gmatched = 0;
1605 int start_offset = 0;
1606 int g_notempty = 0;
1607 int use_dfa = 0;
1608
1609 options = 0;
1610
1611 *copynames = 0;
1612 *getnames = 0;
1613
1614 copynamesptr = copynames;
1615 getnamesptr = getnames;
1616
1617 pcre_callout = callout;
1618 first_callout = 1;
1619 callout_extra = 0;
1620 callout_count = 0;
1621 callout_fail_count = 999999;
1622 callout_fail_id = -1;
1623 show_malloc = 0;
1624
1625 if (extra != NULL) extra->flags &=
1626 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1627
1628 len = 0;
1629 for (;;)
1630 {
1631 if (infile == stdin) printf("data> ");
1632 if (extend_inputline(infile, buffer + len) == NULL)
1633 {
1634 if (len > 0) break;
1635 done = 1;
1636 goto CONTINUE;
1637 }
1638 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1639 len = (int)strlen((char *)buffer);
1640 if (buffer[len-1] == '\n') break;
1641 }
1642
1643 while (len > 0 && isspace(buffer[len-1])) len--;
1644 buffer[len] = 0;
1645 if (len == 0) break;
1646
1647 p = buffer;
1648 while (isspace(*p)) p++;
1649
1650 q = dbuffer;
1651 while ((c = *p++) != 0)
1652 {
1653 int i = 0;
1654 int n = 0;
1655
1656 if (c == '\\') switch ((c = *p++))
1657 {
1658 case 'a': c = 7; break;
1659 case 'b': c = '\b'; break;
1660 case 'e': c = 27; break;
1661 case 'f': c = '\f'; break;
1662 case 'n': c = '\n'; break;
1663 case 'r': c = '\r'; break;
1664 case 't': c = '\t'; break;
1665 case 'v': c = '\v'; break;
1666
1667 case '0': case '1': case '2': case '3':
1668 case '4': case '5': case '6': case '7':
1669 c -= '0';
1670 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1671 c = c * 8 + *p++ - '0';
1672
1673 #if !defined NOUTF8
1674 if (use_utf8 && c > 255)
1675 {
1676 unsigned char buff8[8];
1677 int ii, utn;
1678 utn = ord2utf8(c, buff8);
1679 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1680 c = buff8[ii]; /* Last byte */
1681 }
1682 #endif
1683 break;
1684
1685 case 'x':
1686
1687 /* Handle \x{..} specially - new Perl thing for utf8 */
1688
1689 #if !defined NOUTF8
1690 if (*p == '{')
1691 {
1692 unsigned char *pt = p;
1693 c = 0;
1694 while (isxdigit(*(++pt)))
1695 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1696 if (*pt == '}')
1697 {
1698 unsigned char buff8[8];
1699 int ii, utn;
1700 utn = ord2utf8(c, buff8);
1701 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1702 c = buff8[ii]; /* Last byte */
1703 p = pt + 1;
1704 break;
1705 }
1706 /* Not correct form; fall through */
1707 }
1708 #endif
1709
1710 /* Ordinary \x */
1711
1712 c = 0;
1713 while (i++ < 2 && isxdigit(*p))
1714 {
1715 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1716 p++;
1717 }
1718 break;
1719
1720 case 0: /* \ followed by EOF allows for an empty line */
1721 p--;
1722 continue;
1723
1724 case '>':
1725 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1726 continue;
1727
1728 case 'A': /* Option setting */
1729 options |= PCRE_ANCHORED;
1730 continue;
1731
1732 case 'B':
1733 options |= PCRE_NOTBOL;
1734 continue;
1735
1736 case 'C':
1737 if (isdigit(*p)) /* Set copy string */
1738 {
1739 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1740 copystrings |= 1 << n;
1741 }
1742 else if (isalnum(*p))
1743 {
1744 uschar *npp = copynamesptr;
1745 while (isalnum(*p)) *npp++ = *p++;
1746 *npp++ = 0;
1747 *npp = 0;
1748 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1749 if (n < 0)
1750 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1751 copynamesptr = npp;
1752 }
1753 else if (*p == '+')
1754 {
1755 callout_extra = 1;
1756 p++;
1757 }
1758 else if (*p == '-')
1759 {
1760 pcre_callout = NULL;
1761 p++;
1762 }
1763 else if (*p == '!')
1764 {
1765 callout_fail_id = 0;
1766 p++;
1767 while(isdigit(*p))
1768 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1769 callout_fail_count = 0;
1770 if (*p == '!')
1771 {
1772 p++;
1773 while(isdigit(*p))
1774 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1775 }
1776 }
1777 else if (*p == '*')
1778 {
1779 int sign = 1;
1780 callout_data = 0;
1781 if (*(++p) == '-') { sign = -1; p++; }
1782 while(isdigit(*p))
1783 callout_data = callout_data * 10 + *p++ - '0';
1784 callout_data *= sign;
1785 callout_data_set = 1;
1786 }
1787 continue;
1788
1789 #if !defined NODFA
1790 case 'D':
1791 #if !defined NOPOSIX
1792 if (posix || do_posix)
1793 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1794 else
1795 #endif
1796 use_dfa = 1;
1797 continue;
1798
1799 case 'F':
1800 options |= PCRE_DFA_SHORTEST;
1801 continue;
1802 #endif
1803
1804 case 'G':
1805 if (isdigit(*p))
1806 {
1807 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1808 getstrings |= 1 << n;
1809 }
1810 else if (isalnum(*p))
1811 {
1812 uschar *npp = getnamesptr;
1813 while (isalnum(*p)) *npp++ = *p++;
1814 *npp++ = 0;
1815 *npp = 0;
1816 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1817 if (n < 0)
1818 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1819 getnamesptr = npp;
1820 }
1821 continue;
1822
1823 case 'L':
1824 getlist = 1;
1825 continue;
1826
1827 case 'M':
1828 find_match_limit = 1;
1829 continue;
1830
1831 case 'N':
1832 options |= PCRE_NOTEMPTY;
1833 continue;
1834
1835 case 'O':
1836 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1837 if (n > size_offsets_max)
1838 {
1839 size_offsets_max = n;
1840 free(offsets);
1841 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1842 if (offsets == NULL)
1843 {
1844 printf("** Failed to get %d bytes of memory for offsets vector\n",
1845 size_offsets_max * sizeof(int));
1846 yield = 1;
1847 goto EXIT;
1848 }
1849 }
1850 use_size_offsets = n;
1851 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1852 continue;
1853
1854 case 'P':
1855 options |= PCRE_PARTIAL;
1856 continue;
1857
1858 case 'Q':
1859 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1860 if (extra == NULL)
1861 {
1862 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1863 extra->flags = 0;
1864 }
1865 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1866 extra->match_limit_recursion = n;
1867 continue;
1868
1869 case 'q':
1870 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1871 if (extra == NULL)
1872 {
1873 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1874 extra->flags = 0;
1875 }
1876 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1877 extra->match_limit = n;
1878 continue;
1879
1880 #if !defined NODFA
1881 case 'R':
1882 options |= PCRE_DFA_RESTART;
1883 continue;
1884 #endif
1885
1886 case 'S':
1887 show_malloc = 1;
1888 continue;
1889
1890 case 'Z':
1891 options |= PCRE_NOTEOL;
1892 continue;
1893
1894 case '?':
1895 options |= PCRE_NO_UTF8_CHECK;
1896 continue;
1897
1898 case '<':
1899 {
1900 int x = check_newline(p, outfile);
1901 if (x == 0) goto NEXT_DATA;
1902 options |= x;
1903 while (*p++ != '>');
1904 }
1905 continue;
1906 }
1907 *q++ = c;
1908 }
1909 *q = 0;
1910 len = q - dbuffer;
1911
1912 if ((all_use_dfa || use_dfa) && find_match_limit)
1913 {
1914 printf("**Match limit not relevant for DFA matching: ignored\n");
1915 find_match_limit = 0;
1916 }
1917
1918 /* Handle matching via the POSIX interface, which does not
1919 support timing or playing with the match limit or callout data. */
1920
1921 #if !defined NOPOSIX
1922 if (posix || do_posix)
1923 {
1924 int rc;
1925 int eflags = 0;
1926 regmatch_t *pmatch = NULL;
1927 if (use_size_offsets > 0)
1928 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1929 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1930 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1931
1932 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1933
1934 if (rc != 0)
1935 {
1936 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1937 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1938 }
1939 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1940 != 0)
1941 {
1942 fprintf(outfile, "Matched with REG_NOSUB\n");
1943 }
1944 else
1945 {
1946 size_t i;
1947 for (i = 0; i < (size_t)use_size_offsets; i++)
1948 {
1949 if (pmatch[i].rm_so >= 0)
1950 {
1951 fprintf(outfile, "%2d: ", (int)i);
1952 (void)pchars(dbuffer + pmatch[i].rm_so,
1953 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1954 fprintf(outfile, "\n");
1955 if (i == 0 && do_showrest)
1956 {
1957 fprintf(outfile, " 0+ ");
1958 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1959 outfile);
1960 fprintf(outfile, "\n");
1961 }
1962 }
1963 }
1964 }
1965 free(pmatch);
1966 }
1967
1968 /* Handle matching via the native interface - repeats for /g and /G */
1969
1970 else
1971 #endif /* !defined NOPOSIX */
1972
1973 for (;; gmatched++) /* Loop for /g or /G */
1974 {
1975 if (timeitm > 0)
1976 {
1977 register int i;
1978 clock_t time_taken;
1979 clock_t start_time = clock();
1980
1981 #if !defined NODFA
1982 if (all_use_dfa || use_dfa)
1983 {
1984 int workspace[1000];
1985 for (i = 0; i < timeitm; i++)
1986 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1987 options | g_notempty, use_offsets, use_size_offsets, workspace,
1988 sizeof(workspace)/sizeof(int));
1989 }
1990 else
1991 #endif
1992
1993 for (i = 0; i < timeitm; i++)
1994 count = pcre_exec(re, extra, (char *)bptr, len,
1995 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1996
1997 time_taken = clock() - start_time;
1998 fprintf(outfile, "Execute time %.4f milliseconds\n",
1999 (((double)time_taken * 1000.0) / (double)timeitm) /
2000 (double)CLOCKS_PER_SEC);
2001 }
2002
2003 /* If find_match_limit is set, we want to do repeated matches with
2004 varying limits in order to find the minimum value for the match limit and
2005 for the recursion limit. */
2006
2007 if (find_match_limit)
2008 {
2009 if (extra == NULL)
2010 {
2011 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2012 extra->flags = 0;
2013 }
2014
2015 (void)check_match_limit(re, extra, bptr, len, start_offset,
2016 options|g_notempty, use_offsets, use_size_offsets,
2017 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2018 PCRE_ERROR_MATCHLIMIT, "match()");
2019
2020 count = check_match_limit(re, extra, bptr, len, start_offset,
2021 options|g_notempty, use_offsets, use_size_offsets,
2022 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2023 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2024 }
2025
2026 /* If callout_data is set, use the interface with additional data */
2027
2028 else if (callout_data_set)
2029 {
2030 if (extra == NULL)
2031 {
2032 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2033 extra->flags = 0;
2034 }
2035 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2036 extra->callout_data = &callout_data;
2037 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2038 options | g_notempty, use_offsets, use_size_offsets);
2039 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2040 }
2041
2042 /* The normal case is just to do the match once, with the default
2043 value of match_limit. */
2044
2045 #if !defined NODFA
2046 else if (all_use_dfa || use_dfa)
2047 {
2048 int workspace[1000];
2049 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2050 options | g_notempty, use_offsets, use_size_offsets, workspace,
2051 sizeof(workspace)/sizeof(int));
2052 if (count == 0)
2053 {
2054 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2055 count = use_size_offsets/2;
2056 }
2057 }
2058 #endif
2059
2060 else
2061 {
2062 count = pcre_exec(re, extra, (char *)bptr, len,
2063 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2064 if (count == 0)
2065 {
2066 fprintf(outfile, "Matched, but too many substrings\n");
2067 count = use_size_offsets/3;
2068 }
2069 }
2070
2071 /* Matched */
2072
2073 if (count >= 0)
2074 {
2075 int i, maxcount;
2076
2077 #if !defined NODFA
2078 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2079 #endif
2080 maxcount = use_size_offsets/3;
2081
2082 /* This is a check against a lunatic return value. */
2083
2084 if (count > maxcount)
2085 {
2086 fprintf(outfile,
2087 "** PCRE error: returned count %d is too big for offset size %d\n",
2088 count, use_size_offsets);
2089 count = use_size_offsets/3;
2090 if (do_g || do_G)
2091 {
2092 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2093 do_g = do_G = FALSE; /* Break g/G loop */
2094 }
2095 }
2096
2097 for (i = 0; i < count * 2; i += 2)
2098 {
2099 if (use_offsets[i] < 0)
2100 fprintf(outfile, "%2d: <unset>\n", i/2);
2101 else
2102 {
2103 fprintf(outfile, "%2d: ", i/2);
2104 (void)pchars(bptr + use_offsets[i],
2105 use_offsets[i+1] - use_offsets[i], outfile);
2106 fprintf(outfile, "\n");
2107 if (i == 0)
2108 {
2109 if (do_showrest)
2110 {
2111 fprintf(outfile, " 0+ ");
2112 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2113 outfile);
2114 fprintf(outfile, "\n");
2115 }
2116 }
2117 }
2118 }
2119
2120 for (i = 0; i < 32; i++)
2121 {
2122 if ((copystrings & (1 << i)) != 0)
2123 {
2124 char copybuffer[256];
2125 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2126 i, copybuffer, sizeof(copybuffer));
2127 if (rc < 0)
2128 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2129 else
2130 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2131 }
2132 }
2133
2134 for (copynamesptr = copynames;
2135 *copynamesptr != 0;
2136 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2137 {
2138 char copybuffer[256];
2139 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2140 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2141 if (rc < 0)
2142 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2143 else
2144 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2145 }
2146
2147 for (i = 0; i < 32; i++)
2148 {
2149 if ((getstrings & (1 << i)) != 0)
2150 {
2151 const char *substring;
2152 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2153 i, &substring);
2154 if (rc < 0)
2155 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2156 else
2157 {
2158 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2159 pcre_free_substring(substring);
2160 }
2161 }
2162 }
2163
2164 for (getnamesptr = getnames;
2165 *getnamesptr != 0;
2166 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2167 {
2168 const char *substring;
2169 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2170 count, (char *)getnamesptr, &substring);
2171 if (rc < 0)
2172 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2173 else
2174 {
2175 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2176 pcre_free_substring(substring);
2177 }
2178 }
2179
2180 if (getlist)
2181 {
2182 const char **stringlist;
2183 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2184 &stringlist);
2185 if (rc < 0)
2186 fprintf(outfile, "get substring list failed %d\n", rc);
2187 else
2188 {
2189 for (i = 0; i < count; i++)
2190 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2191 if (stringlist[i] != NULL)
2192 fprintf(outfile, "string list not terminated by NULL\n");
2193 /* free((void *)stringlist); */
2194 pcre_free_substring_list(stringlist);
2195 }
2196 }
2197 }
2198
2199 /* There was a partial match */
2200
2201 else if (count == PCRE_ERROR_PARTIAL)
2202 {
2203 fprintf(outfile, "Partial match");
2204 #if !defined NODFA
2205 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2206 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2207 bptr + use_offsets[0]);
2208 #endif
2209 fprintf(outfile, "\n");
2210 break; /* Out of the /g loop */
2211 }
2212
2213 /* Failed to match. If this is a /g or /G loop and we previously set
2214 g_notempty after a null match, this is not necessarily the end.
2215 We want to advance the start offset, and continue. In the case of UTF-8
2216 matching, the advance must be one character, not one byte. Fudge the
2217 offset values to achieve this. We won't be at the end of the string -
2218 that was checked before setting g_notempty. */
2219
2220 else
2221 {
2222 if (g_notempty != 0)
2223 {
2224 int onechar = 1;
2225 use_offsets[0] = start_offset;
2226 if (use_utf8)
2227 {
2228 while (start_offset + onechar < len)
2229 {
2230 int tb = bptr[start_offset+onechar];
2231 if (tb <= 127) break;
2232 tb &= 0xc0;
2233 if (tb != 0 && tb != 0xc0) onechar++;
2234 }
2235 }
2236 use_offsets[1] = start_offset + onechar;
2237 }
2238 else
2239 {
2240 if (count == PCRE_ERROR_NOMATCH)
2241 {
2242 if (gmatched == 0) fprintf(outfile, "No match\n");
2243 }
2244 else fprintf(outfile, "Error %d\n", count);
2245 break; /* Out of the /g loop */
2246 }
2247 }
2248
2249 /* If not /g or /G we are done */
2250
2251 if (!do_g && !do_G) break;
2252
2253 /* If we have matched an empty string, first check to see if we are at
2254 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2255 what Perl's /g option does. This turns out to be rather cunning. First
2256 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2257 same point. If this fails (picked up above) we advance to the next
2258 character. */
2259
2260 g_notempty = 0;
2261 if (use_offsets[0] == use_offsets[1])
2262 {
2263 if (use_offsets[0] == len) break;
2264 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2265 }
2266
2267 /* For /g, update the start offset, leaving the rest alone */
2268
2269 if (do_g) start_offset = use_offsets[1];
2270
2271 /* For /G, update the pointer and length */
2272
2273 else
2274 {
2275 bptr += use_offsets[1];
2276 len -= use_offsets[1];
2277 }
2278 } /* End of loop for /g and /G */
2279
2280 NEXT_DATA: continue;
2281 } /* End of loop for data lines */
2282
2283 CONTINUE:
2284
2285 #if !defined NOPOSIX
2286 if (posix || do_posix) regfree(&preg);
2287 #endif
2288
2289 if (re != NULL) new_free(re);
2290 if (extra != NULL) new_free(extra);
2291 if (tables != NULL)
2292 {
2293 new_free((void *)tables);
2294 setlocale(LC_CTYPE, "C");
2295 locale_set = 0;
2296 }
2297 }
2298
2299 if (infile == stdin) fprintf(outfile, "\n");
2300
2301 EXIT:
2302
2303 if (infile != NULL && infile != stdin) fclose(infile);
2304 if (outfile != NULL && outfile != stdout) fclose(outfile);
2305
2306 free(buffer);
2307 free(dbuffer);
2308 free(pbuffer);
2309 free(offsets);
2310
2311 return yield;
2312 }
2313
2314 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12