/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 96 - (show annotations) (download)
Fri Mar 2 13:10:43 2007 UTC (7 years, 6 months ago) by nigel
File MIME type: text/plain
File size: 66834 byte(s)
 r6896@hex:  nm | 2007-03-02 13:09:14 +0000
 Added EOL and keywork properties throughout

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
46
47
48 /* A number of things vary for Windows builds. Originally, pcretest opened its
49 input and output without "b"; then I was told that "b" was needed in some
50 environments, so it was added for release 5.0 to both the input and output. (It
51 makes no difference on Unix-like systems.) Later I was told that it is wrong
52 for the input on Windows. I've now abstracted the modes into two macros that
53 are set here, to make it easier to fiddle with them, and removed "b" from the
54 input mode under Windows. */
55
56 #if defined(_WIN32) || defined(WIN32)
57 #include <io.h> /* For _setmode() */
58 #include <fcntl.h> /* For _O_BINARY */
59 #define INPUT_MODE "r"
60 #define OUTPUT_MODE "wb"
61
62 #else
63 #include <sys/time.h> /* These two includes are needed */
64 #include <sys/resource.h> /* for setrlimit(). */
65 #define INPUT_MODE "rb"
66 #define OUTPUT_MODE "wb"
67 #endif
68
69
70 #define PCRE_SPY /* For Win32 build, import data, not export */
71
72 /* We include pcre_internal.h because we need the internal info for displaying
73 the results of pcre_study() and we also need to know about the internal
74 macros, structures, and other internal data values; pcretest has "inside
75 information" compared to a program that strictly follows the PCRE API. */
76
77 #include "pcre_internal.h"
78
79 /* We need access to the data tables that PCRE uses. So as not to have to keep
80 two copies, we include the source file here, changing the names of the external
81 symbols to prevent clashes. */
82
83 #define _pcre_utf8_table1 utf8_table1
84 #define _pcre_utf8_table1_size utf8_table1_size
85 #define _pcre_utf8_table2 utf8_table2
86 #define _pcre_utf8_table3 utf8_table3
87 #define _pcre_utf8_table4 utf8_table4
88 #define _pcre_utt utt
89 #define _pcre_utt_size utt_size
90 #define _pcre_OP_lengths OP_lengths
91
92 #include "pcre_tables.c"
93
94 /* We also need the pcre_printint() function for printing out compiled
95 patterns. This function is in a separate file so that it can be included in
96 pcre_compile.c when that module is compiled with debugging enabled.
97
98 The definition of the macro PRINTABLE, which determines whether to print an
99 output character as-is or as a hex value when showing compiled patterns, is
100 contained in this file. We use it here also, in cases when the locale has not
101 been explicitly changed, so as to get consistent output from systems that
102 differ in their output from isprint() even in the "C" locale. */
103
104 #include "pcre_printint.src"
105
106 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
107
108
109 /* It is possible to compile this test program without including support for
110 testing the POSIX interface, though this is not available via the standard
111 Makefile. */
112
113 #if !defined NOPOSIX
114 #include "pcreposix.h"
115 #endif
116
117 /* It is also possible, for the benefit of the version imported into Exim, to
118 build pcretest without support for UTF8 (define NOUTF8), without the interface
119 to the DFA matcher (NODFA), and without the doublecheck of the old "info"
120 function (define NOINFOCHECK). */
121
122
123 /* Other parameters */
124
125 #ifndef CLOCKS_PER_SEC
126 #ifdef CLK_TCK
127 #define CLOCKS_PER_SEC CLK_TCK
128 #else
129 #define CLOCKS_PER_SEC 100
130 #endif
131 #endif
132
133 /* This is the default loop count for timing. */
134
135 #define LOOPREPEAT 500000
136
137 /* Static variables */
138
139 static FILE *outfile; /* Where results are written; main() starts it at stdout */
140 static int log_store = 0; /* Per-pattern copy of the -s/-m setting; pattern option M sets it too */
141 static int callout_count; /* Callouts numbered callout_fail_id seen so far (see callout()) */
142 static int callout_extra; /* Non-zero: callout() also lists the captured substrings */
143 static int callout_fail_count; /* callout() yields 1 once callout_count reaches this */
144 static int callout_fail_id; /* The callout number that callout_count is counting */
145 static int first_callout; /* Non-zero until callout() has echoed the subject once */
146 static int locale_set = 0; /* Non-zero: PRINTHEX uses isprint() instead of PRINTABLE */
147 static int show_malloc; /* Non-zero: the malloc/free wrappers log each call to outfile */
148 static int use_utf8; /* Non-zero: pchars() decodes its input as UTF-8 */
149 static size_t gotten_store; /* Size of the most recent new_malloc() request */
150
151 /* The buffers grow automatically if very long input lines are encountered. */
152
153 static int buffer_size = 50000;
154 static uschar *buffer = NULL;
155 static uschar *dbuffer = NULL;
156 static uschar *pbuffer = NULL;
157
158
159
160 /*************************************************
161 * Read or extend an input line *
162 *************************************************/
163
164 /* Input lines are read into buffer, but both patterns and data lines can be
165 continued over multiple input lines. In addition, if the buffer fills up, we
166 want to automatically expand it so as to be able to handle extremely large
167 lines that are needed for certain stress tests. When the input buffer is
168 expanded, the other two buffers must also be expanded likewise, and the
169 contents of pbuffer, which are a copy of the input for callouts, must be
170 preserved (for when expansion happens for a data line). This is not the most
171 optimal way of handling this, but hey, this is just a test program!
172
173 Arguments:
174 f the file to read
175 start where in buffer to start (this *must* be within buffer)
176
177 Returns: pointer to the start of new data
178 could be a copy of start, or could be moved
179 NULL if no data read and EOF reached
180 */
181
182 static uschar *
183 extend_inputline(FILE *f, uschar *start)
184 {
185 uschar *here = start;
186
187 for (;;)
188 {
189 int rlen = buffer_size - (here - buffer);
190
191 if (rlen > 1000)
192 {
193 int dlen;
194 if (fgets((char *)here, rlen, f) == NULL)
195 return (here == start)? NULL : start;
196 dlen = (int)strlen((char *)here);
197 if (dlen > 0 && here[dlen - 1] == '\n') return start;
198 here += dlen;
199 }
200
201 else
202 {
203 int new_buffer_size = 2*buffer_size;
204 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
205 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
206 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
207
208 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
209 {
210 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
211 exit(1);
212 }
213
214 memcpy(new_buffer, buffer, buffer_size);
215 memcpy(new_pbuffer, pbuffer, buffer_size);
216
217 buffer_size = new_buffer_size;
218
219 start = new_buffer + (start - buffer);
220 here = new_buffer + (here - buffer);
221
222 free(buffer);
223 free(dbuffer);
224 free(pbuffer);
225
226 buffer = new_buffer;
227 dbuffer = new_dbuffer;
228 pbuffer = new_pbuffer;
229 }
230 }
231
232 return NULL; /* Control never gets here */
233 }
234
235
236
237
238
239
240
241 /*************************************************
242 * Read number from string *
243 *************************************************/
244
245 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
246 around with conditional compilation, just do the job by hand. It is only used
247 for unpicking arguments, so just keep it simple.
248
249 Arguments:
250 str string to be converted
251 endptr where to put the end pointer
252
253 Returns: the converted value, as an int
254 */
255
/* Convert a run of decimal digits, optionally preceded by white space, into
an int.

Arguments:
  str       the string to convert
  endptr    receives a pointer to the first character after the digits (or
              after the leading white space if there are no digits)

Returns:    the value of the digits; 0 if there are none. A value too large
            for an int is clamped to INT_MAX rather than being allowed to
            overflow, which is undefined behaviour for a signed type. */

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  while (isdigit(*str))
  {
    int digit = (int)(*str++ - '0');

    /* result*10 + digit fits only if result <= (INT_MAX - digit)/10. Once
    saturated, the value stays at INT_MAX while any further digits are
    consumed. */

    if (result > (INT_MAX - digit) / 10)
      result = INT_MAX;
    else
      result = result * 10 + digit;
  }

  *endptr = str;
  return result;
}
265
266
267
268
269 /*************************************************
270 * Convert UTF-8 string to value *
271 *************************************************/
272
273 /* This function takes one or more bytes that represents a UTF-8 character,
274 and returns the value of the character.
275
276 Argument:
277 utf8bytes a pointer to the byte vector
278 vptr a pointer to an int to receive the value
279
280 Returns: > 0 => the number of bytes consumed
281 -6 to 0 => malformed UTF-8 character at offset = (-return)
282 */
283
284 #if !defined NOUTF8
285
286 static int
287 utf82ord(unsigned char *utf8bytes, int *vptr)
288 {
289 int c = *utf8bytes++;
290 int d = c;
291 int i, j, s;
292
293 for (i = -1; i < 6; i++) /* i is number of additional bytes */
294 {
295 if ((d & 0x80) == 0) break;
296 d <<= 1;
297 }
298
299 if (i == -1) { *vptr = c; return 1; } /* ascii character */
300 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
301
302 /* i now has a value in the range 1-5 */
303
304 s = 6*i;
305 d = (c & utf8_table3[i]) << s;
306
307 for (j = 0; j < i; j++)
308 {
309 c = *utf8bytes++;
310 if ((c & 0xc0) != 0x80) return -(j+1);
311 s -= 6;
312 d |= (c & 0x3f) << s;
313 }
314
315 /* Check that encoding was the correct unique one */
316
317 for (j = 0; j < utf8_table1_size; j++)
318 if (d <= utf8_table1[j]) break;
319 if (j != i) return -(i+1);
320
321 /* Valid value */
322
323 *vptr = d;
324 return i+1;
325 }
326
327 #endif
328
329
330
331 /*************************************************
332 * Convert character value to UTF-8 *
333 *************************************************/
334
335 /* This function takes an integer value in the range 0 - 0x7fffffff
336 and encodes it as a UTF-8 character in 1 to 6 bytes.
337
338 Arguments:
339 cvalue the character value
340 utf8bytes pointer to buffer for result - at least 6 bytes long
341
342 Returns: number of characters placed in the buffer
343 */
344
345 #if !defined NOUTF8
346
347 static int
348 ord2utf8(int cvalue, uschar *utf8bytes)
349 {
350 register int i, j;
351 for (i = 0; i < utf8_table1_size; i++)
352 if (cvalue <= utf8_table1[i]) break;
353 utf8bytes += i;
354 for (j = i; j > 0; j--)
355 {
356 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
357 cvalue >>= 6;
358 }
359 *utf8bytes = utf8_table2[i] | cvalue;
360 return i + 1;
361 }
362
363 #endif
364
365
366
367 /*************************************************
368 * Print character string *
369 *************************************************/
370
371 /* Character string printing function. Must handle UTF-8 strings in utf8
372 mode. Yields number of characters printed. If handed a NULL file, just counts
373 chars without printing. */
374
375 static int pchars(unsigned char *p, int length, FILE *f)
376 {
377 int c = 0;
378 int yield = 0;
379
380 while (length-- > 0)
381 {
382 #if !defined NOUTF8
383 if (use_utf8)
384 {
385 int rc = utf82ord(p, &c);
386
387 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
388 {
389 length -= rc - 1;
390 p += rc;
391 if (PRINTHEX(c))
392 {
393 if (f != NULL) fprintf(f, "%c", c);
394 yield++;
395 }
396 else
397 {
398 int n = 4;
399 if (f != NULL) fprintf(f, "\\x{%02x}", c);
400 yield += (n <= 0x000000ff)? 2 :
401 (n <= 0x00000fff)? 3 :
402 (n <= 0x0000ffff)? 4 :
403 (n <= 0x000fffff)? 5 : 6;
404 }
405 continue;
406 }
407 }
408 #endif
409
410 /* Not UTF-8, or malformed UTF-8 */
411
412 c = *p++;
413 if (PRINTHEX(c))
414 {
415 if (f != NULL) fprintf(f, "%c", c);
416 yield++;
417 }
418 else
419 {
420 if (f != NULL) fprintf(f, "\\x%02x", c);
421 yield += 4;
422 }
423 }
424
425 return yield;
426 }
427
428
429
430 /*************************************************
431 * Callout function *
432 *************************************************/
433
434 /* Called from PCRE as a result of the (?C) item. We print out where we are in
435 the match. Yield zero unless more callouts than the fail count, or the callout
436 data is not zero. */
437
438 static int callout(pcre_callout_block *cb)
439 {
440 FILE *f = (first_callout | callout_extra)? outfile : NULL;
441 int i, pre_start, post_start, subject_length;
442
443 if (callout_extra)
444 {
445 fprintf(f, "Callout %d: last capture = %d\n",
446 cb->callout_number, cb->capture_last);
447
448 for (i = 0; i < cb->capture_top * 2; i += 2)
449 {
450 if (cb->offset_vector[i] < 0)
451 fprintf(f, "%2d: <unset>\n", i/2);
452 else
453 {
454 fprintf(f, "%2d: ", i/2);
455 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
456 cb->offset_vector[i+1] - cb->offset_vector[i], f);
457 fprintf(f, "\n");
458 }
459 }
460 }
461
462 /* Re-print the subject in canonical form, the first time or if giving full
463 datails. On subsequent calls in the same match, we use pchars just to find the
464 printed lengths of the substrings. */
465
466 if (f != NULL) fprintf(f, "--->");
467
468 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
469 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
470 cb->current_position - cb->start_match, f);
471
472 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
473
474 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
475 cb->subject_length - cb->current_position, f);
476
477 if (f != NULL) fprintf(f, "\n");
478
479 /* Always print appropriate indicators, with callout number if not already
480 shown. For automatic callouts, show the pattern offset. */
481
482 if (cb->callout_number == 255)
483 {
484 fprintf(outfile, "%+3d ", cb->pattern_position);
485 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
486 }
487 else
488 {
489 if (callout_extra) fprintf(outfile, " ");
490 else fprintf(outfile, "%3d ", cb->callout_number);
491 }
492
493 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
494 fprintf(outfile, "^");
495
496 if (post_start > 0)
497 {
498 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
499 fprintf(outfile, "^");
500 }
501
502 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
503 fprintf(outfile, " ");
504
505 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
506 pbuffer + cb->pattern_position);
507
508 fprintf(outfile, "\n");
509 first_callout = 0;
510
511 if (cb->callout_data != NULL)
512 {
513 int callout_data = *((int *)(cb->callout_data));
514 if (callout_data != 0)
515 {
516 fprintf(outfile, "Callout data = %d\n", callout_data);
517 return callout_data;
518 }
519 }
520
521 return (cb->callout_number != callout_fail_id)? 0 :
522 (++callout_count >= callout_fail_count)? 1 : 0;
523 }
524
525
526 /*************************************************
527 * Local malloc functions *
528 *************************************************/
529
530 /* Alternative malloc function, to test functionality and show the size of the
531 compiled re. */
532
533 static void *new_malloc(size_t size)
534 {
535 void *block = malloc(size);
536 gotten_store = size;
537 if (show_malloc)
538 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
539 return block;
540 }
541
542 static void new_free(void *block)
543 {
544 if (show_malloc)
545 fprintf(outfile, "free %p\n", block);
546 free(block);
547 }
548
549
550 /* For recursion malloc/free, to test stacking calls */
551
552 static void *stack_malloc(size_t size)
553 {
554 void *block = malloc(size);
555 if (show_malloc)
556 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
557 return block;
558 }
559
560 static void stack_free(void *block)
561 {
562 if (show_malloc)
563 fprintf(outfile, "stack_free %p\n", block);
564 free(block);
565 }
566
567
568 /*************************************************
569 * Call pcre_fullinfo() *
570 *************************************************/
571
572 /* Get one piece of information from the pcre_fullinfo() function */
573
574 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
575 {
576 int rc;
577 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
578 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
579 }
580
581
582
583 /*************************************************
584 * Byte flipping function *
585 *************************************************/
586
/* Reverse the byte order of a value.

Arguments:
  value    the value to flip
  n        2 to swap the two low-order bytes; any other value reverses the
             four low-order bytes

Returns:   the flipped value; bytes above those involved are dropped */

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int byte0 = value & 0xff;
  unsigned long int byte1 = (value >> 8) & 0xff;
  unsigned long int byte2 = (value >> 16) & 0xff;
  unsigned long int byte3 = (value >> 24) & 0xff;

  if (n == 2)
  {
    return (byte0 << 8) | byte1;
  }

  return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
596
597
598
599
600 /*************************************************
601 * Check match or recursion limit *
602 *************************************************/
603
604 static int
605 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
606 int start_offset, int options, int *use_offsets, int use_size_offsets,
607 int flag, unsigned long int *limit, int errnumber, const char *msg)
608 {
609 int count;
610 int min = 0;
611 int mid = 64;
612 int max = -1;
613
614 extra->flags |= flag;
615
616 for (;;)
617 {
618 *limit = mid;
619
620 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
621 use_offsets, use_size_offsets);
622
623 if (count == errnumber)
624 {
625 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
626 min = mid;
627 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
628 }
629
630 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
631 count == PCRE_ERROR_PARTIAL)
632 {
633 if (mid == min + 1)
634 {
635 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
636 break;
637 }
638 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
639 max = mid;
640 mid = (min + mid)/2;
641 }
642 else break; /* Some other error */
643 }
644
645 extra->flags &= ~flag;
646 return count;
647 }
648
649
650
651 /*************************************************
652 * Check newline indicator *
653 *************************************************/
654
655 /* This is used both at compile and run-time to check for <xxx> escapes, where
656 xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.
657
658 Arguments:
659 p points after the leading '<'
660 f file for error message
661
662 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
663 */
664
665 static int
666 check_newline(uschar *p, FILE *f)
667 {
668 if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
669 if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
670 if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
671 if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
672 fprintf(f, "Unknown newline type at: <%s\n", p);
673 return 0;
674 }
675
676
677
678 /*************************************************
679 * Usage function *
680 *************************************************/
681
/* Write the command line summary to the standard output. The -dfa and -p
lines are left out when NODFA or NOPOSIX, respectively, is defined. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input> [<output>]]\n", stdout);
  fputs(" -b show compiled code (bytecode)\n", stdout);
  fputs(" -C show PCRE compile-time options and exit\n", stdout);
  fputs(" -d debug: show compiled code and information (-b and -i)\n",
    stdout);
#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
  fputs(" -help show usage information\n", stdout);
  fputs(" -i show information about compiled patterns\n", stdout);
  fputs(" -m output memory used information\n", stdout);
  fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif
  fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
  fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
  fputs(" -s output store (memory) used information\n", stdout);
  fputs(" -t time compilation and execution\n", stdout);
  fputs(" -t <n> time compilation and execution, repeating <n> times\n",
    stdout);
  fputs(" -tm time execution (matching) only\n", stdout);
  fputs(" -tm <n> time execution (matching) only, repeating <n> times\n",
    stdout);
}
707
708
709
710 /*************************************************
711 * Main Program *
712 *************************************************/
713
714 /* Read lines from named file or stdin and write to named file or stdout; lines
715 consist of a regular expression, in delimiters and optionally followed by
716 options, followed by a set of test data, terminated by an empty line. */
717
718 int main(int argc, char **argv)
719 {
720 FILE *infile = stdin;
721 int options = 0;
722 int study_options = 0;
723 int op = 1;
724 int timeit = 0;
725 int timeitm = 0;
726 int showinfo = 0;
727 int showstore = 0;
728 int quiet = 0;
729 int size_offsets = 45;
730 int size_offsets_max;
731 int *offsets = NULL;
732 #if !defined NOPOSIX
733 int posix = 0;
734 #endif
735 int debug = 0;
736 int done = 0;
737 int all_use_dfa = 0;
738 int yield = 0;
739 int stack_size;
740
741 /* These vectors store, end-to-end, a list of captured substring names. Assume
742 that 1024 is plenty long enough for the few names we'll be testing. */
743
744 uschar copynames[1024];
745 uschar getnames[1024];
746
747 uschar *copynamesptr;
748 uschar *getnamesptr;
749
750 /* Get buffers from malloc() so that Electric Fence will check their misuse
751 when I am debugging. They grow automatically when very long lines are read. */
752
753 buffer = (unsigned char *)malloc(buffer_size);
754 dbuffer = (unsigned char *)malloc(buffer_size);
755 pbuffer = (unsigned char *)malloc(buffer_size);
756
757 /* The outfile variable is static so that new_malloc can use it. */
758
759 outfile = stdout;
760
761 /* The following _setmode() stuff is some Windows magic that tells its runtime
762 library to translate CRLF into a single LF character. At least, that's what
763 I've been told: never having used Windows I take this all on trust. Originally
764 it set 0x8000, but then I was advised that _O_BINARY was better. */
765
766 #if defined(_WIN32) || defined(WIN32)
767 _setmode( _fileno( stdout ), _O_BINARY );
768 #endif
769
770 /* Scan options */
771
772 while (argc > 1 && argv[op][0] == '-')
773 {
774 unsigned char *endptr;
775
776 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
777 showstore = 1;
778 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
779 else if (strcmp(argv[op], "-b") == 0) debug = 1;
780 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
781 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
782 #if !defined NODFA
783 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
784 #endif
785 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
786 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
787 *endptr == 0))
788 {
789 op++;
790 argc--;
791 }
792 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
793 {
794 int both = argv[op][2] == 0;
795 int temp;
796 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
797 *endptr == 0))
798 {
799 timeitm = temp;
800 op++;
801 argc--;
802 }
803 else timeitm = LOOPREPEAT;
804 if (both) timeit = timeitm;
805 }
806 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
807 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
808 *endptr == 0))
809 {
810 #if defined(_WIN32) || defined(WIN32)
811 printf("PCRE: -S not supported on this OS\n");
812 exit(1);
813 #else
814 int rc;
815 struct rlimit rlim;
816 getrlimit(RLIMIT_STACK, &rlim);
817 rlim.rlim_cur = stack_size * 1024 * 1024;
818 rc = setrlimit(RLIMIT_STACK, &rlim);
819 if (rc != 0)
820 {
821 printf("PCRE: setrlimit() failed with error %d\n", rc);
822 exit(1);
823 }
824 op++;
825 argc--;
826 #endif
827 }
828 #if !defined NOPOSIX
829 else if (strcmp(argv[op], "-p") == 0) posix = 1;
830 #endif
831 else if (strcmp(argv[op], "-C") == 0)
832 {
833 int rc;
834 printf("PCRE version %s\n", pcre_version());
835 printf("Compiled with\n");
836 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
837 printf(" %sUTF-8 support\n", rc? "" : "No ");
838 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
839 printf(" %sUnicode properties support\n", rc? "" : "No ");
840 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
841 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
842 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
843 (rc == -1)? "ANY" : "???");
844 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
845 printf(" Internal link size = %d\n", rc);
846 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
847 printf(" POSIX malloc threshold = %d\n", rc);
848 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
849 printf(" Default match limit = %d\n", rc);
850 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
851 printf(" Default recursion depth limit = %d\n", rc);
852 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
853 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
854 exit(0);
855 }
856 else if (strcmp(argv[op], "-help") == 0 ||
857 strcmp(argv[op], "--help") == 0)
858 {
859 usage();
860 goto EXIT;
861 }
862 else
863 {
864 printf("** Unknown or malformed option %s\n", argv[op]);
865 usage();
866 yield = 1;
867 goto EXIT;
868 }
869 op++;
870 argc--;
871 }
872
873 /* Get the store for the offsets vector, and remember what it was */
874
875 size_offsets_max = size_offsets;
876 offsets = (int *)malloc(size_offsets_max * sizeof(int));
877 if (offsets == NULL)
878 {
879 printf("** Failed to get %d bytes of memory for offsets vector\n",
880 size_offsets_max * sizeof(int));
881 yield = 1;
882 goto EXIT;
883 }
884
885 /* Sort out the input and output files */
886
887 if (argc > 1)
888 {
889 infile = fopen(argv[op], INPUT_MODE);
890 if (infile == NULL)
891 {
892 printf("** Failed to open %s\n", argv[op]);
893 yield = 1;
894 goto EXIT;
895 }
896 }
897
898 if (argc > 2)
899 {
900 outfile = fopen(argv[op+1], OUTPUT_MODE);
901 if (outfile == NULL)
902 {
903 printf("** Failed to open %s\n", argv[op+1]);
904 yield = 1;
905 goto EXIT;
906 }
907 }
908
909 /* Set alternative malloc function */
910
911 pcre_malloc = new_malloc;
912 pcre_free = new_free;
913 pcre_stack_malloc = stack_malloc;
914 pcre_stack_free = stack_free;
915
916 /* Heading line unless quiet, then prompt for first regex if stdin */
917
918 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
919
920 /* Main loop */
921
922 while (!done)
923 {
924 pcre *re = NULL;
925 pcre_extra *extra = NULL;
926
927 #if !defined NOPOSIX /* There are still compilers that require no indent */
928 regex_t preg;
929 int do_posix = 0;
930 #endif
931
932 const char *error;
933 unsigned char *p, *pp, *ppp;
934 unsigned char *to_file = NULL;
935 const unsigned char *tables = NULL;
936 unsigned long int true_size, true_study_size = 0;
937 size_t size, regex_gotten_store;
938 int do_study = 0;
939 int do_debug = debug;
940 int do_G = 0;
941 int do_g = 0;
942 int do_showinfo = showinfo;
943 int do_showrest = 0;
944 int do_flip = 0;
945 int erroroffset, len, delimiter, poffset;
946
947 use_utf8 = 0;
948
949 if (infile == stdin) printf(" re> ");
950 if (extend_inputline(infile, buffer) == NULL) break;
951 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
952 fflush(outfile);
953
954 p = buffer;
955 while (isspace(*p)) p++;
956 if (*p == 0) continue;
957
958 /* See if the pattern is to be loaded pre-compiled from a file. */
959
960 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
961 {
962 unsigned long int magic, get_options;
963 uschar sbuf[8];
964 FILE *f;
965
966 p++;
967 pp = p + (int)strlen((char *)p);
968 while (isspace(pp[-1])) pp--;
969 *pp = 0;
970
971 f = fopen((char *)p, "rb");
972 if (f == NULL)
973 {
974 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
975 continue;
976 }
977
978 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
979
980 true_size =
981 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
982 true_study_size =
983 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
984
985 re = (real_pcre *)new_malloc(true_size);
986 regex_gotten_store = gotten_store;
987
988 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
989
990 magic = ((real_pcre *)re)->magic_number;
991 if (magic != MAGIC_NUMBER)
992 {
993 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
994 {
995 do_flip = 1;
996 }
997 else
998 {
999 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1000 fclose(f);
1001 continue;
1002 }
1003 }
1004
1005 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1006 do_flip? " (byte-inverted)" : "", p);
1007
1008 /* Need to know if UTF-8 for printing data strings */
1009
1010 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1011 use_utf8 = (get_options & PCRE_UTF8) != 0;
1012
1013 /* Now see if there is any following study data */
1014
1015 if (true_study_size != 0)
1016 {
1017 pcre_study_data *psd;
1018
1019 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1020 extra->flags = PCRE_EXTRA_STUDY_DATA;
1021
1022 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1023 extra->study_data = psd;
1024
1025 if (fread(psd, 1, true_study_size, f) != true_study_size)
1026 {
1027 FAIL_READ:
1028 fprintf(outfile, "Failed to read data from %s\n", p);
1029 if (extra != NULL) new_free(extra);
1030 if (re != NULL) new_free(re);
1031 fclose(f);
1032 continue;
1033 }
1034 fprintf(outfile, "Study data loaded from %s\n", p);
1035 do_study = 1; /* To get the data output if requested */
1036 }
1037 else fprintf(outfile, "No study data\n");
1038
1039 fclose(f);
1040 goto SHOW_INFO;
1041 }
1042
1043 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1044 the pattern; if it isn't complete, read more. */
1045
1046 delimiter = *p++;
1047
1048 if (isalnum(delimiter) || delimiter == '\\')
1049 {
1050 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1051 goto SKIP_DATA;
1052 }
1053
1054 pp = p;
1055 poffset = p - buffer;
1056
1057 for(;;)
1058 {
1059 while (*pp != 0)
1060 {
1061 if (*pp == '\\' && pp[1] != 0) pp++;
1062 else if (*pp == delimiter) break;
1063 pp++;
1064 }
1065 if (*pp != 0) break;
1066 if (infile == stdin) printf(" > ");
1067 if ((pp = extend_inputline(infile, pp)) == NULL)
1068 {
1069 fprintf(outfile, "** Unexpected EOF\n");
1070 done = 1;
1071 goto CONTINUE;
1072 }
1073 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1074 }
1075
1076 /* The buffer may have moved while being extended; reset the start of data
1077 pointer to the correct relative point in the buffer. */
1078
1079 p = buffer + poffset;
1080
1081 /* If the first character after the delimiter is backslash, make
1082 the pattern end with backslash. This is purely to provide a way
1083 of testing for the error message when a pattern ends with backslash. */
1084
1085 if (pp[1] == '\\') *pp++ = '\\';
1086
1087 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1088 for callouts. */
1089
1090 *pp++ = 0;
1091 strcpy((char *)pbuffer, (char *)p);
1092
1093 /* Look for options after final delimiter */
1094
1095 options = 0;
1096 study_options = 0;
1097 log_store = showstore; /* default from command line */
1098
1099 while (*pp != 0)
1100 {
1101 switch (*pp++)
1102 {
1103 case 'f': options |= PCRE_FIRSTLINE; break;
1104 case 'g': do_g = 1; break;
1105 case 'i': options |= PCRE_CASELESS; break;
1106 case 'm': options |= PCRE_MULTILINE; break;
1107 case 's': options |= PCRE_DOTALL; break;
1108 case 'x': options |= PCRE_EXTENDED; break;
1109
1110 case '+': do_showrest = 1; break;
1111 case 'A': options |= PCRE_ANCHORED; break;
1112 case 'B': do_debug = 1; break;
1113 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1114 case 'D': do_debug = do_showinfo = 1; break;
1115 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1116 case 'F': do_flip = 1; break;
1117 case 'G': do_G = 1; break;
1118 case 'I': do_showinfo = 1; break;
1119 case 'J': options |= PCRE_DUPNAMES; break;
1120 case 'M': log_store = 1; break;
1121 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1122
1123 #if !defined NOPOSIX
1124 case 'P': do_posix = 1; break;
1125 #endif
1126
1127 case 'S': do_study = 1; break;
1128 case 'U': options |= PCRE_UNGREEDY; break;
1129 case 'X': options |= PCRE_EXTRA; break;
1130 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1131 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1132
1133 case 'L':
1134 ppp = pp;
1135 /* The '\r' test here is so that it works on Windows. */
1136 /* The NUL (0) test is just in case this is an unterminated line. */
1137 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1138 *ppp = 0;
1139 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1140 {
1141 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1142 goto SKIP_DATA;
1143 }
1144 locale_set = 1;
1145 tables = pcre_maketables();
1146 pp = ppp;
1147 break;
1148
1149 case '>':
1150 to_file = pp;
1151 while (*pp != 0) pp++;
1152 while (isspace(pp[-1])) pp--;
1153 *pp = 0;
1154 break;
1155
1156 case '<':
1157 {
1158 int x = check_newline(pp, outfile);
1159 if (x == 0) goto SKIP_DATA;
1160 options |= x;
1161 while (*pp++ != '>');
1162 }
1163 break;
1164
1165 case '\r': /* So that it works in Windows */
1166 case '\n':
1167 case ' ':
1168 break;
1169
1170 default:
1171 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1172 goto SKIP_DATA;
1173 }
1174 }
1175
1176 /* Handle compiling via the POSIX interface, which doesn't support the
1177 timing, showing, or debugging options, nor the ability to pass over
1178 local character tables. */
1179
1180 #if !defined NOPOSIX
1181 if (posix || do_posix)
1182 {
1183 int rc;
1184 int cflags = 0;
1185
1186 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1187 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1188 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1189 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1190 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1191
1192 rc = regcomp(&preg, (char *)p, cflags);
1193
1194 /* Compilation failed; go back for another re, skipping to blank line
1195 if non-interactive. */
1196
1197 if (rc != 0)
1198 {
1199 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1200 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1201 goto SKIP_DATA;
1202 }
1203 }
1204
1205 /* Handle compiling via the native interface */
1206
1207 else
1208 #endif /* !defined NOPOSIX */
1209
1210 {
1211 if (timeit > 0)
1212 {
1213 register int i;
1214 clock_t time_taken;
1215 clock_t start_time = clock();
1216 for (i = 0; i < timeit; i++)
1217 {
1218 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1219 if (re != NULL) free(re);
1220 }
1221 time_taken = clock() - start_time;
1222 fprintf(outfile, "Compile time %.4f milliseconds\n",
1223 (((double)time_taken * 1000.0) / (double)timeit) /
1224 (double)CLOCKS_PER_SEC);
1225 }
1226
1227 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1228
1229 /* Compilation failed; go back for another re, skipping to blank line
1230 if non-interactive. */
1231
1232 if (re == NULL)
1233 {
1234 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1235 SKIP_DATA:
1236 if (infile != stdin)
1237 {
1238 for (;;)
1239 {
1240 if (extend_inputline(infile, buffer) == NULL)
1241 {
1242 done = 1;
1243 goto CONTINUE;
1244 }
1245 len = (int)strlen((char *)buffer);
1246 while (len > 0 && isspace(buffer[len-1])) len--;
1247 if (len == 0) break;
1248 }
1249 fprintf(outfile, "\n");
1250 }
1251 goto CONTINUE;
1252 }
1253
1254 /* Compilation succeeded; print data if required. There are now two
1255 info-returning functions. The old one has a limited interface and
1256 returns only limited data. Check that it agrees with the newer one. */
1257
1258 if (log_store)
1259 fprintf(outfile, "Memory allocation (code space): %d\n",
1260 (int)(gotten_store -
1261 sizeof(real_pcre) -
1262 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1263
1264 /* Extract the size for possible writing before possibly flipping it,
1265 and remember the store that was got. */
1266
1267 true_size = ((real_pcre *)re)->size;
1268 regex_gotten_store = gotten_store;
1269
1270 /* If /S was present, study the regexp to generate additional info to
1271 help with the matching. */
1272
1273 if (do_study)
1274 {
1275 if (timeit > 0)
1276 {
1277 register int i;
1278 clock_t time_taken;
1279 clock_t start_time = clock();
1280 for (i = 0; i < timeit; i++)
1281 extra = pcre_study(re, study_options, &error);
1282 time_taken = clock() - start_time;
1283 if (extra != NULL) free(extra);
1284 fprintf(outfile, " Study time %.4f milliseconds\n",
1285 (((double)time_taken * 1000.0) / (double)timeit) /
1286 (double)CLOCKS_PER_SEC);
1287 }
1288 extra = pcre_study(re, study_options, &error);
1289 if (error != NULL)
1290 fprintf(outfile, "Failed to study: %s\n", error);
1291 else if (extra != NULL)
1292 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1293 }
1294
1295 /* If the 'F' option was present, we flip the bytes of all the integer
1296 fields in the regex data block and the study block. This is to make it
1297 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1298 compiled on a different architecture. */
1299
1300 if (do_flip)
1301 {
1302 real_pcre *rre = (real_pcre *)re;
1303 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1304 rre->size = byteflip(rre->size, sizeof(rre->size));
1305 rre->options = byteflip(rre->options, sizeof(rre->options));
1306 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1307 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1308 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1309 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1310 rre->name_table_offset = byteflip(rre->name_table_offset,
1311 sizeof(rre->name_table_offset));
1312 rre->name_entry_size = byteflip(rre->name_entry_size,
1313 sizeof(rre->name_entry_size));
1314 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1315
1316 if (extra != NULL)
1317 {
1318 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1319 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1320 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1321 }
1322 }
1323
1324 /* Extract information from the compiled data if required */
1325
1326 SHOW_INFO:
1327
1328 if (do_debug)
1329 {
1330 fprintf(outfile, "------------------------------------------------------------------\n");
1331 pcre_printint(re, outfile);
1332 }
1333
1334 if (do_showinfo)
1335 {
1336 unsigned long int get_options, all_options;
1337 #if !defined NOINFOCHECK
1338 int old_first_char, old_options, old_count;
1339 #endif
1340 int count, backrefmax, first_char, need_char;
1341 int nameentrysize, namecount;
1342 const uschar *nametable;
1343
1344 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1345 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1346 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1347 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1348 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1349 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1350 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1351 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1352 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1353
1354 #if !defined NOINFOCHECK
1355 old_count = pcre_info(re, &old_options, &old_first_char);
1356 if (count < 0) fprintf(outfile,
1357 "Error %d from pcre_info()\n", count);
1358 else
1359 {
1360 if (old_count != count) fprintf(outfile,
1361 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1362 old_count);
1363
1364 if (old_first_char != first_char) fprintf(outfile,
1365 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1366 first_char, old_first_char);
1367
1368 if (old_options != (int)get_options) fprintf(outfile,
1369 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1370 get_options, old_options);
1371 }
1372 #endif
1373
1374 if (size != regex_gotten_store) fprintf(outfile,
1375 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1376 (int)size, (int)regex_gotten_store);
1377
1378 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1379 if (backrefmax > 0)
1380 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1381
1382 if (namecount > 0)
1383 {
1384 fprintf(outfile, "Named capturing subpatterns:\n");
1385 while (namecount-- > 0)
1386 {
1387 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1388 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1389 GET2(nametable, 0));
1390 nametable += nameentrysize;
1391 }
1392 }
1393
1394 /* The NOPARTIAL bit is a private bit in the options, so we have
1395 to fish it out via our back door */
1396
1397 all_options = ((real_pcre *)re)->options;
1398 if (do_flip)
1399 {
1400 all_options = byteflip(all_options, sizeof(all_options));
1401 }
1402
1403 if ((all_options & PCRE_NOPARTIAL) != 0)
1404 fprintf(outfile, "Partial matching not supported\n");
1405
1406 if (get_options == 0) fprintf(outfile, "No options\n");
1407 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1408 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1409 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1410 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1411 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1412 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1413 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1414 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1415 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1416 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1417 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1418 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1419 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1420 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1421
1422 switch (get_options & PCRE_NEWLINE_BITS)
1423 {
1424 case PCRE_NEWLINE_CR:
1425 fprintf(outfile, "Forced newline sequence: CR\n");
1426 break;
1427
1428 case PCRE_NEWLINE_LF:
1429 fprintf(outfile, "Forced newline sequence: LF\n");
1430 break;
1431
1432 case PCRE_NEWLINE_CRLF:
1433 fprintf(outfile, "Forced newline sequence: CRLF\n");
1434 break;
1435
1436 case PCRE_NEWLINE_ANY:
1437 fprintf(outfile, "Forced newline sequence: ANY\n");
1438 break;
1439
1440 default:
1441 break;
1442 }
1443
1444 if (first_char == -1)
1445 {
1446 fprintf(outfile, "First char at start or follows newline\n");
1447 }
1448 else if (first_char < 0)
1449 {
1450 fprintf(outfile, "No first char\n");
1451 }
1452 else
1453 {
1454 int ch = first_char & 255;
1455 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1456 "" : " (caseless)";
1457 if (PRINTHEX(ch))
1458 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1459 else
1460 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1461 }
1462
1463 if (need_char < 0)
1464 {
1465 fprintf(outfile, "No need char\n");
1466 }
1467 else
1468 {
1469 int ch = need_char & 255;
1470 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1471 "" : " (caseless)";
1472 if (PRINTHEX(ch))
1473 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1474 else
1475 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1476 }
1477
1478 /* Don't output study size; at present it is in any case a fixed
1479 value, but it varies, depending on the computer architecture, and
1480 so messes up the test suite. (And with the /F option, it might be
1481 flipped.) */
1482
1483 if (do_study)
1484 {
1485 if (extra == NULL)
1486 fprintf(outfile, "Study returned NULL\n");
1487 else
1488 {
1489 uschar *start_bits = NULL;
1490 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1491
1492 if (start_bits == NULL)
1493 fprintf(outfile, "No starting byte set\n");
1494 else
1495 {
1496 int i;
1497 int c = 24;
1498 fprintf(outfile, "Starting byte set: ");
1499 for (i = 0; i < 256; i++)
1500 {
1501 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1502 {
1503 if (c > 75)
1504 {
1505 fprintf(outfile, "\n ");
1506 c = 2;
1507 }
1508 if (PRINTHEX(i) && i != ' ')
1509 {
1510 fprintf(outfile, "%c ", i);
1511 c += 2;
1512 }
1513 else
1514 {
1515 fprintf(outfile, "\\x%02x ", i);
1516 c += 5;
1517 }
1518 }
1519 }
1520 fprintf(outfile, "\n");
1521 }
1522 }
1523 }
1524 }
1525
1526 /* If the '>' option was present, we write out the regex to a file, and
1527 that is all. The first 8 bytes of the file are the regex length and then
1528 the study length, in big-endian order. */
1529
1530 if (to_file != NULL)
1531 {
1532 FILE *f = fopen((char *)to_file, "wb");
1533 if (f == NULL)
1534 {
1535 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1536 }
1537 else
1538 {
1539 uschar sbuf[8];
1540 sbuf[0] = (true_size >> 24) & 255;
1541 sbuf[1] = (true_size >> 16) & 255;
1542 sbuf[2] = (true_size >> 8) & 255;
1543 sbuf[3] = (true_size) & 255;
1544
1545 sbuf[4] = (true_study_size >> 24) & 255;
1546 sbuf[5] = (true_study_size >> 16) & 255;
1547 sbuf[6] = (true_study_size >> 8) & 255;
1548 sbuf[7] = (true_study_size) & 255;
1549
1550 if (fwrite(sbuf, 1, 8, f) < 8 ||
1551 fwrite(re, 1, true_size, f) < true_size)
1552 {
1553 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1554 }
1555 else
1556 {
1557 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1558 if (extra != NULL)
1559 {
1560 if (fwrite(extra->study_data, 1, true_study_size, f) <
1561 true_study_size)
1562 {
1563 fprintf(outfile, "Write error on %s: %s\n", to_file,
1564 strerror(errno));
1565 }
1566 else fprintf(outfile, "Study data written to %s\n", to_file);
1567
1568 }
1569 }
1570 fclose(f);
1571 }
1572
1573 new_free(re);
1574 if (extra != NULL) new_free(extra);
1575 if (tables != NULL) new_free((void *)tables);
1576 continue; /* With next regex */
1577 }
1578 } /* End of non-POSIX compile */
1579
1580 /* Read data lines and test them */
1581
1582 for (;;)
1583 {
1584 uschar *q;
1585 uschar *bptr = dbuffer;
1586 int *use_offsets = offsets;
1587 int use_size_offsets = size_offsets;
1588 int callout_data = 0;
1589 int callout_data_set = 0;
1590 int count, c;
1591 int copystrings = 0;
1592 int find_match_limit = 0;
1593 int getstrings = 0;
1594 int getlist = 0;
1595 int gmatched = 0;
1596 int start_offset = 0;
1597 int g_notempty = 0;
1598 int use_dfa = 0;
1599
1600 options = 0;
1601
1602 *copynames = 0;
1603 *getnames = 0;
1604
1605 copynamesptr = copynames;
1606 getnamesptr = getnames;
1607
1608 pcre_callout = callout;
1609 first_callout = 1;
1610 callout_extra = 0;
1611 callout_count = 0;
1612 callout_fail_count = 999999;
1613 callout_fail_id = -1;
1614 show_malloc = 0;
1615
1616 if (extra != NULL) extra->flags &=
1617 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1618
1619 len = 0;
1620 for (;;)
1621 {
1622 if (infile == stdin) printf("data> ");
1623 if (extend_inputline(infile, buffer + len) == NULL)
1624 {
1625 if (len > 0) break;
1626 done = 1;
1627 goto CONTINUE;
1628 }
1629 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1630 len = (int)strlen((char *)buffer);
1631 if (buffer[len-1] == '\n') break;
1632 }
1633
1634 while (len > 0 && isspace(buffer[len-1])) len--;
1635 buffer[len] = 0;
1636 if (len == 0) break;
1637
1638 p = buffer;
1639 while (isspace(*p)) p++;
1640
1641 q = dbuffer;
1642 while ((c = *p++) != 0)
1643 {
1644 int i = 0;
1645 int n = 0;
1646
1647 if (c == '\\') switch ((c = *p++))
1648 {
1649 case 'a': c = 7; break;
1650 case 'b': c = '\b'; break;
1651 case 'e': c = 27; break;
1652 case 'f': c = '\f'; break;
1653 case 'n': c = '\n'; break;
1654 case 'r': c = '\r'; break;
1655 case 't': c = '\t'; break;
1656 case 'v': c = '\v'; break;
1657
1658 case '0': case '1': case '2': case '3':
1659 case '4': case '5': case '6': case '7':
1660 c -= '0';
1661 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1662 c = c * 8 + *p++ - '0';
1663
1664 #if !defined NOUTF8
1665 if (use_utf8 && c > 255)
1666 {
1667 unsigned char buff8[8];
1668 int ii, utn;
1669 utn = ord2utf8(c, buff8);
1670 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1671 c = buff8[ii]; /* Last byte */
1672 }
1673 #endif
1674 break;
1675
1676 case 'x':
1677
1678 /* Handle \x{..} specially - new Perl thing for utf8 */
1679
1680 #if !defined NOUTF8
1681 if (*p == '{')
1682 {
1683 unsigned char *pt = p;
1684 c = 0;
1685 while (isxdigit(*(++pt)))
1686 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1687 if (*pt == '}')
1688 {
1689 unsigned char buff8[8];
1690 int ii, utn;
1691 utn = ord2utf8(c, buff8);
1692 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1693 c = buff8[ii]; /* Last byte */
1694 p = pt + 1;
1695 break;
1696 }
1697 /* Not correct form; fall through */
1698 }
1699 #endif
1700
1701 /* Ordinary \x */
1702
1703 c = 0;
1704 while (i++ < 2 && isxdigit(*p))
1705 {
1706 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1707 p++;
1708 }
1709 break;
1710
1711 case 0: /* \ followed by EOF allows for an empty line */
1712 p--;
1713 continue;
1714
1715 case '>':
1716 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1717 continue;
1718
1719 case 'A': /* Option setting */
1720 options |= PCRE_ANCHORED;
1721 continue;
1722
1723 case 'B':
1724 options |= PCRE_NOTBOL;
1725 continue;
1726
1727 case 'C':
1728 if (isdigit(*p)) /* Set copy string */
1729 {
1730 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1731 copystrings |= 1 << n;
1732 }
1733 else if (isalnum(*p))
1734 {
1735 uschar *npp = copynamesptr;
1736 while (isalnum(*p)) *npp++ = *p++;
1737 *npp++ = 0;
1738 *npp = 0;
1739 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1740 if (n < 0)
1741 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1742 copynamesptr = npp;
1743 }
1744 else if (*p == '+')
1745 {
1746 callout_extra = 1;
1747 p++;
1748 }
1749 else if (*p == '-')
1750 {
1751 pcre_callout = NULL;
1752 p++;
1753 }
1754 else if (*p == '!')
1755 {
1756 callout_fail_id = 0;
1757 p++;
1758 while(isdigit(*p))
1759 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1760 callout_fail_count = 0;
1761 if (*p == '!')
1762 {
1763 p++;
1764 while(isdigit(*p))
1765 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1766 }
1767 }
1768 else if (*p == '*')
1769 {
1770 int sign = 1;
1771 callout_data = 0;
1772 if (*(++p) == '-') { sign = -1; p++; }
1773 while(isdigit(*p))
1774 callout_data = callout_data * 10 + *p++ - '0';
1775 callout_data *= sign;
1776 callout_data_set = 1;
1777 }
1778 continue;
1779
1780 #if !defined NODFA
1781 case 'D':
1782 #if !defined NOPOSIX
1783 if (posix || do_posix)
1784 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1785 else
1786 #endif
1787 use_dfa = 1;
1788 continue;
1789
1790 case 'F':
1791 options |= PCRE_DFA_SHORTEST;
1792 continue;
1793 #endif
1794
1795 case 'G':
1796 if (isdigit(*p))
1797 {
1798 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1799 getstrings |= 1 << n;
1800 }
1801 else if (isalnum(*p))
1802 {
1803 uschar *npp = getnamesptr;
1804 while (isalnum(*p)) *npp++ = *p++;
1805 *npp++ = 0;
1806 *npp = 0;
1807 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1808 if (n < 0)
1809 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1810 getnamesptr = npp;
1811 }
1812 continue;
1813
1814 case 'L':
1815 getlist = 1;
1816 continue;
1817
1818 case 'M':
1819 find_match_limit = 1;
1820 continue;
1821
1822 case 'N':
1823 options |= PCRE_NOTEMPTY;
1824 continue;
1825
1826 case 'O':
1827 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1828 if (n > size_offsets_max)
1829 {
1830 size_offsets_max = n;
1831 free(offsets);
1832 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1833 if (offsets == NULL)
1834 {
1835 printf("** Failed to get %d bytes of memory for offsets vector\n",
1836 size_offsets_max * sizeof(int));
1837 yield = 1;
1838 goto EXIT;
1839 }
1840 }
1841 use_size_offsets = n;
1842 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1843 continue;
1844
1845 case 'P':
1846 options |= PCRE_PARTIAL;
1847 continue;
1848
1849 case 'Q':
1850 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1851 if (extra == NULL)
1852 {
1853 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1854 extra->flags = 0;
1855 }
1856 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1857 extra->match_limit_recursion = n;
1858 continue;
1859
1860 case 'q':
1861 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1862 if (extra == NULL)
1863 {
1864 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1865 extra->flags = 0;
1866 }
1867 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1868 extra->match_limit = n;
1869 continue;
1870
1871 #if !defined NODFA
1872 case 'R':
1873 options |= PCRE_DFA_RESTART;
1874 continue;
1875 #endif
1876
1877 case 'S':
1878 show_malloc = 1;
1879 continue;
1880
1881 case 'Z':
1882 options |= PCRE_NOTEOL;
1883 continue;
1884
1885 case '?':
1886 options |= PCRE_NO_UTF8_CHECK;
1887 continue;
1888
1889 case '<':
1890 {
1891 int x = check_newline(p, outfile);
1892 if (x == 0) goto NEXT_DATA;
1893 options |= x;
1894 while (*p++ != '>');
1895 }
1896 continue;
1897 }
1898 *q++ = c;
1899 }
1900 *q = 0;
1901 len = q - dbuffer;
1902
1903 if ((all_use_dfa || use_dfa) && find_match_limit)
1904 {
1905 printf("**Match limit not relevant for DFA matching: ignored\n");
1906 find_match_limit = 0;
1907 }
1908
1909 /* Handle matching via the POSIX interface, which does not
1910 support timing or playing with the match limit or callout data. */
1911
1912 #if !defined NOPOSIX
1913 if (posix || do_posix)
1914 {
1915 int rc;
1916 int eflags = 0;
1917 regmatch_t *pmatch = NULL;
1918 if (use_size_offsets > 0)
1919 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1920 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1921 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1922
1923 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1924
1925 if (rc != 0)
1926 {
1927 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1928 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1929 }
1930 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1931 != 0)
1932 {
1933 fprintf(outfile, "Matched with REG_NOSUB\n");
1934 }
1935 else
1936 {
1937 size_t i;
1938 for (i = 0; i < (size_t)use_size_offsets; i++)
1939 {
1940 if (pmatch[i].rm_so >= 0)
1941 {
1942 fprintf(outfile, "%2d: ", (int)i);
1943 (void)pchars(dbuffer + pmatch[i].rm_so,
1944 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1945 fprintf(outfile, "\n");
1946 if (i == 0 && do_showrest)
1947 {
1948 fprintf(outfile, " 0+ ");
1949 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1950 outfile);
1951 fprintf(outfile, "\n");
1952 }
1953 }
1954 }
1955 }
1956 free(pmatch);
1957 }
1958
1959 /* Handle matching via the native interface - repeats for /g and /G */
1960
1961 else
1962 #endif /* !defined NOPOSIX */
1963
1964 for (;; gmatched++) /* Loop for /g or /G */
1965 {
1966 if (timeitm > 0)
1967 {
1968 register int i;
1969 clock_t time_taken;
1970 clock_t start_time = clock();
1971
1972 #if !defined NODFA
1973 if (all_use_dfa || use_dfa)
1974 {
1975 int workspace[1000];
1976 for (i = 0; i < timeitm; i++)
1977 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1978 options | g_notempty, use_offsets, use_size_offsets, workspace,
1979 sizeof(workspace)/sizeof(int));
1980 }
1981 else
1982 #endif
1983
1984 for (i = 0; i < timeitm; i++)
1985 count = pcre_exec(re, extra, (char *)bptr, len,
1986 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1987
1988 time_taken = clock() - start_time;
1989 fprintf(outfile, "Execute time %.4f milliseconds\n",
1990 (((double)time_taken * 1000.0) / (double)timeitm) /
1991 (double)CLOCKS_PER_SEC);
1992 }
1993
1994 /* If find_match_limit is set, we want to do repeated matches with
1995 varying limits in order to find the minimum value for the match limit and
1996 for the recursion limit. */
1997
1998 if (find_match_limit)
1999 {
2000 if (extra == NULL)
2001 {
2002 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2003 extra->flags = 0;
2004 }
2005
2006 (void)check_match_limit(re, extra, bptr, len, start_offset,
2007 options|g_notempty, use_offsets, use_size_offsets,
2008 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2009 PCRE_ERROR_MATCHLIMIT, "match()");
2010
2011 count = check_match_limit(re, extra, bptr, len, start_offset,
2012 options|g_notempty, use_offsets, use_size_offsets,
2013 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2014 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2015 }
2016
2017 /* If callout_data is set, use the interface with additional data */
2018
2019 else if (callout_data_set)
2020 {
2021 if (extra == NULL)
2022 {
2023 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2024 extra->flags = 0;
2025 }
2026 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2027 extra->callout_data = &callout_data;
2028 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2029 options | g_notempty, use_offsets, use_size_offsets);
2030 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2031 }
2032
2033 /* The normal case is just to do the match once, with the default
2034 value of match_limit. */
2035
2036 #if !defined NODFA
2037 else if (all_use_dfa || use_dfa)
2038 {
2039 int workspace[1000];
2040 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2041 options | g_notempty, use_offsets, use_size_offsets, workspace,
2042 sizeof(workspace)/sizeof(int));
2043 if (count == 0)
2044 {
2045 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2046 count = use_size_offsets/2;
2047 }
2048 }
2049 #endif
2050
2051 else
2052 {
2053 count = pcre_exec(re, extra, (char *)bptr, len,
2054 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2055 if (count == 0)
2056 {
2057 fprintf(outfile, "Matched, but too many substrings\n");
2058 count = use_size_offsets/3;
2059 }
2060 }
2061
2062 /* Matched */
2063
2064 if (count >= 0)
2065 {
2066 int i, maxcount;
2067
2068 #if !defined NODFA
2069 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2070 #endif
2071 maxcount = use_size_offsets/3;
2072
2073 /* This is a check against a lunatic return value. */
2074
2075 if (count > maxcount)
2076 {
2077 fprintf(outfile,
2078 "** PCRE error: returned count %d is too big for offset size %d\n",
2079 count, use_size_offsets);
2080 count = use_size_offsets/3;
2081 if (do_g || do_G)
2082 {
2083 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2084 do_g = do_G = FALSE; /* Break g/G loop */
2085 }
2086 }
2087
2088 for (i = 0; i < count * 2; i += 2)
2089 {
2090 if (use_offsets[i] < 0)
2091 fprintf(outfile, "%2d: <unset>\n", i/2);
2092 else
2093 {
2094 fprintf(outfile, "%2d: ", i/2);
2095 (void)pchars(bptr + use_offsets[i],
2096 use_offsets[i+1] - use_offsets[i], outfile);
2097 fprintf(outfile, "\n");
2098 if (i == 0)
2099 {
2100 if (do_showrest)
2101 {
2102 fprintf(outfile, " 0+ ");
2103 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2104 outfile);
2105 fprintf(outfile, "\n");
2106 }
2107 }
2108 }
2109 }
2110
2111 for (i = 0; i < 32; i++)
2112 {
2113 if ((copystrings & (1 << i)) != 0)
2114 {
2115 char copybuffer[256];
2116 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2117 i, copybuffer, sizeof(copybuffer));
2118 if (rc < 0)
2119 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2120 else
2121 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2122 }
2123 }
2124
2125 for (copynamesptr = copynames;
2126 *copynamesptr != 0;
2127 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2128 {
2129 char copybuffer[256];
2130 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2131 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2132 if (rc < 0)
2133 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2134 else
2135 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2136 }
2137
2138 for (i = 0; i < 32; i++)
2139 {
2140 if ((getstrings & (1 << i)) != 0)
2141 {
2142 const char *substring;
2143 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2144 i, &substring);
2145 if (rc < 0)
2146 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2147 else
2148 {
2149 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2150 pcre_free_substring(substring);
2151 }
2152 }
2153 }
2154
2155 for (getnamesptr = getnames;
2156 *getnamesptr != 0;
2157 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2158 {
2159 const char *substring;
2160 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2161 count, (char *)getnamesptr, &substring);
2162 if (rc < 0)
2163 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2164 else
2165 {
2166 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2167 pcre_free_substring(substring);
2168 }
2169 }
2170
2171 if (getlist)
2172 {
2173 const char **stringlist;
2174 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2175 &stringlist);
2176 if (rc < 0)
2177 fprintf(outfile, "get substring list failed %d\n", rc);
2178 else
2179 {
2180 for (i = 0; i < count; i++)
2181 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2182 if (stringlist[i] != NULL)
2183 fprintf(outfile, "string list not terminated by NULL\n");
2184 /* free((void *)stringlist); */
2185 pcre_free_substring_list(stringlist);
2186 }
2187 }
2188 }
2189
2190 /* There was a partial match */
2191
2192 else if (count == PCRE_ERROR_PARTIAL)
2193 {
2194 fprintf(outfile, "Partial match");
2195 #if !defined NODFA
2196 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2197 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2198 bptr + use_offsets[0]);
2199 #endif
2200 fprintf(outfile, "\n");
2201 break; /* Out of the /g loop */
2202 }
2203
2204 /* Failed to match. If this is a /g or /G loop and we previously set
2205 g_notempty after a null match, this is not necessarily the end.
2206 We want to advance the start offset, and continue. In the case of UTF-8
2207 matching, the advance must be one character, not one byte. Fudge the
2208 offset values to achieve this. We won't be at the end of the string -
2209 that was checked before setting g_notempty. */
2210
2211 else
2212 {
2213 if (g_notempty != 0)
2214 {
2215 int onechar = 1;
2216 use_offsets[0] = start_offset;
2217 if (use_utf8)
2218 {
2219 while (start_offset + onechar < len)
2220 {
2221 int tb = bptr[start_offset+onechar];
2222 if (tb <= 127) break;
2223 tb &= 0xc0;
2224 if (tb != 0 && tb != 0xc0) onechar++;
2225 }
2226 }
2227 use_offsets[1] = start_offset + onechar;
2228 }
2229 else
2230 {
2231 if (count == PCRE_ERROR_NOMATCH)
2232 {
2233 if (gmatched == 0) fprintf(outfile, "No match\n");
2234 }
2235 else fprintf(outfile, "Error %d\n", count);
2236 break; /* Out of the /g loop */
2237 }
2238 }
2239
2240 /* If not /g or /G we are done */
2241
2242 if (!do_g && !do_G) break;
2243
2244 /* If we have matched an empty string, first check to see if we are at
2245 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2246 what Perl's /g option does. This turns out to be rather cunning. First
2247 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2248 same point. If this fails (picked up above) we advance to the next
2249 character. */
2250
2251 g_notempty = 0;
2252 if (use_offsets[0] == use_offsets[1])
2253 {
2254 if (use_offsets[0] == len) break;
2255 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2256 }
2257
2258 /* For /g, update the start offset, leaving the rest alone */
2259
2260 if (do_g) start_offset = use_offsets[1];
2261
2262 /* For /G, update the pointer and length */
2263
2264 else
2265 {
2266 bptr += use_offsets[1];
2267 len -= use_offsets[1];
2268 }
2269 } /* End of loop for /g and /G */
2270
2271 NEXT_DATA: continue;
2272 } /* End of loop for data lines */
2273
2274 CONTINUE:
2275
2276 #if !defined NOPOSIX
2277 if (posix || do_posix) regfree(&preg);
2278 #endif
2279
2280 if (re != NULL) new_free(re);
2281 if (extra != NULL) new_free(extra);
2282 if (tables != NULL)
2283 {
2284 new_free((void *)tables);
2285 setlocale(LC_CTYPE, "C");
2286 locale_set = 0;
2287 }
2288 }
2289
2290 if (infile == stdin) fprintf(outfile, "\n");
2291
2292 EXIT:
2293
2294 if (infile != NULL && infile != stdin) fclose(infile);
2295 if (outfile != NULL && outfile != stdout) fclose(outfile);
2296
2297 free(buffer);
2298 free(dbuffer);
2299 free(pbuffer);
2300 free(offsets);
2301
2302 return yield;
2303 }
2304
2305 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12