/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 146 - (show annotations) (download)
Thu Apr 5 09:17:28 2007 UTC (7 years, 4 months ago) by ph10
File MIME type: text/plain
File size: 68194 byte(s)
Another buglet in pcretest for "any" newline and /g.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #include <ctype.h>
40 #include <stdio.h>
41 #include <string.h>
42 #include <stdlib.h>
43 #include <time.h>
44 #include <locale.h>
45 #include <errno.h>
#include <limits.h>
46
47
48 /* A number of things vary for Windows builds. Originally, pcretest opened its
49 input and output without "b"; then I was told that "b" was needed in some
50 environments, so it was added for release 5.0 to both the input and output. (It
51 makes no difference on Unix-like systems.) Later I was told that it is wrong
52 for the input on Windows. I've now abstracted the modes into two macros that
53 are set here, to make it easier to fiddle with them, and removed "b" from the
54 input mode under Windows. */
55
56 #if defined(_WIN32) || defined(WIN32)
57 #include <io.h> /* For _setmode() */
58 #include <fcntl.h> /* For _O_BINARY */
59 #define INPUT_MODE "r"
60 #define OUTPUT_MODE "wb"
61
62 #else
63 #include <sys/time.h> /* These two includes are needed */
64 #include <sys/resource.h> /* for setrlimit(). */
65 #define INPUT_MODE "rb"
66 #define OUTPUT_MODE "wb"
67 #endif
68
69
70 /* We have to include pcre_internal.h because we need the internal info for
71 displaying the results of pcre_study() and we also need to know about the
72 internal macros, structures, and other internal data values; pcretest has
73 "inside information" compared to a program that strictly follows the PCRE API.
74
75 Although pcre_internal.h does itself include pcre.h, we explicitly include it
76 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
77 appropriately for an application, not for building PCRE. */
78
79 #include "pcre.h"
80 #include "pcre_internal.h"
81
82 /* We need access to the data tables that PCRE uses. So as not to have to keep
83 two copies, we include the source file here, changing the names of the external
84 symbols to prevent clashes. */
85
86 #define _pcre_utf8_table1 utf8_table1
87 #define _pcre_utf8_table1_size utf8_table1_size
88 #define _pcre_utf8_table2 utf8_table2
89 #define _pcre_utf8_table3 utf8_table3
90 #define _pcre_utf8_table4 utf8_table4
91 #define _pcre_utt utt
92 #define _pcre_utt_size utt_size
93 #define _pcre_OP_lengths OP_lengths
94
95 #include "pcre_tables.c"
96
97 /* We also need the pcre_printint() function for printing out compiled
98 patterns. This function is in a separate file so that it can be included in
99 pcre_compile.c when that module is compiled with debugging enabled.
100
101 The definition of the macro PRINTABLE, which determines whether to print an
102 output character as-is or as a hex value when showing compiled patterns, is
103 contained in this file. We use it here also, in cases when the locale has not
104 been explicitly changed, so as to get consistent output from systems that
105 differ in their output from isprint() even in the "C" locale. */
106
107 #include "pcre_printint.src"
108
109 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
110
111
112 /* It is possible to compile this test program without including support for
113 testing the POSIX interface, though this is not available via the standard
114 Makefile. */
115
116 #if !defined NOPOSIX
117 #include "pcreposix.h"
118 #endif
119
120 /* It is also possible, for the benefit of the version currently imported into
121 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
122 interface to the DFA matcher (NODFA), and without the doublecheck of the old
123 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
124 UTF8 support if PCRE is built without it. */
125
126 #ifndef SUPPORT_UTF8
127 #ifndef NOUTF8
128 #define NOUTF8
129 #endif
130 #endif
131
132
133 /* Other parameters */
134
135 #ifndef CLOCKS_PER_SEC
136 #ifdef CLK_TCK
137 #define CLOCKS_PER_SEC CLK_TCK
138 #else
139 #define CLOCKS_PER_SEC 100
140 #endif
141 #endif
142
143 /* This is the default loop count for timing. */
144
145 #define LOOPREPEAT 500000
146
147 /* Static variables */
148
149 static FILE *outfile; /* Output stream for results; main() points it at stdout or the named output file */
150 static int log_store = 0; /* Per-pattern copy of the -s/-m setting; the 'M' pattern modifier forces it on */
151 static int callout_count; /* Callouts numbered callout_fail_id seen so far (incremented in callout()) */
152 static int callout_extra; /* Non-zero: callout() also lists the captured substrings */
153 static int callout_fail_count; /* callout() returns 1 once callout_count reaches this value */
154 static int callout_fail_id; /* The callout number that callout() counts towards failure */
155 static int first_callout; /* Non-zero until callout() has run once; gates re-printing of the subject */
156 static int locale_set = 0; /* Non-zero makes PRINTHEX use isprint() instead of PRINTABLE() */
157 static int show_malloc; /* Non-zero: the malloc/free wrappers log each call to outfile */
158 static int use_utf8; /* Non-zero: pchars() decodes strings as UTF-8 */
159 static size_t gotten_store; /* Size passed to the most recent new_malloc() call */
160
161 /* The buffers grow automatically if very long input lines are encountered. */
162
163 static int buffer_size = 50000; /* Current size in bytes of each of the three buffers below */
164 static uschar *buffer = NULL; /* Input line buffer filled by extend_inputline() */
165 static uschar *dbuffer = NULL; /* Resized alongside buffer; NOTE(review): its use is not visible in this part of the file */
166 static uschar *pbuffer = NULL; /* Copy of the current pattern text, read by callout() */
167
168
169
170 /*************************************************
171 * Read or extend an input line *
172 *************************************************/
173
174 /* Input lines are read into buffer, but both patterns and data lines can be
175 continued over multiple input lines. In addition, if the buffer fills up, we
176 want to automatically expand it so as to be able to handle extremely large
177 lines that are needed for certain stress tests. When the input buffer is
178 expanded, the other two buffers must also be expanded likewise, and the
179 contents of pbuffer, which are a copy of the input for callouts, must be
180 preserved (for when expansion happens for a data line). This is not the most
181 optimal way of handling this, but hey, this is just a test program!
182
183 Arguments:
184 f the file to read
185 start where in buffer to start (this *must* be within buffer)
186
187 Returns: pointer to the start of new data
188 could be a copy of start, or could be moved
189 NULL if no data read and EOF reached
190 */
191
192 static uschar *
193 extend_inputline(FILE *f, uschar *start)
194 {
195 uschar *here = start;
196
197 for (;;)
198 {
199 int rlen = buffer_size - (here - buffer);
200
201 if (rlen > 1000)
202 {
203 int dlen;
204 if (fgets((char *)here, rlen, f) == NULL)
205 return (here == start)? NULL : start;
206 dlen = (int)strlen((char *)here);
207 if (dlen > 0 && here[dlen - 1] == '\n') return start;
208 here += dlen;
209 }
210
211 else
212 {
213 int new_buffer_size = 2*buffer_size;
214 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
215 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
216 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
217
218 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
219 {
220 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
221 exit(1);
222 }
223
224 memcpy(new_buffer, buffer, buffer_size);
225 memcpy(new_pbuffer, pbuffer, buffer_size);
226
227 buffer_size = new_buffer_size;
228
229 start = new_buffer + (start - buffer);
230 here = new_buffer + (here - buffer);
231
232 free(buffer);
233 free(dbuffer);
234 free(pbuffer);
235
236 buffer = new_buffer;
237 dbuffer = new_dbuffer;
238 pbuffer = new_pbuffer;
239 }
240 }
241
242 return NULL; /* Control never gets here */
243 }
244
245
246
247
248
249
250
251 /*************************************************
252 * Read number from string *
253 *************************************************/
254
255 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
256 around with conditional compilation, just do the job by hand. It is only used
257 for unpicking arguments, so just keep it simple.
258
259 Arguments:
260 str string to be converted
261 endptr where to put the end pointer
262
263 Returns: the converted value, as an int
264 */
265
/* Convert an unsigned decimal number at the start of a string. Leading white
space is skipped, then every following decimal digit is consumed. A value too
large for an int is clamped to INT_MAX rather than overflowing (signed overflow
is undefined behaviour); the digits are still consumed, so the end pointer is
the same as it would be for a value that fits.

Arguments:
  str      the string to convert
  endptr   receives a pointer to the first character not consumed

Returns:   the converted value, or INT_MAX if it does not fit in an int;
           0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int result = 0;

  while (isspace(*str)) str++;      /* isspace(0) is false, so this stops at NUL */

  while (isdigit(*str))
    {
    int digit = *str++ - '0';

    /* result*10 + digit fits in an int exactly when this test is false. Once
    saturated, result stays at INT_MAX for any further digits. */

    if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
    }

  *endptr = str;
  return result;
}
275
276
277
278
279 /*************************************************
280 * Convert UTF-8 string to value *
281 *************************************************/
282
283 /* This function takes one or more bytes that represents a UTF-8 character,
284 and returns the value of the character.
285
286 Argument:
287 utf8bytes a pointer to the byte vector
288 vptr a pointer to an int to receive the value
289
290 Returns: > 0 => the number of bytes consumed
291 -6 to 0 => malformed UTF-8 character at offset = (-return)
292 */
293
294 #if !defined NOUTF8
295
296 static int
297 utf82ord(unsigned char *utf8bytes, int *vptr)
298 {
299 int c = *utf8bytes++;
300 int d = c;
301 int i, j, s;
302
303 for (i = -1; i < 6; i++) /* i is number of additional bytes */
304 {
305 if ((d & 0x80) == 0) break;
306 d <<= 1;
307 }
308
309 if (i == -1) { *vptr = c; return 1; } /* ascii character */
310 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
311
312 /* i now has a value in the range 1-5 */
313
314 s = 6*i;
315 d = (c & utf8_table3[i]) << s;
316
317 for (j = 0; j < i; j++)
318 {
319 c = *utf8bytes++;
320 if ((c & 0xc0) != 0x80) return -(j+1);
321 s -= 6;
322 d |= (c & 0x3f) << s;
323 }
324
325 /* Check that encoding was the correct unique one */
326
327 for (j = 0; j < utf8_table1_size; j++)
328 if (d <= utf8_table1[j]) break;
329 if (j != i) return -(i+1);
330
331 /* Valid value */
332
333 *vptr = d;
334 return i+1;
335 }
336
337 #endif
338
339
340
341 /*************************************************
342 * Convert character value to UTF-8 *
343 *************************************************/
344
345 /* This function takes an integer value in the range 0 - 0x7fffffff
346 and encodes it as a UTF-8 character in 1 to 6 bytes.
347
348 Arguments:
349 cvalue the character value
350 utf8bytes pointer to buffer for result - at least 6 bytes long
351
352 Returns: number of characters placed in the buffer
353 */
354
355 #if !defined NOUTF8
356
357 static int
358 ord2utf8(int cvalue, uschar *utf8bytes)
359 {
360 register int i, j;
361 for (i = 0; i < utf8_table1_size; i++)
362 if (cvalue <= utf8_table1[i]) break;
363 utf8bytes += i;
364 for (j = i; j > 0; j--)
365 {
366 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
367 cvalue >>= 6;
368 }
369 *utf8bytes = utf8_table2[i] | cvalue;
370 return i + 1;
371 }
372
373 #endif
374
375
376
377 /*************************************************
378 * Print character string *
379 *************************************************/
380
381 /* Character string printing function. Must handle UTF-8 strings in utf8
382 mode. Yields number of characters printed. If handed a NULL file, just counts
383 chars without printing. */
384
385 static int pchars(unsigned char *p, int length, FILE *f)
386 {
387 int c = 0;
388 int yield = 0;
389
390 while (length-- > 0)
391 {
392 #if !defined NOUTF8
393 if (use_utf8)
394 {
395 int rc = utf82ord(p, &c);
396
397 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
398 {
399 length -= rc - 1;
400 p += rc;
401 if (PRINTHEX(c))
402 {
403 if (f != NULL) fprintf(f, "%c", c);
404 yield++;
405 }
406 else
407 {
408 int n = 4;
409 if (f != NULL) fprintf(f, "\\x{%02x}", c);
410 yield += (n <= 0x000000ff)? 2 :
411 (n <= 0x00000fff)? 3 :
412 (n <= 0x0000ffff)? 4 :
413 (n <= 0x000fffff)? 5 : 6;
414 }
415 continue;
416 }
417 }
418 #endif
419
420 /* Not UTF-8, or malformed UTF-8 */
421
422 c = *p++;
423 if (PRINTHEX(c))
424 {
425 if (f != NULL) fprintf(f, "%c", c);
426 yield++;
427 }
428 else
429 {
430 if (f != NULL) fprintf(f, "\\x%02x", c);
431 yield += 4;
432 }
433 }
434
435 return yield;
436 }
437
438
439
440 /*************************************************
441 * Callout function *
442 *************************************************/
443
444 /* Called from PCRE as a result of the (?C) item. We print out where we are in
445 the match. Yield zero unless more callouts than the fail count, or the callout
446 data is not zero. */
447
448 static int callout(pcre_callout_block *cb)
449 {
450 FILE *f = (first_callout | callout_extra)? outfile : NULL;
451 int i, pre_start, post_start, subject_length;
452
453 if (callout_extra)
454 {
455 fprintf(f, "Callout %d: last capture = %d\n",
456 cb->callout_number, cb->capture_last);
457
458 for (i = 0; i < cb->capture_top * 2; i += 2)
459 {
460 if (cb->offset_vector[i] < 0)
461 fprintf(f, "%2d: <unset>\n", i/2);
462 else
463 {
464 fprintf(f, "%2d: ", i/2);
465 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
466 cb->offset_vector[i+1] - cb->offset_vector[i], f);
467 fprintf(f, "\n");
468 }
469 }
470 }
471
472 /* Re-print the subject in canonical form, the first time or if giving full
473 datails. On subsequent calls in the same match, we use pchars just to find the
474 printed lengths of the substrings. */
475
476 if (f != NULL) fprintf(f, "--->");
477
478 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
479 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
480 cb->current_position - cb->start_match, f);
481
482 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
483
484 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
485 cb->subject_length - cb->current_position, f);
486
487 if (f != NULL) fprintf(f, "\n");
488
489 /* Always print appropriate indicators, with callout number if not already
490 shown. For automatic callouts, show the pattern offset. */
491
492 if (cb->callout_number == 255)
493 {
494 fprintf(outfile, "%+3d ", cb->pattern_position);
495 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
496 }
497 else
498 {
499 if (callout_extra) fprintf(outfile, " ");
500 else fprintf(outfile, "%3d ", cb->callout_number);
501 }
502
503 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
504 fprintf(outfile, "^");
505
506 if (post_start > 0)
507 {
508 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
509 fprintf(outfile, "^");
510 }
511
512 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
513 fprintf(outfile, " ");
514
515 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
516 pbuffer + cb->pattern_position);
517
518 fprintf(outfile, "\n");
519 first_callout = 0;
520
521 if (cb->callout_data != NULL)
522 {
523 int callout_data = *((int *)(cb->callout_data));
524 if (callout_data != 0)
525 {
526 fprintf(outfile, "Callout data = %d\n", callout_data);
527 return callout_data;
528 }
529 }
530
531 return (cb->callout_number != callout_fail_id)? 0 :
532 (++callout_count >= callout_fail_count)? 1 : 0;
533 }
534
535
536 /*************************************************
537 * Local malloc functions *
538 *************************************************/
539
540 /* Alternative malloc function, to test functionality and show the size of the
541 compiled re. */
542
543 static void *new_malloc(size_t size)
544 {
545 void *block = malloc(size);
546 gotten_store = size;
547 if (show_malloc)
548 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
549 return block;
550 }
551
552 static void new_free(void *block)
553 {
554 if (show_malloc)
555 fprintf(outfile, "free %p\n", block);
556 free(block);
557 }
558
559
560 /* For recursion malloc/free, to test stacking calls */
561
562 static void *stack_malloc(size_t size)
563 {
564 void *block = malloc(size);
565 if (show_malloc)
566 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
567 return block;
568 }
569
570 static void stack_free(void *block)
571 {
572 if (show_malloc)
573 fprintf(outfile, "stack_free %p\n", block);
574 free(block);
575 }
576
577
578 /*************************************************
579 * Call pcre_fullinfo() *
580 *************************************************/
581
582 /* Get one piece of information from the pcre_fullinfo() function */
583
584 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
585 {
586 int rc;
587 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
588 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
589 }
590
591
592
593 /*************************************************
594 * Byte flipping function *
595 *************************************************/
596
/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value    the value to flip
  n        2 to swap the two low-order bytes; any other value reverses the
           four low-order bytes

Returns:   the flipped value; bits above the bytes handled are dropped
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int flipped = 0;
  int k;

  if (n == 2)
    {
    return ((value & 0xff) << 8) | ((value >> 8) & 0xff);
    }

  /* Peel bytes off the bottom of the input and push them in at the bottom of
  the result, so that the first byte taken ends up as the most significant. */

  for (k = 0; k < 4; k++)
    {
    flipped = (flipped << 8) | (value & 0xff);
    value >>= 8;
    }

  return flipped;
}
606
607
608
609
610 /*************************************************
611 * Check match or recursion limit *
612 *************************************************/
613
614 static int
615 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
616 int start_offset, int options, int *use_offsets, int use_size_offsets,
617 int flag, unsigned long int *limit, int errnumber, const char *msg)
618 {
619 int count;
620 int min = 0;
621 int mid = 64;
622 int max = -1;
623
624 extra->flags |= flag;
625
626 for (;;)
627 {
628 *limit = mid;
629
630 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
631 use_offsets, use_size_offsets);
632
633 if (count == errnumber)
634 {
635 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
636 min = mid;
637 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
638 }
639
640 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
641 count == PCRE_ERROR_PARTIAL)
642 {
643 if (mid == min + 1)
644 {
645 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
646 break;
647 }
648 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
649 max = mid;
650 mid = (min + mid)/2;
651 }
652 else break; /* Some other error */
653 }
654
655 extra->flags &= ~flag;
656 return count;
657 }
658
659
660
661 /*************************************************
662 * Check newline indicator *
663 *************************************************/
664
665 /* This is used both at compile and run-time to check for <xxx> escapes, where
666 xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.
667
668 Arguments:
669 p points after the leading '<'
670 f file for error message
671
672 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
673 */
674
675 static int
676 check_newline(uschar *p, FILE *f)
677 {
678 if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
679 if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
680 if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
681 if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
682 fprintf(f, "Unknown newline type at: <%s\n", p);
683 return 0;
684 }
685
686
687
688 /*************************************************
689 * Usage function *
690 *************************************************/
691
/* Write the command-line summary to stdout. The -dfa and -p lines appear only
when the corresponding support is compiled in. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input> [<output>]]\n", stdout);
  fputs(" -b show compiled code (bytecode)\n", stdout);
  fputs(" -C show PCRE compile-time options and exit\n", stdout);
  fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
  fputs(" -help show usage information\n", stdout);
  fputs(" -i show information about compiled patterns\n", stdout);
  fputs(" -m output memory used information\n", stdout);
  fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif
  fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
  fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
  fputs(" -s output store (memory) used information\n", stdout);
  fputs(" -t time compilation and execution\n", stdout);
  fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
  fputs(" -tm time execution (matching) only\n", stdout);
  fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
717
718
719
720 /*************************************************
721 * Main Program *
722 *************************************************/
723
724 /* Read lines from named file or stdin and write to named file or stdout; lines
725 consist of a regular expression, in delimiters and optionally followed by
726 options, followed by a set of test data, terminated by an empty line. */
727
728 int main(int argc, char **argv)
729 {
730 FILE *infile = stdin;
731 int options = 0;
732 int study_options = 0;
733 int op = 1;
734 int timeit = 0;
735 int timeitm = 0;
736 int showinfo = 0;
737 int showstore = 0;
738 int quiet = 0;
739 int size_offsets = 45;
740 int size_offsets_max;
741 int *offsets = NULL;
742 #if !defined NOPOSIX
743 int posix = 0;
744 #endif
745 int debug = 0;
746 int done = 0;
747 int all_use_dfa = 0;
748 int yield = 0;
749 int stack_size;
750
751 /* These vectors store, end-to-end, a list of captured substring names. Assume
752 that 1024 is plenty long enough for the few names we'll be testing. */
753
754 uschar copynames[1024];
755 uschar getnames[1024];
756
757 uschar *copynamesptr;
758 uschar *getnamesptr;
759
760 /* Get buffers from malloc() so that Electric Fence will check their misuse
761 when I am debugging. They grow automatically when very long lines are read. */
762
763 buffer = (unsigned char *)malloc(buffer_size);
764 dbuffer = (unsigned char *)malloc(buffer_size);
765 pbuffer = (unsigned char *)malloc(buffer_size);
766
767 /* The outfile variable is static so that new_malloc can use it. */
768
769 outfile = stdout;
770
771 /* The following _setmode() stuff is some Windows magic that tells its runtime
772 library to translate CRLF into a single LF character. At least, that's what
773 I've been told: never having used Windows I take this all on trust. Originally
774 it set 0x8000, but then I was advised that _O_BINARY was better. */
775
776 #if defined(_WIN32) || defined(WIN32)
777 _setmode( _fileno( stdout ), _O_BINARY );
778 #endif
779
780 /* Scan options */
781
782 while (argc > 1 && argv[op][0] == '-')
783 {
784 unsigned char *endptr;
785
786 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
787 showstore = 1;
788 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
789 else if (strcmp(argv[op], "-b") == 0) debug = 1;
790 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
791 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
792 #if !defined NODFA
793 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
794 #endif
795 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
796 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
797 *endptr == 0))
798 {
799 op++;
800 argc--;
801 }
802 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
803 {
804 int both = argv[op][2] == 0;
805 int temp;
806 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
807 *endptr == 0))
808 {
809 timeitm = temp;
810 op++;
811 argc--;
812 }
813 else timeitm = LOOPREPEAT;
814 if (both) timeit = timeitm;
815 }
816 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
817 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
818 *endptr == 0))
819 {
820 #if defined(_WIN32) || defined(WIN32)
821 printf("PCRE: -S not supported on this OS\n");
822 exit(1);
823 #else
824 int rc;
825 struct rlimit rlim;
826 getrlimit(RLIMIT_STACK, &rlim);
827 rlim.rlim_cur = stack_size * 1024 * 1024;
828 rc = setrlimit(RLIMIT_STACK, &rlim);
829 if (rc != 0)
830 {
831 printf("PCRE: setrlimit() failed with error %d\n", rc);
832 exit(1);
833 }
834 op++;
835 argc--;
836 #endif
837 }
838 #if !defined NOPOSIX
839 else if (strcmp(argv[op], "-p") == 0) posix = 1;
840 #endif
841 else if (strcmp(argv[op], "-C") == 0)
842 {
843 int rc;
844 printf("PCRE version %s\n", pcre_version());
845 printf("Compiled with\n");
846 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
847 printf(" %sUTF-8 support\n", rc? "" : "No ");
848 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
849 printf(" %sUnicode properties support\n", rc? "" : "No ");
850 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
851 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
852 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
853 (rc == -1)? "ANY" : "???");
854 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
855 printf(" Internal link size = %d\n", rc);
856 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
857 printf(" POSIX malloc threshold = %d\n", rc);
858 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
859 printf(" Default match limit = %d\n", rc);
860 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
861 printf(" Default recursion depth limit = %d\n", rc);
862 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
863 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
864 goto EXIT;
865 }
866 else if (strcmp(argv[op], "-help") == 0 ||
867 strcmp(argv[op], "--help") == 0)
868 {
869 usage();
870 goto EXIT;
871 }
872 else
873 {
874 printf("** Unknown or malformed option %s\n", argv[op]);
875 usage();
876 yield = 1;
877 goto EXIT;
878 }
879 op++;
880 argc--;
881 }
882
883 /* Get the store for the offsets vector, and remember what it was */
884
885 size_offsets_max = size_offsets;
886 offsets = (int *)malloc(size_offsets_max * sizeof(int));
887 if (offsets == NULL)
888 {
889 printf("** Failed to get %d bytes of memory for offsets vector\n",
890 size_offsets_max * sizeof(int));
891 yield = 1;
892 goto EXIT;
893 }
894
895 /* Sort out the input and output files */
896
897 if (argc > 1)
898 {
899 infile = fopen(argv[op], INPUT_MODE);
900 if (infile == NULL)
901 {
902 printf("** Failed to open %s\n", argv[op]);
903 yield = 1;
904 goto EXIT;
905 }
906 }
907
908 if (argc > 2)
909 {
910 outfile = fopen(argv[op+1], OUTPUT_MODE);
911 if (outfile == NULL)
912 {
913 printf("** Failed to open %s\n", argv[op+1]);
914 yield = 1;
915 goto EXIT;
916 }
917 }
918
919 /* Set alternative malloc function */
920
921 pcre_malloc = new_malloc;
922 pcre_free = new_free;
923 pcre_stack_malloc = stack_malloc;
924 pcre_stack_free = stack_free;
925
926 /* Heading line unless quiet, then prompt for first regex if stdin */
927
928 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
929
930 /* Main loop */
931
932 while (!done)
933 {
934 pcre *re = NULL;
935 pcre_extra *extra = NULL;
936
937 #if !defined NOPOSIX /* There are still compilers that require no indent */
938 regex_t preg;
939 int do_posix = 0;
940 #endif
941
942 const char *error;
943 unsigned char *p, *pp, *ppp;
944 unsigned char *to_file = NULL;
945 const unsigned char *tables = NULL;
946 unsigned long int true_size, true_study_size = 0;
947 size_t size, regex_gotten_store;
948 int do_study = 0;
949 int do_debug = debug;
950 int debug_lengths = 1;
951 int do_G = 0;
952 int do_g = 0;
953 int do_showinfo = showinfo;
954 int do_showrest = 0;
955 int do_flip = 0;
956 int erroroffset, len, delimiter, poffset;
957
958 use_utf8 = 0;
959
960 if (infile == stdin) printf(" re> ");
961 if (extend_inputline(infile, buffer) == NULL) break;
962 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
963 fflush(outfile);
964
965 p = buffer;
966 while (isspace(*p)) p++;
967 if (*p == 0) continue;
968
969 /* See if the pattern is to be loaded pre-compiled from a file. */
970
971 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
972 {
973 unsigned long int magic, get_options;
974 uschar sbuf[8];
975 FILE *f;
976
977 p++;
978 pp = p + (int)strlen((char *)p);
979 while (isspace(pp[-1])) pp--;
980 *pp = 0;
981
982 f = fopen((char *)p, "rb");
983 if (f == NULL)
984 {
985 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
986 continue;
987 }
988
989 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
990
991 true_size =
992 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
993 true_study_size =
994 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
995
996 re = (real_pcre *)new_malloc(true_size);
997 regex_gotten_store = gotten_store;
998
999 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1000
1001 magic = ((real_pcre *)re)->magic_number;
1002 if (magic != MAGIC_NUMBER)
1003 {
1004 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1005 {
1006 do_flip = 1;
1007 }
1008 else
1009 {
1010 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1011 fclose(f);
1012 continue;
1013 }
1014 }
1015
1016 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1017 do_flip? " (byte-inverted)" : "", p);
1018
1019 /* Need to know if UTF-8 for printing data strings */
1020
1021 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1022 use_utf8 = (get_options & PCRE_UTF8) != 0;
1023
1024 /* Now see if there is any following study data */
1025
1026 if (true_study_size != 0)
1027 {
1028 pcre_study_data *psd;
1029
1030 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1031 extra->flags = PCRE_EXTRA_STUDY_DATA;
1032
1033 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1034 extra->study_data = psd;
1035
1036 if (fread(psd, 1, true_study_size, f) != true_study_size)
1037 {
1038 FAIL_READ:
1039 fprintf(outfile, "Failed to read data from %s\n", p);
1040 if (extra != NULL) new_free(extra);
1041 if (re != NULL) new_free(re);
1042 fclose(f);
1043 continue;
1044 }
1045 fprintf(outfile, "Study data loaded from %s\n", p);
1046 do_study = 1; /* To get the data output if requested */
1047 }
1048 else fprintf(outfile, "No study data\n");
1049
1050 fclose(f);
1051 goto SHOW_INFO;
1052 }
1053
1054 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1055 the pattern; if it isn't complete, read more. */
1056
1057 delimiter = *p++;
1058
1059 if (isalnum(delimiter) || delimiter == '\\')
1060 {
1061 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1062 goto SKIP_DATA;
1063 }
1064
1065 pp = p;
1066 poffset = p - buffer;
1067
1068 for(;;)
1069 {
1070 while (*pp != 0)
1071 {
1072 if (*pp == '\\' && pp[1] != 0) pp++;
1073 else if (*pp == delimiter) break;
1074 pp++;
1075 }
1076 if (*pp != 0) break;
1077 if (infile == stdin) printf(" > ");
1078 if ((pp = extend_inputline(infile, pp)) == NULL)
1079 {
1080 fprintf(outfile, "** Unexpected EOF\n");
1081 done = 1;
1082 goto CONTINUE;
1083 }
1084 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1085 }
1086
1087 /* The buffer may have moved while being extended; reset the start of data
1088 pointer to the correct relative point in the buffer. */
1089
1090 p = buffer + poffset;
1091
1092 /* If the first character after the delimiter is backslash, make
1093 the pattern end with backslash. This is purely to provide a way
1094 of testing for the error message when a pattern ends with backslash. */
1095
1096 if (pp[1] == '\\') *pp++ = '\\';
1097
1098 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1099 for callouts. */
1100
1101 *pp++ = 0;
1102 strcpy((char *)pbuffer, (char *)p);
1103
1104 /* Look for options after final delimiter */
1105
1106 options = 0;
1107 study_options = 0;
1108 log_store = showstore; /* default from command line */
1109
1110 while (*pp != 0)
1111 {
1112 switch (*pp++)
1113 {
1114 case 'f': options |= PCRE_FIRSTLINE; break;
1115 case 'g': do_g = 1; break;
1116 case 'i': options |= PCRE_CASELESS; break;
1117 case 'm': options |= PCRE_MULTILINE; break;
1118 case 's': options |= PCRE_DOTALL; break;
1119 case 'x': options |= PCRE_EXTENDED; break;
1120
1121 case '+': do_showrest = 1; break;
1122 case 'A': options |= PCRE_ANCHORED; break;
1123 case 'B': do_debug = 1; break;
1124 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1125 case 'D': do_debug = do_showinfo = 1; break;
1126 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1127 case 'F': do_flip = 1; break;
1128 case 'G': do_G = 1; break;
1129 case 'I': do_showinfo = 1; break;
1130 case 'J': options |= PCRE_DUPNAMES; break;
1131 case 'M': log_store = 1; break;
1132 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1133
1134 #if !defined NOPOSIX
1135 case 'P': do_posix = 1; break;
1136 #endif
1137
1138 case 'S': do_study = 1; break;
1139 case 'U': options |= PCRE_UNGREEDY; break;
1140 case 'X': options |= PCRE_EXTRA; break;
1141 case 'Z': debug_lengths = 0; break;
1142 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1143 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1144
1145 case 'L':
1146 ppp = pp;
1147 /* The '\r' test here is so that it works on Windows. */
1148 /* The '0' test is just in case this is an unterminated line. */
1149 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1150 *ppp = 0;
1151 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1152 {
1153 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1154 goto SKIP_DATA;
1155 }
1156 locale_set = 1;
1157 tables = pcre_maketables();
1158 pp = ppp;
1159 break;
1160
1161 case '>':
1162 to_file = pp;
1163 while (*pp != 0) pp++;
1164 while (isspace(pp[-1])) pp--;
1165 *pp = 0;
1166 break;
1167
1168 case '<':
1169 {
1170 int x = check_newline(pp, outfile);
1171 if (x == 0) goto SKIP_DATA;
1172 options |= x;
1173 while (*pp++ != '>');
1174 }
1175 break;
1176
1177 case '\r': /* So that it works in Windows */
1178 case '\n':
1179 case ' ':
1180 break;
1181
1182 default:
1183 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1184 goto SKIP_DATA;
1185 }
1186 }
1187
1188 /* Handle compiling via the POSIX interface, which doesn't support the
1189 timing, showing, or debugging options, nor the ability to pass over
1190 local character tables. */
1191
1192 #if !defined NOPOSIX
1193 if (posix || do_posix)
1194 {
1195 int rc;
1196 int cflags = 0;
1197
1198 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1199 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1200 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1201 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1202 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1203
1204 rc = regcomp(&preg, (char *)p, cflags);
1205
1206 /* Compilation failed; go back for another re, skipping to blank line
1207 if non-interactive. */
1208
1209 if (rc != 0)
1210 {
1211 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1212 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1213 goto SKIP_DATA;
1214 }
1215 }
1216
1217 /* Handle compiling via the native interface */
1218
1219 else
1220 #endif /* !defined NOPOSIX */
1221
1222 {
1223 if (timeit > 0)
1224 {
1225 register int i;
1226 clock_t time_taken;
1227 clock_t start_time = clock();
1228 for (i = 0; i < timeit; i++)
1229 {
1230 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1231 if (re != NULL) free(re);
1232 }
1233 time_taken = clock() - start_time;
1234 fprintf(outfile, "Compile time %.4f milliseconds\n",
1235 (((double)time_taken * 1000.0) / (double)timeit) /
1236 (double)CLOCKS_PER_SEC);
1237 }
1238
1239 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1240
1241 /* Compilation failed; go back for another re, skipping to blank line
1242 if non-interactive. */
1243
1244 if (re == NULL)
1245 {
1246 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1247 SKIP_DATA:
1248 if (infile != stdin)
1249 {
1250 for (;;)
1251 {
1252 if (extend_inputline(infile, buffer) == NULL)
1253 {
1254 done = 1;
1255 goto CONTINUE;
1256 }
1257 len = (int)strlen((char *)buffer);
1258 while (len > 0 && isspace(buffer[len-1])) len--;
1259 if (len == 0) break;
1260 }
1261 fprintf(outfile, "\n");
1262 }
1263 goto CONTINUE;
1264 }
1265
1266 /* Compilation succeeded; print data if required. There are now two
1267 info-returning functions. The old one has a limited interface and
1268 returns only limited data. Check that it agrees with the newer one. */
1269
1270 if (log_store)
1271 fprintf(outfile, "Memory allocation (code space): %d\n",
1272 (int)(gotten_store -
1273 sizeof(real_pcre) -
1274 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1275
1276 /* Extract the size for possible writing before possibly flipping it,
1277 and remember the store that was got. */
1278
1279 true_size = ((real_pcre *)re)->size;
1280 regex_gotten_store = gotten_store;
1281
1282 /* If /S was present, study the regexp to generate additional info to
1283 help with the matching. */
1284
1285 if (do_study)
1286 {
1287 if (timeit > 0)
1288 {
1289 register int i;
1290 clock_t time_taken;
1291 clock_t start_time = clock();
1292 for (i = 0; i < timeit; i++)
1293 extra = pcre_study(re, study_options, &error);
1294 time_taken = clock() - start_time;
1295 if (extra != NULL) free(extra);
1296 fprintf(outfile, " Study time %.4f milliseconds\n",
1297 (((double)time_taken * 1000.0) / (double)timeit) /
1298 (double)CLOCKS_PER_SEC);
1299 }
1300 extra = pcre_study(re, study_options, &error);
1301 if (error != NULL)
1302 fprintf(outfile, "Failed to study: %s\n", error);
1303 else if (extra != NULL)
1304 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1305 }
1306
1307 /* If the 'F' option was present, we flip the bytes of all the integer
1308 fields in the regex data block and the study block. This is to make it
1309 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1310 compiled on a different architecture. */
1311
1312 if (do_flip)
1313 {
1314 real_pcre *rre = (real_pcre *)re;
1315 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1316 rre->size = byteflip(rre->size, sizeof(rre->size));
1317 rre->options = byteflip(rre->options, sizeof(rre->options));
1318 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1319 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1320 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1321 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1322 rre->name_table_offset = byteflip(rre->name_table_offset,
1323 sizeof(rre->name_table_offset));
1324 rre->name_entry_size = byteflip(rre->name_entry_size,
1325 sizeof(rre->name_entry_size));
1326 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1327
1328 if (extra != NULL)
1329 {
1330 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1331 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1332 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1333 }
1334 }
1335
1336 /* Extract information from the compiled data if required */
1337
1338 SHOW_INFO:
1339
1340 if (do_debug)
1341 {
1342 fprintf(outfile, "------------------------------------------------------------------\n");
1343 pcre_printint(re, outfile, debug_lengths);
1344 }
1345
1346 if (do_showinfo)
1347 {
1348 unsigned long int get_options, all_options;
1349 #if !defined NOINFOCHECK
1350 int old_first_char, old_options, old_count;
1351 #endif
1352 int count, backrefmax, first_char, need_char;
1353 int nameentrysize, namecount;
1354 const uschar *nametable;
1355
1356 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1357 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1358 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1359 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1360 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1361 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1362 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1363 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1364 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1365
1366 #if !defined NOINFOCHECK
1367 old_count = pcre_info(re, &old_options, &old_first_char);
1368 if (count < 0) fprintf(outfile,
1369 "Error %d from pcre_info()\n", count);
1370 else
1371 {
1372 if (old_count != count) fprintf(outfile,
1373 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1374 old_count);
1375
1376 if (old_first_char != first_char) fprintf(outfile,
1377 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1378 first_char, old_first_char);
1379
1380 if (old_options != (int)get_options) fprintf(outfile,
1381 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1382 get_options, old_options);
1383 }
1384 #endif
1385
1386 if (size != regex_gotten_store) fprintf(outfile,
1387 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1388 (int)size, (int)regex_gotten_store);
1389
1390 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1391 if (backrefmax > 0)
1392 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1393
1394 if (namecount > 0)
1395 {
1396 fprintf(outfile, "Named capturing subpatterns:\n");
1397 while (namecount-- > 0)
1398 {
1399 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1400 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1401 GET2(nametable, 0));
1402 nametable += nameentrysize;
1403 }
1404 }
1405
1406 /* The NOPARTIAL bit is a private bit in the options, so we have
1407 to fish it out via our back door */
1408
1409 all_options = ((real_pcre *)re)->options;
1410 if (do_flip)
1411 {
1412 all_options = byteflip(all_options, sizeof(all_options));
1413 }
1414
1415 if ((all_options & PCRE_NOPARTIAL) != 0)
1416 fprintf(outfile, "Partial matching not supported\n");
1417
1418 if (get_options == 0) fprintf(outfile, "No options\n");
1419 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1420 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1421 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1422 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1423 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1424 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1425 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1426 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1427 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1428 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1429 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1430 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1431 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1432 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1433
1434 switch (get_options & PCRE_NEWLINE_BITS)
1435 {
1436 case PCRE_NEWLINE_CR:
1437 fprintf(outfile, "Forced newline sequence: CR\n");
1438 break;
1439
1440 case PCRE_NEWLINE_LF:
1441 fprintf(outfile, "Forced newline sequence: LF\n");
1442 break;
1443
1444 case PCRE_NEWLINE_CRLF:
1445 fprintf(outfile, "Forced newline sequence: CRLF\n");
1446 break;
1447
1448 case PCRE_NEWLINE_ANY:
1449 fprintf(outfile, "Forced newline sequence: ANY\n");
1450 break;
1451
1452 default:
1453 break;
1454 }
1455
1456 if (first_char == -1)
1457 {
1458 fprintf(outfile, "First char at start or follows newline\n");
1459 }
1460 else if (first_char < 0)
1461 {
1462 fprintf(outfile, "No first char\n");
1463 }
1464 else
1465 {
1466 int ch = first_char & 255;
1467 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1468 "" : " (caseless)";
1469 if (PRINTHEX(ch))
1470 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1471 else
1472 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1473 }
1474
1475 if (need_char < 0)
1476 {
1477 fprintf(outfile, "No need char\n");
1478 }
1479 else
1480 {
1481 int ch = need_char & 255;
1482 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1483 "" : " (caseless)";
1484 if (PRINTHEX(ch))
1485 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1486 else
1487 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1488 }
1489
1490 /* Don't output study size; at present it is in any case a fixed
1491 value, but it varies, depending on the computer architecture, and
1492 so messes up the test suite. (And with the /F option, it might be
1493 flipped.) */
1494
1495 if (do_study)
1496 {
1497 if (extra == NULL)
1498 fprintf(outfile, "Study returned NULL\n");
1499 else
1500 {
1501 uschar *start_bits = NULL;
1502 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1503
1504 if (start_bits == NULL)
1505 fprintf(outfile, "No starting byte set\n");
1506 else
1507 {
1508 int i;
1509 int c = 24;
1510 fprintf(outfile, "Starting byte set: ");
1511 for (i = 0; i < 256; i++)
1512 {
1513 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1514 {
1515 if (c > 75)
1516 {
1517 fprintf(outfile, "\n ");
1518 c = 2;
1519 }
1520 if (PRINTHEX(i) && i != ' ')
1521 {
1522 fprintf(outfile, "%c ", i);
1523 c += 2;
1524 }
1525 else
1526 {
1527 fprintf(outfile, "\\x%02x ", i);
1528 c += 5;
1529 }
1530 }
1531 }
1532 fprintf(outfile, "\n");
1533 }
1534 }
1535 }
1536 }
1537
1538 /* If the '>' option was present, we write out the regex to a file, and
1539 that is all. The first 8 bytes of the file are the regex length and then
1540 the study length, in big-endian order. */
1541
1542 if (to_file != NULL)
1543 {
1544 FILE *f = fopen((char *)to_file, "wb");
1545 if (f == NULL)
1546 {
1547 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1548 }
1549 else
1550 {
1551 uschar sbuf[8];
1552 sbuf[0] = (true_size >> 24) & 255;
1553 sbuf[1] = (true_size >> 16) & 255;
1554 sbuf[2] = (true_size >> 8) & 255;
1555 sbuf[3] = (true_size) & 255;
1556
1557 sbuf[4] = (true_study_size >> 24) & 255;
1558 sbuf[5] = (true_study_size >> 16) & 255;
1559 sbuf[6] = (true_study_size >> 8) & 255;
1560 sbuf[7] = (true_study_size) & 255;
1561
1562 if (fwrite(sbuf, 1, 8, f) < 8 ||
1563 fwrite(re, 1, true_size, f) < true_size)
1564 {
1565 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1566 }
1567 else
1568 {
1569 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1570 if (extra != NULL)
1571 {
1572 if (fwrite(extra->study_data, 1, true_study_size, f) <
1573 true_study_size)
1574 {
1575 fprintf(outfile, "Write error on %s: %s\n", to_file,
1576 strerror(errno));
1577 }
1578 else fprintf(outfile, "Study data written to %s\n", to_file);
1579
1580 }
1581 }
1582 fclose(f);
1583 }
1584
1585 new_free(re);
1586 if (extra != NULL) new_free(extra);
1587 if (tables != NULL) new_free((void *)tables);
1588 continue; /* With next regex */
1589 }
1590 } /* End of non-POSIX compile */
1591
1592 /* Read data lines and test them */
1593
1594 for (;;)
1595 {
1596 uschar *q;
1597 uschar *bptr = dbuffer;
1598 int *use_offsets = offsets;
1599 int use_size_offsets = size_offsets;
1600 int callout_data = 0;
1601 int callout_data_set = 0;
1602 int count, c;
1603 int copystrings = 0;
1604 int find_match_limit = 0;
1605 int getstrings = 0;
1606 int getlist = 0;
1607 int gmatched = 0;
1608 int start_offset = 0;
1609 int g_notempty = 0;
1610 int use_dfa = 0;
1611
1612 options = 0;
1613
1614 *copynames = 0;
1615 *getnames = 0;
1616
1617 copynamesptr = copynames;
1618 getnamesptr = getnames;
1619
1620 pcre_callout = callout;
1621 first_callout = 1;
1622 callout_extra = 0;
1623 callout_count = 0;
1624 callout_fail_count = 999999;
1625 callout_fail_id = -1;
1626 show_malloc = 0;
1627
1628 if (extra != NULL) extra->flags &=
1629 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1630
1631 len = 0;
1632 for (;;)
1633 {
1634 if (infile == stdin) printf("data> ");
1635 if (extend_inputline(infile, buffer + len) == NULL)
1636 {
1637 if (len > 0) break;
1638 done = 1;
1639 goto CONTINUE;
1640 }
1641 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1642 len = (int)strlen((char *)buffer);
1643 if (buffer[len-1] == '\n') break;
1644 }
1645
1646 while (len > 0 && isspace(buffer[len-1])) len--;
1647 buffer[len] = 0;
1648 if (len == 0) break;
1649
1650 p = buffer;
1651 while (isspace(*p)) p++;
1652
1653 q = dbuffer;
1654 while ((c = *p++) != 0)
1655 {
1656 int i = 0;
1657 int n = 0;
1658
1659 if (c == '\\') switch ((c = *p++))
1660 {
1661 case 'a': c = 7; break;
1662 case 'b': c = '\b'; break;
1663 case 'e': c = 27; break;
1664 case 'f': c = '\f'; break;
1665 case 'n': c = '\n'; break;
1666 case 'r': c = '\r'; break;
1667 case 't': c = '\t'; break;
1668 case 'v': c = '\v'; break;
1669
1670 case '0': case '1': case '2': case '3':
1671 case '4': case '5': case '6': case '7':
1672 c -= '0';
1673 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1674 c = c * 8 + *p++ - '0';
1675
1676 #if !defined NOUTF8
1677 if (use_utf8 && c > 255)
1678 {
1679 unsigned char buff8[8];
1680 int ii, utn;
1681 utn = ord2utf8(c, buff8);
1682 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1683 c = buff8[ii]; /* Last byte */
1684 }
1685 #endif
1686 break;
1687
1688 case 'x':
1689
1690 /* Handle \x{..} specially - new Perl thing for utf8 */
1691
1692 #if !defined NOUTF8
1693 if (*p == '{')
1694 {
1695 unsigned char *pt = p;
1696 c = 0;
1697 while (isxdigit(*(++pt)))
1698 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1699 if (*pt == '}')
1700 {
1701 unsigned char buff8[8];
1702 int ii, utn;
1703 utn = ord2utf8(c, buff8);
1704 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1705 c = buff8[ii]; /* Last byte */
1706 p = pt + 1;
1707 break;
1708 }
1709 /* Not correct form; fall through */
1710 }
1711 #endif
1712
1713 /* Ordinary \x */
1714
1715 c = 0;
1716 while (i++ < 2 && isxdigit(*p))
1717 {
1718 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1719 p++;
1720 }
1721 break;
1722
1723 case 0: /* \ followed by EOF allows for an empty line */
1724 p--;
1725 continue;
1726
1727 case '>':
1728 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1729 continue;
1730
1731 case 'A': /* Option setting */
1732 options |= PCRE_ANCHORED;
1733 continue;
1734
1735 case 'B':
1736 options |= PCRE_NOTBOL;
1737 continue;
1738
1739 case 'C':
1740 if (isdigit(*p)) /* Set copy string */
1741 {
1742 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1743 copystrings |= 1 << n;
1744 }
1745 else if (isalnum(*p))
1746 {
1747 uschar *npp = copynamesptr;
1748 while (isalnum(*p)) *npp++ = *p++;
1749 *npp++ = 0;
1750 *npp = 0;
1751 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1752 if (n < 0)
1753 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1754 copynamesptr = npp;
1755 }
1756 else if (*p == '+')
1757 {
1758 callout_extra = 1;
1759 p++;
1760 }
1761 else if (*p == '-')
1762 {
1763 pcre_callout = NULL;
1764 p++;
1765 }
1766 else if (*p == '!')
1767 {
1768 callout_fail_id = 0;
1769 p++;
1770 while(isdigit(*p))
1771 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1772 callout_fail_count = 0;
1773 if (*p == '!')
1774 {
1775 p++;
1776 while(isdigit(*p))
1777 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1778 }
1779 }
1780 else if (*p == '*')
1781 {
1782 int sign = 1;
1783 callout_data = 0;
1784 if (*(++p) == '-') { sign = -1; p++; }
1785 while(isdigit(*p))
1786 callout_data = callout_data * 10 + *p++ - '0';
1787 callout_data *= sign;
1788 callout_data_set = 1;
1789 }
1790 continue;
1791
1792 #if !defined NODFA
1793 case 'D':
1794 #if !defined NOPOSIX
1795 if (posix || do_posix)
1796 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1797 else
1798 #endif
1799 use_dfa = 1;
1800 continue;
1801
1802 case 'F':
1803 options |= PCRE_DFA_SHORTEST;
1804 continue;
1805 #endif
1806
1807 case 'G':
1808 if (isdigit(*p))
1809 {
1810 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1811 getstrings |= 1 << n;
1812 }
1813 else if (isalnum(*p))
1814 {
1815 uschar *npp = getnamesptr;
1816 while (isalnum(*p)) *npp++ = *p++;
1817 *npp++ = 0;
1818 *npp = 0;
1819 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1820 if (n < 0)
1821 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1822 getnamesptr = npp;
1823 }
1824 continue;
1825
1826 case 'L':
1827 getlist = 1;
1828 continue;
1829
1830 case 'M':
1831 find_match_limit = 1;
1832 continue;
1833
1834 case 'N':
1835 options |= PCRE_NOTEMPTY;
1836 continue;
1837
1838 case 'O':
1839 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1840 if (n > size_offsets_max)
1841 {
1842 size_offsets_max = n;
1843 free(offsets);
1844 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1845 if (offsets == NULL)
1846 {
1847 printf("** Failed to get %d bytes of memory for offsets vector\n",
1848 size_offsets_max * sizeof(int));
1849 yield = 1;
1850 goto EXIT;
1851 }
1852 }
1853 use_size_offsets = n;
1854 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1855 continue;
1856
1857 case 'P':
1858 options |= PCRE_PARTIAL;
1859 continue;
1860
1861 case 'Q':
1862 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1863 if (extra == NULL)
1864 {
1865 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1866 extra->flags = 0;
1867 }
1868 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1869 extra->match_limit_recursion = n;
1870 continue;
1871
1872 case 'q':
1873 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1874 if (extra == NULL)
1875 {
1876 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1877 extra->flags = 0;
1878 }
1879 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1880 extra->match_limit = n;
1881 continue;
1882
1883 #if !defined NODFA
1884 case 'R':
1885 options |= PCRE_DFA_RESTART;
1886 continue;
1887 #endif
1888
1889 case 'S':
1890 show_malloc = 1;
1891 continue;
1892
1893 case 'Z':
1894 options |= PCRE_NOTEOL;
1895 continue;
1896
1897 case '?':
1898 options |= PCRE_NO_UTF8_CHECK;
1899 continue;
1900
1901 case '<':
1902 {
1903 int x = check_newline(p, outfile);
1904 if (x == 0) goto NEXT_DATA;
1905 options |= x;
1906 while (*p++ != '>');
1907 }
1908 continue;
1909 }
1910 *q++ = c;
1911 }
1912 *q = 0;
1913 len = q - dbuffer;
1914
1915 if ((all_use_dfa || use_dfa) && find_match_limit)
1916 {
1917 printf("**Match limit not relevant for DFA matching: ignored\n");
1918 find_match_limit = 0;
1919 }
1920
1921 /* Handle matching via the POSIX interface, which does not
1922 support timing or playing with the match limit or callout data. */
1923
1924 #if !defined NOPOSIX
1925 if (posix || do_posix)
1926 {
1927 int rc;
1928 int eflags = 0;
1929 regmatch_t *pmatch = NULL;
1930 if (use_size_offsets > 0)
1931 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1932 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1933 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1934
1935 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1936
1937 if (rc != 0)
1938 {
1939 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1940 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1941 }
1942 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1943 != 0)
1944 {
1945 fprintf(outfile, "Matched with REG_NOSUB\n");
1946 }
1947 else
1948 {
1949 size_t i;
1950 for (i = 0; i < (size_t)use_size_offsets; i++)
1951 {
1952 if (pmatch[i].rm_so >= 0)
1953 {
1954 fprintf(outfile, "%2d: ", (int)i);
1955 (void)pchars(dbuffer + pmatch[i].rm_so,
1956 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1957 fprintf(outfile, "\n");
1958 if (i == 0 && do_showrest)
1959 {
1960 fprintf(outfile, " 0+ ");
1961 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1962 outfile);
1963 fprintf(outfile, "\n");
1964 }
1965 }
1966 }
1967 }
1968 free(pmatch);
1969 }
1970
1971 /* Handle matching via the native interface - repeats for /g and /G */
1972
1973 else
1974 #endif /* !defined NOPOSIX */
1975
1976 for (;; gmatched++) /* Loop for /g or /G */
1977 {
1978 if (timeitm > 0)
1979 {
1980 register int i;
1981 clock_t time_taken;
1982 clock_t start_time = clock();
1983
1984 #if !defined NODFA
1985 if (all_use_dfa || use_dfa)
1986 {
1987 int workspace[1000];
1988 for (i = 0; i < timeitm; i++)
1989 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1990 options | g_notempty, use_offsets, use_size_offsets, workspace,
1991 sizeof(workspace)/sizeof(int));
1992 }
1993 else
1994 #endif
1995
1996 for (i = 0; i < timeitm; i++)
1997 count = pcre_exec(re, extra, (char *)bptr, len,
1998 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1999
2000 time_taken = clock() - start_time;
2001 fprintf(outfile, "Execute time %.4f milliseconds\n",
2002 (((double)time_taken * 1000.0) / (double)timeitm) /
2003 (double)CLOCKS_PER_SEC);
2004 }
2005
2006 /* If find_match_limit is set, we want to do repeated matches with
2007 varying limits in order to find the minimum value for the match limit and
2008 for the recursion limit. */
2009
2010 if (find_match_limit)
2011 {
2012 if (extra == NULL)
2013 {
2014 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2015 extra->flags = 0;
2016 }
2017
2018 (void)check_match_limit(re, extra, bptr, len, start_offset,
2019 options|g_notempty, use_offsets, use_size_offsets,
2020 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2021 PCRE_ERROR_MATCHLIMIT, "match()");
2022
2023 count = check_match_limit(re, extra, bptr, len, start_offset,
2024 options|g_notempty, use_offsets, use_size_offsets,
2025 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2026 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2027 }
2028
2029 /* If callout_data is set, use the interface with additional data */
2030
2031 else if (callout_data_set)
2032 {
2033 if (extra == NULL)
2034 {
2035 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2036 extra->flags = 0;
2037 }
2038 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2039 extra->callout_data = &callout_data;
2040 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2041 options | g_notempty, use_offsets, use_size_offsets);
2042 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2043 }
2044
2045 /* The normal case is just to do the match once, with the default
2046 value of match_limit. */
2047
2048 #if !defined NODFA
2049 else if (all_use_dfa || use_dfa)
2050 {
2051 int workspace[1000];
2052 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2053 options | g_notempty, use_offsets, use_size_offsets, workspace,
2054 sizeof(workspace)/sizeof(int));
2055 if (count == 0)
2056 {
2057 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2058 count = use_size_offsets/2;
2059 }
2060 }
2061 #endif
2062
2063 else
2064 {
2065 count = pcre_exec(re, extra, (char *)bptr, len,
2066 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2067 if (count == 0)
2068 {
2069 fprintf(outfile, "Matched, but too many substrings\n");
2070 count = use_size_offsets/3;
2071 }
2072 }
2073
2074 /* Matched */
2075
2076 if (count >= 0)
2077 {
2078 int i, maxcount;
2079
2080 #if !defined NODFA
2081 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2082 #endif
2083 maxcount = use_size_offsets/3;
2084
2085 /* This is a check against a lunatic return value. */
2086
2087 if (count > maxcount)
2088 {
2089 fprintf(outfile,
2090 "** PCRE error: returned count %d is too big for offset size %d\n",
2091 count, use_size_offsets);
2092 count = use_size_offsets/3;
2093 if (do_g || do_G)
2094 {
2095 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2096 do_g = do_G = FALSE; /* Break g/G loop */
2097 }
2098 }
2099
2100 for (i = 0; i < count * 2; i += 2)
2101 {
2102 if (use_offsets[i] < 0)
2103 fprintf(outfile, "%2d: <unset>\n", i/2);
2104 else
2105 {
2106 fprintf(outfile, "%2d: ", i/2);
2107 (void)pchars(bptr + use_offsets[i],
2108 use_offsets[i+1] - use_offsets[i], outfile);
2109 fprintf(outfile, "\n");
2110 if (i == 0)
2111 {
2112 if (do_showrest)
2113 {
2114 fprintf(outfile, " 0+ ");
2115 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2116 outfile);
2117 fprintf(outfile, "\n");
2118 }
2119 }
2120 }
2121 }
2122
2123 for (i = 0; i < 32; i++)
2124 {
2125 if ((copystrings & (1 << i)) != 0)
2126 {
2127 char copybuffer[256];
2128 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2129 i, copybuffer, sizeof(copybuffer));
2130 if (rc < 0)
2131 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2132 else
2133 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2134 }
2135 }
2136
2137 for (copynamesptr = copynames;
2138 *copynamesptr != 0;
2139 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2140 {
2141 char copybuffer[256];
2142 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2143 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2144 if (rc < 0)
2145 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2146 else
2147 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2148 }
2149
2150 for (i = 0; i < 32; i++)
2151 {
2152 if ((getstrings & (1 << i)) != 0)
2153 {
2154 const char *substring;
2155 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2156 i, &substring);
2157 if (rc < 0)
2158 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2159 else
2160 {
2161 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2162 pcre_free_substring(substring);
2163 }
2164 }
2165 }
2166
2167 for (getnamesptr = getnames;
2168 *getnamesptr != 0;
2169 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2170 {
2171 const char *substring;
2172 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2173 count, (char *)getnamesptr, &substring);
2174 if (rc < 0)
2175 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2176 else
2177 {
2178 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2179 pcre_free_substring(substring);
2180 }
2181 }
2182
2183 if (getlist)
2184 {
2185 const char **stringlist;
2186 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2187 &stringlist);
2188 if (rc < 0)
2189 fprintf(outfile, "get substring list failed %d\n", rc);
2190 else
2191 {
2192 for (i = 0; i < count; i++)
2193 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2194 if (stringlist[i] != NULL)
2195 fprintf(outfile, "string list not terminated by NULL\n");
2196 /* free((void *)stringlist); */
2197 pcre_free_substring_list(stringlist);
2198 }
2199 }
2200 }
2201
2202 /* There was a partial match */
2203
2204 else if (count == PCRE_ERROR_PARTIAL)
2205 {
2206 fprintf(outfile, "Partial match");
2207 #if !defined NODFA
2208 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2209 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2210 bptr + use_offsets[0]);
2211 #endif
2212 fprintf(outfile, "\n");
2213 break; /* Out of the /g loop */
2214 }
2215
2216 /* Failed to match. If this is a /g or /G loop and we previously set
2217 g_notempty after a null match, this is not necessarily the end. We want
2218 to advance the start offset, and continue. We won't be at the end of the
2219 string - that was checked before setting g_notempty.
2220
2221 Complication arises in the case when the newline option is "any".
2222 If the previous match was at the end of a line terminated by CRLF, an
2223 advance of one character just passes the \r, whereas we should prefer the
2224 longer newline sequence, as does the code in pcre_exec(). Fudge the
2225 offset value to achieve this.
2226
2227 Otherwise, in the case of UTF-8 matching, the advance must be one
2228 character, not one byte. */
2229
2230 else
2231 {
2232 if (g_notempty != 0)
2233 {
2234 int onechar = 1;
2235 unsigned int obits = ((real_pcre *)re)->options;
2236 use_offsets[0] = start_offset;
2237 if ((obits & PCRE_NEWLINE_BITS) == 0)
2238 {
2239 int d;
2240 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2241 obits = (d == '\r')? PCRE_NEWLINE_CR :
2242 (d == '\n')? PCRE_NEWLINE_LF :
2243 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2244 (d == -1)? PCRE_NEWLINE_ANY : 0;
2245 }
2246 if ((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY &&
2247 start_offset < len - 1 &&
2248 bptr[start_offset] == '\r' &&
2249 bptr[start_offset+1] == '\n')
2250 onechar++;
2251 else if (use_utf8)
2252 {
2253 while (start_offset + onechar < len)
2254 {
2255 int tb = bptr[start_offset+onechar];
2256 if (tb <= 127) break;
2257 tb &= 0xc0;
2258 if (tb != 0 && tb != 0xc0) onechar++;
2259 }
2260 }
2261 use_offsets[1] = start_offset + onechar;
2262 }
2263 else
2264 {
2265 if (count == PCRE_ERROR_NOMATCH)
2266 {
2267 if (gmatched == 0) fprintf(outfile, "No match\n");
2268 }
2269 else fprintf(outfile, "Error %d\n", count);
2270 break; /* Out of the /g loop */
2271 }
2272 }
2273
2274 /* If not /g or /G we are done */
2275
2276 if (!do_g && !do_G) break;
2277
2278 /* If we have matched an empty string, first check to see if we are at
2279 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2280 what Perl's /g option does. This turns out to be rather cunning. First
2281 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2282 same point. If this fails (picked up above) we advance to the next
2283 character. */
2284
2285 g_notempty = 0;
2286
2287 if (use_offsets[0] == use_offsets[1])
2288 {
2289 if (use_offsets[0] == len) break;
2290 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2291 }
2292
2293 /* For /g, update the start offset, leaving the rest alone */
2294
2295 if (do_g) start_offset = use_offsets[1];
2296
2297 /* For /G, update the pointer and length */
2298
2299 else
2300 {
2301 bptr += use_offsets[1];
2302 len -= use_offsets[1];
2303 }
2304 } /* End of loop for /g and /G */
2305
2306 NEXT_DATA: continue;
2307 } /* End of loop for data lines */
2308
2309 CONTINUE:
2310
2311 #if !defined NOPOSIX
2312 if (posix || do_posix) regfree(&preg);
2313 #endif
2314
2315 if (re != NULL) new_free(re);
2316 if (extra != NULL) new_free(extra);
2317 if (tables != NULL)
2318 {
2319 new_free((void *)tables);
2320 setlocale(LC_CTYPE, "C");
2321 locale_set = 0;
2322 }
2323 }
2324
2325 if (infile == stdin) fprintf(outfile, "\n");
2326
2327 EXIT:
2328
2329 if (infile != NULL && infile != stdin) fclose(infile);
2330 if (outfile != NULL && outfile != stdout) fclose(outfile);
2331
2332 free(buffer);
2333 free(dbuffer);
2334 free(pbuffer);
2335 free(offsets);
2336
2337 return yield;
2338 }
2339
2340 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12