/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 226 - (show annotations) (download)
Tue Aug 21 11:46:08 2007 UTC (6 years, 8 months ago) by ph10
File MIME type: text/plain
File size: 68848 byte(s)
Don't advance by 2 if explicit \r or \n in the pattern. Add 
PCRE_INFO_HASCRORLF.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include <config.h>
41 #endif
42
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50
51
52 /* A number of things vary for Windows builds. Originally, pcretest opened its
53 input and output without "b"; then I was told that "b" was needed in some
54 environments, so it was added for release 5.0 to both the input and output. (It
55 makes no difference on Unix-like systems.) Later I was told that it is wrong
56 for the input on Windows. I've now abstracted the modes into two macros that
57 are set here, to make it easier to fiddle with them, and removed "b" from the
58 input mode under Windows. */
59
60 #if defined(_WIN32) || defined(WIN32)
61 #include <io.h> /* For _setmode() */
62 #include <fcntl.h> /* For _O_BINARY */
63 #define INPUT_MODE "r"
64 #define OUTPUT_MODE "wb"
65
66 #else
67 #include <sys/time.h> /* These two includes are needed */
68 #include <sys/resource.h> /* for setrlimit(). */
69 #define INPUT_MODE "rb"
70 #define OUTPUT_MODE "wb"
71 #endif
72
73
74 /* We have to include pcre_internal.h because we need the internal info for
75 displaying the results of pcre_study() and we also need to know about the
76 internal macros, structures, and other internal data values; pcretest has
77 "inside information" compared to a program that strictly follows the PCRE API.
78
79 Although pcre_internal.h does itself include pcre.h, we explicitly include it
80 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81 appropriately for an application, not for building PCRE. */
82
83 #include "pcre.h"
84 #include "pcre_internal.h"
85
86 /* We need access to the data tables that PCRE uses. So as not to have to keep
87 two copies, we include the source file here, changing the names of the external
88 symbols to prevent clashes. */
89
90 #define _pcre_utf8_table1 utf8_table1
91 #define _pcre_utf8_table1_size utf8_table1_size
92 #define _pcre_utf8_table2 utf8_table2
93 #define _pcre_utf8_table3 utf8_table3
94 #define _pcre_utf8_table4 utf8_table4
95 #define _pcre_utt utt
96 #define _pcre_utt_size utt_size
97 #define _pcre_OP_lengths OP_lengths
98
99 #include "pcre_tables.c"
100
101 /* We also need the pcre_printint() function for printing out compiled
102 patterns. This function is in a separate file so that it can be included in
103 pcre_compile.c when that module is compiled with debugging enabled.
104
105 The definition of the macro PRINTABLE, which determines whether to print an
106 output character as-is or as a hex value when showing compiled patterns, is
107 contained in this file. We use it here also, in cases when the locale has not
108 been explicitly changed, so as to get consistent output from systems that
109 differ in their output from isprint() even in the "C" locale. */
110
111 #include "pcre_printint.src"
112
113 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
114
115
116 /* It is possible to compile this test program without including support for
117 testing the POSIX interface, though this is not available via the standard
118 Makefile. */
119
120 #if !defined NOPOSIX
121 #include "pcreposix.h"
122 #endif
123
124 /* It is also possible, for the benefit of the version currently imported into
125 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
126 interface to the DFA matcher (NODFA), and without the doublecheck of the old
127 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
128 UTF8 support if PCRE is built without it. */
129
130 #ifndef SUPPORT_UTF8
131 #ifndef NOUTF8
132 #define NOUTF8
133 #endif
134 #endif
135
136
137 /* Other parameters */
138
139 #ifndef CLOCKS_PER_SEC
140 #ifdef CLK_TCK
141 #define CLOCKS_PER_SEC CLK_TCK
142 #else
143 #define CLOCKS_PER_SEC 100
144 #endif
145 #endif
146
147 /* This is the default loop count for timing. */
148
149 #define LOOPREPEAT 500000
150
151 /* Static variables */
152
static FILE *outfile;          /* Stream that results are written to; file scope so the malloc wrappers and callout() can reach it */
static int log_store = 0;      /* Reset to the -s/-m command line setting before each pattern's options are read */
static int callout_count;      /* Incremented by callout() each time the callout numbered callout_fail_id is reached */
static int callout_extra;      /* Non-zero: callout() also lists the captured substrings and always re-prints the subject */
static int callout_fail_count; /* callout() returns 1 once callout_count has reached this value */
static int callout_fail_id;    /* The callout number that callout_count and callout_fail_count apply to */
static int debug_lengths;      /* Set to 1 at the start of each pattern; NOTE(review): its readers are outside this section */
static int first_callout;      /* Non-zero makes callout() re-print the subject; callout() clears it after each call */
static int locale_set = 0;     /* Non-zero makes PRINTHEX use isprint() instead of PRINTABLE() */
static int show_malloc;        /* Non-zero: the malloc/free wrappers trace every call to outfile */
static int use_utf8;           /* Non-zero: pchars() decodes its input as UTF-8; reset to 0 for each pattern */
static size_t gotten_store;    /* Size passed to the most recent new_malloc() call */
165
166 /* The buffers grow automatically if very long input lines are encountered. */
167
168 static int buffer_size = 50000;
169 static uschar *buffer = NULL;
170 static uschar *dbuffer = NULL;
171 static uschar *pbuffer = NULL;
172
173
174
175 /*************************************************
176 * Read or extend an input line *
177 *************************************************/
178
179 /* Input lines are read into buffer, but both patterns and data lines can be
180 continued over multiple input lines. In addition, if the buffer fills up, we
181 want to automatically expand it so as to be able to handle extremely large
182 lines that are needed for certain stress tests. When the input buffer is
183 expanded, the other two buffers must also be expanded likewise, and the
184 contents of pbuffer, which are a copy of the input for callouts, must be
185 preserved (for when expansion happens for a data line). This is not the most
186 optimal way of handling this, but hey, this is just a test program!
187
188 Arguments:
189 f the file to read
190 start where in buffer to start (this *must* be within buffer)
191
192 Returns: pointer to the start of new data
193 could be a copy of start, or could be moved
194 NULL if no data read and EOF reached
195 */
196
197 static uschar *
198 extend_inputline(FILE *f, uschar *start)
199 {
200 uschar *here = start;
201
202 for (;;)
203 {
204 int rlen = buffer_size - (here - buffer);
205
206 if (rlen > 1000)
207 {
208 int dlen;
209 if (fgets((char *)here, rlen, f) == NULL)
210 return (here == start)? NULL : start;
211 dlen = (int)strlen((char *)here);
212 if (dlen > 0 && here[dlen - 1] == '\n') return start;
213 here += dlen;
214 }
215
216 else
217 {
218 int new_buffer_size = 2*buffer_size;
219 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
220 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
221 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
222
223 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
224 {
225 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
226 exit(1);
227 }
228
229 memcpy(new_buffer, buffer, buffer_size);
230 memcpy(new_pbuffer, pbuffer, buffer_size);
231
232 buffer_size = new_buffer_size;
233
234 start = new_buffer + (start - buffer);
235 here = new_buffer + (here - buffer);
236
237 free(buffer);
238 free(dbuffer);
239 free(pbuffer);
240
241 buffer = new_buffer;
242 dbuffer = new_dbuffer;
243 pbuffer = new_pbuffer;
244 }
245 }
246
247 return NULL; /* Control never gets here */
248 }
249
250
251
252
253
254
255
256 /*************************************************
257 * Read number from string *
258 *************************************************/
259
/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped and then decimal digits are accumulated. A
number that is too big for an int is clamped to INT_MAX rather than being
allowed to overflow (signed overflow is undefined behaviour); all of its digits
are still consumed, so *endptr is always left just after the number.

Arguments:
  str         string to be converted
  endptr      where to put the end pointer

Returns:      the value as an int; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int result = 0;

  while (isspace(*str)) str++;

  while (isdigit(*str)) {
    int digit = *str++ - '0';

    /* result*10 + digit fits in an int only if result <= (INT_MAX-digit)/10 */

    if (result > (INT_MAX - digit)/10)
      result = INT_MAX;
    else
      result = result * 10 + digit;
  }

  *endptr = str;
  return result;
}
280
281
282
283
284 /*************************************************
285 * Convert UTF-8 string to value *
286 *************************************************/
287
288 /* This function takes one or more bytes that represents a UTF-8 character,
289 and returns the value of the character.
290
291 Argument:
292 utf8bytes a pointer to the byte vector
293 vptr a pointer to an int to receive the value
294
295 Returns: > 0 => the number of bytes consumed
296 -6 to 0 => malformed UTF-8 character at offset = (-return)
297 */
298
299 #if !defined NOUTF8
300
301 static int
302 utf82ord(unsigned char *utf8bytes, int *vptr)
303 {
304 int c = *utf8bytes++;
305 int d = c;
306 int i, j, s;
307
308 for (i = -1; i < 6; i++) /* i is number of additional bytes */
309 {
310 if ((d & 0x80) == 0) break;
311 d <<= 1;
312 }
313
314 if (i == -1) { *vptr = c; return 1; } /* ascii character */
315 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
316
317 /* i now has a value in the range 1-5 */
318
319 s = 6*i;
320 d = (c & utf8_table3[i]) << s;
321
322 for (j = 0; j < i; j++)
323 {
324 c = *utf8bytes++;
325 if ((c & 0xc0) != 0x80) return -(j+1);
326 s -= 6;
327 d |= (c & 0x3f) << s;
328 }
329
330 /* Check that encoding was the correct unique one */
331
332 for (j = 0; j < utf8_table1_size; j++)
333 if (d <= utf8_table1[j]) break;
334 if (j != i) return -(i+1);
335
336 /* Valid value */
337
338 *vptr = d;
339 return i+1;
340 }
341
342 #endif
343
344
345
346 /*************************************************
347 * Convert character value to UTF-8 *
348 *************************************************/
349
350 /* This function takes an integer value in the range 0 - 0x7fffffff
351 and encodes it as a UTF-8 character in 0 to 6 bytes.
352
353 Arguments:
354 cvalue the character value
355 utf8bytes pointer to buffer for result - at least 6 bytes long
356
357 Returns: number of characters placed in the buffer
358 */
359
360 #if !defined NOUTF8
361
362 static int
363 ord2utf8(int cvalue, uschar *utf8bytes)
364 {
365 register int i, j;
366 for (i = 0; i < utf8_table1_size; i++)
367 if (cvalue <= utf8_table1[i]) break;
368 utf8bytes += i;
369 for (j = i; j > 0; j--)
370 {
371 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
372 cvalue >>= 6;
373 }
374 *utf8bytes = utf8_table2[i] | cvalue;
375 return i + 1;
376 }
377
378 #endif
379
380
381
382 /*************************************************
383 * Print character string *
384 *************************************************/
385
386 /* Character string printing function. Must handle UTF-8 strings in utf8
387 mode. Yields number of characters printed. If handed a NULL file, just counts
388 chars without printing. */
389
390 static int pchars(unsigned char *p, int length, FILE *f)
391 {
392 int c = 0;
393 int yield = 0;
394
395 while (length-- > 0)
396 {
397 #if !defined NOUTF8
398 if (use_utf8)
399 {
400 int rc = utf82ord(p, &c);
401
402 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
403 {
404 length -= rc - 1;
405 p += rc;
406 if (PRINTHEX(c))
407 {
408 if (f != NULL) fprintf(f, "%c", c);
409 yield++;
410 }
411 else
412 {
413 int n = 4;
414 if (f != NULL) fprintf(f, "\\x{%02x}", c);
415 yield += (n <= 0x000000ff)? 2 :
416 (n <= 0x00000fff)? 3 :
417 (n <= 0x0000ffff)? 4 :
418 (n <= 0x000fffff)? 5 : 6;
419 }
420 continue;
421 }
422 }
423 #endif
424
425 /* Not UTF-8, or malformed UTF-8 */
426
427 c = *p++;
428 if (PRINTHEX(c))
429 {
430 if (f != NULL) fprintf(f, "%c", c);
431 yield++;
432 }
433 else
434 {
435 if (f != NULL) fprintf(f, "\\x%02x", c);
436 yield += 4;
437 }
438 }
439
440 return yield;
441 }
442
443
444
445 /*************************************************
446 * Callout function *
447 *************************************************/
448
449 /* Called from PCRE as a result of the (?C) item. We print out where we are in
450 the match. Yield zero unless more callouts than the fail count, or the callout
451 data is not zero. */
452
453 static int callout(pcre_callout_block *cb)
454 {
455 FILE *f = (first_callout | callout_extra)? outfile : NULL;
456 int i, pre_start, post_start, subject_length;
457
458 if (callout_extra)
459 {
460 fprintf(f, "Callout %d: last capture = %d\n",
461 cb->callout_number, cb->capture_last);
462
463 for (i = 0; i < cb->capture_top * 2; i += 2)
464 {
465 if (cb->offset_vector[i] < 0)
466 fprintf(f, "%2d: <unset>\n", i/2);
467 else
468 {
469 fprintf(f, "%2d: ", i/2);
470 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
471 cb->offset_vector[i+1] - cb->offset_vector[i], f);
472 fprintf(f, "\n");
473 }
474 }
475 }
476
477 /* Re-print the subject in canonical form, the first time or if giving full
478 datails. On subsequent calls in the same match, we use pchars just to find the
479 printed lengths of the substrings. */
480
481 if (f != NULL) fprintf(f, "--->");
482
483 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
484 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
485 cb->current_position - cb->start_match, f);
486
487 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
488
489 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
490 cb->subject_length - cb->current_position, f);
491
492 if (f != NULL) fprintf(f, "\n");
493
494 /* Always print appropriate indicators, with callout number if not already
495 shown. For automatic callouts, show the pattern offset. */
496
497 if (cb->callout_number == 255)
498 {
499 fprintf(outfile, "%+3d ", cb->pattern_position);
500 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
501 }
502 else
503 {
504 if (callout_extra) fprintf(outfile, " ");
505 else fprintf(outfile, "%3d ", cb->callout_number);
506 }
507
508 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
509 fprintf(outfile, "^");
510
511 if (post_start > 0)
512 {
513 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
514 fprintf(outfile, "^");
515 }
516
517 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
518 fprintf(outfile, " ");
519
520 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
521 pbuffer + cb->pattern_position);
522
523 fprintf(outfile, "\n");
524 first_callout = 0;
525
526 if (cb->callout_data != NULL)
527 {
528 int callout_data = *((int *)(cb->callout_data));
529 if (callout_data != 0)
530 {
531 fprintf(outfile, "Callout data = %d\n", callout_data);
532 return callout_data;
533 }
534 }
535
536 return (cb->callout_number != callout_fail_id)? 0 :
537 (++callout_count >= callout_fail_count)? 1 : 0;
538 }
539
540
541 /*************************************************
542 * Local malloc functions *
543 *************************************************/
544
545 /* Alternative malloc function, to test functionality and show the size of the
546 compiled re. */
547
548 static void *new_malloc(size_t size)
549 {
550 void *block = malloc(size);
551 gotten_store = size;
552 if (show_malloc)
553 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
554 return block;
555 }
556
557 static void new_free(void *block)
558 {
559 if (show_malloc)
560 fprintf(outfile, "free %p\n", block);
561 free(block);
562 }
563
564
565 /* For recursion malloc/free, to test stacking calls */
566
567 static void *stack_malloc(size_t size)
568 {
569 void *block = malloc(size);
570 if (show_malloc)
571 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
572 return block;
573 }
574
575 static void stack_free(void *block)
576 {
577 if (show_malloc)
578 fprintf(outfile, "stack_free %p\n", block);
579 free(block);
580 }
581
582
583 /*************************************************
584 * Call pcre_fullinfo() *
585 *************************************************/
586
587 /* Get one piece of information from the pcre_fullinfo() function */
588
589 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
590 {
591 int rc;
592 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
593 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
594 }
595
596
597
598 /*************************************************
599 * Byte flipping function *
600 *************************************************/
601
/* Reverse the byte order of a value.

Arguments:
  value       the value to be flipped
  n           2 to swap the two low-order bytes; any other value reverses the
                four low-order bytes

Returns:      the flipped value; bits above those that take part are zero
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int b0 = value & 0xff;
  unsigned long int b1 = (value >> 8) & 0xff;
  unsigned long int b2, b3;

  if (n == 2) return (b0 << 8) | b1;

  b2 = (value >> 16) & 0xff;
  b3 = (value >> 24) & 0xff;

  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
611
612
613
614
615 /*************************************************
616 * Check match or recursion limit *
617 *************************************************/
618
619 static int
620 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
621 int start_offset, int options, int *use_offsets, int use_size_offsets,
622 int flag, unsigned long int *limit, int errnumber, const char *msg)
623 {
624 int count;
625 int min = 0;
626 int mid = 64;
627 int max = -1;
628
629 extra->flags |= flag;
630
631 for (;;)
632 {
633 *limit = mid;
634
635 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
636 use_offsets, use_size_offsets);
637
638 if (count == errnumber)
639 {
640 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
641 min = mid;
642 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
643 }
644
645 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
646 count == PCRE_ERROR_PARTIAL)
647 {
648 if (mid == min + 1)
649 {
650 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
651 break;
652 }
653 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
654 max = mid;
655 mid = (min + mid)/2;
656 }
657 else break; /* Some other error */
658 }
659
660 extra->flags &= ~flag;
661 return count;
662 }
663
664
665
666 /*************************************************
667 * Check newline indicator *
668 *************************************************/
669
670 /* This is used both at compile and run-time to check for <xxx> escapes, where
671 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
672 no match.
673
674 Arguments:
675 p points after the leading '<'
676 f file for error message
677
678 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
679 */
680
681 static int
682 check_newline(uschar *p, FILE *f)
683 {
684 if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
685 if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
686 if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
687 if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
688 if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
689 fprintf(f, "Unknown newline type at: <%s\n", p);
690 return 0;
691 }
692
693
694
695 /*************************************************
696 * Usage function *
697 *************************************************/
698
/* Write a summary of the command line options to the standard output. The
lines for -dfa and -p are left out when pcretest is built with NODFA or NOPOSIX
defined, respectively. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input> [<output>]]\n"
        " -b show compiled code (bytecode)\n"
        " -C show PCRE compile-time options and exit\n"
        " -d debug: show compiled code and information (-b and -i)\n",
        stdout);
#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n",
        stdout);
#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif
  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s output store (memory) used information\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n",
        stdout);
}
724
725
726
727 /*************************************************
728 * Main Program *
729 *************************************************/
730
731 /* Read lines from named file or stdin and write to named file or stdout; lines
732 consist of a regular expression, in delimiters and optionally followed by
733 options, followed by a set of test data, terminated by an empty line. */
734
735 int main(int argc, char **argv)
736 {
737 FILE *infile = stdin;
738 int options = 0;
739 int study_options = 0;
740 int op = 1;
741 int timeit = 0;
742 int timeitm = 0;
743 int showinfo = 0;
744 int showstore = 0;
745 int quiet = 0;
746 int size_offsets = 45;
747 int size_offsets_max;
748 int *offsets = NULL;
749 #if !defined NOPOSIX
750 int posix = 0;
751 #endif
752 int debug = 0;
753 int done = 0;
754 int all_use_dfa = 0;
755 int yield = 0;
756 int stack_size;
757
758 /* These vectors store, end-to-end, a list of captured substring names. Assume
759 that 1024 is plenty long enough for the few names we'll be testing. */
760
761 uschar copynames[1024];
762 uschar getnames[1024];
763
764 uschar *copynamesptr;
765 uschar *getnamesptr;
766
767 /* Get buffers from malloc() so that Electric Fence will check their misuse
768 when I am debugging. They grow automatically when very long lines are read. */
769
770 buffer = (unsigned char *)malloc(buffer_size);
771 dbuffer = (unsigned char *)malloc(buffer_size);
772 pbuffer = (unsigned char *)malloc(buffer_size);
773
774 /* The outfile variable is static so that new_malloc can use it. */
775
776 outfile = stdout;
777
778 /* The following _setmode() stuff is some Windows magic that tells its runtime
779 library to translate CRLF into a single LF character. At least, that's what
780 I've been told: never having used Windows I take this all on trust. Originally
781 it set 0x8000, but then I was advised that _O_BINARY was better. */
782
783 #if defined(_WIN32) || defined(WIN32)
784 _setmode( _fileno( stdout ), _O_BINARY );
785 #endif
786
787 /* Scan options */
788
789 while (argc > 1 && argv[op][0] == '-')
790 {
791 unsigned char *endptr;
792
793 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
794 showstore = 1;
795 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
796 else if (strcmp(argv[op], "-b") == 0) debug = 1;
797 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
798 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
799 #if !defined NODFA
800 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
801 #endif
802 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
803 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
804 *endptr == 0))
805 {
806 op++;
807 argc--;
808 }
809 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
810 {
811 int both = argv[op][2] == 0;
812 int temp;
813 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
814 *endptr == 0))
815 {
816 timeitm = temp;
817 op++;
818 argc--;
819 }
820 else timeitm = LOOPREPEAT;
821 if (both) timeit = timeitm;
822 }
823 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
824 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
825 *endptr == 0))
826 {
827 #if defined(_WIN32) || defined(WIN32)
828 printf("PCRE: -S not supported on this OS\n");
829 exit(1);
830 #else
831 int rc;
832 struct rlimit rlim;
833 getrlimit(RLIMIT_STACK, &rlim);
834 rlim.rlim_cur = stack_size * 1024 * 1024;
835 rc = setrlimit(RLIMIT_STACK, &rlim);
836 if (rc != 0)
837 {
838 printf("PCRE: setrlimit() failed with error %d\n", rc);
839 exit(1);
840 }
841 op++;
842 argc--;
843 #endif
844 }
845 #if !defined NOPOSIX
846 else if (strcmp(argv[op], "-p") == 0) posix = 1;
847 #endif
848 else if (strcmp(argv[op], "-C") == 0)
849 {
850 int rc;
851 printf("PCRE version %s\n", pcre_version());
852 printf("Compiled with\n");
853 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
854 printf(" %sUTF-8 support\n", rc? "" : "No ");
855 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
856 printf(" %sUnicode properties support\n", rc? "" : "No ");
857 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
858 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
859 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
860 (rc == -2)? "ANYCRLF" :
861 (rc == -1)? "ANY" : "???");
862 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
863 printf(" Internal link size = %d\n", rc);
864 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
865 printf(" POSIX malloc threshold = %d\n", rc);
866 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
867 printf(" Default match limit = %d\n", rc);
868 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
869 printf(" Default recursion depth limit = %d\n", rc);
870 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
871 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
872 goto EXIT;
873 }
874 else if (strcmp(argv[op], "-help") == 0 ||
875 strcmp(argv[op], "--help") == 0)
876 {
877 usage();
878 goto EXIT;
879 }
880 else
881 {
882 printf("** Unknown or malformed option %s\n", argv[op]);
883 usage();
884 yield = 1;
885 goto EXIT;
886 }
887 op++;
888 argc--;
889 }
890
891 /* Get the store for the offsets vector, and remember what it was */
892
893 size_offsets_max = size_offsets;
894 offsets = (int *)malloc(size_offsets_max * sizeof(int));
895 if (offsets == NULL)
896 {
897 printf("** Failed to get %d bytes of memory for offsets vector\n",
898 (int)(size_offsets_max * sizeof(int)));
899 yield = 1;
900 goto EXIT;
901 }
902
903 /* Sort out the input and output files */
904
905 if (argc > 1)
906 {
907 infile = fopen(argv[op], INPUT_MODE);
908 if (infile == NULL)
909 {
910 printf("** Failed to open %s\n", argv[op]);
911 yield = 1;
912 goto EXIT;
913 }
914 }
915
916 if (argc > 2)
917 {
918 outfile = fopen(argv[op+1], OUTPUT_MODE);
919 if (outfile == NULL)
920 {
921 printf("** Failed to open %s\n", argv[op+1]);
922 yield = 1;
923 goto EXIT;
924 }
925 }
926
927 /* Set alternative malloc function */
928
929 pcre_malloc = new_malloc;
930 pcre_free = new_free;
931 pcre_stack_malloc = stack_malloc;
932 pcre_stack_free = stack_free;
933
934 /* Heading line unless quiet, then prompt for first regex if stdin */
935
936 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
937
938 /* Main loop */
939
940 while (!done)
941 {
942 pcre *re = NULL;
943 pcre_extra *extra = NULL;
944
945 #if !defined NOPOSIX /* There are still compilers that require no indent */
946 regex_t preg;
947 int do_posix = 0;
948 #endif
949
950 const char *error;
951 unsigned char *p, *pp, *ppp;
952 unsigned char *to_file = NULL;
953 const unsigned char *tables = NULL;
954 unsigned long int true_size, true_study_size = 0;
955 size_t size, regex_gotten_store;
956 int do_study = 0;
957 int do_debug = debug;
958 int do_G = 0;
959 int do_g = 0;
960 int do_showinfo = showinfo;
961 int do_showrest = 0;
962 int do_flip = 0;
963 int erroroffset, len, delimiter, poffset;
964
965 use_utf8 = 0;
966 debug_lengths = 1;
967
968 if (infile == stdin) printf(" re> ");
969 if (extend_inputline(infile, buffer) == NULL) break;
970 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
971 fflush(outfile);
972
973 p = buffer;
974 while (isspace(*p)) p++;
975 if (*p == 0) continue;
976
977 /* See if the pattern is to be loaded pre-compiled from a file. */
978
979 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
980 {
981 unsigned long int magic, get_options;
982 uschar sbuf[8];
983 FILE *f;
984
985 p++;
986 pp = p + (int)strlen((char *)p);
987 while (isspace(pp[-1])) pp--;
988 *pp = 0;
989
990 f = fopen((char *)p, "rb");
991 if (f == NULL)
992 {
993 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
994 continue;
995 }
996
997 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
998
999 true_size =
1000 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1001 true_study_size =
1002 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1003
1004 re = (real_pcre *)new_malloc(true_size);
1005 regex_gotten_store = gotten_store;
1006
1007 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1008
1009 magic = ((real_pcre *)re)->magic_number;
1010 if (magic != MAGIC_NUMBER)
1011 {
1012 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1013 {
1014 do_flip = 1;
1015 }
1016 else
1017 {
1018 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1019 fclose(f);
1020 continue;
1021 }
1022 }
1023
1024 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1025 do_flip? " (byte-inverted)" : "", p);
1026
1027 /* Need to know if UTF-8 for printing data strings */
1028
1029 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1030 use_utf8 = (get_options & PCRE_UTF8) != 0;
1031
1032 /* Now see if there is any following study data */
1033
1034 if (true_study_size != 0)
1035 {
1036 pcre_study_data *psd;
1037
1038 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1039 extra->flags = PCRE_EXTRA_STUDY_DATA;
1040
1041 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1042 extra->study_data = psd;
1043
1044 if (fread(psd, 1, true_study_size, f) != true_study_size)
1045 {
1046 FAIL_READ:
1047 fprintf(outfile, "Failed to read data from %s\n", p);
1048 if (extra != NULL) new_free(extra);
1049 if (re != NULL) new_free(re);
1050 fclose(f);
1051 continue;
1052 }
1053 fprintf(outfile, "Study data loaded from %s\n", p);
1054 do_study = 1; /* To get the data output if requested */
1055 }
1056 else fprintf(outfile, "No study data\n");
1057
1058 fclose(f);
1059 goto SHOW_INFO;
1060 }
1061
1062 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1063 the pattern; if it isn't complete, read more. */
1064
1065 delimiter = *p++;
1066
1067 if (isalnum(delimiter) || delimiter == '\\')
1068 {
1069 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1070 goto SKIP_DATA;
1071 }
1072
1073 pp = p;
1074 poffset = p - buffer;
1075
1076 for(;;)
1077 {
1078 while (*pp != 0)
1079 {
1080 if (*pp == '\\' && pp[1] != 0) pp++;
1081 else if (*pp == delimiter) break;
1082 pp++;
1083 }
1084 if (*pp != 0) break;
1085 if (infile == stdin) printf(" > ");
1086 if ((pp = extend_inputline(infile, pp)) == NULL)
1087 {
1088 fprintf(outfile, "** Unexpected EOF\n");
1089 done = 1;
1090 goto CONTINUE;
1091 }
1092 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1093 }
1094
1095 /* The buffer may have moved while being extended; reset the start of data
1096 pointer to the correct relative point in the buffer. */
1097
1098 p = buffer + poffset;
1099
1100 /* If the first character after the delimiter is backslash, make
1101 the pattern end with backslash. This is purely to provide a way
1102 of testing for the error message when a pattern ends with backslash. */
1103
1104 if (pp[1] == '\\') *pp++ = '\\';
1105
1106 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1107 for callouts. */
1108
1109 *pp++ = 0;
1110 strcpy((char *)pbuffer, (char *)p);
1111
1112 /* Look for options after final delimiter */
1113
1114 options = 0;
1115 study_options = 0;
1116 log_store = showstore; /* default from command line */
1117
1118 while (*pp != 0)
1119 {
1120 switch (*pp++)
1121 {
1122 case 'f': options |= PCRE_FIRSTLINE; break;
1123 case 'g': do_g = 1; break;
1124 case 'i': options |= PCRE_CASELESS; break;
1125 case 'm': options |= PCRE_MULTILINE; break;
1126 case 's': options |= PCRE_DOTALL; break;
1127 case 'x': options |= PCRE_EXTENDED; break;
1128
1129 case '+': do_showrest = 1; break;
1130 case 'A': options |= PCRE_ANCHORED; break;
1131 case 'B': do_debug = 1; break;
1132 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1133 case 'D': do_debug = do_showinfo = 1; break;
1134 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1135 case 'F': do_flip = 1; break;
1136 case 'G': do_G = 1; break;
1137 case 'I': do_showinfo = 1; break;
1138 case 'J': options |= PCRE_DUPNAMES; break;
1139 case 'M': log_store = 1; break;
1140 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1141
1142 #if !defined NOPOSIX
1143 case 'P': do_posix = 1; break;
1144 #endif
1145
1146 case 'S': do_study = 1; break;
1147 case 'U': options |= PCRE_UNGREEDY; break;
1148 case 'X': options |= PCRE_EXTRA; break;
1149 case 'Z': debug_lengths = 0; break;
1150 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1151 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1152
1153 case 'L':
1154 ppp = pp;
1155 /* The '\r' test here is so that it works on Windows. */
1156 /* The '0' test is just in case this is an unterminated line. */
1157 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1158 *ppp = 0;
1159 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1160 {
1161 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1162 goto SKIP_DATA;
1163 }
1164 locale_set = 1;
1165 tables = pcre_maketables();
1166 pp = ppp;
1167 break;
1168
1169 case '>':
1170 to_file = pp;
1171 while (*pp != 0) pp++;
1172 while (isspace(pp[-1])) pp--;
1173 *pp = 0;
1174 break;
1175
1176 case '<':
1177 {
1178 int x = check_newline(pp, outfile);
1179 if (x == 0) goto SKIP_DATA;
1180 options |= x;
1181 while (*pp++ != '>');
1182 }
1183 break;
1184
1185 case '\r': /* So that it works in Windows */
1186 case '\n':
1187 case ' ':
1188 break;
1189
1190 default:
1191 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1192 goto SKIP_DATA;
1193 }
1194 }
1195
1196 /* Handle compiling via the POSIX interface, which doesn't support the
1197 timing, showing, or debugging options, nor the ability to pass over
1198 local character tables. */
1199
1200 #if !defined NOPOSIX
1201 if (posix || do_posix)
1202 {
1203 int rc;
1204 int cflags = 0;
1205
1206 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1207 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1208 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1209 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1210 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1211
1212 rc = regcomp(&preg, (char *)p, cflags);
1213
1214 /* Compilation failed; go back for another re, skipping to blank line
1215 if non-interactive. */
1216
1217 if (rc != 0)
1218 {
1219 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1220 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1221 goto SKIP_DATA;
1222 }
1223 }
1224
1225 /* Handle compiling via the native interface */
1226
1227 else
1228 #endif /* !defined NOPOSIX */
1229
1230 {
1231 if (timeit > 0)
1232 {
1233 register int i;
1234 clock_t time_taken;
1235 clock_t start_time = clock();
1236 for (i = 0; i < timeit; i++)
1237 {
1238 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1239 if (re != NULL) free(re);
1240 }
1241 time_taken = clock() - start_time;
1242 fprintf(outfile, "Compile time %.4f milliseconds\n",
1243 (((double)time_taken * 1000.0) / (double)timeit) /
1244 (double)CLOCKS_PER_SEC);
1245 }
1246
1247 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1248
1249 /* Compilation failed; go back for another re, skipping to blank line
1250 if non-interactive. */
1251
1252 if (re == NULL)
1253 {
1254 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1255 SKIP_DATA:
1256 if (infile != stdin)
1257 {
1258 for (;;)
1259 {
1260 if (extend_inputline(infile, buffer) == NULL)
1261 {
1262 done = 1;
1263 goto CONTINUE;
1264 }
1265 len = (int)strlen((char *)buffer);
1266 while (len > 0 && isspace(buffer[len-1])) len--;
1267 if (len == 0) break;
1268 }
1269 fprintf(outfile, "\n");
1270 }
1271 goto CONTINUE;
1272 }
1273
1274 /* Compilation succeeded; print data if required. There are now two
1275 info-returning functions. The old one has a limited interface and
1276 returns only limited data. Check that it agrees with the newer one. */
1277
1278 if (log_store)
1279 fprintf(outfile, "Memory allocation (code space): %d\n",
1280 (int)(gotten_store -
1281 sizeof(real_pcre) -
1282 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1283
1284 /* Extract the size for possible writing before possibly flipping it,
1285 and remember the store that was got. */
1286
1287 true_size = ((real_pcre *)re)->size;
1288 regex_gotten_store = gotten_store;
1289
1290 /* If /S was present, study the regexp to generate additional info to
1291 help with the matching. */
1292
1293 if (do_study)
1294 {
1295 if (timeit > 0)
1296 {
1297 register int i;
1298 clock_t time_taken;
1299 clock_t start_time = clock();
1300 for (i = 0; i < timeit; i++)
1301 extra = pcre_study(re, study_options, &error);
1302 time_taken = clock() - start_time;
1303 if (extra != NULL) free(extra);
1304 fprintf(outfile, " Study time %.4f milliseconds\n",
1305 (((double)time_taken * 1000.0) / (double)timeit) /
1306 (double)CLOCKS_PER_SEC);
1307 }
1308 extra = pcre_study(re, study_options, &error);
1309 if (error != NULL)
1310 fprintf(outfile, "Failed to study: %s\n", error);
1311 else if (extra != NULL)
1312 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1313 }
1314
1315 /* If the 'F' option was present, we flip the bytes of all the integer
1316 fields in the regex data block and the study block. This is to make it
1317 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1318 compiled on a different architecture. */
1319
1320 if (do_flip)
1321 {
1322 real_pcre *rre = (real_pcre *)re;
1323 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1324 rre->size = byteflip(rre->size, sizeof(rre->size));
1325 rre->options = byteflip(rre->options, sizeof(rre->options));
1326 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1327 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1328 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1329 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1330 rre->name_table_offset = byteflip(rre->name_table_offset,
1331 sizeof(rre->name_table_offset));
1332 rre->name_entry_size = byteflip(rre->name_entry_size,
1333 sizeof(rre->name_entry_size));
1334 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1335
1336 if (extra != NULL)
1337 {
1338 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1339 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1340 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1341 }
1342 }
1343
1344 /* Extract information from the compiled data if required */
1345
1346 SHOW_INFO:
1347
1348 if (do_debug)
1349 {
1350 fprintf(outfile, "------------------------------------------------------------------\n");
1351 pcre_printint(re, outfile, debug_lengths);
1352 }
1353
1354 if (do_showinfo)
1355 {
1356 unsigned long int get_options, all_options;
1357 #if !defined NOINFOCHECK
1358 int old_first_char, old_options, old_count;
1359 #endif
1360 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1361 hascrorlf;
1362 int nameentrysize, namecount;
1363 const uschar *nametable;
1364
1365 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1366 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1367 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1368 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1369 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1370 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1371 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1372 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1373 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1374 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1375 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1376 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1377
1378 #if !defined NOINFOCHECK
1379 old_count = pcre_info(re, &old_options, &old_first_char);
1380 if (count < 0) fprintf(outfile,
1381 "Error %d from pcre_info()\n", count);
1382 else
1383 {
1384 if (old_count != count) fprintf(outfile,
1385 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1386 old_count);
1387
1388 if (old_first_char != first_char) fprintf(outfile,
1389 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1390 first_char, old_first_char);
1391
1392 if (old_options != (int)get_options) fprintf(outfile,
1393 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1394 get_options, old_options);
1395 }
1396 #endif
1397
1398 if (size != regex_gotten_store) fprintf(outfile,
1399 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1400 (int)size, (int)regex_gotten_store);
1401
1402 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1403 if (backrefmax > 0)
1404 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1405
1406 if (namecount > 0)
1407 {
1408 fprintf(outfile, "Named capturing subpatterns:\n");
1409 while (namecount-- > 0)
1410 {
1411 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1412 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1413 GET2(nametable, 0));
1414 nametable += nameentrysize;
1415 }
1416 }
1417
1418 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1419 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1420
1421 all_options = ((real_pcre *)re)->options;
1422 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1423
1424 if (get_options == 0) fprintf(outfile, "No options\n");
1425 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1426 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1427 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1428 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1429 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1430 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1431 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1432 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1433 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1434 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1435 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1436 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1437 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1438 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1439
1440 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1441
1442 switch (get_options & PCRE_NEWLINE_BITS)
1443 {
1444 case PCRE_NEWLINE_CR:
1445 fprintf(outfile, "Forced newline sequence: CR\n");
1446 break;
1447
1448 case PCRE_NEWLINE_LF:
1449 fprintf(outfile, "Forced newline sequence: LF\n");
1450 break;
1451
1452 case PCRE_NEWLINE_CRLF:
1453 fprintf(outfile, "Forced newline sequence: CRLF\n");
1454 break;
1455
1456 case PCRE_NEWLINE_ANYCRLF:
1457 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1458 break;
1459
1460 case PCRE_NEWLINE_ANY:
1461 fprintf(outfile, "Forced newline sequence: ANY\n");
1462 break;
1463
1464 default:
1465 break;
1466 }
1467
1468 if (first_char == -1)
1469 {
1470 fprintf(outfile, "First char at start or follows newline\n");
1471 }
1472 else if (first_char < 0)
1473 {
1474 fprintf(outfile, "No first char\n");
1475 }
1476 else
1477 {
1478 int ch = first_char & 255;
1479 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1480 "" : " (caseless)";
1481 if (PRINTHEX(ch))
1482 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1483 else
1484 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1485 }
1486
1487 if (need_char < 0)
1488 {
1489 fprintf(outfile, "No need char\n");
1490 }
1491 else
1492 {
1493 int ch = need_char & 255;
1494 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1495 "" : " (caseless)";
1496 if (PRINTHEX(ch))
1497 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1498 else
1499 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1500 }
1501
1502 /* Don't output study size; at present it is in any case a fixed
1503 value, but it varies, depending on the computer architecture, and
1504 so messes up the test suite. (And with the /F option, it might be
1505 flipped.) */
1506
1507 if (do_study)
1508 {
1509 if (extra == NULL)
1510 fprintf(outfile, "Study returned NULL\n");
1511 else
1512 {
1513 uschar *start_bits = NULL;
1514 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1515
1516 if (start_bits == NULL)
1517 fprintf(outfile, "No starting byte set\n");
1518 else
1519 {
1520 int i;
1521 int c = 24;
1522 fprintf(outfile, "Starting byte set: ");
1523 for (i = 0; i < 256; i++)
1524 {
1525 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1526 {
1527 if (c > 75)
1528 {
1529 fprintf(outfile, "\n ");
1530 c = 2;
1531 }
1532 if (PRINTHEX(i) && i != ' ')
1533 {
1534 fprintf(outfile, "%c ", i);
1535 c += 2;
1536 }
1537 else
1538 {
1539 fprintf(outfile, "\\x%02x ", i);
1540 c += 5;
1541 }
1542 }
1543 }
1544 fprintf(outfile, "\n");
1545 }
1546 }
1547 }
1548 }
1549
1550 /* If the '>' option was present, we write out the regex to a file, and
1551 that is all. The first 8 bytes of the file are the regex length and then
1552 the study length, in big-endian order. */
1553
1554 if (to_file != NULL)
1555 {
1556 FILE *f = fopen((char *)to_file, "wb");
1557 if (f == NULL)
1558 {
1559 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1560 }
1561 else
1562 {
1563 uschar sbuf[8];
1564 sbuf[0] = (true_size >> 24) & 255;
1565 sbuf[1] = (true_size >> 16) & 255;
1566 sbuf[2] = (true_size >> 8) & 255;
1567 sbuf[3] = (true_size) & 255;
1568
1569 sbuf[4] = (true_study_size >> 24) & 255;
1570 sbuf[5] = (true_study_size >> 16) & 255;
1571 sbuf[6] = (true_study_size >> 8) & 255;
1572 sbuf[7] = (true_study_size) & 255;
1573
1574 if (fwrite(sbuf, 1, 8, f) < 8 ||
1575 fwrite(re, 1, true_size, f) < true_size)
1576 {
1577 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1578 }
1579 else
1580 {
1581 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1582 if (extra != NULL)
1583 {
1584 if (fwrite(extra->study_data, 1, true_study_size, f) <
1585 true_study_size)
1586 {
1587 fprintf(outfile, "Write error on %s: %s\n", to_file,
1588 strerror(errno));
1589 }
1590 else fprintf(outfile, "Study data written to %s\n", to_file);
1591
1592 }
1593 }
1594 fclose(f);
1595 }
1596
1597 new_free(re);
1598 if (extra != NULL) new_free(extra);
1599 if (tables != NULL) new_free((void *)tables);
1600 continue; /* With next regex */
1601 }
1602 } /* End of non-POSIX compile */
1603
1604 /* Read data lines and test them */
1605
1606 for (;;)
1607 {
1608 uschar *q;
1609 uschar *bptr;
1610 int *use_offsets = offsets;
1611 int use_size_offsets = size_offsets;
1612 int callout_data = 0;
1613 int callout_data_set = 0;
1614 int count, c;
1615 int copystrings = 0;
1616 int find_match_limit = 0;
1617 int getstrings = 0;
1618 int getlist = 0;
1619 int gmatched = 0;
1620 int start_offset = 0;
1621 int g_notempty = 0;
1622 int use_dfa = 0;
1623
1624 options = 0;
1625
1626 *copynames = 0;
1627 *getnames = 0;
1628
1629 copynamesptr = copynames;
1630 getnamesptr = getnames;
1631
1632 pcre_callout = callout;
1633 first_callout = 1;
1634 callout_extra = 0;
1635 callout_count = 0;
1636 callout_fail_count = 999999;
1637 callout_fail_id = -1;
1638 show_malloc = 0;
1639
1640 if (extra != NULL) extra->flags &=
1641 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1642
1643 len = 0;
1644 for (;;)
1645 {
1646 if (infile == stdin) printf("data> ");
1647 if (extend_inputline(infile, buffer + len) == NULL)
1648 {
1649 if (len > 0) break;
1650 done = 1;
1651 goto CONTINUE;
1652 }
1653 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1654 len = (int)strlen((char *)buffer);
1655 if (buffer[len-1] == '\n') break;
1656 }
1657
1658 while (len > 0 && isspace(buffer[len-1])) len--;
1659 buffer[len] = 0;
1660 if (len == 0) break;
1661
1662 p = buffer;
1663 while (isspace(*p)) p++;
1664
1665 bptr = q = dbuffer;
1666 while ((c = *p++) != 0)
1667 {
1668 int i = 0;
1669 int n = 0;
1670
1671 if (c == '\\') switch ((c = *p++))
1672 {
1673 case 'a': c = 7; break;
1674 case 'b': c = '\b'; break;
1675 case 'e': c = 27; break;
1676 case 'f': c = '\f'; break;
1677 case 'n': c = '\n'; break;
1678 case 'r': c = '\r'; break;
1679 case 't': c = '\t'; break;
1680 case 'v': c = '\v'; break;
1681
1682 case '0': case '1': case '2': case '3':
1683 case '4': case '5': case '6': case '7':
1684 c -= '0';
1685 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1686 c = c * 8 + *p++ - '0';
1687
1688 #if !defined NOUTF8
1689 if (use_utf8 && c > 255)
1690 {
1691 unsigned char buff8[8];
1692 int ii, utn;
1693 utn = ord2utf8(c, buff8);
1694 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1695 c = buff8[ii]; /* Last byte */
1696 }
1697 #endif
1698 break;
1699
1700 case 'x':
1701
1702 /* Handle \x{..} specially - new Perl thing for utf8 */
1703
1704 #if !defined NOUTF8
1705 if (*p == '{')
1706 {
1707 unsigned char *pt = p;
1708 c = 0;
1709 while (isxdigit(*(++pt)))
1710 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1711 if (*pt == '}')
1712 {
1713 unsigned char buff8[8];
1714 int ii, utn;
1715 utn = ord2utf8(c, buff8);
1716 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1717 c = buff8[ii]; /* Last byte */
1718 p = pt + 1;
1719 break;
1720 }
1721 /* Not correct form; fall through */
1722 }
1723 #endif
1724
1725 /* Ordinary \x */
1726
1727 c = 0;
1728 while (i++ < 2 && isxdigit(*p))
1729 {
1730 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1731 p++;
1732 }
1733 break;
1734
1735 case 0: /* \ followed by EOF allows for an empty line */
1736 p--;
1737 continue;
1738
1739 case '>':
1740 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1741 continue;
1742
1743 case 'A': /* Option setting */
1744 options |= PCRE_ANCHORED;
1745 continue;
1746
1747 case 'B':
1748 options |= PCRE_NOTBOL;
1749 continue;
1750
1751 case 'C':
1752 if (isdigit(*p)) /* Set copy string */
1753 {
1754 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1755 copystrings |= 1 << n;
1756 }
1757 else if (isalnum(*p))
1758 {
1759 uschar *npp = copynamesptr;
1760 while (isalnum(*p)) *npp++ = *p++;
1761 *npp++ = 0;
1762 *npp = 0;
1763 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1764 if (n < 0)
1765 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1766 copynamesptr = npp;
1767 }
1768 else if (*p == '+')
1769 {
1770 callout_extra = 1;
1771 p++;
1772 }
1773 else if (*p == '-')
1774 {
1775 pcre_callout = NULL;
1776 p++;
1777 }
1778 else if (*p == '!')
1779 {
1780 callout_fail_id = 0;
1781 p++;
1782 while(isdigit(*p))
1783 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1784 callout_fail_count = 0;
1785 if (*p == '!')
1786 {
1787 p++;
1788 while(isdigit(*p))
1789 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1790 }
1791 }
1792 else if (*p == '*')
1793 {
1794 int sign = 1;
1795 callout_data = 0;
1796 if (*(++p) == '-') { sign = -1; p++; }
1797 while(isdigit(*p))
1798 callout_data = callout_data * 10 + *p++ - '0';
1799 callout_data *= sign;
1800 callout_data_set = 1;
1801 }
1802 continue;
1803
1804 #if !defined NODFA
1805 case 'D':
1806 #if !defined NOPOSIX
1807 if (posix || do_posix)
1808 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1809 else
1810 #endif
1811 use_dfa = 1;
1812 continue;
1813
1814 case 'F':
1815 options |= PCRE_DFA_SHORTEST;
1816 continue;
1817 #endif
1818
1819 case 'G':
1820 if (isdigit(*p))
1821 {
1822 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1823 getstrings |= 1 << n;
1824 }
1825 else if (isalnum(*p))
1826 {
1827 uschar *npp = getnamesptr;
1828 while (isalnum(*p)) *npp++ = *p++;
1829 *npp++ = 0;
1830 *npp = 0;
1831 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1832 if (n < 0)
1833 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1834 getnamesptr = npp;
1835 }
1836 continue;
1837
1838 case 'L':
1839 getlist = 1;
1840 continue;
1841
1842 case 'M':
1843 find_match_limit = 1;
1844 continue;
1845
1846 case 'N':
1847 options |= PCRE_NOTEMPTY;
1848 continue;
1849
1850 case 'O':
1851 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1852 if (n > size_offsets_max)
1853 {
1854 size_offsets_max = n;
1855 free(offsets);
1856 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1857 if (offsets == NULL)
1858 {
1859 printf("** Failed to get %d bytes of memory for offsets vector\n",
1860 (int)(size_offsets_max * sizeof(int)));
1861 yield = 1;
1862 goto EXIT;
1863 }
1864 }
1865 use_size_offsets = n;
1866 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1867 continue;
1868
1869 case 'P':
1870 options |= PCRE_PARTIAL;
1871 continue;
1872
1873 case 'Q':
1874 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1875 if (extra == NULL)
1876 {
1877 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1878 extra->flags = 0;
1879 }
1880 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1881 extra->match_limit_recursion = n;
1882 continue;
1883
1884 case 'q':
1885 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1886 if (extra == NULL)
1887 {
1888 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1889 extra->flags = 0;
1890 }
1891 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1892 extra->match_limit = n;
1893 continue;
1894
1895 #if !defined NODFA
1896 case 'R':
1897 options |= PCRE_DFA_RESTART;
1898 continue;
1899 #endif
1900
1901 case 'S':
1902 show_malloc = 1;
1903 continue;
1904
1905 case 'Z':
1906 options |= PCRE_NOTEOL;
1907 continue;
1908
1909 case '?':
1910 options |= PCRE_NO_UTF8_CHECK;
1911 continue;
1912
1913 case '<':
1914 {
1915 int x = check_newline(p, outfile);
1916 if (x == 0) goto NEXT_DATA;
1917 options |= x;
1918 while (*p++ != '>');
1919 }
1920 continue;
1921 }
1922 *q++ = c;
1923 }
1924 *q = 0;
1925 len = q - dbuffer;
1926
1927 if ((all_use_dfa || use_dfa) && find_match_limit)
1928 {
1929 printf("**Match limit not relevant for DFA matching: ignored\n");
1930 find_match_limit = 0;
1931 }
1932
1933 /* Handle matching via the POSIX interface, which does not
1934 support timing or playing with the match limit or callout data. */
1935
1936 #if !defined NOPOSIX
1937 if (posix || do_posix)
1938 {
1939 int rc;
1940 int eflags = 0;
1941 regmatch_t *pmatch = NULL;
1942 if (use_size_offsets > 0)
1943 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1944 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1945 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1946
1947 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1948
1949 if (rc != 0)
1950 {
1951 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1952 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1953 }
1954 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1955 != 0)
1956 {
1957 fprintf(outfile, "Matched with REG_NOSUB\n");
1958 }
1959 else
1960 {
1961 size_t i;
1962 for (i = 0; i < (size_t)use_size_offsets; i++)
1963 {
1964 if (pmatch[i].rm_so >= 0)
1965 {
1966 fprintf(outfile, "%2d: ", (int)i);
1967 (void)pchars(dbuffer + pmatch[i].rm_so,
1968 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1969 fprintf(outfile, "\n");
1970 if (i == 0 && do_showrest)
1971 {
1972 fprintf(outfile, " 0+ ");
1973 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1974 outfile);
1975 fprintf(outfile, "\n");
1976 }
1977 }
1978 }
1979 }
1980 free(pmatch);
1981 }
1982
1983 /* Handle matching via the native interface - repeats for /g and /G */
1984
1985 else
1986 #endif /* !defined NOPOSIX */
1987
1988 for (;; gmatched++) /* Loop for /g or /G */
1989 {
1990 if (timeitm > 0)
1991 {
1992 register int i;
1993 clock_t time_taken;
1994 clock_t start_time = clock();
1995
1996 #if !defined NODFA
1997 if (all_use_dfa || use_dfa)
1998 {
1999 int workspace[1000];
2000 for (i = 0; i < timeitm; i++)
2001 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2002 options | g_notempty, use_offsets, use_size_offsets, workspace,
2003 sizeof(workspace)/sizeof(int));
2004 }
2005 else
2006 #endif
2007
2008 for (i = 0; i < timeitm; i++)
2009 count = pcre_exec(re, extra, (char *)bptr, len,
2010 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2011
2012 time_taken = clock() - start_time;
2013 fprintf(outfile, "Execute time %.4f milliseconds\n",
2014 (((double)time_taken * 1000.0) / (double)timeitm) /
2015 (double)CLOCKS_PER_SEC);
2016 }
2017
2018 /* If find_match_limit is set, we want to do repeated matches with
2019 varying limits in order to find the minimum value for the match limit and
2020 for the recursion limit. */
2021
2022 if (find_match_limit)
2023 {
2024 if (extra == NULL)
2025 {
2026 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2027 extra->flags = 0;
2028 }
2029
2030 (void)check_match_limit(re, extra, bptr, len, start_offset,
2031 options|g_notempty, use_offsets, use_size_offsets,
2032 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2033 PCRE_ERROR_MATCHLIMIT, "match()");
2034
2035 count = check_match_limit(re, extra, bptr, len, start_offset,
2036 options|g_notempty, use_offsets, use_size_offsets,
2037 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2038 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2039 }
2040
2041 /* If callout_data is set, use the interface with additional data */
2042
2043 else if (callout_data_set)
2044 {
2045 if (extra == NULL)
2046 {
2047 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2048 extra->flags = 0;
2049 }
2050 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2051 extra->callout_data = &callout_data;
2052 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2053 options | g_notempty, use_offsets, use_size_offsets);
2054 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2055 }
2056
2057 /* The normal case is just to do the match once, with the default
2058 value of match_limit. */
2059
2060 #if !defined NODFA
2061 else if (all_use_dfa || use_dfa)
2062 {
2063 int workspace[1000];
2064 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2065 options | g_notempty, use_offsets, use_size_offsets, workspace,
2066 sizeof(workspace)/sizeof(int));
2067 if (count == 0)
2068 {
2069 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2070 count = use_size_offsets/2;
2071 }
2072 }
2073 #endif
2074
2075 else
2076 {
2077 count = pcre_exec(re, extra, (char *)bptr, len,
2078 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2079 if (count == 0)
2080 {
2081 fprintf(outfile, "Matched, but too many substrings\n");
2082 count = use_size_offsets/3;
2083 }
2084 }
2085
2086 /* Matched */
2087
2088 if (count >= 0)
2089 {
2090 int i, maxcount;
2091
2092 #if !defined NODFA
2093 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2094 #endif
2095 maxcount = use_size_offsets/3;
2096
2097 /* This is a check against a lunatic return value. */
2098
2099 if (count > maxcount)
2100 {
2101 fprintf(outfile,
2102 "** PCRE error: returned count %d is too big for offset size %d\n",
2103 count, use_size_offsets);
2104 count = use_size_offsets/3;
2105 if (do_g || do_G)
2106 {
2107 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2108 do_g = do_G = FALSE; /* Break g/G loop */
2109 }
2110 }
2111
2112 for (i = 0; i < count * 2; i += 2)
2113 {
2114 if (use_offsets[i] < 0)
2115 fprintf(outfile, "%2d: <unset>\n", i/2);
2116 else
2117 {
2118 fprintf(outfile, "%2d: ", i/2);
2119 (void)pchars(bptr + use_offsets[i],
2120 use_offsets[i+1] - use_offsets[i], outfile);
2121 fprintf(outfile, "\n");
2122 if (i == 0)
2123 {
2124 if (do_showrest)
2125 {
2126 fprintf(outfile, " 0+ ");
2127 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2128 outfile);
2129 fprintf(outfile, "\n");
2130 }
2131 }
2132 }
2133 }
2134
2135 for (i = 0; i < 32; i++)
2136 {
2137 if ((copystrings & (1 << i)) != 0)
2138 {
2139 char copybuffer[256];
2140 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2141 i, copybuffer, sizeof(copybuffer));
2142 if (rc < 0)
2143 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2144 else
2145 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2146 }
2147 }
2148
2149 for (copynamesptr = copynames;
2150 *copynamesptr != 0;
2151 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2152 {
2153 char copybuffer[256];
2154 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2155 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2156 if (rc < 0)
2157 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2158 else
2159 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2160 }
2161
2162 for (i = 0; i < 32; i++)
2163 {
2164 if ((getstrings & (1 << i)) != 0)
2165 {
2166 const char *substring;
2167 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2168 i, &substring);
2169 if (rc < 0)
2170 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2171 else
2172 {
2173 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2174 pcre_free_substring(substring);
2175 }
2176 }
2177 }
2178
2179 for (getnamesptr = getnames;
2180 *getnamesptr != 0;
2181 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2182 {
2183 const char *substring;
2184 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2185 count, (char *)getnamesptr, &substring);
2186 if (rc < 0)
2187 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2188 else
2189 {
2190 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2191 pcre_free_substring(substring);
2192 }
2193 }
2194
2195 if (getlist)
2196 {
2197 const char **stringlist;
2198 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2199 &stringlist);
2200 if (rc < 0)
2201 fprintf(outfile, "get substring list failed %d\n", rc);
2202 else
2203 {
2204 for (i = 0; i < count; i++)
2205 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2206 if (stringlist[i] != NULL)
2207 fprintf(outfile, "string list not terminated by NULL\n");
2208 /* free((void *)stringlist); */
2209 pcre_free_substring_list(stringlist);
2210 }
2211 }
2212 }
2213
2214 /* There was a partial match */
2215
2216 else if (count == PCRE_ERROR_PARTIAL)
2217 {
2218 fprintf(outfile, "Partial match");
2219 #if !defined NODFA
2220 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2221 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2222 bptr + use_offsets[0]);
2223 #endif
2224 fprintf(outfile, "\n");
2225 break; /* Out of the /g loop */
2226 }
2227
2228 /* Failed to match. If this is a /g or /G loop and we previously set
2229 g_notempty after a null match, this is not necessarily the end. We want
2230 to advance the start offset, and continue. We won't be at the end of the
2231 string - that was checked before setting g_notempty.
2232
2233 Complication arises in the case when the newline option is "any" or
2234 "anycrlf". If the previous match was at the end of a line terminated by
2235 CRLF, an advance of one character just passes the \r, whereas we should
2236 prefer the longer newline sequence, as does the code in pcre_exec().
2237 Fudge the offset value to achieve this.
2238
2239 Otherwise, in the case of UTF-8 matching, the advance must be one
2240 character, not one byte. */
2241
2242 else
2243 {
2244 if (g_notempty != 0)
2245 {
2246 int onechar = 1;
2247 unsigned int obits = ((real_pcre *)re)->options;
2248 use_offsets[0] = start_offset;
2249 if ((obits & PCRE_NEWLINE_BITS) == 0)
2250 {
2251 int d;
2252 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2253 obits = (d == '\r')? PCRE_NEWLINE_CR :
2254 (d == '\n')? PCRE_NEWLINE_LF :
2255 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2256 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2257 (d == -1)? PCRE_NEWLINE_ANY : 0;
2258 }
2259 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2260 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2261 &&
2262 start_offset < len - 1 &&
2263 bptr[start_offset] == '\r' &&
2264 bptr[start_offset+1] == '\n')
2265 onechar++;
2266 else if (use_utf8)
2267 {
2268 while (start_offset + onechar < len)
2269 {
2270 int tb = bptr[start_offset+onechar];
2271 if (tb <= 127) break;
2272 tb &= 0xc0;
2273 if (tb != 0 && tb != 0xc0) onechar++;
2274 }
2275 }
2276 use_offsets[1] = start_offset + onechar;
2277 }
2278 else
2279 {
2280 if (count == PCRE_ERROR_NOMATCH)
2281 {
2282 if (gmatched == 0) fprintf(outfile, "No match\n");
2283 }
2284 else fprintf(outfile, "Error %d\n", count);
2285 break; /* Out of the /g loop */
2286 }
2287 }
2288
2289 /* If not /g or /G we are done */
2290
2291 if (!do_g && !do_G) break;
2292
2293 /* If we have matched an empty string, first check to see if we are at
2294 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2295 what Perl's /g option does. This turns out to be rather cunning. First
2296 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2297 same point. If this fails (picked up above) we advance to the next
2298 character. */
2299
2300 g_notempty = 0;
2301
2302 if (use_offsets[0] == use_offsets[1])
2303 {
2304 if (use_offsets[0] == len) break;
2305 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2306 }
2307
2308 /* For /g, update the start offset, leaving the rest alone */
2309
2310 if (do_g) start_offset = use_offsets[1];
2311
2312 /* For /G, update the pointer and length */
2313
2314 else
2315 {
2316 bptr += use_offsets[1];
2317 len -= use_offsets[1];
2318 }
2319 } /* End of loop for /g and /G */
2320
2321 NEXT_DATA: continue;
2322 } /* End of loop for data lines */
2323
2324 CONTINUE:
2325
2326 #if !defined NOPOSIX
2327 if (posix || do_posix) regfree(&preg);
2328 #endif
2329
2330 if (re != NULL) new_free(re);
2331 if (extra != NULL) new_free(extra);
2332 if (tables != NULL)
2333 {
2334 new_free((void *)tables);
2335 setlocale(LC_CTYPE, "C");
2336 locale_set = 0;
2337 }
2338 }
2339
2340 if (infile == stdin) fprintf(outfile, "\n");
2341
2342 EXIT:
2343
2344 if (infile != NULL && infile != stdin) fclose(infile);
2345 if (outfile != NULL && outfile != stdout) fclose(outfile);
2346
2347 free(buffer);
2348 free(dbuffer);
2349 free(pbuffer);
2350 free(offsets);
2351
2352 return yield;
2353 }
2354
2355 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12