/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 211 - (show annotations) (download)
Thu Aug 9 09:52:43 2007 UTC (7 years, 4 months ago) by ph10
File MIME type: text/plain
File size: 68691 byte(s)
Update UTF-8 validity check and documentation.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include <config.h>
41 #endif
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <locale.h>
49 #include <errno.h>
50
51
52 /* A number of things vary for Windows builds. Originally, pcretest opened its
53 input and output without "b"; then I was told that "b" was needed in some
54 environments, so it was added for release 5.0 to both the input and output. (It
55 makes no difference on Unix-like systems.) Later I was told that it is wrong
56 for the input on Windows. I've now abstracted the modes into two macros that
57 are set here, to make it easier to fiddle with them, and removed "b" from the
58 input mode under Windows. */
59
60 #if defined(_WIN32) || defined(WIN32)
61 #include <io.h> /* For _setmode() */
62 #include <fcntl.h> /* For _O_BINARY */
63 #define INPUT_MODE "r"
64 #define OUTPUT_MODE "wb"
65
66 #else
67 #include <sys/time.h> /* These two includes are needed */
68 #include <sys/resource.h> /* for setrlimit(). */
69 #define INPUT_MODE "rb"
70 #define OUTPUT_MODE "wb"
71 #endif
72
73
74 /* We have to include pcre_internal.h because we need the internal info for
75 displaying the results of pcre_study() and we also need to know about the
76 internal macros, structures, and other internal data values; pcretest has
77 "inside information" compared to a program that strictly follows the PCRE API.
78
79 Although pcre_internal.h does itself include pcre.h, we explicitly include it
80 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81 appropriately for an application, not for building PCRE. */
82
83 #include "pcre.h"
84 #include "pcre_internal.h"
85
86 /* We need access to the data tables that PCRE uses. So as not to have to keep
87 two copies, we include the source file here, changing the names of the external
88 symbols to prevent clashes. */
89
90 #define _pcre_utf8_table1 utf8_table1
91 #define _pcre_utf8_table1_size utf8_table1_size
92 #define _pcre_utf8_table2 utf8_table2
93 #define _pcre_utf8_table3 utf8_table3
94 #define _pcre_utf8_table4 utf8_table4
95 #define _pcre_utt utt
96 #define _pcre_utt_size utt_size
97 #define _pcre_OP_lengths OP_lengths
98
99 #include "pcre_tables.c"
100
101 /* We also need the pcre_printint() function for printing out compiled
102 patterns. This function is in a separate file so that it can be included in
103 pcre_compile.c when that module is compiled with debugging enabled.
104
105 The definition of the macro PRINTABLE, which determines whether to print an
106 output character as-is or as a hex value when showing compiled patterns, is
107 contained in this file. We use it here also, in cases when the locale has not
108 been explicitly changed, so as to get consistent output from systems that
109 differ in their output from isprint() even in the "C" locale. */
110
111 #include "pcre_printint.src"
112
113 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
114
115
116 /* It is possible to compile this test program without including support for
117 testing the POSIX interface, though this is not available via the standard
118 Makefile. */
119
120 #if !defined NOPOSIX
121 #include "pcreposix.h"
122 #endif
123
124 /* It is also possible, for the benefit of the version currently imported into
125 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
126 interface to the DFA matcher (NODFA), and without the doublecheck of the old
127 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
128 UTF8 support if PCRE is built without it. */
129
130 #ifndef SUPPORT_UTF8
131 #ifndef NOUTF8
132 #define NOUTF8
133 #endif
134 #endif
135
136
137 /* Other parameters */
138
139 #ifndef CLOCKS_PER_SEC
140 #ifdef CLK_TCK
141 #define CLOCKS_PER_SEC CLK_TCK
142 #else
143 #define CLOCKS_PER_SEC 100
144 #endif
145 #endif
146
147 /* This is the default loop count for timing. */
148
149 #define LOOPREPEAT 500000
150
151 /* Static variables */
152
153 static FILE *outfile;
154 static int log_store = 0;
155 static int callout_count;
156 static int callout_extra;
157 static int callout_fail_count;
158 static int callout_fail_id;
159 static int debug_lengths;
160 static int first_callout;
161 static int locale_set = 0;
162 static int show_malloc;
163 static int use_utf8;
164 static size_t gotten_store;
165
166 /* The buffers grow automatically if very long input lines are encountered. */
167
168 static int buffer_size = 50000;
169 static uschar *buffer = NULL;
170 static uschar *dbuffer = NULL;
171 static uschar *pbuffer = NULL;
172
173
174
175 /*************************************************
176 * Read or extend an input line *
177 *************************************************/
178
179 /* Input lines are read into buffer, but both patterns and data lines can be
180 continued over multiple input lines. In addition, if the buffer fills up, we
181 want to automatically expand it so as to be able to handle extremely large
182 lines that are needed for certain stress tests. When the input buffer is
183 expanded, the other two buffers must also be expanded likewise, and the
184 contents of pbuffer, which are a copy of the input for callouts, must be
185 preserved (for when expansion happens for a data line). This is not the most
186 optimal way of handling this, but hey, this is just a test program!
187
188 Arguments:
189 f the file to read
190 start where in buffer to start (this *must* be within buffer)
191
192 Returns: pointer to the start of new data
193 could be a copy of start, or could be moved
194 NULL if no data read and EOF reached
195 */
196
197 static uschar *
198 extend_inputline(FILE *f, uschar *start)
199 {
200 uschar *here = start;
201
202 for (;;)
203 {
204 int rlen = buffer_size - (here - buffer);
205
206 if (rlen > 1000)
207 {
208 int dlen;
209 if (fgets((char *)here, rlen, f) == NULL)
210 return (here == start)? NULL : start;
211 dlen = (int)strlen((char *)here);
212 if (dlen > 0 && here[dlen - 1] == '\n') return start;
213 here += dlen;
214 }
215
216 else
217 {
218 int new_buffer_size = 2*buffer_size;
219 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
220 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
221 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
222
223 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
224 {
225 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
226 exit(1);
227 }
228
229 memcpy(new_buffer, buffer, buffer_size);
230 memcpy(new_pbuffer, pbuffer, buffer_size);
231
232 buffer_size = new_buffer_size;
233
234 start = new_buffer + (start - buffer);
235 here = new_buffer + (here - buffer);
236
237 free(buffer);
238 free(dbuffer);
239 free(pbuffer);
240
241 buffer = new_buffer;
242 dbuffer = new_dbuffer;
243 pbuffer = new_pbuffer;
244 }
245 }
246
247 return NULL; /* Control never gets here */
248 }
249
250
251
252
253
254
255
256 /*************************************************
257 * Read number from string *
258 *************************************************/
259
260 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
261 around with conditional compilation, just do the job by hand. It is only used
262 for unpicking arguments, so just keep it simple.
263
264 Arguments:
265 str string to be converted
266 endptr where to put the end pointer
267
268 Returns: the value, as an int
269 */
270
/* Convert a run of decimal digits, optionally preceded by white space, into
an int. Values too large for an int saturate at the largest int value instead
of overflowing (signed overflow is undefined behaviour); every digit is still
consumed, so the caller's "*endptr == 0" check behaves as before.

Arguments:
  str      string to be converted
  endptr   receives a pointer to the first character after the digits

Returns:   the converted value (0 if there are no digits)
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  /* Largest int, obtained without needing <limits.h>: all value bits set. */
  const int int_max = (int)(~0u >> 1);
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  for (; isdigit(*str); str++)
    {
    int digit = (int)(*str - '0');

    /* result*10 + digit fits iff result <= (int_max - digit)/10. */
    if (result > (int_max - digit) / 10)
      result = int_max;
    else
      result = result * 10 + digit;
    }

  *endptr = str;
  return result;
}
280
281
282
283
284 /*************************************************
285 * Convert UTF-8 string to value *
286 *************************************************/
287
288 /* This function takes one or more bytes that represents a UTF-8 character,
289 and returns the value of the character.
290
291 Argument:
292 utf8bytes a pointer to the byte vector
293 vptr a pointer to an int to receive the value
294
295 Returns: > 0 => the number of bytes consumed
296 -6 to 0 => malformed UTF-8 character at offset = (-return)
297 */
298
299 #if !defined NOUTF8
300
301 static int
302 utf82ord(unsigned char *utf8bytes, int *vptr)
303 {
304 int c = *utf8bytes++;
305 int d = c;
306 int i, j, s;
307
308 for (i = -1; i < 6; i++) /* i is number of additional bytes */
309 {
310 if ((d & 0x80) == 0) break;
311 d <<= 1;
312 }
313
314 if (i == -1) { *vptr = c; return 1; } /* ascii character */
315 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
316
317 /* i now has a value in the range 1-5 */
318
319 s = 6*i;
320 d = (c & utf8_table3[i]) << s;
321
322 for (j = 0; j < i; j++)
323 {
324 c = *utf8bytes++;
325 if ((c & 0xc0) != 0x80) return -(j+1);
326 s -= 6;
327 d |= (c & 0x3f) << s;
328 }
329
330 /* Check that encoding was the correct unique one */
331
332 for (j = 0; j < utf8_table1_size; j++)
333 if (d <= utf8_table1[j]) break;
334 if (j != i) return -(i+1);
335
336 /* Valid value */
337
338 *vptr = d;
339 return i+1;
340 }
341
342 #endif
343
344
345
346 /*************************************************
347 * Convert character value to UTF-8 *
348 *************************************************/
349
350 /* This function takes an integer value in the range 0 - 0x7fffffff
351 and encodes it as a UTF-8 character in 1 to 6 bytes.
352
353 Arguments:
354 cvalue the character value
355 utf8bytes pointer to buffer for result - at least 6 bytes long
356
357 Returns: number of characters placed in the buffer
358 */
359
360 #if !defined NOUTF8
361
362 static int
363 ord2utf8(int cvalue, uschar *utf8bytes)
364 {
365 register int i, j;
366 for (i = 0; i < utf8_table1_size; i++)
367 if (cvalue <= utf8_table1[i]) break;
368 utf8bytes += i;
369 for (j = i; j > 0; j--)
370 {
371 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
372 cvalue >>= 6;
373 }
374 *utf8bytes = utf8_table2[i] | cvalue;
375 return i + 1;
376 }
377
378 #endif
379
380
381
382 /*************************************************
383 * Print character string *
384 *************************************************/
385
386 /* Character string printing function. Must handle UTF-8 strings in utf8
387 mode. Yields number of characters printed. If handed a NULL file, just counts
388 chars without printing. */
389
390 static int pchars(unsigned char *p, int length, FILE *f)
391 {
392 int c = 0;
393 int yield = 0;
394
395 while (length-- > 0)
396 {
397 #if !defined NOUTF8
398 if (use_utf8)
399 {
400 int rc = utf82ord(p, &c);
401
402 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
403 {
404 length -= rc - 1;
405 p += rc;
406 if (PRINTHEX(c))
407 {
408 if (f != NULL) fprintf(f, "%c", c);
409 yield++;
410 }
411 else
412 {
413 int n = 4;
414 if (f != NULL) fprintf(f, "\\x{%02x}", c);
415 yield += (n <= 0x000000ff)? 2 :
416 (n <= 0x00000fff)? 3 :
417 (n <= 0x0000ffff)? 4 :
418 (n <= 0x000fffff)? 5 : 6;
419 }
420 continue;
421 }
422 }
423 #endif
424
425 /* Not UTF-8, or malformed UTF-8 */
426
427 c = *p++;
428 if (PRINTHEX(c))
429 {
430 if (f != NULL) fprintf(f, "%c", c);
431 yield++;
432 }
433 else
434 {
435 if (f != NULL) fprintf(f, "\\x%02x", c);
436 yield += 4;
437 }
438 }
439
440 return yield;
441 }
442
443
444
445 /*************************************************
446 * Callout function *
447 *************************************************/
448
449 /* Called from PCRE as a result of the (?C) item. We print out where we are in
450 the match. Yield zero unless more callouts than the fail count, or the callout
451 data is not zero. */
452
453 static int callout(pcre_callout_block *cb)
454 {
455 FILE *f = (first_callout | callout_extra)? outfile : NULL;
456 int i, pre_start, post_start, subject_length;
457
458 if (callout_extra)
459 {
460 fprintf(f, "Callout %d: last capture = %d\n",
461 cb->callout_number, cb->capture_last);
462
463 for (i = 0; i < cb->capture_top * 2; i += 2)
464 {
465 if (cb->offset_vector[i] < 0)
466 fprintf(f, "%2d: <unset>\n", i/2);
467 else
468 {
469 fprintf(f, "%2d: ", i/2);
470 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
471 cb->offset_vector[i+1] - cb->offset_vector[i], f);
472 fprintf(f, "\n");
473 }
474 }
475 }
476
477 /* Re-print the subject in canonical form, the first time or if giving full
478 datails. On subsequent calls in the same match, we use pchars just to find the
479 printed lengths of the substrings. */
480
481 if (f != NULL) fprintf(f, "--->");
482
483 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
484 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
485 cb->current_position - cb->start_match, f);
486
487 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
488
489 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
490 cb->subject_length - cb->current_position, f);
491
492 if (f != NULL) fprintf(f, "\n");
493
494 /* Always print appropriate indicators, with callout number if not already
495 shown. For automatic callouts, show the pattern offset. */
496
497 if (cb->callout_number == 255)
498 {
499 fprintf(outfile, "%+3d ", cb->pattern_position);
500 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
501 }
502 else
503 {
504 if (callout_extra) fprintf(outfile, " ");
505 else fprintf(outfile, "%3d ", cb->callout_number);
506 }
507
508 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
509 fprintf(outfile, "^");
510
511 if (post_start > 0)
512 {
513 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
514 fprintf(outfile, "^");
515 }
516
517 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
518 fprintf(outfile, " ");
519
520 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
521 pbuffer + cb->pattern_position);
522
523 fprintf(outfile, "\n");
524 first_callout = 0;
525
526 if (cb->callout_data != NULL)
527 {
528 int callout_data = *((int *)(cb->callout_data));
529 if (callout_data != 0)
530 {
531 fprintf(outfile, "Callout data = %d\n", callout_data);
532 return callout_data;
533 }
534 }
535
536 return (cb->callout_number != callout_fail_id)? 0 :
537 (++callout_count >= callout_fail_count)? 1 : 0;
538 }
539
540
541 /*************************************************
542 * Local malloc functions *
543 *************************************************/
544
545 /* Alternative malloc function, to test functionality and show the size of the
546 compiled re. */
547
548 static void *new_malloc(size_t size)
549 {
550 void *block = malloc(size);
551 gotten_store = size;
552 if (show_malloc)
553 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
554 return block;
555 }
556
557 static void new_free(void *block)
558 {
559 if (show_malloc)
560 fprintf(outfile, "free %p\n", block);
561 free(block);
562 }
563
564
565 /* For recursion malloc/free, to test stacking calls */
566
567 static void *stack_malloc(size_t size)
568 {
569 void *block = malloc(size);
570 if (show_malloc)
571 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
572 return block;
573 }
574
575 static void stack_free(void *block)
576 {
577 if (show_malloc)
578 fprintf(outfile, "stack_free %p\n", block);
579 free(block);
580 }
581
582
583 /*************************************************
584 * Call pcre_fullinfo() *
585 *************************************************/
586
587 /* Get one piece of information from the pcre_fullinfo() function */
588
589 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
590 {
591 int rc;
592 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
593 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
594 }
595
596
597
598 /*************************************************
599 * Byte flipping function *
600 *************************************************/
601
/* Reverse the byte order of a 2-byte or a 4-byte quantity.

Arguments:
  value    the value to flip
  n        2 to swap the two low-order bytes; any other value reverses the
             four low-order bytes

Returns:   the flipped value; bits above those that are swapped are dropped
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int b0 = value & 0xff;
  unsigned long int b1 = (value >> 8) & 0xff;
  unsigned long int b2, b3;

  if (n == 2) return (b0 << 8) | b1;

  b2 = (value >> 16) & 0xff;
  b3 = (value >> 24) & 0xff;

  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
611
612
613
614
615 /*************************************************
616 * Check match or recursion limit *
617 *************************************************/
618
619 static int
620 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
621 int start_offset, int options, int *use_offsets, int use_size_offsets,
622 int flag, unsigned long int *limit, int errnumber, const char *msg)
623 {
624 int count;
625 int min = 0;
626 int mid = 64;
627 int max = -1;
628
629 extra->flags |= flag;
630
631 for (;;)
632 {
633 *limit = mid;
634
635 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
636 use_offsets, use_size_offsets);
637
638 if (count == errnumber)
639 {
640 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
641 min = mid;
642 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
643 }
644
645 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
646 count == PCRE_ERROR_PARTIAL)
647 {
648 if (mid == min + 1)
649 {
650 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
651 break;
652 }
653 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
654 max = mid;
655 mid = (min + mid)/2;
656 }
657 else break; /* Some other error */
658 }
659
660 extra->flags &= ~flag;
661 return count;
662 }
663
664
665
666 /*************************************************
667 * Check newline indicator *
668 *************************************************/
669
670 /* This is used both at compile and run-time to check for <xxx> escapes, where
671 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
672 no match.
673
674 Arguments:
675 p points after the leading '<'
676 f file for error message
677
678 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
679 */
680
681 static int
682 check_newline(uschar *p, FILE *f)
683 {
684 if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
685 if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
686 if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
687 if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
688 if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
689 fprintf(f, "Unknown newline type at: <%s\n", p);
690 return 0;
691 }
692
693
694
695 /*************************************************
696 * Usage function *
697 *************************************************/
698
/* Write the command-line summary to stdout. The -dfa and -p lines are left
out when the corresponding support is compiled out. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input> [<output>]]\n", stdout);
  fputs(" -b show compiled code (bytecode)\n", stdout);
  fputs(" -C show PCRE compile-time options and exit\n", stdout);
  fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
  fputs(" -help show usage information\n", stdout);
  fputs(" -i show information about compiled patterns\n", stdout);
  fputs(" -m output memory used information\n", stdout);
  fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif
  fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
  fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
  fputs(" -s output store (memory) used information\n", stdout);
  fputs(" -t time compilation and execution\n", stdout);
  fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
  fputs(" -tm time execution (matching) only\n", stdout);
  fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
724
725
726
727 /*************************************************
728 * Main Program *
729 *************************************************/
730
731 /* Read lines from named file or stdin and write to named file or stdout; lines
732 consist of a regular expression, in delimiters and optionally followed by
733 options, followed by a set of test data, terminated by an empty line. */
734
735 int main(int argc, char **argv)
736 {
737 FILE *infile = stdin;
738 int options = 0;
739 int study_options = 0;
740 int op = 1;
741 int timeit = 0;
742 int timeitm = 0;
743 int showinfo = 0;
744 int showstore = 0;
745 int quiet = 0;
746 int size_offsets = 45;
747 int size_offsets_max;
748 int *offsets = NULL;
749 #if !defined NOPOSIX
750 int posix = 0;
751 #endif
752 int debug = 0;
753 int done = 0;
754 int all_use_dfa = 0;
755 int yield = 0;
756 int stack_size;
757
758 /* These vectors store, end-to-end, a list of captured substring names. Assume
759 that 1024 is plenty long enough for the few names we'll be testing. */
760
761 uschar copynames[1024];
762 uschar getnames[1024];
763
764 uschar *copynamesptr;
765 uschar *getnamesptr;
766
767 /* Get buffers from malloc() so that Electric Fence will check their misuse
768 when I am debugging. They grow automatically when very long lines are read. */
769
770 buffer = (unsigned char *)malloc(buffer_size);
771 dbuffer = (unsigned char *)malloc(buffer_size);
772 pbuffer = (unsigned char *)malloc(buffer_size);
773
774 /* The outfile variable is static so that new_malloc can use it. */
775
776 outfile = stdout;
777
778 /* The following _setmode() stuff is some Windows magic that tells its runtime
779 library to translate CRLF into a single LF character. At least, that's what
780 I've been told: never having used Windows I take this all on trust. Originally
781 it set 0x8000, but then I was advised that _O_BINARY was better. */
782
783 #if defined(_WIN32) || defined(WIN32)
784 _setmode( _fileno( stdout ), _O_BINARY );
785 #endif
786
787 /* Scan options */
788
789 while (argc > 1 && argv[op][0] == '-')
790 {
791 unsigned char *endptr;
792
793 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
794 showstore = 1;
795 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
796 else if (strcmp(argv[op], "-b") == 0) debug = 1;
797 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
798 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
799 #if !defined NODFA
800 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
801 #endif
802 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
803 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
804 *endptr == 0))
805 {
806 op++;
807 argc--;
808 }
809 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
810 {
811 int both = argv[op][2] == 0;
812 int temp;
813 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
814 *endptr == 0))
815 {
816 timeitm = temp;
817 op++;
818 argc--;
819 }
820 else timeitm = LOOPREPEAT;
821 if (both) timeit = timeitm;
822 }
823 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
824 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
825 *endptr == 0))
826 {
827 #if defined(_WIN32) || defined(WIN32)
828 printf("PCRE: -S not supported on this OS\n");
829 exit(1);
830 #else
831 int rc;
832 struct rlimit rlim;
833 getrlimit(RLIMIT_STACK, &rlim);
834 rlim.rlim_cur = stack_size * 1024 * 1024;
835 rc = setrlimit(RLIMIT_STACK, &rlim);
836 if (rc != 0)
837 {
838 printf("PCRE: setrlimit() failed with error %d\n", rc);
839 exit(1);
840 }
841 op++;
842 argc--;
843 #endif
844 }
845 #if !defined NOPOSIX
846 else if (strcmp(argv[op], "-p") == 0) posix = 1;
847 #endif
848 else if (strcmp(argv[op], "-C") == 0)
849 {
850 int rc;
851 printf("PCRE version %s\n", pcre_version());
852 printf("Compiled with\n");
853 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
854 printf(" %sUTF-8 support\n", rc? "" : "No ");
855 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
856 printf(" %sUnicode properties support\n", rc? "" : "No ");
857 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
858 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
859 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
860 (rc == -2)? "ANYCRLF" :
861 (rc == -1)? "ANY" : "???");
862 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
863 printf(" Internal link size = %d\n", rc);
864 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
865 printf(" POSIX malloc threshold = %d\n", rc);
866 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
867 printf(" Default match limit = %d\n", rc);
868 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
869 printf(" Default recursion depth limit = %d\n", rc);
870 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
871 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
872 goto EXIT;
873 }
874 else if (strcmp(argv[op], "-help") == 0 ||
875 strcmp(argv[op], "--help") == 0)
876 {
877 usage();
878 goto EXIT;
879 }
880 else
881 {
882 printf("** Unknown or malformed option %s\n", argv[op]);
883 usage();
884 yield = 1;
885 goto EXIT;
886 }
887 op++;
888 argc--;
889 }
890
891 /* Get the store for the offsets vector, and remember what it was */
892
893 size_offsets_max = size_offsets;
894 offsets = (int *)malloc(size_offsets_max * sizeof(int));
895 if (offsets == NULL)
896 {
897 printf("** Failed to get %d bytes of memory for offsets vector\n",
898 (int)(size_offsets_max * sizeof(int)));
899 yield = 1;
900 goto EXIT;
901 }
902
903 /* Sort out the input and output files */
904
905 if (argc > 1)
906 {
907 infile = fopen(argv[op], INPUT_MODE);
908 if (infile == NULL)
909 {
910 printf("** Failed to open %s\n", argv[op]);
911 yield = 1;
912 goto EXIT;
913 }
914 }
915
916 if (argc > 2)
917 {
918 outfile = fopen(argv[op+1], OUTPUT_MODE);
919 if (outfile == NULL)
920 {
921 printf("** Failed to open %s\n", argv[op+1]);
922 yield = 1;
923 goto EXIT;
924 }
925 }
926
927 /* Set alternative malloc function */
928
929 pcre_malloc = new_malloc;
930 pcre_free = new_free;
931 pcre_stack_malloc = stack_malloc;
932 pcre_stack_free = stack_free;
933
934 /* Heading line unless quiet, then prompt for first regex if stdin */
935
936 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
937
938 /* Main loop */
939
940 while (!done)
941 {
942 pcre *re = NULL;
943 pcre_extra *extra = NULL;
944
945 #if !defined NOPOSIX /* There are still compilers that require no indent */
946 regex_t preg;
947 int do_posix = 0;
948 #endif
949
950 const char *error;
951 unsigned char *p, *pp, *ppp;
952 unsigned char *to_file = NULL;
953 const unsigned char *tables = NULL;
954 unsigned long int true_size, true_study_size = 0;
955 size_t size, regex_gotten_store;
956 int do_study = 0;
957 int do_debug = debug;
958 int do_G = 0;
959 int do_g = 0;
960 int do_showinfo = showinfo;
961 int do_showrest = 0;
962 int do_flip = 0;
963 int erroroffset, len, delimiter, poffset;
964
965 use_utf8 = 0;
966 debug_lengths = 1;
967
968 if (infile == stdin) printf(" re> ");
969 if (extend_inputline(infile, buffer) == NULL) break;
970 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
971 fflush(outfile);
972
973 p = buffer;
974 while (isspace(*p)) p++;
975 if (*p == 0) continue;
976
977 /* See if the pattern is to be loaded pre-compiled from a file. */
978
979 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
980 {
981 unsigned long int magic, get_options;
982 uschar sbuf[8];
983 FILE *f;
984
985 p++;
986 pp = p + (int)strlen((char *)p);
987 while (isspace(pp[-1])) pp--;
988 *pp = 0;
989
990 f = fopen((char *)p, "rb");
991 if (f == NULL)
992 {
993 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
994 continue;
995 }
996
997 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
998
999 true_size =
1000 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1001 true_study_size =
1002 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1003
1004 re = (real_pcre *)new_malloc(true_size);
1005 regex_gotten_store = gotten_store;
1006
1007 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1008
1009 magic = ((real_pcre *)re)->magic_number;
1010 if (magic != MAGIC_NUMBER)
1011 {
1012 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1013 {
1014 do_flip = 1;
1015 }
1016 else
1017 {
1018 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1019 fclose(f);
1020 continue;
1021 }
1022 }
1023
1024 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1025 do_flip? " (byte-inverted)" : "", p);
1026
1027 /* Need to know if UTF-8 for printing data strings */
1028
1029 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1030 use_utf8 = (get_options & PCRE_UTF8) != 0;
1031
1032 /* Now see if there is any following study data */
1033
1034 if (true_study_size != 0)
1035 {
1036 pcre_study_data *psd;
1037
1038 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1039 extra->flags = PCRE_EXTRA_STUDY_DATA;
1040
1041 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1042 extra->study_data = psd;
1043
1044 if (fread(psd, 1, true_study_size, f) != true_study_size)
1045 {
1046 FAIL_READ:
1047 fprintf(outfile, "Failed to read data from %s\n", p);
1048 if (extra != NULL) new_free(extra);
1049 if (re != NULL) new_free(re);
1050 fclose(f);
1051 continue;
1052 }
1053 fprintf(outfile, "Study data loaded from %s\n", p);
1054 do_study = 1; /* To get the data output if requested */
1055 }
1056 else fprintf(outfile, "No study data\n");
1057
1058 fclose(f);
1059 goto SHOW_INFO;
1060 }
1061
1062 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1063 the pattern; if it isn't complete, read more. */
1064
1065 delimiter = *p++;
1066
1067 if (isalnum(delimiter) || delimiter == '\\')
1068 {
1069 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1070 goto SKIP_DATA;
1071 }
1072
1073 pp = p;
1074 poffset = p - buffer;
1075
1076 for(;;)
1077 {
1078 while (*pp != 0)
1079 {
1080 if (*pp == '\\' && pp[1] != 0) pp++;
1081 else if (*pp == delimiter) break;
1082 pp++;
1083 }
1084 if (*pp != 0) break;
1085 if (infile == stdin) printf(" > ");
1086 if ((pp = extend_inputline(infile, pp)) == NULL)
1087 {
1088 fprintf(outfile, "** Unexpected EOF\n");
1089 done = 1;
1090 goto CONTINUE;
1091 }
1092 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1093 }
1094
1095 /* The buffer may have moved while being extended; reset the start of data
1096 pointer to the correct relative point in the buffer. */
1097
1098 p = buffer + poffset;
1099
1100 /* If the first character after the delimiter is backslash, make
1101 the pattern end with backslash. This is purely to provide a way
1102 of testing for the error message when a pattern ends with backslash. */
1103
1104 if (pp[1] == '\\') *pp++ = '\\';
1105
1106 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1107 for callouts. */
1108
1109 *pp++ = 0;
1110 strcpy((char *)pbuffer, (char *)p);
1111
1112 /* Look for options after final delimiter */
1113
1114 options = 0;
1115 study_options = 0;
1116 log_store = showstore; /* default from command line */
1117
1118 while (*pp != 0)
1119 {
1120 switch (*pp++)
1121 {
1122 case 'f': options |= PCRE_FIRSTLINE; break;
1123 case 'g': do_g = 1; break;
1124 case 'i': options |= PCRE_CASELESS; break;
1125 case 'm': options |= PCRE_MULTILINE; break;
1126 case 's': options |= PCRE_DOTALL; break;
1127 case 'x': options |= PCRE_EXTENDED; break;
1128
1129 case '+': do_showrest = 1; break;
1130 case 'A': options |= PCRE_ANCHORED; break;
1131 case 'B': do_debug = 1; break;
1132 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1133 case 'D': do_debug = do_showinfo = 1; break;
1134 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1135 case 'F': do_flip = 1; break;
1136 case 'G': do_G = 1; break;
1137 case 'I': do_showinfo = 1; break;
1138 case 'J': options |= PCRE_DUPNAMES; break;
1139 case 'M': log_store = 1; break;
1140 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1141
1142 #if !defined NOPOSIX
1143 case 'P': do_posix = 1; break;
1144 #endif
1145
1146 case 'S': do_study = 1; break;
1147 case 'U': options |= PCRE_UNGREEDY; break;
1148 case 'X': options |= PCRE_EXTRA; break;
1149 case 'Z': debug_lengths = 0; break;
1150 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1151 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1152
1153 case 'L':
1154 ppp = pp;
1155 /* The '\r' test here is so that it works on Windows. */
1156 /* The zero (NUL) test is just in case this is an unterminated line. */
1157 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1158 *ppp = 0;
1159 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1160 {
1161 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1162 goto SKIP_DATA;
1163 }
1164 locale_set = 1;
1165 tables = pcre_maketables();
1166 pp = ppp;
1167 break;
1168
1169 case '>':
1170 to_file = pp;
1171 while (*pp != 0) pp++;
1172 while (isspace(pp[-1])) pp--;
1173 *pp = 0;
1174 break;
1175
1176 case '<':
1177 {
1178 int x = check_newline(pp, outfile);
1179 if (x == 0) goto SKIP_DATA;
1180 options |= x;
1181 while (*pp++ != '>');
1182 }
1183 break;
1184
1185 case '\r': /* So that it works in Windows */
1186 case '\n':
1187 case ' ':
1188 break;
1189
1190 default:
1191 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1192 goto SKIP_DATA;
1193 }
1194 }
1195
1196 /* Handle compiling via the POSIX interface, which doesn't support the
1197 timing, showing, or debugging options, nor the ability to pass over
1198 local character tables. */
1199
1200 #if !defined NOPOSIX
1201 if (posix || do_posix)
1202 {
1203 int rc;
1204 int cflags = 0;
1205
1206 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1207 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1208 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1209 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1210 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1211
1212 rc = regcomp(&preg, (char *)p, cflags);
1213
1214 /* Compilation failed; go back for another re, skipping to blank line
1215 if non-interactive. */
1216
1217 if (rc != 0)
1218 {
1219 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1220 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1221 goto SKIP_DATA;
1222 }
1223 }
1224
1225 /* Handle compiling via the native interface */
1226
1227 else
1228 #endif /* !defined NOPOSIX */
1229
1230 {
1231 if (timeit > 0)
1232 {
1233 register int i;
1234 clock_t time_taken;
1235 clock_t start_time = clock();
1236 for (i = 0; i < timeit; i++)
1237 {
1238 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1239 if (re != NULL) free(re);
1240 }
1241 time_taken = clock() - start_time;
1242 fprintf(outfile, "Compile time %.4f milliseconds\n",
1243 (((double)time_taken * 1000.0) / (double)timeit) /
1244 (double)CLOCKS_PER_SEC);
1245 }
1246
1247 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1248
1249 /* Compilation failed; go back for another re, skipping to blank line
1250 if non-interactive. */
1251
1252 if (re == NULL)
1253 {
1254 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1255 SKIP_DATA:
1256 if (infile != stdin)
1257 {
1258 for (;;)
1259 {
1260 if (extend_inputline(infile, buffer) == NULL)
1261 {
1262 done = 1;
1263 goto CONTINUE;
1264 }
1265 len = (int)strlen((char *)buffer);
1266 while (len > 0 && isspace(buffer[len-1])) len--;
1267 if (len == 0) break;
1268 }
1269 fprintf(outfile, "\n");
1270 }
1271 goto CONTINUE;
1272 }
1273
1274 /* Compilation succeeded; print data if required. There are now two
1275 info-returning functions. The old one has a limited interface and
1276 returns only limited data. Check that it agrees with the newer one. */
1277
1278 if (log_store)
1279 fprintf(outfile, "Memory allocation (code space): %d\n",
1280 (int)(gotten_store -
1281 sizeof(real_pcre) -
1282 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1283
1284 /* Extract the size for possible writing before possibly flipping it,
1285 and remember the store that was got. */
1286
1287 true_size = ((real_pcre *)re)->size;
1288 regex_gotten_store = gotten_store;
1289
1290 /* If /S was present, study the regexp to generate additional info to
1291 help with the matching. */
1292
1293 if (do_study)
1294 {
1295 if (timeit > 0)
1296 {
1297 register int i;
1298 clock_t time_taken;
1299 clock_t start_time = clock();
1300 for (i = 0; i < timeit; i++)
1301 extra = pcre_study(re, study_options, &error);
1302 time_taken = clock() - start_time;
1303 if (extra != NULL) free(extra);
1304 fprintf(outfile, " Study time %.4f milliseconds\n",
1305 (((double)time_taken * 1000.0) / (double)timeit) /
1306 (double)CLOCKS_PER_SEC);
1307 }
1308 extra = pcre_study(re, study_options, &error);
1309 if (error != NULL)
1310 fprintf(outfile, "Failed to study: %s\n", error);
1311 else if (extra != NULL)
1312 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1313 }
1314
1315 /* If the 'F' option was present, we flip the bytes of all the integer
1316 fields in the regex data block and the study block. This is to make it
1317 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1318 compiled on a different architecture. */
1319
1320 if (do_flip)
1321 {
1322 real_pcre *rre = (real_pcre *)re;
1323 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1324 rre->size = byteflip(rre->size, sizeof(rre->size));
1325 rre->options = byteflip(rre->options, sizeof(rre->options));
1326 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1327 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1328 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1329 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1330 rre->name_table_offset = byteflip(rre->name_table_offset,
1331 sizeof(rre->name_table_offset));
1332 rre->name_entry_size = byteflip(rre->name_entry_size,
1333 sizeof(rre->name_entry_size));
1334 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1335
1336 if (extra != NULL)
1337 {
1338 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1339 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1340 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1341 }
1342 }
1343
1344 /* Extract information from the compiled data if required */
1345
1346 SHOW_INFO:
1347
1348 if (do_debug)
1349 {
1350 fprintf(outfile, "------------------------------------------------------------------\n");
1351 pcre_printint(re, outfile, debug_lengths);
1352 }
1353
1354 if (do_showinfo)
1355 {
1356 unsigned long int get_options, all_options;
1357 #if !defined NOINFOCHECK
1358 int old_first_char, old_options, old_count;
1359 #endif
1360 int count, backrefmax, first_char, need_char, okpartial, jchanged;
1361 int nameentrysize, namecount;
1362 const uschar *nametable;
1363
1364 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1365 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1366 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1367 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1368 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1369 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1370 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1371 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1372 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1373 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1374 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1375
1376 #if !defined NOINFOCHECK
1377 old_count = pcre_info(re, &old_options, &old_first_char);
1378 if (count < 0) fprintf(outfile,
1379 "Error %d from pcre_info()\n", count);
1380 else
1381 {
1382 if (old_count != count) fprintf(outfile,
1383 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1384 old_count);
1385
1386 if (old_first_char != first_char) fprintf(outfile,
1387 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1388 first_char, old_first_char);
1389
1390 if (old_options != (int)get_options) fprintf(outfile,
1391 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1392 get_options, old_options);
1393 }
1394 #endif
1395
1396 if (size != regex_gotten_store) fprintf(outfile,
1397 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1398 (int)size, (int)regex_gotten_store);
1399
1400 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1401 if (backrefmax > 0)
1402 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1403
1404 if (namecount > 0)
1405 {
1406 fprintf(outfile, "Named capturing subpatterns:\n");
1407 while (namecount-- > 0)
1408 {
1409 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1410 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1411 GET2(nametable, 0));
1412 nametable += nameentrysize;
1413 }
1414 }
1415
1416 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1417
1418 all_options = ((real_pcre *)re)->options;
1419 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1420
1421 if (get_options == 0) fprintf(outfile, "No options\n");
1422 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1423 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1424 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1425 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1426 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1427 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1428 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1429 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1430 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1431 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1432 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1433 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1434 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1435 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1436
1437 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1438
1439 switch (get_options & PCRE_NEWLINE_BITS)
1440 {
1441 case PCRE_NEWLINE_CR:
1442 fprintf(outfile, "Forced newline sequence: CR\n");
1443 break;
1444
1445 case PCRE_NEWLINE_LF:
1446 fprintf(outfile, "Forced newline sequence: LF\n");
1447 break;
1448
1449 case PCRE_NEWLINE_CRLF:
1450 fprintf(outfile, "Forced newline sequence: CRLF\n");
1451 break;
1452
1453 case PCRE_NEWLINE_ANYCRLF:
1454 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1455 break;
1456
1457 case PCRE_NEWLINE_ANY:
1458 fprintf(outfile, "Forced newline sequence: ANY\n");
1459 break;
1460
1461 default:
1462 break;
1463 }
1464
1465 if (first_char == -1)
1466 {
1467 fprintf(outfile, "First char at start or follows newline\n");
1468 }
1469 else if (first_char < 0)
1470 {
1471 fprintf(outfile, "No first char\n");
1472 }
1473 else
1474 {
1475 int ch = first_char & 255;
1476 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1477 "" : " (caseless)";
1478 if (PRINTHEX(ch))
1479 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1480 else
1481 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1482 }
1483
1484 if (need_char < 0)
1485 {
1486 fprintf(outfile, "No need char\n");
1487 }
1488 else
1489 {
1490 int ch = need_char & 255;
1491 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1492 "" : " (caseless)";
1493 if (PRINTHEX(ch))
1494 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1495 else
1496 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1497 }
1498
1499 /* Don't output study size; at present it is in any case a fixed
1500 value, but it varies, depending on the computer architecture, and
1501 so messes up the test suite. (And with the /F option, it might be
1502 flipped.) */
1503
1504 if (do_study)
1505 {
1506 if (extra == NULL)
1507 fprintf(outfile, "Study returned NULL\n");
1508 else
1509 {
1510 uschar *start_bits = NULL;
1511 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1512
1513 if (start_bits == NULL)
1514 fprintf(outfile, "No starting byte set\n");
1515 else
1516 {
1517 int i;
1518 int c = 24;
1519 fprintf(outfile, "Starting byte set: ");
1520 for (i = 0; i < 256; i++)
1521 {
1522 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1523 {
1524 if (c > 75)
1525 {
1526 fprintf(outfile, "\n ");
1527 c = 2;
1528 }
1529 if (PRINTHEX(i) && i != ' ')
1530 {
1531 fprintf(outfile, "%c ", i);
1532 c += 2;
1533 }
1534 else
1535 {
1536 fprintf(outfile, "\\x%02x ", i);
1537 c += 5;
1538 }
1539 }
1540 }
1541 fprintf(outfile, "\n");
1542 }
1543 }
1544 }
1545 }
1546
1547 /* If the '>' option was present, we write out the regex to a file, and
1548 that is all. The first 8 bytes of the file are the regex length and then
1549 the study length, in big-endian order. */
1550
1551 if (to_file != NULL)
1552 {
1553 FILE *f = fopen((char *)to_file, "wb");
1554 if (f == NULL)
1555 {
1556 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1557 }
1558 else
1559 {
1560 uschar sbuf[8];
1561 sbuf[0] = (true_size >> 24) & 255;
1562 sbuf[1] = (true_size >> 16) & 255;
1563 sbuf[2] = (true_size >> 8) & 255;
1564 sbuf[3] = (true_size) & 255;
1565
1566 sbuf[4] = (true_study_size >> 24) & 255;
1567 sbuf[5] = (true_study_size >> 16) & 255;
1568 sbuf[6] = (true_study_size >> 8) & 255;
1569 sbuf[7] = (true_study_size) & 255;
1570
1571 if (fwrite(sbuf, 1, 8, f) < 8 ||
1572 fwrite(re, 1, true_size, f) < true_size)
1573 {
1574 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1575 }
1576 else
1577 {
1578 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1579 if (extra != NULL)
1580 {
1581 if (fwrite(extra->study_data, 1, true_study_size, f) <
1582 true_study_size)
1583 {
1584 fprintf(outfile, "Write error on %s: %s\n", to_file,
1585 strerror(errno));
1586 }
1587 else fprintf(outfile, "Study data written to %s\n", to_file);
1588
1589 }
1590 }
1591 fclose(f);
1592 }
1593
1594 new_free(re);
1595 if (extra != NULL) new_free(extra);
1596 if (tables != NULL) new_free((void *)tables);
1597 continue; /* With next regex */
1598 }
1599 } /* End of non-POSIX compile */
1600
1601 /* Read data lines and test them */
1602
1603 for (;;)
1604 {
1605 uschar *q;
1606 uschar *bptr;
1607 int *use_offsets = offsets;
1608 int use_size_offsets = size_offsets;
1609 int callout_data = 0;
1610 int callout_data_set = 0;
1611 int count, c;
1612 int copystrings = 0;
1613 int find_match_limit = 0;
1614 int getstrings = 0;
1615 int getlist = 0;
1616 int gmatched = 0;
1617 int start_offset = 0;
1618 int g_notempty = 0;
1619 int use_dfa = 0;
1620
1621 options = 0;
1622
1623 *copynames = 0;
1624 *getnames = 0;
1625
1626 copynamesptr = copynames;
1627 getnamesptr = getnames;
1628
1629 pcre_callout = callout;
1630 first_callout = 1;
1631 callout_extra = 0;
1632 callout_count = 0;
1633 callout_fail_count = 999999;
1634 callout_fail_id = -1;
1635 show_malloc = 0;
1636
1637 if (extra != NULL) extra->flags &=
1638 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1639
1640 len = 0;
1641 for (;;)
1642 {
1643 if (infile == stdin) printf("data> ");
1644 if (extend_inputline(infile, buffer + len) == NULL)
1645 {
1646 if (len > 0) break;
1647 done = 1;
1648 goto CONTINUE;
1649 }
1650 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1651 len = (int)strlen((char *)buffer);
1652 if (buffer[len-1] == '\n') break;
1653 }
1654
1655 while (len > 0 && isspace(buffer[len-1])) len--;
1656 buffer[len] = 0;
1657 if (len == 0) break;
1658
1659 p = buffer;
1660 while (isspace(*p)) p++;
1661
1662 bptr = q = dbuffer;
1663 while ((c = *p++) != 0)
1664 {
1665 int i = 0;
1666 int n = 0;
1667
1668 if (c == '\\') switch ((c = *p++))
1669 {
1670 case 'a': c = 7; break;
1671 case 'b': c = '\b'; break;
1672 case 'e': c = 27; break;
1673 case 'f': c = '\f'; break;
1674 case 'n': c = '\n'; break;
1675 case 'r': c = '\r'; break;
1676 case 't': c = '\t'; break;
1677 case 'v': c = '\v'; break;
1678
1679 case '0': case '1': case '2': case '3':
1680 case '4': case '5': case '6': case '7':
1681 c -= '0';
1682 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1683 c = c * 8 + *p++ - '0';
1684
1685 #if !defined NOUTF8
1686 if (use_utf8 && c > 255)
1687 {
1688 unsigned char buff8[8];
1689 int ii, utn;
1690 utn = ord2utf8(c, buff8);
1691 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1692 c = buff8[ii]; /* Last byte */
1693 }
1694 #endif
1695 break;
1696
1697 case 'x':
1698
1699 /* Handle \x{..} specially - new Perl thing for utf8 */
1700
1701 #if !defined NOUTF8
1702 if (*p == '{')
1703 {
1704 unsigned char *pt = p;
1705 c = 0;
1706 while (isxdigit(*(++pt)))
1707 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1708 if (*pt == '}')
1709 {
1710 unsigned char buff8[8];
1711 int ii, utn;
1712 utn = ord2utf8(c, buff8);
1713 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1714 c = buff8[ii]; /* Last byte */
1715 p = pt + 1;
1716 break;
1717 }
1718 /* Not correct form; fall through */
1719 }
1720 #endif
1721
1722 /* Ordinary \x */
1723
1724 c = 0;
1725 while (i++ < 2 && isxdigit(*p))
1726 {
1727 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1728 p++;
1729 }
1730 break;
1731
1732 case 0: /* \ followed by EOF allows for an empty line */
1733 p--;
1734 continue;
1735
1736 case '>':
1737 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1738 continue;
1739
1740 case 'A': /* Option setting */
1741 options |= PCRE_ANCHORED;
1742 continue;
1743
1744 case 'B':
1745 options |= PCRE_NOTBOL;
1746 continue;
1747
1748 case 'C':
1749 if (isdigit(*p)) /* Set copy string */
1750 {
1751 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1752 copystrings |= 1 << n;
1753 }
1754 else if (isalnum(*p))
1755 {
1756 uschar *npp = copynamesptr;
1757 while (isalnum(*p)) *npp++ = *p++;
1758 *npp++ = 0;
1759 *npp = 0;
1760 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1761 if (n < 0)
1762 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1763 copynamesptr = npp;
1764 }
1765 else if (*p == '+')
1766 {
1767 callout_extra = 1;
1768 p++;
1769 }
1770 else if (*p == '-')
1771 {
1772 pcre_callout = NULL;
1773 p++;
1774 }
1775 else if (*p == '!')
1776 {
1777 callout_fail_id = 0;
1778 p++;
1779 while(isdigit(*p))
1780 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1781 callout_fail_count = 0;
1782 if (*p == '!')
1783 {
1784 p++;
1785 while(isdigit(*p))
1786 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1787 }
1788 }
1789 else if (*p == '*')
1790 {
1791 int sign = 1;
1792 callout_data = 0;
1793 if (*(++p) == '-') { sign = -1; p++; }
1794 while(isdigit(*p))
1795 callout_data = callout_data * 10 + *p++ - '0';
1796 callout_data *= sign;
1797 callout_data_set = 1;
1798 }
1799 continue;
1800
1801 #if !defined NODFA
1802 case 'D':
1803 #if !defined NOPOSIX
1804 if (posix || do_posix)
1805 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1806 else
1807 #endif
1808 use_dfa = 1;
1809 continue;
1810
1811 case 'F':
1812 options |= PCRE_DFA_SHORTEST;
1813 continue;
1814 #endif
1815
1816 case 'G':
1817 if (isdigit(*p))
1818 {
1819 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1820 getstrings |= 1 << n;
1821 }
1822 else if (isalnum(*p))
1823 {
1824 uschar *npp = getnamesptr;
1825 while (isalnum(*p)) *npp++ = *p++;
1826 *npp++ = 0;
1827 *npp = 0;
1828 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1829 if (n < 0)
1830 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1831 getnamesptr = npp;
1832 }
1833 continue;
1834
1835 case 'L':
1836 getlist = 1;
1837 continue;
1838
1839 case 'M':
1840 find_match_limit = 1;
1841 continue;
1842
1843 case 'N':
1844 options |= PCRE_NOTEMPTY;
1845 continue;
1846
1847 case 'O':
1848 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1849 if (n > size_offsets_max)
1850 {
1851 size_offsets_max = n;
1852 free(offsets);
1853 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1854 if (offsets == NULL)
1855 {
1856 printf("** Failed to get %d bytes of memory for offsets vector\n",
1857 (int)(size_offsets_max * sizeof(int)));
1858 yield = 1;
1859 goto EXIT;
1860 }
1861 }
1862 use_size_offsets = n;
1863 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1864 continue;
1865
1866 case 'P':
1867 options |= PCRE_PARTIAL;
1868 continue;
1869
1870 case 'Q':
1871 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1872 if (extra == NULL)
1873 {
1874 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1875 extra->flags = 0;
1876 }
1877 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1878 extra->match_limit_recursion = n;
1879 continue;
1880
1881 case 'q':
1882 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1883 if (extra == NULL)
1884 {
1885 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1886 extra->flags = 0;
1887 }
1888 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1889 extra->match_limit = n;
1890 continue;
1891
1892 #if !defined NODFA
1893 case 'R':
1894 options |= PCRE_DFA_RESTART;
1895 continue;
1896 #endif
1897
1898 case 'S':
1899 show_malloc = 1;
1900 continue;
1901
1902 case 'Z':
1903 options |= PCRE_NOTEOL;
1904 continue;
1905
1906 case '?':
1907 options |= PCRE_NO_UTF8_CHECK;
1908 continue;
1909
1910 case '<':
1911 {
1912 int x = check_newline(p, outfile);
1913 if (x == 0) goto NEXT_DATA;
1914 options |= x;
1915 while (*p++ != '>');
1916 }
1917 continue;
1918 }
1919 *q++ = c;
1920 }
1921 *q = 0;
1922 len = q - dbuffer;
1923
1924 if ((all_use_dfa || use_dfa) && find_match_limit)
1925 {
1926 printf("**Match limit not relevant for DFA matching: ignored\n");
1927 find_match_limit = 0;
1928 }
1929
1930 /* Handle matching via the POSIX interface, which does not
1931 support timing or playing with the match limit or callout data. */
1932
1933 #if !defined NOPOSIX
1934 if (posix || do_posix)
1935 {
1936 int rc;
1937 int eflags = 0;
1938 regmatch_t *pmatch = NULL;
1939 if (use_size_offsets > 0)
1940 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1941 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1942 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1943
1944 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1945
1946 if (rc != 0)
1947 {
1948 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1949 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1950 }
1951 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1952 != 0)
1953 {
1954 fprintf(outfile, "Matched with REG_NOSUB\n");
1955 }
1956 else
1957 {
1958 size_t i;
1959 for (i = 0; i < (size_t)use_size_offsets; i++)
1960 {
1961 if (pmatch[i].rm_so >= 0)
1962 {
1963 fprintf(outfile, "%2d: ", (int)i);
1964 (void)pchars(dbuffer + pmatch[i].rm_so,
1965 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1966 fprintf(outfile, "\n");
1967 if (i == 0 && do_showrest)
1968 {
1969 fprintf(outfile, " 0+ ");
1970 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1971 outfile);
1972 fprintf(outfile, "\n");
1973 }
1974 }
1975 }
1976 }
1977 free(pmatch);
1978 }
1979
1980 /* Handle matching via the native interface - repeats for /g and /G */
1981
1982 else
1983 #endif /* !defined NOPOSIX */
1984
1985 for (;; gmatched++) /* Loop for /g or /G */
1986 {
1987 if (timeitm > 0)
1988 {
1989 register int i;
1990 clock_t time_taken;
1991 clock_t start_time = clock();
1992
1993 #if !defined NODFA
1994 if (all_use_dfa || use_dfa)
1995 {
1996 int workspace[1000];
1997 for (i = 0; i < timeitm; i++)
1998 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1999 options | g_notempty, use_offsets, use_size_offsets, workspace,
2000 sizeof(workspace)/sizeof(int));
2001 }
2002 else
2003 #endif
2004
2005 for (i = 0; i < timeitm; i++)
2006 count = pcre_exec(re, extra, (char *)bptr, len,
2007 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2008
2009 time_taken = clock() - start_time;
2010 fprintf(outfile, "Execute time %.4f milliseconds\n",
2011 (((double)time_taken * 1000.0) / (double)timeitm) /
2012 (double)CLOCKS_PER_SEC);
2013 }
2014
2015 /* If find_match_limit is set, we want to do repeated matches with
2016 varying limits in order to find the minimum value for the match limit and
2017 for the recursion limit. */
2018
2019 if (find_match_limit)
2020 {
2021 if (extra == NULL)
2022 {
2023 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2024 extra->flags = 0;
2025 }
2026
2027 (void)check_match_limit(re, extra, bptr, len, start_offset,
2028 options|g_notempty, use_offsets, use_size_offsets,
2029 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2030 PCRE_ERROR_MATCHLIMIT, "match()");
2031
2032 count = check_match_limit(re, extra, bptr, len, start_offset,
2033 options|g_notempty, use_offsets, use_size_offsets,
2034 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2035 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2036 }
2037
2038 /* If callout_data is set, use the interface with additional data */
2039
2040 else if (callout_data_set)
2041 {
2042 if (extra == NULL)
2043 {
2044 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2045 extra->flags = 0;
2046 }
2047 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2048 extra->callout_data = &callout_data;
2049 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2050 options | g_notempty, use_offsets, use_size_offsets);
2051 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2052 }
2053
2054 /* The normal case is just to do the match once, with the default
2055 value of match_limit. */
2056
2057 #if !defined NODFA
2058 else if (all_use_dfa || use_dfa)
2059 {
2060 int workspace[1000];
2061 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2062 options | g_notempty, use_offsets, use_size_offsets, workspace,
2063 sizeof(workspace)/sizeof(int));
2064 if (count == 0)
2065 {
2066 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2067 count = use_size_offsets/2;
2068 }
2069 }
2070 #endif
2071
2072 else
2073 {
2074 count = pcre_exec(re, extra, (char *)bptr, len,
2075 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2076 if (count == 0)
2077 {
2078 fprintf(outfile, "Matched, but too many substrings\n");
2079 count = use_size_offsets/3;
2080 }
2081 }
2082
2083 /* Matched */
2084
2085 if (count >= 0)
2086 {
2087 int i, maxcount;
2088
2089 #if !defined NODFA
2090 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2091 #endif
2092 maxcount = use_size_offsets/3;
2093
2094 /* This is a check against a lunatic return value. */
2095
2096 if (count > maxcount)
2097 {
2098 fprintf(outfile,
2099 "** PCRE error: returned count %d is too big for offset size %d\n",
2100 count, use_size_offsets);
2101 count = use_size_offsets/3;
2102 if (do_g || do_G)
2103 {
2104 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2105 do_g = do_G = FALSE; /* Break g/G loop */
2106 }
2107 }
2108
2109 for (i = 0; i < count * 2; i += 2)
2110 {
2111 if (use_offsets[i] < 0)
2112 fprintf(outfile, "%2d: <unset>\n", i/2);
2113 else
2114 {
2115 fprintf(outfile, "%2d: ", i/2);
2116 (void)pchars(bptr + use_offsets[i],
2117 use_offsets[i+1] - use_offsets[i], outfile);
2118 fprintf(outfile, "\n");
2119 if (i == 0)
2120 {
2121 if (do_showrest)
2122 {
2123 fprintf(outfile, " 0+ ");
2124 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2125 outfile);
2126 fprintf(outfile, "\n");
2127 }
2128 }
2129 }
2130 }
2131
2132 for (i = 0; i < 32; i++)
2133 {
2134 if ((copystrings & (1 << i)) != 0)
2135 {
2136 char copybuffer[256];
2137 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2138 i, copybuffer, sizeof(copybuffer));
2139 if (rc < 0)
2140 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2141 else
2142 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2143 }
2144 }
2145
2146 for (copynamesptr = copynames;
2147 *copynamesptr != 0;
2148 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2149 {
2150 char copybuffer[256];
2151 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2152 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2153 if (rc < 0)
2154 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2155 else
2156 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2157 }
2158
2159 for (i = 0; i < 32; i++)
2160 {
2161 if ((getstrings & (1 << i)) != 0)
2162 {
2163 const char *substring;
2164 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2165 i, &substring);
2166 if (rc < 0)
2167 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2168 else
2169 {
2170 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2171 pcre_free_substring(substring);
2172 }
2173 }
2174 }
2175
2176 for (getnamesptr = getnames;
2177 *getnamesptr != 0;
2178 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2179 {
2180 const char *substring;
2181 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2182 count, (char *)getnamesptr, &substring);
2183 if (rc < 0)
2184 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2185 else
2186 {
2187 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2188 pcre_free_substring(substring);
2189 }
2190 }
2191
2192 if (getlist)
2193 {
2194 const char **stringlist;
2195 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2196 &stringlist);
2197 if (rc < 0)
2198 fprintf(outfile, "get substring list failed %d\n", rc);
2199 else
2200 {
2201 for (i = 0; i < count; i++)
2202 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2203 if (stringlist[i] != NULL)
2204 fprintf(outfile, "string list not terminated by NULL\n");
2205 /* free((void *)stringlist); */
2206 pcre_free_substring_list(stringlist);
2207 }
2208 }
2209 }
2210
2211 /* There was a partial match */
2212
2213 else if (count == PCRE_ERROR_PARTIAL)
2214 {
2215 fprintf(outfile, "Partial match");
2216 #if !defined NODFA
2217 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2218 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2219 bptr + use_offsets[0]);
2220 #endif
2221 fprintf(outfile, "\n");
2222 break; /* Out of the /g loop */
2223 }
2224
2225 /* Failed to match. If this is a /g or /G loop and we previously set
2226 g_notempty after a null match, this is not necessarily the end. We want
2227 to advance the start offset, and continue. We won't be at the end of the
2228 string - that was checked before setting g_notempty.
2229
2230 Complication arises in the case when the newline option is "any" or
2231 "anycrlf". If the previous match was at the end of a line terminated by
2232 CRLF, an advance of one character just passes the \r, whereas we should
2233 prefer the longer newline sequence, as does the code in pcre_exec().
2234 Fudge the offset value to achieve this.
2235
2236 Otherwise, in the case of UTF-8 matching, the advance must be one
2237 character, not one byte. */
2238
2239 else
2240 {
2241 if (g_notempty != 0)
2242 {
2243 int onechar = 1;
2244 unsigned int obits = ((real_pcre *)re)->options;
2245 use_offsets[0] = start_offset;
2246 if ((obits & PCRE_NEWLINE_BITS) == 0)
2247 {
2248 int d;
2249 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2250 obits = (d == '\r')? PCRE_NEWLINE_CR :
2251 (d == '\n')? PCRE_NEWLINE_LF :
2252 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2253 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2254 (d == -1)? PCRE_NEWLINE_ANY : 0;
2255 }
2256 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2257 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2258 &&
2259 start_offset < len - 1 &&
2260 bptr[start_offset] == '\r' &&
2261 bptr[start_offset+1] == '\n')
2262 onechar++;
2263 else if (use_utf8)
2264 {
2265 while (start_offset + onechar < len)
2266 {
2267 int tb = bptr[start_offset+onechar];
2268 if (tb <= 127) break;
2269 tb &= 0xc0;
2270 if (tb != 0 && tb != 0xc0) onechar++;
2271 }
2272 }
2273 use_offsets[1] = start_offset + onechar;
2274 }
2275 else
2276 {
2277 if (count == PCRE_ERROR_NOMATCH)
2278 {
2279 if (gmatched == 0) fprintf(outfile, "No match\n");
2280 }
2281 else fprintf(outfile, "Error %d\n", count);
2282 break; /* Out of the /g loop */
2283 }
2284 }
2285
2286 /* If not /g or /G we are done */
2287
2288 if (!do_g && !do_G) break;
2289
2290 /* If we have matched an empty string, first check to see if we are at
2291 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2292 what Perl's /g option does. This turns out to be rather cunning. First
2293 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2294 same point. If this fails (picked up above) we advance to the next
2295 character. */
2296
2297 g_notempty = 0;
2298
2299 if (use_offsets[0] == use_offsets[1])
2300 {
2301 if (use_offsets[0] == len) break;
2302 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2303 }
2304
2305 /* For /g, update the start offset, leaving the rest alone */
2306
2307 if (do_g) start_offset = use_offsets[1];
2308
2309 /* For /G, update the pointer and length */
2310
2311 else
2312 {
2313 bptr += use_offsets[1];
2314 len -= use_offsets[1];
2315 }
2316 } /* End of loop for /g and /G */
2317
2318 NEXT_DATA: continue;
2319 } /* End of loop for data lines */
2320
2321 CONTINUE:
2322
2323 #if !defined NOPOSIX
2324 if (posix || do_posix) regfree(&preg);
2325 #endif
2326
2327 if (re != NULL) new_free(re);
2328 if (extra != NULL) new_free(extra);
2329 if (tables != NULL)
2330 {
2331 new_free((void *)tables);
2332 setlocale(LC_CTYPE, "C");
2333 locale_set = 0;
2334 }
2335 }
2336
2337 if (infile == stdin) fprintf(outfile, "\n");
2338
2339 EXIT:
2340
2341 if (infile != NULL && infile != stdin) fclose(infile);
2342 if (outfile != NULL && outfile != stdout) fclose(outfile);
2343
2344 free(buffer);
2345 free(dbuffer);
2346 free(pbuffer);
2347 free(offsets);
2348
2349 return yield;
2350 }
2351
2352 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12