/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 199 - (show annotations) (download)
Tue Jul 31 14:39:09 2007 UTC (7 years ago) by ph10
File MIME type: text/plain
File size: 68757 byte(s)
Daniel's patch for config.h and Windows DLL declarations (not fully working).

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include <config.h>
41 #endif
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <locale.h>
49 #include <errno.h>
50
51
52 /* A number of things vary for Windows builds. Originally, pcretest opened its
53 input and output without "b"; then I was told that "b" was needed in some
54 environments, so it was added for release 5.0 to both the input and output. (It
55 makes no difference on Unix-like systems.) Later I was told that it is wrong
56 for the input on Windows. I've now abstracted the modes into two macros that
57 are set here, to make it easier to fiddle with them, and removed "b" from the
58 input mode under Windows. */
59
60 #if defined(_WIN32) || defined(WIN32)
61 #include <io.h> /* For _setmode() */
62 #include <fcntl.h> /* For _O_BINARY */
63 #define INPUT_MODE "r"
64 #define OUTPUT_MODE "wb"
65
66 #else
67 #include <sys/time.h> /* These two includes are needed */
68 #include <sys/resource.h> /* for setrlimit(). */
69 #define INPUT_MODE "rb"
70 #define OUTPUT_MODE "wb"
71 #endif
72
73
74 /* We have to include pcre_internal.h because we need the internal info for
75 displaying the results of pcre_study() and we also need to know about the
76 internal macros, structures, and other internal data values; pcretest has
77 "inside information" compared to a program that strictly follows the PCRE API.
78
79 Although pcre_internal.h does itself include pcre.h, we explicitly include it
80 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81 appropriately for an application, not for building PCRE. */
82
83 #include "pcre.h"
84 #include "pcre_internal.h"
85
86 /* We need access to the data tables that PCRE uses. So as not to have to keep
87 two copies, we include the source file here, changing the names of the external
88 symbols to prevent clashes. */
89
90 #define _pcre_utf8_table1 utf8_table1
91 #define _pcre_utf8_table1_size utf8_table1_size
92 #define _pcre_utf8_table2 utf8_table2
93 #define _pcre_utf8_table3 utf8_table3
94 #define _pcre_utf8_table4 utf8_table4
95 #define _pcre_utt utt
96 #define _pcre_utt_size utt_size
97 #define _pcre_OP_lengths OP_lengths
98
99 #include "pcre_tables.c"
100
101 /* We also need the pcre_printint() function for printing out compiled
102 patterns. This function is in a separate file so that it can be included in
103 pcre_compile.c when that module is compiled with debugging enabled.
104
105 The definition of the macro PRINTABLE, which determines whether to print an
106 output character as-is or as a hex value when showing compiled patterns, is
107 contained in this file. We use it here also, in cases when the locale has not
108 been explicitly changed, so as to get consistent output from systems that
109 differ in their output from isprint() even in the "C" locale. */
110
111 #include "pcre_printint.src"
112
113 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
114
115
116 /* It is possible to compile this test program without including support for
117 testing the POSIX interface, though this is not available via the standard
118 Makefile. */
119
120 #if !defined NOPOSIX
121 #include "pcreposix.h"
122 #endif
123
124 /* It is also possible, for the benefit of the version currently imported into
125 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
126 interface to the DFA matcher (NODFA), and without the doublecheck of the old
127 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
128 UTF8 support if PCRE is built without it. */
129
130 #ifndef SUPPORT_UTF8
131 #ifndef NOUTF8
132 #define NOUTF8
133 #endif
134 #endif
135
136
137 /* Other parameters */
138
139 #ifndef CLOCKS_PER_SEC
140 #ifdef CLK_TCK
141 #define CLOCKS_PER_SEC CLK_TCK
142 #else
143 #define CLOCKS_PER_SEC 100
144 #endif
145 #endif
146
147 /* This is the default loop count for timing. */
148
149 #define LOOPREPEAT 500000
150
151 /* Static variables */
152
153 static FILE *outfile; /* Stream that results and diagnostics are written to */
154 static int log_store = 0; /* Reset from the -s/-m setting for each pattern; the M pattern option sets it */
155 static int callout_count; /* Incremented by callout() when the callout number equals callout_fail_id */
156 static int callout_extra; /* Non-zero: callout() also lists the captured substrings */
157 static int callout_fail_count; /* callout() yields 1 once callout_count reaches this value */
158 static int callout_fail_id; /* The callout number to which callout_count applies */
159 static int first_callout; /* Non-zero: callout() echoes the subject; cleared by every callout() call */
160 static int locale_set = 0; /* Non-zero: PRINTHEX uses isprint() instead of PRINTABLE */
161 static int show_malloc; /* Non-zero: the malloc/free wrappers log each call to outfile */
162 static int use_utf8; /* Non-zero: pchars() decodes strings as UTF-8 */
163 static size_t gotten_store; /* Size passed to the most recent new_malloc() call */
164
165 /* The buffers grow automatically if very long input lines are encountered. */
166
167 static int buffer_size = 50000;
168 static uschar *buffer = NULL;
169 static uschar *dbuffer = NULL;
170 static uschar *pbuffer = NULL;
171
172
173
174 /*************************************************
175 * Read or extend an input line *
176 *************************************************/
177
178 /* Input lines are read into buffer, but both patterns and data lines can be
179 continued over multiple input lines. In addition, if the buffer fills up, we
180 want to automatically expand it so as to be able to handle extremely large
181 lines that are needed for certain stress tests. When the input buffer is
182 expanded, the other two buffers must also be expanded likewise, and the
183 contents of pbuffer, which are a copy of the input for callouts, must be
184 preserved (for when expansion happens for a data line). This is not the most
185 optimal way of handling this, but hey, this is just a test program!
186
187 Arguments:
188 f the file to read
189 start where in buffer to start (this *must* be within buffer)
190
191 Returns: pointer to the start of new data
192 could be a copy of start, or could be moved
193 NULL if no data read and EOF reached
194 */
195
196 static uschar *
197 extend_inputline(FILE *f, uschar *start)
198 {
199 uschar *here = start;
200
201 for (;;)
202 {
203 int rlen = buffer_size - (here - buffer);
204
205 if (rlen > 1000)
206 {
207 int dlen;
208 if (fgets((char *)here, rlen, f) == NULL)
209 return (here == start)? NULL : start;
210 dlen = (int)strlen((char *)here);
211 if (dlen > 0 && here[dlen - 1] == '\n') return start;
212 here += dlen;
213 }
214
215 else
216 {
217 int new_buffer_size = 2*buffer_size;
218 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
219 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
220 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
221
222 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
223 {
224 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
225 exit(1);
226 }
227
228 memcpy(new_buffer, buffer, buffer_size);
229 memcpy(new_pbuffer, pbuffer, buffer_size);
230
231 buffer_size = new_buffer_size;
232
233 start = new_buffer + (start - buffer);
234 here = new_buffer + (here - buffer);
235
236 free(buffer);
237 free(dbuffer);
238 free(pbuffer);
239
240 buffer = new_buffer;
241 dbuffer = new_dbuffer;
242 pbuffer = new_pbuffer;
243 }
244 }
245
246 return NULL; /* Control never gets here */
247 }
248
249
250
251
252
253
254
255 /*************************************************
256 * Read number from string *
257 *************************************************/
258
/* Convert a run of decimal digits, optionally preceded by white space, to an
int. We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so it is kept simple, except that a value too large
for an int is clamped instead of overflowing (signed overflow is undefined
behaviour in C).

Arguments:
  str         string to be converted
  endptr      where to put the end pointer (the first character not consumed)

Returns:      the value as a non-negative int
              0 if no digits are present
              the largest int if the digits represent a larger value
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  /* The largest int, worked out here rather than taken from <limits.h> so
  that this function needs no header beyond those the file already includes.
  (Assumes int has no padding bits.) */

  const int biggest = (int)(~0u >> 1);
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  for (; isdigit(*str); str++)
    {
    int digit = (int)(*str - '0');

    /* result*10 + digit fits if and only if result <= (biggest - digit)/10.
    Once clamped, the value stays clamped, but the remaining digits are still
    consumed so that *endptr is set as for any other number. */

    if (result > (biggest - digit) / 10)
      result = biggest;
    else
      result = result * 10 + digit;
    }

  *endptr = str;
  return result;
}
279
280
281
282
283 /*************************************************
284 * Convert UTF-8 string to value *
285 *************************************************/
286
287 /* This function takes one or more bytes that represents a UTF-8 character,
288 and returns the value of the character.
289
290 Argument:
291 utf8bytes a pointer to the byte vector
292 vptr a pointer to an int to receive the value
293
294 Returns: > 0 => the number of bytes consumed
295 -6 to 0 => malformed UTF-8 character at offset = (-return)
296 */
297
298 #if !defined NOUTF8
299
300 static int
301 utf82ord(unsigned char *utf8bytes, int *vptr)
302 {
303 int c = *utf8bytes++;
304 int d = c;
305 int i, j, s;
306
307 for (i = -1; i < 6; i++) /* i is number of additional bytes */
308 {
309 if ((d & 0x80) == 0) break;
310 d <<= 1;
311 }
312
313 if (i == -1) { *vptr = c; return 1; } /* ascii character */
314 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
315
316 /* i now has a value in the range 1-5 */
317
318 s = 6*i;
319 d = (c & utf8_table3[i]) << s;
320
321 for (j = 0; j < i; j++)
322 {
323 c = *utf8bytes++;
324 if ((c & 0xc0) != 0x80) return -(j+1);
325 s -= 6;
326 d |= (c & 0x3f) << s;
327 }
328
329 /* Check that encoding was the correct unique one */
330
331 for (j = 0; j < utf8_table1_size; j++)
332 if (d <= utf8_table1[j]) break;
333 if (j != i) return -(i+1);
334
335 /* Valid value */
336
337 *vptr = d;
338 return i+1;
339 }
340
341 #endif
342
343
344
345 /*************************************************
346 * Convert character value to UTF-8 *
347 *************************************************/
348
349 /* This function takes an integer value in the range 0 - 0x7fffffff
350 and encodes it as a UTF-8 character in 0 to 6 bytes.
351
352 Arguments:
353 cvalue the character value
354 utf8bytes pointer to buffer for result - at least 6 bytes long
355
356 Returns: number of characters placed in the buffer
357 */
358
359 #if !defined NOUTF8
360
361 static int
362 ord2utf8(int cvalue, uschar *utf8bytes)
363 {
364 register int i, j;
365 for (i = 0; i < utf8_table1_size; i++)
366 if (cvalue <= utf8_table1[i]) break;
367 utf8bytes += i;
368 for (j = i; j > 0; j--)
369 {
370 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
371 cvalue >>= 6;
372 }
373 *utf8bytes = utf8_table2[i] | cvalue;
374 return i + 1;
375 }
376
377 #endif
378
379
380
381 /*************************************************
382 * Print character string *
383 *************************************************/
384
385 /* Character string printing function. Must handle UTF-8 strings in utf8
386 mode. Yields number of characters printed. If handed a NULL file, just counts
387 chars without printing. */
388
389 static int pchars(unsigned char *p, int length, FILE *f)
390 {
391 int c = 0;
392 int yield = 0;
393
394 while (length-- > 0)
395 {
396 #if !defined NOUTF8
397 if (use_utf8)
398 {
399 int rc = utf82ord(p, &c);
400
401 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
402 {
403 length -= rc - 1;
404 p += rc;
405 if (PRINTHEX(c))
406 {
407 if (f != NULL) fprintf(f, "%c", c);
408 yield++;
409 }
410 else
411 {
412 int n = 4;
413 if (f != NULL) fprintf(f, "\\x{%02x}", c);
414 yield += (n <= 0x000000ff)? 2 :
415 (n <= 0x00000fff)? 3 :
416 (n <= 0x0000ffff)? 4 :
417 (n <= 0x000fffff)? 5 : 6;
418 }
419 continue;
420 }
421 }
422 #endif
423
424 /* Not UTF-8, or malformed UTF-8 */
425
426 c = *p++;
427 if (PRINTHEX(c))
428 {
429 if (f != NULL) fprintf(f, "%c", c);
430 yield++;
431 }
432 else
433 {
434 if (f != NULL) fprintf(f, "\\x%02x", c);
435 yield += 4;
436 }
437 }
438
439 return yield;
440 }
441
442
443
444 /*************************************************
445 * Callout function *
446 *************************************************/
447
448 /* Called from PCRE as a result of the (?C) item. We print out where we are in
449 the match. Yield zero unless more callouts than the fail count, or the callout
450 data is not zero. */
451
452 static int callout(pcre_callout_block *cb)
453 {
454 FILE *f = (first_callout | callout_extra)? outfile : NULL;
455 int i, pre_start, post_start, subject_length;
456
457 if (callout_extra)
458 {
459 fprintf(f, "Callout %d: last capture = %d\n",
460 cb->callout_number, cb->capture_last);
461
462 for (i = 0; i < cb->capture_top * 2; i += 2)
463 {
464 if (cb->offset_vector[i] < 0)
465 fprintf(f, "%2d: <unset>\n", i/2);
466 else
467 {
468 fprintf(f, "%2d: ", i/2);
469 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
470 cb->offset_vector[i+1] - cb->offset_vector[i], f);
471 fprintf(f, "\n");
472 }
473 }
474 }
475
476 /* Re-print the subject in canonical form, the first time or if giving full
477 datails. On subsequent calls in the same match, we use pchars just to find the
478 printed lengths of the substrings. */
479
480 if (f != NULL) fprintf(f, "--->");
481
482 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
483 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
484 cb->current_position - cb->start_match, f);
485
486 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
487
488 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
489 cb->subject_length - cb->current_position, f);
490
491 if (f != NULL) fprintf(f, "\n");
492
493 /* Always print appropriate indicators, with callout number if not already
494 shown. For automatic callouts, show the pattern offset. */
495
496 if (cb->callout_number == 255)
497 {
498 fprintf(outfile, "%+3d ", cb->pattern_position);
499 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
500 }
501 else
502 {
503 if (callout_extra) fprintf(outfile, " ");
504 else fprintf(outfile, "%3d ", cb->callout_number);
505 }
506
507 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
508 fprintf(outfile, "^");
509
510 if (post_start > 0)
511 {
512 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
513 fprintf(outfile, "^");
514 }
515
516 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
517 fprintf(outfile, " ");
518
519 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
520 pbuffer + cb->pattern_position);
521
522 fprintf(outfile, "\n");
523 first_callout = 0;
524
525 if (cb->callout_data != NULL)
526 {
527 int callout_data = *((int *)(cb->callout_data));
528 if (callout_data != 0)
529 {
530 fprintf(outfile, "Callout data = %d\n", callout_data);
531 return callout_data;
532 }
533 }
534
535 return (cb->callout_number != callout_fail_id)? 0 :
536 (++callout_count >= callout_fail_count)? 1 : 0;
537 }
538
539
540 /*************************************************
541 * Local malloc functions *
542 *************************************************/
543
544 /* Alternative malloc function, to test functionality and show the size of the
545 compiled re. */
546
547 static void *new_malloc(size_t size)
548 {
549 void *block = malloc(size);
550 gotten_store = size;
551 if (show_malloc)
552 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
553 return block;
554 }
555
556 static void new_free(void *block)
557 {
558 if (show_malloc)
559 fprintf(outfile, "free %p\n", block);
560 free(block);
561 }
562
563
564 /* For recursion malloc/free, to test stacking calls */
565
566 static void *stack_malloc(size_t size)
567 {
568 void *block = malloc(size);
569 if (show_malloc)
570 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
571 return block;
572 }
573
574 static void stack_free(void *block)
575 {
576 if (show_malloc)
577 fprintf(outfile, "stack_free %p\n", block);
578 free(block);
579 }
580
581
582 /*************************************************
583 * Call pcre_fullinfo() *
584 *************************************************/
585
586 /* Get one piece of information from the pcre_fullinfo() function */
587
588 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
589 {
590 int rc;
591 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
592 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
593 }
594
595
596
597 /*************************************************
598 * Byte flipping function *
599 *************************************************/
600
/* Reverse the byte order of a 2-byte or a 4-byte quantity.

Arguments:
  value       the value to flip
  n           2 to exchange the two low-order bytes; any other number to
                reverse the four low-order bytes

Returns:      the flipped value; bits above those involved are dropped
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int b0 = value & 0xff;          /* lowest byte */
  unsigned long int b1 = (value >> 8) & 0xff;
  unsigned long int b2;
  unsigned long int b3;

  if (n == 2) return (b0 << 8) | b1;

  b2 = (value >> 16) & 0xff;
  b3 = (value >> 24) & 0xff;

  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
610
611
612
613
614 /*************************************************
615 * Check match or recursion limit *
616 *************************************************/
617
618 static int
619 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
620 int start_offset, int options, int *use_offsets, int use_size_offsets,
621 int flag, unsigned long int *limit, int errnumber, const char *msg)
622 {
623 int count;
624 int min = 0;
625 int mid = 64;
626 int max = -1;
627
628 extra->flags |= flag;
629
630 for (;;)
631 {
632 *limit = mid;
633
634 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
635 use_offsets, use_size_offsets);
636
637 if (count == errnumber)
638 {
639 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
640 min = mid;
641 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
642 }
643
644 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
645 count == PCRE_ERROR_PARTIAL)
646 {
647 if (mid == min + 1)
648 {
649 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
650 break;
651 }
652 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
653 max = mid;
654 mid = (min + mid)/2;
655 }
656 else break; /* Some other error */
657 }
658
659 extra->flags &= ~flag;
660 return count;
661 }
662
663
664
665 /*************************************************
666 * Check newline indicator *
667 *************************************************/
668
669 /* This is used both at compile and run-time to check for <xxx> escapes, where
670 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
671 no match.
672
673 Arguments:
674 p points after the leading '<'
675 f file for error message
676
677 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
678 */
679
680 static int
681 check_newline(uschar *p, FILE *f)
682 {
683 if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
684 if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
685 if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
686 if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
687 if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
688 fprintf(f, "Unknown newline type at: <%s\n", p);
689 return 0;
690 }
691
692
693
694 /*************************************************
695 * Usage function *
696 *************************************************/
697
/* Write a summary of the command line options to the standard output. The
lines for -dfa and -p are left out when pcretest is built with NODFA or
NOPOSIX respectively. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input> [<output>]]\n", stdout);
  fputs(" -b show compiled code (bytecode)\n", stdout);
  fputs(" -C show PCRE compile-time options and exit\n", stdout);
  fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
  fputs(" -help show usage information\n", stdout);
  fputs(" -i show information about compiled patterns\n", stdout);
  fputs(" -m output memory used information\n", stdout);
  fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif
  fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
  fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
  fputs(" -s output store (memory) used information\n", stdout);
  fputs(" -t time compilation and execution\n", stdout);
  fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
  fputs(" -tm time execution (matching) only\n", stdout);
  fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
723
724
725
726 /*************************************************
727 * Main Program *
728 *************************************************/
729
730 /* Read lines from named file or stdin and write to named file or stdout; lines
731 consist of a regular expression, in delimiters and optionally followed by
732 options, followed by a set of test data, terminated by an empty line. */
733
734 int main(int argc, char **argv)
735 {
736 FILE *infile = stdin;
737 int options = 0;
738 int study_options = 0;
739 int op = 1;
740 int timeit = 0;
741 int timeitm = 0;
742 int showinfo = 0;
743 int showstore = 0;
744 int quiet = 0;
745 int size_offsets = 45;
746 int size_offsets_max;
747 int *offsets = NULL;
748 #if !defined NOPOSIX
749 int posix = 0;
750 #endif
751 int debug = 0;
752 int done = 0;
753 int all_use_dfa = 0;
754 int yield = 0;
755 int stack_size;
756
757 /* These vectors store, end-to-end, a list of captured substring names. Assume
758 that 1024 is plenty long enough for the few names we'll be testing. */
759
760 uschar copynames[1024];
761 uschar getnames[1024];
762
763 uschar *copynamesptr;
764 uschar *getnamesptr;
765
766 /* Get buffers from malloc() so that Electric Fence will check their misuse
767 when I am debugging. They grow automatically when very long lines are read. */
768
769 buffer = (unsigned char *)malloc(buffer_size);
770 dbuffer = (unsigned char *)malloc(buffer_size);
771 pbuffer = (unsigned char *)malloc(buffer_size);
772
773 /* The outfile variable is static so that new_malloc can use it. */
774
775 outfile = stdout;
776
777 /* The following _setmode() stuff is some Windows magic that tells its runtime
778 library to translate CRLF into a single LF character. At least, that's what
779 I've been told: never having used Windows I take this all on trust. Originally
780 it set 0x8000, but then I was advised that _O_BINARY was better. */
781
782 #if defined(_WIN32) || defined(WIN32)
783 _setmode( _fileno( stdout ), _O_BINARY );
784 #endif
785
786 /* Scan options */
787
788 while (argc > 1 && argv[op][0] == '-')
789 {
790 unsigned char *endptr;
791
792 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
793 showstore = 1;
794 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
795 else if (strcmp(argv[op], "-b") == 0) debug = 1;
796 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
797 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
798 #if !defined NODFA
799 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
800 #endif
801 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
802 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
803 *endptr == 0))
804 {
805 op++;
806 argc--;
807 }
808 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
809 {
810 int both = argv[op][2] == 0;
811 int temp;
812 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
813 *endptr == 0))
814 {
815 timeitm = temp;
816 op++;
817 argc--;
818 }
819 else timeitm = LOOPREPEAT;
820 if (both) timeit = timeitm;
821 }
822 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
823 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
824 *endptr == 0))
825 {
826 #if defined(_WIN32) || defined(WIN32)
827 printf("PCRE: -S not supported on this OS\n");
828 exit(1);
829 #else
830 int rc;
831 struct rlimit rlim;
832 getrlimit(RLIMIT_STACK, &rlim);
833 rlim.rlim_cur = stack_size * 1024 * 1024;
834 rc = setrlimit(RLIMIT_STACK, &rlim);
835 if (rc != 0)
836 {
837 printf("PCRE: setrlimit() failed with error %d\n", rc);
838 exit(1);
839 }
840 op++;
841 argc--;
842 #endif
843 }
844 #if !defined NOPOSIX
845 else if (strcmp(argv[op], "-p") == 0) posix = 1;
846 #endif
847 else if (strcmp(argv[op], "-C") == 0)
848 {
849 int rc;
850 printf("PCRE version %s\n", pcre_version());
851 printf("Compiled with\n");
852 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
853 printf(" %sUTF-8 support\n", rc? "" : "No ");
854 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
855 printf(" %sUnicode properties support\n", rc? "" : "No ");
856 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
857 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
858 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
859 (rc == -2)? "ANYCRLF" :
860 (rc == -1)? "ANY" : "???");
861 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
862 printf(" Internal link size = %d\n", rc);
863 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
864 printf(" POSIX malloc threshold = %d\n", rc);
865 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
866 printf(" Default match limit = %d\n", rc);
867 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
868 printf(" Default recursion depth limit = %d\n", rc);
869 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
870 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
871 goto EXIT;
872 }
873 else if (strcmp(argv[op], "-help") == 0 ||
874 strcmp(argv[op], "--help") == 0)
875 {
876 usage();
877 goto EXIT;
878 }
879 else
880 {
881 printf("** Unknown or malformed option %s\n", argv[op]);
882 usage();
883 yield = 1;
884 goto EXIT;
885 }
886 op++;
887 argc--;
888 }
889
890 /* Get the store for the offsets vector, and remember what it was */
891
892 size_offsets_max = size_offsets;
893 offsets = (int *)malloc(size_offsets_max * sizeof(int));
894 if (offsets == NULL)
895 {
896 printf("** Failed to get %d bytes of memory for offsets vector\n",
897 (int)(size_offsets_max * sizeof(int)));
898 yield = 1;
899 goto EXIT;
900 }
901
902 /* Sort out the input and output files */
903
904 if (argc > 1)
905 {
906 infile = fopen(argv[op], INPUT_MODE);
907 if (infile == NULL)
908 {
909 printf("** Failed to open %s\n", argv[op]);
910 yield = 1;
911 goto EXIT;
912 }
913 }
914
915 if (argc > 2)
916 {
917 outfile = fopen(argv[op+1], OUTPUT_MODE);
918 if (outfile == NULL)
919 {
920 printf("** Failed to open %s\n", argv[op+1]);
921 yield = 1;
922 goto EXIT;
923 }
924 }
925
926 /* Set alternative malloc function */
927
928 pcre_malloc = new_malloc;
929 pcre_free = new_free;
930 pcre_stack_malloc = stack_malloc;
931 pcre_stack_free = stack_free;
932
933 /* Heading line unless quiet, then prompt for first regex if stdin */
934
935 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
936
937 /* Main loop */
938
939 while (!done)
940 {
941 pcre *re = NULL;
942 pcre_extra *extra = NULL;
943
944 #if !defined NOPOSIX /* There are still compilers that require no indent */
945 regex_t preg;
946 int do_posix = 0;
947 #endif
948
949 const char *error;
950 unsigned char *p, *pp, *ppp;
951 unsigned char *to_file = NULL;
952 const unsigned char *tables = NULL;
953 unsigned long int true_size, true_study_size = 0;
954 size_t size, regex_gotten_store;
955 int do_study = 0;
956 int do_debug = debug;
957 int debug_lengths = 1;
958 int do_G = 0;
959 int do_g = 0;
960 int do_showinfo = showinfo;
961 int do_showrest = 0;
962 int do_flip = 0;
963 int erroroffset, len, delimiter, poffset;
964
965 use_utf8 = 0;
966
967 if (infile == stdin) printf(" re> ");
968 if (extend_inputline(infile, buffer) == NULL) break;
969 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
970 fflush(outfile);
971
972 p = buffer;
973 while (isspace(*p)) p++;
974 if (*p == 0) continue;
975
976 /* See if the pattern is to be loaded pre-compiled from a file. */
977
978 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
979 {
980 unsigned long int magic, get_options;
981 uschar sbuf[8];
982 FILE *f;
983
984 p++;
985 pp = p + (int)strlen((char *)p);
986 while (isspace(pp[-1])) pp--;
987 *pp = 0;
988
989 f = fopen((char *)p, "rb");
990 if (f == NULL)
991 {
992 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
993 continue;
994 }
995
996 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
997
998 true_size =
999 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1000 true_study_size =
1001 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1002
1003 re = (real_pcre *)new_malloc(true_size);
1004 regex_gotten_store = gotten_store;
1005
1006 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1007
1008 magic = ((real_pcre *)re)->magic_number;
1009 if (magic != MAGIC_NUMBER)
1010 {
1011 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1012 {
1013 do_flip = 1;
1014 }
1015 else
1016 {
1017 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1018 fclose(f);
1019 continue;
1020 }
1021 }
1022
1023 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1024 do_flip? " (byte-inverted)" : "", p);
1025
1026 /* Need to know if UTF-8 for printing data strings */
1027
1028 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1029 use_utf8 = (get_options & PCRE_UTF8) != 0;
1030
1031 /* Now see if there is any following study data */
1032
1033 if (true_study_size != 0)
1034 {
1035 pcre_study_data *psd;
1036
1037 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1038 extra->flags = PCRE_EXTRA_STUDY_DATA;
1039
1040 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1041 extra->study_data = psd;
1042
1043 if (fread(psd, 1, true_study_size, f) != true_study_size)
1044 {
1045 FAIL_READ:
1046 fprintf(outfile, "Failed to read data from %s\n", p);
1047 if (extra != NULL) new_free(extra);
1048 if (re != NULL) new_free(re);
1049 fclose(f);
1050 continue;
1051 }
1052 fprintf(outfile, "Study data loaded from %s\n", p);
1053 do_study = 1; /* To get the data output if requested */
1054 }
1055 else fprintf(outfile, "No study data\n");
1056
1057 fclose(f);
1058 goto SHOW_INFO;
1059 }
1060
1061 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1062 the pattern; if it isn't complete, read more. */
1063
1064 delimiter = *p++;
1065
1066 if (isalnum(delimiter) || delimiter == '\\')
1067 {
1068 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1069 goto SKIP_DATA;
1070 }
1071
1072 pp = p;
1073 poffset = p - buffer;
1074
1075 for(;;)
1076 {
1077 while (*pp != 0)
1078 {
1079 if (*pp == '\\' && pp[1] != 0) pp++;
1080 else if (*pp == delimiter) break;
1081 pp++;
1082 }
1083 if (*pp != 0) break;
1084 if (infile == stdin) printf(" > ");
1085 if ((pp = extend_inputline(infile, pp)) == NULL)
1086 {
1087 fprintf(outfile, "** Unexpected EOF\n");
1088 done = 1;
1089 goto CONTINUE;
1090 }
1091 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1092 }
1093
1094 /* The buffer may have moved while being extended; reset the start of data
1095 pointer to the correct relative point in the buffer. */
1096
1097 p = buffer + poffset;
1098
1099 /* If the first character after the delimiter is backslash, make
1100 the pattern end with backslash. This is purely to provide a way
1101 of testing for the error message when a pattern ends with backslash. */
1102
1103 if (pp[1] == '\\') *pp++ = '\\';
1104
1105 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1106 for callouts. */
1107
1108 *pp++ = 0;
1109 strcpy((char *)pbuffer, (char *)p);
1110
1111 /* Look for options after final delimiter */
1112
1113 options = 0;
1114 study_options = 0;
1115 log_store = showstore; /* default from command line */
1116
1117 while (*pp != 0)
1118 {
1119 switch (*pp++)
1120 {
1121 case 'f': options |= PCRE_FIRSTLINE; break;
1122 case 'g': do_g = 1; break;
1123 case 'i': options |= PCRE_CASELESS; break;
1124 case 'm': options |= PCRE_MULTILINE; break;
1125 case 's': options |= PCRE_DOTALL; break;
1126 case 'x': options |= PCRE_EXTENDED; break;
1127
1128 case '+': do_showrest = 1; break;
1129 case 'A': options |= PCRE_ANCHORED; break;
1130 case 'B': do_debug = 1; break;
1131 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1132 case 'D': do_debug = do_showinfo = 1; break;
1133 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1134 case 'F': do_flip = 1; break;
1135 case 'G': do_G = 1; break;
1136 case 'I': do_showinfo = 1; break;
1137 case 'J': options |= PCRE_DUPNAMES; break;
1138 case 'M': log_store = 1; break;
1139 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1140
1141 #if !defined NOPOSIX
1142 case 'P': do_posix = 1; break;
1143 #endif
1144
1145 case 'S': do_study = 1; break;
1146 case 'U': options |= PCRE_UNGREEDY; break;
1147 case 'X': options |= PCRE_EXTRA; break;
1148 case 'Z': debug_lengths = 0; break;
1149 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1150 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1151
1152 case 'L':
1153 ppp = pp;
1154 /* The '\r' test here is so that it works on Windows. */
1155 /* The zero-byte test is just in case this is an unterminated line. */
1156 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1157 *ppp = 0;
1158 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1159 {
1160 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1161 goto SKIP_DATA;
1162 }
1163 locale_set = 1;
1164 tables = pcre_maketables();
1165 pp = ppp;
1166 break;
1167
1168 case '>':
1169 to_file = pp;
1170 while (*pp != 0) pp++;
1171 while (isspace(pp[-1])) pp--;
1172 *pp = 0;
1173 break;
1174
1175 case '<':
1176 {
1177 int x = check_newline(pp, outfile);
1178 if (x == 0) goto SKIP_DATA;
1179 options |= x;
1180 while (*pp++ != '>');
1181 }
1182 break;
1183
1184 case '\r': /* So that it works in Windows */
1185 case '\n':
1186 case ' ':
1187 break;
1188
1189 default:
1190 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1191 goto SKIP_DATA;
1192 }
1193 }
1194
1195 /* Handle compiling via the POSIX interface, which doesn't support the
1196 timing, showing, or debugging options, nor the ability to pass over
1197 local character tables. */
1198
1199 #if !defined NOPOSIX
1200 if (posix || do_posix)
1201 {
1202 int rc;
1203 int cflags = 0;
1204
1205 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1206 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1207 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1208 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1209 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1210
1211 rc = regcomp(&preg, (char *)p, cflags);
1212
1213 /* Compilation failed; go back for another re, skipping to blank line
1214 if non-interactive. */
1215
1216 if (rc != 0)
1217 {
1218 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1219 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1220 goto SKIP_DATA;
1221 }
1222 }
1223
1224 /* Handle compiling via the native interface */
1225
1226 else
1227 #endif /* !defined NOPOSIX */
1228
1229 {
1230 if (timeit > 0)
1231 {
1232 register int i;
1233 clock_t time_taken;
1234 clock_t start_time = clock();
1235 for (i = 0; i < timeit; i++)
1236 {
1237 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1238 if (re != NULL) free(re);
1239 }
1240 time_taken = clock() - start_time;
1241 fprintf(outfile, "Compile time %.4f milliseconds\n",
1242 (((double)time_taken * 1000.0) / (double)timeit) /
1243 (double)CLOCKS_PER_SEC);
1244 }
1245
1246 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1247
1248 /* Compilation failed; go back for another re, skipping to blank line
1249 if non-interactive. */
1250
1251 if (re == NULL)
1252 {
1253 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1254 SKIP_DATA:
1255 if (infile != stdin)
1256 {
1257 for (;;)
1258 {
1259 if (extend_inputline(infile, buffer) == NULL)
1260 {
1261 done = 1;
1262 goto CONTINUE;
1263 }
1264 len = (int)strlen((char *)buffer);
1265 while (len > 0 && isspace(buffer[len-1])) len--;
1266 if (len == 0) break;
1267 }
1268 fprintf(outfile, "\n");
1269 }
1270 goto CONTINUE;
1271 }
1272
1273 /* Compilation succeeded; print data if required. There are now two
1274 info-returning functions. The old one has a limited interface and
1275 returns only limited data. Check that it agrees with the newer one. */
1276
1277 if (log_store)
1278 fprintf(outfile, "Memory allocation (code space): %d\n",
1279 (int)(gotten_store -
1280 sizeof(real_pcre) -
1281 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1282
1283 /* Extract the size for possible writing before possibly flipping it,
1284 and remember the store that was got. */
1285
1286 true_size = ((real_pcre *)re)->size;
1287 regex_gotten_store = gotten_store;
1288
1289 /* If /S was present, study the regexp to generate additional info to
1290 help with the matching. */
1291
1292 if (do_study)
1293 {
1294 if (timeit > 0)
1295 {
1296 register int i;
1297 clock_t time_taken;
1298 clock_t start_time = clock();
1299 for (i = 0; i < timeit; i++)
1300 extra = pcre_study(re, study_options, &error);
1301 time_taken = clock() - start_time;
1302 if (extra != NULL) free(extra);
1303 fprintf(outfile, " Study time %.4f milliseconds\n",
1304 (((double)time_taken * 1000.0) / (double)timeit) /
1305 (double)CLOCKS_PER_SEC);
1306 }
1307 extra = pcre_study(re, study_options, &error);
1308 if (error != NULL)
1309 fprintf(outfile, "Failed to study: %s\n", error);
1310 else if (extra != NULL)
1311 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1312 }
1313
1314 /* If the 'F' option was present, we flip the bytes of all the integer
1315 fields in the regex data block and the study block. This is to make it
1316 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1317 compiled on a different architecture. */
1318
1319 if (do_flip)
1320 {
1321 real_pcre *rre = (real_pcre *)re;
1322 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1323 rre->size = byteflip(rre->size, sizeof(rre->size));
1324 rre->options = byteflip(rre->options, sizeof(rre->options));
1325 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1326 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1327 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1328 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1329 rre->name_table_offset = byteflip(rre->name_table_offset,
1330 sizeof(rre->name_table_offset));
1331 rre->name_entry_size = byteflip(rre->name_entry_size,
1332 sizeof(rre->name_entry_size));
1333 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1334
1335 if (extra != NULL)
1336 {
1337 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1338 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1339 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1340 }
1341 }
1342
1343 /* Extract information from the compiled data if required */
1344
1345 SHOW_INFO:
1346
1347 if (do_debug)
1348 {
1349 fprintf(outfile, "------------------------------------------------------------------\n");
1350 pcre_printint(re, outfile, debug_lengths);
1351 }
1352
1353 if (do_showinfo)
1354 {
1355 unsigned long int get_options, all_options;
1356 #if !defined NOINFOCHECK
1357 int old_first_char, old_options, old_count;
1358 #endif
1359 int count, backrefmax, first_char, need_char, okpartial, jchanged;
1360 int nameentrysize, namecount;
1361 const uschar *nametable;
1362
1363 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1364 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1365 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1366 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1367 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1368 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1369 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1370 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1371 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1372 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1373 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1374
1375 #if !defined NOINFOCHECK
1376 old_count = pcre_info(re, &old_options, &old_first_char);
1377 if (count < 0) fprintf(outfile,
1378 "Error %d from pcre_info()\n", count);
1379 else
1380 {
1381 if (old_count != count) fprintf(outfile,
1382 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1383 old_count);
1384
1385 if (old_first_char != first_char) fprintf(outfile,
1386 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1387 first_char, old_first_char);
1388
1389 if (old_options != (int)get_options) fprintf(outfile,
1390 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1391 get_options, old_options);
1392 }
1393 #endif
1394
1395 if (size != regex_gotten_store) fprintf(outfile,
1396 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1397 (int)size, (int)regex_gotten_store);
1398
1399 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1400 if (backrefmax > 0)
1401 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1402
1403 if (namecount > 0)
1404 {
1405 fprintf(outfile, "Named capturing subpatterns:\n");
1406 while (namecount-- > 0)
1407 {
1408 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1409 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1410 GET2(nametable, 0));
1411 nametable += nameentrysize;
1412 }
1413 }
1414
1415 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1416
1417 all_options = ((real_pcre *)re)->options;
1418 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1419
1420 if (get_options == 0) fprintf(outfile, "No options\n");
1421 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1422 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1423 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1424 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1425 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1426 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1427 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1428 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1429 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1430 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1431 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1432 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1433 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1434 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1435
1436 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1437
1438 switch (get_options & PCRE_NEWLINE_BITS)
1439 {
1440 case PCRE_NEWLINE_CR:
1441 fprintf(outfile, "Forced newline sequence: CR\n");
1442 break;
1443
1444 case PCRE_NEWLINE_LF:
1445 fprintf(outfile, "Forced newline sequence: LF\n");
1446 break;
1447
1448 case PCRE_NEWLINE_CRLF:
1449 fprintf(outfile, "Forced newline sequence: CRLF\n");
1450 break;
1451
1452 case PCRE_NEWLINE_ANYCRLF:
1453 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1454 break;
1455
1456 case PCRE_NEWLINE_ANY:
1457 fprintf(outfile, "Forced newline sequence: ANY\n");
1458 break;
1459
1460 default:
1461 break;
1462 }
1463
1464 if (first_char == -1)
1465 {
1466 fprintf(outfile, "First char at start or follows newline\n");
1467 }
1468 else if (first_char < 0)
1469 {
1470 fprintf(outfile, "No first char\n");
1471 }
1472 else
1473 {
1474 int ch = first_char & 255;
1475 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1476 "" : " (caseless)";
1477 if (PRINTHEX(ch))
1478 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1479 else
1480 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1481 }
1482
1483 if (need_char < 0)
1484 {
1485 fprintf(outfile, "No need char\n");
1486 }
1487 else
1488 {
1489 int ch = need_char & 255;
1490 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1491 "" : " (caseless)";
1492 if (PRINTHEX(ch))
1493 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1494 else
1495 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1496 }
1497
1498 /* Don't output study size; at present it is in any case a fixed
1499 value, but it varies, depending on the computer architecture, and
1500 so messes up the test suite. (And with the /F option, it might be
1501 flipped.) */
1502
1503 if (do_study)
1504 {
1505 if (extra == NULL)
1506 fprintf(outfile, "Study returned NULL\n");
1507 else
1508 {
1509 uschar *start_bits = NULL;
1510 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1511
1512 if (start_bits == NULL)
1513 fprintf(outfile, "No starting byte set\n");
1514 else
1515 {
1516 int i;
1517 int c = 24;
1518 fprintf(outfile, "Starting byte set: ");
1519 for (i = 0; i < 256; i++)
1520 {
1521 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1522 {
1523 if (c > 75)
1524 {
1525 fprintf(outfile, "\n ");
1526 c = 2;
1527 }
1528 if (PRINTHEX(i) && i != ' ')
1529 {
1530 fprintf(outfile, "%c ", i);
1531 c += 2;
1532 }
1533 else
1534 {
1535 fprintf(outfile, "\\x%02x ", i);
1536 c += 5;
1537 }
1538 }
1539 }
1540 fprintf(outfile, "\n");
1541 }
1542 }
1543 }
1544 }
1545
1546 /* If the '>' option was present, we write out the regex to a file, and
1547 that is all. The first 8 bytes of the file are the regex length and then
1548 the study length, in big-endian order. */
1549
1550 if (to_file != NULL)
1551 {
1552 FILE *f = fopen((char *)to_file, "wb");
1553 if (f == NULL)
1554 {
1555 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1556 }
1557 else
1558 {
1559 uschar sbuf[8];
1560 sbuf[0] = (true_size >> 24) & 255;
1561 sbuf[1] = (true_size >> 16) & 255;
1562 sbuf[2] = (true_size >> 8) & 255;
1563 sbuf[3] = (true_size) & 255;
1564
1565 sbuf[4] = (true_study_size >> 24) & 255;
1566 sbuf[5] = (true_study_size >> 16) & 255;
1567 sbuf[6] = (true_study_size >> 8) & 255;
1568 sbuf[7] = (true_study_size) & 255;
1569
1570 if (fwrite(sbuf, 1, 8, f) < 8 ||
1571 fwrite(re, 1, true_size, f) < true_size)
1572 {
1573 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1574 }
1575 else
1576 {
1577 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1578 if (extra != NULL)
1579 {
1580 if (fwrite(extra->study_data, 1, true_study_size, f) <
1581 true_study_size)
1582 {
1583 fprintf(outfile, "Write error on %s: %s\n", to_file,
1584 strerror(errno));
1585 }
1586 else fprintf(outfile, "Study data written to %s\n", to_file);
1587
1588 }
1589 }
1590 fclose(f);
1591 }
1592
1593 new_free(re);
1594 if (extra != NULL) new_free(extra);
1595 if (tables != NULL) new_free((void *)tables);
1596 continue; /* With next regex */
1597 }
1598 } /* End of non-POSIX compile */
1599
1600 /* Read data lines and test them */
1601
1602 for (;;)
1603 {
1604 uschar *q;
1605 uschar *bptr;
1606 int *use_offsets = offsets;
1607 int use_size_offsets = size_offsets;
1608 int callout_data = 0;
1609 int callout_data_set = 0;
1610 int count, c;
1611 int copystrings = 0;
1612 int find_match_limit = 0;
1613 int getstrings = 0;
1614 int getlist = 0;
1615 int gmatched = 0;
1616 int start_offset = 0;
1617 int g_notempty = 0;
1618 int use_dfa = 0;
1619
1620 options = 0;
1621
1622 *copynames = 0;
1623 *getnames = 0;
1624
1625 copynamesptr = copynames;
1626 getnamesptr = getnames;
1627
1628 pcre_callout = callout;
1629 first_callout = 1;
1630 callout_extra = 0;
1631 callout_count = 0;
1632 callout_fail_count = 999999;
1633 callout_fail_id = -1;
1634 show_malloc = 0;
1635
1636 if (extra != NULL) extra->flags &=
1637 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1638
1639 len = 0;
1640 for (;;)
1641 {
1642 if (infile == stdin) printf("data> ");
1643 if (extend_inputline(infile, buffer + len) == NULL)
1644 {
1645 if (len > 0) break;
1646 done = 1;
1647 goto CONTINUE;
1648 }
1649 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1650 len = (int)strlen((char *)buffer);
1651 if (buffer[len-1] == '\n') break;
1652 }
1653
1654 while (len > 0 && isspace(buffer[len-1])) len--;
1655 buffer[len] = 0;
1656 if (len == 0) break;
1657
1658 p = buffer;
1659 while (isspace(*p)) p++;
1660
1661 bptr = q = dbuffer;
1662 while ((c = *p++) != 0)
1663 {
1664 int i = 0;
1665 int n = 0;
1666
1667 if (c == '\\') switch ((c = *p++))
1668 {
1669 case 'a': c = 7; break;
1670 case 'b': c = '\b'; break;
1671 case 'e': c = 27; break;
1672 case 'f': c = '\f'; break;
1673 case 'n': c = '\n'; break;
1674 case 'r': c = '\r'; break;
1675 case 't': c = '\t'; break;
1676 case 'v': c = '\v'; break;
1677
1678 case '0': case '1': case '2': case '3':
1679 case '4': case '5': case '6': case '7':
1680 c -= '0';
1681 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1682 c = c * 8 + *p++ - '0';
1683
1684 #if !defined NOUTF8
1685 if (use_utf8 && c > 255)
1686 {
1687 unsigned char buff8[8];
1688 int ii, utn;
1689 utn = ord2utf8(c, buff8);
1690 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1691 c = buff8[ii]; /* Last byte */
1692 }
1693 #endif
1694 break;
1695
1696 case 'x':
1697
1698 /* Handle \x{..} specially - new Perl thing for utf8 */
1699
1700 #if !defined NOUTF8
1701 if (*p == '{')
1702 {
1703 unsigned char *pt = p;
1704 c = 0;
1705 while (isxdigit(*(++pt)))
1706 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1707 if (*pt == '}')
1708 {
1709 unsigned char buff8[8];
1710 int ii, utn;
1711 utn = ord2utf8(c, buff8);
1712 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1713 c = buff8[ii]; /* Last byte */
1714 p = pt + 1;
1715 break;
1716 }
1717 /* Not correct form; fall through */
1718 }
1719 #endif
1720
1721 /* Ordinary \x */
1722
1723 c = 0;
1724 while (i++ < 2 && isxdigit(*p))
1725 {
1726 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1727 p++;
1728 }
1729 break;
1730
1731 case 0: /* \ followed by EOF allows for an empty line */
1732 p--;
1733 continue;
1734
1735 case '>':
1736 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1737 continue;
1738
1739 case 'A': /* Option setting */
1740 options |= PCRE_ANCHORED;
1741 continue;
1742
1743 case 'B':
1744 options |= PCRE_NOTBOL;
1745 continue;
1746
1747 case 'C':
1748 if (isdigit(*p)) /* Set copy string */
1749 {
1750 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1751 copystrings |= 1 << n;
1752 }
1753 else if (isalnum(*p))
1754 {
1755 uschar *npp = copynamesptr;
1756 while (isalnum(*p)) *npp++ = *p++;
1757 *npp++ = 0;
1758 *npp = 0;
1759 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1760 if (n < 0)
1761 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1762 copynamesptr = npp;
1763 }
1764 else if (*p == '+')
1765 {
1766 callout_extra = 1;
1767 p++;
1768 }
1769 else if (*p == '-')
1770 {
1771 pcre_callout = NULL;
1772 p++;
1773 }
1774 else if (*p == '!')
1775 {
1776 callout_fail_id = 0;
1777 p++;
1778 while(isdigit(*p))
1779 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1780 callout_fail_count = 0;
1781 if (*p == '!')
1782 {
1783 p++;
1784 while(isdigit(*p))
1785 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1786 }
1787 }
1788 else if (*p == '*')
1789 {
1790 int sign = 1;
1791 callout_data = 0;
1792 if (*(++p) == '-') { sign = -1; p++; }
1793 while(isdigit(*p))
1794 callout_data = callout_data * 10 + *p++ - '0';
1795 callout_data *= sign;
1796 callout_data_set = 1;
1797 }
1798 continue;
1799
1800 #if !defined NODFA
1801 case 'D':
1802 #if !defined NOPOSIX
1803 if (posix || do_posix)
1804 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1805 else
1806 #endif
1807 use_dfa = 1;
1808 continue;
1809
1810 case 'F':
1811 options |= PCRE_DFA_SHORTEST;
1812 continue;
1813 #endif
1814
1815 case 'G':
1816 if (isdigit(*p))
1817 {
1818 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1819 getstrings |= 1 << n;
1820 }
1821 else if (isalnum(*p))
1822 {
1823 uschar *npp = getnamesptr;
1824 while (isalnum(*p)) *npp++ = *p++;
1825 *npp++ = 0;
1826 *npp = 0;
1827 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1828 if (n < 0)
1829 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1830 getnamesptr = npp;
1831 }
1832 continue;
1833
1834 case 'L':
1835 getlist = 1;
1836 continue;
1837
1838 case 'M':
1839 find_match_limit = 1;
1840 continue;
1841
1842 case 'N':
1843 options |= PCRE_NOTEMPTY;
1844 continue;
1845
1846 case 'O':
1847 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1848 if (n > size_offsets_max)
1849 {
1850 size_offsets_max = n;
1851 free(offsets);
1852 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1853 if (offsets == NULL)
1854 {
1855 printf("** Failed to get %d bytes of memory for offsets vector\n",
1856 (int)(size_offsets_max * sizeof(int)));
1857 yield = 1;
1858 goto EXIT;
1859 }
1860 }
1861 use_size_offsets = n;
1862 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1863 continue;
1864
1865 case 'P':
1866 options |= PCRE_PARTIAL;
1867 continue;
1868
1869 case 'Q':
1870 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1871 if (extra == NULL)
1872 {
1873 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1874 extra->flags = 0;
1875 }
1876 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1877 extra->match_limit_recursion = n;
1878 continue;
1879
1880 case 'q':
1881 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1882 if (extra == NULL)
1883 {
1884 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1885 extra->flags = 0;
1886 }
1887 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1888 extra->match_limit = n;
1889 continue;
1890
1891 #if !defined NODFA
1892 case 'R':
1893 options |= PCRE_DFA_RESTART;
1894 continue;
1895 #endif
1896
1897 case 'S':
1898 show_malloc = 1;
1899 continue;
1900
1901 case 'Z':
1902 options |= PCRE_NOTEOL;
1903 continue;
1904
1905 case '?':
1906 options |= PCRE_NO_UTF8_CHECK;
1907 continue;
1908
1909 case '<':
1910 {
1911 int x = check_newline(p, outfile);
1912 if (x == 0) goto NEXT_DATA;
1913 options |= x;
1914 while (*p++ != '>');
1915 }
1916 continue;
1917 }
1918 *q++ = c;
1919 }
1920 *q = 0;
1921 len = q - dbuffer;
1922
1923 if ((all_use_dfa || use_dfa) && find_match_limit)
1924 {
1925 printf("**Match limit not relevant for DFA matching: ignored\n");
1926 find_match_limit = 0;
1927 }
1928
1929 /* Handle matching via the POSIX interface, which does not
1930 support timing or playing with the match limit or callout data. */
1931
1932 #if !defined NOPOSIX
1933 if (posix || do_posix)
1934 {
1935 int rc;
1936 int eflags = 0;
1937 regmatch_t *pmatch = NULL;
1938 if (use_size_offsets > 0)
1939 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1940 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1941 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1942
1943 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1944
1945 if (rc != 0)
1946 {
1947 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1948 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1949 }
1950 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1951 != 0)
1952 {
1953 fprintf(outfile, "Matched with REG_NOSUB\n");
1954 }
1955 else
1956 {
1957 size_t i;
1958 for (i = 0; i < (size_t)use_size_offsets; i++)
1959 {
1960 if (pmatch[i].rm_so >= 0)
1961 {
1962 fprintf(outfile, "%2d: ", (int)i);
1963 (void)pchars(dbuffer + pmatch[i].rm_so,
1964 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1965 fprintf(outfile, "\n");
1966 if (i == 0 && do_showrest)
1967 {
1968 fprintf(outfile, " 0+ ");
1969 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1970 outfile);
1971 fprintf(outfile, "\n");
1972 }
1973 }
1974 }
1975 }
1976 free(pmatch);
1977 }
1978
1979 /* Handle matching via the native interface - repeats for /g and /G */
1980
1981 else
1982 #endif /* !defined NOPOSIX */
1983
1984 for (;; gmatched++) /* Loop for /g or /G */
1985 {
1986 if (timeitm > 0)
1987 {
1988 register int i;
1989 clock_t time_taken;
1990 clock_t start_time = clock();
1991
1992 #if !defined NODFA
1993 if (all_use_dfa || use_dfa)
1994 {
1995 int workspace[1000];
1996 for (i = 0; i < timeitm; i++)
1997 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1998 options | g_notempty, use_offsets, use_size_offsets, workspace,
1999 sizeof(workspace)/sizeof(int));
2000 }
2001 else
2002 #endif
2003
2004 for (i = 0; i < timeitm; i++)
2005 count = pcre_exec(re, extra, (char *)bptr, len,
2006 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2007
2008 time_taken = clock() - start_time;
2009 fprintf(outfile, "Execute time %.4f milliseconds\n",
2010 (((double)time_taken * 1000.0) / (double)timeitm) /
2011 (double)CLOCKS_PER_SEC);
2012 }
2013
2014 /* If find_match_limit is set, we want to do repeated matches with
2015 varying limits in order to find the minimum value for the match limit and
2016 for the recursion limit. */
2017
2018 if (find_match_limit)
2019 {
2020 if (extra == NULL)
2021 {
2022 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2023 extra->flags = 0;
2024 }
2025
2026 (void)check_match_limit(re, extra, bptr, len, start_offset,
2027 options|g_notempty, use_offsets, use_size_offsets,
2028 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2029 PCRE_ERROR_MATCHLIMIT, "match()");
2030
2031 count = check_match_limit(re, extra, bptr, len, start_offset,
2032 options|g_notempty, use_offsets, use_size_offsets,
2033 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2034 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2035 }
2036
2037 /* If callout_data is set, use the interface with additional data */
2038
2039 else if (callout_data_set)
2040 {
2041 if (extra == NULL)
2042 {
2043 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2044 extra->flags = 0;
2045 }
2046 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2047 extra->callout_data = &callout_data;
2048 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2049 options | g_notempty, use_offsets, use_size_offsets);
2050 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2051 }
2052
2053 /* The normal case is just to do the match once, with the default
2054 value of match_limit. */
2055
2056 #if !defined NODFA
2057 else if (all_use_dfa || use_dfa)
2058 {
2059 int workspace[1000];
2060 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2061 options | g_notempty, use_offsets, use_size_offsets, workspace,
2062 sizeof(workspace)/sizeof(int));
2063 if (count == 0)
2064 {
2065 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2066 count = use_size_offsets/2;
2067 }
2068 }
2069 #endif
2070
2071 else
2072 {
2073 count = pcre_exec(re, extra, (char *)bptr, len,
2074 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2075 if (count == 0)
2076 {
2077 fprintf(outfile, "Matched, but too many substrings\n");
2078 count = use_size_offsets/3;
2079 }
2080 }
2081
2082 /* Matched */
2083
2084 if (count >= 0)
2085 {
2086 int i, maxcount;
2087
2088 #if !defined NODFA
2089 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2090 #endif
2091 maxcount = use_size_offsets/3;
2092
2093 /* This is a check against a lunatic return value. */
2094
2095 if (count > maxcount)
2096 {
2097 fprintf(outfile,
2098 "** PCRE error: returned count %d is too big for offset size %d\n",
2099 count, use_size_offsets);
2100 count = use_size_offsets/3;
2101 if (do_g || do_G)
2102 {
2103 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2104 do_g = do_G = FALSE; /* Break g/G loop */
2105 }
2106 }
2107
2108 for (i = 0; i < count * 2; i += 2)
2109 {
2110 if (use_offsets[i] < 0)
2111 fprintf(outfile, "%2d: <unset>\n", i/2);
2112 else
2113 {
2114 fprintf(outfile, "%2d: ", i/2);
2115 (void)pchars(bptr + use_offsets[i],
2116 use_offsets[i+1] - use_offsets[i], outfile);
2117 fprintf(outfile, "\n");
2118 if (i == 0)
2119 {
2120 if (do_showrest)
2121 {
2122 fprintf(outfile, " 0+ ");
2123 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2124 outfile);
2125 fprintf(outfile, "\n");
2126 }
2127 }
2128 }
2129 }
2130
2131 for (i = 0; i < 32; i++)
2132 {
2133 if ((copystrings & (1 << i)) != 0)
2134 {
2135 char copybuffer[256];
2136 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2137 i, copybuffer, sizeof(copybuffer));
2138 if (rc < 0)
2139 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2140 else
2141 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2142 }
2143 }
2144
2145 for (copynamesptr = copynames;
2146 *copynamesptr != 0;
2147 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2148 {
2149 char copybuffer[256];
2150 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2151 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2152 if (rc < 0)
2153 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2154 else
2155 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2156 }
2157
2158 for (i = 0; i < 32; i++)
2159 {
2160 if ((getstrings & (1 << i)) != 0)
2161 {
2162 const char *substring;
2163 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2164 i, &substring);
2165 if (rc < 0)
2166 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2167 else
2168 {
2169 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2170 pcre_free_substring(substring);
2171 }
2172 }
2173 }
2174
2175 for (getnamesptr = getnames;
2176 *getnamesptr != 0;
2177 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2178 {
2179 const char *substring;
2180 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2181 count, (char *)getnamesptr, &substring);
2182 if (rc < 0)
2183 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2184 else
2185 {
2186 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2187 pcre_free_substring(substring);
2188 }
2189 }
2190
2191 if (getlist)
2192 {
2193 const char **stringlist;
2194 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2195 &stringlist);
2196 if (rc < 0)
2197 fprintf(outfile, "get substring list failed %d\n", rc);
2198 else
2199 {
2200 for (i = 0; i < count; i++)
2201 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2202 if (stringlist[i] != NULL)
2203 fprintf(outfile, "string list not terminated by NULL\n");
2204 /* free((void *)stringlist); */
2205 pcre_free_substring_list(stringlist);
2206 }
2207 }
2208 }
2209
2210 /* There was a partial match */
2211
2212 else if (count == PCRE_ERROR_PARTIAL)
2213 {
2214 fprintf(outfile, "Partial match");
2215 #if !defined NODFA
2216 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2217 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2218 bptr + use_offsets[0]);
2219 #endif
2220 fprintf(outfile, "\n");
2221 break; /* Out of the /g loop */
2222 }
2223
2224 /* Failed to match. If this is a /g or /G loop and we previously set
2225 g_notempty after a null match, this is not necessarily the end. We want
2226 to advance the start offset, and continue. We won't be at the end of the
2227 string - that was checked before setting g_notempty.
2228
2229 Complication arises in the case when the newline option is "any" or
2230 "anycrlf". If the previous match was at the end of a line terminated by
2231 CRLF, an advance of one character just passes the \r, whereas we should
2232 prefer the longer newline sequence, as does the code in pcre_exec().
2233 Fudge the offset value to achieve this.
2234
2235 Otherwise, in the case of UTF-8 matching, the advance must be one
2236 character, not one byte. */
2237
2238 else
2239 {
2240 if (g_notempty != 0)
2241 {
2242 int onechar = 1;
2243 unsigned int obits = ((real_pcre *)re)->options;
2244 use_offsets[0] = start_offset;
2245 if ((obits & PCRE_NEWLINE_BITS) == 0)
2246 {
2247 int d;
2248 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2249 obits = (d == '\r')? PCRE_NEWLINE_CR :
2250 (d == '\n')? PCRE_NEWLINE_LF :
2251 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2252 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2253 (d == -1)? PCRE_NEWLINE_ANY : 0;
2254 }
2255 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2256 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2257 &&
2258 start_offset < len - 1 &&
2259 bptr[start_offset] == '\r' &&
2260 bptr[start_offset+1] == '\n')
2261 onechar++;
2262 else if (use_utf8)
2263 {
2264 while (start_offset + onechar < len)
2265 {
2266 int tb = bptr[start_offset+onechar];
2267 if (tb <= 127) break;
2268 tb &= 0xc0;
2269 if (tb != 0 && tb != 0xc0) onechar++;
2270 }
2271 }
2272 use_offsets[1] = start_offset + onechar;
2273 }
2274 else
2275 {
2276 if (count == PCRE_ERROR_NOMATCH)
2277 {
2278 if (gmatched == 0) fprintf(outfile, "No match\n");
2279 }
2280 else fprintf(outfile, "Error %d\n", count);
2281 break; /* Out of the /g loop */
2282 }
2283 }
2284
2285 /* If not /g or /G we are done */
2286
2287 if (!do_g && !do_G) break;
2288
2289 /* If we have matched an empty string, first check to see if we are at
2290 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2291 what Perl's /g option does. This turns out to be rather cunning. First
2292 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2293 same point. If this fails (picked up above) we advance to the next
2294 character. */
2295
2296 g_notempty = 0;
2297
2298 if (use_offsets[0] == use_offsets[1])
2299 {
2300 if (use_offsets[0] == len) break;
2301 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2302 }
2303
2304 /* For /g, update the start offset, leaving the rest alone */
2305
2306 if (do_g) start_offset = use_offsets[1];
2307
2308 /* For /G, update the pointer and length */
2309
2310 else
2311 {
2312 bptr += use_offsets[1];
2313 len -= use_offsets[1];
2314 }
2315 } /* End of loop for /g and /G */
2316
2317 NEXT_DATA: continue;
2318 } /* End of loop for data lines */
2319
2320 CONTINUE:
2321
2322 #if !defined NOPOSIX
2323 if (posix || do_posix) regfree(&preg);
2324 #endif
2325
2326 if (re != NULL) new_free(re);
2327 if (extra != NULL) new_free(extra);
2328 if (tables != NULL)
2329 {
2330 new_free((void *)tables);
2331 setlocale(LC_CTYPE, "C");
2332 locale_set = 0;
2333 }
2334 }
2335
2336 if (infile == stdin) fprintf(outfile, "\n");
2337
2338 EXIT:
2339
2340 if (infile != NULL && infile != stdin) fclose(infile);
2341 if (outfile != NULL && outfile != stdout) fclose(outfile);
2342
2343 free(buffer);
2344 free(dbuffer);
2345 free(pbuffer);
2346 free(offsets);
2347
2348 return yield;
2349 }
2350
2351 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12