/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 530 - (show annotations) (download)
Tue Jun 1 13:42:06 2010 UTC (4 years, 1 month ago) by ph10
File MIME type: text/plain
File size: 75021 byte(s)
Added a lot of (int) casts to avoid compiler warnings in systems where      
size_t is 64-bit.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <locale.h>
49 #include <errno.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #ifndef isatty
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
78 #ifndef fileno
79 #define fileno _fileno
80 #endif
81
82 #else
83 #include <sys/time.h> /* These two includes are needed */
84 #include <sys/resource.h> /* for setrlimit(). */
85 #define INPUT_MODE "rb"
86 #define OUTPUT_MODE "wb"
87 #endif
88
89
90 /* We have to include pcre_internal.h because we need the internal info for
91 displaying the results of pcre_study() and we also need to know about the
92 internal macros, structures, and other internal data values; pcretest has
93 "inside information" compared to a program that strictly follows the PCRE API.
94
95 Although pcre_internal.h does itself include pcre.h, we explicitly include it
96 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
97 appropriately for an application, not for building PCRE. */
98
99 #include "pcre.h"
100 #include "pcre_internal.h"
101
102 /* We need access to some of the data tables that PCRE uses. So as not to have
103 to keep two copies, we include the source file here, changing the names of the
104 external symbols to prevent clashes. */
105
106 #define _pcre_ucp_gentype ucp_gentype
107 #define _pcre_utf8_table1 utf8_table1
108 #define _pcre_utf8_table1_size utf8_table1_size
109 #define _pcre_utf8_table2 utf8_table2
110 #define _pcre_utf8_table3 utf8_table3
111 #define _pcre_utf8_table4 utf8_table4
112 #define _pcre_utt utt
113 #define _pcre_utt_size utt_size
114 #define _pcre_utt_names utt_names
115 #define _pcre_OP_lengths OP_lengths
116
117 #include "pcre_tables.c"
118
119 /* We also need the pcre_printint() function for printing out compiled
120 patterns. This function is in a separate file so that it can be included in
121 pcre_compile.c when that module is compiled with debugging enabled. It needs to
122 know which case is being compiled. */
123
124 #define COMPILING_PCRETEST
125 #include "pcre_printint.src"
126
127 /* The definition of the macro PRINTABLE, which determines whether to print an
128 output character as-is or as a hex value when showing compiled patterns, is
129 contained in the printint.src file. We use it here also, in cases when the
130 locale has not been explicitly changed, so as to get consistent output from
131 systems that differ in their output from isprint() even in the "C" locale. */
132
133 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
134
135 /* It is possible to compile this test program without including support for
136 testing the POSIX interface, though this is not available via the standard
137 Makefile. */
138
139 #if !defined NOPOSIX
140 #include "pcreposix.h"
141 #endif
142
143 /* It is also possible, for the benefit of the version currently imported into
144 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
145 interface to the DFA matcher (NODFA), and without the doublecheck of the old
146 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
147 UTF8 support if PCRE is built without it. */
148
149 #ifndef SUPPORT_UTF8
150 #ifndef NOUTF8
151 #define NOUTF8
152 #endif
153 #endif
154
155
156 /* Other parameters */
157
158 #ifndef CLOCKS_PER_SEC
159 #ifdef CLK_TCK
160 #define CLOCKS_PER_SEC CLK_TCK
161 #else
162 #define CLOCKS_PER_SEC 100
163 #endif
164 #endif
165
166 /* This is the default loop count for timing. */
167
168 #define LOOPREPEAT 500000
169
170 /* Static variables */
171
172 static FILE *outfile; /* Where results are written; file-scope so the callout and malloc wrappers can reach it */
173 static int log_store = 0; /* NOTE(review): not referenced in the part of the file reviewed here - confirm use */
174 static int callout_count; /* Incremented by callout() each time the callout number equals callout_fail_id */
175 static int callout_extra; /* Non-zero makes callout() print the capture details on every call */
176 static int callout_fail_count; /* callout() returns 1 once callout_count reaches this value */
177 static int callout_fail_id; /* The callout number that callout() counts towards failure */
178 static int debug_lengths; /* Set to 1 by main() for each pattern; NOTE(review): consumer not visible here */
179 static int first_callout; /* Non-zero makes callout() re-print the subject; callout() clears it */
180 static int locale_set = 0; /* Selects isprint() rather than PRINTABLE() in the PRINTHEX macro */
181 static int show_malloc; /* Non-zero makes the malloc/free wrappers log each call to outfile */
182 static int use_utf8; /* Non-zero makes pchars() decode its input as UTF-8 */
183 static size_t gotten_store; /* Size of the most recent request made through new_malloc() */
184
185 /* The buffers grow automatically if very long input lines are encountered. */
186
187 static int buffer_size = 50000; /* Current size in bytes of each of the three buffers below */
188 static uschar *buffer = NULL; /* Input line buffer that extend_inputline() reads into */
189 static uschar *dbuffer = NULL; /* Grown together with buffer, but its contents are not copied on growth; NOTE(review): purpose not visible here */
190 static uschar *pbuffer = NULL; /* Copy of the input for callouts; contents are preserved when the buffers grow */
191
192
193
194 /*************************************************
195 * Read or extend an input line *
196 *************************************************/
197
198 /* Input lines are read into buffer, but both patterns and data lines can be
199 continued over multiple input lines. In addition, if the buffer fills up, we
200 want to automatically expand it so as to be able to handle extremely large
201 lines that are needed for certain stress tests. When the input buffer is
202 expanded, the other two buffers must also be expanded likewise, and the
203 contents of pbuffer, which are a copy of the input for callouts, must be
204 preserved (for when expansion happens for a data line). This is not the most
205 optimal way of handling this, but hey, this is just a test program!
206
207 Arguments:
208 f the file to read
209 start where in buffer to start (this *must* be within buffer)
210 prompt for stdin or readline()
211
212 Returns: pointer to the start of new data
213 could be a copy of start, or could be moved
214 NULL if no data read and EOF reached
215 */
216
217 static uschar *
218 extend_inputline(FILE *f, uschar *start, const char *prompt)
219 {
220 uschar *here = start;
221
222 for (;;)
223 {
224 int rlen = (int)(buffer_size - (here - buffer));
225
226 if (rlen > 1000)
227 {
228 int dlen;
229
230 /* If libreadline support is required, use readline() to read a line if the
231 input is a terminal. Note that readline() removes the trailing newline, so
232 we must put it back again, to be compatible with fgets(). */
233
234 #ifdef SUPPORT_LIBREADLINE
235 if (isatty(fileno(f)))
236 {
237 size_t len;
238 char *s = readline(prompt);
239 if (s == NULL) return (here == start)? NULL : start;
240 len = strlen(s);
241 if (len > 0) add_history(s);
242 if (len > rlen - 1) len = rlen - 1;
243 memcpy(here, s, len);
244 here[len] = '\n';
245 here[len+1] = 0;
246 free(s);
247 }
248 else
249 #endif
250
251 /* Read the next line by normal means, prompting if the file is stdin. */
252
253 {
254 if (f == stdin) printf("%s", prompt);
255 if (fgets((char *)here, rlen, f) == NULL)
256 return (here == start)? NULL : start;
257 }
258
259 dlen = (int)strlen((char *)here);
260 if (dlen > 0 && here[dlen - 1] == '\n') return start;
261 here += dlen;
262 }
263
264 else
265 {
266 int new_buffer_size = 2*buffer_size;
267 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
268 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
269 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
270
271 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
272 {
273 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
274 exit(1);
275 }
276
277 memcpy(new_buffer, buffer, buffer_size);
278 memcpy(new_pbuffer, pbuffer, buffer_size);
279
280 buffer_size = new_buffer_size;
281
282 start = new_buffer + (start - buffer);
283 here = new_buffer + (here - buffer);
284
285 free(buffer);
286 free(dbuffer);
287 free(pbuffer);
288
289 buffer = new_buffer;
290 dbuffer = new_dbuffer;
291 pbuffer = new_pbuffer;
292 }
293 }
294
295 return NULL; /* Control never gets here */
296 }
297
298
299
300
301
302
303
304 /*************************************************
305 * Read number from string *
306 *************************************************/
307
308 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
309 around with conditional compilation, just do the job by hand. It is only used
310 for unpicking arguments, so just keep it simple.
311
312 Arguments:
313 str string to be converted
314 endptr where to put the end pointer
315
316 Returns: the converted value, as an int
317 */
318
/* Convert an optionally space-prefixed run of decimal digits to an int. No
sign is recognised. If there are no digits the result is 0 and *endptr is left
at the first non-space character. A value too large for an int saturates at
the largest int instead of overflowing (signed overflow is undefined
behaviour); the remaining digits are still consumed.

Arguments:
  str      string to be converted
  endptr   receives a pointer to the first character after the digits

Returns:   the converted (possibly saturated) value
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
/* Half the unsigned range; assumes the usual UINT_MAX == 2*INT_MAX + 1. */
const unsigned int limit = (unsigned int)-1 / 2;
unsigned int result = 0;

while (isspace(*str)) str++;

while (isdigit(*str))
  {
  unsigned int digit = (unsigned int)(*str++ - '0');
  if (result > (limit - digit) / 10)
    result = limit;                    /* Would not fit: stick at the maximum */
  else
    result = result * 10 + digit;
  }

*endptr = str;
return (int)result;
}
328
329
330
331
332 /*************************************************
333 * Convert UTF-8 string to value *
334 *************************************************/
335
336 /* This function takes one or more bytes that represents a UTF-8 character,
337 and returns the value of the character.
338
339 Argument:
340 utf8bytes a pointer to the byte vector
341 vptr a pointer to an int to receive the value
342
343 Returns: > 0 => the number of bytes consumed
344 -6 to 0 => malformed UTF-8 character at offset = (-return)
345 */
346
347 #if !defined NOUTF8
348
349 static int
350 utf82ord(unsigned char *utf8bytes, int *vptr)
351 {
352 int c = *utf8bytes++;
353 int d = c;
354 int i, j, s;
355
356 for (i = -1; i < 6; i++) /* i is number of additional bytes */
357 {
358 if ((d & 0x80) == 0) break;
359 d <<= 1;
360 }
361
362 if (i == -1) { *vptr = c; return 1; } /* ascii character */
363 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
364
365 /* i now has a value in the range 1-5 */
366
367 s = 6*i;
368 d = (c & utf8_table3[i]) << s;
369
370 for (j = 0; j < i; j++)
371 {
372 c = *utf8bytes++;
373 if ((c & 0xc0) != 0x80) return -(j+1);
374 s -= 6;
375 d |= (c & 0x3f) << s;
376 }
377
378 /* Check that encoding was the correct unique one */
379
380 for (j = 0; j < utf8_table1_size; j++)
381 if (d <= utf8_table1[j]) break;
382 if (j != i) return -(i+1);
383
384 /* Valid value */
385
386 *vptr = d;
387 return i+1;
388 }
389
390 #endif
391
392
393
394 /*************************************************
395 * Convert character value to UTF-8 *
396 *************************************************/
397
398 /* This function takes an integer value in the range 0 - 0x7fffffff
399 and encodes it as a UTF-8 character in 1 to 6 bytes.
400
401 Arguments:
402 cvalue the character value
403 utf8bytes pointer to buffer for result - at least 6 bytes long
404
405 Returns: number of bytes placed in the buffer
406 */
407
408 #if !defined NOUTF8
409
410 static int
411 ord2utf8(int cvalue, uschar *utf8bytes)
412 {
413 register int i, j;
414 for (i = 0; i < utf8_table1_size; i++)
415 if (cvalue <= utf8_table1[i]) break;
416 utf8bytes += i;
417 for (j = i; j > 0; j--)
418 {
419 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
420 cvalue >>= 6;
421 }
422 *utf8bytes = utf8_table2[i] | cvalue;
423 return i + 1;
424 }
425
426 #endif
427
428
429
430 /*************************************************
431 * Print character string *
432 *************************************************/
433
434 /* Character string printing function. Must handle UTF-8 strings in utf8
435 mode. Yields number of characters printed. If handed a NULL file, just counts
436 chars without printing. */
437
438 static int pchars(unsigned char *p, int length, FILE *f)
439 {
440 int c = 0;
441 int yield = 0;
442
443 while (length-- > 0)
444 {
445 #if !defined NOUTF8
446 if (use_utf8)
447 {
448 int rc = utf82ord(p, &c);
449
450 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
451 {
452 length -= rc - 1;
453 p += rc;
454 if (PRINTHEX(c))
455 {
456 if (f != NULL) fprintf(f, "%c", c);
457 yield++;
458 }
459 else
460 {
461 int n = 4;
462 if (f != NULL) fprintf(f, "\\x{%02x}", c);
463 yield += (n <= 0x000000ff)? 2 :
464 (n <= 0x00000fff)? 3 :
465 (n <= 0x0000ffff)? 4 :
466 (n <= 0x000fffff)? 5 : 6;
467 }
468 continue;
469 }
470 }
471 #endif
472
473 /* Not UTF-8, or malformed UTF-8 */
474
475 c = *p++;
476 if (PRINTHEX(c))
477 {
478 if (f != NULL) fprintf(f, "%c", c);
479 yield++;
480 }
481 else
482 {
483 if (f != NULL) fprintf(f, "\\x%02x", c);
484 yield += 4;
485 }
486 }
487
488 return yield;
489 }
490
491
492
493 /*************************************************
494 * Callout function *
495 *************************************************/
496
497 /* Called from PCRE as a result of the (?C) item. We print out where we are in
498 the match. Yield zero unless more callouts than the fail count, or the callout
499 data is not zero. */
500
501 static int callout(pcre_callout_block *cb)
502 {
503 FILE *f = (first_callout | callout_extra)? outfile : NULL;
504 int i, pre_start, post_start, subject_length;
505
506 if (callout_extra)
507 {
508 fprintf(f, "Callout %d: last capture = %d\n",
509 cb->callout_number, cb->capture_last);
510
511 for (i = 0; i < cb->capture_top * 2; i += 2)
512 {
513 if (cb->offset_vector[i] < 0)
514 fprintf(f, "%2d: <unset>\n", i/2);
515 else
516 {
517 fprintf(f, "%2d: ", i/2);
518 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
519 cb->offset_vector[i+1] - cb->offset_vector[i], f);
520 fprintf(f, "\n");
521 }
522 }
523 }
524
525 /* Re-print the subject in canonical form, the first time or if giving full
526 datails. On subsequent calls in the same match, we use pchars just to find the
527 printed lengths of the substrings. */
528
529 if (f != NULL) fprintf(f, "--->");
530
531 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
532 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
533 cb->current_position - cb->start_match, f);
534
535 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
536
537 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
538 cb->subject_length - cb->current_position, f);
539
540 if (f != NULL) fprintf(f, "\n");
541
542 /* Always print appropriate indicators, with callout number if not already
543 shown. For automatic callouts, show the pattern offset. */
544
545 if (cb->callout_number == 255)
546 {
547 fprintf(outfile, "%+3d ", cb->pattern_position);
548 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
549 }
550 else
551 {
552 if (callout_extra) fprintf(outfile, " ");
553 else fprintf(outfile, "%3d ", cb->callout_number);
554 }
555
556 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
557 fprintf(outfile, "^");
558
559 if (post_start > 0)
560 {
561 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
562 fprintf(outfile, "^");
563 }
564
565 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
566 fprintf(outfile, " ");
567
568 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
569 pbuffer + cb->pattern_position);
570
571 fprintf(outfile, "\n");
572 first_callout = 0;
573
574 if (cb->callout_data != NULL)
575 {
576 int callout_data = *((int *)(cb->callout_data));
577 if (callout_data != 0)
578 {
579 fprintf(outfile, "Callout data = %d\n", callout_data);
580 return callout_data;
581 }
582 }
583
584 return (cb->callout_number != callout_fail_id)? 0 :
585 (++callout_count >= callout_fail_count)? 1 : 0;
586 }
587
588
589 /*************************************************
590 * Local malloc functions *
591 *************************************************/
592
593 /* Alternative malloc function, to test functionality and show the size of the
594 compiled re. */
595
596 static void *new_malloc(size_t size)
597 {
598 void *block = malloc(size);
599 gotten_store = size;
600 if (show_malloc)
601 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
602 return block;
603 }
604
605 static void new_free(void *block)
606 {
607 if (show_malloc)
608 fprintf(outfile, "free %p\n", block);
609 free(block);
610 }
611
612
613 /* For recursion malloc/free, to test stacking calls */
614
615 static void *stack_malloc(size_t size)
616 {
617 void *block = malloc(size);
618 if (show_malloc)
619 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
620 return block;
621 }
622
623 static void stack_free(void *block)
624 {
625 if (show_malloc)
626 fprintf(outfile, "stack_free %p\n", block);
627 free(block);
628 }
629
630
631 /*************************************************
632 * Call pcre_fullinfo() *
633 *************************************************/
634
635 /* Get one piece of information from the pcre_fullinfo() function */
636
637 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
638 {
639 int rc;
640 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
641 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
642 }
643
644
645
646 /*************************************************
647 * Byte flipping function *
648 *************************************************/
649
/* Reverse the byte order of the low 2 bytes of value (when n is 2) or of the
low 4 bytes (for any other n). Bits above those bytes do not appear in the
result. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;

return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
659
660
661
662
663 /*************************************************
664 * Check match or recursion limit *
665 *************************************************/
666
667 static int
668 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
669 int start_offset, int options, int *use_offsets, int use_size_offsets,
670 int flag, unsigned long int *limit, int errnumber, const char *msg)
671 {
672 int count;
673 int min = 0;
674 int mid = 64;
675 int max = -1;
676
677 extra->flags |= flag;
678
679 for (;;)
680 {
681 *limit = mid;
682
683 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
684 use_offsets, use_size_offsets);
685
686 if (count == errnumber)
687 {
688 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
689 min = mid;
690 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
691 }
692
693 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
694 count == PCRE_ERROR_PARTIAL)
695 {
696 if (mid == min + 1)
697 {
698 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
699 break;
700 }
701 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
702 max = mid;
703 mid = (min + mid)/2;
704 }
705 else break; /* Some other error */
706 }
707
708 extra->flags &= ~flag;
709 return count;
710 }
711
712
713
714 /*************************************************
715 * Case-independent strncmp() function *
716 *************************************************/
717
718 /*
719 Arguments:
720 s first string
721 t second string
722 n number of characters to compare
723
724 Returns: < 0, = 0, or > 0, according to the comparison
725 */
726
727 static int
728 strncmpic(uschar *s, uschar *t, int n)
729 {
730 while (n--)
731 {
732 int c = tolower(*s++) - tolower(*t++);
733 if (c) return c;
734 }
735 return 0;
736 }
737
738
739
740 /*************************************************
741 * Check newline indicator *
742 *************************************************/
743
744 /* This is used both at compile and run-time to check for <xxx> escapes. Print
745 a message and return 0 if there is no match.
746
747 Arguments:
748 p points after the leading '<'
749 f file for error message
750
751 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
752 */
753
754 static int
755 check_newline(uschar *p, FILE *f)
756 {
757 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
758 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
759 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
760 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
761 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
762 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
763 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
764 fprintf(f, "Unknown newline type at: <%s\n", p);
765 return 0;
766 }
767
768
769
770 /*************************************************
771 * Usage function *
772 *************************************************/
773
/* Write the command-line summary to stdout. The lines for readline, -dfa and
-p depend on how the program was built. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s output store (memory) used information\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
807
808
809
810 /*************************************************
811 * Main Program *
812 *************************************************/
813
814 /* Read lines from named file or stdin and write to named file or stdout; lines
815 consist of a regular expression, in delimiters and optionally followed by
816 options, followed by a set of test data, terminated by an empty line. */
817
818 int main(int argc, char **argv)
819 {
820 FILE *infile = stdin;
821 int options = 0;
822 int study_options = 0;
823 int default_find_match_limit = FALSE;
824 int op = 1;
825 int timeit = 0;
826 int timeitm = 0;
827 int showinfo = 0;
828 int showstore = 0;
829 int quiet = 0;
830 int size_offsets = 45;
831 int size_offsets_max;
832 int *offsets = NULL;
833 #if !defined NOPOSIX
834 int posix = 0;
835 #endif
836 int debug = 0;
837 int done = 0;
838 int all_use_dfa = 0;
839 int yield = 0;
840 int stack_size;
841
842 /* These vectors store, end-to-end, a list of captured substring names. Assume
843 that 1024 is plenty long enough for the few names we'll be testing. */
844
845 uschar copynames[1024];
846 uschar getnames[1024];
847
848 uschar *copynamesptr;
849 uschar *getnamesptr;
850
851 /* Get buffers from malloc() so that Electric Fence will check their misuse
852 when I am debugging. They grow automatically when very long lines are read. */
853
854 buffer = (unsigned char *)malloc(buffer_size);
855 dbuffer = (unsigned char *)malloc(buffer_size);
856 pbuffer = (unsigned char *)malloc(buffer_size);
857
858 /* The outfile variable is static so that new_malloc can use it. */
859
860 outfile = stdout;
861
862 /* The following _setmode() stuff is some Windows magic that tells its runtime
863 library to translate CRLF into a single LF character. At least, that's what
864 I've been told: never having used Windows I take this all on trust. Originally
865 it set 0x8000, but then I was advised that _O_BINARY was better. */
866
867 #if defined(_WIN32) || defined(WIN32)
868 _setmode( _fileno( stdout ), _O_BINARY );
869 #endif
870
871 /* Scan options */
872
873 while (argc > 1 && argv[op][0] == '-')
874 {
875 unsigned char *endptr;
876
877 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
878 showstore = 1;
879 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
880 else if (strcmp(argv[op], "-b") == 0) debug = 1;
881 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
882 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
883 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
884 #if !defined NODFA
885 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
886 #endif
887 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
888 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
889 *endptr == 0))
890 {
891 op++;
892 argc--;
893 }
894 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
895 {
896 int both = argv[op][2] == 0;
897 int temp;
898 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
899 *endptr == 0))
900 {
901 timeitm = temp;
902 op++;
903 argc--;
904 }
905 else timeitm = LOOPREPEAT;
906 if (both) timeit = timeitm;
907 }
908 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
909 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
910 *endptr == 0))
911 {
912 #if defined(_WIN32) || defined(WIN32)
913 printf("PCRE: -S not supported on this OS\n");
914 exit(1);
915 #else
916 int rc;
917 struct rlimit rlim;
918 getrlimit(RLIMIT_STACK, &rlim);
919 rlim.rlim_cur = stack_size * 1024 * 1024;
920 rc = setrlimit(RLIMIT_STACK, &rlim);
921 if (rc != 0)
922 {
923 printf("PCRE: setrlimit() failed with error %d\n", rc);
924 exit(1);
925 }
926 op++;
927 argc--;
928 #endif
929 }
930 #if !defined NOPOSIX
931 else if (strcmp(argv[op], "-p") == 0) posix = 1;
932 #endif
933 else if (strcmp(argv[op], "-C") == 0)
934 {
935 int rc;
936 unsigned long int lrc;
937 printf("PCRE version %s\n", pcre_version());
938 printf("Compiled with\n");
939 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
940 printf(" %sUTF-8 support\n", rc? "" : "No ");
941 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
942 printf(" %sUnicode properties support\n", rc? "" : "No ");
943 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
944 /* Note that these values are always the ASCII values, even
945 in EBCDIC environments. CR is 13 and NL is 10. */
946 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
947 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
948 (rc == -2)? "ANYCRLF" :
949 (rc == -1)? "ANY" : "???");
950 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
951 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
952 "all Unicode newlines");
953 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
954 printf(" Internal link size = %d\n", rc);
955 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
956 printf(" POSIX malloc threshold = %d\n", rc);
957 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
958 printf(" Default match limit = %ld\n", lrc);
959 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
960 printf(" Default recursion depth limit = %ld\n", lrc);
961 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
962 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
963 goto EXIT;
964 }
965 else if (strcmp(argv[op], "-help") == 0 ||
966 strcmp(argv[op], "--help") == 0)
967 {
968 usage();
969 goto EXIT;
970 }
971 else
972 {
973 printf("** Unknown or malformed option %s\n", argv[op]);
974 usage();
975 yield = 1;
976 goto EXIT;
977 }
978 op++;
979 argc--;
980 }
981
982 /* Get the store for the offsets vector, and remember what it was */
983
984 size_offsets_max = size_offsets;
985 offsets = (int *)malloc(size_offsets_max * sizeof(int));
986 if (offsets == NULL)
987 {
988 printf("** Failed to get %d bytes of memory for offsets vector\n",
989 (int)(size_offsets_max * sizeof(int)));
990 yield = 1;
991 goto EXIT;
992 }
993
994 /* Sort out the input and output files */
995
996 if (argc > 1)
997 {
998 infile = fopen(argv[op], INPUT_MODE);
999 if (infile == NULL)
1000 {
1001 printf("** Failed to open %s\n", argv[op]);
1002 yield = 1;
1003 goto EXIT;
1004 }
1005 }
1006
1007 if (argc > 2)
1008 {
1009 outfile = fopen(argv[op+1], OUTPUT_MODE);
1010 if (outfile == NULL)
1011 {
1012 printf("** Failed to open %s\n", argv[op+1]);
1013 yield = 1;
1014 goto EXIT;
1015 }
1016 }
1017
1018 /* Set alternative malloc function */
1019
1020 pcre_malloc = new_malloc;
1021 pcre_free = new_free;
1022 pcre_stack_malloc = stack_malloc;
1023 pcre_stack_free = stack_free;
1024
1025 /* Heading line unless quiet, then prompt for first regex if stdin */
1026
1027 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1028
1029 /* Main loop */
1030
1031 while (!done)
1032 {
1033 pcre *re = NULL;
1034 pcre_extra *extra = NULL;
1035
1036 #if !defined NOPOSIX /* There are still compilers that require no indent */
1037 regex_t preg;
1038 int do_posix = 0;
1039 #endif
1040
1041 const char *error;
1042 unsigned char *markptr;
1043 unsigned char *p, *pp, *ppp;
1044 unsigned char *to_file = NULL;
1045 const unsigned char *tables = NULL;
1046 unsigned long int true_size, true_study_size = 0;
1047 size_t size, regex_gotten_store;
1048 int do_mark = 0;
1049 int do_study = 0;
1050 int do_debug = debug;
1051 int do_G = 0;
1052 int do_g = 0;
1053 int do_showinfo = showinfo;
1054 int do_showrest = 0;
1055 int do_flip = 0;
1056 int erroroffset, len, delimiter, poffset;
1057
1058 use_utf8 = 0;
1059 debug_lengths = 1;
1060
1061 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1062 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1063 fflush(outfile);
1064
1065 p = buffer;
1066 while (isspace(*p)) p++;
1067 if (*p == 0) continue;
1068
1069 /* See if the pattern is to be loaded pre-compiled from a file. */
1070
1071 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1072 {
1073 unsigned long int magic, get_options;
1074 uschar sbuf[8];
1075 FILE *f;
1076
1077 p++;
1078 pp = p + (int)strlen((char *)p);
1079 while (isspace(pp[-1])) pp--;
1080 *pp = 0;
1081
1082 f = fopen((char *)p, "rb");
1083 if (f == NULL)
1084 {
1085 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1086 continue;
1087 }
1088
1089 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1090
1091 true_size =
1092 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1093 true_study_size =
1094 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1095
1096 re = (real_pcre *)new_malloc(true_size);
1097 regex_gotten_store = gotten_store;
1098
1099 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1100
1101 magic = ((real_pcre *)re)->magic_number;
1102 if (magic != MAGIC_NUMBER)
1103 {
1104 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1105 {
1106 do_flip = 1;
1107 }
1108 else
1109 {
1110 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1111 fclose(f);
1112 continue;
1113 }
1114 }
1115
1116 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1117 do_flip? " (byte-inverted)" : "", p);
1118
1119 /* Need to know if UTF-8 for printing data strings */
1120
1121 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1122 use_utf8 = (get_options & PCRE_UTF8) != 0;
1123
1124 /* Now see if there is any following study data */
1125
1126 if (true_study_size != 0)
1127 {
1128 pcre_study_data *psd;
1129
1130 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1131 extra->flags = PCRE_EXTRA_STUDY_DATA;
1132
1133 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1134 extra->study_data = psd;
1135
1136 if (fread(psd, 1, true_study_size, f) != true_study_size)
1137 {
1138 FAIL_READ:
1139 fprintf(outfile, "Failed to read data from %s\n", p);
1140 if (extra != NULL) new_free(extra);
1141 if (re != NULL) new_free(re);
1142 fclose(f);
1143 continue;
1144 }
1145 fprintf(outfile, "Study data loaded from %s\n", p);
1146 do_study = 1; /* To get the data output if requested */
1147 }
1148 else fprintf(outfile, "No study data\n");
1149
1150 fclose(f);
1151 goto SHOW_INFO;
1152 }
1153
1154 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1155 the pattern; if it isn't complete, read more. */
1156
1157 delimiter = *p++;
1158
1159 if (isalnum(delimiter) || delimiter == '\\')
1160 {
1161 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1162 goto SKIP_DATA;
1163 }
1164
1165 pp = p;
1166 poffset = (int)(p - buffer);
1167
1168 for(;;)
1169 {
1170 while (*pp != 0)
1171 {
1172 if (*pp == '\\' && pp[1] != 0) pp++;
1173 else if (*pp == delimiter) break;
1174 pp++;
1175 }
1176 if (*pp != 0) break;
1177 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1178 {
1179 fprintf(outfile, "** Unexpected EOF\n");
1180 done = 1;
1181 goto CONTINUE;
1182 }
1183 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1184 }
1185
1186 /* The buffer may have moved while being extended; reset the start of data
1187 pointer to the correct relative point in the buffer. */
1188
1189 p = buffer + poffset;
1190
1191 /* If the first character after the delimiter is backslash, make
1192 the pattern end with backslash. This is purely to provide a way
1193 of testing for the error message when a pattern ends with backslash. */
1194
1195 if (pp[1] == '\\') *pp++ = '\\';
1196
1197 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1198 for callouts. */
1199
1200 *pp++ = 0;
1201 strcpy((char *)pbuffer, (char *)p);
1202
1203 /* Look for options after final delimiter */
1204
1205 options = 0;
1206 study_options = 0;
1207 log_store = showstore; /* default from command line */
1208
1209 while (*pp != 0)
1210 {
1211 switch (*pp++)
1212 {
1213 case 'f': options |= PCRE_FIRSTLINE; break;
1214 case 'g': do_g = 1; break;
1215 case 'i': options |= PCRE_CASELESS; break;
1216 case 'm': options |= PCRE_MULTILINE; break;
1217 case 's': options |= PCRE_DOTALL; break;
1218 case 'x': options |= PCRE_EXTENDED; break;
1219
1220 case '+': do_showrest = 1; break;
1221 case 'A': options |= PCRE_ANCHORED; break;
1222 case 'B': do_debug = 1; break;
1223 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1224 case 'D': do_debug = do_showinfo = 1; break;
1225 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1226 case 'F': do_flip = 1; break;
1227 case 'G': do_G = 1; break;
1228 case 'I': do_showinfo = 1; break;
1229 case 'J': options |= PCRE_DUPNAMES; break;
1230 case 'K': do_mark = 1; break;
1231 case 'M': log_store = 1; break;
1232 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1233
1234 #if !defined NOPOSIX
1235 case 'P': do_posix = 1; break;
1236 #endif
1237
1238 case 'S': do_study = 1; break;
1239 case 'U': options |= PCRE_UNGREEDY; break;
1240 case 'W': options |= PCRE_UCP; break;
1241 case 'X': options |= PCRE_EXTRA; break;
1242 case 'Z': debug_lengths = 0; break;
1243 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1244 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1245
1246 case 'L':
1247 ppp = pp;
1248 /* The '\r' test here is so that it works on Windows. */
1249 /* The '0' test is just in case this is an unterminated line. */
1250 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1251 *ppp = 0;
1252 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1253 {
1254 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1255 goto SKIP_DATA;
1256 }
1257 locale_set = 1;
1258 tables = pcre_maketables();
1259 pp = ppp;
1260 break;
1261
1262 case '>':
1263 to_file = pp;
1264 while (*pp != 0) pp++;
1265 while (isspace(pp[-1])) pp--;
1266 *pp = 0;
1267 break;
1268
1269 case '<':
1270 {
1271 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1272 {
1273 options |= PCRE_JAVASCRIPT_COMPAT;
1274 pp += 3;
1275 }
1276 else
1277 {
1278 int x = check_newline(pp, outfile);
1279 if (x == 0) goto SKIP_DATA;
1280 options |= x;
1281 while (*pp++ != '>');
1282 }
1283 }
1284 break;
1285
1286 case '\r': /* So that it works in Windows */
1287 case '\n':
1288 case ' ':
1289 break;
1290
1291 default:
1292 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1293 goto SKIP_DATA;
1294 }
1295 }
1296
1297 /* Handle compiling via the POSIX interface, which doesn't support the
1298 timing, showing, or debugging options, nor the ability to pass over
1299 local character tables. */
1300
1301 #if !defined NOPOSIX
1302 if (posix || do_posix)
1303 {
1304 int rc;
1305 int cflags = 0;
1306
1307 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1308 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1309 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1310 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1311 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1312 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1313 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1314
1315 rc = regcomp(&preg, (char *)p, cflags);
1316
1317 /* Compilation failed; go back for another re, skipping to blank line
1318 if non-interactive. */
1319
1320 if (rc != 0)
1321 {
1322 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1323 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1324 goto SKIP_DATA;
1325 }
1326 }
1327
1328 /* Handle compiling via the native interface */
1329
1330 else
1331 #endif /* !defined NOPOSIX */
1332
1333 {
1334 unsigned long int get_options;
1335
1336 if (timeit > 0)
1337 {
1338 register int i;
1339 clock_t time_taken;
1340 clock_t start_time = clock();
1341 for (i = 0; i < timeit; i++)
1342 {
1343 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1344 if (re != NULL) free(re);
1345 }
1346 time_taken = clock() - start_time;
1347 fprintf(outfile, "Compile time %.4f milliseconds\n",
1348 (((double)time_taken * 1000.0) / (double)timeit) /
1349 (double)CLOCKS_PER_SEC);
1350 }
1351
1352 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1353
1354 /* Compilation failed; go back for another re, skipping to blank line
1355 if non-interactive. */
1356
1357 if (re == NULL)
1358 {
1359 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1360 SKIP_DATA:
1361 if (infile != stdin)
1362 {
1363 for (;;)
1364 {
1365 if (extend_inputline(infile, buffer, NULL) == NULL)
1366 {
1367 done = 1;
1368 goto CONTINUE;
1369 }
1370 len = (int)strlen((char *)buffer);
1371 while (len > 0 && isspace(buffer[len-1])) len--;
1372 if (len == 0) break;
1373 }
1374 fprintf(outfile, "\n");
1375 }
1376 goto CONTINUE;
1377 }
1378
1379 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1380 within the regex; check for this so that we know how to process the data
1381 lines. */
1382
1383 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1384 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1385
1386 /* Print information if required. There are now two info-returning
1387 functions. The old one has a limited interface and returns only limited
1388 data. Check that it agrees with the newer one. */
1389
1390 if (log_store)
1391 fprintf(outfile, "Memory allocation (code space): %d\n",
1392 (int)(gotten_store -
1393 sizeof(real_pcre) -
1394 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1395
1396 /* Extract the size for possible writing before possibly flipping it,
1397 and remember the store that was got. */
1398
1399 true_size = ((real_pcre *)re)->size;
1400 regex_gotten_store = gotten_store;
1401
1402 /* If /S was present, study the regexp to generate additional info to
1403 help with the matching. */
1404
1405 if (do_study)
1406 {
1407 if (timeit > 0)
1408 {
1409 register int i;
1410 clock_t time_taken;
1411 clock_t start_time = clock();
1412 for (i = 0; i < timeit; i++)
1413 extra = pcre_study(re, study_options, &error);
1414 time_taken = clock() - start_time;
1415 if (extra != NULL) free(extra);
1416 fprintf(outfile, " Study time %.4f milliseconds\n",
1417 (((double)time_taken * 1000.0) / (double)timeit) /
1418 (double)CLOCKS_PER_SEC);
1419 }
1420 extra = pcre_study(re, study_options, &error);
1421 if (error != NULL)
1422 fprintf(outfile, "Failed to study: %s\n", error);
1423 else if (extra != NULL)
1424 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1425 }
1426
1427 /* If /K was present, we set up for handling MARK data. */
1428
1429 if (do_mark)
1430 {
1431 if (extra == NULL)
1432 {
1433 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1434 extra->flags = 0;
1435 }
1436 extra->mark = &markptr;
1437 extra->flags |= PCRE_EXTRA_MARK;
1438 }
1439
1440 /* If the 'F' option was present, we flip the bytes of all the integer
1441 fields in the regex data block and the study block. This is to make it
1442 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1443 compiled on a different architecture. */
1444
1445 if (do_flip)
1446 {
1447 real_pcre *rre = (real_pcre *)re;
1448 rre->magic_number =
1449 byteflip(rre->magic_number, sizeof(rre->magic_number));
1450 rre->size = byteflip(rre->size, sizeof(rre->size));
1451 rre->options = byteflip(rre->options, sizeof(rre->options));
1452 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1453 rre->top_bracket =
1454 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1455 rre->top_backref =
1456 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1457 rre->first_byte =
1458 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1459 rre->req_byte =
1460 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1461 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1462 sizeof(rre->name_table_offset));
1463 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1464 sizeof(rre->name_entry_size));
1465 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1466 sizeof(rre->name_count));
1467
1468 if (extra != NULL)
1469 {
1470 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1471 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1472 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1473 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1474 }
1475 }
1476
1477 /* Extract information from the compiled data if required */
1478
1479 SHOW_INFO:
1480
1481 if (do_debug)
1482 {
1483 fprintf(outfile, "------------------------------------------------------------------\n");
1484 pcre_printint(re, outfile, debug_lengths);
1485 }
1486
1487 /* We already have the options in get_options (see above) */
1488
1489 if (do_showinfo)
1490 {
1491 unsigned long int all_options;
1492 #if !defined NOINFOCHECK
1493 int old_first_char, old_options, old_count;
1494 #endif
1495 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1496 hascrorlf;
1497 int nameentrysize, namecount;
1498 const uschar *nametable;
1499
1500 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1501 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1502 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1503 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1504 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1505 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1506 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1507 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1508 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1509 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1510 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1511
1512 #if !defined NOINFOCHECK
1513 old_count = pcre_info(re, &old_options, &old_first_char);
1514 if (count < 0) fprintf(outfile,
1515 "Error %d from pcre_info()\n", count);
1516 else
1517 {
1518 if (old_count != count) fprintf(outfile,
1519 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1520 old_count);
1521
1522 if (old_first_char != first_char) fprintf(outfile,
1523 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1524 first_char, old_first_char);
1525
1526 if (old_options != (int)get_options) fprintf(outfile,
1527 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1528 get_options, old_options);
1529 }
1530 #endif
1531
1532 if (size != regex_gotten_store) fprintf(outfile,
1533 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1534 (int)size, (int)regex_gotten_store);
1535
1536 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1537 if (backrefmax > 0)
1538 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1539
1540 if (namecount > 0)
1541 {
1542 fprintf(outfile, "Named capturing subpatterns:\n");
1543 while (namecount-- > 0)
1544 {
1545 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1546 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1547 GET2(nametable, 0));
1548 nametable += nameentrysize;
1549 }
1550 }
1551
1552 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1553 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1554
1555 all_options = ((real_pcre *)re)->options;
1556 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1557
1558 if (get_options == 0) fprintf(outfile, "No options\n");
1559 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1560 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1561 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1562 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1563 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1564 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1565 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1566 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1567 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1568 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1569 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1570 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1571 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1572 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1573 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1574 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1575 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1576
1577 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1578
1579 switch (get_options & PCRE_NEWLINE_BITS)
1580 {
1581 case PCRE_NEWLINE_CR:
1582 fprintf(outfile, "Forced newline sequence: CR\n");
1583 break;
1584
1585 case PCRE_NEWLINE_LF:
1586 fprintf(outfile, "Forced newline sequence: LF\n");
1587 break;
1588
1589 case PCRE_NEWLINE_CRLF:
1590 fprintf(outfile, "Forced newline sequence: CRLF\n");
1591 break;
1592
1593 case PCRE_NEWLINE_ANYCRLF:
1594 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1595 break;
1596
1597 case PCRE_NEWLINE_ANY:
1598 fprintf(outfile, "Forced newline sequence: ANY\n");
1599 break;
1600
1601 default:
1602 break;
1603 }
1604
1605 if (first_char == -1)
1606 {
1607 fprintf(outfile, "First char at start or follows newline\n");
1608 }
1609 else if (first_char < 0)
1610 {
1611 fprintf(outfile, "No first char\n");
1612 }
1613 else
1614 {
1615 int ch = first_char & 255;
1616 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1617 "" : " (caseless)";
1618 if (PRINTHEX(ch))
1619 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1620 else
1621 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1622 }
1623
1624 if (need_char < 0)
1625 {
1626 fprintf(outfile, "No need char\n");
1627 }
1628 else
1629 {
1630 int ch = need_char & 255;
1631 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1632 "" : " (caseless)";
1633 if (PRINTHEX(ch))
1634 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1635 else
1636 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1637 }
1638
1639 /* Don't output study size; at present it is in any case a fixed
1640 value, but it varies, depending on the computer architecture, and
1641 so messes up the test suite. (And with the /F option, it might be
1642 flipped.) */
1643
1644 if (do_study)
1645 {
1646 if (extra == NULL)
1647 fprintf(outfile, "Study returned NULL\n");
1648 else
1649 {
1650 uschar *start_bits = NULL;
1651 int minlength;
1652
1653 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
1654 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1655
1656 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1657 if (start_bits == NULL)
1658 fprintf(outfile, "No set of starting bytes\n");
1659 else
1660 {
1661 int i;
1662 int c = 24;
1663 fprintf(outfile, "Starting byte set: ");
1664 for (i = 0; i < 256; i++)
1665 {
1666 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1667 {
1668 if (c > 75)
1669 {
1670 fprintf(outfile, "\n ");
1671 c = 2;
1672 }
1673 if (PRINTHEX(i) && i != ' ')
1674 {
1675 fprintf(outfile, "%c ", i);
1676 c += 2;
1677 }
1678 else
1679 {
1680 fprintf(outfile, "\\x%02x ", i);
1681 c += 5;
1682 }
1683 }
1684 }
1685 fprintf(outfile, "\n");
1686 }
1687 }
1688 }
1689 }
1690
1691 /* If the '>' option was present, we write out the regex to a file, and
1692 that is all. The first 8 bytes of the file are the regex length and then
1693 the study length, in big-endian order. */
1694
1695 if (to_file != NULL)
1696 {
1697 FILE *f = fopen((char *)to_file, "wb");
1698 if (f == NULL)
1699 {
1700 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1701 }
1702 else
1703 {
1704 uschar sbuf[8];
1705 sbuf[0] = (uschar)((true_size >> 24) & 255);
1706 sbuf[1] = (uschar)((true_size >> 16) & 255);
1707 sbuf[2] = (uschar)((true_size >> 8) & 255);
1708 sbuf[3] = (uschar)((true_size) & 255);
1709
1710 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1711 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1712 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1713 sbuf[7] = (uschar)((true_study_size) & 255);
1714
1715 if (fwrite(sbuf, 1, 8, f) < 8 ||
1716 fwrite(re, 1, true_size, f) < true_size)
1717 {
1718 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1719 }
1720 else
1721 {
1722 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1723 if (extra != NULL)
1724 {
1725 if (fwrite(extra->study_data, 1, true_study_size, f) <
1726 true_study_size)
1727 {
1728 fprintf(outfile, "Write error on %s: %s\n", to_file,
1729 strerror(errno));
1730 }
1731 else fprintf(outfile, "Study data written to %s\n", to_file);
1732
1733 }
1734 }
1735 fclose(f);
1736 }
1737
1738 new_free(re);
1739 if (extra != NULL) new_free(extra);
1740 if (tables != NULL) new_free((void *)tables);
1741 continue; /* With next regex */
1742 }
1743 } /* End of non-POSIX compile */
1744
1745 /* Read data lines and test them */
1746
1747 for (;;)
1748 {
1749 uschar *q;
1750 uschar *bptr;
1751 int *use_offsets = offsets;
1752 int use_size_offsets = size_offsets;
1753 int callout_data = 0;
1754 int callout_data_set = 0;
1755 int count, c;
1756 int copystrings = 0;
1757 int find_match_limit = default_find_match_limit;
1758 int getstrings = 0;
1759 int getlist = 0;
1760 int gmatched = 0;
1761 int start_offset = 0;
1762 int g_notempty = 0;
1763 int use_dfa = 0;
1764
1765 options = 0;
1766
1767 *copynames = 0;
1768 *getnames = 0;
1769
1770 copynamesptr = copynames;
1771 getnamesptr = getnames;
1772
1773 pcre_callout = callout;
1774 first_callout = 1;
1775 callout_extra = 0;
1776 callout_count = 0;
1777 callout_fail_count = 999999;
1778 callout_fail_id = -1;
1779 show_malloc = 0;
1780
1781 if (extra != NULL) extra->flags &=
1782 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1783
1784 len = 0;
1785 for (;;)
1786 {
1787 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1788 {
1789 if (len > 0) break;
1790 done = 1;
1791 goto CONTINUE;
1792 }
1793 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1794 len = (int)strlen((char *)buffer);
1795 if (buffer[len-1] == '\n') break;
1796 }
1797
1798 while (len > 0 && isspace(buffer[len-1])) len--;
1799 buffer[len] = 0;
1800 if (len == 0) break;
1801
1802 p = buffer;
1803 while (isspace(*p)) p++;
1804
1805 bptr = q = dbuffer;
1806 while ((c = *p++) != 0)
1807 {
1808 int i = 0;
1809 int n = 0;
1810
1811 if (c == '\\') switch ((c = *p++))
1812 {
1813 case 'a': c = 7; break;
1814 case 'b': c = '\b'; break;
1815 case 'e': c = 27; break;
1816 case 'f': c = '\f'; break;
1817 case 'n': c = '\n'; break;
1818 case 'r': c = '\r'; break;
1819 case 't': c = '\t'; break;
1820 case 'v': c = '\v'; break;
1821
1822 case '0': case '1': case '2': case '3':
1823 case '4': case '5': case '6': case '7':
1824 c -= '0';
1825 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1826 c = c * 8 + *p++ - '0';
1827
1828 #if !defined NOUTF8
1829 if (use_utf8 && c > 255)
1830 {
1831 unsigned char buff8[8];
1832 int ii, utn;
1833 utn = ord2utf8(c, buff8);
1834 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1835 c = buff8[ii]; /* Last byte */
1836 }
1837 #endif
1838 break;
1839
1840 case 'x':
1841
1842 /* Handle \x{..} specially - new Perl thing for utf8 */
1843
1844 #if !defined NOUTF8
1845 if (*p == '{')
1846 {
1847 unsigned char *pt = p;
1848 c = 0;
1849 while (isxdigit(*(++pt)))
1850 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1851 if (*pt == '}')
1852 {
1853 unsigned char buff8[8];
1854 int ii, utn;
1855 if (use_utf8)
1856 {
1857 utn = ord2utf8(c, buff8);
1858 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1859 c = buff8[ii]; /* Last byte */
1860 }
1861 else
1862 {
1863 if (c > 255)
1864 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1865 "UTF-8 mode is not enabled.\n"
1866 "** Truncation will probably give the wrong result.\n", c);
1867 }
1868 p = pt + 1;
1869 break;
1870 }
1871 /* Not correct form; fall through */
1872 }
1873 #endif
1874
1875 /* Ordinary \x */
1876
1877 c = 0;
1878 while (i++ < 2 && isxdigit(*p))
1879 {
1880 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1881 p++;
1882 }
1883 break;
1884
1885 case 0: /* \ followed by EOF allows for an empty line */
1886 p--;
1887 continue;
1888
1889 case '>':
1890 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1891 continue;
1892
1893 case 'A': /* Option setting */
1894 options |= PCRE_ANCHORED;
1895 continue;
1896
1897 case 'B':
1898 options |= PCRE_NOTBOL;
1899 continue;
1900
1901 case 'C':
1902 if (isdigit(*p)) /* Set copy string */
1903 {
1904 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1905 copystrings |= 1 << n;
1906 }
1907 else if (isalnum(*p))
1908 {
1909 uschar *npp = copynamesptr;
1910 while (isalnum(*p)) *npp++ = *p++;
1911 *npp++ = 0;
1912 *npp = 0;
1913 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1914 if (n < 0)
1915 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1916 copynamesptr = npp;
1917 }
1918 else if (*p == '+')
1919 {
1920 callout_extra = 1;
1921 p++;
1922 }
1923 else if (*p == '-')
1924 {
1925 pcre_callout = NULL;
1926 p++;
1927 }
1928 else if (*p == '!')
1929 {
1930 callout_fail_id = 0;
1931 p++;
1932 while(isdigit(*p))
1933 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1934 callout_fail_count = 0;
1935 if (*p == '!')
1936 {
1937 p++;
1938 while(isdigit(*p))
1939 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1940 }
1941 }
1942 else if (*p == '*')
1943 {
1944 int sign = 1;
1945 callout_data = 0;
1946 if (*(++p) == '-') { sign = -1; p++; }
1947 while(isdigit(*p))
1948 callout_data = callout_data * 10 + *p++ - '0';
1949 callout_data *= sign;
1950 callout_data_set = 1;
1951 }
1952 continue;
1953
1954 #if !defined NODFA
1955 case 'D':
1956 #if !defined NOPOSIX
1957 if (posix || do_posix)
1958 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1959 else
1960 #endif
1961 use_dfa = 1;
1962 continue;
1963
1964 case 'F':
1965 options |= PCRE_DFA_SHORTEST;
1966 continue;
1967 #endif
1968
1969 case 'G':
1970 if (isdigit(*p))
1971 {
1972 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1973 getstrings |= 1 << n;
1974 }
1975 else if (isalnum(*p))
1976 {
1977 uschar *npp = getnamesptr;
1978 while (isalnum(*p)) *npp++ = *p++;
1979 *npp++ = 0;
1980 *npp = 0;
1981 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1982 if (n < 0)
1983 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1984 getnamesptr = npp;
1985 }
1986 continue;
1987
1988 case 'L':
1989 getlist = 1;
1990 continue;
1991
1992 case 'M':
1993 find_match_limit = 1;
1994 continue;
1995
1996 case 'N':
1997 if ((options & PCRE_NOTEMPTY) != 0)
1998 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
1999 else
2000 options |= PCRE_NOTEMPTY;
2001 continue;
2002
2003 case 'O':
2004 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2005 if (n > size_offsets_max)
2006 {
2007 size_offsets_max = n;
2008 free(offsets);
2009 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2010 if (offsets == NULL)
2011 {
2012 printf("** Failed to get %d bytes of memory for offsets vector\n",
2013 (int)(size_offsets_max * sizeof(int)));
2014 yield = 1;
2015 goto EXIT;
2016 }
2017 }
2018 use_size_offsets = n;
2019 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2020 continue;
2021
2022 case 'P':
2023 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2024 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2025 continue;
2026
2027 case 'Q':
2028 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2029 if (extra == NULL)
2030 {
2031 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2032 extra->flags = 0;
2033 }
2034 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2035 extra->match_limit_recursion = n;
2036 continue;
2037
2038 case 'q':
2039 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2040 if (extra == NULL)
2041 {
2042 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2043 extra->flags = 0;
2044 }
2045 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2046 extra->match_limit = n;
2047 continue;
2048
2049 #if !defined NODFA
2050 case 'R':
2051 options |= PCRE_DFA_RESTART;
2052 continue;
2053 #endif
2054
2055 case 'S':
2056 show_malloc = 1;
2057 continue;
2058
2059 case 'Y':
2060 options |= PCRE_NO_START_OPTIMIZE;
2061 continue;
2062
2063 case 'Z':
2064 options |= PCRE_NOTEOL;
2065 continue;
2066
2067 case '?':
2068 options |= PCRE_NO_UTF8_CHECK;
2069 continue;
2070
2071 case '<':
2072 {
2073 int x = check_newline(p, outfile);
2074 if (x == 0) goto NEXT_DATA;
2075 options |= x;
2076 while (*p++ != '>');
2077 }
2078 continue;
2079 }
2080 *q++ = c;
2081 }
2082 *q = 0;
2083 len = (int)(q - dbuffer);
2084
2085 /* Move the data to the end of the buffer so that a read over the end of
2086 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2087 we are using the POSIX interface, we must include the terminating zero. */
2088
2089 #if !defined NOPOSIX
2090 if (posix || do_posix)
2091 {
2092 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2093 bptr += buffer_size - len - 1;
2094 }
2095 else
2096 #endif
2097 {
2098 memmove(bptr + buffer_size - len, bptr, len);
2099 bptr += buffer_size - len;
2100 }
2101
2102 if ((all_use_dfa || use_dfa) && find_match_limit)
2103 {
2104 printf("**Match limit not relevant for DFA matching: ignored\n");
2105 find_match_limit = 0;
2106 }
2107
2108 /* Handle matching via the POSIX interface, which does not
2109 support timing or playing with the match limit or callout data. */
2110
2111 #if !defined NOPOSIX
2112 if (posix || do_posix)
2113 {
2114 int rc;
2115 int eflags = 0;
2116 regmatch_t *pmatch = NULL;
2117 if (use_size_offsets > 0)
2118 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2119 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2120 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2121 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2122
2123 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2124
2125 if (rc != 0)
2126 {
2127 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2128 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2129 }
2130 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2131 != 0)
2132 {
2133 fprintf(outfile, "Matched with REG_NOSUB\n");
2134 }
2135 else
2136 {
2137 size_t i;
2138 for (i = 0; i < (size_t)use_size_offsets; i++)
2139 {
2140 if (pmatch[i].rm_so >= 0)
2141 {
2142 fprintf(outfile, "%2d: ", (int)i);
2143 (void)pchars(dbuffer + pmatch[i].rm_so,
2144 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2145 fprintf(outfile, "\n");
2146 if (i == 0 && do_showrest)
2147 {
2148 fprintf(outfile, " 0+ ");
2149 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2150 outfile);
2151 fprintf(outfile, "\n");
2152 }
2153 }
2154 }
2155 }
2156 free(pmatch);
2157 }
2158
2159 /* Handle matching via the native interface - repeats for /g and /G */
2160
2161 else
2162 #endif /* !defined NOPOSIX */
2163
2164 for (;; gmatched++) /* Loop for /g or /G */
2165 {
2166 markptr = NULL;
2167
2168 if (timeitm > 0)
2169 {
2170 register int i;
2171 clock_t time_taken;
2172 clock_t start_time = clock();
2173
2174 #if !defined NODFA
2175 if (all_use_dfa || use_dfa)
2176 {
2177 int workspace[1000];
2178 for (i = 0; i < timeitm; i++)
2179 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2180 options | g_notempty, use_offsets, use_size_offsets, workspace,
2181 sizeof(workspace)/sizeof(int));
2182 }
2183 else
2184 #endif
2185
2186 for (i = 0; i < timeitm; i++)
2187 count = pcre_exec(re, extra, (char *)bptr, len,
2188 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2189
2190 time_taken = clock() - start_time;
2191 fprintf(outfile, "Execute time %.4f milliseconds\n",
2192 (((double)time_taken * 1000.0) / (double)timeitm) /
2193 (double)CLOCKS_PER_SEC);
2194 }
2195
2196 /* If find_match_limit is set, we want to do repeated matches with
2197 varying limits in order to find the minimum value for the match limit and
2198 for the recursion limit. */
2199
2200 if (find_match_limit)
2201 {
2202 if (extra == NULL)
2203 {
2204 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2205 extra->flags = 0;
2206 }
2207
2208 (void)check_match_limit(re, extra, bptr, len, start_offset,
2209 options|g_notempty, use_offsets, use_size_offsets,
2210 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2211 PCRE_ERROR_MATCHLIMIT, "match()");
2212
2213 count = check_match_limit(re, extra, bptr, len, start_offset,
2214 options|g_notempty, use_offsets, use_size_offsets,
2215 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2216 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2217 }
2218
2219 /* If callout_data is set, use the interface with additional data */
2220
2221 else if (callout_data_set)
2222 {
2223 if (extra == NULL)
2224 {
2225 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2226 extra->flags = 0;
2227 }
2228 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2229 extra->callout_data = &callout_data;
2230 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2231 options | g_notempty, use_offsets, use_size_offsets);
2232 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2233 }
2234
2235 /* The normal case is just to do the match once, with the default
2236 value of match_limit. */
2237
2238 #if !defined NODFA
2239 else if (all_use_dfa || use_dfa)
2240 {
2241 int workspace[1000];
2242 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2243 options | g_notempty, use_offsets, use_size_offsets, workspace,
2244 sizeof(workspace)/sizeof(int));
2245 if (count == 0)
2246 {
2247 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2248 count = use_size_offsets/2;
2249 }
2250 }
2251 #endif
2252
2253 else
2254 {
2255 count = pcre_exec(re, extra, (char *)bptr, len,
2256 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2257 if (count == 0)
2258 {
2259 fprintf(outfile, "Matched, but too many substrings\n");
2260 count = use_size_offsets/3;
2261 }
2262 }
2263
2264 /* Matched */
2265
2266 if (count >= 0)
2267 {
2268 int i, maxcount;
2269
2270 #if !defined NODFA
2271 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2272 #endif
2273 maxcount = use_size_offsets/3;
2274
2275 /* This is a check against a lunatic return value. */
2276
2277 if (count > maxcount)
2278 {
2279 fprintf(outfile,
2280 "** PCRE error: returned count %d is too big for offset size %d\n",
2281 count, use_size_offsets);
2282 count = use_size_offsets/3;
2283 if (do_g || do_G)
2284 {
2285 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2286 do_g = do_G = FALSE; /* Break g/G loop */
2287 }
2288 }
2289
2290 for (i = 0; i < count * 2; i += 2)
2291 {
2292 if (use_offsets[i] < 0)
2293 fprintf(outfile, "%2d: <unset>\n", i/2);
2294 else
2295 {
2296 fprintf(outfile, "%2d: ", i/2);
2297 (void)pchars(bptr + use_offsets[i],
2298 use_offsets[i+1] - use_offsets[i], outfile);
2299 fprintf(outfile, "\n");
2300 if (i == 0)
2301 {
2302 if (do_showrest)
2303 {
2304 fprintf(outfile, " 0+ ");
2305 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2306 outfile);
2307 fprintf(outfile, "\n");
2308 }
2309 }
2310 }
2311 }
2312
2313 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2314
2315 for (i = 0; i < 32; i++)
2316 {
2317 if ((copystrings & (1 << i)) != 0)
2318 {
2319 char copybuffer[256];
2320 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2321 i, copybuffer, sizeof(copybuffer));
2322 if (rc < 0)
2323 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2324 else
2325 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2326 }
2327 }
2328
2329 for (copynamesptr = copynames;
2330 *copynamesptr != 0;
2331 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2332 {
2333 char copybuffer[256];
2334 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2335 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2336 if (rc < 0)
2337 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2338 else
2339 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2340 }
2341
2342 for (i = 0; i < 32; i++)
2343 {
2344 if ((getstrings & (1 << i)) != 0)
2345 {
2346 const char *substring;
2347 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2348 i, &substring);
2349 if (rc < 0)
2350 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2351 else
2352 {
2353 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2354 pcre_free_substring(substring);
2355 }
2356 }
2357 }
2358
2359 for (getnamesptr = getnames;
2360 *getnamesptr != 0;
2361 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2362 {
2363 const char *substring;
2364 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2365 count, (char *)getnamesptr, &substring);
2366 if (rc < 0)
2367 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2368 else
2369 {
2370 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2371 pcre_free_substring(substring);
2372 }
2373 }
2374
2375 if (getlist)
2376 {
2377 const char **stringlist;
2378 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2379 &stringlist);
2380 if (rc < 0)
2381 fprintf(outfile, "get substring list failed %d\n", rc);
2382 else
2383 {
2384 for (i = 0; i < count; i++)
2385 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2386 if (stringlist[i] != NULL)
2387 fprintf(outfile, "string list not terminated by NULL\n");
2388 /* free((void *)stringlist); */
2389 pcre_free_substring_list(stringlist);
2390 }
2391 }
2392 }
2393
2394 /* There was a partial match */
2395
2396 else if (count == PCRE_ERROR_PARTIAL)
2397 {
2398 if (markptr == NULL) fprintf(outfile, "Partial match");
2399 else fprintf(outfile, "Partial match, mark=%s", markptr);
2400 if (use_size_offsets > 1)
2401 {
2402 fprintf(outfile, ": ");
2403 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2404 outfile);
2405 }
2406 fprintf(outfile, "\n");
2407 break; /* Out of the /g loop */
2408 }
2409
2410 /* Failed to match. If this is a /g or /G loop and we previously set
2411 g_notempty after a null match, this is not necessarily the end. We want
2412 to advance the start offset, and continue. We won't be at the end of the
2413 string - that was checked before setting g_notempty.
2414
2415 Complication arises in the case when the newline option is "any" or
2416 "anycrlf". If the previous match was at the end of a line terminated by
2417 CRLF, an advance of one character just passes the \r, whereas we should
2418 prefer the longer newline sequence, as does the code in pcre_exec().
2419 Fudge the offset value to achieve this.
2420
2421 Otherwise, in the case of UTF-8 matching, the advance must be one
2422 character, not one byte. */
2423
2424 else
2425 {
2426 if (g_notempty != 0)
2427 {
2428 int onechar = 1;
2429 unsigned int obits = ((real_pcre *)re)->options;
2430 use_offsets[0] = start_offset;
2431 if ((obits & PCRE_NEWLINE_BITS) == 0)
2432 {
2433 int d;
2434 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2435 /* Note that these values are always the ASCII ones, even in
2436 EBCDIC environments. CR = 13, NL = 10. */
2437 obits = (d == 13)? PCRE_NEWLINE_CR :
2438 (d == 10)? PCRE_NEWLINE_LF :
2439 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2440 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2441 (d == -1)? PCRE_NEWLINE_ANY : 0;
2442 }
2443 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2444 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2445 &&
2446 start_offset < len - 1 &&
2447 bptr[start_offset] == '\r' &&
2448 bptr[start_offset+1] == '\n')
2449 onechar++;
2450 else if (use_utf8)
2451 {
2452 while (start_offset + onechar < len)
2453 {
2454 int tb = bptr[start_offset+onechar];
2455 if (tb <= 127) break;
2456 tb &= 0xc0;
2457 if (tb != 0 && tb != 0xc0) onechar++;
2458 }
2459 }
2460 use_offsets[1] = start_offset + onechar;
2461 }
2462 else
2463 {
2464 if (count == PCRE_ERROR_NOMATCH)
2465 {
2466 if (gmatched == 0)
2467 {
2468 if (markptr == NULL) fprintf(outfile, "No match\n");
2469 else fprintf(outfile, "No match, mark = %s\n", markptr);
2470 }
2471 }
2472 else fprintf(outfile, "Error %d\n", count);
2473 break; /* Out of the /g loop */
2474 }
2475 }
2476
2477 /* If not /g or /G we are done */
2478
2479 if (!do_g && !do_G) break;
2480
2481 /* If we have matched an empty string, first check to see if we are at
2482 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2483 Perl's /g option does. This turns out to be rather cunning. First we set
2484 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2485 same point. If this fails (picked up above) we advance to the next
2486 character. */
2487
2488 g_notempty = 0;
2489
2490 if (use_offsets[0] == use_offsets[1])
2491 {
2492 if (use_offsets[0] == len) break;
2493 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2494 }
2495
2496 /* For /g, update the start offset, leaving the rest alone */
2497
2498 if (do_g) start_offset = use_offsets[1];
2499
2500 /* For /G, update the pointer and length */
2501
2502 else
2503 {
2504 bptr += use_offsets[1];
2505 len -= use_offsets[1];
2506 }
2507 } /* End of loop for /g and /G */
2508
2509 NEXT_DATA: continue;
2510 } /* End of loop for data lines */
2511
2512 CONTINUE:
2513
2514 #if !defined NOPOSIX
2515 if (posix || do_posix) regfree(&preg);
2516 #endif
2517
2518 if (re != NULL) new_free(re);
2519 if (extra != NULL) new_free(extra);
2520 if (tables != NULL)
2521 {
2522 new_free((void *)tables);
2523 setlocale(LC_CTYPE, "C");
2524 locale_set = 0;
2525 }
2526 }
2527
2528 if (infile == stdin) fprintf(outfile, "\n");
2529
2530 EXIT:
2531
2532 if (infile != NULL && infile != stdin) fclose(infile);
2533 if (outfile != NULL && outfile != stdout) fclose(outfile);
2534
2535 free(buffer);
2536 free(dbuffer);
2537 free(pbuffer);
2538 free(offsets);
2539
2540 return yield;
2541 }
2542
2543 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12