/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 512 - (show annotations) (download)
Tue Mar 30 11:11:52 2010 UTC (4 years, 3 months ago) by ph10
File MIME type: text/plain
File size: 74877 byte(s)
Fix compile problems when heap is in use

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <locale.h>
49 #include <errno.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #ifndef isatty
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
78 #ifndef fileno
79 #define fileno _fileno
80 #endif
81
82 #else
83 #include <sys/time.h> /* These two includes are needed */
84 #include <sys/resource.h> /* for setrlimit(). */
85 #define INPUT_MODE "rb"
86 #define OUTPUT_MODE "wb"
87 #endif
88
89
90 /* We have to include pcre_internal.h because we need the internal info for
91 displaying the results of pcre_study() and we also need to know about the
92 internal macros, structures, and other internal data values; pcretest has
93 "inside information" compared to a program that strictly follows the PCRE API.
94
95 Although pcre_internal.h does itself include pcre.h, we explicitly include it
96 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
97 appropriately for an application, not for building PCRE. */
98
99 #include "pcre.h"
100 #include "pcre_internal.h"
101
102 /* We need access to some of the data tables that PCRE uses. So as not to have
103 to keep two copies, we include the source file here, changing the names of the
104 external symbols to prevent clashes. */
105
106 #define _pcre_ucp_gentype ucp_gentype
107 #define _pcre_utf8_table1 utf8_table1
108 #define _pcre_utf8_table1_size utf8_table1_size
109 #define _pcre_utf8_table2 utf8_table2
110 #define _pcre_utf8_table3 utf8_table3
111 #define _pcre_utf8_table4 utf8_table4
112 #define _pcre_utt utt
113 #define _pcre_utt_size utt_size
114 #define _pcre_utt_names utt_names
115 #define _pcre_OP_lengths OP_lengths
116
117 #include "pcre_tables.c"
118
119 /* We also need the pcre_printint() function for printing out compiled
120 patterns. This function is in a separate file so that it can be included in
121 pcre_compile.c when that module is compiled with debugging enabled. It needs to
122 know which case is being compiled. */
123
124 #define COMPILING_PCRETEST
125 #include "pcre_printint.src"
126
127 /* The definition of the macro PRINTABLE, which determines whether to print an
128 output character as-is or as a hex value when showing compiled patterns, is
129 contained in the printint.src file. We use it here also, in cases when the
130 locale has not been explicitly changed, so as to get consistent output from
131 systems that differ in their output from isprint() even in the "C" locale. */
132
133 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
134
135 /* It is possible to compile this test program without including support for
136 testing the POSIX interface, though this is not available via the standard
137 Makefile. */
138
139 #if !defined NOPOSIX
140 #include "pcreposix.h"
141 #endif
142
143 /* It is also possible, for the benefit of the version currently imported into
144 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
145 interface to the DFA matcher (NODFA), and without the doublecheck of the old
146 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
147 UTF8 support if PCRE is built without it. */
148
149 #ifndef SUPPORT_UTF8
150 #ifndef NOUTF8
151 #define NOUTF8
152 #endif
153 #endif
154
155
156 /* Other parameters */
157
158 #ifndef CLOCKS_PER_SEC
159 #ifdef CLK_TCK
160 #define CLOCKS_PER_SEC CLK_TCK
161 #else
162 #define CLOCKS_PER_SEC 100
163 #endif
164 #endif
165
166 /* This is the default loop count for timing. */
167
168 #define LOOPREPEAT 500000
169
170 /* Static variables */
171
172 static FILE *outfile; /* Output stream: stdout by default, or the output file opened in main() */
173 static int log_store = 0; /* NOTE(review): not referenced in the part of the file seen here */
174 static int callout_count; /* Incremented by callout() each time callout_fail_id is reached */
175 static int callout_extra; /* Non-zero: callout() also lists the captured substrings */
176 static int callout_fail_count; /* callout() yields 1 once callout_count reaches this value */
177 static int callout_fail_id; /* The callout number that counts towards callout_fail_count */
178 static int debug_lengths; /* Set to 1 for each pattern in main(); readers not visible here - TODO confirm use */
179 static int first_callout; /* Non-zero: callout() re-prints the subject; cleared by callout() */
180 static int locale_set = 0; /* Non-zero: PRINTHEX uses isprint() instead of PRINTABLE() */
181 static int show_malloc; /* Non-zero: the malloc/free wrappers log each call to outfile */
182 static int use_utf8; /* Non-zero: pchars() decodes its input as UTF-8 */
183 static size_t gotten_store; /* Size passed to the most recent new_malloc() call */
184
185 /* The buffers grow automatically if very long input lines are encountered. */
186
187 static int buffer_size = 50000;
188 static uschar *buffer = NULL;
189 static uschar *dbuffer = NULL;
190 static uschar *pbuffer = NULL;
191
192
193
194 /*************************************************
195 * Read or extend an input line *
196 *************************************************/
197
198 /* Input lines are read into buffer, but both patterns and data lines can be
199 continued over multiple input lines. In addition, if the buffer fills up, we
200 want to automatically expand it so as to be able to handle extremely large
201 lines that are needed for certain stress tests. When the input buffer is
202 expanded, the other two buffers must also be expanded likewise, and the
203 contents of pbuffer, which are a copy of the input for callouts, must be
204 preserved (for when expansion happens for a data line). This is not the most
205 optimal way of handling this, but hey, this is just a test program!
206
207 Arguments:
208 f the file to read
209 start where in buffer to start (this *must* be within buffer)
210 prompt for stdin or readline()
211
212 Returns: pointer to the start of new data
213 could be a copy of start, or could be moved
214 NULL if no data read and EOF reached
215 */
216
217 static uschar *
218 extend_inputline(FILE *f, uschar *start, const char *prompt)
219 {
220 uschar *here = start;
221
222 for (;;)
223 {
224 int rlen = buffer_size - (here - buffer);
225
226 if (rlen > 1000)
227 {
228 int dlen;
229
230 /* If libreadline support is required, use readline() to read a line if the
231 input is a terminal. Note that readline() removes the trailing newline, so
232 we must put it back again, to be compatible with fgets(). */
233
234 #ifdef SUPPORT_LIBREADLINE
235 if (isatty(fileno(f)))
236 {
237 size_t len;
238 char *s = readline(prompt);
239 if (s == NULL) return (here == start)? NULL : start;
240 len = strlen(s);
241 if (len > 0) add_history(s);
242 if (len > rlen - 1) len = rlen - 1;
243 memcpy(here, s, len);
244 here[len] = '\n';
245 here[len+1] = 0;
246 free(s);
247 }
248 else
249 #endif
250
251 /* Read the next line by normal means, prompting if the file is stdin. */
252
253 {
254 if (f == stdin) printf(prompt);
255 if (fgets((char *)here, rlen, f) == NULL)
256 return (here == start)? NULL : start;
257 }
258
259 dlen = (int)strlen((char *)here);
260 if (dlen > 0 && here[dlen - 1] == '\n') return start;
261 here += dlen;
262 }
263
264 else
265 {
266 int new_buffer_size = 2*buffer_size;
267 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
268 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
269 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
270
271 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
272 {
273 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
274 exit(1);
275 }
276
277 memcpy(new_buffer, buffer, buffer_size);
278 memcpy(new_pbuffer, pbuffer, buffer_size);
279
280 buffer_size = new_buffer_size;
281
282 start = new_buffer + (start - buffer);
283 here = new_buffer + (here - buffer);
284
285 free(buffer);
286 free(dbuffer);
287 free(pbuffer);
288
289 buffer = new_buffer;
290 dbuffer = new_dbuffer;
291 pbuffer = new_pbuffer;
292 }
293 }
294
295 return NULL; /* Control never gets here */
296 }
297
298
299
300
301
302
303
/*************************************************
*           Read number from string              *
*************************************************/

/* strtoul() is avoided because SunOS4 does not have it; the conversion is
simple enough to do by hand. Leading white space is skipped, then decimal
digits are accumulated until the first non-digit. There is no sign handling
and no overflow check. It is only used for unpicking arguments.

Arguments:
  str         string to be converted
  endptr      where to put the end pointer (first character not consumed)

Returns:      the converted value as an int; zero if there were no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
unsigned char *cursor = str;
int value = 0;

/* The explicit test for zero keeps the scan inside the string */

for (; *cursor != 0 && isspace(*cursor); cursor++) {}

for (; isdigit(*cursor); cursor++)
  value = value * 10 + (int)(*cursor - '0');

*endptr = cursor;
return value;
}
328
329
330
331
332 /*************************************************
333 * Convert UTF-8 string to value *
334 *************************************************/
335
336 /* This function takes one or more bytes that represents a UTF-8 character,
337 and returns the value of the character.
338
339 Argument:
340 utf8bytes a pointer to the byte vector
341 vptr a pointer to an int to receive the value
342
343 Returns: > 0 => the number of bytes consumed
344 -6 to 0 => malformed UTF-8 character at offset = (-return)
345 */
346
347 #if !defined NOUTF8
348
349 static int
350 utf82ord(unsigned char *utf8bytes, int *vptr)
351 {
352 int c = *utf8bytes++;
353 int d = c;
354 int i, j, s;
355
356 for (i = -1; i < 6; i++) /* i is number of additional bytes */
357 {
358 if ((d & 0x80) == 0) break;
359 d <<= 1;
360 }
361
362 if (i == -1) { *vptr = c; return 1; } /* ascii character */
363 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
364
365 /* i now has a value in the range 1-5 */
366
367 s = 6*i;
368 d = (c & utf8_table3[i]) << s;
369
370 for (j = 0; j < i; j++)
371 {
372 c = *utf8bytes++;
373 if ((c & 0xc0) != 0x80) return -(j+1);
374 s -= 6;
375 d |= (c & 0x3f) << s;
376 }
377
378 /* Check that encoding was the correct unique one */
379
380 for (j = 0; j < utf8_table1_size; j++)
381 if (d <= utf8_table1[j]) break;
382 if (j != i) return -(i+1);
383
384 /* Valid value */
385
386 *vptr = d;
387 return i+1;
388 }
389
390 #endif
391
392
393
394 /*************************************************
395 * Convert character value to UTF-8 *
396 *************************************************/
397
398 /* This function takes an integer value in the range 0 - 0x7fffffff
399 and encodes it as a UTF-8 character in 0 to 6 bytes.
400
401 Arguments:
402 cvalue the character value
403 utf8bytes pointer to buffer for result - at least 6 bytes long
404
405 Returns: number of characters placed in the buffer
406 */
407
408 #if !defined NOUTF8
409
410 static int
411 ord2utf8(int cvalue, uschar *utf8bytes)
412 {
413 register int i, j;
414 for (i = 0; i < utf8_table1_size; i++)
415 if (cvalue <= utf8_table1[i]) break;
416 utf8bytes += i;
417 for (j = i; j > 0; j--)
418 {
419 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
420 cvalue >>= 6;
421 }
422 *utf8bytes = utf8_table2[i] | cvalue;
423 return i + 1;
424 }
425
426 #endif
427
428
429
430 /*************************************************
431 * Print character string *
432 *************************************************/
433
434 /* Character string printing function. Must handle UTF-8 strings in utf8
435 mode. Yields number of characters printed. If handed a NULL file, just counts
436 chars without printing. */
437
438 static int pchars(unsigned char *p, int length, FILE *f)
439 {
440 int c = 0;
441 int yield = 0;
442
443 while (length-- > 0)
444 {
445 #if !defined NOUTF8
446 if (use_utf8)
447 {
448 int rc = utf82ord(p, &c);
449
450 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
451 {
452 length -= rc - 1;
453 p += rc;
454 if (PRINTHEX(c))
455 {
456 if (f != NULL) fprintf(f, "%c", c);
457 yield++;
458 }
459 else
460 {
461 int n = 4;
462 if (f != NULL) fprintf(f, "\\x{%02x}", c);
463 yield += (n <= 0x000000ff)? 2 :
464 (n <= 0x00000fff)? 3 :
465 (n <= 0x0000ffff)? 4 :
466 (n <= 0x000fffff)? 5 : 6;
467 }
468 continue;
469 }
470 }
471 #endif
472
473 /* Not UTF-8, or malformed UTF-8 */
474
475 c = *p++;
476 if (PRINTHEX(c))
477 {
478 if (f != NULL) fprintf(f, "%c", c);
479 yield++;
480 }
481 else
482 {
483 if (f != NULL) fprintf(f, "\\x%02x", c);
484 yield += 4;
485 }
486 }
487
488 return yield;
489 }
490
491
492
493 /*************************************************
494 * Callout function *
495 *************************************************/
496
497 /* Called from PCRE as a result of the (?C) item. We print out where we are in
498 the match. Yield zero unless more callouts than the fail count, or the callout
499 data is not zero. */
500
501 static int callout(pcre_callout_block *cb)
502 {
503 FILE *f = (first_callout | callout_extra)? outfile : NULL;
504 int i, pre_start, post_start, subject_length;
505
506 if (callout_extra)
507 {
508 fprintf(f, "Callout %d: last capture = %d\n",
509 cb->callout_number, cb->capture_last);
510
511 for (i = 0; i < cb->capture_top * 2; i += 2)
512 {
513 if (cb->offset_vector[i] < 0)
514 fprintf(f, "%2d: <unset>\n", i/2);
515 else
516 {
517 fprintf(f, "%2d: ", i/2);
518 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
519 cb->offset_vector[i+1] - cb->offset_vector[i], f);
520 fprintf(f, "\n");
521 }
522 }
523 }
524
525 /* Re-print the subject in canonical form, the first time or if giving full
526 datails. On subsequent calls in the same match, we use pchars just to find the
527 printed lengths of the substrings. */
528
529 if (f != NULL) fprintf(f, "--->");
530
531 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
532 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
533 cb->current_position - cb->start_match, f);
534
535 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
536
537 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
538 cb->subject_length - cb->current_position, f);
539
540 if (f != NULL) fprintf(f, "\n");
541
542 /* Always print appropriate indicators, with callout number if not already
543 shown. For automatic callouts, show the pattern offset. */
544
545 if (cb->callout_number == 255)
546 {
547 fprintf(outfile, "%+3d ", cb->pattern_position);
548 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
549 }
550 else
551 {
552 if (callout_extra) fprintf(outfile, " ");
553 else fprintf(outfile, "%3d ", cb->callout_number);
554 }
555
556 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
557 fprintf(outfile, "^");
558
559 if (post_start > 0)
560 {
561 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
562 fprintf(outfile, "^");
563 }
564
565 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
566 fprintf(outfile, " ");
567
568 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
569 pbuffer + cb->pattern_position);
570
571 fprintf(outfile, "\n");
572 first_callout = 0;
573
574 if (cb->callout_data != NULL)
575 {
576 int callout_data = *((int *)(cb->callout_data));
577 if (callout_data != 0)
578 {
579 fprintf(outfile, "Callout data = %d\n", callout_data);
580 return callout_data;
581 }
582 }
583
584 return (cb->callout_number != callout_fail_id)? 0 :
585 (++callout_count >= callout_fail_count)? 1 : 0;
586 }
587
588
589 /*************************************************
590 * Local malloc functions *
591 *************************************************/
592
593 /* Alternative malloc function, to test functionality and show the size of the
594 compiled re. */
595
596 static void *new_malloc(size_t size)
597 {
598 void *block = malloc(size);
599 gotten_store = size;
600 if (show_malloc)
601 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
602 return block;
603 }
604
605 static void new_free(void *block)
606 {
607 if (show_malloc)
608 fprintf(outfile, "free %p\n", block);
609 free(block);
610 }
611
612
613 /* For recursion malloc/free, to test stacking calls */
614
615 static void *stack_malloc(size_t size)
616 {
617 void *block = malloc(size);
618 if (show_malloc)
619 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
620 return block;
621 }
622
623 static void stack_free(void *block)
624 {
625 if (show_malloc)
626 fprintf(outfile, "stack_free %p\n", block);
627 free(block);
628 }
629
630
631 /*************************************************
632 * Call pcre_fullinfo() *
633 *************************************************/
634
635 /* Get one piece of information from the pcre_fullinfo() function */
636
637 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
638 {
639 int rc;
640 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
641 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
642 }
643
644
645
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverse the byte order of a 2-byte or a 4-byte quantity held in the low
end of an unsigned long. Any higher-order bytes of the argument are dropped.

Arguments:
  value     the value to flip
  n         2 to flip the low two bytes; any other value flips the low four

Returns:    the flipped value
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2 = (value >> 16) & 0xff;
unsigned long int b3 = (value >> 24) & 0xff;

if (n == 2) return (b0 << 8) | b1;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
659
660
661
662
663 /*************************************************
664 * Check match or recursion limit *
665 *************************************************/
666
667 static int
668 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
669 int start_offset, int options, int *use_offsets, int use_size_offsets,
670 int flag, unsigned long int *limit, int errnumber, const char *msg)
671 {
672 int count;
673 int min = 0;
674 int mid = 64;
675 int max = -1;
676
677 extra->flags |= flag;
678
679 for (;;)
680 {
681 *limit = mid;
682
683 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
684 use_offsets, use_size_offsets);
685
686 if (count == errnumber)
687 {
688 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
689 min = mid;
690 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
691 }
692
693 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
694 count == PCRE_ERROR_PARTIAL)
695 {
696 if (mid == min + 1)
697 {
698 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
699 break;
700 }
701 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
702 max = mid;
703 mid = (min + mid)/2;
704 }
705 else break; /* Some other error */
706 }
707
708 extra->flags &= ~flag;
709 return count;
710 }
711
712
713
714 /*************************************************
715 * Case-independent strncmp() function *
716 *************************************************/
717
718 /*
719 Arguments:
720 s first string
721 t second string
722 n number of characters to compare
723
724 Returns: < 0, = 0, or > 0, according to the comparison
725 */
726
727 static int
728 strncmpic(uschar *s, uschar *t, int n)
729 {
730 while (n--)
731 {
732 int c = tolower(*s++) - tolower(*t++);
733 if (c) return c;
734 }
735 return 0;
736 }
737
738
739
740 /*************************************************
741 * Check newline indicator *
742 *************************************************/
743
744 /* This is used both at compile and run-time to check for <xxx> escapes, where
745 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
746 no match.
747
748 Arguments:
749 p points after the leading '<'
750 f file for error message
751
752 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
753 */
754
755 static int
756 check_newline(uschar *p, FILE *f)
757 {
758 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
759 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
760 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
761 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
762 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
763 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
764 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
765 fprintf(f, "Unknown newline type at: <%s\n", p);
766 return 0;
767 }
768
769
770
/*************************************************
*             Usage function                     *
*************************************************/

/* Write the command line summary to stdout. The lines for -dfa and -p are
left out when pcretest is built with NODFA or NOPOSIX respectively, and the
note about readline() depends on SUPPORT_LIBREADLINE. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n", stdout);
fputs("Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n",
      stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n",
      stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s output store (memory) used information\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n",
      stdout);
}
808
809
810
811 /*************************************************
812 * Main Program *
813 *************************************************/
814
815 /* Read lines from named file or stdin and write to named file or stdout; lines
816 consist of a regular expression, in delimiters and optionally followed by
817 options, followed by a set of test data, terminated by an empty line. */
818
819 int main(int argc, char **argv)
820 {
821 FILE *infile = stdin;
822 int options = 0;
823 int study_options = 0;
824 int default_find_match_limit = FALSE;
825 int op = 1;
826 int timeit = 0;
827 int timeitm = 0;
828 int showinfo = 0;
829 int showstore = 0;
830 int quiet = 0;
831 int size_offsets = 45;
832 int size_offsets_max;
833 int *offsets = NULL;
834 #if !defined NOPOSIX
835 int posix = 0;
836 #endif
837 int debug = 0;
838 int done = 0;
839 int all_use_dfa = 0;
840 int yield = 0;
841 int stack_size;
842
843 /* These vectors store, end-to-end, a list of captured substring names. Assume
844 that 1024 is plenty long enough for the few names we'll be testing. */
845
846 uschar copynames[1024];
847 uschar getnames[1024];
848
849 uschar *copynamesptr;
850 uschar *getnamesptr;
851
852 /* Get buffers from malloc() so that Electric Fence will check their misuse
853 when I am debugging. They grow automatically when very long lines are read. */
854
855 buffer = (unsigned char *)malloc(buffer_size);
856 dbuffer = (unsigned char *)malloc(buffer_size);
857 pbuffer = (unsigned char *)malloc(buffer_size);
858
859 /* The outfile variable is static so that new_malloc can use it. */
860
861 outfile = stdout;
862
863 /* The following _setmode() stuff is some Windows magic that tells its runtime
864 library to translate CRLF into a single LF character. At least, that's what
865 I've been told: never having used Windows I take this all on trust. Originally
866 it set 0x8000, but then I was advised that _O_BINARY was better. */
867
868 #if defined(_WIN32) || defined(WIN32)
869 _setmode( _fileno( stdout ), _O_BINARY );
870 #endif
871
872 /* Scan options */
873
874 while (argc > 1 && argv[op][0] == '-')
875 {
876 unsigned char *endptr;
877
878 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
879 showstore = 1;
880 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
881 else if (strcmp(argv[op], "-b") == 0) debug = 1;
882 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
883 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
884 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
885 #if !defined NODFA
886 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
887 #endif
888 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
889 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
890 *endptr == 0))
891 {
892 op++;
893 argc--;
894 }
895 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
896 {
897 int both = argv[op][2] == 0;
898 int temp;
899 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
900 *endptr == 0))
901 {
902 timeitm = temp;
903 op++;
904 argc--;
905 }
906 else timeitm = LOOPREPEAT;
907 if (both) timeit = timeitm;
908 }
909 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
910 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
911 *endptr == 0))
912 {
913 #if defined(_WIN32) || defined(WIN32)
914 printf("PCRE: -S not supported on this OS\n");
915 exit(1);
916 #else
917 int rc;
918 struct rlimit rlim;
919 getrlimit(RLIMIT_STACK, &rlim);
920 rlim.rlim_cur = stack_size * 1024 * 1024;
921 rc = setrlimit(RLIMIT_STACK, &rlim);
922 if (rc != 0)
923 {
924 printf("PCRE: setrlimit() failed with error %d\n", rc);
925 exit(1);
926 }
927 op++;
928 argc--;
929 #endif
930 }
931 #if !defined NOPOSIX
932 else if (strcmp(argv[op], "-p") == 0) posix = 1;
933 #endif
934 else if (strcmp(argv[op], "-C") == 0)
935 {
936 int rc;
937 unsigned long int lrc;
938 printf("PCRE version %s\n", pcre_version());
939 printf("Compiled with\n");
940 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
941 printf(" %sUTF-8 support\n", rc? "" : "No ");
942 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
943 printf(" %sUnicode properties support\n", rc? "" : "No ");
944 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
945 /* Note that these values are always the ASCII values, even
946 in EBCDIC environments. CR is 13 and NL is 10. */
947 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
948 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
949 (rc == -2)? "ANYCRLF" :
950 (rc == -1)? "ANY" : "???");
951 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
952 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
953 "all Unicode newlines");
954 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
955 printf(" Internal link size = %d\n", rc);
956 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
957 printf(" POSIX malloc threshold = %d\n", rc);
958 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
959 printf(" Default match limit = %ld\n", lrc);
960 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
961 printf(" Default recursion depth limit = %ld\n", lrc);
962 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
963 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
964 goto EXIT;
965 }
966 else if (strcmp(argv[op], "-help") == 0 ||
967 strcmp(argv[op], "--help") == 0)
968 {
969 usage();
970 goto EXIT;
971 }
972 else
973 {
974 printf("** Unknown or malformed option %s\n", argv[op]);
975 usage();
976 yield = 1;
977 goto EXIT;
978 }
979 op++;
980 argc--;
981 }
982
983 /* Get the store for the offsets vector, and remember what it was */
984
985 size_offsets_max = size_offsets;
986 offsets = (int *)malloc(size_offsets_max * sizeof(int));
987 if (offsets == NULL)
988 {
989 printf("** Failed to get %d bytes of memory for offsets vector\n",
990 (int)(size_offsets_max * sizeof(int)));
991 yield = 1;
992 goto EXIT;
993 }
994
995 /* Sort out the input and output files */
996
997 if (argc > 1)
998 {
999 infile = fopen(argv[op], INPUT_MODE);
1000 if (infile == NULL)
1001 {
1002 printf("** Failed to open %s\n", argv[op]);
1003 yield = 1;
1004 goto EXIT;
1005 }
1006 }
1007
1008 if (argc > 2)
1009 {
1010 outfile = fopen(argv[op+1], OUTPUT_MODE);
1011 if (outfile == NULL)
1012 {
1013 printf("** Failed to open %s\n", argv[op+1]);
1014 yield = 1;
1015 goto EXIT;
1016 }
1017 }
1018
1019 /* Set alternative malloc function */
1020
1021 pcre_malloc = new_malloc;
1022 pcre_free = new_free;
1023 pcre_stack_malloc = stack_malloc;
1024 pcre_stack_free = stack_free;
1025
1026 /* Heading line unless quiet, then prompt for first regex if stdin */
1027
1028 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1029
1030 /* Main loop */
1031
1032 while (!done)
1033 {
1034 pcre *re = NULL;
1035 pcre_extra *extra = NULL;
1036
1037 #if !defined NOPOSIX /* There are still compilers that require no indent */
1038 regex_t preg;
1039 int do_posix = 0;
1040 #endif
1041
1042 const char *error;
1043 unsigned char *markptr;
1044 unsigned char *p, *pp, *ppp;
1045 unsigned char *to_file = NULL;
1046 const unsigned char *tables = NULL;
1047 unsigned long int true_size, true_study_size = 0;
1048 size_t size, regex_gotten_store;
1049 int do_mark = 0;
1050 int do_study = 0;
1051 int do_debug = debug;
1052 int do_G = 0;
1053 int do_g = 0;
1054 int do_showinfo = showinfo;
1055 int do_showrest = 0;
1056 int do_flip = 0;
1057 int erroroffset, len, delimiter, poffset;
1058
1059 use_utf8 = 0;
1060 debug_lengths = 1;
1061
1062 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1063 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1064 fflush(outfile);
1065
1066 p = buffer;
1067 while (isspace(*p)) p++;
1068 if (*p == 0) continue;
1069
1070 /* See if the pattern is to be loaded pre-compiled from a file. */
1071
1072 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1073 {
1074 unsigned long int magic, get_options;
1075 uschar sbuf[8];
1076 FILE *f;
1077
1078 p++;
1079 pp = p + (int)strlen((char *)p);
1080 while (isspace(pp[-1])) pp--;
1081 *pp = 0;
1082
1083 f = fopen((char *)p, "rb");
1084 if (f == NULL)
1085 {
1086 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1087 continue;
1088 }
1089
1090 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1091
1092 true_size =
1093 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1094 true_study_size =
1095 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1096
1097 re = (real_pcre *)new_malloc(true_size);
1098 regex_gotten_store = gotten_store;
1099
1100 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1101
1102 magic = ((real_pcre *)re)->magic_number;
1103 if (magic != MAGIC_NUMBER)
1104 {
1105 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1106 {
1107 do_flip = 1;
1108 }
1109 else
1110 {
1111 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1112 fclose(f);
1113 continue;
1114 }
1115 }
1116
1117 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1118 do_flip? " (byte-inverted)" : "", p);
1119
1120 /* Need to know if UTF-8 for printing data strings */
1121
1122 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1123 use_utf8 = (get_options & PCRE_UTF8) != 0;
1124
1125 /* Now see if there is any following study data */
1126
1127 if (true_study_size != 0)
1128 {
1129 pcre_study_data *psd;
1130
1131 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1132 extra->flags = PCRE_EXTRA_STUDY_DATA;
1133
1134 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1135 extra->study_data = psd;
1136
1137 if (fread(psd, 1, true_study_size, f) != true_study_size)
1138 {
1139 FAIL_READ:
1140 fprintf(outfile, "Failed to read data from %s\n", p);
1141 if (extra != NULL) new_free(extra);
1142 if (re != NULL) new_free(re);
1143 fclose(f);
1144 continue;
1145 }
1146 fprintf(outfile, "Study data loaded from %s\n", p);
1147 do_study = 1; /* To get the data output if requested */
1148 }
1149 else fprintf(outfile, "No study data\n");
1150
1151 fclose(f);
1152 goto SHOW_INFO;
1153 }
1154
1155 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1156 the pattern; if it isn't complete, read more. */
1157
1158 delimiter = *p++;
1159
1160 if (isalnum(delimiter) || delimiter == '\\')
1161 {
1162 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1163 goto SKIP_DATA;
1164 }
1165
1166 pp = p;
1167 poffset = p - buffer;
1168
1169 for(;;)
1170 {
1171 while (*pp != 0)
1172 {
1173 if (*pp == '\\' && pp[1] != 0) pp++;
1174 else if (*pp == delimiter) break;
1175 pp++;
1176 }
1177 if (*pp != 0) break;
1178 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1179 {
1180 fprintf(outfile, "** Unexpected EOF\n");
1181 done = 1;
1182 goto CONTINUE;
1183 }
1184 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1185 }
1186
1187 /* The buffer may have moved while being extended; reset the start of data
1188 pointer to the correct relative point in the buffer. */
1189
1190 p = buffer + poffset;
1191
1192 /* If the first character after the delimiter is backslash, make
1193 the pattern end with backslash. This is purely to provide a way
1194 of testing for the error message when a pattern ends with backslash. */
1195
1196 if (pp[1] == '\\') *pp++ = '\\';
1197
1198 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1199 for callouts. */
1200
1201 *pp++ = 0;
1202 strcpy((char *)pbuffer, (char *)p);
1203
1204 /* Look for options after final delimiter */
1205
1206 options = 0;
1207 study_options = 0;
1208 log_store = showstore; /* default from command line */
1209
1210 while (*pp != 0)
1211 {
1212 switch (*pp++)
1213 {
1214 case 'f': options |= PCRE_FIRSTLINE; break;
1215 case 'g': do_g = 1; break;
1216 case 'i': options |= PCRE_CASELESS; break;
1217 case 'm': options |= PCRE_MULTILINE; break;
1218 case 's': options |= PCRE_DOTALL; break;
1219 case 'x': options |= PCRE_EXTENDED; break;
1220
1221 case '+': do_showrest = 1; break;
1222 case 'A': options |= PCRE_ANCHORED; break;
1223 case 'B': do_debug = 1; break;
1224 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1225 case 'D': do_debug = do_showinfo = 1; break;
1226 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1227 case 'F': do_flip = 1; break;
1228 case 'G': do_G = 1; break;
1229 case 'I': do_showinfo = 1; break;
1230 case 'J': options |= PCRE_DUPNAMES; break;
1231 case 'K': do_mark = 1; break;
1232 case 'M': log_store = 1; break;
1233 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1234
1235 #if !defined NOPOSIX
1236 case 'P': do_posix = 1; break;
1237 #endif
1238
1239 case 'S': do_study = 1; break;
1240 case 'U': options |= PCRE_UNGREEDY; break;
1241 case 'X': options |= PCRE_EXTRA; break;
1242 case 'Z': debug_lengths = 0; break;
1243 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1244 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1245
1246 case 'L':
1247 ppp = pp;
1248 /* The '\r' test here is so that it works on Windows. */
1249 /* The zero (NUL) test is just in case this is an unterminated line. */
1250 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1251 *ppp = 0;
1252 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1253 {
1254 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1255 goto SKIP_DATA;
1256 }
1257 locale_set = 1;
1258 tables = pcre_maketables();
1259 pp = ppp;
1260 break;
1261
1262 case '>':
1263 to_file = pp;
1264 while (*pp != 0) pp++;
1265 while (isspace(pp[-1])) pp--;
1266 *pp = 0;
1267 break;
1268
1269 case '<':
1270 {
1271 if (strncmp((char *)pp, "JS>", 3) == 0)
1272 {
1273 options |= PCRE_JAVASCRIPT_COMPAT;
1274 pp += 3;
1275 }
1276 else
1277 {
1278 int x = check_newline(pp, outfile);
1279 if (x == 0) goto SKIP_DATA;
1280 options |= x;
1281 while (*pp++ != '>');
1282 }
1283 }
1284 break;
1285
1286 case '\r': /* So that it works in Windows */
1287 case '\n':
1288 case ' ':
1289 break;
1290
1291 default:
1292 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1293 goto SKIP_DATA;
1294 }
1295 }
1296
1297 /* Handle compiling via the POSIX interface, which doesn't support the
1298 timing, showing, or debugging options, nor the ability to pass over
1299 local character tables. */
1300
1301 #if !defined NOPOSIX
1302 if (posix || do_posix)
1303 {
1304 int rc;
1305 int cflags = 0;
1306
1307 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1308 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1309 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1310 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1311 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1312 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1313
1314 rc = regcomp(&preg, (char *)p, cflags);
1315
1316 /* Compilation failed; go back for another re, skipping to blank line
1317 if non-interactive. */
1318
1319 if (rc != 0)
1320 {
1321 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1322 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1323 goto SKIP_DATA;
1324 }
1325 }
1326
1327 /* Handle compiling via the native interface */
1328
1329 else
1330 #endif /* !defined NOPOSIX */
1331
1332 {
1333 unsigned long int get_options;
1334
1335 if (timeit > 0)
1336 {
1337 register int i;
1338 clock_t time_taken;
1339 clock_t start_time = clock();
1340 for (i = 0; i < timeit; i++)
1341 {
1342 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1343 if (re != NULL) free(re);
1344 }
1345 time_taken = clock() - start_time;
1346 fprintf(outfile, "Compile time %.4f milliseconds\n",
1347 (((double)time_taken * 1000.0) / (double)timeit) /
1348 (double)CLOCKS_PER_SEC);
1349 }
1350
1351 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1352
1353 /* Compilation failed; go back for another re, skipping to blank line
1354 if non-interactive. */
1355
1356 if (re == NULL)
1357 {
1358 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1359 SKIP_DATA:
1360 if (infile != stdin)
1361 {
1362 for (;;)
1363 {
1364 if (extend_inputline(infile, buffer, NULL) == NULL)
1365 {
1366 done = 1;
1367 goto CONTINUE;
1368 }
1369 len = (int)strlen((char *)buffer);
1370 while (len > 0 && isspace(buffer[len-1])) len--;
1371 if (len == 0) break;
1372 }
1373 fprintf(outfile, "\n");
1374 }
1375 goto CONTINUE;
1376 }
1377
1378 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1379 within the regex; check for this so that we know how to process the data
1380 lines. */
1381
1382 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1383 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1384
1385 /* Print information if required. There are now two info-returning
1386 functions. The old one has a limited interface and returns only limited
1387 data. Check that it agrees with the newer one. */
1388
1389 if (log_store)
1390 fprintf(outfile, "Memory allocation (code space): %d\n",
1391 (int)(gotten_store -
1392 sizeof(real_pcre) -
1393 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1394
1395 /* Extract the size for possible writing before possibly flipping it,
1396 and remember the store that was got. */
1397
1398 true_size = ((real_pcre *)re)->size;
1399 regex_gotten_store = gotten_store;
1400
1401 /* If /S was present, study the regexp to generate additional info to
1402 help with the matching. */
1403
1404 if (do_study)
1405 {
1406 if (timeit > 0)
1407 {
1408 register int i;
1409 clock_t time_taken;
1410 clock_t start_time = clock();
1411 for (i = 0; i < timeit; i++)
1412 extra = pcre_study(re, study_options, &error);
1413 time_taken = clock() - start_time;
1414 if (extra != NULL) free(extra);
1415 fprintf(outfile, " Study time %.4f milliseconds\n",
1416 (((double)time_taken * 1000.0) / (double)timeit) /
1417 (double)CLOCKS_PER_SEC);
1418 }
1419 extra = pcre_study(re, study_options, &error);
1420 if (error != NULL)
1421 fprintf(outfile, "Failed to study: %s\n", error);
1422 else if (extra != NULL)
1423 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1424 }
1425
1426 /* If /K was present, we set up for handling MARK data. */
1427
1428 if (do_mark)
1429 {
1430 if (extra == NULL)
1431 {
1432 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1433 extra->flags = 0;
1434 }
1435 extra->mark = &markptr;
1436 extra->flags |= PCRE_EXTRA_MARK;
1437 }
1438
1439 /* If the 'F' option was present, we flip the bytes of all the integer
1440 fields in the regex data block and the study block. This is to make it
1441 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1442 compiled on a different architecture. */
1443
1444 if (do_flip)
1445 {
1446 real_pcre *rre = (real_pcre *)re;
1447 rre->magic_number =
1448 byteflip(rre->magic_number, sizeof(rre->magic_number));
1449 rre->size = byteflip(rre->size, sizeof(rre->size));
1450 rre->options = byteflip(rre->options, sizeof(rre->options));
1451 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1452 rre->top_bracket =
1453 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1454 rre->top_backref =
1455 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1456 rre->first_byte =
1457 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1458 rre->req_byte =
1459 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1460 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1461 sizeof(rre->name_table_offset));
1462 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1463 sizeof(rre->name_entry_size));
1464 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1465 sizeof(rre->name_count));
1466
1467 if (extra != NULL)
1468 {
1469 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1470 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1471 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1472 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1473 }
1474 }
1475
1476 /* Extract information from the compiled data if required */
1477
1478 SHOW_INFO:
1479
1480 if (do_debug)
1481 {
1482 fprintf(outfile, "------------------------------------------------------------------\n");
1483 pcre_printint(re, outfile, debug_lengths);
1484 }
1485
1486 /* We already have the options in get_options (see above) */
1487
1488 if (do_showinfo)
1489 {
1490 unsigned long int all_options;
1491 #if !defined NOINFOCHECK
1492 int old_first_char, old_options, old_count;
1493 #endif
1494 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1495 hascrorlf;
1496 int nameentrysize, namecount;
1497 const uschar *nametable;
1498
1499 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1500 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1501 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1502 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1503 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1504 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1505 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1506 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1507 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1508 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1509 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1510
1511 #if !defined NOINFOCHECK
1512 old_count = pcre_info(re, &old_options, &old_first_char);
1513 if (count < 0) fprintf(outfile,
1514 "Error %d from pcre_info()\n", count);
1515 else
1516 {
1517 if (old_count != count) fprintf(outfile,
1518 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1519 old_count);
1520
1521 if (old_first_char != first_char) fprintf(outfile,
1522 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1523 first_char, old_first_char);
1524
1525 if (old_options != (int)get_options) fprintf(outfile,
1526 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1527 get_options, old_options);
1528 }
1529 #endif
1530
1531 if (size != regex_gotten_store) fprintf(outfile,
1532 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1533 (int)size, (int)regex_gotten_store);
1534
1535 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1536 if (backrefmax > 0)
1537 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1538
1539 if (namecount > 0)
1540 {
1541 fprintf(outfile, "Named capturing subpatterns:\n");
1542 while (namecount-- > 0)
1543 {
1544 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1545 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1546 GET2(nametable, 0));
1547 nametable += nameentrysize;
1548 }
1549 }
1550
1551 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1552 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1553
1554 all_options = ((real_pcre *)re)->options;
1555 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1556
1557 if (get_options == 0) fprintf(outfile, "No options\n");
1558 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1559 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1560 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1561 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1562 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1563 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1564 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1565 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1566 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1567 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1568 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1569 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1570 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1571 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1572 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1573 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1574
1575 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1576
1577 switch (get_options & PCRE_NEWLINE_BITS)
1578 {
1579 case PCRE_NEWLINE_CR:
1580 fprintf(outfile, "Forced newline sequence: CR\n");
1581 break;
1582
1583 case PCRE_NEWLINE_LF:
1584 fprintf(outfile, "Forced newline sequence: LF\n");
1585 break;
1586
1587 case PCRE_NEWLINE_CRLF:
1588 fprintf(outfile, "Forced newline sequence: CRLF\n");
1589 break;
1590
1591 case PCRE_NEWLINE_ANYCRLF:
1592 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1593 break;
1594
1595 case PCRE_NEWLINE_ANY:
1596 fprintf(outfile, "Forced newline sequence: ANY\n");
1597 break;
1598
1599 default:
1600 break;
1601 }
1602
1603 if (first_char == -1)
1604 {
1605 fprintf(outfile, "First char at start or follows newline\n");
1606 }
1607 else if (first_char < 0)
1608 {
1609 fprintf(outfile, "No first char\n");
1610 }
1611 else
1612 {
1613 int ch = first_char & 255;
1614 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1615 "" : " (caseless)";
1616 if (PRINTHEX(ch))
1617 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1618 else
1619 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1620 }
1621
1622 if (need_char < 0)
1623 {
1624 fprintf(outfile, "No need char\n");
1625 }
1626 else
1627 {
1628 int ch = need_char & 255;
1629 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1630 "" : " (caseless)";
1631 if (PRINTHEX(ch))
1632 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1633 else
1634 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1635 }
1636
1637 /* Don't output study size; at present it is in any case a fixed
1638 value, but it varies, depending on the computer architecture, and
1639 so messes up the test suite. (And with the /F option, it might be
1640 flipped.) */
1641
1642 if (do_study)
1643 {
1644 if (extra == NULL)
1645 fprintf(outfile, "Study returned NULL\n");
1646 else
1647 {
1648 uschar *start_bits = NULL;
1649 int minlength;
1650
1651 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
1652 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1653
1654 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1655 if (start_bits == NULL)
1656 fprintf(outfile, "No set of starting bytes\n");
1657 else
1658 {
1659 int i;
1660 int c = 24;
1661 fprintf(outfile, "Starting byte set: ");
1662 for (i = 0; i < 256; i++)
1663 {
1664 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1665 {
1666 if (c > 75)
1667 {
1668 fprintf(outfile, "\n ");
1669 c = 2;
1670 }
1671 if (PRINTHEX(i) && i != ' ')
1672 {
1673 fprintf(outfile, "%c ", i);
1674 c += 2;
1675 }
1676 else
1677 {
1678 fprintf(outfile, "\\x%02x ", i);
1679 c += 5;
1680 }
1681 }
1682 }
1683 fprintf(outfile, "\n");
1684 }
1685 }
1686 }
1687 }
1688
1689 /* If the '>' option was present, we write out the regex to a file, and
1690 that is all. The first 8 bytes of the file are the regex length and then
1691 the study length, in big-endian order. */
1692
1693 if (to_file != NULL)
1694 {
1695 FILE *f = fopen((char *)to_file, "wb");
1696 if (f == NULL)
1697 {
1698 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1699 }
1700 else
1701 {
1702 uschar sbuf[8];
1703 sbuf[0] = (uschar)((true_size >> 24) & 255);
1704 sbuf[1] = (uschar)((true_size >> 16) & 255);
1705 sbuf[2] = (uschar)((true_size >> 8) & 255);
1706 sbuf[3] = (uschar)((true_size) & 255);
1707
1708 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1709 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1710 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1711 sbuf[7] = (uschar)((true_study_size) & 255);
1712
1713 if (fwrite(sbuf, 1, 8, f) < 8 ||
1714 fwrite(re, 1, true_size, f) < true_size)
1715 {
1716 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1717 }
1718 else
1719 {
1720 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1721 if (extra != NULL)
1722 {
1723 if (fwrite(extra->study_data, 1, true_study_size, f) <
1724 true_study_size)
1725 {
1726 fprintf(outfile, "Write error on %s: %s\n", to_file,
1727 strerror(errno));
1728 }
1729 else fprintf(outfile, "Study data written to %s\n", to_file);
1730
1731 }
1732 }
1733 fclose(f);
1734 }
1735
1736 new_free(re);
1737 if (extra != NULL) new_free(extra);
1738 if (tables != NULL) new_free((void *)tables);
1739 continue; /* With next regex */
1740 }
1741 } /* End of non-POSIX compile */
1742
1743 /* Read data lines and test them */
1744
1745 for (;;)
1746 {
1747 uschar *q;
1748 uschar *bptr;
1749 int *use_offsets = offsets;
1750 int use_size_offsets = size_offsets;
1751 int callout_data = 0;
1752 int callout_data_set = 0;
1753 int count, c;
1754 int copystrings = 0;
1755 int find_match_limit = default_find_match_limit;
1756 int getstrings = 0;
1757 int getlist = 0;
1758 int gmatched = 0;
1759 int start_offset = 0;
1760 int g_notempty = 0;
1761 int use_dfa = 0;
1762
1763 options = 0;
1764
1765 *copynames = 0;
1766 *getnames = 0;
1767
1768 copynamesptr = copynames;
1769 getnamesptr = getnames;
1770
1771 pcre_callout = callout;
1772 first_callout = 1;
1773 callout_extra = 0;
1774 callout_count = 0;
1775 callout_fail_count = 999999;
1776 callout_fail_id = -1;
1777 show_malloc = 0;
1778
1779 if (extra != NULL) extra->flags &=
1780 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1781
1782 len = 0;
1783 for (;;)
1784 {
1785 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1786 {
1787 if (len > 0) break;
1788 done = 1;
1789 goto CONTINUE;
1790 }
1791 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1792 len = (int)strlen((char *)buffer);
1793 if (buffer[len-1] == '\n') break;
1794 }
1795
1796 while (len > 0 && isspace(buffer[len-1])) len--;
1797 buffer[len] = 0;
1798 if (len == 0) break;
1799
1800 p = buffer;
1801 while (isspace(*p)) p++;
1802
1803 bptr = q = dbuffer;
1804 while ((c = *p++) != 0)
1805 {
1806 int i = 0;
1807 int n = 0;
1808
1809 if (c == '\\') switch ((c = *p++))
1810 {
1811 case 'a': c = 7; break;
1812 case 'b': c = '\b'; break;
1813 case 'e': c = 27; break;
1814 case 'f': c = '\f'; break;
1815 case 'n': c = '\n'; break;
1816 case 'r': c = '\r'; break;
1817 case 't': c = '\t'; break;
1818 case 'v': c = '\v'; break;
1819
1820 case '0': case '1': case '2': case '3':
1821 case '4': case '5': case '6': case '7':
1822 c -= '0';
1823 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1824 c = c * 8 + *p++ - '0';
1825
1826 #if !defined NOUTF8
1827 if (use_utf8 && c > 255)
1828 {
1829 unsigned char buff8[8];
1830 int ii, utn;
1831 utn = ord2utf8(c, buff8);
1832 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1833 c = buff8[ii]; /* Last byte */
1834 }
1835 #endif
1836 break;
1837
1838 case 'x':
1839
1840 /* Handle \x{..} specially - new Perl thing for utf8 */
1841
1842 #if !defined NOUTF8
1843 if (*p == '{')
1844 {
1845 unsigned char *pt = p;
1846 c = 0;
1847 while (isxdigit(*(++pt)))
1848 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1849 if (*pt == '}')
1850 {
1851 unsigned char buff8[8];
1852 int ii, utn;
1853 if (use_utf8)
1854 {
1855 utn = ord2utf8(c, buff8);
1856 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1857 c = buff8[ii]; /* Last byte */
1858 }
1859 else
1860 {
1861 if (c > 255)
1862 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1863 "UTF-8 mode is not enabled.\n"
1864 "** Truncation will probably give the wrong result.\n", c);
1865 }
1866 p = pt + 1;
1867 break;
1868 }
1869 /* Not correct form; fall through */
1870 }
1871 #endif
1872
1873 /* Ordinary \x */
1874
1875 c = 0;
1876 while (i++ < 2 && isxdigit(*p))
1877 {
1878 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1879 p++;
1880 }
1881 break;
1882
1883 case 0: /* \ followed by EOF allows for an empty line */
1884 p--;
1885 continue;
1886
1887 case '>':
1888 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1889 continue;
1890
1891 case 'A': /* Option setting */
1892 options |= PCRE_ANCHORED;
1893 continue;
1894
1895 case 'B':
1896 options |= PCRE_NOTBOL;
1897 continue;
1898
1899 case 'C':
1900 if (isdigit(*p)) /* Set copy string */
1901 {
1902 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1903 copystrings |= 1 << n;
1904 }
1905 else if (isalnum(*p))
1906 {
1907 uschar *npp = copynamesptr;
1908 while (isalnum(*p)) *npp++ = *p++;
1909 *npp++ = 0;
1910 *npp = 0;
1911 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1912 if (n < 0)
1913 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1914 copynamesptr = npp;
1915 }
1916 else if (*p == '+')
1917 {
1918 callout_extra = 1;
1919 p++;
1920 }
1921 else if (*p == '-')
1922 {
1923 pcre_callout = NULL;
1924 p++;
1925 }
1926 else if (*p == '!')
1927 {
1928 callout_fail_id = 0;
1929 p++;
1930 while(isdigit(*p))
1931 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1932 callout_fail_count = 0;
1933 if (*p == '!')
1934 {
1935 p++;
1936 while(isdigit(*p))
1937 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1938 }
1939 }
1940 else if (*p == '*')
1941 {
1942 int sign = 1;
1943 callout_data = 0;
1944 if (*(++p) == '-') { sign = -1; p++; }
1945 while(isdigit(*p))
1946 callout_data = callout_data * 10 + *p++ - '0';
1947 callout_data *= sign;
1948 callout_data_set = 1;
1949 }
1950 continue;
1951
1952 #if !defined NODFA
1953 case 'D':
1954 #if !defined NOPOSIX
1955 if (posix || do_posix)
1956 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1957 else
1958 #endif
1959 use_dfa = 1;
1960 continue;
1961
1962 case 'F':
1963 options |= PCRE_DFA_SHORTEST;
1964 continue;
1965 #endif
1966
1967 case 'G':
1968 if (isdigit(*p))
1969 {
1970 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1971 getstrings |= 1 << n;
1972 }
1973 else if (isalnum(*p))
1974 {
1975 uschar *npp = getnamesptr;
1976 while (isalnum(*p)) *npp++ = *p++;
1977 *npp++ = 0;
1978 *npp = 0;
1979 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1980 if (n < 0)
1981 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1982 getnamesptr = npp;
1983 }
1984 continue;
1985
1986 case 'L':
1987 getlist = 1;
1988 continue;
1989
1990 case 'M':
1991 find_match_limit = 1;
1992 continue;
1993
1994 case 'N':
1995 if ((options & PCRE_NOTEMPTY) != 0)
1996 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
1997 else
1998 options |= PCRE_NOTEMPTY;
1999 continue;
2000
2001 case 'O':
2002 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2003 if (n > size_offsets_max)
2004 {
2005 size_offsets_max = n;
2006 free(offsets);
2007 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2008 if (offsets == NULL)
2009 {
2010 printf("** Failed to get %d bytes of memory for offsets vector\n",
2011 (int)(size_offsets_max * sizeof(int)));
2012 yield = 1;
2013 goto EXIT;
2014 }
2015 }
2016 use_size_offsets = n;
2017 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2018 continue;
2019
2020 case 'P':
2021 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2022 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2023 continue;
2024
2025 case 'Q':
2026 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2027 if (extra == NULL)
2028 {
2029 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2030 extra->flags = 0;
2031 }
2032 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2033 extra->match_limit_recursion = n;
2034 continue;
2035
2036 case 'q':
2037 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2038 if (extra == NULL)
2039 {
2040 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2041 extra->flags = 0;
2042 }
2043 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2044 extra->match_limit = n;
2045 continue;
2046
2047 #if !defined NODFA
2048 case 'R':
2049 options |= PCRE_DFA_RESTART;
2050 continue;
2051 #endif
2052
2053 case 'S':
2054 show_malloc = 1;
2055 continue;
2056
2057 case 'Y':
2058 options |= PCRE_NO_START_OPTIMIZE;
2059 continue;
2060
2061 case 'Z':
2062 options |= PCRE_NOTEOL;
2063 continue;
2064
2065 case '?':
2066 options |= PCRE_NO_UTF8_CHECK;
2067 continue;
2068
2069 case '<':
2070 {
2071 int x = check_newline(p, outfile);
2072 if (x == 0) goto NEXT_DATA;
2073 options |= x;
2074 while (*p++ != '>');
2075 }
2076 continue;
2077 }
2078 *q++ = c;
2079 }
2080 *q = 0;
2081 len = q - dbuffer;
2082
2083 /* Move the data to the end of the buffer so that a read over the end of
2084 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2085 we are using the POSIX interface, we must include the terminating zero. */
2086
2087 #if !defined NOPOSIX
2088 if (posix || do_posix)
2089 {
2090 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2091 bptr += buffer_size - len - 1;
2092 }
2093 else
2094 #endif
2095 {
2096 memmove(bptr + buffer_size - len, bptr, len);
2097 bptr += buffer_size - len;
2098 }
2099
2100 if ((all_use_dfa || use_dfa) && find_match_limit)
2101 {
2102 printf("**Match limit not relevant for DFA matching: ignored\n");
2103 find_match_limit = 0;
2104 }
2105
2106 /* Handle matching via the POSIX interface, which does not
2107 support timing or playing with the match limit or callout data. */
2108
2109 #if !defined NOPOSIX
2110 if (posix || do_posix)
2111 {
2112 int rc;
2113 int eflags = 0;
2114 regmatch_t *pmatch = NULL;
2115 if (use_size_offsets > 0)
2116 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2117 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2118 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2119 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2120
2121 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2122
2123 if (rc != 0)
2124 {
2125 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2126 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2127 }
2128 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2129 != 0)
2130 {
2131 fprintf(outfile, "Matched with REG_NOSUB\n");
2132 }
2133 else
2134 {
2135 size_t i;
2136 for (i = 0; i < (size_t)use_size_offsets; i++)
2137 {
2138 if (pmatch[i].rm_so >= 0)
2139 {
2140 fprintf(outfile, "%2d: ", (int)i);
2141 (void)pchars(dbuffer + pmatch[i].rm_so,
2142 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2143 fprintf(outfile, "\n");
2144 if (i == 0 && do_showrest)
2145 {
2146 fprintf(outfile, " 0+ ");
2147 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2148 outfile);
2149 fprintf(outfile, "\n");
2150 }
2151 }
2152 }
2153 }
2154 free(pmatch);
2155 }
2156
2157 /* Handle matching via the native interface - repeats for /g and /G */
2158
2159 else
2160 #endif /* !defined NOPOSIX */
2161
2162 for (;; gmatched++) /* Loop for /g or /G */
2163 {
2164 markptr = NULL;
2165
2166 if (timeitm > 0)
2167 {
2168 register int i;
2169 clock_t time_taken;
2170 clock_t start_time = clock();
2171
2172 #if !defined NODFA
2173 if (all_use_dfa || use_dfa)
2174 {
2175 int workspace[1000];
2176 for (i = 0; i < timeitm; i++)
2177 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2178 options | g_notempty, use_offsets, use_size_offsets, workspace,
2179 sizeof(workspace)/sizeof(int));
2180 }
2181 else
2182 #endif
2183
2184 for (i = 0; i < timeitm; i++)
2185 count = pcre_exec(re, extra, (char *)bptr, len,
2186 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2187
2188 time_taken = clock() - start_time;
2189 fprintf(outfile, "Execute time %.4f milliseconds\n",
2190 (((double)time_taken * 1000.0) / (double)timeitm) /
2191 (double)CLOCKS_PER_SEC);
2192 }
2193
2194 /* If find_match_limit is set, we want to do repeated matches with
2195 varying limits in order to find the minimum value for the match limit and
2196 for the recursion limit. */
2197
2198 if (find_match_limit)
2199 {
2200 if (extra == NULL)
2201 {
2202 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2203 extra->flags = 0;
2204 }
2205
2206 (void)check_match_limit(re, extra, bptr, len, start_offset,
2207 options|g_notempty, use_offsets, use_size_offsets,
2208 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2209 PCRE_ERROR_MATCHLIMIT, "match()");
2210
2211 count = check_match_limit(re, extra, bptr, len, start_offset,
2212 options|g_notempty, use_offsets, use_size_offsets,
2213 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2214 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2215 }
2216
2217 /* If callout_data is set, use the interface with additional data */
2218
2219 else if (callout_data_set)
2220 {
2221 if (extra == NULL)
2222 {
2223 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2224 extra->flags = 0;
2225 }
2226 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2227 extra->callout_data = &callout_data;
2228 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2229 options | g_notempty, use_offsets, use_size_offsets);
2230 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2231 }
2232
2233 /* The normal case is just to do the match once, with the default
2234 value of match_limit. */
2235
2236 #if !defined NODFA
2237 else if (all_use_dfa || use_dfa)
2238 {
2239 int workspace[1000];
2240 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2241 options | g_notempty, use_offsets, use_size_offsets, workspace,
2242 sizeof(workspace)/sizeof(int));
2243 if (count == 0)
2244 {
2245 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2246 count = use_size_offsets/2;
2247 }
2248 }
2249 #endif
2250
2251 else
2252 {
2253 count = pcre_exec(re, extra, (char *)bptr, len,
2254 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2255 if (count == 0)
2256 {
2257 fprintf(outfile, "Matched, but too many substrings\n");
2258 count = use_size_offsets/3;
2259 }
2260 }
2261
2262 /* Matched */
2263
2264 if (count >= 0)
2265 {
2266 int i, maxcount;
2267
2268 #if !defined NODFA
2269 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2270 #endif
2271 maxcount = use_size_offsets/3;
2272
2273 /* This is a check against a lunatic return value. */
2274
2275 if (count > maxcount)
2276 {
2277 fprintf(outfile,
2278 "** PCRE error: returned count %d is too big for offset size %d\n",
2279 count, use_size_offsets);
2280 count = use_size_offsets/3;
2281 if (do_g || do_G)
2282 {
2283 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2284 do_g = do_G = FALSE; /* Break g/G loop */
2285 }
2286 }
2287
2288 for (i = 0; i < count * 2; i += 2)
2289 {
2290 if (use_offsets[i] < 0)
2291 fprintf(outfile, "%2d: <unset>\n", i/2);
2292 else
2293 {
2294 fprintf(outfile, "%2d: ", i/2);
2295 (void)pchars(bptr + use_offsets[i],
2296 use_offsets[i+1] - use_offsets[i], outfile);
2297 fprintf(outfile, "\n");
2298 if (i == 0)
2299 {
2300 if (do_showrest)
2301 {
2302 fprintf(outfile, " 0+ ");
2303 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2304 outfile);
2305 fprintf(outfile, "\n");
2306 }
2307 }
2308 }
2309 }
2310
2311 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2312
2313 for (i = 0; i < 32; i++)
2314 {
2315 if ((copystrings & (1 << i)) != 0)
2316 {
2317 char copybuffer[256];
2318 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2319 i, copybuffer, sizeof(copybuffer));
2320 if (rc < 0)
2321 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2322 else
2323 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2324 }
2325 }
2326
2327 for (copynamesptr = copynames;
2328 *copynamesptr != 0;
2329 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2330 {
2331 char copybuffer[256];
2332 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2333 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2334 if (rc < 0)
2335 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2336 else
2337 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2338 }
2339
2340 for (i = 0; i < 32; i++)
2341 {
2342 if ((getstrings & (1 << i)) != 0)
2343 {
2344 const char *substring;
2345 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2346 i, &substring);
2347 if (rc < 0)
2348 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2349 else
2350 {
2351 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2352 pcre_free_substring(substring);
2353 }
2354 }
2355 }
2356
2357 for (getnamesptr = getnames;
2358 *getnamesptr != 0;
2359 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2360 {
2361 const char *substring;
2362 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2363 count, (char *)getnamesptr, &substring);
2364 if (rc < 0)
2365 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2366 else
2367 {
2368 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2369 pcre_free_substring(substring);
2370 }
2371 }
2372
2373 if (getlist)
2374 {
2375 const char **stringlist;
2376 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2377 &stringlist);
2378 if (rc < 0)
2379 fprintf(outfile, "get substring list failed %d\n", rc);
2380 else
2381 {
2382 for (i = 0; i < count; i++)
2383 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2384 if (stringlist[i] != NULL)
2385 fprintf(outfile, "string list not terminated by NULL\n");
2386 /* free((void *)stringlist); */
2387 pcre_free_substring_list(stringlist);
2388 }
2389 }
2390 }
2391
2392 /* There was a partial match */
2393
2394 else if (count == PCRE_ERROR_PARTIAL)
2395 {
2396 if (markptr == NULL) fprintf(outfile, "Partial match");
2397 else fprintf(outfile, "Partial match, mark=%s", markptr);
2398 if (use_size_offsets > 1)
2399 {
2400 fprintf(outfile, ": ");
2401 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2402 outfile);
2403 }
2404 fprintf(outfile, "\n");
2405 break; /* Out of the /g loop */
2406 }
2407
2408 /* Failed to match. If this is a /g or /G loop and we previously set
2409 g_notempty after a null match, this is not necessarily the end. We want
2410 to advance the start offset, and continue. We won't be at the end of the
2411 string - that was checked before setting g_notempty.
2412
2413 Complication arises in the case when the newline option is "any" or
2414 "anycrlf". If the previous match was at the end of a line terminated by
2415 CRLF, an advance of one character just passes the \r, whereas we should
2416 prefer the longer newline sequence, as does the code in pcre_exec().
2417 Fudge the offset value to achieve this.
2418
2419 Otherwise, in the case of UTF-8 matching, the advance must be one
2420 character, not one byte. */
2421
2422 else
2423 {
2424 if (g_notempty != 0)
2425 {
2426 int onechar = 1;
2427 unsigned int obits = ((real_pcre *)re)->options;
2428 use_offsets[0] = start_offset;
2429 if ((obits & PCRE_NEWLINE_BITS) == 0)
2430 {
2431 int d;
2432 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2433 /* Note that these values are always the ASCII ones, even in
2434 EBCDIC environments. CR = 13, NL = 10. */
2435 obits = (d == 13)? PCRE_NEWLINE_CR :
2436 (d == 10)? PCRE_NEWLINE_LF :
2437 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2438 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2439 (d == -1)? PCRE_NEWLINE_ANY : 0;
2440 }
2441 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2442 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2443 &&
2444 start_offset < len - 1 &&
2445 bptr[start_offset] == '\r' &&
2446 bptr[start_offset+1] == '\n')
2447 onechar++;
2448 else if (use_utf8)
2449 {
2450 while (start_offset + onechar < len)
2451 {
2452 int tb = bptr[start_offset+onechar];
2453 if (tb <= 127) break;
2454 tb &= 0xc0;
2455 if (tb != 0 && tb != 0xc0) onechar++;
2456 }
2457 }
2458 use_offsets[1] = start_offset + onechar;
2459 }
2460 else
2461 {
2462 if (count == PCRE_ERROR_NOMATCH)
2463 {
2464 if (gmatched == 0)
2465 {
2466 if (markptr == NULL) fprintf(outfile, "No match\n");
2467 else fprintf(outfile, "No match, mark = %s\n", markptr);
2468 }
2469 }
2470 else fprintf(outfile, "Error %d\n", count);
2471 break; /* Out of the /g loop */
2472 }
2473 }
2474
2475 /* If not /g or /G we are done */
2476
2477 if (!do_g && !do_G) break;
2478
2479 /* If we have matched an empty string, first check to see if we are at
2480 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2481 Perl's /g option does. This turns out to be rather cunning. First we set
2482 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2483 same point. If this fails (picked up above) we advance to the next
2484 character. */
2485
2486 g_notempty = 0;
2487
2488 if (use_offsets[0] == use_offsets[1])
2489 {
2490 if (use_offsets[0] == len) break;
2491 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2492 }
2493
2494 /* For /g, update the start offset, leaving the rest alone */
2495
2496 if (do_g) start_offset = use_offsets[1];
2497
2498 /* For /G, update the pointer and length */
2499
2500 else
2501 {
2502 bptr += use_offsets[1];
2503 len -= use_offsets[1];
2504 }
2505 } /* End of loop for /g and /G */
2506
2507 NEXT_DATA: continue;
2508 } /* End of loop for data lines */
2509
2510 CONTINUE:
2511
2512 #if !defined NOPOSIX
2513 if (posix || do_posix) regfree(&preg);
2514 #endif
2515
2516 if (re != NULL) new_free(re);
2517 if (extra != NULL) new_free(extra);
2518 if (tables != NULL)
2519 {
2520 new_free((void *)tables);
2521 setlocale(LC_CTYPE, "C");
2522 locale_set = 0;
2523 }
2524 }
2525
2526 if (infile == stdin) fprintf(outfile, "\n");
2527
2528 EXIT:
2529
2530 if (infile != NULL && infile != stdin) fclose(infile);
2531 if (outfile != NULL && outfile != stdout) fclose(outfile);
2532
2533 free(buffer);
2534 free(dbuffer);
2535 free(pbuffer);
2536 free(offsets);
2537
2538 return yield;
2539 }
2540
2541 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12