/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 79 - (show annotations) (download)
Sat Feb 24 21:40:52 2007 UTC (7 years, 7 months ago) by nigel
File MIME type: text/plain
File size: 52916 byte(s)
Load pcre-6.1 into code/trunk.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #include <ctype.h>
40 #include <stdio.h>
41 #include <string.h>
42 #include <stdlib.h>
43 #include <time.h>
44 #include <locale.h>
45 #include <errno.h>
46
47 #define PCRE_SPY /* For Win32 build, import data, not export */
48
49 /* We need the internal info for displaying the results of pcre_study() and
50 other internal data; pcretest also uses some of the fixed tables, and generally
51 has "inside information" compared to a program that strictly follows the PCRE
52 API. */
53
54 #include "pcre_internal.h"
55
56
57 /* It is possible to compile this test program without including support for
58 testing the POSIX interface, though this is not available via the standard
59 Makefile. */
60
61 #if !defined NOPOSIX
62 #include "pcreposix.h"
63 #endif
64
65 /* It is also possible, for the benefit of the version imported into Exim, to
66 build pcretest without support for UTF8 (define NOUTF8), without the interface
67 to the DFA matcher (NODFA), and without the doublecheck of the old "info"
68 function (define NOINFOCHECK). */
69
70
/* Fallback for environments whose <time.h> did not supply CLOCKS_PER_SEC:
use CLK_TCK when that is available, otherwise assume 100 ticks per second. */

#if !defined CLOCKS_PER_SEC
#  if defined CLK_TCK
#    define CLOCKS_PER_SEC CLK_TCK
#  else
#    define CLOCKS_PER_SEC 100
#  endif
#endif

/* Number of iterations used by the timing loops. */

#define LOOPREPEAT 500000

/* Size of the input line buffer; the pattern-copy and data buffers are the
same size. */

#define BUFFER_SIZE 30000
#define PBUFFER_SIZE BUFFER_SIZE
#define DBUFFER_SIZE BUFFER_SIZE
84
85
86 static FILE *outfile;
87 static int log_store = 0;
88 static int callout_count;
89 static int callout_extra;
90 static int callout_fail_count;
91 static int callout_fail_id;
92 static int first_callout;
93 static int show_malloc;
94 static int use_utf8;
95 static size_t gotten_store;
96
97 static uschar *pbuffer = NULL;
98
99
100
101 /*************************************************
102 * Read number from string *
103 *************************************************/
104
105 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
106 around with conditional compilation, just do the job by hand. It is only used
107 for unpicking the -o argument, so just keep it simple.
108
109 Arguments:
110 str string to be converted
111 endptr where to put the end pointer
112
113 Returns: the converted value, as an int
114 */
115
/* Convert a run of decimal digits, optionally preceded by white space, into
an int.

Arguments:
  str        the text to convert
  endptr     receives a pointer to the first character that was not consumed

Returns:     the accumulated value; zero if no digit was found
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
unsigned char *cursor = str;
int total = 0;

/* Step over leading white space, stopping at the terminating zero. */

for (; *cursor != 0 && isspace(*cursor); cursor++)
  {
  }

/* Fold each following decimal digit into the running total. */

for (; isdigit(*cursor); cursor++)
  {
  total = total * 10 + (int)(*cursor - '0');
  }

*endptr = cursor;
return total;
}
125
126
127
128
129 /*************************************************
130 * Convert UTF-8 string to value *
131 *************************************************/
132
133 /* This function takes one or more bytes that represents a UTF-8 character,
134 and returns the value of the character.
135
136 Argument:
137 buffer a pointer to the byte vector
138 vptr a pointer to an int to receive the value
139
140 Returns: > 0 => the number of bytes consumed
141 -6 to 0 => malformed UTF-8 character at offset = (-return)
142 */
143
144 #if !defined NOUTF8
145
146 static int
147 utf82ord(unsigned char *buffer, int *vptr)
148 {
149 int c = *buffer++;
150 int d = c;
151 int i, j, s;
152
153 for (i = -1; i < 6; i++) /* i is number of additional bytes */
154 {
155 if ((d & 0x80) == 0) break;
156 d <<= 1;
157 }
158
159 if (i == -1) { *vptr = c; return 1; } /* ascii character */
160 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
161
162 /* i now has a value in the range 1-5 */
163
164 s = 6*i;
165 d = (c & _pcre_utf8_table3[i]) << s;
166
167 for (j = 0; j < i; j++)
168 {
169 c = *buffer++;
170 if ((c & 0xc0) != 0x80) return -(j+1);
171 s -= 6;
172 d |= (c & 0x3f) << s;
173 }
174
175 /* Check that encoding was the correct unique one */
176
177 for (j = 0; j < _pcre_utf8_table1_size; j++)
178 if (d <= _pcre_utf8_table1[j]) break;
179 if (j != i) return -(i+1);
180
181 /* Valid value */
182
183 *vptr = d;
184 return i+1;
185 }
186
187 #endif
188
189
190
191 /*************************************************
192 * Print character string *
193 *************************************************/
194
195 /* Character string printing function. Must handle UTF-8 strings in utf8
196 mode. Yields number of characters printed. If handed a NULL file, just counts
197 chars without printing. */
198
199 static int pchars(unsigned char *p, int length, FILE *f)
200 {
201 int c;
202 int yield = 0;
203
204 while (length-- > 0)
205 {
206 #if !defined NOUTF8
207 if (use_utf8)
208 {
209 int rc = utf82ord(p, &c);
210
211 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
212 {
213 length -= rc - 1;
214 p += rc;
215 if (c < 256 && isprint(c))
216 {
217 if (f != NULL) fprintf(f, "%c", c);
218 yield++;
219 }
220 else
221 {
222 int n;
223 if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
224 yield += n;
225 }
226 continue;
227 }
228 }
229 #endif
230
231 /* Not UTF-8, or malformed UTF-8 */
232
233 if (isprint(c = *(p++)))
234 {
235 if (f != NULL) fprintf(f, "%c", c);
236 yield++;
237 }
238 else
239 {
240 if (f != NULL) fprintf(f, "\\x%02x", c);
241 yield += 4;
242 }
243 }
244
245 return yield;
246 }
247
248
249
250 /*************************************************
251 * Callout function *
252 *************************************************/
253
254 /* Called from PCRE as a result of the (?C) item. We print out where we are in
255 the match. Yield zero unless more callouts than the fail count, or the callout
256 data is not zero. */
257
258 static int callout(pcre_callout_block *cb)
259 {
260 FILE *f = (first_callout | callout_extra)? outfile : NULL;
261 int i, pre_start, post_start, subject_length;
262
263 if (callout_extra)
264 {
265 fprintf(f, "Callout %d: last capture = %d\n",
266 cb->callout_number, cb->capture_last);
267
268 for (i = 0; i < cb->capture_top * 2; i += 2)
269 {
270 if (cb->offset_vector[i] < 0)
271 fprintf(f, "%2d: <unset>\n", i/2);
272 else
273 {
274 fprintf(f, "%2d: ", i/2);
275 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
276 cb->offset_vector[i+1] - cb->offset_vector[i], f);
277 fprintf(f, "\n");
278 }
279 }
280 }
281
282 /* Re-print the subject in canonical form, the first time or if giving full
283 datails. On subsequent calls in the same match, we use pchars just to find the
284 printed lengths of the substrings. */
285
286 if (f != NULL) fprintf(f, "--->");
287
288 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
289 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
290 cb->current_position - cb->start_match, f);
291
292 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
293
294 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
295 cb->subject_length - cb->current_position, f);
296
297 if (f != NULL) fprintf(f, "\n");
298
299 /* Always print appropriate indicators, with callout number if not already
300 shown. For automatic callouts, show the pattern offset. */
301
302 if (cb->callout_number == 255)
303 {
304 fprintf(outfile, "%+3d ", cb->pattern_position);
305 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
306 }
307 else
308 {
309 if (callout_extra) fprintf(outfile, " ");
310 else fprintf(outfile, "%3d ", cb->callout_number);
311 }
312
313 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
314 fprintf(outfile, "^");
315
316 if (post_start > 0)
317 {
318 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
319 fprintf(outfile, "^");
320 }
321
322 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
323 fprintf(outfile, " ");
324
325 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
326 pbuffer + cb->pattern_position);
327
328 fprintf(outfile, "\n");
329 first_callout = 0;
330
331 if (cb->callout_data != NULL)
332 {
333 int callout_data = *((int *)(cb->callout_data));
334 if (callout_data != 0)
335 {
336 fprintf(outfile, "Callout data = %d\n", callout_data);
337 return callout_data;
338 }
339 }
340
341 return (cb->callout_number != callout_fail_id)? 0 :
342 (++callout_count >= callout_fail_count)? 1 : 0;
343 }
344
345
346 /*************************************************
347 * Local malloc functions *
348 *************************************************/
349
350 /* Alternative malloc function, to test functionality and show the size of the
351 compiled re. */
352
353 static void *new_malloc(size_t size)
354 {
355 void *block = malloc(size);
356 gotten_store = size;
357 if (show_malloc)
358 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
359 return block;
360 }
361
362 static void new_free(void *block)
363 {
364 if (show_malloc)
365 fprintf(outfile, "free %p\n", block);
366 free(block);
367 }
368
369
370 /* For recursion malloc/free, to test stacking calls */
371
372 static void *stack_malloc(size_t size)
373 {
374 void *block = malloc(size);
375 if (show_malloc)
376 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
377 return block;
378 }
379
380 static void stack_free(void *block)
381 {
382 if (show_malloc)
383 fprintf(outfile, "stack_free %p\n", block);
384 free(block);
385 }
386
387
388 /*************************************************
389 * Call pcre_fullinfo() *
390 *************************************************/
391
392 /* Get one piece of information from the pcre_fullinfo() function */
393
394 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
395 {
396 int rc;
397 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
398 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
399 }
400
401
402
403 /*************************************************
404 * Byte flipping function *
405 *************************************************/
406
/* Reverse the byte order of a 16-bit or 32-bit quantity.

Arguments:
  value      the quantity to flip
  n          its size in bytes: 2 selects a 16-bit swap, any other value a
             32-bit swap

Returns:     the byte-swapped value; bits above those being swapped are
             dropped
*/

static long int
byteflip(long int value, int n)
{
/* Work in an unsigned type: left-shifting a signed long so that a set bit
reaches or passes the sign bit is undefined behaviour, which the 24-bit shift
would trigger wherever long is only 32 bits wide. */

unsigned long int bits = (unsigned long int)value;

if (n == 2)
  return (long int)(((bits & 0x00ffUL) << 8) | ((bits & 0xff00UL) >> 8));

return (long int)(((bits & 0x000000ffUL) << 24) |
                  ((bits & 0x0000ff00UL) <<  8) |
                  ((bits & 0x00ff0000UL) >>  8) |
                  ((bits & 0xff000000UL) >> 24));
}
416
417
418
419
420 /*************************************************
421 * Main Program *
422 *************************************************/
423
424 /* Read lines from named file or stdin and write to named file or stdout; lines
425 consist of a regular expression, in delimiters and optionally followed by
426 options, followed by a set of test data, terminated by an empty line. */
427
428 int main(int argc, char **argv)
429 {
430 FILE *infile = stdin;
431 int options = 0;
432 int study_options = 0;
433 int op = 1;
434 int timeit = 0;
435 int showinfo = 0;
436 int showstore = 0;
437 int size_offsets = 45;
438 int size_offsets_max;
439 int *offsets = NULL;
440 #if !defined NOPOSIX
441 int posix = 0;
442 #endif
443 int debug = 0;
444 int done = 0;
445 int all_use_dfa = 0;
446 int yield = 0;
447
448 unsigned char *buffer;
449 unsigned char *dbuffer;
450
451 /* Get buffers from malloc() so that Electric Fence will check their misuse
452 when I am debugging. */
453
454 buffer = (unsigned char *)malloc(BUFFER_SIZE);
455 dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
456 pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
457
458 /* The outfile variable is static so that new_malloc can use it. The _setmode()
459 stuff is some magic that I don't understand, but which apparently does good
460 things in Windows. It's related to line terminations. */
461
462 #if defined(_WIN32) || defined(WIN32)
463 _setmode( _fileno( stdout ), 0x8000 );
464 #endif /* defined(_WIN32) || defined(WIN32) */
465
466 outfile = stdout;
467
468 /* Scan options */
469
470 while (argc > 1 && argv[op][0] == '-')
471 {
472 unsigned char *endptr;
473
474 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
475 showstore = 1;
476 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
477 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
478 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
479 #if !defined NODFA
480 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
481 #endif
482 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
483 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
484 *endptr == 0))
485 {
486 op++;
487 argc--;
488 }
489 #if !defined NOPOSIX
490 else if (strcmp(argv[op], "-p") == 0) posix = 1;
491 #endif
492 else if (strcmp(argv[op], "-C") == 0)
493 {
494 int rc;
495 printf("PCRE version %s\n", pcre_version());
496 printf("Compiled with\n");
497 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
498 printf(" %sUTF-8 support\n", rc? "" : "No ");
499 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
500 printf(" %sUnicode properties support\n", rc? "" : "No ");
501 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
502 printf(" Newline character is %s\n", (rc == '\r')? "CR" : "LF");
503 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
504 printf(" Internal link size = %d\n", rc);
505 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
506 printf(" POSIX malloc threshold = %d\n", rc);
507 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
508 printf(" Default match limit = %d\n", rc);
509 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
510 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
511 exit(0);
512 }
513 else
514 {
515 printf("** Unknown or malformed option %s\n", argv[op]);
516 printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
517 printf(" -C show PCRE compile-time options and exit\n");
518 printf(" -d debug: show compiled code; implies -i\n");
519 #if !defined NODFA
520 printf(" -dfa force DFA matching for all subjects\n");
521 #endif
522 printf(" -i show information about compiled pattern\n"
523 " -m output memory used information\n"
524 " -o <n> set size of offsets vector to <n>\n");
525 #if !defined NOPOSIX
526 printf(" -p use POSIX interface\n");
527 #endif
528 printf(" -s output store (memory) used information\n"
529 " -t time compilation and execution\n");
530 yield = 1;
531 goto EXIT;
532 }
533 op++;
534 argc--;
535 }
536
537 /* Get the store for the offsets vector, and remember what it was */
538
539 size_offsets_max = size_offsets;
540 offsets = (int *)malloc(size_offsets_max * sizeof(int));
541 if (offsets == NULL)
542 {
543 printf("** Failed to get %d bytes of memory for offsets vector\n",
544 size_offsets_max * sizeof(int));
545 yield = 1;
546 goto EXIT;
547 }
548
549 /* Sort out the input and output files */
550
551 if (argc > 1)
552 {
553 infile = fopen(argv[op], "rb");
554 if (infile == NULL)
555 {
556 printf("** Failed to open %s\n", argv[op]);
557 yield = 1;
558 goto EXIT;
559 }
560 }
561
562 if (argc > 2)
563 {
564 outfile = fopen(argv[op+1], "wb");
565 if (outfile == NULL)
566 {
567 printf("** Failed to open %s\n", argv[op+1]);
568 yield = 1;
569 goto EXIT;
570 }
571 }
572
573 /* Set alternative malloc function */
574
575 pcre_malloc = new_malloc;
576 pcre_free = new_free;
577 pcre_stack_malloc = stack_malloc;
578 pcre_stack_free = stack_free;
579
580 /* Heading line, then prompt for first regex if stdin */
581
582 fprintf(outfile, "PCRE version %s\n\n", pcre_version());
583
584 /* Main loop */
585
586 while (!done)
587 {
588 pcre *re = NULL;
589 pcre_extra *extra = NULL;
590
591 #if !defined NOPOSIX /* There are still compilers that require no indent */
592 regex_t preg;
593 int do_posix = 0;
594 #endif
595
596 const char *error;
597 unsigned char *p, *pp, *ppp;
598 unsigned char *to_file = NULL;
599 const unsigned char *tables = NULL;
600 unsigned long int true_size, true_study_size = 0;
601 size_t size, regex_gotten_store;
602 int do_study = 0;
603 int do_debug = debug;
604 int do_G = 0;
605 int do_g = 0;
606 int do_showinfo = showinfo;
607 int do_showrest = 0;
608 int do_flip = 0;
609 int erroroffset, len, delimiter;
610
611 use_utf8 = 0;
612
613 if (infile == stdin) printf(" re> ");
614 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
615 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
616 fflush(outfile);
617
618 p = buffer;
619 while (isspace(*p)) p++;
620 if (*p == 0) continue;
621
622 /* See if the pattern is to be loaded pre-compiled from a file. */
623
624 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
625 {
626 unsigned long int magic;
627 uschar sbuf[8];
628 FILE *f;
629
630 p++;
631 pp = p + (int)strlen((char *)p);
632 while (isspace(pp[-1])) pp--;
633 *pp = 0;
634
635 f = fopen((char *)p, "rb");
636 if (f == NULL)
637 {
638 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
639 continue;
640 }
641
642 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
643
644 true_size =
645 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
646 true_study_size =
647 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
648
649 re = (real_pcre *)new_malloc(true_size);
650 regex_gotten_store = gotten_store;
651
652 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
653
654 magic = ((real_pcre *)re)->magic_number;
655 if (magic != MAGIC_NUMBER)
656 {
657 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
658 {
659 do_flip = 1;
660 }
661 else
662 {
663 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
664 fclose(f);
665 continue;
666 }
667 }
668
669 fprintf(outfile, "Compiled regex%s loaded from %s\n",
670 do_flip? " (byte-inverted)" : "", p);
671
672 /* Need to know if UTF-8 for printing data strings */
673
674 new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
675 use_utf8 = (options & PCRE_UTF8) != 0;
676
677 /* Now see if there is any following study data */
678
679 if (true_study_size != 0)
680 {
681 pcre_study_data *psd;
682
683 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
684 extra->flags = PCRE_EXTRA_STUDY_DATA;
685
686 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
687 extra->study_data = psd;
688
689 if (fread(psd, 1, true_study_size, f) != true_study_size)
690 {
691 FAIL_READ:
692 fprintf(outfile, "Failed to read data from %s\n", p);
693 if (extra != NULL) new_free(extra);
694 if (re != NULL) new_free(re);
695 fclose(f);
696 continue;
697 }
698 fprintf(outfile, "Study data loaded from %s\n", p);
699 do_study = 1; /* To get the data output if requested */
700 }
701 else fprintf(outfile, "No study data\n");
702
703 fclose(f);
704 goto SHOW_INFO;
705 }
706
707 /* In-line pattern (the usual case). Get the delimiter and seek the end of
708 the pattern; if it isn't complete, read more. */
709
710 delimiter = *p++;
711
712 if (isalnum(delimiter) || delimiter == '\\')
713 {
714 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
715 goto SKIP_DATA;
716 }
717
718 pp = p;
719
720 for(;;)
721 {
722 while (*pp != 0)
723 {
724 if (*pp == '\\' && pp[1] != 0) pp++;
725 else if (*pp == delimiter) break;
726 pp++;
727 }
728 if (*pp != 0) break;
729
730 len = BUFFER_SIZE - (pp - buffer);
731 if (len < 256)
732 {
733 fprintf(outfile, "** Expression too long - missing delimiter?\n");
734 goto SKIP_DATA;
735 }
736
737 if (infile == stdin) printf(" > ");
738 if (fgets((char *)pp, len, infile) == NULL)
739 {
740 fprintf(outfile, "** Unexpected EOF\n");
741 done = 1;
742 goto CONTINUE;
743 }
744 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
745 }
746
747 /* If the first character after the delimiter is backslash, make
748 the pattern end with backslash. This is purely to provide a way
749 of testing for the error message when a pattern ends with backslash. */
750
751 if (pp[1] == '\\') *pp++ = '\\';
752
753 /* Terminate the pattern at the delimiter, and save a copy of the pattern
754 for callouts. */
755
756 *pp++ = 0;
757 strcpy((char *)pbuffer, (char *)p);
758
759 /* Look for options after final delimiter */
760
761 options = 0;
762 study_options = 0;
763 log_store = showstore; /* default from command line */
764
765 while (*pp != 0)
766 {
767 switch (*pp++)
768 {
769 case 'f': options |= PCRE_FIRSTLINE; break;
770 case 'g': do_g = 1; break;
771 case 'i': options |= PCRE_CASELESS; break;
772 case 'm': options |= PCRE_MULTILINE; break;
773 case 's': options |= PCRE_DOTALL; break;
774 case 'x': options |= PCRE_EXTENDED; break;
775
776 case '+': do_showrest = 1; break;
777 case 'A': options |= PCRE_ANCHORED; break;
778 case 'C': options |= PCRE_AUTO_CALLOUT; break;
779 case 'D': do_debug = do_showinfo = 1; break;
780 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
781 case 'F': do_flip = 1; break;
782 case 'G': do_G = 1; break;
783 case 'I': do_showinfo = 1; break;
784 case 'M': log_store = 1; break;
785 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
786
787 #if !defined NOPOSIX
788 case 'P': do_posix = 1; break;
789 #endif
790
791 case 'S': do_study = 1; break;
792 case 'U': options |= PCRE_UNGREEDY; break;
793 case 'X': options |= PCRE_EXTRA; break;
794 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
795 case '?': options |= PCRE_NO_UTF8_CHECK; break;
796
797 case 'L':
798 ppp = pp;
799 /* The '\r' test here is so that it works on Windows */
800 while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
801 *ppp = 0;
802 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
803 {
804 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
805 goto SKIP_DATA;
806 }
807 tables = pcre_maketables();
808 pp = ppp;
809 break;
810
811 case '>':
812 to_file = pp;
813 while (*pp != 0) pp++;
814 while (isspace(pp[-1])) pp--;
815 *pp = 0;
816 break;
817
818 case '\r': /* So that it works in Windows */
819 case '\n':
820 case ' ':
821 break;
822
823 default:
824 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
825 goto SKIP_DATA;
826 }
827 }
828
829 /* Handle compiling via the POSIX interface, which doesn't support the
830 timing, showing, or debugging options, nor the ability to pass over
831 local character tables. */
832
833 #if !defined NOPOSIX
834 if (posix || do_posix)
835 {
836 int rc;
837 int cflags = 0;
838
839 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
840 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
841 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
842 rc = regcomp(&preg, (char *)p, cflags);
843
844 /* Compilation failed; go back for another re, skipping to blank line
845 if non-interactive. */
846
847 if (rc != 0)
848 {
849 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
850 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
851 goto SKIP_DATA;
852 }
853 }
854
855 /* Handle compiling via the native interface */
856
857 else
858 #endif /* !defined NOPOSIX */
859
860 {
861 if (timeit)
862 {
863 register int i;
864 clock_t time_taken;
865 clock_t start_time = clock();
866 for (i = 0; i < LOOPREPEAT; i++)
867 {
868 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
869 if (re != NULL) free(re);
870 }
871 time_taken = clock() - start_time;
872 fprintf(outfile, "Compile time %.3f milliseconds\n",
873 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
874 (double)CLOCKS_PER_SEC);
875 }
876
877 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
878
879 /* Compilation failed; go back for another re, skipping to blank line
880 if non-interactive. */
881
882 if (re == NULL)
883 {
884 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
885 SKIP_DATA:
886 if (infile != stdin)
887 {
888 for (;;)
889 {
890 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
891 {
892 done = 1;
893 goto CONTINUE;
894 }
895 len = (int)strlen((char *)buffer);
896 while (len > 0 && isspace(buffer[len-1])) len--;
897 if (len == 0) break;
898 }
899 fprintf(outfile, "\n");
900 }
901 goto CONTINUE;
902 }
903
904 /* Compilation succeeded; print data if required. There are now two
905 info-returning functions. The old one has a limited interface and
906 returns only limited data. Check that it agrees with the newer one. */
907
908 if (log_store)
909 fprintf(outfile, "Memory allocation (code space): %d\n",
910 (int)(gotten_store -
911 sizeof(real_pcre) -
912 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
913
914 /* Extract the size for possible writing before possibly flipping it,
915 and remember the store that was got. */
916
917 true_size = ((real_pcre *)re)->size;
918 regex_gotten_store = gotten_store;
919
920 /* If /S was present, study the regexp to generate additional info to
921 help with the matching. */
922
923 if (do_study)
924 {
925 if (timeit)
926 {
927 register int i;
928 clock_t time_taken;
929 clock_t start_time = clock();
930 for (i = 0; i < LOOPREPEAT; i++)
931 extra = pcre_study(re, study_options, &error);
932 time_taken = clock() - start_time;
933 if (extra != NULL) free(extra);
934 fprintf(outfile, " Study time %.3f milliseconds\n",
935 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
936 (double)CLOCKS_PER_SEC);
937 }
938 extra = pcre_study(re, study_options, &error);
939 if (error != NULL)
940 fprintf(outfile, "Failed to study: %s\n", error);
941 else if (extra != NULL)
942 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
943 }
944
945 /* If the 'F' option was present, we flip the bytes of all the integer
946 fields in the regex data block and the study block. This is to make it
947 possible to test PCRE's handling of byte-flipped patterns, e.g. those
948 compiled on a different architecture. */
949
950 if (do_flip)
951 {
952 real_pcre *rre = (real_pcre *)re;
953 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
954 rre->size = byteflip(rre->size, sizeof(rre->size));
955 rre->options = byteflip(rre->options, sizeof(rre->options));
956 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
957 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
958 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
959 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
960 rre->name_table_offset = byteflip(rre->name_table_offset,
961 sizeof(rre->name_table_offset));
962 rre->name_entry_size = byteflip(rre->name_entry_size,
963 sizeof(rre->name_entry_size));
964 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
965
966 if (extra != NULL)
967 {
968 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
969 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
970 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
971 }
972 }
973
974 /* Extract information from the compiled data if required */
975
976 SHOW_INFO:
977
978 if (do_showinfo)
979 {
980 unsigned long int get_options, all_options;
981 #if !defined NOINFOCHECK
982 int old_first_char, old_options, old_count;
983 #endif
984 int count, backrefmax, first_char, need_char;
985 int nameentrysize, namecount;
986 const uschar *nametable;
987
988 if (do_debug)
989 {
990 fprintf(outfile, "------------------------------------------------------------------\n");
991 _pcre_printint(re, outfile);
992 }
993
994 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
995 new_info(re, NULL, PCRE_INFO_SIZE, &size);
996 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
997 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
998 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
999 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1000 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1001 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1002 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1003
1004 #if !defined NOINFOCHECK
1005 old_count = pcre_info(re, &old_options, &old_first_char);
1006 if (count < 0) fprintf(outfile,
1007 "Error %d from pcre_info()\n", count);
1008 else
1009 {
1010 if (old_count != count) fprintf(outfile,
1011 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1012 old_count);
1013
1014 if (old_first_char != first_char) fprintf(outfile,
1015 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1016 first_char, old_first_char);
1017
1018 if (old_options != (int)get_options) fprintf(outfile,
1019 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1020 get_options, old_options);
1021 }
1022 #endif
1023
1024 if (size != regex_gotten_store) fprintf(outfile,
1025 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1026 (int)size, (int)regex_gotten_store);
1027
1028 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1029 if (backrefmax > 0)
1030 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1031
1032 if (namecount > 0)
1033 {
1034 fprintf(outfile, "Named capturing subpatterns:\n");
1035 while (namecount-- > 0)
1036 {
1037 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1038 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1039 GET2(nametable, 0));
1040 nametable += nameentrysize;
1041 }
1042 }
1043
1044 /* The NOPARTIAL bit is a private bit in the options, so we have
1045 to fish it out via our back door */
1046
1047 all_options = ((real_pcre *)re)->options;
1048 if (do_flip)
1049 {
1050 all_options = byteflip(all_options, sizeof(all_options));
1051 }
1052
1053 if ((all_options & PCRE_NOPARTIAL) != 0)
1054 fprintf(outfile, "Partial matching not supported\n");
1055
1056 if (get_options == 0) fprintf(outfile, "No options\n");
1057 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",
1058 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1059 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1060 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1061 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1062 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1063 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1064 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1065 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1066 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1067 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1068 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1069
1070 if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1071 fprintf(outfile, "Case state changes\n");
1072
1073 if (first_char == -1)
1074 {
1075 fprintf(outfile, "First char at start or follows \\n\n");
1076 }
1077 else if (first_char < 0)
1078 {
1079 fprintf(outfile, "No first char\n");
1080 }
1081 else
1082 {
1083 int ch = first_char & 255;
1084 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1085 "" : " (caseless)";
1086 if (isprint(ch))
1087 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1088 else
1089 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1090 }
1091
1092 if (need_char < 0)
1093 {
1094 fprintf(outfile, "No need char\n");
1095 }
1096 else
1097 {
1098 int ch = need_char & 255;
1099 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1100 "" : " (caseless)";
1101 if (isprint(ch))
1102 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1103 else
1104 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1105 }
1106
1107 /* Don't output study size; at present it is in any case a fixed
1108 value, but it varies, depending on the computer architecture, and
1109 so messes up the test suite. (And with the /F option, it might be
1110 flipped.) */
1111
1112 if (do_study)
1113 {
1114 if (extra == NULL)
1115 fprintf(outfile, "Study returned NULL\n");
1116 else
1117 {
1118 uschar *start_bits = NULL;
1119 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1120
1121 if (start_bits == NULL)
1122 fprintf(outfile, "No starting byte set\n");
1123 else
1124 {
1125 int i;
1126 int c = 24;
1127 fprintf(outfile, "Starting byte set: ");
1128 for (i = 0; i < 256; i++)
1129 {
1130 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1131 {
1132 if (c > 75)
1133 {
1134 fprintf(outfile, "\n ");
1135 c = 2;
1136 }
1137 if (isprint(i) && i != ' ')
1138 {
1139 fprintf(outfile, "%c ", i);
1140 c += 2;
1141 }
1142 else
1143 {
1144 fprintf(outfile, "\\x%02x ", i);
1145 c += 5;
1146 }
1147 }
1148 }
1149 fprintf(outfile, "\n");
1150 }
1151 }
1152 }
1153 }
1154
1155 /* If the '>' option was present, we write out the regex to a file, and
1156 that is all. The first 8 bytes of the file are the regex length and then
1157 the study length, in big-endian order. */
1158
1159 if (to_file != NULL)
1160 {
1161 FILE *f = fopen((char *)to_file, "wb");
1162 if (f == NULL)
1163 {
1164 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1165 }
1166 else
1167 {
1168 uschar sbuf[8];
1169 sbuf[0] = (true_size >> 24) & 255;
1170 sbuf[1] = (true_size >> 16) & 255;
1171 sbuf[2] = (true_size >> 8) & 255;
1172 sbuf[3] = (true_size) & 255;
1173
1174 sbuf[4] = (true_study_size >> 24) & 255;
1175 sbuf[5] = (true_study_size >> 16) & 255;
1176 sbuf[6] = (true_study_size >> 8) & 255;
1177 sbuf[7] = (true_study_size) & 255;
1178
1179 if (fwrite(sbuf, 1, 8, f) < 8 ||
1180 fwrite(re, 1, true_size, f) < true_size)
1181 {
1182 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1183 }
1184 else
1185 {
1186 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1187 if (extra != NULL)
1188 {
1189 if (fwrite(extra->study_data, 1, true_study_size, f) <
1190 true_study_size)
1191 {
1192 fprintf(outfile, "Write error on %s: %s\n", to_file,
1193 strerror(errno));
1194 }
1195 else fprintf(outfile, "Study data written to %s\n", to_file);
1196 }
1197 }
1198 fclose(f);
1199 }
1200
1201 new_free(re);
1202 if (extra != NULL) new_free(extra);
1203 if (tables != NULL) new_free((void *)tables);
1204 continue; /* With next regex */
1205 }
1206 } /* End of non-POSIX compile */
1207
1208 /* Read data lines and test them */
1209
1210 for (;;)
1211 {
1212 unsigned char *q;
1213 unsigned char *bptr = dbuffer;
1214 int *use_offsets = offsets;
1215 int use_size_offsets = size_offsets;
1216 int callout_data = 0;
1217 int callout_data_set = 0;
1218 int count, c;
1219 int copystrings = 0;
1220 int find_match_limit = 0;
1221 int getstrings = 0;
1222 int getlist = 0;
1223 int gmatched = 0;
1224 int start_offset = 0;
1225 int g_notempty = 0;
1226 int use_dfa = 0;
1227
1228 options = 0;
1229
1230 pcre_callout = callout;
1231 first_callout = 1;
1232 callout_extra = 0;
1233 callout_count = 0;
1234 callout_fail_count = 999999;
1235 callout_fail_id = -1;
1236 show_malloc = 0;
1237
1238 if (infile == stdin) printf("data> ");
1239 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1240 {
1241 done = 1;
1242 goto CONTINUE;
1243 }
1244 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1245
1246 len = (int)strlen((char *)buffer);
1247 while (len > 0 && isspace(buffer[len-1])) len--;
1248 buffer[len] = 0;
1249 if (len == 0) break;
1250
1251 p = buffer;
1252 while (isspace(*p)) p++;
1253
1254 q = dbuffer;
1255 while ((c = *p++) != 0)
1256 {
1257 int i = 0;
1258 int n = 0;
1259
1260 if (c == '\\') switch ((c = *p++))
1261 {
1262 case 'a': c = 7; break;
1263 case 'b': c = '\b'; break;
1264 case 'e': c = 27; break;
1265 case 'f': c = '\f'; break;
1266 case 'n': c = '\n'; break;
1267 case 'r': c = '\r'; break;
1268 case 't': c = '\t'; break;
1269 case 'v': c = '\v'; break;
1270
1271 case '0': case '1': case '2': case '3':
1272 case '4': case '5': case '6': case '7':
1273 c -= '0';
1274 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1275 c = c * 8 + *p++ - '0';
1276 break;
1277
1278 case 'x':
1279
1280 /* Handle \x{..} specially - new Perl thing for utf8 */
1281
1282 #if !defined NOUTF8
1283 if (*p == '{')
1284 {
1285 unsigned char *pt = p;
1286 c = 0;
1287 while (isxdigit(*(++pt)))
1288 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1289 if (*pt == '}')
1290 {
1291 unsigned char buff8[8];
1292 int ii, utn;
1293 utn = _pcre_ord2utf8(c, buff8);
1294 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1295 c = buff8[ii]; /* Last byte */
1296 p = pt + 1;
1297 break;
1298 }
1299 /* Not correct form; fall through */
1300 }
1301 #endif
1302
1303 /* Ordinary \x */
1304
1305 c = 0;
1306 while (i++ < 2 && isxdigit(*p))
1307 {
1308 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1309 p++;
1310 }
1311 break;
1312
1313 case 0: /* \ followed by EOF allows for an empty line */
1314 p--;
1315 continue;
1316
1317 case '>':
1318 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1319 continue;
1320
1321 case 'A': /* Option setting */
1322 options |= PCRE_ANCHORED;
1323 continue;
1324
1325 case 'B':
1326 options |= PCRE_NOTBOL;
1327 continue;
1328
1329 case 'C':
1330 if (isdigit(*p)) /* Set copy string */
1331 {
1332 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1333 copystrings |= 1 << n;
1334 }
1335 else if (isalnum(*p))
1336 {
1337 uschar name[256];
1338 uschar *npp = name;
1339 while (isalnum(*p)) *npp++ = *p++;
1340 *npp = 0;
1341 n = pcre_get_stringnumber(re, (char *)name);
1342 if (n < 0)
1343 fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1344 else copystrings |= 1 << n;
1345 }
1346 else if (*p == '+')
1347 {
1348 callout_extra = 1;
1349 p++;
1350 }
1351 else if (*p == '-')
1352 {
1353 pcre_callout = NULL;
1354 p++;
1355 }
1356 else if (*p == '!')
1357 {
1358 callout_fail_id = 0;
1359 p++;
1360 while(isdigit(*p))
1361 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1362 callout_fail_count = 0;
1363 if (*p == '!')
1364 {
1365 p++;
1366 while(isdigit(*p))
1367 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1368 }
1369 }
1370 else if (*p == '*')
1371 {
1372 int sign = 1;
1373 callout_data = 0;
1374 if (*(++p) == '-') { sign = -1; p++; }
1375 while(isdigit(*p))
1376 callout_data = callout_data * 10 + *p++ - '0';
1377 callout_data *= sign;
1378 callout_data_set = 1;
1379 }
1380 continue;
1381
1382 #if !defined NODFA
1383 case 'D':
1384 #if !defined NOPOSIX
1385 if (posix || do_posix)
1386 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1387 else
1388 #endif
1389 use_dfa = 1;
1390 continue;
1391
1392 case 'F':
1393 options |= PCRE_DFA_SHORTEST;
1394 continue;
1395 #endif
1396
1397 case 'G':
1398 if (isdigit(*p))
1399 {
1400 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1401 getstrings |= 1 << n;
1402 }
1403 else if (isalnum(*p))
1404 {
1405 uschar name[256];
1406 uschar *npp = name;
1407 while (isalnum(*p)) *npp++ = *p++;
1408 *npp = 0;
1409 n = pcre_get_stringnumber(re, (char *)name);
1410 if (n < 0)
1411 fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1412 else getstrings |= 1 << n;
1413 }
1414 continue;
1415
1416 case 'L':
1417 getlist = 1;
1418 continue;
1419
1420 case 'M':
1421 find_match_limit = 1;
1422 continue;
1423
1424 case 'N':
1425 options |= PCRE_NOTEMPTY;
1426 continue;
1427
1428 case 'O':
1429 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1430 if (n > size_offsets_max)
1431 {
1432 size_offsets_max = n;
1433 free(offsets);
1434 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1435 if (offsets == NULL)
1436 {
1437 printf("** Failed to get %d bytes of memory for offsets vector\n",
1438 size_offsets_max * sizeof(int));
1439 yield = 1;
1440 goto EXIT;
1441 }
1442 }
1443 use_size_offsets = n;
1444 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1445 continue;
1446
1447 case 'P':
1448 options |= PCRE_PARTIAL;
1449 continue;
1450
1451 #if !defined NODFA
1452 case 'R':
1453 options |= PCRE_DFA_RESTART;
1454 continue;
1455 #endif
1456
1457 case 'S':
1458 show_malloc = 1;
1459 continue;
1460
1461 case 'Z':
1462 options |= PCRE_NOTEOL;
1463 continue;
1464
1465 case '?':
1466 options |= PCRE_NO_UTF8_CHECK;
1467 continue;
1468 }
1469 *q++ = c;
1470 }
1471 *q = 0;
1472 len = q - dbuffer;
1473
1474 if ((all_use_dfa || use_dfa) && find_match_limit)
1475 {
1476 printf("**Match limit not relevant for DFA matching: ignored\n");
1477 find_match_limit = 0;
1478 }
1479
1480 /* Handle matching via the POSIX interface, which does not
1481 support timing or playing with the match limit or callout data. */
1482
1483 #if !defined NOPOSIX
1484 if (posix || do_posix)
1485 {
1486 int rc;
1487 int eflags = 0;
1488 regmatch_t *pmatch = NULL;
1489 if (use_size_offsets > 0)
1490 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1491 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1492 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1493
1494 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1495
1496 if (rc != 0)
1497 {
1498 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1499 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1500 }
1501 else
1502 {
1503 size_t i;
1504 for (i = 0; i < (size_t)use_size_offsets; i++)
1505 {
1506 if (pmatch[i].rm_so >= 0)
1507 {
1508 fprintf(outfile, "%2d: ", (int)i);
1509 (void)pchars(dbuffer + pmatch[i].rm_so,
1510 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1511 fprintf(outfile, "\n");
1512 if (i == 0 && do_showrest)
1513 {
1514 fprintf(outfile, " 0+ ");
1515 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1516 outfile);
1517 fprintf(outfile, "\n");
1518 }
1519 }
1520 }
1521 }
1522 free(pmatch);
1523 }
1524
1525 /* Handle matching via the native interface - repeats for /g and /G */
1526
1527 else
1528 #endif /* !defined NOPOSIX */
1529
1530 for (;; gmatched++) /* Loop for /g or /G */
1531 {
1532 if (timeit)
1533 {
1534 register int i;
1535 clock_t time_taken;
1536 clock_t start_time = clock();
1537
1538 #if !defined NODFA
1539 if (all_use_dfa || use_dfa)
1540 {
1541 int workspace[1000];
1542 for (i = 0; i < LOOPREPEAT; i++)
1543 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1544 options | g_notempty, use_offsets, use_size_offsets, workspace,
1545 sizeof(workspace)/sizeof(int));
1546 }
1547 else
1548 #endif
1549
1550 for (i = 0; i < LOOPREPEAT; i++)
1551 count = pcre_exec(re, extra, (char *)bptr, len,
1552 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1553
1554 time_taken = clock() - start_time;
1555 fprintf(outfile, "Execute time %.3f milliseconds\n",
1556 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1557 (double)CLOCKS_PER_SEC);
1558 }
1559
1560 /* If find_match_limit is set, we want to do repeated matches with
1561 varying limits in order to find the minimum value. */
1562
1563 if (find_match_limit)
1564 {
1565 int min = 0;
1566 int mid = 64;
1567 int max = -1;
1568
1569 if (extra == NULL)
1570 {
1571 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1572 extra->flags = 0;
1573 }
1574 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1575
1576 for (;;)
1577 {
1578 extra->match_limit = mid;
1579 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1580 options | g_notempty, use_offsets, use_size_offsets);
1581 if (count == PCRE_ERROR_MATCHLIMIT)
1582 {
1583 /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1584 min = mid;
1585 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1586 }
1587 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1588 count == PCRE_ERROR_PARTIAL)
1589 {
1590 if (mid == min + 1)
1591 {
1592 fprintf(outfile, "Minimum match limit = %d\n", mid);
1593 break;
1594 }
1595 /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1596 max = mid;
1597 mid = (min + mid)/2;
1598 }
1599 else break; /* Some other error */
1600 }
1601
1602 extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1603 }
1604
1605 /* If callout_data is set, use the interface with additional data */
1606
1607 else if (callout_data_set)
1608 {
1609 if (extra == NULL)
1610 {
1611 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1612 extra->flags = 0;
1613 }
1614 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1615 extra->callout_data = &callout_data;
1616 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1617 options | g_notempty, use_offsets, use_size_offsets);
1618 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1619 }
1620
1621 /* The normal case is just to do the match once, with the default
1622 value of match_limit. */
1623
1624 #if !defined NODFA
1625 else if (all_use_dfa || use_dfa)
1626 {
1627 int workspace[1000];
1628 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1629 options | g_notempty, use_offsets, use_size_offsets, workspace,
1630 sizeof(workspace)/sizeof(int));
1631 if (count == 0)
1632 {
1633 fprintf(outfile, "Matched, but too many subsidiary matches\n");
1634 count = use_size_offsets/2;
1635 }
1636 }
1637 #endif
1638
1639 else
1640 {
1641 count = pcre_exec(re, extra, (char *)bptr, len,
1642 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1643 if (count == 0)
1644 {
1645 fprintf(outfile, "Matched, but too many substrings\n");
1646 count = use_size_offsets/3;
1647 }
1648 }
1649
1650 /* Matched */
1651
1652 if (count >= 0)
1653 {
1654 int i;
1655 for (i = 0; i < count * 2; i += 2)
1656 {
1657 if (use_offsets[i] < 0)
1658 fprintf(outfile, "%2d: <unset>\n", i/2);
1659 else
1660 {
1661 fprintf(outfile, "%2d: ", i/2);
1662 (void)pchars(bptr + use_offsets[i],
1663 use_offsets[i+1] - use_offsets[i], outfile);
1664 fprintf(outfile, "\n");
1665 if (i == 0)
1666 {
1667 if (do_showrest)
1668 {
1669 fprintf(outfile, " 0+ ");
1670 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1671 outfile);
1672 fprintf(outfile, "\n");
1673 }
1674 }
1675 }
1676 }
1677
1678 for (i = 0; i < 32; i++)
1679 {
1680 if ((copystrings & (1 << i)) != 0)
1681 {
1682 char copybuffer[16];
1683 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1684 i, copybuffer, sizeof(copybuffer));
1685 if (rc < 0)
1686 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1687 else
1688 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1689 }
1690 }
1691
1692 for (i = 0; i < 32; i++)
1693 {
1694 if ((getstrings & (1 << i)) != 0)
1695 {
1696 const char *substring;
1697 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1698 i, &substring);
1699 if (rc < 0)
1700 fprintf(outfile, "get substring %d failed %d\n", i, rc);
1701 else
1702 {
1703 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1704 /* free((void *)substring); */
1705 pcre_free_substring(substring);
1706 }
1707 }
1708 }
1709
1710 if (getlist)
1711 {
1712 const char **stringlist;
1713 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1714 &stringlist);
1715 if (rc < 0)
1716 fprintf(outfile, "get substring list failed %d\n", rc);
1717 else
1718 {
1719 for (i = 0; i < count; i++)
1720 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1721 if (stringlist[i] != NULL)
1722 fprintf(outfile, "string list not terminated by NULL\n");
1723 /* free((void *)stringlist); */
1724 pcre_free_substring_list(stringlist);
1725 }
1726 }
1727 }
1728
1729 /* There was a partial match */
1730
1731 else if (count == PCRE_ERROR_PARTIAL)
1732 {
1733 fprintf(outfile, "Partial match");
1734 #if !defined NODFA
1735 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
1736 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
1737 bptr + use_offsets[0]);
1738 #endif
1739 fprintf(outfile, "\n");
1740 break; /* Out of the /g loop */
1741 }
1742
1743 /* Failed to match. If this is a /g or /G loop and we previously set
1744 g_notempty after a null match, this is not necessarily the end.
1745 We want to advance the start offset, and continue. In the case of UTF-8
1746 matching, the advance must be one character, not one byte. Fudge the
1747 offset values to achieve this. We won't be at the end of the string -
1748 that was checked before setting g_notempty. */
1749
1750 else
1751 {
1752 if (g_notempty != 0)
1753 {
1754 int onechar = 1;
1755 use_offsets[0] = start_offset;
1756 if (use_utf8)
1757 {
1758 while (start_offset + onechar < len)
1759 {
1760 int tb = bptr[start_offset+onechar];
1761 if (tb <= 127) break;
1762 tb &= 0xc0;
1763 if (tb != 0 && tb != 0xc0) onechar++;
1764 }
1765 }
1766 use_offsets[1] = start_offset + onechar;
1767 }
1768 else
1769 {
1770 if (count == PCRE_ERROR_NOMATCH)
1771 {
1772 if (gmatched == 0) fprintf(outfile, "No match\n");
1773 }
1774 else fprintf(outfile, "Error %d\n", count);
1775 break; /* Out of the /g loop */
1776 }
1777 }
1778
1779 /* If not /g or /G we are done */
1780
1781 if (!do_g && !do_G) break;
1782
1783 /* If we have matched an empty string, first check to see if we are at
1784 the end of the subject. If so, the /g loop is over. Otherwise, mimic
1785 what Perl's /g option does. This turns out to be rather cunning. First
1786 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1787 same point. If this fails (picked up above) we advance to the next
1788 character. */
1789
1790 g_notempty = 0;
1791 if (use_offsets[0] == use_offsets[1])
1792 {
1793 if (use_offsets[0] == len) break;
1794 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1795 }
1796
1797 /* For /g, update the start offset, leaving the rest alone */
1798
1799 if (do_g) start_offset = use_offsets[1];
1800
1801 /* For /G, update the pointer and length */
1802
1803 else
1804 {
1805 bptr += use_offsets[1];
1806 len -= use_offsets[1];
1807 }
1808 } /* End of loop for /g and /G */
1809 } /* End of loop for data lines */
1810
1811 CONTINUE:
1812
1813 #if !defined NOPOSIX
1814 if (posix || do_posix) regfree(&preg);
1815 #endif
1816
1817 if (re != NULL) new_free(re);
1818 if (extra != NULL) new_free(extra);
1819 if (tables != NULL)
1820 {
1821 new_free((void *)tables);
1822 setlocale(LC_CTYPE, "C");
1823 }
1824 }
1825
1826 if (infile == stdin) fprintf(outfile, "\n");
1827
1828 EXIT:
1829
1830 if (infile != NULL && infile != stdin) fclose(infile);
1831 if (outfile != NULL && outfile != stdout) fclose(outfile);
1832
1833 free(buffer);
1834 free(dbuffer);
1835 free(pbuffer);
1836 free(offsets);
1837
1838 return yield;
1839 }
1840
1841 /* End of pcretest.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12