/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 77 - (show annotations) (download)
Sat Feb 24 21:40:45 2007 UTC (7 years, 8 months ago) by nigel
File MIME type: text/plain
File size: 52292 byte(s)
Load pcre-6.0 into code/trunk.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
46
47 #define PCRE_SPY /* For Win32 build, import data, not export */
48
49 /* We need the internal info for displaying the results of pcre_study() and
50 other internal data; pcretest also uses some of the fixed tables, and generally
51 has "inside information" compared to a program that strictly follows the PCRE
52 API. */
53
54 #include "pcre_internal.h"
55
56
57 /* It is possible to compile this test program without including support for
58 testing the POSIX interface, though this is not available via the standard
59 Makefile. */
60
61 #if !defined NOPOSIX
62 #include "pcreposix.h"
63 #endif
64
65 #ifndef CLOCKS_PER_SEC
66 #ifdef CLK_TCK
67 #define CLOCKS_PER_SEC CLK_TCK
68 #else
69 #define CLOCKS_PER_SEC 100
70 #endif
71 #endif
72
73 #define LOOPREPEAT 500000
74
75 #define BUFFER_SIZE 30000
76 #define PBUFFER_SIZE BUFFER_SIZE
77 #define DBUFFER_SIZE BUFFER_SIZE
78
79
80 static FILE *outfile;
81 static int log_store = 0;
82 static int callout_count;
83 static int callout_extra;
84 static int callout_fail_count;
85 static int callout_fail_id;
86 static int first_callout;
87 static int show_malloc;
88 static int use_utf8;
89 static size_t gotten_store;
90
91 static uschar *pbuffer = NULL;
92
93
94
95 /*************************************************
96 * Read number from string *
97 *************************************************/
98
99 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
100 around with conditional compilation, just do the job by hand. It is only used
101 for unpicking the -o argument, so just keep it simple.
102
103 Arguments:
104 str string to be converted
105 endptr where to put the end pointer
106
107 Returns: the value, as an int
108 */
109
/* Parse an unsigned decimal number from a string.

Leading white space is skipped, then as many decimal digits as are present
are consumed. If the digits describe a value larger than INT_MAX the result
is clamped to INT_MAX instead of overflowing (signed overflow is undefined
behaviour); the remaining digits are still consumed, so *endptr always
points at the first non-digit character.

Arguments:
  str        string to be converted
  endptr     where to put the pointer to the first unconsumed character

Returns:     the value, in the range 0 to INT_MAX; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  while (isdigit(*str))
    {
    int digit = *str++ - '0';

    /* result * 10 + digit would exceed INT_MAX: saturate. Once saturated,
    this test stays true for every further digit. */
    if (result > (INT_MAX - digit) / 10)
      result = INT_MAX;
    else
      result = result * 10 + digit;
    }

  *endptr = str;
  return result;
}
119
120
121
122
123 /*************************************************
124 * Convert UTF-8 string to value *
125 *************************************************/
126
127 /* This function takes one or more bytes that represent a UTF-8 character,
128 and returns the value of the character.
129
130 Argument:
131 buffer a pointer to the byte vector
132 vptr a pointer to an int to receive the value
133
134 Returns: > 0 => the number of bytes consumed
135 -6 to 0 => malformed UTF-8 character at offset = (-return)
136 */
137
138 static int
139 utf82ord(unsigned char *buffer, int *vptr)
140 {
141 int c = *buffer++;
142 int d = c;
143 int i, j, s;
144
145 for (i = -1; i < 6; i++) /* i is number of additional bytes */
146 {
147 if ((d & 0x80) == 0) break;
148 d <<= 1;
149 }
150
151 if (i == -1) { *vptr = c; return 1; } /* ascii character */
152 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
153
154 /* i now has a value in the range 1-5 */
155
156 s = 6*i;
157 d = (c & _pcre_utf8_table3[i]) << s;
158
159 for (j = 0; j < i; j++)
160 {
161 c = *buffer++;
162 if ((c & 0xc0) != 0x80) return -(j+1);
163 s -= 6;
164 d |= (c & 0x3f) << s;
165 }
166
167 /* Check that encoding was the correct unique one */
168
169 for (j = 0; j < _pcre_utf8_table1_size; j++)
170 if (d <= _pcre_utf8_table1[j]) break;
171 if (j != i) return -(i+1);
172
173 /* Valid value */
174
175 *vptr = d;
176 return i+1;
177 }
178
179
180
181 /*************************************************
182 * Print character string *
183 *************************************************/
184
185 /* Character string printing function. Must handle UTF-8 strings in utf8
186 mode. Yields number of characters printed. If handed a NULL file, just counts
187 chars without printing. */
188
189 static int pchars(unsigned char *p, int length, FILE *f)
190 {
191 int c;
192 int yield = 0;
193
194 while (length-- > 0)
195 {
196 if (use_utf8)
197 {
198 int rc = utf82ord(p, &c);
199
200 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
201 {
202 length -= rc - 1;
203 p += rc;
204 if (c < 256 && isprint(c))
205 {
206 if (f != NULL) fprintf(f, "%c", c);
207 yield++;
208 }
209 else
210 {
211 int n;
212 if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
213 yield += n;
214 }
215 continue;
216 }
217 }
218
219 /* Not UTF-8, or malformed UTF-8 */
220
221 if (isprint(c = *(p++)))
222 {
223 if (f != NULL) fprintf(f, "%c", c);
224 yield++;
225 }
226 else
227 {
228 if (f != NULL) fprintf(f, "\\x%02x", c);
229 yield += 4;
230 }
231 }
232
233 return yield;
234 }
235
236
237
238 /*************************************************
239 * Callout function *
240 *************************************************/
241
242 /* Called from PCRE as a result of the (?C) item. We print out where we are in
243 the match. Yield zero unless more callouts than the fail count, or the callout
244 data is not zero. */
245
246 static int callout(pcre_callout_block *cb)
247 {
248 FILE *f = (first_callout | callout_extra)? outfile : NULL;
249 int i, pre_start, post_start, subject_length;
250
251 if (callout_extra)
252 {
253 fprintf(f, "Callout %d: last capture = %d\n",
254 cb->callout_number, cb->capture_last);
255
256 for (i = 0; i < cb->capture_top * 2; i += 2)
257 {
258 if (cb->offset_vector[i] < 0)
259 fprintf(f, "%2d: <unset>\n", i/2);
260 else
261 {
262 fprintf(f, "%2d: ", i/2);
263 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
264 cb->offset_vector[i+1] - cb->offset_vector[i], f);
265 fprintf(f, "\n");
266 }
267 }
268 }
269
270 /* Re-print the subject in canonical form, the first time or if giving full
271 datails. On subsequent calls in the same match, we use pchars just to find the
272 printed lengths of the substrings. */
273
274 if (f != NULL) fprintf(f, "--->");
275
276 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
277 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
278 cb->current_position - cb->start_match, f);
279
280 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
281
282 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
283 cb->subject_length - cb->current_position, f);
284
285 if (f != NULL) fprintf(f, "\n");
286
287 /* Always print appropriate indicators, with callout number if not already
288 shown. For automatic callouts, show the pattern offset. */
289
290 if (cb->callout_number == 255)
291 {
292 fprintf(outfile, "%+3d ", cb->pattern_position);
293 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
294 }
295 else
296 {
297 if (callout_extra) fprintf(outfile, " ");
298 else fprintf(outfile, "%3d ", cb->callout_number);
299 }
300
301 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
302 fprintf(outfile, "^");
303
304 if (post_start > 0)
305 {
306 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
307 fprintf(outfile, "^");
308 }
309
310 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
311 fprintf(outfile, " ");
312
313 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
314 pbuffer + cb->pattern_position);
315
316 fprintf(outfile, "\n");
317 first_callout = 0;
318
319 if (cb->callout_data != NULL)
320 {
321 int callout_data = *((int *)(cb->callout_data));
322 if (callout_data != 0)
323 {
324 fprintf(outfile, "Callout data = %d\n", callout_data);
325 return callout_data;
326 }
327 }
328
329 return (cb->callout_number != callout_fail_id)? 0 :
330 (++callout_count >= callout_fail_count)? 1 : 0;
331 }
332
333
334 /*************************************************
335 * Local malloc functions *
336 *************************************************/
337
338 /* Alternative malloc function, to test functionality and show the size of the
339 compiled re. */
340
341 static void *new_malloc(size_t size)
342 {
343 void *block = malloc(size);
344 gotten_store = size;
345 if (show_malloc)
346 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
347 return block;
348 }
349
350 static void new_free(void *block)
351 {
352 if (show_malloc)
353 fprintf(outfile, "free %p\n", block);
354 free(block);
355 }
356
357
358 /* For recursion malloc/free, to test stacking calls */
359
360 static void *stack_malloc(size_t size)
361 {
362 void *block = malloc(size);
363 if (show_malloc)
364 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
365 return block;
366 }
367
368 static void stack_free(void *block)
369 {
370 if (show_malloc)
371 fprintf(outfile, "stack_free %p\n", block);
372 free(block);
373 }
374
375
376 /*************************************************
377 * Call pcre_fullinfo() *
378 *************************************************/
379
380 /* Get one piece of information from the pcre_fullinfo() function */
381
382 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
383 {
384 int rc;
385 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
386 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
387 }
388
389
390
391 /*************************************************
392 * Byte flipping function *
393 *************************************************/
394
/* Reverse the byte order of an integer field.

Arguments:
  value      the field's value
  n          the field's size in bytes; 2 swaps the two low-order bytes,
             any other value reverses the four low-order bytes

Returns:     the byte-reversed value; bits above the bytes that were
             swapped are zero

The shifting is done on an unsigned copy of the value. Left-shifting a signed
long so that a bit lands in the sign position is undefined behaviour, which
the 4-byte case would hit wherever long is only 32 bits wide. */

static long int
byteflip(long int value, int n)
{
  unsigned long int v = (unsigned long int)value;

  if (n == 2)
    return (long int)(((v & 0x00ffUL) << 8) | ((v & 0xff00UL) >> 8));

  return (long int)(((v & 0x000000ffUL) << 24) |
                    ((v & 0x0000ff00UL) <<  8) |
                    ((v & 0x00ff0000UL) >>  8) |
                    ((v & 0xff000000UL) >> 24));
}
404
405
406
407
408 /*************************************************
409 * Main Program *
410 *************************************************/
411
412 /* Read lines from named file or stdin and write to named file or stdout; lines
413 consist of a regular expression, in delimiters and optionally followed by
414 options, followed by a set of test data, terminated by an empty line. */
415
416 int main(int argc, char **argv)
417 {
418 FILE *infile = stdin;
419 int options = 0;
420 int study_options = 0;
421 int op = 1;
422 int timeit = 0;
423 int showinfo = 0;
424 int showstore = 0;
425 int size_offsets = 45;
426 int size_offsets_max;
427 int *offsets = NULL;
428 #if !defined NOPOSIX
429 int posix = 0;
430 #endif
431 int debug = 0;
432 int done = 0;
433 int all_use_dfa = 0;
434 int yield = 0;
435
436 unsigned char *buffer;
437 unsigned char *dbuffer;
438
439 /* Get buffers from malloc() so that Electric Fence will check their misuse
440 when I am debugging. */
441
442 buffer = (unsigned char *)malloc(BUFFER_SIZE);
443 dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
444 pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
445
446 /* The outfile variable is static so that new_malloc can use it. The _setmode()
447 stuff is some magic that I don't understand, but which apparently does good
448 things in Windows. It's related to line terminations. */
449
450 #if defined(_WIN32) || defined(WIN32)
451 _setmode( _fileno( stdout ), 0x8000 );
452 #endif /* defined(_WIN32) || defined(WIN32) */
453
454 outfile = stdout;
455
456 /* Scan options */
457
458 while (argc > 1 && argv[op][0] == '-')
459 {
460 unsigned char *endptr;
461
462 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
463 showstore = 1;
464 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
465 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
466 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
467 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
468 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
469 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
470 *endptr == 0))
471 {
472 op++;
473 argc--;
474 }
475 #if !defined NOPOSIX
476 else if (strcmp(argv[op], "-p") == 0) posix = 1;
477 #endif
478 else if (strcmp(argv[op], "-C") == 0)
479 {
480 int rc;
481 printf("PCRE version %s\n", pcre_version());
482 printf("Compiled with\n");
483 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
484 printf(" %sUTF-8 support\n", rc? "" : "No ");
485 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
486 printf(" %sUnicode properties support\n", rc? "" : "No ");
487 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
488 printf(" Newline character is %s\n", (rc == '\r')? "CR" : "LF");
489 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
490 printf(" Internal link size = %d\n", rc);
491 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
492 printf(" POSIX malloc threshold = %d\n", rc);
493 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
494 printf(" Default match limit = %d\n", rc);
495 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
496 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
497 exit(0);
498 }
499 else
500 {
501 printf("** Unknown or malformed option %s\n", argv[op]);
502 printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
503 printf(" -C show PCRE compile-time options and exit\n");
504 printf(" -d debug: show compiled code; implies -i\n");
505 printf(" -dfa force DFA matching for all subjects\n");
506 printf(" -i show information about compiled pattern\n"
507 " -m output memory used information\n"
508 " -o <n> set size of offsets vector to <n>\n");
509 #if !defined NOPOSIX
510 printf(" -p use POSIX interface\n");
511 #endif
512 printf(" -s output store (memory) used information\n"
513 " -t time compilation and execution\n");
514 yield = 1;
515 goto EXIT;
516 }
517 op++;
518 argc--;
519 }
520
521 /* Get the store for the offsets vector, and remember what it was */
522
523 size_offsets_max = size_offsets;
524 offsets = (int *)malloc(size_offsets_max * sizeof(int));
525 if (offsets == NULL)
526 {
527 printf("** Failed to get %d bytes of memory for offsets vector\n",
528 size_offsets_max * sizeof(int));
529 yield = 1;
530 goto EXIT;
531 }
532
533 /* Sort out the input and output files */
534
535 if (argc > 1)
536 {
537 infile = fopen(argv[op], "rb");
538 if (infile == NULL)
539 {
540 printf("** Failed to open %s\n", argv[op]);
541 yield = 1;
542 goto EXIT;
543 }
544 }
545
546 if (argc > 2)
547 {
548 outfile = fopen(argv[op+1], "wb");
549 if (outfile == NULL)
550 {
551 printf("** Failed to open %s\n", argv[op+1]);
552 yield = 1;
553 goto EXIT;
554 }
555 }
556
557 /* Set alternative malloc function */
558
559 pcre_malloc = new_malloc;
560 pcre_free = new_free;
561 pcre_stack_malloc = stack_malloc;
562 pcre_stack_free = stack_free;
563
564 /* Heading line, then prompt for first regex if stdin */
565
566 fprintf(outfile, "PCRE version %s\n\n", pcre_version());
567
568 /* Main loop */
569
570 while (!done)
571 {
572 pcre *re = NULL;
573 pcre_extra *extra = NULL;
574
575 #if !defined NOPOSIX /* There are still compilers that require no indent */
576 regex_t preg;
577 int do_posix = 0;
578 #endif
579
580 const char *error;
581 unsigned char *p, *pp, *ppp;
582 unsigned char *to_file = NULL;
583 const unsigned char *tables = NULL;
584 unsigned long int true_size, true_study_size = 0;
585 size_t size, regex_gotten_store;
586 int do_study = 0;
587 int do_debug = debug;
588 int do_G = 0;
589 int do_g = 0;
590 int do_showinfo = showinfo;
591 int do_showrest = 0;
592 int do_flip = 0;
593 int erroroffset, len, delimiter;
594
595 use_utf8 = 0;
596
597 if (infile == stdin) printf(" re> ");
598 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
599 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
600 fflush(outfile);
601
602 p = buffer;
603 while (isspace(*p)) p++;
604 if (*p == 0) continue;
605
606 /* See if the pattern is to be loaded pre-compiled from a file. */
607
608 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
609 {
610 unsigned long int magic;
611 uschar sbuf[8];
612 FILE *f;
613
614 p++;
615 pp = p + (int)strlen((char *)p);
616 while (isspace(pp[-1])) pp--;
617 *pp = 0;
618
619 f = fopen((char *)p, "rb");
620 if (f == NULL)
621 {
622 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
623 continue;
624 }
625
626 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
627
628 true_size =
629 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
630 true_study_size =
631 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
632
633 re = (real_pcre *)new_malloc(true_size);
634 regex_gotten_store = gotten_store;
635
636 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
637
638 magic = ((real_pcre *)re)->magic_number;
639 if (magic != MAGIC_NUMBER)
640 {
641 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
642 {
643 do_flip = 1;
644 }
645 else
646 {
647 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
648 fclose(f);
649 continue;
650 }
651 }
652
653 fprintf(outfile, "Compiled regex%s loaded from %s\n",
654 do_flip? " (byte-inverted)" : "", p);
655
656 /* Need to know if UTF-8 for printing data strings */
657
658 new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
659 use_utf8 = (options & PCRE_UTF8) != 0;
660
661 /* Now see if there is any following study data */
662
663 if (true_study_size != 0)
664 {
665 pcre_study_data *psd;
666
667 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
668 extra->flags = PCRE_EXTRA_STUDY_DATA;
669
670 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
671 extra->study_data = psd;
672
673 if (fread(psd, 1, true_study_size, f) != true_study_size)
674 {
675 FAIL_READ:
676 fprintf(outfile, "Failed to read data from %s\n", p);
677 if (extra != NULL) new_free(extra);
678 if (re != NULL) new_free(re);
679 fclose(f);
680 continue;
681 }
682 fprintf(outfile, "Study data loaded from %s\n", p);
683 do_study = 1; /* To get the data output if requested */
684 }
685 else fprintf(outfile, "No study data\n");
686
687 fclose(f);
688 goto SHOW_INFO;
689 }
690
691 /* In-line pattern (the usual case). Get the delimiter and seek the end of
692 the pattern; if it isn't complete, read more. */
693
694 delimiter = *p++;
695
696 if (isalnum(delimiter) || delimiter == '\\')
697 {
698 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
699 goto SKIP_DATA;
700 }
701
702 pp = p;
703
704 for(;;)
705 {
706 while (*pp != 0)
707 {
708 if (*pp == '\\' && pp[1] != 0) pp++;
709 else if (*pp == delimiter) break;
710 pp++;
711 }
712 if (*pp != 0) break;
713
714 len = BUFFER_SIZE - (pp - buffer);
715 if (len < 256)
716 {
717 fprintf(outfile, "** Expression too long - missing delimiter?\n");
718 goto SKIP_DATA;
719 }
720
721 if (infile == stdin) printf(" > ");
722 if (fgets((char *)pp, len, infile) == NULL)
723 {
724 fprintf(outfile, "** Unexpected EOF\n");
725 done = 1;
726 goto CONTINUE;
727 }
728 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
729 }
730
731 /* If the first character after the delimiter is backslash, make
732 the pattern end with backslash. This is purely to provide a way
733 of testing for the error message when a pattern ends with backslash. */
734
735 if (pp[1] == '\\') *pp++ = '\\';
736
737 /* Terminate the pattern at the delimiter, and save a copy of the pattern
738 for callouts. */
739
740 *pp++ = 0;
741 strcpy((char *)pbuffer, (char *)p);
742
743 /* Look for options after final delimiter */
744
745 options = 0;
746 study_options = 0;
747 log_store = showstore; /* default from command line */
748
749 while (*pp != 0)
750 {
751 switch (*pp++)
752 {
753 case 'f': options |= PCRE_FIRSTLINE; break;
754 case 'g': do_g = 1; break;
755 case 'i': options |= PCRE_CASELESS; break;
756 case 'm': options |= PCRE_MULTILINE; break;
757 case 's': options |= PCRE_DOTALL; break;
758 case 'x': options |= PCRE_EXTENDED; break;
759
760 case '+': do_showrest = 1; break;
761 case 'A': options |= PCRE_ANCHORED; break;
762 case 'C': options |= PCRE_AUTO_CALLOUT; break;
763 case 'D': do_debug = do_showinfo = 1; break;
764 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
765 case 'F': do_flip = 1; break;
766 case 'G': do_G = 1; break;
767 case 'I': do_showinfo = 1; break;
768 case 'M': log_store = 1; break;
769 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
770
771 #if !defined NOPOSIX
772 case 'P': do_posix = 1; break;
773 #endif
774
775 case 'S': do_study = 1; break;
776 case 'U': options |= PCRE_UNGREEDY; break;
777 case 'X': options |= PCRE_EXTRA; break;
778 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
779 case '?': options |= PCRE_NO_UTF8_CHECK; break;
780
781 case 'L':
782 ppp = pp;
783 /* The '\r' test here is so that it works on Windows */
784 while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
785 *ppp = 0;
786 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
787 {
788 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
789 goto SKIP_DATA;
790 }
791 tables = pcre_maketables();
792 pp = ppp;
793 break;
794
795 case '>':
796 to_file = pp;
797 while (*pp != 0) pp++;
798 while (isspace(pp[-1])) pp--;
799 *pp = 0;
800 break;
801
802 case '\r': /* So that it works in Windows */
803 case '\n':
804 case ' ':
805 break;
806
807 default:
808 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
809 goto SKIP_DATA;
810 }
811 }
812
813 /* Handle compiling via the POSIX interface, which doesn't support the
814 timing, showing, or debugging options, nor the ability to pass over
815 local character tables. */
816
817 #if !defined NOPOSIX
818 if (posix || do_posix)
819 {
820 int rc;
821 int cflags = 0;
822
823 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
824 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
825 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
826 rc = regcomp(&preg, (char *)p, cflags);
827
828 /* Compilation failed; go back for another re, skipping to blank line
829 if non-interactive. */
830
831 if (rc != 0)
832 {
833 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
834 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
835 goto SKIP_DATA;
836 }
837 }
838
839 /* Handle compiling via the native interface */
840
841 else
842 #endif /* !defined NOPOSIX */
843
844 {
845 if (timeit)
846 {
847 register int i;
848 clock_t time_taken;
849 clock_t start_time = clock();
850 for (i = 0; i < LOOPREPEAT; i++)
851 {
852 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
853 if (re != NULL) free(re);
854 }
855 time_taken = clock() - start_time;
856 fprintf(outfile, "Compile time %.3f milliseconds\n",
857 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
858 (double)CLOCKS_PER_SEC);
859 }
860
861 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
862
863 /* Compilation failed; go back for another re, skipping to blank line
864 if non-interactive. */
865
866 if (re == NULL)
867 {
868 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
869 SKIP_DATA:
870 if (infile != stdin)
871 {
872 for (;;)
873 {
874 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
875 {
876 done = 1;
877 goto CONTINUE;
878 }
879 len = (int)strlen((char *)buffer);
880 while (len > 0 && isspace(buffer[len-1])) len--;
881 if (len == 0) break;
882 }
883 fprintf(outfile, "\n");
884 }
885 goto CONTINUE;
886 }
887
888 /* Compilation succeeded; print data if required. There are now two
889 info-returning functions. The old one has a limited interface and
890 returns only limited data. Check that it agrees with the newer one. */
891
892 if (log_store)
893 fprintf(outfile, "Memory allocation (code space): %d\n",
894 (int)(gotten_store -
895 sizeof(real_pcre) -
896 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
897
898 /* Extract the size for possible writing before possibly flipping it,
899 and remember the store that was got. */
900
901 true_size = ((real_pcre *)re)->size;
902 regex_gotten_store = gotten_store;
903
904 /* If /S was present, study the regexp to generate additional info to
905 help with the matching. */
906
907 if (do_study)
908 {
909 if (timeit)
910 {
911 register int i;
912 clock_t time_taken;
913 clock_t start_time = clock();
914 for (i = 0; i < LOOPREPEAT; i++)
915 extra = pcre_study(re, study_options, &error);
916 time_taken = clock() - start_time;
917 if (extra != NULL) free(extra);
918 fprintf(outfile, " Study time %.3f milliseconds\n",
919 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
920 (double)CLOCKS_PER_SEC);
921 }
922 extra = pcre_study(re, study_options, &error);
923 if (error != NULL)
924 fprintf(outfile, "Failed to study: %s\n", error);
925 else if (extra != NULL)
926 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
927 }
928
929 /* If the 'F' option was present, we flip the bytes of all the integer
930 fields in the regex data block and the study block. This is to make it
931 possible to test PCRE's handling of byte-flipped patterns, e.g. those
932 compiled on a different architecture. */
933
934 if (do_flip)
935 {
936 real_pcre *rre = (real_pcre *)re;
937 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
938 rre->size = byteflip(rre->size, sizeof(rre->size));
939 rre->options = byteflip(rre->options, sizeof(rre->options));
940 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
941 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
942 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
943 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
944 rre->name_table_offset = byteflip(rre->name_table_offset,
945 sizeof(rre->name_table_offset));
946 rre->name_entry_size = byteflip(rre->name_entry_size,
947 sizeof(rre->name_entry_size));
948 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
949
950 if (extra != NULL)
951 {
952 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
953 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
954 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
955 }
956 }
957
958 /* Extract information from the compiled data if required */
959
960 SHOW_INFO:
961
962 if (do_showinfo)
963 {
964 unsigned long int get_options, all_options;
965 int old_first_char, old_options, old_count;
966 int count, backrefmax, first_char, need_char;
967 int nameentrysize, namecount;
968 const uschar *nametable;
969
970 if (do_debug)
971 {
972 fprintf(outfile, "------------------------------------------------------------------\n");
973 _pcre_printint(re, outfile);
974 }
975
976 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
977 new_info(re, NULL, PCRE_INFO_SIZE, &size);
978 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
979 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
980 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
981 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
982 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
983 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
984 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
985
986 old_count = pcre_info(re, &old_options, &old_first_char);
987 if (count < 0) fprintf(outfile,
988 "Error %d from pcre_info()\n", count);
989 else
990 {
991 if (old_count != count) fprintf(outfile,
992 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
993 old_count);
994
995 if (old_first_char != first_char) fprintf(outfile,
996 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
997 first_char, old_first_char);
998
999 if (old_options != (int)get_options) fprintf(outfile,
1000 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1001 get_options, old_options);
1002 }
1003
1004 if (size != regex_gotten_store) fprintf(outfile,
1005 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1006 (int)size, (int)regex_gotten_store);
1007
1008 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1009 if (backrefmax > 0)
1010 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1011
1012 if (namecount > 0)
1013 {
1014 fprintf(outfile, "Named capturing subpatterns:\n");
1015 while (namecount-- > 0)
1016 {
1017 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1018 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1019 GET2(nametable, 0));
1020 nametable += nameentrysize;
1021 }
1022 }
1023
1024 /* The NOPARTIAL bit is a private bit in the options, so we have
1025 to fish it out via our back door */
1026
1027 all_options = ((real_pcre *)re)->options;
1028 if (do_flip)
1029 {
1030 all_options = byteflip(all_options, sizeof(all_options));
1031 }
1032
1033 if ((all_options & PCRE_NOPARTIAL) != 0)
1034 fprintf(outfile, "Partial matching not supported\n");
1035
1036 if (get_options == 0) fprintf(outfile, "No options\n");
1037 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",
1038 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1039 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1040 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1041 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1042 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1043 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1044 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1045 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1046 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1047 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1048 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1049
1050 if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1051 fprintf(outfile, "Case state changes\n");
1052
1053 if (first_char == -1)
1054 {
1055 fprintf(outfile, "First char at start or follows \\n\n");
1056 }
1057 else if (first_char < 0)
1058 {
1059 fprintf(outfile, "No first char\n");
1060 }
1061 else
1062 {
1063 int ch = first_char & 255;
1064 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1065 "" : " (caseless)";
1066 if (isprint(ch))
1067 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1068 else
1069 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1070 }
1071
1072 if (need_char < 0)
1073 {
1074 fprintf(outfile, "No need char\n");
1075 }
1076 else
1077 {
1078 int ch = need_char & 255;
1079 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1080 "" : " (caseless)";
1081 if (isprint(ch))
1082 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1083 else
1084 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1085 }
1086
1087 /* Don't output study size; at present it is in any case a fixed
1088 value, but it varies, depending on the computer architecture, and
1089 so messes up the test suite. (And with the /F option, it might be
1090 flipped.) */
1091
1092 if (do_study)
1093 {
1094 if (extra == NULL)
1095 fprintf(outfile, "Study returned NULL\n");
1096 else
1097 {
1098 uschar *start_bits = NULL;
1099 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1100
1101 if (start_bits == NULL)
1102 fprintf(outfile, "No starting byte set\n");
1103 else
1104 {
1105 int i;
1106 int c = 24;
1107 fprintf(outfile, "Starting byte set: ");
1108 for (i = 0; i < 256; i++)
1109 {
1110 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1111 {
1112 if (c > 75)
1113 {
1114 fprintf(outfile, "\n ");
1115 c = 2;
1116 }
1117 if (isprint(i) && i != ' ')
1118 {
1119 fprintf(outfile, "%c ", i);
1120 c += 2;
1121 }
1122 else
1123 {
1124 fprintf(outfile, "\\x%02x ", i);
1125 c += 5;
1126 }
1127 }
1128 }
1129 fprintf(outfile, "\n");
1130 }
1131 }
1132 }
1133 }
1134
1135 /* If the '>' option was present, we write out the regex to a file, and
1136 that is all. The first 8 bytes of the file are the regex length and then
1137 the study length, in big-endian order. */
1138
1139 if (to_file != NULL)
1140 {
1141 FILE *f = fopen((char *)to_file, "wb");
1142 if (f == NULL)
1143 {
1144 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1145 }
1146 else
1147 {
1148 uschar sbuf[8];
1149 sbuf[0] = (true_size >> 24) & 255;
1150 sbuf[1] = (true_size >> 16) & 255;
1151 sbuf[2] = (true_size >> 8) & 255;
1152 sbuf[3] = (true_size) & 255;
1153
1154 sbuf[4] = (true_study_size >> 24) & 255;
1155 sbuf[5] = (true_study_size >> 16) & 255;
1156 sbuf[6] = (true_study_size >> 8) & 255;
1157 sbuf[7] = (true_study_size) & 255;
1158
1159 if (fwrite(sbuf, 1, 8, f) < 8 ||
1160 fwrite(re, 1, true_size, f) < true_size)
1161 {
1162 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1163 }
1164 else
1165 {
1166 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1167 if (extra != NULL)
1168 {
1169 if (fwrite(extra->study_data, 1, true_study_size, f) <
1170 true_study_size)
1171 {
1172 fprintf(outfile, "Write error on %s: %s\n", to_file,
1173 strerror(errno));
1174 }
1175 else fprintf(outfile, "Study data written to %s\n", to_file);
1176 }
1177 }
1178 fclose(f);
1179 }
1180
1181 new_free(re);
1182 if (extra != NULL) new_free(extra);
1183 if (tables != NULL) new_free((void *)tables);
1184 continue; /* With next regex */
1185 }
1186 } /* End of non-POSIX compile */
1187
1188 /* Read data lines and test them */
1189
1190 for (;;)
1191 {
1192 unsigned char *q;
1193 unsigned char *bptr = dbuffer;
1194 int *use_offsets = offsets;
1195 int use_size_offsets = size_offsets;
1196 int callout_data = 0;
1197 int callout_data_set = 0;
1198 int count, c;
1199 int copystrings = 0;
1200 int find_match_limit = 0;
1201 int getstrings = 0;
1202 int getlist = 0;
1203 int gmatched = 0;
1204 int start_offset = 0;
1205 int g_notempty = 0;
1206 int use_dfa = 0;
1207
1208 options = 0;
1209
1210 pcre_callout = callout;
1211 first_callout = 1;
1212 callout_extra = 0;
1213 callout_count = 0;
1214 callout_fail_count = 999999;
1215 callout_fail_id = -1;
1216 show_malloc = 0;
1217
1218 if (infile == stdin) printf("data> ");
1219 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1220 {
1221 done = 1;
1222 goto CONTINUE;
1223 }
1224 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1225
1226 len = (int)strlen((char *)buffer);
1227 while (len > 0 && isspace(buffer[len-1])) len--;
1228 buffer[len] = 0;
1229 if (len == 0) break;
1230
1231 p = buffer;
1232 while (isspace(*p)) p++;
1233
1234 q = dbuffer;
1235 while ((c = *p++) != 0)
1236 {
1237 int i = 0;
1238 int n = 0;
1239
1240 if (c == '\\') switch ((c = *p++))
1241 {
1242 case 'a': c = 7; break;
1243 case 'b': c = '\b'; break;
1244 case 'e': c = 27; break;
1245 case 'f': c = '\f'; break;
1246 case 'n': c = '\n'; break;
1247 case 'r': c = '\r'; break;
1248 case 't': c = '\t'; break;
1249 case 'v': c = '\v'; break;
1250
1251 case '0': case '1': case '2': case '3':
1252 case '4': case '5': case '6': case '7':
1253 c -= '0';
1254 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1255 c = c * 8 + *p++ - '0';
1256 break;
1257
1258 case 'x':
1259
1260 /* Handle \x{..} specially - new Perl thing for utf8 */
1261
1262 if (*p == '{')
1263 {
1264 unsigned char *pt = p;
1265 c = 0;
1266 while (isxdigit(*(++pt)))
1267 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1268 if (*pt == '}')
1269 {
1270 unsigned char buff8[8];
1271 int ii, utn;
1272 utn = _pcre_ord2utf8(c, buff8);
1273 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1274 c = buff8[ii]; /* Last byte */
1275 p = pt + 1;
1276 break;
1277 }
1278 /* Not correct form; fall through */
1279 }
1280
1281 /* Ordinary \x */
1282
1283 c = 0;
1284 while (i++ < 2 && isxdigit(*p))
1285 {
1286 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1287 p++;
1288 }
1289 break;
1290
1291 case 0: /* \ followed by EOF allows for an empty line */
1292 p--;
1293 continue;
1294
1295 case '>':
1296 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1297 continue;
1298
1299 case 'A': /* Option setting */
1300 options |= PCRE_ANCHORED;
1301 continue;
1302
1303 case 'B':
1304 options |= PCRE_NOTBOL;
1305 continue;
1306
1307 case 'C':
1308 if (isdigit(*p)) /* Set copy string */
1309 {
1310 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1311 copystrings |= 1 << n;
1312 }
1313 else if (isalnum(*p))
1314 {
1315 uschar name[256];
1316 uschar *npp = name;
1317 while (isalnum(*p)) *npp++ = *p++;
1318 *npp = 0;
1319 n = pcre_get_stringnumber(re, (char *)name);
1320 if (n < 0)
1321 fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1322 else copystrings |= 1 << n;
1323 }
1324 else if (*p == '+')
1325 {
1326 callout_extra = 1;
1327 p++;
1328 }
1329 else if (*p == '-')
1330 {
1331 pcre_callout = NULL;
1332 p++;
1333 }
1334 else if (*p == '!')
1335 {
1336 callout_fail_id = 0;
1337 p++;
1338 while(isdigit(*p))
1339 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1340 callout_fail_count = 0;
1341 if (*p == '!')
1342 {
1343 p++;
1344 while(isdigit(*p))
1345 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1346 }
1347 }
1348 else if (*p == '*')
1349 {
1350 int sign = 1;
1351 callout_data = 0;
1352 if (*(++p) == '-') { sign = -1; p++; }
1353 while(isdigit(*p))
1354 callout_data = callout_data * 10 + *p++ - '0';
1355 callout_data *= sign;
1356 callout_data_set = 1;
1357 }
1358 continue;
1359
1360 case 'D':
1361 if (posix || do_posix)
1362 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1363 else
1364 use_dfa = 1;
1365 continue;
1366
1367 case 'F':
1368 options |= PCRE_DFA_SHORTEST;
1369 continue;
1370
1371 case 'G':
1372 if (isdigit(*p))
1373 {
1374 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1375 getstrings |= 1 << n;
1376 }
1377 else if (isalnum(*p))
1378 {
1379 uschar name[256];
1380 uschar *npp = name;
1381 while (isalnum(*p)) *npp++ = *p++;
1382 *npp = 0;
1383 n = pcre_get_stringnumber(re, (char *)name);
1384 if (n < 0)
1385 fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1386 else getstrings |= 1 << n;
1387 }
1388 continue;
1389
1390 case 'L':
1391 getlist = 1;
1392 continue;
1393
1394 case 'M':
1395 find_match_limit = 1;
1396 continue;
1397
1398 case 'N':
1399 options |= PCRE_NOTEMPTY;
1400 continue;
1401
1402 case 'O':
1403 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1404 if (n > size_offsets_max)
1405 {
1406 size_offsets_max = n;
1407 free(offsets);
1408 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1409 if (offsets == NULL)
1410 {
1411 printf("** Failed to get %d bytes of memory for offsets vector\n",
1412 size_offsets_max * sizeof(int));
1413 yield = 1;
1414 goto EXIT;
1415 }
1416 }
1417 use_size_offsets = n;
1418 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1419 continue;
1420
1421 case 'P':
1422 options |= PCRE_PARTIAL;
1423 continue;
1424
1425 case 'R':
1426 options |= PCRE_DFA_RESTART;
1427 continue;
1428
1429 case 'S':
1430 show_malloc = 1;
1431 continue;
1432
1433 case 'Z':
1434 options |= PCRE_NOTEOL;
1435 continue;
1436
1437 case '?':
1438 options |= PCRE_NO_UTF8_CHECK;
1439 continue;
1440 }
1441 *q++ = c;
1442 }
1443 *q = 0;
1444 len = q - dbuffer;
1445
1446 if ((all_use_dfa || use_dfa) && find_match_limit)
1447 {
1448 printf("**Match limit not relevant for DFA matching: ignored\n");
1449 find_match_limit = 0;
1450 }
1451
1452 /* Handle matching via the POSIX interface, which does not
1453 support timing or playing with the match limit or callout data. */
1454
1455 #if !defined NOPOSIX
1456 if (posix || do_posix)
1457 {
1458 int rc;
1459 int eflags = 0;
1460 regmatch_t *pmatch = NULL;
1461 if (use_size_offsets > 0)
1462 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1463 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1464 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1465
1466 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1467
1468 if (rc != 0)
1469 {
1470 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1471 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1472 }
1473 else
1474 {
1475 size_t i;
1476 for (i = 0; i < (size_t)use_size_offsets; i++)
1477 {
1478 if (pmatch[i].rm_so >= 0)
1479 {
1480 fprintf(outfile, "%2d: ", (int)i);
1481 (void)pchars(dbuffer + pmatch[i].rm_so,
1482 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1483 fprintf(outfile, "\n");
1484 if (i == 0 && do_showrest)
1485 {
1486 fprintf(outfile, " 0+ ");
1487 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1488 outfile);
1489 fprintf(outfile, "\n");
1490 }
1491 }
1492 }
1493 }
1494 free(pmatch);
1495 }
1496
1497 /* Handle matching via the native interface - repeats for /g and /G */
1498
1499 else
1500 #endif /* !defined NOPOSIX */
1501
1502 for (;; gmatched++) /* Loop for /g or /G */
1503 {
1504 if (timeit)
1505 {
1506 register int i;
1507 clock_t time_taken;
1508 clock_t start_time = clock();
1509
1510 if (all_use_dfa || use_dfa)
1511 {
1512 int workspace[1000];
1513 for (i = 0; i < LOOPREPEAT; i++)
1514 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1515 options | g_notempty, use_offsets, use_size_offsets, workspace,
1516 sizeof(workspace)/sizeof(int));
1517 }
1518 else
1519
1520 for (i = 0; i < LOOPREPEAT; i++)
1521 count = pcre_exec(re, extra, (char *)bptr, len,
1522 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1523
1524 time_taken = clock() - start_time;
1525 fprintf(outfile, "Execute time %.3f milliseconds\n",
1526 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1527 (double)CLOCKS_PER_SEC);
1528 }
1529
1530 /* If find_match_limit is set, we want to do repeated matches with
1531 varying limits in order to find the minimum value. */
1532
1533 if (find_match_limit)
1534 {
1535 int min = 0;
1536 int mid = 64;
1537 int max = -1;
1538
1539 if (extra == NULL)
1540 {
1541 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1542 extra->flags = 0;
1543 }
1544 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1545
1546 for (;;)
1547 {
1548 extra->match_limit = mid;
1549 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1550 options | g_notempty, use_offsets, use_size_offsets);
1551 if (count == PCRE_ERROR_MATCHLIMIT)
1552 {
1553 /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1554 min = mid;
1555 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1556 }
1557 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1558 count == PCRE_ERROR_PARTIAL)
1559 {
1560 if (mid == min + 1)
1561 {
1562 fprintf(outfile, "Minimum match limit = %d\n", mid);
1563 break;
1564 }
1565 /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1566 max = mid;
1567 mid = (min + mid)/2;
1568 }
1569 else break; /* Some other error */
1570 }
1571
1572 extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1573 }
1574
1575 /* If callout_data is set, use the interface with additional data */
1576
1577 else if (callout_data_set)
1578 {
1579 if (extra == NULL)
1580 {
1581 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1582 extra->flags = 0;
1583 }
1584 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1585 extra->callout_data = &callout_data;
1586 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1587 options | g_notempty, use_offsets, use_size_offsets);
1588 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1589 }
1590
1591 /* The normal case is just to do the match once, with the default
1592 value of match_limit. */
1593
1594 else if (all_use_dfa || use_dfa)
1595 {
1596 int workspace[1000];
1597 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1598 options | g_notempty, use_offsets, use_size_offsets, workspace,
1599 sizeof(workspace)/sizeof(int));
1600 if (count == 0)
1601 {
1602 fprintf(outfile, "Matched, but too many subsidiary matches\n");
1603 count = use_size_offsets/2;
1604 }
1605 }
1606
1607 else
1608 {
1609 count = pcre_exec(re, extra, (char *)bptr, len,
1610 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1611 if (count == 0)
1612 {
1613 fprintf(outfile, "Matched, but too many substrings\n");
1614 count = use_size_offsets/3;
1615 }
1616 }
1617
1618 /* Matched */
1619
1620 if (count >= 0)
1621 {
1622 int i;
1623 for (i = 0; i < count * 2; i += 2)
1624 {
1625 if (use_offsets[i] < 0)
1626 fprintf(outfile, "%2d: <unset>\n", i/2);
1627 else
1628 {
1629 fprintf(outfile, "%2d: ", i/2);
1630 (void)pchars(bptr + use_offsets[i],
1631 use_offsets[i+1] - use_offsets[i], outfile);
1632 fprintf(outfile, "\n");
1633 if (i == 0)
1634 {
1635 if (do_showrest)
1636 {
1637 fprintf(outfile, " 0+ ");
1638 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1639 outfile);
1640 fprintf(outfile, "\n");
1641 }
1642 }
1643 }
1644 }
1645
1646 for (i = 0; i < 32; i++)
1647 {
1648 if ((copystrings & (1 << i)) != 0)
1649 {
1650 char copybuffer[16];
1651 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1652 i, copybuffer, sizeof(copybuffer));
1653 if (rc < 0)
1654 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1655 else
1656 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1657 }
1658 }
1659
1660 for (i = 0; i < 32; i++)
1661 {
1662 if ((getstrings & (1 << i)) != 0)
1663 {
1664 const char *substring;
1665 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1666 i, &substring);
1667 if (rc < 0)
1668 fprintf(outfile, "get substring %d failed %d\n", i, rc);
1669 else
1670 {
1671 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1672 /* free((void *)substring); */
1673 pcre_free_substring(substring);
1674 }
1675 }
1676 }
1677
1678 if (getlist)
1679 {
1680 const char **stringlist;
1681 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1682 &stringlist);
1683 if (rc < 0)
1684 fprintf(outfile, "get substring list failed %d\n", rc);
1685 else
1686 {
1687 for (i = 0; i < count; i++)
1688 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1689 if (stringlist[i] != NULL)
1690 fprintf(outfile, "string list not terminated by NULL\n");
1691 /* free((void *)stringlist); */
1692 pcre_free_substring_list(stringlist);
1693 }
1694 }
1695 }
1696
1697 /* There was a partial match */
1698
1699 else if (count == PCRE_ERROR_PARTIAL)
1700 {
1701 fprintf(outfile, "Partial match");
1702 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
1703 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
1704 bptr + use_offsets[0]);
1705 fprintf(outfile, "\n");
1706 break; /* Out of the /g loop */
1707 }
1708
1709 /* Failed to match. If this is a /g or /G loop and we previously set
1710 g_notempty after a null match, this is not necessarily the end.
1711 We want to advance the start offset, and continue. In the case of UTF-8
1712 matching, the advance must be one character, not one byte. Fudge the
1713 offset values to achieve this. We won't be at the end of the string -
1714 that was checked before setting g_notempty. */
1715
1716 else
1717 {
1718 if (g_notempty != 0)
1719 {
1720 int onechar = 1;
1721 use_offsets[0] = start_offset;
1722 if (use_utf8)
1723 {
1724 while (start_offset + onechar < len)
1725 {
1726 int tb = bptr[start_offset+onechar];
1727 if (tb <= 127) break;
1728 tb &= 0xc0;
1729 if (tb != 0 && tb != 0xc0) onechar++;
1730 }
1731 }
1732 use_offsets[1] = start_offset + onechar;
1733 }
1734 else
1735 {
1736 if (count == PCRE_ERROR_NOMATCH)
1737 {
1738 if (gmatched == 0) fprintf(outfile, "No match\n");
1739 }
1740 else fprintf(outfile, "Error %d\n", count);
1741 break; /* Out of the /g loop */
1742 }
1743 }
1744
1745 /* If not /g or /G we are done */
1746
1747 if (!do_g && !do_G) break;
1748
1749 /* If we have matched an empty string, first check to see if we are at
1750 the end of the subject. If so, the /g loop is over. Otherwise, mimic
1751 what Perl's /g option does. This turns out to be rather cunning. First
1752 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1753 same point. If this fails (picked up above) we advance to the next
1754 character. */
1755
1756 g_notempty = 0;
1757 if (use_offsets[0] == use_offsets[1])
1758 {
1759 if (use_offsets[0] == len) break;
1760 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1761 }
1762
1763 /* For /g, update the start offset, leaving the rest alone */
1764
1765 if (do_g) start_offset = use_offsets[1];
1766
1767 /* For /G, update the pointer and length */
1768
1769 else
1770 {
1771 bptr += use_offsets[1];
1772 len -= use_offsets[1];
1773 }
1774 } /* End of loop for /g and /G */
1775 } /* End of loop for data lines */
1776
1777 CONTINUE:
1778
1779 #if !defined NOPOSIX
1780 if (posix || do_posix) regfree(&preg);
1781 #endif
1782
1783 if (re != NULL) new_free(re);
1784 if (extra != NULL) new_free(extra);
1785 if (tables != NULL)
1786 {
1787 new_free((void *)tables);
1788 setlocale(LC_CTYPE, "C");
1789 }
1790 }
1791
1792 if (infile == stdin) fprintf(outfile, "\n");
1793
1794 EXIT:
1795
1796 if (infile != NULL && infile != stdin) fclose(infile);
1797 if (outfile != NULL && outfile != stdout) fclose(outfile);
1798
1799 free(buffer);
1800 free(dbuffer);
1801 free(pbuffer);
1802 free(offsets);
1803
1804 return yield;
1805 }
1806
1807 /* End of pcretest.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12