/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Contents of /code/branches/pcre16/pcretest.c

Parent Directory | Revision Log


Revision 75 - (show annotations) (download)
Sat Feb 24 21:40:37 2007 UTC (7 years, 7 months ago) by nigel
Original Path: code/trunk/pcretest.c
File MIME type: text/plain
File size: 51609 byte(s)
Load pcre-5.0 into code/trunk.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #include <ctype.h>
40 #include <stdio.h>
41 #include <string.h>
42 #include <stdlib.h>
43 #include <time.h>
44 #include <locale.h>
45 #include <errno.h>
46
47 /* We need the internal info for displaying the results of pcre_study(). Also
48 for getting the opcodes for showing compiled code. */
49
50 #define PCRE_SPY /* For Win32 build, import data, not export */
51 #include "internal.h"
52
53 /* It is possible to compile this test program without including support for
54 testing the POSIX interface, though this is not available via the standard
55 Makefile. */
56
57 #if !defined NOPOSIX
58 #include "pcreposix.h"
59 #endif
60
/* Fallback for systems whose <time.h> does not define CLOCKS_PER_SEC: use
CLK_TCK when it exists, otherwise assume 100 clock ticks per second. */

#ifndef CLOCKS_PER_SEC
#ifdef CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif

/* Number of times an operation is repeated inside the timing loops. */

#define LOOPREPEAT 500000

/* Sizes of the working buffers obtained from malloc() in main(): the input
buffer, the pattern copy (pbuffer) and dbuffer all share one size. */

#define BUFFER_SIZE 30000
#define PBUFFER_SIZE BUFFER_SIZE
#define DBUFFER_SIZE BUFFER_SIZE
74
75
/* File-scope state shared between main() and the helper/callback functions. */

static FILE *outfile;            /* output stream; static so the malloc hooks can use it */
static int log_store = 0;        /* non-zero => report code-space memory after compiling */
static int callout_count;        /* incremented by callout() for the "fail" callout number */
static int callout_extra;        /* non-zero => callout() also prints the captured substrings */
static int callout_fail_count;   /* callout() yields 1 once callout_count reaches this */
static int callout_fail_id;      /* the callout number that callout_count applies to */
static int first_callout;        /* cleared by callout(); while set, the subject is echoed */
static int show_malloc;          /* non-zero => the malloc/free hooks trace each call */
static int use_utf8;             /* non-zero => pchars() decodes its input as UTF-8 */
static size_t gotten_store;      /* size passed to the most recent new_malloc() call */

/* Copy of the current pattern, used by callout() to show the next pattern item */
static uschar *pbuffer = NULL;
88
89
/* Largest character value that fits in a UTF-8 sequence of (index + 1)
bytes. */

static const int utf8_table1[] = {
  0x0000007f,    /* 1 byte  */
  0x000007ff,    /* 2 bytes */
  0x0000ffff,    /* 3 bytes */
  0x001fffff,    /* 4 bytes */
  0x03ffffff,    /* 5 bytes */
  0x7fffffff     /* 6 bytes */
};

/* Marker bits OR-ed into the first byte of a sequence that has "index"
additional bytes. */

static const int utf8_table2[] = {
  0,
  0xc0,
  0xe0,
  0xf0,
  0xf8,
  0xfc
};

/* Mask that extracts the data bits from the first byte of a sequence that
has "index" additional bytes. */

static const int utf8_table3[] = {
  0xff,
  0x1f,
  0x0f,
  0x07,
  0x03,
  0x01
};
98
99
100
101 /*************************************************
102 * Print compiled regex *
103 *************************************************/
104
105 /* The code for doing this is held in a separate file that is also included in
106 pcre.c when it is compiled with the debug switch. It defines a function called
107 print_internals(), which uses a table of opcode lengths defined by the macro
108 OP_LENGTHS, whose name must be OP_lengths. It also uses a table that translates
109 Unicode property names to numbers; this is kept in a separate file. */
110
/* Table of opcode lengths, filled in from the OP_LENGTHS macro. The name
OP_lengths is required by the included printing code. */
static uschar OP_lengths[] = { OP_LENGTHS };
112
113 #include "ucp.h"
114 #include "ucptypetable.c"
115 #include "printint.c"
116
117
118
119 /*************************************************
120 * Read number from string *
121 *************************************************/
122
123 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
124 around with conditional compilation, just do the job by hand. It is only used
125 for unpicking the -o argument, so just keep it simple.
126
127 Arguments:
128 str string to be converted
129 endptr where to put the end pointer
130
131 Returns: the converted value, as an int
132 */
133
/* Parse an unsigned decimal number, skipping leading white space. The
address of the first unconsumed character is stored in *endptr; if no digits
are present the result is zero. */

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  unsigned char *cursor = str;
  int total = 0;

  /* Step over leading white space, stopping at the terminating zero. */
  for (; *cursor != 0 && isspace(*cursor); cursor++)
    ;

  /* Accumulate consecutive decimal digits. */
  for (; isdigit(*cursor); cursor++)
    total = total * 10 + (int)(*cursor - '0');

  *endptr = cursor;
  return total;
}
143
144
145
146 /*************************************************
147 * Convert character value to UTF-8 *
148 *************************************************/
149
150 /* This function takes an integer value in the range 0 - 0x7fffffff
151 and encodes it as a UTF-8 character in 0 to 6 bytes.
152
153 Arguments:
154 cvalue the character value
155 buffer pointer to buffer for result - at least 6 bytes long
156
157 Returns: number of characters placed in the buffer
158 -1 if input character is negative
159 0 if input character is positive but too big (only when
160 int is longer than 32 bits)
161 */
162
163 static int
164 ord2utf8(int cvalue, unsigned char *buffer)
165 {
166 register int i, j;
167 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
168 if (cvalue <= utf8_table1[i]) break;
169 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
170 if (cvalue < 0) return -1;
171
172 buffer += i;
173 for (j = i; j > 0; j--)
174 {
175 *buffer-- = 0x80 | (cvalue & 0x3f);
176 cvalue >>= 6;
177 }
178 *buffer = utf8_table2[i] | cvalue;
179 return i + 1;
180 }
181
182
183 /*************************************************
184 * Convert UTF-8 string to value *
185 *************************************************/
186
187 /* This function takes one or more bytes that represents a UTF-8 character,
188 and returns the value of the character.
189
190 Argument:
191 buffer a pointer to the byte vector
192 vptr a pointer to an int to receive the value
193
194 Returns: > 0 => the number of bytes consumed
195 -6 to 0 => malformed UTF-8 character at offset = (-return)
196 */
197
198 static int
199 utf82ord(unsigned char *buffer, int *vptr)
200 {
201 int c = *buffer++;
202 int d = c;
203 int i, j, s;
204
205 for (i = -1; i < 6; i++) /* i is number of additional bytes */
206 {
207 if ((d & 0x80) == 0) break;
208 d <<= 1;
209 }
210
211 if (i == -1) { *vptr = c; return 1; } /* ascii character */
212 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
213
214 /* i now has a value in the range 1-5 */
215
216 s = 6*i;
217 d = (c & utf8_table3[i]) << s;
218
219 for (j = 0; j < i; j++)
220 {
221 c = *buffer++;
222 if ((c & 0xc0) != 0x80) return -(j+1);
223 s -= 6;
224 d |= (c & 0x3f) << s;
225 }
226
227 /* Check that encoding was the correct unique one */
228
229 for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
230 if (d <= utf8_table1[j]) break;
231 if (j != i) return -(i+1);
232
233 /* Valid value */
234
235 *vptr = d;
236 return i+1;
237 }
238
239
240
241 /*************************************************
242 * Print character string *
243 *************************************************/
244
245 /* Character string printing function. Must handle UTF-8 strings in utf8
246 mode. Yields number of characters printed. If handed a NULL file, just counts
247 chars without printing. */
248
249 static int pchars(unsigned char *p, int length, FILE *f)
250 {
251 int c;
252 int yield = 0;
253
254 while (length-- > 0)
255 {
256 if (use_utf8)
257 {
258 int rc = utf82ord(p, &c);
259
260 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
261 {
262 length -= rc - 1;
263 p += rc;
264 if (c < 256 && isprint(c))
265 {
266 if (f != NULL) fprintf(f, "%c", c);
267 yield++;
268 }
269 else
270 {
271 int n;
272 if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
273 yield += n;
274 }
275 continue;
276 }
277 }
278
279 /* Not UTF-8, or malformed UTF-8 */
280
281 if (isprint(c = *(p++)))
282 {
283 if (f != NULL) fprintf(f, "%c", c);
284 yield++;
285 }
286 else
287 {
288 if (f != NULL) fprintf(f, "\\x%02x", c);
289 yield += 4;
290 }
291 }
292
293 return yield;
294 }
295
296
297
298 /*************************************************
299 * Callout function *
300 *************************************************/
301
302 /* Called from PCRE as a result of the (?C) item. We print out where we are in
303 the match. Yield zero unless more callouts than the fail count, or the callout
304 data is not zero. */
305
306 static int callout(pcre_callout_block *cb)
307 {
308 FILE *f = (first_callout | callout_extra)? outfile : NULL;
309 int i, pre_start, post_start, subject_length;
310
311 if (callout_extra)
312 {
313 fprintf(f, "Callout %d: last capture = %d\n",
314 cb->callout_number, cb->capture_last);
315
316 for (i = 0; i < cb->capture_top * 2; i += 2)
317 {
318 if (cb->offset_vector[i] < 0)
319 fprintf(f, "%2d: <unset>\n", i/2);
320 else
321 {
322 fprintf(f, "%2d: ", i/2);
323 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
324 cb->offset_vector[i+1] - cb->offset_vector[i], f);
325 fprintf(f, "\n");
326 }
327 }
328 }
329
330 /* Re-print the subject in canonical form, the first time or if giving full
331 datails. On subsequent calls in the same match, we use pchars just to find the
332 printed lengths of the substrings. */
333
334 if (f != NULL) fprintf(f, "--->");
335
336 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
337 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
338 cb->current_position - cb->start_match, f);
339
340 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
341
342 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
343 cb->subject_length - cb->current_position, f);
344
345 if (f != NULL) fprintf(f, "\n");
346
347 /* Always print appropriate indicators, with callout number if not already
348 shown. For automatic callouts, show the pattern offset. */
349
350 if (cb->callout_number == 255)
351 {
352 fprintf(outfile, "%+3d ", cb->pattern_position);
353 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
354 }
355 else
356 {
357 if (callout_extra) fprintf(outfile, " ");
358 else fprintf(outfile, "%3d ", cb->callout_number);
359 }
360
361 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
362 fprintf(outfile, "^");
363
364 if (post_start > 0)
365 {
366 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
367 fprintf(outfile, "^");
368 }
369
370 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
371 fprintf(outfile, " ");
372
373 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
374 pbuffer + cb->pattern_position);
375
376 fprintf(outfile, "\n");
377 first_callout = 0;
378
379 if (cb->callout_data != NULL)
380 {
381 int callout_data = *((int *)(cb->callout_data));
382 if (callout_data != 0)
383 {
384 fprintf(outfile, "Callout data = %d\n", callout_data);
385 return callout_data;
386 }
387 }
388
389 return (cb->callout_number != callout_fail_id)? 0 :
390 (++callout_count >= callout_fail_count)? 1 : 0;
391 }
392
393
394 /*************************************************
395 * Local malloc functions *
396 *************************************************/
397
398 /* Alternative malloc function, to test functionality and show the size of the
399 compiled re. */
400
401 static void *new_malloc(size_t size)
402 {
403 void *block = malloc(size);
404 gotten_store = size;
405 if (show_malloc)
406 fprintf(outfile, "malloc %3d %p\n", size, block);
407 return block;
408 }
409
410 static void new_free(void *block)
411 {
412 if (show_malloc)
413 fprintf(outfile, "free %p\n", block);
414 free(block);
415 }
416
417
418 /* For recursion malloc/free, to test stacking calls */
419
420 static void *stack_malloc(size_t size)
421 {
422 void *block = malloc(size);
423 if (show_malloc)
424 fprintf(outfile, "stack_malloc %3d %p\n", size, block);
425 return block;
426 }
427
428 static void stack_free(void *block)
429 {
430 if (show_malloc)
431 fprintf(outfile, "stack_free %p\n", block);
432 free(block);
433 }
434
435
436 /*************************************************
437 * Call pcre_fullinfo() *
438 *************************************************/
439
440 /* Get one piece of information from the pcre_fullinfo() function */
441
442 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
443 {
444 int rc;
445 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
446 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
447 }
448
449
450
451 /*************************************************
452 * Byte flipping function *
453 *************************************************/
454
/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value    the value to be flipped
  n        the size of the field in bytes; 2 selects a 16-bit swap, any
           other value is handled as a 32-bit swap

Returns:   the value with its low-order 2 or 4 bytes reversed

The arithmetic is done on an unsigned copy so that no bit is ever shifted
into the sign bit of a signed long, which is undefined when long is 32 bits
wide. */

static long int
byteflip(long int value, int n)
{
  unsigned long int v = (unsigned long int)value;

  if (n == 2)
    return (long int)(((v & 0x00ffUL) << 8) | ((v & 0xff00UL) >> 8));

  return (long int)(((v & 0x000000ffUL) << 24) |
                    ((v & 0x0000ff00UL) <<  8) |
                    ((v & 0x00ff0000UL) >>  8) |
                    ((v & 0xff000000UL) >> 24));
}
464
465
466
467
468 /*************************************************
469 * Main Program *
470 *************************************************/
471
472 /* Read lines from named file or stdin and write to named file or stdout; lines
473 consist of a regular expression, in delimiters and optionally followed by
474 options, followed by a set of test data, terminated by an empty line. */
475
476 int main(int argc, char **argv)
477 {
478 FILE *infile = stdin;
479 int options = 0;
480 int study_options = 0;
481 int op = 1;
482 int timeit = 0;
483 int showinfo = 0;
484 int showstore = 0;
485 int size_offsets = 45;
486 int size_offsets_max;
487 int *offsets;
488 #if !defined NOPOSIX
489 int posix = 0;
490 #endif
491 int debug = 0;
492 int done = 0;
493
494 unsigned char *buffer;
495 unsigned char *dbuffer;
496
497 /* Get buffers from malloc() so that Electric Fence will check their misuse
498 when I am debugging. */
499
500 buffer = (unsigned char *)malloc(BUFFER_SIZE);
501 dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
502 pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
503
504 /* The outfile variable is static so that new_malloc can use it. The _setmode()
505 stuff is some magic that I don't understand, but which apparently does good
506 things in Windows. It's related to line terminations. */
507
508 #if defined(_WIN32) || defined(WIN32)
509 _setmode( _fileno( stdout ), 0x8000 );
510 #endif /* defined(_WIN32) || defined(WIN32) */
511
512 outfile = stdout;
513
514 /* Scan options */
515
516 while (argc > 1 && argv[op][0] == '-')
517 {
518 unsigned char *endptr;
519
520 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
521 showstore = 1;
522 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
523 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
524 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
525 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
526 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
527 *endptr == 0))
528 {
529 op++;
530 argc--;
531 }
532 #if !defined NOPOSIX
533 else if (strcmp(argv[op], "-p") == 0) posix = 1;
534 #endif
535 else if (strcmp(argv[op], "-C") == 0)
536 {
537 int rc;
538 printf("PCRE version %s\n", pcre_version());
539 printf("Compiled with\n");
540 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
541 printf(" %sUTF-8 support\n", rc? "" : "No ");
542 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
543 printf(" %sUnicode properties support\n", rc? "" : "No ");
544 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
545 printf(" Newline character is %s\n", (rc == '\r')? "CR" : "LF");
546 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
547 printf(" Internal link size = %d\n", rc);
548 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
549 printf(" POSIX malloc threshold = %d\n", rc);
550 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
551 printf(" Default match limit = %d\n", rc);
552 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
553 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
554 exit(0);
555 }
556 else
557 {
558 printf("** Unknown or malformed option %s\n", argv[op]);
559 printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
560 printf(" -C show PCRE compile-time options and exit\n");
561 printf(" -d debug: show compiled code; implies -i\n"
562 " -i show information about compiled pattern\n"
563 " -m output memory used information\n"
564 " -o <n> set size of offsets vector to <n>\n");
565 #if !defined NOPOSIX
566 printf(" -p use POSIX interface\n");
567 #endif
568 printf(" -s output store (memory) used information\n"
569 " -t time compilation and execution\n");
570 return 1;
571 }
572 op++;
573 argc--;
574 }
575
576 /* Get the store for the offsets vector, and remember what it was */
577
578 size_offsets_max = size_offsets;
579 offsets = (int *)malloc(size_offsets_max * sizeof(int));
580 if (offsets == NULL)
581 {
582 printf("** Failed to get %d bytes of memory for offsets vector\n",
583 size_offsets_max * sizeof(int));
584 return 1;
585 }
586
587 /* Sort out the input and output files */
588
589 if (argc > 1)
590 {
591 infile = fopen(argv[op], "rb");
592 if (infile == NULL)
593 {
594 printf("** Failed to open %s\n", argv[op]);
595 return 1;
596 }
597 }
598
599 if (argc > 2)
600 {
601 outfile = fopen(argv[op+1], "wb");
602 if (outfile == NULL)
603 {
604 printf("** Failed to open %s\n", argv[op+1]);
605 return 1;
606 }
607 }
608
609 /* Set alternative malloc function */
610
611 pcre_malloc = new_malloc;
612 pcre_free = new_free;
613 pcre_stack_malloc = stack_malloc;
614 pcre_stack_free = stack_free;
615
616 /* Heading line, then prompt for first regex if stdin */
617
618 fprintf(outfile, "PCRE version %s\n\n", pcre_version());
619
620 /* Main loop */
621
622 while (!done)
623 {
624 pcre *re = NULL;
625 pcre_extra *extra = NULL;
626
627 #if !defined NOPOSIX /* There are still compilers that require no indent */
628 regex_t preg;
629 int do_posix = 0;
630 #endif
631
632 const char *error;
633 unsigned char *p, *pp, *ppp;
634 unsigned char *to_file = NULL;
635 const unsigned char *tables = NULL;
636 unsigned long int true_size, true_study_size = 0;
637 size_t size, regex_gotten_store;
638 int do_study = 0;
639 int do_debug = debug;
640 int do_G = 0;
641 int do_g = 0;
642 int do_showinfo = showinfo;
643 int do_showrest = 0;
644 int do_flip = 0;
645 int erroroffset, len, delimiter;
646
647 use_utf8 = 0;
648
649 if (infile == stdin) printf(" re> ");
650 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
651 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
652 fflush(outfile);
653
654 p = buffer;
655 while (isspace(*p)) p++;
656 if (*p == 0) continue;
657
658 /* See if the pattern is to be loaded pre-compiled from a file. */
659
660 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
661 {
662 unsigned long int magic;
663 uschar sbuf[8];
664 FILE *f;
665
666 p++;
667 pp = p + (int)strlen((char *)p);
668 while (isspace(pp[-1])) pp--;
669 *pp = 0;
670
671 f = fopen((char *)p, "rb");
672 if (f == NULL)
673 {
674 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
675 continue;
676 }
677
678 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
679
680 true_size =
681 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
682 true_study_size =
683 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
684
685 re = (real_pcre *)new_malloc(true_size);
686 regex_gotten_store = gotten_store;
687
688 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
689
690 magic = ((real_pcre *)re)->magic_number;
691 if (magic != MAGIC_NUMBER)
692 {
693 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
694 {
695 do_flip = 1;
696 }
697 else
698 {
699 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
700 fclose(f);
701 continue;
702 }
703 }
704
705 fprintf(outfile, "Compiled regex%s loaded from %s\n",
706 do_flip? " (byte-inverted)" : "", p);
707
708 /* Need to know if UTF-8 for printing data strings */
709
710 new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
711 use_utf8 = (options & PCRE_UTF8) != 0;
712
713 /* Now see if there is any following study data */
714
715 if (true_study_size != 0)
716 {
717 pcre_study_data *psd;
718
719 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
720 extra->flags = PCRE_EXTRA_STUDY_DATA;
721
722 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
723 extra->study_data = psd;
724
725 if (fread(psd, 1, true_study_size, f) != true_study_size)
726 {
727 FAIL_READ:
728 fprintf(outfile, "Failed to read data from %s\n", p);
729 if (extra != NULL) new_free(extra);
730 if (re != NULL) new_free(re);
731 fclose(f);
732 continue;
733 }
734 fprintf(outfile, "Study data loaded from %s\n", p);
735 do_study = 1; /* To get the data output if requested */
736 }
737 else fprintf(outfile, "No study data\n");
738
739 fclose(f);
740 goto SHOW_INFO;
741 }
742
743 /* In-line pattern (the usual case). Get the delimiter and seek the end of
744 the pattern; if it isn't complete, read more. */
745
746 delimiter = *p++;
747
748 if (isalnum(delimiter) || delimiter == '\\')
749 {
750 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
751 goto SKIP_DATA;
752 }
753
754 pp = p;
755
756 for(;;)
757 {
758 while (*pp != 0)
759 {
760 if (*pp == '\\' && pp[1] != 0) pp++;
761 else if (*pp == delimiter) break;
762 pp++;
763 }
764 if (*pp != 0) break;
765
766 len = BUFFER_SIZE - (pp - buffer);
767 if (len < 256)
768 {
769 fprintf(outfile, "** Expression too long - missing delimiter?\n");
770 goto SKIP_DATA;
771 }
772
773 if (infile == stdin) printf(" > ");
774 if (fgets((char *)pp, len, infile) == NULL)
775 {
776 fprintf(outfile, "** Unexpected EOF\n");
777 done = 1;
778 goto CONTINUE;
779 }
780 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
781 }
782
783 /* If the first character after the delimiter is backslash, make
784 the pattern end with backslash. This is purely to provide a way
785 of testing for the error message when a pattern ends with backslash. */
786
787 if (pp[1] == '\\') *pp++ = '\\';
788
789 /* Terminate the pattern at the delimiter, and save a copy of the pattern
790 for callouts. */
791
792 *pp++ = 0;
793 strcpy((char *)pbuffer, (char *)p);
794
795 /* Look for options after final delimiter */
796
797 options = 0;
798 study_options = 0;
799 log_store = showstore; /* default from command line */
800
801 while (*pp != 0)
802 {
803 switch (*pp++)
804 {
805 case 'g': do_g = 1; break;
806 case 'i': options |= PCRE_CASELESS; break;
807 case 'm': options |= PCRE_MULTILINE; break;
808 case 's': options |= PCRE_DOTALL; break;
809 case 'x': options |= PCRE_EXTENDED; break;
810
811 case '+': do_showrest = 1; break;
812 case 'A': options |= PCRE_ANCHORED; break;
813 case 'C': options |= PCRE_AUTO_CALLOUT; break;
814 case 'D': do_debug = do_showinfo = 1; break;
815 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
816 case 'F': do_flip = 1; break;
817 case 'G': do_G = 1; break;
818 case 'I': do_showinfo = 1; break;
819 case 'M': log_store = 1; break;
820 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
821
822 #if !defined NOPOSIX
823 case 'P': do_posix = 1; break;
824 #endif
825
826 case 'S': do_study = 1; break;
827 case 'U': options |= PCRE_UNGREEDY; break;
828 case 'X': options |= PCRE_EXTRA; break;
829 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
830 case '?': options |= PCRE_NO_UTF8_CHECK; break;
831
832 case 'L':
833 ppp = pp;
834 while (*ppp != '\n' && *ppp != ' ') ppp++;
835 *ppp = 0;
836 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
837 {
838 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
839 goto SKIP_DATA;
840 }
841 tables = pcre_maketables();
842 pp = ppp;
843 break;
844
845 case '>':
846 to_file = pp;
847 while (*pp != 0) pp++;
848 while (isspace(pp[-1])) pp--;
849 *pp = 0;
850 break;
851
852 case '\n': case ' ': break;
853
854 default:
855 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
856 goto SKIP_DATA;
857 }
858 }
859
860 /* Handle compiling via the POSIX interface, which doesn't support the
861 timing, showing, or debugging options, nor the ability to pass over
862 local character tables. */
863
864 #if !defined NOPOSIX
865 if (posix || do_posix)
866 {
867 int rc;
868 int cflags = 0;
869
870 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
871 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
872 rc = regcomp(&preg, (char *)p, cflags);
873
874 /* Compilation failed; go back for another re, skipping to blank line
875 if non-interactive. */
876
877 if (rc != 0)
878 {
879 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
880 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
881 goto SKIP_DATA;
882 }
883 }
884
885 /* Handle compiling via the native interface */
886
887 else
888 #endif /* !defined NOPOSIX */
889
890 {
891 if (timeit)
892 {
893 register int i;
894 clock_t time_taken;
895 clock_t start_time = clock();
896 for (i = 0; i < LOOPREPEAT; i++)
897 {
898 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
899 if (re != NULL) free(re);
900 }
901 time_taken = clock() - start_time;
902 fprintf(outfile, "Compile time %.3f milliseconds\n",
903 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
904 (double)CLOCKS_PER_SEC);
905 }
906
907 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
908
909 /* Compilation failed; go back for another re, skipping to blank line
910 if non-interactive. */
911
912 if (re == NULL)
913 {
914 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
915 SKIP_DATA:
916 if (infile != stdin)
917 {
918 for (;;)
919 {
920 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
921 {
922 done = 1;
923 goto CONTINUE;
924 }
925 len = (int)strlen((char *)buffer);
926 while (len > 0 && isspace(buffer[len-1])) len--;
927 if (len == 0) break;
928 }
929 fprintf(outfile, "\n");
930 }
931 goto CONTINUE;
932 }
933
934 /* Compilation succeeded; print data if required. There are now two
935 info-returning functions. The old one has a limited interface and
936 returns only limited data. Check that it agrees with the newer one. */
937
938 if (log_store)
939 fprintf(outfile, "Memory allocation (code space): %d\n",
940 (int)(gotten_store -
941 sizeof(real_pcre) -
942 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
943
944 /* Extract the size for possible writing before possibly flipping it,
945 and remember the store that was got. */
946
947 true_size = ((real_pcre *)re)->size;
948 regex_gotten_store = gotten_store;
949
950 /* If /S was present, study the regexp to generate additional info to
951 help with the matching. */
952
953 if (do_study)
954 {
955 if (timeit)
956 {
957 register int i;
958 clock_t time_taken;
959 clock_t start_time = clock();
960 for (i = 0; i < LOOPREPEAT; i++)
961 extra = pcre_study(re, study_options, &error);
962 time_taken = clock() - start_time;
963 if (extra != NULL) free(extra);
964 fprintf(outfile, " Study time %.3f milliseconds\n",
965 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
966 (double)CLOCKS_PER_SEC);
967 }
968 extra = pcre_study(re, study_options, &error);
969 if (error != NULL)
970 fprintf(outfile, "Failed to study: %s\n", error);
971 else if (extra != NULL)
972 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
973 }
974
975 /* If the 'F' option was present, we flip the bytes of all the integer
976 fields in the regex data block and the study block. This is to make it
977 possible to test PCRE's handling of byte-flipped patterns, e.g. those
978 compiled on a different architecture. */
979
980 if (do_flip)
981 {
982 real_pcre *rre = (real_pcre *)re;
983 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
984 rre->size = byteflip(rre->size, sizeof(rre->size));
985 rre->options = byteflip(rre->options, sizeof(rre->options));
986 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
987 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
988 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
989 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
990 rre->name_table_offset = byteflip(rre->name_table_offset,
991 sizeof(rre->name_table_offset));
992 rre->name_entry_size = byteflip(rre->name_entry_size,
993 sizeof(rre->name_entry_size));
994 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
995
996 if (extra != NULL)
997 {
998 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
999 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1000 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1001 }
1002 }
1003
1004 /* Extract information from the compiled data if required */
1005
1006 SHOW_INFO:
1007
1008 if (do_showinfo)
1009 {
1010 unsigned long int get_options, all_options;
1011 int old_first_char, old_options, old_count;
1012 int count, backrefmax, first_char, need_char;
1013 int nameentrysize, namecount;
1014 const uschar *nametable;
1015
1016 if (do_debug)
1017 {
1018 fprintf(outfile, "------------------------------------------------------------------\n");
1019 print_internals(re, outfile);
1020 }
1021
1022 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1023 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1024 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1025 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1026 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1027 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1028 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1029 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1030 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1031
1032 old_count = pcre_info(re, &old_options, &old_first_char);
1033 if (count < 0) fprintf(outfile,
1034 "Error %d from pcre_info()\n", count);
1035 else
1036 {
1037 if (old_count != count) fprintf(outfile,
1038 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1039 old_count);
1040
1041 if (old_first_char != first_char) fprintf(outfile,
1042 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1043 first_char, old_first_char);
1044
1045 if (old_options != (int)get_options) fprintf(outfile,
1046 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1047 get_options, old_options);
1048 }
1049
1050 if (size != regex_gotten_store) fprintf(outfile,
1051 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1052 size, regex_gotten_store);
1053
1054 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1055 if (backrefmax > 0)
1056 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1057
1058 if (namecount > 0)
1059 {
1060 fprintf(outfile, "Named capturing subpatterns:\n");
1061 while (namecount-- > 0)
1062 {
1063 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1064 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1065 GET2(nametable, 0));
1066 nametable += nameentrysize;
1067 }
1068 }
1069
1070 /* The NOPARTIAL bit is a private bit in the options, so we have
1071 to fish it out via our back door */
1072
1073 all_options = ((real_pcre *)re)->options;
1074 if (do_flip)
1075 {
1076 all_options = byteflip(all_options, sizeof(all_options));
1077 }
1078
1079 if ((all_options & PCRE_NOPARTIAL) != 0)
1080 fprintf(outfile, "Partial matching not supported\n");
1081
1082 if (get_options == 0) fprintf(outfile, "No options\n");
1083 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",
1084 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1085 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1086 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1087 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1088 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1089 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1090 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1091 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1092 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1093 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1094
1095 if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1096 fprintf(outfile, "Case state changes\n");
1097
1098 if (first_char == -1)
1099 {
1100 fprintf(outfile, "First char at start or follows \\n\n");
1101 }
1102 else if (first_char < 0)
1103 {
1104 fprintf(outfile, "No first char\n");
1105 }
1106 else
1107 {
1108 int ch = first_char & 255;
1109 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1110 "" : " (caseless)";
1111 if (isprint(ch))
1112 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1113 else
1114 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1115 }
1116
1117 if (need_char < 0)
1118 {
1119 fprintf(outfile, "No need char\n");
1120 }
1121 else
1122 {
1123 int ch = need_char & 255;
1124 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1125 "" : " (caseless)";
1126 if (isprint(ch))
1127 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1128 else
1129 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1130 }
1131
1132 /* Don't output study size; at present it is in any case a fixed
1133 value, but it varies, depending on the computer architecture, and
1134 so messes up the test suite. (And with the /F option, it might be
1135 flipped.) */
1136
1137 if (do_study)
1138 {
1139 if (extra == NULL)
1140 fprintf(outfile, "Study returned NULL\n");
1141 else
1142 {
1143 uschar *start_bits = NULL;
1144 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1145
1146 if (start_bits == NULL)
1147 fprintf(outfile, "No starting byte set\n");
1148 else
1149 {
1150 int i;
1151 int c = 24;
1152 fprintf(outfile, "Starting byte set: ");
1153 for (i = 0; i < 256; i++)
1154 {
1155 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1156 {
1157 if (c > 75)
1158 {
1159 fprintf(outfile, "\n ");
1160 c = 2;
1161 }
1162 if (isprint(i) && i != ' ')
1163 {
1164 fprintf(outfile, "%c ", i);
1165 c += 2;
1166 }
1167 else
1168 {
1169 fprintf(outfile, "\\x%02x ", i);
1170 c += 5;
1171 }
1172 }
1173 }
1174 fprintf(outfile, "\n");
1175 }
1176 }
1177 }
1178 }
1179
1180 /* If the '>' option was present, we write out the regex to a file, and
1181 that is all. The first 8 bytes of the file are the regex length and then
1182 the study length, in big-endian order. */
1183
1184 if (to_file != NULL)
1185 {
1186 FILE *f = fopen((char *)to_file, "wb");
1187 if (f == NULL)
1188 {
1189 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1190 }
1191 else
1192 {
1193 uschar sbuf[8];
1194 sbuf[0] = (true_size >> 24) & 255;
1195 sbuf[1] = (true_size >> 16) & 255;
1196 sbuf[2] = (true_size >> 8) & 255;
1197 sbuf[3] = (true_size) & 255;
1198
1199 sbuf[4] = (true_study_size >> 24) & 255;
1200 sbuf[5] = (true_study_size >> 16) & 255;
1201 sbuf[6] = (true_study_size >> 8) & 255;
1202 sbuf[7] = (true_study_size) & 255;
1203
1204 if (fwrite(sbuf, 1, 8, f) < 8 ||
1205 fwrite(re, 1, true_size, f) < true_size)
1206 {
1207 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1208 }
1209 else
1210 {
1211 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1212 if (extra != NULL)
1213 {
1214 if (fwrite(extra->study_data, 1, true_study_size, f) <
1215 true_study_size)
1216 {
1217 fprintf(outfile, "Write error on %s: %s\n", to_file,
1218 strerror(errno));
1219 }
1220 else fprintf(outfile, "Study data written to %s\n", to_file);
1221 }
1222 }
1223 fclose(f);
1224 }
1225 continue; /* With next regex */
1226 }
1227 } /* End of non-POSIX compile */
1228
1229 /* Read data lines and test them */
1230
1231 for (;;)
1232 {
1233 unsigned char *q;
1234 unsigned char *bptr = dbuffer;
1235 int *use_offsets = offsets;
1236 int use_size_offsets = size_offsets;
1237 int callout_data = 0;
1238 int callout_data_set = 0;
1239 int count, c;
1240 int copystrings = 0;
1241 int find_match_limit = 0;
1242 int getstrings = 0;
1243 int getlist = 0;
1244 int gmatched = 0;
1245 int start_offset = 0;
1246 int g_notempty = 0;
1247
1248 options = 0;
1249
1250 pcre_callout = callout;
1251 first_callout = 1;
1252 callout_extra = 0;
1253 callout_count = 0;
1254 callout_fail_count = 999999;
1255 callout_fail_id = -1;
1256 show_malloc = 0;
1257
1258 if (infile == stdin) printf("data> ");
1259 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1260 {
1261 done = 1;
1262 goto CONTINUE;
1263 }
1264 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1265
1266 len = (int)strlen((char *)buffer);
1267 while (len > 0 && isspace(buffer[len-1])) len--;
1268 buffer[len] = 0;
1269 if (len == 0) break;
1270
1271 p = buffer;
1272 while (isspace(*p)) p++;
1273
1274 q = dbuffer;
1275 while ((c = *p++) != 0)
1276 {
1277 int i = 0;
1278 int n = 0;
1279
1280 if (c == '\\') switch ((c = *p++))
1281 {
1282 case 'a': c = 7; break;
1283 case 'b': c = '\b'; break;
1284 case 'e': c = 27; break;
1285 case 'f': c = '\f'; break;
1286 case 'n': c = '\n'; break;
1287 case 'r': c = '\r'; break;
1288 case 't': c = '\t'; break;
1289 case 'v': c = '\v'; break;
1290
1291 case '0': case '1': case '2': case '3':
1292 case '4': case '5': case '6': case '7':
1293 c -= '0';
1294 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1295 c = c * 8 + *p++ - '0';
1296 break;
1297
1298 case 'x':
1299
1300 /* Handle \x{..} specially - new Perl thing for utf8 */
1301
1302 if (*p == '{')
1303 {
1304 unsigned char *pt = p;
1305 c = 0;
1306 while (isxdigit(*(++pt)))
1307 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1308 if (*pt == '}')
1309 {
1310 unsigned char buff8[8];
1311 int ii, utn;
1312 utn = ord2utf8(c, buff8);
1313 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1314 c = buff8[ii]; /* Last byte */
1315 p = pt + 1;
1316 break;
1317 }
1318 /* Not correct form; fall through */
1319 }
1320
1321 /* Ordinary \x */
1322
1323 c = 0;
1324 while (i++ < 2 && isxdigit(*p))
1325 {
1326 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1327 p++;
1328 }
1329 break;
1330
1331 case 0: /* \ followed by EOF allows for an empty line */
1332 p--;
1333 continue;
1334
1335 case '>':
1336 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1337 continue;
1338
1339 case 'A': /* Option setting */
1340 options |= PCRE_ANCHORED;
1341 continue;
1342
1343 case 'B':
1344 options |= PCRE_NOTBOL;
1345 continue;
1346
1347 case 'C':
1348 if (isdigit(*p)) /* Set copy string */
1349 {
1350 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1351 copystrings |= 1 << n;
1352 }
1353 else if (isalnum(*p))
1354 {
1355 uschar name[256];
1356 uschar *npp = name;
1357 while (isalnum(*p)) *npp++ = *p++;
1358 *npp = 0;
1359 n = pcre_get_stringnumber(re, (char *)name);
1360 if (n < 0)
1361 fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1362 else copystrings |= 1 << n;
1363 }
1364 else if (*p == '+')
1365 {
1366 callout_extra = 1;
1367 p++;
1368 }
1369 else if (*p == '-')
1370 {
1371 pcre_callout = NULL;
1372 p++;
1373 }
1374 else if (*p == '!')
1375 {
1376 callout_fail_id = 0;
1377 p++;
1378 while(isdigit(*p))
1379 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1380 callout_fail_count = 0;
1381 if (*p == '!')
1382 {
1383 p++;
1384 while(isdigit(*p))
1385 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1386 }
1387 }
1388 else if (*p == '*')
1389 {
1390 int sign = 1;
1391 callout_data = 0;
1392 if (*(++p) == '-') { sign = -1; p++; }
1393 while(isdigit(*p))
1394 callout_data = callout_data * 10 + *p++ - '0';
1395 callout_data *= sign;
1396 callout_data_set = 1;
1397 }
1398 continue;
1399
1400 case 'G':
1401 if (isdigit(*p))
1402 {
1403 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1404 getstrings |= 1 << n;
1405 }
1406 else if (isalnum(*p))
1407 {
1408 uschar name[256];
1409 uschar *npp = name;
1410 while (isalnum(*p)) *npp++ = *p++;
1411 *npp = 0;
1412 n = pcre_get_stringnumber(re, (char *)name);
1413 if (n < 0)
1414 fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1415 else getstrings |= 1 << n;
1416 }
1417 continue;
1418
1419 case 'L':
1420 getlist = 1;
1421 continue;
1422
1423 case 'M':
1424 find_match_limit = 1;
1425 continue;
1426
1427 case 'N':
1428 options |= PCRE_NOTEMPTY;
1429 continue;
1430
1431 case 'O':
1432 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1433 if (n > size_offsets_max)
1434 {
1435 size_offsets_max = n;
1436 free(offsets);
1437 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1438 if (offsets == NULL)
1439 {
1440 printf("** Failed to get %d bytes of memory for offsets vector\n",
1441 size_offsets_max * sizeof(int));
1442 return 1;
1443 }
1444 }
1445 use_size_offsets = n;
1446 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1447 continue;
1448
1449 case 'P':
1450 options |= PCRE_PARTIAL;
1451 continue;
1452
1453 case 'S':
1454 show_malloc = 1;
1455 continue;
1456
1457 case 'Z':
1458 options |= PCRE_NOTEOL;
1459 continue;
1460
1461 case '?':
1462 options |= PCRE_NO_UTF8_CHECK;
1463 continue;
1464 }
1465 *q++ = c;
1466 }
1467 *q = 0;
1468 len = q - dbuffer;
1469
1470 /* Handle matching via the POSIX interface, which does not
1471 support timing or playing with the match limit or callout data. */
1472
1473 #if !defined NOPOSIX
1474 if (posix || do_posix)
1475 {
1476 int rc;
1477 int eflags = 0;
1478 regmatch_t *pmatch = NULL;
1479 if (use_size_offsets > 0)
1480 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1481 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1482 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1483
1484 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1485
1486 if (rc != 0)
1487 {
1488 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1489 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1490 }
1491 else
1492 {
1493 size_t i;
1494 for (i = 0; i < (size_t)use_size_offsets; i++)
1495 {
1496 if (pmatch[i].rm_so >= 0)
1497 {
1498 fprintf(outfile, "%2d: ", (int)i);
1499 (void)pchars(dbuffer + pmatch[i].rm_so,
1500 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1501 fprintf(outfile, "\n");
1502 if (i == 0 && do_showrest)
1503 {
1504 fprintf(outfile, " 0+ ");
1505 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1506 outfile);
1507 fprintf(outfile, "\n");
1508 }
1509 }
1510 }
1511 }
1512 free(pmatch);
1513 }
1514
1515 /* Handle matching via the native interface - repeats for /g and /G */
1516
1517 else
1518 #endif /* !defined NOPOSIX */
1519
1520 for (;; gmatched++) /* Loop for /g or /G */
1521 {
1522 if (timeit)
1523 {
1524 register int i;
1525 clock_t time_taken;
1526 clock_t start_time = clock();
1527 for (i = 0; i < LOOPREPEAT; i++)
1528 count = pcre_exec(re, extra, (char *)bptr, len,
1529 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1530 time_taken = clock() - start_time;
1531 fprintf(outfile, "Execute time %.3f milliseconds\n",
1532 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1533 (double)CLOCKS_PER_SEC);
1534 }
1535
1536 /* If find_match_limit is set, we want to do repeated matches with
1537 varying limits in order to find the minimum value. */
1538
1539 if (find_match_limit)
1540 {
1541 int min = 0;
1542 int mid = 64;
1543 int max = -1;
1544
1545 if (extra == NULL)
1546 {
1547 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1548 extra->flags = 0;
1549 }
1550 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1551
1552 for (;;)
1553 {
1554 extra->match_limit = mid;
1555 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1556 options | g_notempty, use_offsets, use_size_offsets);
1557 if (count == PCRE_ERROR_MATCHLIMIT)
1558 {
1559 /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1560 min = mid;
1561 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1562 }
1563 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1564 count == PCRE_ERROR_PARTIAL)
1565 {
1566 if (mid == min + 1)
1567 {
1568 fprintf(outfile, "Minimum match limit = %d\n", mid);
1569 break;
1570 }
1571 /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1572 max = mid;
1573 mid = (min + mid)/2;
1574 }
1575 else break; /* Some other error */
1576 }
1577
1578 extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1579 }
1580
1581 /* If callout_data is set, use the interface with additional data */
1582
1583 else if (callout_data_set)
1584 {
1585 if (extra == NULL)
1586 {
1587 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1588 extra->flags = 0;
1589 }
1590 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1591 extra->callout_data = &callout_data;
1592 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1593 options | g_notempty, use_offsets, use_size_offsets);
1594 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1595 }
1596
1597 /* The normal case is just to do the match once, with the default
1598 value of match_limit. */
1599
1600 else
1601 {
1602 count = pcre_exec(re, extra, (char *)bptr, len,
1603 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1604 }
1605
1606 if (count == 0)
1607 {
1608 fprintf(outfile, "Matched, but too many substrings\n");
1609 count = use_size_offsets/3;
1610 }
1611
1612 /* Matched */
1613
1614 if (count >= 0)
1615 {
1616 int i;
1617 for (i = 0; i < count * 2; i += 2)
1618 {
1619 if (use_offsets[i] < 0)
1620 fprintf(outfile, "%2d: <unset>\n", i/2);
1621 else
1622 {
1623 fprintf(outfile, "%2d: ", i/2);
1624 (void)pchars(bptr + use_offsets[i],
1625 use_offsets[i+1] - use_offsets[i], outfile);
1626 fprintf(outfile, "\n");
1627 if (i == 0)
1628 {
1629 if (do_showrest)
1630 {
1631 fprintf(outfile, " 0+ ");
1632 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1633 outfile);
1634 fprintf(outfile, "\n");
1635 }
1636 }
1637 }
1638 }
1639
1640 for (i = 0; i < 32; i++)
1641 {
1642 if ((copystrings & (1 << i)) != 0)
1643 {
1644 char copybuffer[16];
1645 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1646 i, copybuffer, sizeof(copybuffer));
1647 if (rc < 0)
1648 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1649 else
1650 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1651 }
1652 }
1653
1654 for (i = 0; i < 32; i++)
1655 {
1656 if ((getstrings & (1 << i)) != 0)
1657 {
1658 const char *substring;
1659 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1660 i, &substring);
1661 if (rc < 0)
1662 fprintf(outfile, "get substring %d failed %d\n", i, rc);
1663 else
1664 {
1665 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1666 /* free((void *)substring); */
1667 pcre_free_substring(substring);
1668 }
1669 }
1670 }
1671
1672 if (getlist)
1673 {
1674 const char **stringlist;
1675 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1676 &stringlist);
1677 if (rc < 0)
1678 fprintf(outfile, "get substring list failed %d\n", rc);
1679 else
1680 {
1681 for (i = 0; i < count; i++)
1682 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1683 if (stringlist[i] != NULL)
1684 fprintf(outfile, "string list not terminated by NULL\n");
1685 /* free((void *)stringlist); */
1686 pcre_free_substring_list(stringlist);
1687 }
1688 }
1689 }
1690
1691 /* There was a partial match */
1692
1693 else if (count == PCRE_ERROR_PARTIAL)
1694 {
1695 fprintf(outfile, "Partial match\n");
1696 break; /* Out of the /g loop */
1697 }
1698
1699 /* Failed to match. If this is a /g or /G loop and we previously set
1700 g_notempty after a null match, this is not necessarily the end.
1701 We want to advance the start offset, and continue. In the case of UTF-8
1702 matching, the advance must be one character, not one byte. Fudge the
1703 offset values to achieve this. We won't be at the end of the string -
1704 that was checked before setting g_notempty. */
1705
1706 else
1707 {
1708 if (g_notempty != 0)
1709 {
1710 int onechar = 1;
1711 use_offsets[0] = start_offset;
1712 if (use_utf8)
1713 {
1714 while (start_offset + onechar < len)
1715 {
1716 int tb = bptr[start_offset+onechar];
1717 if (tb <= 127) break;
1718 tb &= 0xc0;
1719 if (tb != 0 && tb != 0xc0) onechar++;
1720 }
1721 }
1722 use_offsets[1] = start_offset + onechar;
1723 }
1724 else
1725 {
1726 if (count == PCRE_ERROR_NOMATCH)
1727 {
1728 if (gmatched == 0) fprintf(outfile, "No match\n");
1729 }
1730 else fprintf(outfile, "Error %d\n", count);
1731 break; /* Out of the /g loop */
1732 }
1733 }
1734
1735 /* If not /g or /G we are done */
1736
1737 if (!do_g && !do_G) break;
1738
1739 /* If we have matched an empty string, first check to see if we are at
1740 the end of the subject. If so, the /g loop is over. Otherwise, mimic
1741 what Perl's /g option does. This turns out to be rather cunning. First
1742 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1743 same point. If this fails (picked up above) we advance to the next
1744 character. */
1745
1746 g_notempty = 0;
1747 if (use_offsets[0] == use_offsets[1])
1748 {
1749 if (use_offsets[0] == len) break;
1750 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1751 }
1752
1753 /* For /g, update the start offset, leaving the rest alone */
1754
1755 if (do_g) start_offset = use_offsets[1];
1756
1757 /* For /G, update the pointer and length */
1758
1759 else
1760 {
1761 bptr += use_offsets[1];
1762 len -= use_offsets[1];
1763 }
1764 } /* End of loop for /g and /G */
1765 } /* End of loop for data lines */
1766
1767 CONTINUE:
1768
1769 #if !defined NOPOSIX
1770 if (posix || do_posix) regfree(&preg);
1771 #endif
1772
1773 if (re != NULL) free(re);
1774 if (extra != NULL) free(extra);
1775 if (tables != NULL)
1776 {
1777 free((void *)tables);
1778 setlocale(LC_CTYPE, "C");
1779 }
1780 }
1781
1782 if (infile == stdin) fprintf(outfile, "\n");
1783 return 0;
1784 }
1785
1786 /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12