/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 73 - (hide annotations) (download)
Sat Feb 24 21:40:30 2007 UTC (7 years, 9 months ago) by nigel
File MIME type: text/plain
File size: 41274 byte(s)
Load pcre-4.5 into code/trunk.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7     been extended and consequently is now rather untidy in places. */
8    
#include <ctype.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
15 nigel 3
16 nigel 63 /* We need the internal info for displaying the results of pcre_study(). Also
17     for getting the opcodes for showing compiled code. */
18 nigel 3
19 nigel 63 #define PCRE_SPY /* For Win32 build, import data, not export */
20 nigel 3 #include "internal.h"
21 nigel 37
22     /* It is possible to compile this test program without including support for
23     testing the POSIX interface, though this is not available via the standard
24     Makefile. */
25    
26     #if !defined NOPOSIX
27 nigel 3 #include "pcreposix.h"
28 nigel 37 #endif
29 nigel 3
30     #ifndef CLOCKS_PER_SEC
31     #ifdef CLK_TCK
32     #define CLOCKS_PER_SEC CLK_TCK
33     #else
34     #define CLOCKS_PER_SEC 100
35     #endif
36     #endif
37    
38 nigel 63 #define LOOPREPEAT 50000
39 nigel 3
40 nigel 69 #define BUFFER_SIZE 30000
41 nigel 73 #define DBUFFER_SIZE BUFFER_SIZE
42 nigel 23
43 nigel 69
/* Stream that receives all test output; main() points it at stdout or at the
named output file. It is file-scope so that the malloc hooks and the callout
function can write to it. */

static FILE *outfile;

/* Non-zero when the store used by each compiled pattern is to be reported. */

static int log_store = 0;

/* State shared between main() and callout(): the number of times the
callout selected by callout_fail_id has been reached, whether extra capture
information is wanted, the count at which that callout starts to fail, the
callout number that is allowed to fail, and a flag that is set for the first
callout of a match. */

static int callout_count;
static int callout_extra;
static int callout_fail_count;
static int callout_fail_id;
static int first_callout;

/* Non-zero makes the malloc/free hooks log every call. */

static int show_malloc;

/* Non-zero when the current pattern is being handled as UTF-8; read by
pchars(). */

static int use_utf8;

/* Size of the most recent request made through new_malloc(). */

static size_t gotten_store;
54 nigel 3
55    
/* Largest character value that fits in an encoding of (index + 1) bytes.
ord2utf8() and utf82ord() both take the number of entries from sizeof, so the
table must keep exactly six elements. */

static const int utf8_table1[] = {
  0x0000007f,
  0x000007ff,
  0x0000ffff,
  0x001fffff,
  0x03ffffff,
  0x7fffffff
};

/* Marker bits that ord2utf8() ORs into the first byte of an encoding that has
(index) additional bytes. */

static const int utf8_table2[] = {
  0,
  0xc0,
  0xe0,
  0xf0,
  0xf8,
  0xfc
};

/* Mask that utf82ord() applies to the first byte to extract its data bits,
indexed by the number of additional bytes. */

static const int utf8_table3[] = {
  0xff,
  0x1f,
  0x0f,
  0x07,
  0x03,
  0x01
};
64    
65    
66 nigel 63
67 nigel 49 /*************************************************
68 nigel 63 * Print compiled regex *
69     *************************************************/
70    
71     /* The code for doing this is held in a separate file that is also included in
72     pcre.c when it is compiled with the debug switch. It defines a function called
73     print_internals(), which uses a table of opcode lengths defined by the macro
74     OP_LENGTHS, whose name must be OP_lengths. */
75    
76     static uschar OP_lengths[] = { OP_LENGTHS };
77    
78     #include "printint.c"
79    
80    
81    
/*************************************************
*         Read number from string                *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking the -o argument, so just keep it simple.

Leading white space is skipped and then decimal digits are accumulated. A
number that is too big for an int saturates at INT_MAX rather than overflowing
(signed overflow is undefined behaviour); the remaining digits are still
consumed so that the end pointer is left after the whole number.

Arguments:
  str         string to be converted
  endptr      where to put the end pointer (first character not consumed)

Returns:      the value as a non-negative int (0 if there are no digits)
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (*str != 0 && isspace(*str)) str++;
while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* result * 10 + digit would exceed INT_MAX: clamp instead of overflowing.
  Once clamped, this test stays true for every later digit. */

  if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
  }
*endptr = str;
return(result);
}
106    
107    
108    
109     /*************************************************
110 nigel 49 * Convert character value to UTF-8 *
111     *************************************************/
112    
113     /* This function takes an integer value in the range 0 - 0x7fffffff
114     and encodes it as a UTF-8 character in 0 to 6 bytes.
115    
116     Arguments:
117     cvalue the character value
118     buffer pointer to buffer for result - at least 6 bytes long
119    
120     Returns: number of characters placed in the buffer
121     -1 if input character is negative
122     0 if input character is positive but too big (only when
123     int is longer than 32 bits)
124     */
125    
126     static int
127     ord2utf8(int cvalue, unsigned char *buffer)
128     {
129     register int i, j;
130     for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
131     if (cvalue <= utf8_table1[i]) break;
132     if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
133     if (cvalue < 0) return -1;
134 nigel 59
135     buffer += i;
136     for (j = i; j > 0; j--)
137     {
138     *buffer-- = 0x80 | (cvalue & 0x3f);
139     cvalue >>= 6;
140     }
141     *buffer = utf8_table2[i] | cvalue;
142 nigel 49 return i + 1;
143     }
144    
145    
146     /*************************************************
147     * Convert UTF-8 string to value *
148     *************************************************/
149    
150     /* This function takes one or more bytes that represents a UTF-8 character,
151     and returns the value of the character.
152    
153     Argument:
154     buffer a pointer to the byte vector
155     vptr a pointer to an int to receive the value
156    
157     Returns: > 0 => the number of bytes consumed
158     -6 to 0 => malformed UTF-8 character at offset = (-return)
159     */
160    
161 nigel 67 static int
162 nigel 49 utf82ord(unsigned char *buffer, int *vptr)
163     {
164     int c = *buffer++;
165     int d = c;
166     int i, j, s;
167    
168     for (i = -1; i < 6; i++) /* i is number of additional bytes */
169     {
170     if ((d & 0x80) == 0) break;
171     d <<= 1;
172     }
173    
174     if (i == -1) { *vptr = c; return 1; } /* ascii character */
175     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
176    
177     /* i now has a value in the range 1-5 */
178    
179 nigel 59 s = 6*i;
180     d = (c & utf8_table3[i]) << s;
181 nigel 49
182     for (j = 0; j < i; j++)
183     {
184     c = *buffer++;
185     if ((c & 0xc0) != 0x80) return -(j+1);
186 nigel 59 s -= 6;
187 nigel 49 d |= (c & 0x3f) << s;
188     }
189    
190     /* Check that encoding was the correct unique one */
191    
192     for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
193     if (d <= utf8_table1[j]) break;
194     if (j != i) return -(i+1);
195    
196     /* Valid value */
197    
198     *vptr = d;
199     return i+1;
200     }
201    
202    
203    
204 nigel 63 /*************************************************
205     * Print character string *
206     *************************************************/
207 nigel 49
208 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
209     mode. Yields number of characters printed. If handed a NULL file, just counts
210     chars without printing. */
211 nigel 49
212 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
213 nigel 3 {
214 nigel 63 int c;
215     int yield = 0;
216 nigel 3
217 nigel 63 while (length-- > 0)
218 nigel 3 {
219 nigel 67 if (use_utf8)
220 nigel 63 {
221     int rc = utf82ord(p, &c);
222 nigel 3
223 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
224     {
225     length -= rc - 1;
226     p += rc;
227     if (c < 256 && isprint(c))
228     {
229     if (f != NULL) fprintf(f, "%c", c);
230     yield++;
231     }
232     else
233     {
234     int n;
235     if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
236     yield += n;
237     }
238     continue;
239     }
240     }
241 nigel 3
242 nigel 63 /* Not UTF-8, or malformed UTF-8 */
243    
244     if (isprint(c = *(p++)))
245 nigel 3 {
246 nigel 63 if (f != NULL) fprintf(f, "%c", c);
247     yield++;
248 nigel 3 }
249 nigel 63 else
250 nigel 3 {
251 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
252     yield += 4;
253     }
254     }
255 nigel 3
256 nigel 63 return yield;
257     }
258 nigel 23
259 nigel 3
260 nigel 23
261 nigel 63 /*************************************************
262     * Callout function *
263     *************************************************/
264 nigel 3
265 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
266     the match. Yield zero unless more callouts than the fail count, or the callout
267     data is not zero. */
268 nigel 3
269 nigel 63 static int callout(pcre_callout_block *cb)
270     {
271     FILE *f = (first_callout | callout_extra)? outfile : NULL;
272     int i, pre_start, post_start;
273 nigel 3
274 nigel 63 if (callout_extra)
275     {
276     fprintf(f, "Callout %d: last capture = %d\n",
277     cb->callout_number, cb->capture_last);
278 nigel 3
279 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
280     {
281     if (cb->offset_vector[i] < 0)
282     fprintf(f, "%2d: <unset>\n", i/2);
283     else
284     {
285     fprintf(f, "%2d: ", i/2);
286     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
287     cb->offset_vector[i+1] - cb->offset_vector[i], f);
288     fprintf(f, "\n");
289     }
290     }
291     }
292 nigel 3
293 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
294     datails. On subsequent calls in the same match, we use pchars just to find the
295     printed lengths of the substrings. */
296 nigel 3
297 nigel 63 if (f != NULL) fprintf(f, "--->");
298 nigel 3
299 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
300     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
301     cb->current_position - cb->start_match, f);
302 nigel 3
303 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
304     cb->subject_length - cb->current_position, f);
305 nigel 3
306 nigel 63 if (f != NULL) fprintf(f, "\n");
307 nigel 9
308 nigel 63 /* Always print appropriate indicators, with callout number if not already
309     shown */
310 nigel 3
311 nigel 63 if (callout_extra) fprintf(outfile, " ");
312     else fprintf(outfile, "%3d ", cb->callout_number);
313 nigel 3
314 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
315     fprintf(outfile, "^");
316 nigel 3
317 nigel 63 if (post_start > 0)
318     {
319     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
320     fprintf(outfile, "^");
321 nigel 3 }
322    
323 nigel 63 fprintf(outfile, "\n");
324     first_callout = 0;
325 nigel 3
326 nigel 71 if (cb->callout_data != NULL)
327 nigel 49 {
328 nigel 71 int callout_data = *((int *)(cb->callout_data));
329     if (callout_data != 0)
330     {
331     fprintf(outfile, "Callout data = %d\n", callout_data);
332     return callout_data;
333     }
334 nigel 63 }
335 nigel 49
336 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
337     (++callout_count >= callout_fail_count)? 1 : 0;
338 nigel 3 }
339    
340    
341 nigel 63 /*************************************************
342 nigel 73 * Local malloc functions *
343 nigel 63 *************************************************/
344 nigel 3
345     /* Alternative malloc function, to test functionality and show the size of the
346     compiled re. */
347    
348     static void *new_malloc(size_t size)
349     {
350 nigel 73 void *block = malloc(size);
351 nigel 43 gotten_store = size;
352 nigel 73 if (show_malloc)
353     fprintf(outfile, "malloc %3d %p\n", size, block);
354     return block;
355 nigel 3 }
356    
357 nigel 73 static void new_free(void *block)
358     {
359     if (show_malloc)
360     fprintf(outfile, "free %p\n", block);
361     free(block);
362     }
363 nigel 3
364    
365 nigel 73 /* For recursion malloc/free, to test stacking calls */
366    
367     static void *stack_malloc(size_t size)
368     {
369     void *block = malloc(size);
370     if (show_malloc)
371     fprintf(outfile, "stack_malloc %3d %p\n", size, block);
372     return block;
373     }
374    
375     static void stack_free(void *block)
376     {
377     if (show_malloc)
378     fprintf(outfile, "stack_free %p\n", block);
379     free(block);
380     }
381    
382    
383 nigel 63 /*************************************************
384     * Call pcre_fullinfo() *
385     *************************************************/
386 nigel 43
387     /* Get one piece of information from the pcre_fullinfo() function */
388    
389     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
390     {
391     int rc;
392     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
393     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
394     }
395    
396    
397    
398 nigel 63 /*************************************************
399     * Main Program *
400     *************************************************/
401 nigel 43
402 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
403     consist of a regular expression, in delimiters and optionally followed by
404     options, followed by a set of test data, terminated by an empty line. */
405    
406     int main(int argc, char **argv)
407     {
408     FILE *infile = stdin;
409     int options = 0;
410     int study_options = 0;
411     int op = 1;
412     int timeit = 0;
413     int showinfo = 0;
414 nigel 31 int showstore = 0;
415 nigel 53 int size_offsets = 45;
416     int size_offsets_max;
417     int *offsets;
418     #if !defined NOPOSIX
419 nigel 3 int posix = 0;
420 nigel 53 #endif
421 nigel 3 int debug = 0;
422 nigel 11 int done = 0;
423 nigel 3
424 nigel 69 unsigned char *buffer;
425     unsigned char *dbuffer;
426    
427     /* Get buffers from malloc() so that Electric Fence will check their misuse
428     when I am debugging. */
429    
430 nigel 71 buffer = (unsigned char *)malloc(BUFFER_SIZE);
431     dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
432 nigel 69
433 nigel 3 /* Static so that new_malloc can use it. */
434    
435     outfile = stdout;
436    
437     /* Scan options */
438    
439     while (argc > 1 && argv[op][0] == '-')
440     {
441 nigel 63 unsigned char *endptr;
442 nigel 53
443 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
444     showstore = 1;
445 nigel 3 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
446     else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
447     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
448 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
449 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
450     *endptr == 0))
451 nigel 53 {
452     op++;
453     argc--;
454     }
455     #if !defined NOPOSIX
456 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
457 nigel 53 #endif
458 nigel 63 else if (strcmp(argv[op], "-C") == 0)
459     {
460     int rc;
461     printf("PCRE version %s\n", pcre_version());
462     printf("Compiled with\n");
463     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
464     printf(" %sUTF-8 support\n", rc? "" : "No ");
465     (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
466     printf(" Newline character is %s\n", (rc == '\r')? "CR" : "LF");
467     (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
468     printf(" Internal link size = %d\n", rc);
469     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
470     printf(" POSIX malloc threshold = %d\n", rc);
471     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
472     printf(" Default match limit = %d\n", rc);
473 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
474     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
475 nigel 63 exit(0);
476     }
477 nigel 3 else
478     {
479 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
480     printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
481 nigel 63 printf(" -C show PCRE compile-time options and exit\n");
482 nigel 53 printf(" -d debug: show compiled code; implies -i\n"
483     " -i show information about compiled pattern\n"
484     " -o <n> set size of offsets vector to <n>\n");
485     #if !defined NOPOSIX
486     printf(" -p use POSIX interface\n");
487     #endif
488     printf(" -s output store information\n"
489     " -t time compilation and execution\n");
490 nigel 3 return 1;
491     }
492     op++;
493     argc--;
494     }
495    
496 nigel 53 /* Get the store for the offsets vector, and remember what it was */
497    
498     size_offsets_max = size_offsets;
499 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
500 nigel 53 if (offsets == NULL)
501     {
502     printf("** Failed to get %d bytes of memory for offsets vector\n",
503     size_offsets_max * sizeof(int));
504     return 1;
505     }
506    
507 nigel 3 /* Sort out the input and output files */
508    
509     if (argc > 1)
510     {
511     infile = fopen(argv[op], "r");
512     if (infile == NULL)
513     {
514     printf("** Failed to open %s\n", argv[op]);
515     return 1;
516     }
517     }
518    
519     if (argc > 2)
520     {
521     outfile = fopen(argv[op+1], "w");
522     if (outfile == NULL)
523     {
524     printf("** Failed to open %s\n", argv[op+1]);
525     return 1;
526     }
527     }
528    
529     /* Set alternative malloc function */
530    
531     pcre_malloc = new_malloc;
532 nigel 73 pcre_free = new_free;
533     pcre_stack_malloc = stack_malloc;
534     pcre_stack_free = stack_free;
535 nigel 3
536 nigel 23 /* Heading line, then prompt for first regex if stdin */
537 nigel 3
538     fprintf(outfile, "PCRE version %s\n\n", pcre_version());
539    
540     /* Main loop */
541    
542 nigel 11 while (!done)
543 nigel 3 {
544     pcre *re = NULL;
545     pcre_extra *extra = NULL;
546 nigel 37
547     #if !defined NOPOSIX /* There are still compilers that require no indent */
548 nigel 3 regex_t preg;
549 nigel 45 int do_posix = 0;
550 nigel 37 #endif
551    
552 nigel 7 const char *error;
553 nigel 25 unsigned char *p, *pp, *ppp;
554 nigel 53 const unsigned char *tables = NULL;
555 nigel 3 int do_study = 0;
556 nigel 25 int do_debug = debug;
557 nigel 35 int do_G = 0;
558     int do_g = 0;
559 nigel 25 int do_showinfo = showinfo;
560 nigel 35 int do_showrest = 0;
561 nigel 3 int erroroffset, len, delimiter;
562    
563 nigel 67 use_utf8 = 0;
564 nigel 63
565 nigel 3 if (infile == stdin) printf(" re> ");
566 nigel 69 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
567 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
568 nigel 63 fflush(outfile);
569 nigel 3
570     p = buffer;
571     while (isspace(*p)) p++;
572     if (*p == 0) continue;
573    
574     /* Get the delimiter and seek the end of the pattern; if is isn't
575     complete, read more. */
576    
577     delimiter = *p++;
578    
579 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
580 nigel 3 {
581 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
582 nigel 3 goto SKIP_DATA;
583     }
584    
585     pp = p;
586    
587     for(;;)
588     {
589 nigel 29 while (*pp != 0)
590     {
591     if (*pp == '\\' && pp[1] != 0) pp++;
592     else if (*pp == delimiter) break;
593     pp++;
594     }
595 nigel 3 if (*pp != 0) break;
596    
597 nigel 69 len = BUFFER_SIZE - (pp - buffer);
598 nigel 3 if (len < 256)
599     {
600     fprintf(outfile, "** Expression too long - missing delimiter?\n");
601     goto SKIP_DATA;
602     }
603    
604     if (infile == stdin) printf(" > ");
605     if (fgets((char *)pp, len, infile) == NULL)
606     {
607     fprintf(outfile, "** Unexpected EOF\n");
608 nigel 11 done = 1;
609     goto CONTINUE;
610 nigel 3 }
611 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
612 nigel 3 }
613    
614 nigel 29 /* If the first character after the delimiter is backslash, make
615     the pattern end with backslash. This is purely to provide a way
616     of testing for the error message when a pattern ends with backslash. */
617    
618     if (pp[1] == '\\') *pp++ = '\\';
619    
620 nigel 3 /* Terminate the pattern at the delimiter */
621    
622     *pp++ = 0;
623    
624     /* Look for options after final delimiter */
625    
626     options = 0;
627     study_options = 0;
628 nigel 31 log_store = showstore; /* default from command line */
629    
630 nigel 3 while (*pp != 0)
631     {
632     switch (*pp++)
633     {
634 nigel 35 case 'g': do_g = 1; break;
635 nigel 3 case 'i': options |= PCRE_CASELESS; break;
636     case 'm': options |= PCRE_MULTILINE; break;
637     case 's': options |= PCRE_DOTALL; break;
638     case 'x': options |= PCRE_EXTENDED; break;
639 nigel 25
640 nigel 35 case '+': do_showrest = 1; break;
641 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
642 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
643 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
644 nigel 35 case 'G': do_G = 1; break;
645 nigel 25 case 'I': do_showinfo = 1; break;
646 nigel 31 case 'M': log_store = 1; break;
647 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
648 nigel 37
649     #if !defined NOPOSIX
650 nigel 3 case 'P': do_posix = 1; break;
651 nigel 37 #endif
652    
653 nigel 3 case 'S': do_study = 1; break;
654 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
655 nigel 3 case 'X': options |= PCRE_EXTRA; break;
656 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
657 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
658 nigel 25
659     case 'L':
660     ppp = pp;
661     while (*ppp != '\n' && *ppp != ' ') ppp++;
662     *ppp = 0;
663     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
664     {
665     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
666     goto SKIP_DATA;
667     }
668     tables = pcre_maketables();
669     pp = ppp;
670     break;
671    
672 nigel 3 case '\n': case ' ': break;
673     default:
674     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
675     goto SKIP_DATA;
676     }
677     }
678    
679 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
680 nigel 25 timing, showing, or debugging options, nor the ability to pass over
681     local character tables. */
682 nigel 3
683 nigel 37 #if !defined NOPOSIX
684 nigel 3 if (posix || do_posix)
685     {
686     int rc;
687     int cflags = 0;
688     if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
689     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
690     rc = regcomp(&preg, (char *)p, cflags);
691    
692     /* Compilation failed; go back for another re, skipping to blank line
693     if non-interactive. */
694    
695     if (rc != 0)
696     {
697 nigel 69 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
698 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
699     goto SKIP_DATA;
700     }
701     }
702    
703     /* Handle compiling via the native interface */
704    
705     else
706 nigel 37 #endif /* !defined NOPOSIX */
707    
708 nigel 3 {
709     if (timeit)
710     {
711     register int i;
712     clock_t time_taken;
713     clock_t start_time = clock();
714 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
715 nigel 3 {
716 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
717 nigel 3 if (re != NULL) free(re);
718     }
719     time_taken = clock() - start_time;
720 nigel 27 fprintf(outfile, "Compile time %.3f milliseconds\n",
721 nigel 63 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
722     (double)CLOCKS_PER_SEC);
723 nigel 3 }
724    
725 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
726 nigel 3
727     /* Compilation failed; go back for another re, skipping to blank line
728     if non-interactive. */
729    
730     if (re == NULL)
731     {
732     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
733     SKIP_DATA:
734     if (infile != stdin)
735     {
736     for (;;)
737     {
738 nigel 69 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
739 nigel 11 {
740     done = 1;
741     goto CONTINUE;
742     }
743 nigel 3 len = (int)strlen((char *)buffer);
744     while (len > 0 && isspace(buffer[len-1])) len--;
745     if (len == 0) break;
746     }
747     fprintf(outfile, "\n");
748     }
749 nigel 25 goto CONTINUE;
750 nigel 3 }
751    
752 nigel 43 /* Compilation succeeded; print data if required. There are now two
753     info-returning functions. The old one has a limited interface and
754     returns only limited data. Check that it agrees with the newer one. */
755 nigel 3
756 nigel 63 if (log_store)
757     fprintf(outfile, "Memory allocation (code space): %d\n",
758     (int)(gotten_store -
759     sizeof(real_pcre) -
760     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
761    
762 nigel 25 if (do_showinfo)
763 nigel 3 {
764 nigel 53 unsigned long int get_options;
765 nigel 43 int old_first_char, old_options, old_count;
766     int count, backrefmax, first_char, need_char;
767 nigel 63 int nameentrysize, namecount;
768     const uschar *nametable;
769 nigel 43 size_t size;
770 nigel 3
771 nigel 63 if (do_debug)
772     {
773     fprintf(outfile, "------------------------------------------------------------------\n");
774     print_internals(re, outfile);
775     }
776 nigel 3
777 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
778 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
779     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
780     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
781 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
782 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
783 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
784     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
785 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
786 nigel 43
787     old_count = pcre_info(re, &old_options, &old_first_char);
788 nigel 3 if (count < 0) fprintf(outfile,
789 nigel 43 "Error %d from pcre_info()\n", count);
790 nigel 3 else
791     {
792 nigel 43 if (old_count != count) fprintf(outfile,
793     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
794     old_count);
795 nigel 37
796 nigel 43 if (old_first_char != first_char) fprintf(outfile,
797     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
798     first_char, old_first_char);
799 nigel 37
800 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
801     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
802     get_options, old_options);
803 nigel 43 }
804    
805     if (size != gotten_store) fprintf(outfile,
806     "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
807     size, gotten_store);
808    
809     fprintf(outfile, "Capturing subpattern count = %d\n", count);
810     if (backrefmax > 0)
811     fprintf(outfile, "Max back reference = %d\n", backrefmax);
812 nigel 63
813     if (namecount > 0)
814     {
815     fprintf(outfile, "Named capturing subpatterns:\n");
816     while (namecount-- > 0)
817     {
818     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
819     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
820     GET2(nametable, 0));
821     nametable += nameentrysize;
822     }
823     }
824    
825 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
826 nigel 71 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",
827 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
828     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
829     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
830     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
831     ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
832     ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
833     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
834     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
835 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
836     ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
837 nigel 43
838     if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
839     fprintf(outfile, "Case state changes\n");
840    
841     if (first_char == -1)
842     {
843     fprintf(outfile, "First char at start or follows \\n\n");
844     }
845     else if (first_char < 0)
846     {
847     fprintf(outfile, "No first char\n");
848     }
849     else
850     {
851 nigel 63 int ch = first_char & 255;
852 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
853 nigel 63 "" : " (caseless)";
854     if (isprint(ch))
855     fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
856 nigel 3 else
857 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
858 nigel 43 }
859 nigel 37
860 nigel 43 if (need_char < 0)
861     {
862     fprintf(outfile, "No need char\n");
863 nigel 3 }
864 nigel 43 else
865     {
866 nigel 63 int ch = need_char & 255;
867 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
868 nigel 63 "" : " (caseless)";
869     if (isprint(ch))
870     fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
871 nigel 43 else
872 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
873 nigel 43 }
874 nigel 3 }
875    
876     /* If /S was present, study the regexp to generate additional info to
877     help with the matching. */
878    
879     if (do_study)
880     {
881     if (timeit)
882     {
883     register int i;
884     clock_t time_taken;
885     clock_t start_time = clock();
886 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
887 nigel 3 extra = pcre_study(re, study_options, &error);
888     time_taken = clock() - start_time;
889     if (extra != NULL) free(extra);
890 nigel 27 fprintf(outfile, " Study time %.3f milliseconds\n",
891 nigel 63 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
892     (double)CLOCKS_PER_SEC);
893 nigel 3 }
894    
895     extra = pcre_study(re, study_options, &error);
896     if (error != NULL)
897     fprintf(outfile, "Failed to study: %s\n", error);
898     else if (extra == NULL)
899     fprintf(outfile, "Study returned NULL\n");
900    
901 nigel 71 /* Don't output study size; at present it is in any case a fixed
902     value, but it varies, depending on the computer architecture, and
903     so messes up the test suite. */
904    
905 nigel 25 else if (do_showinfo)
906 nigel 3 {
907 nigel 63 size_t size;
908 nigel 43 uschar *start_bits = NULL;
909 nigel 63 new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);
910 nigel 43 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
911 nigel 71 /* fprintf(outfile, "Study size = %d\n", size); */
912 nigel 43 if (start_bits == NULL)
913 nigel 3 fprintf(outfile, "No starting character set\n");
914     else
915     {
916     int i;
917     int c = 24;
918     fprintf(outfile, "Starting character set: ");
919     for (i = 0; i < 256; i++)
920     {
921 nigel 43 if ((start_bits[i/8] & (1<<(i%8))) != 0)
922 nigel 3 {
923     if (c > 75)
924     {
925     fprintf(outfile, "\n ");
926     c = 2;
927     }
928     if (isprint(i) && i != ' ')
929     {
930     fprintf(outfile, "%c ", i);
931     c += 2;
932     }
933     else
934     {
935     fprintf(outfile, "\\x%02x ", i);
936     c += 5;
937     }
938     }
939     }
940     fprintf(outfile, "\n");
941     }
942     }
943     }
944     }
945    
946     /* Read data lines and test them */
947    
948     for (;;)
949     {
950 nigel 9 unsigned char *q;
951 nigel 35 unsigned char *bptr = dbuffer;
952 nigel 57 int *use_offsets = offsets;
953 nigel 53 int use_size_offsets = size_offsets;
954 nigel 63 int callout_data = 0;
955     int callout_data_set = 0;
956 nigel 3 int count, c;
957 nigel 29 int copystrings = 0;
958 nigel 63 int find_match_limit = 0;
959 nigel 29 int getstrings = 0;
960     int getlist = 0;
961 nigel 39 int gmatched = 0;
962 nigel 35 int start_offset = 0;
963 nigel 41 int g_notempty = 0;
964 nigel 3
965     options = 0;
966    
967 nigel 63 pcre_callout = callout;
968     first_callout = 1;
969     callout_extra = 0;
970     callout_count = 0;
971     callout_fail_count = 999999;
972     callout_fail_id = -1;
973 nigel 73 show_malloc = 0;
974 nigel 63
975 nigel 35 if (infile == stdin) printf("data> ");
976 nigel 69 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
977 nigel 11 {
978     done = 1;
979     goto CONTINUE;
980     }
981 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
982 nigel 3
983     len = (int)strlen((char *)buffer);
984     while (len > 0 && isspace(buffer[len-1])) len--;
985     buffer[len] = 0;
986     if (len == 0) break;
987    
988     p = buffer;
989     while (isspace(*p)) p++;
990    
991 nigel 9 q = dbuffer;
992 nigel 3 while ((c = *p++) != 0)
993     {
994     int i = 0;
995     int n = 0;
996 nigel 63
997 nigel 3 if (c == '\\') switch ((c = *p++))
998     {
999     case 'a': c = 7; break;
1000     case 'b': c = '\b'; break;
1001     case 'e': c = 27; break;
1002     case 'f': c = '\f'; break;
1003     case 'n': c = '\n'; break;
1004     case 'r': c = '\r'; break;
1005     case 't': c = '\t'; break;
1006     case 'v': c = '\v'; break;
1007    
1008     case '0': case '1': case '2': case '3':
1009     case '4': case '5': case '6': case '7':
1010     c -= '0';
1011     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1012     c = c * 8 + *p++ - '0';
1013     break;
1014    
1015     case 'x':
1016 nigel 49
1017     /* Handle \x{..} specially - new Perl thing for utf8 */
1018    
1019     if (*p == '{')
1020     {
1021     unsigned char *pt = p;
1022     c = 0;
1023     while (isxdigit(*(++pt)))
1024     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1025     if (*pt == '}')
1026     {
1027 nigel 67 unsigned char buff8[8];
1028 nigel 49 int ii, utn;
1029 nigel 67 utn = ord2utf8(c, buff8);
1030     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1031     c = buff8[ii]; /* Last byte */
1032 nigel 49 p = pt + 1;
1033     break;
1034     }
1035     /* Not correct form; fall through */
1036     }
1037    
1038     /* Ordinary \x */
1039    
1040 nigel 3 c = 0;
1041     while (i++ < 2 && isxdigit(*p))
1042     {
1043     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1044     p++;
1045     }
1046     break;
1047    
1048     case 0: /* Allows for an empty line */
1049     p--;
1050     continue;
1051    
1052     case 'A': /* Option setting */
1053     options |= PCRE_ANCHORED;
1054     continue;
1055    
1056     case 'B':
1057     options |= PCRE_NOTBOL;
1058     continue;
1059    
1060 nigel 29 case 'C':
1061 nigel 63 if (isdigit(*p)) /* Set copy string */
1062     {
1063     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1064     copystrings |= 1 << n;
1065     }
1066     else if (isalnum(*p))
1067     {
1068     uschar name[256];
1069 nigel 67 uschar *npp = name;
1070     while (isalnum(*p)) *npp++ = *p++;
1071     *npp = 0;
1072 nigel 65 n = pcre_get_stringnumber(re, (char *)name);
1073 nigel 63 if (n < 0)
1074     fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1075     else copystrings |= 1 << n;
1076     }
1077     else if (*p == '+')
1078     {
1079     callout_extra = 1;
1080     p++;
1081     }
1082     else if (*p == '-')
1083     {
1084     pcre_callout = NULL;
1085     p++;
1086     }
1087     else if (*p == '!')
1088     {
1089     callout_fail_id = 0;
1090     p++;
1091     while(isdigit(*p))
1092     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1093     callout_fail_count = 0;
1094     if (*p == '!')
1095     {
1096     p++;
1097     while(isdigit(*p))
1098     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1099     }
1100     }
1101     else if (*p == '*')
1102     {
1103     int sign = 1;
1104     callout_data = 0;
1105     if (*(++p) == '-') { sign = -1; p++; }
1106     while(isdigit(*p))
1107     callout_data = callout_data * 10 + *p++ - '0';
1108     callout_data *= sign;
1109     callout_data_set = 1;
1110     }
1111 nigel 29 continue;
1112    
1113     case 'G':
1114 nigel 63 if (isdigit(*p))
1115     {
1116     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1117     getstrings |= 1 << n;
1118     }
1119     else if (isalnum(*p))
1120     {
1121     uschar name[256];
1122 nigel 67 uschar *npp = name;
1123     while (isalnum(*p)) *npp++ = *p++;
1124     *npp = 0;
1125 nigel 65 n = pcre_get_stringnumber(re, (char *)name);
1126 nigel 63 if (n < 0)
1127     fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1128     else getstrings |= 1 << n;
1129     }
1130 nigel 29 continue;
1131    
1132     case 'L':
1133     getlist = 1;
1134     continue;
1135    
1136 nigel 63 case 'M':
1137     find_match_limit = 1;
1138     continue;
1139    
1140 nigel 37 case 'N':
1141     options |= PCRE_NOTEMPTY;
1142     continue;
1143    
1144 nigel 3 case 'O':
1145     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1146 nigel 53 if (n > size_offsets_max)
1147     {
1148     size_offsets_max = n;
1149 nigel 57 free(offsets);
1150 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1151 nigel 53 if (offsets == NULL)
1152     {
1153     printf("** Failed to get %d bytes of memory for offsets vector\n",
1154     size_offsets_max * sizeof(int));
1155     return 1;
1156     }
1157     }
1158     use_size_offsets = n;
1159 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1160 nigel 3 continue;
1161    
1162 nigel 73 case 'S':
1163     show_malloc = 1;
1164     continue;
1165    
1166 nigel 3 case 'Z':
1167     options |= PCRE_NOTEOL;
1168     continue;
1169 nigel 71
1170     case '?':
1171     options |= PCRE_NO_UTF8_CHECK;
1172     continue;
1173 nigel 3 }
1174 nigel 9 *q++ = c;
1175 nigel 3 }
1176 nigel 9 *q = 0;
1177     len = q - dbuffer;
1178 nigel 3
1179     /* Handle matching via the POSIX interface, which does not
1180 nigel 63 support timing or playing with the match limit or callout data. */
1181 nigel 3
1182 nigel 37 #if !defined NOPOSIX
1183 nigel 3 if (posix || do_posix)
1184     {
1185     int rc;
1186     int eflags = 0;
1187 nigel 63 regmatch_t *pmatch = NULL;
1188     if (use_size_offsets > 0)
1189 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1190 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1191     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1192    
1193 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1194 nigel 3
1195     if (rc != 0)
1196     {
1197 nigel 69 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1198 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1199     }
1200     else
1201     {
1202 nigel 7 size_t i;
1203 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
1204 nigel 3 {
1205     if (pmatch[i].rm_so >= 0)
1206     {
1207 nigel 23 fprintf(outfile, "%2d: ", (int)i);
1208 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
1209     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1210 nigel 3 fprintf(outfile, "\n");
1211 nigel 35 if (i == 0 && do_showrest)
1212     {
1213     fprintf(outfile, " 0+ ");
1214 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1215     outfile);
1216 nigel 35 fprintf(outfile, "\n");
1217     }
1218 nigel 3 }
1219     }
1220     }
1221 nigel 53 free(pmatch);
1222 nigel 3 }
1223    
1224 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
1225 nigel 3
1226 nigel 37 else
1227     #endif /* !defined NOPOSIX */
1228    
1229 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
1230 nigel 3 {
1231     if (timeit)
1232     {
1233     register int i;
1234     clock_t time_taken;
1235     clock_t start_time = clock();
1236 nigel 27 for (i = 0; i < LOOPREPEAT; i++)
1237 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
1238 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1239 nigel 3 time_taken = clock() - start_time;
1240 nigel 27 fprintf(outfile, "Execute time %.3f milliseconds\n",
1241 nigel 63 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1242     (double)CLOCKS_PER_SEC);
1243 nigel 3 }
1244    
1245 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
1246     varying limits in order to find the minimum value. */
1247    
1248     if (find_match_limit)
1249     {
1250     int min = 0;
1251     int mid = 64;
1252     int max = -1;
1253    
1254     if (extra == NULL)
1255     {
1256 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1257 nigel 63 extra->flags = 0;
1258     }
1259     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1260    
1261     for (;;)
1262     {
1263     extra->match_limit = mid;
1264     count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1265     options | g_notempty, use_offsets, use_size_offsets);
1266     if (count == PCRE_ERROR_MATCHLIMIT)
1267     {
1268     /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1269     min = mid;
1270     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1271     }
1272     else if (count >= 0 || count == PCRE_ERROR_NOMATCH)
1273     {
1274     if (mid == min + 1)
1275     {
1276     fprintf(outfile, "Minimum match limit = %d\n", mid);
1277     break;
1278     }
1279     /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1280     max = mid;
1281     mid = (min + mid)/2;
1282     }
1283     else break; /* Some other error */
1284     }
1285    
1286     extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1287     }
1288    
1289     /* If callout_data is set, use the interface with additional data */
1290    
1291     else if (callout_data_set)
1292     {
1293     if (extra == NULL)
1294     {
1295 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1296 nigel 63 extra->flags = 0;
1297     }
1298     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1299 nigel 71 extra->callout_data = &callout_data;
1300 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1301     options | g_notempty, use_offsets, use_size_offsets);
1302     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1303     }
1304    
1305     /* The normal case is just to do the match once, with the default
1306     value of match_limit. */
1307    
1308     else count = pcre_exec(re, extra, (char *)bptr, len,
1309 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1310 nigel 3
1311     if (count == 0)
1312     {
1313     fprintf(outfile, "Matched, but too many substrings\n");
1314 nigel 53 count = use_size_offsets/3;
1315 nigel 3 }
1316    
1317 nigel 39 /* Matched */
1318    
1319 nigel 3 if (count >= 0)
1320     {
1321     int i;
1322 nigel 29 for (i = 0; i < count * 2; i += 2)
1323 nigel 3 {
1324 nigel 57 if (use_offsets[i] < 0)
1325 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
1326     else
1327     {
1328     fprintf(outfile, "%2d: ", i/2);
1329 nigel 63 (void)pchars(bptr + use_offsets[i],
1330     use_offsets[i+1] - use_offsets[i], outfile);
1331 nigel 3 fprintf(outfile, "\n");
1332 nigel 35 if (i == 0)
1333     {
1334     if (do_showrest)
1335     {
1336     fprintf(outfile, " 0+ ");
1337 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1338     outfile);
1339 nigel 35 fprintf(outfile, "\n");
1340     }
1341     }
1342 nigel 3 }
1343     }
1344 nigel 29
1345     for (i = 0; i < 32; i++)
1346     {
1347     if ((copystrings & (1 << i)) != 0)
1348     {
1349 nigel 37 char copybuffer[16];
1350 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1351 nigel 37 i, copybuffer, sizeof(copybuffer));
1352 nigel 29 if (rc < 0)
1353     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1354     else
1355 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1356 nigel 29 }
1357     }
1358    
1359     for (i = 0; i < 32; i++)
1360     {
1361     if ((getstrings & (1 << i)) != 0)
1362     {
1363     const char *substring;
1364 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1365 nigel 29 i, &substring);
1366     if (rc < 0)
1367     fprintf(outfile, "get substring %d failed %d\n", i, rc);
1368     else
1369     {
1370     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1371 nigel 49 /* free((void *)substring); */
1372     pcre_free_substring(substring);
1373 nigel 29 }
1374     }
1375     }
1376    
1377     if (getlist)
1378     {
1379     const char **stringlist;
1380 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1381 nigel 29 &stringlist);
1382     if (rc < 0)
1383     fprintf(outfile, "get substring list failed %d\n", rc);
1384     else
1385     {
1386     for (i = 0; i < count; i++)
1387     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1388     if (stringlist[i] != NULL)
1389     fprintf(outfile, "string list not terminated by NULL\n");
1390 nigel 49 /* free((void *)stringlist); */
1391     pcre_free_substring_list(stringlist);
1392 nigel 29 }
1393     }
1394 nigel 39 }
1395 nigel 29
1396 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
1397 nigel 47 g_notempty after a null match, this is not necessarily the end.
1398 nigel 73 We want to advance the start offset, and continue. In the case of UTF-8
1399     matching, the advance must be one character, not one byte. Fudge the
1400     offset values to achieve this. We won't be at the end of the string -
1401     that was checked before setting g_notempty. */
1402 nigel 39
1403 nigel 3 else
1404     {
1405 nigel 41 if (g_notempty != 0)
1406 nigel 35 {
1407 nigel 73 int onechar = 1;
1408 nigel 57 use_offsets[0] = start_offset;
1409 nigel 73 if (use_utf8)
1410     {
1411     while (start_offset + onechar < len)
1412     {
1413     int tb = bptr[start_offset+onechar];
1414     if (tb <= 127) break;
1415     tb &= 0xc0;
1416     if (tb != 0 && tb != 0xc0) onechar++;
1417     }
1418     }
1419     use_offsets[1] = start_offset + onechar;
1420 nigel 35 }
1421 nigel 41 else
1422     {
1423 nigel 73 if (count == PCRE_ERROR_NOMATCH)
1424 nigel 41 {
1425 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
1426 nigel 41 }
1427 nigel 73 else fprintf(outfile, "Error %d\n", count);
1428 nigel 41 break; /* Out of the /g loop */
1429     }
1430 nigel 3 }
1431 nigel 35
1432 nigel 39 /* If not /g or /G we are done */
1433    
1434     if (!do_g && !do_G) break;
1435    
1436 nigel 41 /* If we have matched an empty string, first check to see if we are at
1437     the end of the subject. If so, the /g loop is over. Otherwise, mimic
1438     what Perl's /g option does. This turns out to be rather cunning. First
1439 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1440     same point. If this fails (picked up above) we advance to the next
1441     character. */
1442 nigel 39
1443 nigel 41 g_notempty = 0;
1444 nigel 57 if (use_offsets[0] == use_offsets[1])
1445 nigel 41 {
1446 nigel 57 if (use_offsets[0] == len) break;
1447 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1448 nigel 41 }
1449 nigel 39
1450     /* For /g, update the start offset, leaving the rest alone */
1451    
1452 nigel 57 if (do_g) start_offset = use_offsets[1];
1453 nigel 39
1454     /* For /G, update the pointer and length */
1455    
1456     else
1457 nigel 35 {
1458 nigel 57 bptr += use_offsets[1];
1459     len -= use_offsets[1];
1460 nigel 35 }
1461 nigel 39 } /* End of loop for /g and /G */
1462     } /* End of loop for data lines */
1463 nigel 3
1464 nigel 11 CONTINUE:
1465 nigel 37
1466     #if !defined NOPOSIX
1467 nigel 3 if (posix || do_posix) regfree(&preg);
1468 nigel 37 #endif
1469    
1470 nigel 3 if (re != NULL) free(re);
1471     if (extra != NULL) free(extra);
1472 nigel 25 if (tables != NULL)
1473     {
1474     free((void *)tables);
1475     setlocale(LC_CTYPE, "C");
1476     }
1477 nigel 3 }
1478    
1479 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
1480 nigel 3 return 0;
1481     }
1482    
1483     /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12