/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 69 - (hide annotations) (download)
Sat Feb 24 21:40:18 2007 UTC (7 years, 7 months ago) by nigel
File MIME type: text/plain
File size: 39441 byte(s)
Load pcre-4.3 into code/trunk.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7     been extended and consequently is now rather untidy in places. */
8    
9 nigel 3 #include <ctype.h>
10     #include <stdio.h>
11     #include <string.h>
12     #include <stdlib.h>
13     #include <time.h>
14 nigel 25 #include <locale.h>
15 nigel 3
16 nigel 63 /* We need the internal info for displaying the results of pcre_study(). Also
17     for getting the opcodes for showing compiled code. */
18 nigel 3
19 nigel 63 #define PCRE_SPY /* For Win32 build, import data, not export */
20 nigel 3 #include "internal.h"
21 nigel 37
22     /* It is possible to compile this test program without including support for
23     testing the POSIX interface, though this is not available via the standard
24     Makefile. */
25    
26     #if !defined NOPOSIX
27 nigel 3 #include "pcreposix.h"
28 nigel 37 #endif
29 nigel 3
30     #ifndef CLOCKS_PER_SEC
31     #ifdef CLK_TCK
32     #define CLOCKS_PER_SEC CLK_TCK
33     #else
34     #define CLOCKS_PER_SEC 100
35     #endif
36     #endif
37    
38 nigel 63 #define LOOPREPEAT 50000
39 nigel 3
40 nigel 69 #define BUFFER_SIZE 30000
41     #define DBUFFER_SIZE 1024
42 nigel 23
43 nigel 69
/* State shared between main() and the helper functions in this file. */

/* Stream that receives the test output; main() sets it to stdout or to the
output file named on the command line. */
44 nigel 3 static FILE *outfile;
/* Non-zero when the code-space allocation is to be reported after a compile;
main() takes the default from the command line and /M switches it on. */
45     static int log_store = 0;
/* Number of times callout() has seen the callout numbered callout_fail_id;
main() zeroes it before each data line. */
46 nigel 63 static int callout_count;
/* Non-zero makes callout() print the capture details and always re-print the
subject; set by \C+ in a data line. */
47     static int callout_extra;
/* callout() returns 1 once callout_count reaches this value. */
48     static int callout_fail_count;
/* The callout number that callout() counts against callout_fail_count. */
49     static int callout_fail_id;
/* Set to 1 by main() before each data line and cleared by callout(), so that
the subject is echoed on the first callout only. */
50     static int first_callout;
/* Non-zero when the current pattern used /8; pchars() then decodes its input
as UTF-8. */
51 nigel 67 static int use_utf8;
/* Size passed to the most recent new_malloc() call. */
52 nigel 43 static size_t gotten_store;
53 nigel 3
54    
55    
/* Entry i is the largest character value that can be encoded in i+1 bytes.
ord2utf8() scans it to choose the encoded length, and utf82ord() scans it to
reject encodings that are longer than necessary. */
56 nigel 69 static const int utf8_table1[] = {
57 nigel 49 0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
58    
/* Entry i holds the marker bits that ord2utf8() ORs into the first byte of a
character that has i additional bytes. */
59 nigel 69 static const int utf8_table2[] = {
60 nigel 49 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
61    
/* Entry i is the mask utf82ord() applies to the first byte of a character
that has i additional bytes, leaving only the data bits of that byte. */
62 nigel 69 static const int utf8_table3[] = {
63 nigel 49 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
64    
65    
66 nigel 63
67 nigel 49 /*************************************************
68 nigel 63 * Print compiled regex *
69     *************************************************/
70    
71     /* The code for doing this is held in a separate file that is also included in
72     pcre.c when it is compiled with the debug switch. It defines a function called
73     print_internals(), which uses a table of opcode lengths defined by the macro
74     OP_LENGTHS, whose name must be OP_lengths. */
75    
76     static uschar OP_lengths[] = { OP_LENGTHS };
77    
78     #include "printint.c"
79    
80    
81    
82     /*************************************************
83     * Read number from string *
84     *************************************************/
85    
/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, the job is done by hand. It is only used
for unpicking the -o argument, so it is kept simple: leading white space is
skipped and then a run of decimal digits is converted.

Conversion stops before a digit that would take the result beyond the largest
int, so the arithmetic can never overflow. In that case *endptr is left
pointing at the unconsumed digit, which lets a caller that insists on
*endptr being the terminating zero reject the number as malformed.

Arguments:
  str         string to be converted
  endptr      where to put a pointer to the first unconsumed character

Returns:      the converted value as a non-negative int (0 if no digits)
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
/* The largest int, obtained without needing <limits.h>. This assumes that
unsigned int has exactly one more value bit than int. */

const int max_int = (int)(~0u >> 1);
int result = 0;

while (isspace(*str)) str++;

while (isdigit(*str))
  {
  int digit = *str - '0';
  if (result > (max_int - digit) / 10) break;    /* Next step would overflow */
  result = result * 10 + digit;
  str++;
  }

*endptr = str;
return result;
}
106    
107    
108    
109     /*************************************************
110 nigel 49 * Convert character value to UTF-8 *
111     *************************************************/
112    
113     /* This function takes an integer value in the range 0 - 0x7fffffff
114     and encodes it as a UTF-8 character in 0 to 6 bytes.
115    
116     Arguments:
117     cvalue the character value
118     buffer pointer to buffer for result - at least 6 bytes long
119    
120     Returns: number of characters placed in the buffer
121     -1 if input character is negative
122     0 if input character is positive but too big (only when
123     int is longer than 32 bits)
124     */
125    
126     static int
127     ord2utf8(int cvalue, unsigned char *buffer)
128     {
129     register int i, j;
130     for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
131     if (cvalue <= utf8_table1[i]) break;
132     if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
133     if (cvalue < 0) return -1;
134 nigel 59
135     buffer += i;
136     for (j = i; j > 0; j--)
137     {
138     *buffer-- = 0x80 | (cvalue & 0x3f);
139     cvalue >>= 6;
140     }
141     *buffer = utf8_table2[i] | cvalue;
142 nigel 49 return i + 1;
143     }
144    
145    
146     /*************************************************
147     * Convert UTF-8 string to value *
148     *************************************************/
149    
150     /* This function takes one or more bytes that represents a UTF-8 character,
151     and returns the value of the character.
152    
153     Argument:
154     buffer a pointer to the byte vector
155     vptr a pointer to an int to receive the value
156    
157     Returns: > 0 => the number of bytes consumed
158     -6 to 0 => malformed UTF-8 character at offset = (-return)
159     */
160    
161 nigel 67 static int
162 nigel 49 utf82ord(unsigned char *buffer, int *vptr)
163     {
164     int c = *buffer++;
165     int d = c;
166     int i, j, s;
167    
168     for (i = -1; i < 6; i++) /* i is number of additional bytes */
169     {
170     if ((d & 0x80) == 0) break;
171     d <<= 1;
172     }
173    
174     if (i == -1) { *vptr = c; return 1; } /* ascii character */
175     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
176    
177     /* i now has a value in the range 1-5 */
178    
179 nigel 59 s = 6*i;
180     d = (c & utf8_table3[i]) << s;
181 nigel 49
182     for (j = 0; j < i; j++)
183     {
184     c = *buffer++;
185     if ((c & 0xc0) != 0x80) return -(j+1);
186 nigel 59 s -= 6;
187 nigel 49 d |= (c & 0x3f) << s;
188     }
189    
190     /* Check that encoding was the correct unique one */
191    
192     for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
193     if (d <= utf8_table1[j]) break;
194     if (j != i) return -(i+1);
195    
196     /* Valid value */
197    
198     *vptr = d;
199     return i+1;
200     }
201    
202    
203    
204 nigel 63 /*************************************************
205     * Print character string *
206     *************************************************/
207 nigel 49
208 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
209     mode. Yields number of characters printed. If handed a NULL file, just counts
210     chars without printing. */
211 nigel 49
212 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
213 nigel 3 {
214 nigel 63 int c;
215     int yield = 0;
216 nigel 3
217 nigel 63 while (length-- > 0)
218 nigel 3 {
219 nigel 67 if (use_utf8)
220 nigel 63 {
221     int rc = utf82ord(p, &c);
222 nigel 3
223 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
224     {
225     length -= rc - 1;
226     p += rc;
227     if (c < 256 && isprint(c))
228     {
229     if (f != NULL) fprintf(f, "%c", c);
230     yield++;
231     }
232     else
233     {
234     int n;
235     if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
236     yield += n;
237     }
238     continue;
239     }
240     }
241 nigel 3
242 nigel 63 /* Not UTF-8, or malformed UTF-8 */
243    
244     if (isprint(c = *(p++)))
245 nigel 3 {
246 nigel 63 if (f != NULL) fprintf(f, "%c", c);
247     yield++;
248 nigel 3 }
249 nigel 63 else
250 nigel 3 {
251 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
252     yield += 4;
253     }
254     }
255 nigel 3
256 nigel 63 return yield;
257     }
258 nigel 23
259 nigel 3
260 nigel 23
261 nigel 63 /*************************************************
262     * Callout function *
263     *************************************************/
264 nigel 3
265 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
266     the match. Yield zero unless more callouts than the fail count, or the callout
267     data is not zero. */
268 nigel 3
269 nigel 63 static int callout(pcre_callout_block *cb)
270     {
271     FILE *f = (first_callout | callout_extra)? outfile : NULL;
272     int i, pre_start, post_start;
273 nigel 3
274 nigel 63 if (callout_extra)
275     {
276     fprintf(f, "Callout %d: last capture = %d\n",
277     cb->callout_number, cb->capture_last);
278 nigel 3
279 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
280     {
281     if (cb->offset_vector[i] < 0)
282     fprintf(f, "%2d: <unset>\n", i/2);
283     else
284     {
285     fprintf(f, "%2d: ", i/2);
286     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
287     cb->offset_vector[i+1] - cb->offset_vector[i], f);
288     fprintf(f, "\n");
289     }
290     }
291     }
292 nigel 3
293 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
294     datails. On subsequent calls in the same match, we use pchars just to find the
295     printed lengths of the substrings. */
296 nigel 3
297 nigel 63 if (f != NULL) fprintf(f, "--->");
298 nigel 3
299 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
300     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
301     cb->current_position - cb->start_match, f);
302 nigel 3
303 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
304     cb->subject_length - cb->current_position, f);
305 nigel 3
306 nigel 63 if (f != NULL) fprintf(f, "\n");
307 nigel 9
308 nigel 63 /* Always print appropriate indicators, with callout number if not already
309     shown */
310 nigel 3
311 nigel 63 if (callout_extra) fprintf(outfile, " ");
312     else fprintf(outfile, "%3d ", cb->callout_number);
313 nigel 3
314 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
315     fprintf(outfile, "^");
316 nigel 3
317 nigel 63 if (post_start > 0)
318     {
319     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
320     fprintf(outfile, "^");
321 nigel 3 }
322    
323 nigel 63 fprintf(outfile, "\n");
324 nigel 3
325 nigel 63 first_callout = 0;
326 nigel 3
327 nigel 63 if ((int)(cb->callout_data) != 0)
328 nigel 49 {
329 nigel 63 fprintf(outfile, "Callout data = %d\n", (int)(cb->callout_data));
330     return (int)(cb->callout_data);
331     }
332 nigel 49
333 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
334     (++callout_count >= callout_fail_count)? 1 : 0;
335 nigel 3 }
336    
337    
338 nigel 63 /*************************************************
339     * Local malloc function *
340     *************************************************/
341 nigel 3
342     /* Alternative malloc function, to test functionality and show the size of the
343     compiled re. */
344    
345     static void *new_malloc(size_t size)
346     {
347 nigel 43 gotten_store = size;
348 nigel 3 return malloc(size);
349     }
350    
351    
352    
353 nigel 63 /*************************************************
354     * Call pcre_fullinfo() *
355     *************************************************/
356 nigel 43
357     /* Get one piece of information from the pcre_fullinfo() function */
358    
359     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
360     {
361     int rc;
362     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
363     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
364     }
365    
366    
367    
368 nigel 63 /*************************************************
369     * Main Program *
370     *************************************************/
371 nigel 43
372 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
373     consist of a regular expression, in delimiters and optionally followed by
374     options, followed by a set of test data, terminated by an empty line. */
375    
376     int main(int argc, char **argv)
377     {
378     FILE *infile = stdin;
379     int options = 0;
380     int study_options = 0;
381     int op = 1;
382     int timeit = 0;
383     int showinfo = 0;
384 nigel 31 int showstore = 0;
385 nigel 53 int size_offsets = 45;
386     int size_offsets_max;
387     int *offsets;
388     #if !defined NOPOSIX
389 nigel 3 int posix = 0;
390 nigel 53 #endif
391 nigel 3 int debug = 0;
392 nigel 11 int done = 0;
393 nigel 3
394 nigel 69 unsigned char *buffer;
395     unsigned char *dbuffer;
396    
397     /* Get buffers from malloc() so that Electric Fence will check their misuse
398     when I am debugging. */
399    
400     buffer = malloc(BUFFER_SIZE);
401     dbuffer = malloc(DBUFFER_SIZE);
402    
403 nigel 3 /* Static so that new_malloc can use it. */
404    
405     outfile = stdout;
406    
407     /* Scan options */
408    
409     while (argc > 1 && argv[op][0] == '-')
410     {
411 nigel 63 unsigned char *endptr;
412 nigel 53
413 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
414     showstore = 1;
415 nigel 3 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
416     else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
417     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
418 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
419 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
420     *endptr == 0))
421 nigel 53 {
422     op++;
423     argc--;
424     }
425     #if !defined NOPOSIX
426 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
427 nigel 53 #endif
428 nigel 63 else if (strcmp(argv[op], "-C") == 0)
429     {
430     int rc;
431     printf("PCRE version %s\n", pcre_version());
432     printf("Compiled with\n");
433     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
434     printf(" %sUTF-8 support\n", rc? "" : "No ");
435     (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
436     printf(" Newline character is %s\n", (rc == '\r')? "CR" : "LF");
437     (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
438     printf(" Internal link size = %d\n", rc);
439     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
440     printf(" POSIX malloc threshold = %d\n", rc);
441     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
442     printf(" Default match limit = %d\n", rc);
443     exit(0);
444     }
445 nigel 3 else
446     {
447 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
448     printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
449 nigel 63 printf(" -C show PCRE compile-time options and exit\n");
450 nigel 53 printf(" -d debug: show compiled code; implies -i\n"
451     " -i show information about compiled pattern\n"
452     " -o <n> set size of offsets vector to <n>\n");
453     #if !defined NOPOSIX
454     printf(" -p use POSIX interface\n");
455     #endif
456     printf(" -s output store information\n"
457     " -t time compilation and execution\n");
458 nigel 3 return 1;
459     }
460     op++;
461     argc--;
462     }
463    
464 nigel 53 /* Get the store for the offsets vector, and remember what it was */
465    
466     size_offsets_max = size_offsets;
467     offsets = malloc(size_offsets_max * sizeof(int));
468     if (offsets == NULL)
469     {
470     printf("** Failed to get %d bytes of memory for offsets vector\n",
471     size_offsets_max * sizeof(int));
472     return 1;
473     }
474    
475 nigel 3 /* Sort out the input and output files */
476    
477     if (argc > 1)
478     {
479     infile = fopen(argv[op], "r");
480     if (infile == NULL)
481     {
482     printf("** Failed to open %s\n", argv[op]);
483     return 1;
484     }
485     }
486    
487     if (argc > 2)
488     {
489     outfile = fopen(argv[op+1], "w");
490     if (outfile == NULL)
491     {
492     printf("** Failed to open %s\n", argv[op+1]);
493     return 1;
494     }
495     }
496    
497     /* Set alternative malloc function */
498    
499     pcre_malloc = new_malloc;
500    
501 nigel 23 /* Heading line, then prompt for first regex if stdin */
502 nigel 3
503     fprintf(outfile, "PCRE version %s\n\n", pcre_version());
504    
505     /* Main loop */
506    
507 nigel 11 while (!done)
508 nigel 3 {
509     pcre *re = NULL;
510     pcre_extra *extra = NULL;
511 nigel 37
512     #if !defined NOPOSIX /* There are still compilers that require no indent */
513 nigel 3 regex_t preg;
514 nigel 45 int do_posix = 0;
515 nigel 37 #endif
516    
517 nigel 7 const char *error;
518 nigel 25 unsigned char *p, *pp, *ppp;
519 nigel 53 const unsigned char *tables = NULL;
520 nigel 3 int do_study = 0;
521 nigel 25 int do_debug = debug;
522 nigel 35 int do_G = 0;
523     int do_g = 0;
524 nigel 25 int do_showinfo = showinfo;
525 nigel 35 int do_showrest = 0;
526 nigel 3 int erroroffset, len, delimiter;
527    
528 nigel 67 use_utf8 = 0;
529 nigel 63
530 nigel 3 if (infile == stdin) printf(" re> ");
531 nigel 69 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
532 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
533 nigel 63 fflush(outfile);
534 nigel 3
535     p = buffer;
536     while (isspace(*p)) p++;
537     if (*p == 0) continue;
538    
539     /* Get the delimiter and seek the end of the pattern; if it isn't
540     complete, read more. */
541    
542     delimiter = *p++;
543    
544 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
545 nigel 3 {
546 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
547 nigel 3 goto SKIP_DATA;
548     }
549    
550     pp = p;
551    
552     for(;;)
553     {
554 nigel 29 while (*pp != 0)
555     {
556     if (*pp == '\\' && pp[1] != 0) pp++;
557     else if (*pp == delimiter) break;
558     pp++;
559     }
560 nigel 3 if (*pp != 0) break;
561    
562 nigel 69 len = BUFFER_SIZE - (pp - buffer);
563 nigel 3 if (len < 256)
564     {
565     fprintf(outfile, "** Expression too long - missing delimiter?\n");
566     goto SKIP_DATA;
567     }
568    
569     if (infile == stdin) printf(" > ");
570     if (fgets((char *)pp, len, infile) == NULL)
571     {
572     fprintf(outfile, "** Unexpected EOF\n");
573 nigel 11 done = 1;
574     goto CONTINUE;
575 nigel 3 }
576 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
577 nigel 3 }
578    
579 nigel 29 /* If the first character after the delimiter is backslash, make
580     the pattern end with backslash. This is purely to provide a way
581     of testing for the error message when a pattern ends with backslash. */
582    
583     if (pp[1] == '\\') *pp++ = '\\';
584    
585 nigel 3 /* Terminate the pattern at the delimiter */
586    
587     *pp++ = 0;
588    
589     /* Look for options after final delimiter */
590    
591     options = 0;
592     study_options = 0;
593 nigel 31 log_store = showstore; /* default from command line */
594    
595 nigel 3 while (*pp != 0)
596     {
597     switch (*pp++)
598     {
599 nigel 35 case 'g': do_g = 1; break;
600 nigel 3 case 'i': options |= PCRE_CASELESS; break;
601     case 'm': options |= PCRE_MULTILINE; break;
602     case 's': options |= PCRE_DOTALL; break;
603     case 'x': options |= PCRE_EXTENDED; break;
604 nigel 25
605 nigel 35 case '+': do_showrest = 1; break;
606 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
607 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
608 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
609 nigel 35 case 'G': do_G = 1; break;
610 nigel 25 case 'I': do_showinfo = 1; break;
611 nigel 31 case 'M': log_store = 1; break;
612 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
613 nigel 37
614     #if !defined NOPOSIX
615 nigel 3 case 'P': do_posix = 1; break;
616 nigel 37 #endif
617    
618 nigel 3 case 'S': do_study = 1; break;
619 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
620 nigel 3 case 'X': options |= PCRE_EXTRA; break;
621 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
622 nigel 25
623     case 'L':
624     ppp = pp;
625     while (*ppp != '\n' && *ppp != ' ') ppp++;
626     *ppp = 0;
627     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
628     {
629     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
630     goto SKIP_DATA;
631     }
632     tables = pcre_maketables();
633     pp = ppp;
634     break;
635    
636 nigel 3 case '\n': case ' ': break;
637     default:
638     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
639     goto SKIP_DATA;
640     }
641     }
642    
643 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
644 nigel 25 timing, showing, or debugging options, nor the ability to pass over
645     local character tables. */
646 nigel 3
647 nigel 37 #if !defined NOPOSIX
648 nigel 3 if (posix || do_posix)
649     {
650     int rc;
651     int cflags = 0;
652     if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
653     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
654     rc = regcomp(&preg, (char *)p, cflags);
655    
656     /* Compilation failed; go back for another re, skipping to blank line
657     if non-interactive. */
658    
659     if (rc != 0)
660     {
661 nigel 69 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
662 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
663     goto SKIP_DATA;
664     }
665     }
666    
667     /* Handle compiling via the native interface */
668    
669     else
670 nigel 37 #endif /* !defined NOPOSIX */
671    
672 nigel 3 {
673     if (timeit)
674     {
675     register int i;
676     clock_t time_taken;
677     clock_t start_time = clock();
678 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
679 nigel 3 {
680 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
681 nigel 3 if (re != NULL) free(re);
682     }
683     time_taken = clock() - start_time;
684 nigel 27 fprintf(outfile, "Compile time %.3f milliseconds\n",
685 nigel 63 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
686     (double)CLOCKS_PER_SEC);
687 nigel 3 }
688    
689 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
690 nigel 3
691     /* Compilation failed; go back for another re, skipping to blank line
692     if non-interactive. */
693    
694     if (re == NULL)
695     {
696     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
697     SKIP_DATA:
698     if (infile != stdin)
699     {
700     for (;;)
701     {
702 nigel 69 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
703 nigel 11 {
704     done = 1;
705     goto CONTINUE;
706     }
707 nigel 3 len = (int)strlen((char *)buffer);
708     while (len > 0 && isspace(buffer[len-1])) len--;
709     if (len == 0) break;
710     }
711     fprintf(outfile, "\n");
712     }
713 nigel 25 goto CONTINUE;
714 nigel 3 }
715    
716 nigel 43 /* Compilation succeeded; print data if required. There are now two
717     info-returning functions. The old one has a limited interface and
718     returns only limited data. Check that it agrees with the newer one. */
719 nigel 3
720 nigel 63 if (log_store)
721     fprintf(outfile, "Memory allocation (code space): %d\n",
722     (int)(gotten_store -
723     sizeof(real_pcre) -
724     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
725    
726 nigel 25 if (do_showinfo)
727 nigel 3 {
728 nigel 53 unsigned long int get_options;
729 nigel 43 int old_first_char, old_options, old_count;
730     int count, backrefmax, first_char, need_char;
731 nigel 63 int nameentrysize, namecount;
732     const uschar *nametable;
733 nigel 43 size_t size;
734 nigel 3
735 nigel 63 if (do_debug)
736     {
737     fprintf(outfile, "------------------------------------------------------------------\n");
738     print_internals(re, outfile);
739     }
740 nigel 3
741 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
742 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
743     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
744     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
745 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
746 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
747 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
748     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
749 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
750 nigel 43
751     old_count = pcre_info(re, &old_options, &old_first_char);
752 nigel 3 if (count < 0) fprintf(outfile,
753 nigel 43 "Error %d from pcre_info()\n", count);
754 nigel 3 else
755     {
756 nigel 43 if (old_count != count) fprintf(outfile,
757     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
758     old_count);
759 nigel 37
760 nigel 43 if (old_first_char != first_char) fprintf(outfile,
761     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
762     first_char, old_first_char);
763 nigel 37
764 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
765     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
766     get_options, old_options);
767 nigel 43 }
768    
769     if (size != gotten_store) fprintf(outfile,
770     "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
771     size, gotten_store);
772    
773     fprintf(outfile, "Capturing subpattern count = %d\n", count);
774     if (backrefmax > 0)
775     fprintf(outfile, "Max back reference = %d\n", backrefmax);
776 nigel 63
777     if (namecount > 0)
778     {
779     fprintf(outfile, "Named capturing subpatterns:\n");
780     while (namecount-- > 0)
781     {
782     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
783     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
784     GET2(nametable, 0));
785     nametable += nameentrysize;
786     }
787     }
788    
789 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
790 nigel 49 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
791 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
792     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
793     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
794     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
795     ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
796     ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
797     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
798     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
799     ((get_options & PCRE_UTF8) != 0)? " utf8" : "");
800 nigel 43
801     if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
802     fprintf(outfile, "Case state changes\n");
803    
804     if (first_char == -1)
805     {
806     fprintf(outfile, "First char at start or follows \\n\n");
807     }
808     else if (first_char < 0)
809     {
810     fprintf(outfile, "No first char\n");
811     }
812     else
813     {
814 nigel 63 int ch = first_char & 255;
815 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
816 nigel 63 "" : " (caseless)";
817     if (isprint(ch))
818     fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
819 nigel 3 else
820 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
821 nigel 43 }
822 nigel 37
823 nigel 43 if (need_char < 0)
824     {
825     fprintf(outfile, "No need char\n");
826 nigel 3 }
827 nigel 43 else
828     {
829 nigel 63 int ch = need_char & 255;
830 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
831 nigel 63 "" : " (caseless)";
832     if (isprint(ch))
833     fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
834 nigel 43 else
835 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
836 nigel 43 }
837 nigel 3 }
838    
839     /* If /S was present, study the regexp to generate additional info to
840     help with the matching. */
841    
842     if (do_study)
843     {
844     if (timeit)
845     {
846     register int i;
847     clock_t time_taken;
848     clock_t start_time = clock();
849 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
850 nigel 3 extra = pcre_study(re, study_options, &error);
851     time_taken = clock() - start_time;
852     if (extra != NULL) free(extra);
853 nigel 27 fprintf(outfile, " Study time %.3f milliseconds\n",
854 nigel 63 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
855     (double)CLOCKS_PER_SEC);
856 nigel 3 }
857    
858     extra = pcre_study(re, study_options, &error);
859     if (error != NULL)
860     fprintf(outfile, "Failed to study: %s\n", error);
861     else if (extra == NULL)
862     fprintf(outfile, "Study returned NULL\n");
863    
864 nigel 25 else if (do_showinfo)
865 nigel 3 {
866 nigel 63 size_t size;
867 nigel 43 uschar *start_bits = NULL;
868 nigel 63 new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);
869 nigel 43 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
870 nigel 63 fprintf(outfile, "Study size = %d\n", size);
871 nigel 43 if (start_bits == NULL)
872 nigel 3 fprintf(outfile, "No starting character set\n");
873     else
874     {
875     int i;
876     int c = 24;
877     fprintf(outfile, "Starting character set: ");
878     for (i = 0; i < 256; i++)
879     {
880 nigel 43 if ((start_bits[i/8] & (1<<(i%8))) != 0)
881 nigel 3 {
882     if (c > 75)
883     {
884     fprintf(outfile, "\n ");
885     c = 2;
886     }
887     if (isprint(i) && i != ' ')
888     {
889     fprintf(outfile, "%c ", i);
890     c += 2;
891     }
892     else
893     {
894     fprintf(outfile, "\\x%02x ", i);
895     c += 5;
896     }
897     }
898     }
899     fprintf(outfile, "\n");
900     }
901     }
902     }
903     }
904    
905     /* Read data lines and test them */
906    
907     for (;;)
908     {
909 nigel 9 unsigned char *q;
910 nigel 35 unsigned char *bptr = dbuffer;
911 nigel 57 int *use_offsets = offsets;
912 nigel 53 int use_size_offsets = size_offsets;
913 nigel 63 int callout_data = 0;
914     int callout_data_set = 0;
915 nigel 3 int count, c;
916 nigel 29 int copystrings = 0;
917 nigel 63 int find_match_limit = 0;
918 nigel 29 int getstrings = 0;
919     int getlist = 0;
920 nigel 39 int gmatched = 0;
921 nigel 35 int start_offset = 0;
922 nigel 41 int g_notempty = 0;
923 nigel 3
924     options = 0;
925    
926 nigel 63 pcre_callout = callout;
927     first_callout = 1;
928     callout_extra = 0;
929     callout_count = 0;
930     callout_fail_count = 999999;
931     callout_fail_id = -1;
932    
933 nigel 35 if (infile == stdin) printf("data> ");
934 nigel 69 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
935 nigel 11 {
936     done = 1;
937     goto CONTINUE;
938     }
939 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
940 nigel 3
941     len = (int)strlen((char *)buffer);
942     while (len > 0 && isspace(buffer[len-1])) len--;
943     buffer[len] = 0;
944     if (len == 0) break;
945    
946     p = buffer;
947     while (isspace(*p)) p++;
948    
949 nigel 9 q = dbuffer;
950 nigel 3 while ((c = *p++) != 0)
951     {
952     int i = 0;
953     int n = 0;
954 nigel 63
955 nigel 3 if (c == '\\') switch ((c = *p++))
956     {
957     case 'a': c = 7; break;
958     case 'b': c = '\b'; break;
959     case 'e': c = 27; break;
960     case 'f': c = '\f'; break;
961     case 'n': c = '\n'; break;
962     case 'r': c = '\r'; break;
963     case 't': c = '\t'; break;
964     case 'v': c = '\v'; break;
965    
966     case '0': case '1': case '2': case '3':
967     case '4': case '5': case '6': case '7':
968     c -= '0';
969     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
970     c = c * 8 + *p++ - '0';
971     break;
972    
973     case 'x':
974 nigel 49
975     /* Handle \x{..} specially - new Perl thing for utf8 */
976    
977     if (*p == '{')
978     {
979     unsigned char *pt = p;
980     c = 0;
981     while (isxdigit(*(++pt)))
982     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
983     if (*pt == '}')
984     {
985 nigel 67 unsigned char buff8[8];
986 nigel 49 int ii, utn;
987 nigel 67 utn = ord2utf8(c, buff8);
988     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
989     c = buff8[ii]; /* Last byte */
990 nigel 49 p = pt + 1;
991     break;
992     }
993     /* Not correct form; fall through */
994     }
995    
996     /* Ordinary \x */
997    
998 nigel 3 c = 0;
999     while (i++ < 2 && isxdigit(*p))
1000     {
1001     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1002     p++;
1003     }
1004     break;
1005    
1006     case 0: /* Allows for an empty line */
1007     p--;
1008     continue;
1009    
1010     case 'A': /* Option setting */
1011     options |= PCRE_ANCHORED;
1012     continue;
1013    
1014     case 'B':
1015     options |= PCRE_NOTBOL;
1016     continue;
1017    
1018 nigel 29 case 'C':
1019 nigel 63 if (isdigit(*p)) /* Set copy string */
1020     {
1021     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1022     copystrings |= 1 << n;
1023     }
1024     else if (isalnum(*p))
1025     {
1026     uschar name[256];
1027 nigel 67 uschar *npp = name;
1028     while (isalnum(*p)) *npp++ = *p++;
1029     *npp = 0;
1030 nigel 65 n = pcre_get_stringnumber(re, (char *)name);
1031 nigel 63 if (n < 0)
1032     fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1033     else copystrings |= 1 << n;
1034     }
1035     else if (*p == '+')
1036     {
1037     callout_extra = 1;
1038     p++;
1039     }
1040     else if (*p == '-')
1041     {
1042     pcre_callout = NULL;
1043     p++;
1044     }
1045     else if (*p == '!')
1046     {
1047     callout_fail_id = 0;
1048     p++;
1049     while(isdigit(*p))
1050     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1051     callout_fail_count = 0;
1052     if (*p == '!')
1053     {
1054     p++;
1055     while(isdigit(*p))
1056     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1057     }
1058     }
1059     else if (*p == '*')
1060     {
1061     int sign = 1;
1062     callout_data = 0;
1063     if (*(++p) == '-') { sign = -1; p++; }
1064     while(isdigit(*p))
1065     callout_data = callout_data * 10 + *p++ - '0';
1066     callout_data *= sign;
1067     callout_data_set = 1;
1068     }
1069 nigel 29 continue;
1070    
1071     case 'G':
1072 nigel 63 if (isdigit(*p))
1073     {
1074     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1075     getstrings |= 1 << n;
1076     }
1077     else if (isalnum(*p))
1078     {
1079     uschar name[256];
1080 nigel 67 uschar *npp = name;
1081     while (isalnum(*p)) *npp++ = *p++;
1082     *npp = 0;
1083 nigel 65 n = pcre_get_stringnumber(re, (char *)name);
1084 nigel 63 if (n < 0)
1085     fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1086     else getstrings |= 1 << n;
1087     }
1088 nigel 29 continue;
1089    
1090     case 'L':
1091     getlist = 1;
1092     continue;
1093    
1094 nigel 63 case 'M':
1095     find_match_limit = 1;
1096     continue;
1097    
1098 nigel 37 case 'N':
1099     options |= PCRE_NOTEMPTY;
1100     continue;
1101    
1102 nigel 3 case 'O':
1103     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1104 nigel 53 if (n > size_offsets_max)
1105     {
1106     size_offsets_max = n;
1107 nigel 57 free(offsets);
1108     use_offsets = offsets = malloc(size_offsets_max * sizeof(int));
1109 nigel 53 if (offsets == NULL)
1110     {
1111     printf("** Failed to get %d bytes of memory for offsets vector\n",
1112     size_offsets_max * sizeof(int));
1113     return 1;
1114     }
1115     }
1116     use_size_offsets = n;
1117 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1118 nigel 3 continue;
1119    
1120     case 'Z':
1121     options |= PCRE_NOTEOL;
1122     continue;
1123     }
1124 nigel 9 *q++ = c;
1125 nigel 3 }
1126 nigel 9 *q = 0;
1127     len = q - dbuffer;
1128 nigel 3
1129     /* Handle matching via the POSIX interface, which does not
1130 nigel 63 support timing or playing with the match limit or callout data. */
1131 nigel 3
1132 nigel 37 #if !defined NOPOSIX
1133 nigel 3 if (posix || do_posix)
1134     {
1135     int rc;
1136     int eflags = 0;
1137 nigel 63 regmatch_t *pmatch = NULL;
1138     if (use_size_offsets > 0)
1139     pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
1140 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1141     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1142    
1143 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1144 nigel 3
1145     if (rc != 0)
1146     {
1147 nigel 69 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1148 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1149     }
1150     else
1151     {
1152 nigel 7 size_t i;
1153 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
1154 nigel 3 {
1155     if (pmatch[i].rm_so >= 0)
1156     {
1157 nigel 23 fprintf(outfile, "%2d: ", (int)i);
1158 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
1159     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1160 nigel 3 fprintf(outfile, "\n");
1161 nigel 35 if (i == 0 && do_showrest)
1162     {
1163     fprintf(outfile, " 0+ ");
1164 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1165     outfile);
1166 nigel 35 fprintf(outfile, "\n");
1167     }
1168 nigel 3 }
1169     }
1170     }
1171 nigel 53 free(pmatch);
1172 nigel 3 }
1173    
1174 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
1175 nigel 3
1176 nigel 37 else
1177     #endif /* !defined NOPOSIX */
1178    
1179 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
1180 nigel 3 {
1181     if (timeit)
1182     {
1183     register int i;
1184     clock_t time_taken;
1185     clock_t start_time = clock();
1186 nigel 27 for (i = 0; i < LOOPREPEAT; i++)
1187 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
1188 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1189 nigel 3 time_taken = clock() - start_time;
1190 nigel 27 fprintf(outfile, "Execute time %.3f milliseconds\n",
1191 nigel 63 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1192     (double)CLOCKS_PER_SEC);
1193 nigel 3 }
1194    
1195 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
1196     varying limits in order to find the minimum value. */
1197    
1198     if (find_match_limit)
1199     {
1200     int min = 0;
1201     int mid = 64;
1202     int max = -1;
1203    
1204     if (extra == NULL)
1205     {
1206     extra = malloc(sizeof(pcre_extra));
1207     extra->flags = 0;
1208     }
1209     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1210    
1211     for (;;)
1212     {
1213     extra->match_limit = mid;
1214     count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1215     options | g_notempty, use_offsets, use_size_offsets);
1216     if (count == PCRE_ERROR_MATCHLIMIT)
1217     {
1218     /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1219     min = mid;
1220     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1221     }
1222     else if (count >= 0 || count == PCRE_ERROR_NOMATCH)
1223     {
1224     if (mid == min + 1)
1225     {
1226     fprintf(outfile, "Minimum match limit = %d\n", mid);
1227     break;
1228     }
1229     /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1230     max = mid;
1231     mid = (min + mid)/2;
1232     }
1233     else break; /* Some other error */
1234     }
1235    
1236     extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1237     }
1238    
1239     /* If callout_data is set, use the interface with additional data */
1240    
1241     else if (callout_data_set)
1242     {
1243     if (extra == NULL)
1244     {
1245     extra = malloc(sizeof(pcre_extra));
1246     extra->flags = 0;
1247     }
1248     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1249     extra->callout_data = (void *)callout_data;
1250     count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1251     options | g_notempty, use_offsets, use_size_offsets);
1252     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1253     }
1254    
1255     /* The normal case is just to do the match once, with the default
1256     value of match_limit. */
1257    
1258     else count = pcre_exec(re, extra, (char *)bptr, len,
1259 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1260 nigel 3
1261     if (count == 0)
1262     {
1263     fprintf(outfile, "Matched, but too many substrings\n");
1264 nigel 53 count = use_size_offsets/3;
1265 nigel 3 }
1266    
1267 nigel 39 /* Matched */
1268    
1269 nigel 3 if (count >= 0)
1270     {
1271     int i;
1272 nigel 29 for (i = 0; i < count * 2; i += 2)
1273 nigel 3 {
1274 nigel 57 if (use_offsets[i] < 0)
1275 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
1276     else
1277     {
1278     fprintf(outfile, "%2d: ", i/2);
1279 nigel 63 (void)pchars(bptr + use_offsets[i],
1280     use_offsets[i+1] - use_offsets[i], outfile);
1281 nigel 3 fprintf(outfile, "\n");
1282 nigel 35 if (i == 0)
1283     {
1284     if (do_showrest)
1285     {
1286     fprintf(outfile, " 0+ ");
1287 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1288     outfile);
1289 nigel 35 fprintf(outfile, "\n");
1290     }
1291     }
1292 nigel 3 }
1293     }
1294 nigel 29
1295     for (i = 0; i < 32; i++)
1296     {
1297     if ((copystrings & (1 << i)) != 0)
1298     {
1299 nigel 37 char copybuffer[16];
1300 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1301 nigel 37 i, copybuffer, sizeof(copybuffer));
1302 nigel 29 if (rc < 0)
1303     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1304     else
1305 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1306 nigel 29 }
1307     }
1308    
1309     for (i = 0; i < 32; i++)
1310     {
1311     if ((getstrings & (1 << i)) != 0)
1312     {
1313     const char *substring;
1314 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1315 nigel 29 i, &substring);
1316     if (rc < 0)
1317     fprintf(outfile, "get substring %d failed %d\n", i, rc);
1318     else
1319     {
1320     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1321 nigel 49 /* free((void *)substring); */
1322     pcre_free_substring(substring);
1323 nigel 29 }
1324     }
1325     }
1326    
1327     if (getlist)
1328     {
1329     const char **stringlist;
1330 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1331 nigel 29 &stringlist);
1332     if (rc < 0)
1333     fprintf(outfile, "get substring list failed %d\n", rc);
1334     else
1335     {
1336     for (i = 0; i < count; i++)
1337     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1338     if (stringlist[i] != NULL)
1339     fprintf(outfile, "string list not terminated by NULL\n");
1340 nigel 49 /* free((void *)stringlist); */
1341     pcre_free_substring_list(stringlist);
1342 nigel 29 }
1343     }
1344 nigel 39 }
1345 nigel 29
1346 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
1347 nigel 47 g_notempty after a null match, this is not necessarily the end.
1348 nigel 41 We want to advance the start offset, and continue. Fudge the offset
1349     values to achieve this. We won't be at the end of the string - that
1350 nigel 47 was checked before setting g_notempty. */
1351 nigel 39
1352 nigel 3 else
1353     {
1354 nigel 41 if (g_notempty != 0)
1355 nigel 35 {
1356 nigel 57 use_offsets[0] = start_offset;
1357     use_offsets[1] = start_offset + 1;
1358 nigel 35 }
1359 nigel 41 else
1360     {
1361     if (gmatched == 0) /* Error if no previous matches */
1362     {
1363     if (count == -1) fprintf(outfile, "No match\n");
1364     else fprintf(outfile, "Error %d\n", count);
1365     }
1366     break; /* Out of the /g loop */
1367     }
1368 nigel 3 }
1369 nigel 35
1370 nigel 39 /* If not /g or /G we are done */
1371    
1372     if (!do_g && !do_G) break;
1373    
1374 nigel 41 /* If we have matched an empty string, first check to see if we are at
1375     the end of the subject. If so, the /g loop is over. Otherwise, mimic
1376     what Perl's /g option does. This turns out to be rather cunning. First
1377 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1378     same point. If this fails (picked up above) we advance to the next
1379     character. */
1380 nigel 39
1381 nigel 41 g_notempty = 0;
1382 nigel 57 if (use_offsets[0] == use_offsets[1])
1383 nigel 41 {
1384 nigel 57 if (use_offsets[0] == len) break;
1385 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1386 nigel 41 }
1387 nigel 39
1388     /* For /g, update the start offset, leaving the rest alone */
1389    
1390 nigel 57 if (do_g) start_offset = use_offsets[1];
1391 nigel 39
1392     /* For /G, update the pointer and length */
1393    
1394     else
1395 nigel 35 {
1396 nigel 57 bptr += use_offsets[1];
1397     len -= use_offsets[1];
1398 nigel 35 }
1399 nigel 39 } /* End of loop for /g and /G */
1400     } /* End of loop for data lines */
1401 nigel 3
1402 nigel 11 CONTINUE:
1403 nigel 37
1404     #if !defined NOPOSIX
1405 nigel 3 if (posix || do_posix) regfree(&preg);
1406 nigel 37 #endif
1407    
1408 nigel 3 if (re != NULL) free(re);
1409     if (extra != NULL) free(extra);
1410 nigel 25 if (tables != NULL)
1411     {
1412     free((void *)tables);
1413     setlocale(LC_CTYPE, "C");
1414     }
1415 nigel 3 }
1416    
1417     fprintf(outfile, "\n");
1418     return 0;
1419     }
1420    
1421     /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12