/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 67 - (hide annotations) (download)
Sat Feb 24 21:40:13 2007 UTC (7 years, 2 months ago) by nigel
File MIME type: text/plain
File size: 39234 byte(s)
Load pcre-4.2 into code/trunk.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7     been extended and consequently is now rather untidy in places. */
8    
#include <ctype.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
15 nigel 3
16 nigel 63 /* We need the internal info for displaying the results of pcre_study(). Also
17     for getting the opcodes for showing compiled code. */
18 nigel 3
19 nigel 63 #define PCRE_SPY /* For Win32 build, import data, not export */
20 nigel 3 #include "internal.h"
21 nigel 37
22     /* It is possible to compile this test program without including support for
23     testing the POSIX interface, though this is not available via the standard
24     Makefile. */
25    
26     #if !defined NOPOSIX
27 nigel 3 #include "pcreposix.h"
28 nigel 37 #endif
29 nigel 3
30     #ifndef CLOCKS_PER_SEC
31     #ifdef CLK_TCK
32     #define CLOCKS_PER_SEC CLK_TCK
33     #else
34     #define CLOCKS_PER_SEC 100
35     #endif
36     #endif
37    
38 nigel 63 #define LOOPREPEAT 50000
39 nigel 3
40 nigel 23
41 nigel 3 static FILE *outfile;
42     static int log_store = 0;
43 nigel 63 static int callout_count;
44     static int callout_extra;
45     static int callout_fail_count;
46     static int callout_fail_id;
47     static int first_callout;
48 nigel 67 static int use_utf8;
49 nigel 43 static size_t gotten_store;
50 nigel 3
51    
52    
/* Lookup tables used by ord2utf8() and utf82ord(). Each is indexed by the
number of bytes (0-5) that follow the first byte of a UTF-8 character. They
are only ever read, so they are declared const. */

/* Largest character value that can be encoded with that many extra bytes */

static const int utf8_table1[] = {
  0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};

/* Marker bits that are ORed into the first byte of such an encoding */

static const int utf8_table2[] = {
  0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};

/* Mask that extracts the data bits from the first byte of such an encoding */

static const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
61    
62    
63 nigel 63
64 nigel 49 /*************************************************
65 nigel 63 * Print compiled regex *
66     *************************************************/
67    
68     /* The code for doing this is held in a separate file that is also included in
69     pcre.c when it is compiled with the debug switch. It defines a function called
70     print_internals(), which uses a table of opcode lengths defined by the macro
71     OP_LENGTHS, whose name must be OP_lengths. */
72    
73     static uschar OP_lengths[] = { OP_LENGTHS };
74    
75     #include "printint.c"
76    
77    
78    
/*************************************************
*        Read number from string                 *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking the -o argument, so just keep it simple.

Leading white space is skipped and then a run of decimal digits is converted.
A number that is too big for an int saturates at INT_MAX instead of
overflowing (signed overflow is undefined behaviour). The remaining digits are
still consumed, so the end pointer is always left just after the whole number.

Arguments:
  str         string to be converted
  endptr      where to put the end pointer (first character not consumed)

Returns:      the value as a non-negative int; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (*str != 0 && isspace(*str)) str++;
while (isdigit(*str))
  {
  int digit = *str++ - '0';

  /* result * 10 + digit fits in an int only if result <= (INT_MAX-digit)/10.
  Once saturated, the test keeps failing, so the value stays at INT_MAX. */

  if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
  }
*endptr = str;
return result;
}
103    
104    
105    
106     /*************************************************
107 nigel 49 * Convert character value to UTF-8 *
108     *************************************************/
109    
110     /* This function takes an integer value in the range 0 - 0x7fffffff
111     and encodes it as a UTF-8 character in 0 to 6 bytes.
112    
113     Arguments:
114     cvalue the character value
115     buffer pointer to buffer for result - at least 6 bytes long
116    
117     Returns: number of characters placed in the buffer
118     -1 if input character is negative
119     0 if input character is positive but too big (only when
120     int is longer than 32 bits)
121     */
122    
123     static int
124     ord2utf8(int cvalue, unsigned char *buffer)
125     {
126     register int i, j;
127     for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
128     if (cvalue <= utf8_table1[i]) break;
129     if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
130     if (cvalue < 0) return -1;
131 nigel 59
132     buffer += i;
133     for (j = i; j > 0; j--)
134     {
135     *buffer-- = 0x80 | (cvalue & 0x3f);
136     cvalue >>= 6;
137     }
138     *buffer = utf8_table2[i] | cvalue;
139 nigel 49 return i + 1;
140     }
141    
142    
143     /*************************************************
144     * Convert UTF-8 string to value *
145     *************************************************/
146    
147     /* This function takes one or more bytes that represents a UTF-8 character,
148     and returns the value of the character.
149    
150     Argument:
151     buffer a pointer to the byte vector
152     vptr a pointer to an int to receive the value
153    
154     Returns: > 0 => the number of bytes consumed
155     -6 to 0 => malformed UTF-8 character at offset = (-return)
156     */
157    
158 nigel 67 static int
159 nigel 49 utf82ord(unsigned char *buffer, int *vptr)
160     {
161     int c = *buffer++;
162     int d = c;
163     int i, j, s;
164    
165     for (i = -1; i < 6; i++) /* i is number of additional bytes */
166     {
167     if ((d & 0x80) == 0) break;
168     d <<= 1;
169     }
170    
171     if (i == -1) { *vptr = c; return 1; } /* ascii character */
172     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
173    
174     /* i now has a value in the range 1-5 */
175    
176 nigel 59 s = 6*i;
177     d = (c & utf8_table3[i]) << s;
178 nigel 49
179     for (j = 0; j < i; j++)
180     {
181     c = *buffer++;
182     if ((c & 0xc0) != 0x80) return -(j+1);
183 nigel 59 s -= 6;
184 nigel 49 d |= (c & 0x3f) << s;
185     }
186    
187     /* Check that encoding was the correct unique one */
188    
189     for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
190     if (d <= utf8_table1[j]) break;
191     if (j != i) return -(i+1);
192    
193     /* Valid value */
194    
195     *vptr = d;
196     return i+1;
197     }
198    
199    
200    
201 nigel 63 /*************************************************
202     * Print character string *
203     *************************************************/
204 nigel 49
205 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
206     mode. Yields number of characters printed. If handed a NULL file, just counts
207     chars without printing. */
208 nigel 49
209 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
210 nigel 3 {
211 nigel 63 int c;
212     int yield = 0;
213 nigel 3
214 nigel 63 while (length-- > 0)
215 nigel 3 {
216 nigel 67 if (use_utf8)
217 nigel 63 {
218     int rc = utf82ord(p, &c);
219 nigel 3
220 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
221     {
222     length -= rc - 1;
223     p += rc;
224     if (c < 256 && isprint(c))
225     {
226     if (f != NULL) fprintf(f, "%c", c);
227     yield++;
228     }
229     else
230     {
231     int n;
232     if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
233     yield += n;
234     }
235     continue;
236     }
237     }
238 nigel 3
239 nigel 63 /* Not UTF-8, or malformed UTF-8 */
240    
241     if (isprint(c = *(p++)))
242 nigel 3 {
243 nigel 63 if (f != NULL) fprintf(f, "%c", c);
244     yield++;
245 nigel 3 }
246 nigel 63 else
247 nigel 3 {
248 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
249     yield += 4;
250     }
251     }
252 nigel 3
253 nigel 63 return yield;
254     }
255 nigel 23
256 nigel 3
257 nigel 23
258 nigel 63 /*************************************************
259     * Callout function *
260     *************************************************/
261 nigel 3
262 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
263     the match. Yield zero unless more callouts than the fail count, or the callout
264     data is not zero. */
265 nigel 3
266 nigel 63 static int callout(pcre_callout_block *cb)
267     {
268     FILE *f = (first_callout | callout_extra)? outfile : NULL;
269     int i, pre_start, post_start;
270 nigel 3
271 nigel 63 if (callout_extra)
272     {
273     fprintf(f, "Callout %d: last capture = %d\n",
274     cb->callout_number, cb->capture_last);
275 nigel 3
276 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
277     {
278     if (cb->offset_vector[i] < 0)
279     fprintf(f, "%2d: <unset>\n", i/2);
280     else
281     {
282     fprintf(f, "%2d: ", i/2);
283     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
284     cb->offset_vector[i+1] - cb->offset_vector[i], f);
285     fprintf(f, "\n");
286     }
287     }
288     }
289 nigel 3
290 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
291     datails. On subsequent calls in the same match, we use pchars just to find the
292     printed lengths of the substrings. */
293 nigel 3
294 nigel 63 if (f != NULL) fprintf(f, "--->");
295 nigel 3
296 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
297     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
298     cb->current_position - cb->start_match, f);
299 nigel 3
300 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
301     cb->subject_length - cb->current_position, f);
302 nigel 3
303 nigel 63 if (f != NULL) fprintf(f, "\n");
304 nigel 9
305 nigel 63 /* Always print appropriate indicators, with callout number if not already
306     shown */
307 nigel 3
308 nigel 63 if (callout_extra) fprintf(outfile, " ");
309     else fprintf(outfile, "%3d ", cb->callout_number);
310 nigel 3
311 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
312     fprintf(outfile, "^");
313 nigel 3
314 nigel 63 if (post_start > 0)
315     {
316     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
317     fprintf(outfile, "^");
318 nigel 3 }
319    
320 nigel 63 fprintf(outfile, "\n");
321 nigel 3
322 nigel 63 first_callout = 0;
323 nigel 3
324 nigel 63 if ((int)(cb->callout_data) != 0)
325 nigel 49 {
326 nigel 63 fprintf(outfile, "Callout data = %d\n", (int)(cb->callout_data));
327     return (int)(cb->callout_data);
328     }
329 nigel 49
330 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
331     (++callout_count >= callout_fail_count)? 1 : 0;
332 nigel 3 }
333    
334    
335 nigel 63 /*************************************************
336     * Local malloc function *
337     *************************************************/
338 nigel 3
339     /* Alternative malloc function, to test functionality and show the size of the
340     compiled re. */
341    
342     static void *new_malloc(size_t size)
343     {
344 nigel 43 gotten_store = size;
345 nigel 3 return malloc(size);
346     }
347    
348    
349    
350 nigel 63 /*************************************************
351     * Call pcre_fullinfo() *
352     *************************************************/
353 nigel 43
354     /* Get one piece of information from the pcre_fullinfo() function */
355    
356     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
357     {
358     int rc;
359     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
360     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
361     }
362    
363    
364    
365 nigel 63 /*************************************************
366     * Main Program *
367     *************************************************/
368 nigel 43
369 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
370     consist of a regular expression, in delimiters and optionally followed by
371     options, followed by a set of test data, terminated by an empty line. */
372    
373     int main(int argc, char **argv)
374     {
375     FILE *infile = stdin;
376     int options = 0;
377     int study_options = 0;
378     int op = 1;
379     int timeit = 0;
380     int showinfo = 0;
381 nigel 31 int showstore = 0;
382 nigel 53 int size_offsets = 45;
383     int size_offsets_max;
384     int *offsets;
385     #if !defined NOPOSIX
386 nigel 3 int posix = 0;
387 nigel 53 #endif
388 nigel 3 int debug = 0;
389 nigel 11 int done = 0;
390 nigel 3 unsigned char buffer[30000];
391     unsigned char dbuffer[1024];
392    
393     /* Static so that new_malloc can use it. */
394    
395     outfile = stdout;
396    
397     /* Scan options */
398    
399     while (argc > 1 && argv[op][0] == '-')
400     {
401 nigel 63 unsigned char *endptr;
402 nigel 53
403 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
404     showstore = 1;
405 nigel 3 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
406     else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
407     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
408 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
409 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
410     *endptr == 0))
411 nigel 53 {
412     op++;
413     argc--;
414     }
415     #if !defined NOPOSIX
416 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
417 nigel 53 #endif
418 nigel 63 else if (strcmp(argv[op], "-C") == 0)
419     {
420     int rc;
421     printf("PCRE version %s\n", pcre_version());
422     printf("Compiled with\n");
423     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
424     printf(" %sUTF-8 support\n", rc? "" : "No ");
425     (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
426     printf(" Newline character is %s\n", (rc == '\r')? "CR" : "LF");
427     (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
428     printf(" Internal link size = %d\n", rc);
429     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
430     printf(" POSIX malloc threshold = %d\n", rc);
431     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
432     printf(" Default match limit = %d\n", rc);
433     exit(0);
434     }
435 nigel 3 else
436     {
437 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
438     printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
439 nigel 63 printf(" -C show PCRE compile-time options and exit\n");
440 nigel 53 printf(" -d debug: show compiled code; implies -i\n"
441     " -i show information about compiled pattern\n"
442     " -o <n> set size of offsets vector to <n>\n");
443     #if !defined NOPOSIX
444     printf(" -p use POSIX interface\n");
445     #endif
446     printf(" -s output store information\n"
447     " -t time compilation and execution\n");
448 nigel 3 return 1;
449     }
450     op++;
451     argc--;
452     }
453    
454 nigel 53 /* Get the store for the offsets vector, and remember what it was */
455    
456     size_offsets_max = size_offsets;
457     offsets = malloc(size_offsets_max * sizeof(int));
458     if (offsets == NULL)
459     {
460     printf("** Failed to get %d bytes of memory for offsets vector\n",
461     size_offsets_max * sizeof(int));
462     return 1;
463     }
464    
465 nigel 3 /* Sort out the input and output files */
466    
467     if (argc > 1)
468     {
469     infile = fopen(argv[op], "r");
470     if (infile == NULL)
471     {
472     printf("** Failed to open %s\n", argv[op]);
473     return 1;
474     }
475     }
476    
477     if (argc > 2)
478     {
479     outfile = fopen(argv[op+1], "w");
480     if (outfile == NULL)
481     {
482     printf("** Failed to open %s\n", argv[op+1]);
483     return 1;
484     }
485     }
486    
487     /* Set alternative malloc function */
488    
489     pcre_malloc = new_malloc;
490    
491 nigel 23 /* Heading line, then prompt for first regex if stdin */
492 nigel 3
493     fprintf(outfile, "PCRE version %s\n\n", pcre_version());
494    
495     /* Main loop */
496    
497 nigel 11 while (!done)
498 nigel 3 {
499     pcre *re = NULL;
500     pcre_extra *extra = NULL;
501 nigel 37
502     #if !defined NOPOSIX /* There are still compilers that require no indent */
503 nigel 3 regex_t preg;
504 nigel 45 int do_posix = 0;
505 nigel 37 #endif
506    
507 nigel 7 const char *error;
508 nigel 25 unsigned char *p, *pp, *ppp;
509 nigel 53 const unsigned char *tables = NULL;
510 nigel 3 int do_study = 0;
511 nigel 25 int do_debug = debug;
512 nigel 35 int do_G = 0;
513     int do_g = 0;
514 nigel 25 int do_showinfo = showinfo;
515 nigel 35 int do_showrest = 0;
516 nigel 3 int erroroffset, len, delimiter;
517    
518 nigel 67 use_utf8 = 0;
519 nigel 63
520 nigel 3 if (infile == stdin) printf(" re> ");
521     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
522 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
523 nigel 63 fflush(outfile);
524 nigel 3
525     p = buffer;
526     while (isspace(*p)) p++;
527     if (*p == 0) continue;
528    
529     /* Get the delimiter and seek the end of the pattern; if it isn't
530     complete, read more. */
531    
532     delimiter = *p++;
533    
534 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
535 nigel 3 {
536 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
537 nigel 3 goto SKIP_DATA;
538     }
539    
540     pp = p;
541    
542     for(;;)
543     {
544 nigel 29 while (*pp != 0)
545     {
546     if (*pp == '\\' && pp[1] != 0) pp++;
547     else if (*pp == delimiter) break;
548     pp++;
549     }
550 nigel 3 if (*pp != 0) break;
551    
552     len = sizeof(buffer) - (pp - buffer);
553     if (len < 256)
554     {
555     fprintf(outfile, "** Expression too long - missing delimiter?\n");
556     goto SKIP_DATA;
557     }
558    
559     if (infile == stdin) printf(" > ");
560     if (fgets((char *)pp, len, infile) == NULL)
561     {
562     fprintf(outfile, "** Unexpected EOF\n");
563 nigel 11 done = 1;
564     goto CONTINUE;
565 nigel 3 }
566 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
567 nigel 3 }
568    
569 nigel 29 /* If the first character after the delimiter is backslash, make
570     the pattern end with backslash. This is purely to provide a way
571     of testing for the error message when a pattern ends with backslash. */
572    
573     if (pp[1] == '\\') *pp++ = '\\';
574    
575 nigel 3 /* Terminate the pattern at the delimiter */
576    
577     *pp++ = 0;
578    
579     /* Look for options after final delimiter */
580    
581     options = 0;
582     study_options = 0;
583 nigel 31 log_store = showstore; /* default from command line */
584    
585 nigel 3 while (*pp != 0)
586     {
587     switch (*pp++)
588     {
589 nigel 35 case 'g': do_g = 1; break;
590 nigel 3 case 'i': options |= PCRE_CASELESS; break;
591     case 'm': options |= PCRE_MULTILINE; break;
592     case 's': options |= PCRE_DOTALL; break;
593     case 'x': options |= PCRE_EXTENDED; break;
594 nigel 25
595 nigel 35 case '+': do_showrest = 1; break;
596 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
597 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
598 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
599 nigel 35 case 'G': do_G = 1; break;
600 nigel 25 case 'I': do_showinfo = 1; break;
601 nigel 31 case 'M': log_store = 1; break;
602 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
603 nigel 37
604     #if !defined NOPOSIX
605 nigel 3 case 'P': do_posix = 1; break;
606 nigel 37 #endif
607    
608 nigel 3 case 'S': do_study = 1; break;
609 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
610 nigel 3 case 'X': options |= PCRE_EXTRA; break;
611 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
612 nigel 25
613     case 'L':
614     ppp = pp;
615     while (*ppp != '\n' && *ppp != ' ') ppp++;
616     *ppp = 0;
617     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
618     {
619     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
620     goto SKIP_DATA;
621     }
622     tables = pcre_maketables();
623     pp = ppp;
624     break;
625    
626 nigel 3 case '\n': case ' ': break;
627     default:
628     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
629     goto SKIP_DATA;
630     }
631     }
632    
633 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
634 nigel 25 timing, showing, or debugging options, nor the ability to pass over
635     local character tables. */
636 nigel 3
637 nigel 37 #if !defined NOPOSIX
638 nigel 3 if (posix || do_posix)
639     {
640     int rc;
641     int cflags = 0;
642     if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
643     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
644     rc = regcomp(&preg, (char *)p, cflags);
645    
646     /* Compilation failed; go back for another re, skipping to blank line
647     if non-interactive. */
648    
649     if (rc != 0)
650     {
651     (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
652     fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
653     goto SKIP_DATA;
654     }
655     }
656    
657     /* Handle compiling via the native interface */
658    
659     else
660 nigel 37 #endif /* !defined NOPOSIX */
661    
662 nigel 3 {
663     if (timeit)
664     {
665     register int i;
666     clock_t time_taken;
667     clock_t start_time = clock();
668 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
669 nigel 3 {
670 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
671 nigel 3 if (re != NULL) free(re);
672     }
673     time_taken = clock() - start_time;
674 nigel 27 fprintf(outfile, "Compile time %.3f milliseconds\n",
675 nigel 63 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
676     (double)CLOCKS_PER_SEC);
677 nigel 3 }
678    
679 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
680 nigel 3
681     /* Compilation failed; go back for another re, skipping to blank line
682     if non-interactive. */
683    
684     if (re == NULL)
685     {
686     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
687     SKIP_DATA:
688     if (infile != stdin)
689     {
690     for (;;)
691     {
692     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
693 nigel 11 {
694     done = 1;
695     goto CONTINUE;
696     }
697 nigel 3 len = (int)strlen((char *)buffer);
698     while (len > 0 && isspace(buffer[len-1])) len--;
699     if (len == 0) break;
700     }
701     fprintf(outfile, "\n");
702     }
703 nigel 25 goto CONTINUE;
704 nigel 3 }
705    
706 nigel 43 /* Compilation succeeded; print data if required. There are now two
707     info-returning functions. The old one has a limited interface and
708     returns only limited data. Check that it agrees with the newer one. */
709 nigel 3
710 nigel 63 if (log_store)
711     fprintf(outfile, "Memory allocation (code space): %d\n",
712     (int)(gotten_store -
713     sizeof(real_pcre) -
714     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
715    
716 nigel 25 if (do_showinfo)
717 nigel 3 {
718 nigel 53 unsigned long int get_options;
719 nigel 43 int old_first_char, old_options, old_count;
720     int count, backrefmax, first_char, need_char;
721 nigel 63 int nameentrysize, namecount;
722     const uschar *nametable;
723 nigel 43 size_t size;
724 nigel 3
725 nigel 63 if (do_debug)
726     {
727     fprintf(outfile, "------------------------------------------------------------------\n");
728     print_internals(re, outfile);
729     }
730 nigel 3
731 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
732 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
733     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
734     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
735 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
736 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
737 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
738     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
739 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
740 nigel 43
741     old_count = pcre_info(re, &old_options, &old_first_char);
742 nigel 3 if (count < 0) fprintf(outfile,
743 nigel 43 "Error %d from pcre_info()\n", count);
744 nigel 3 else
745     {
746 nigel 43 if (old_count != count) fprintf(outfile,
747     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
748     old_count);
749 nigel 37
750 nigel 43 if (old_first_char != first_char) fprintf(outfile,
751     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
752     first_char, old_first_char);
753 nigel 37
754 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
755     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
756     get_options, old_options);
757 nigel 43 }
758    
759     if (size != gotten_store) fprintf(outfile,
760     "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
761     size, gotten_store);
762    
763     fprintf(outfile, "Capturing subpattern count = %d\n", count);
764     if (backrefmax > 0)
765     fprintf(outfile, "Max back reference = %d\n", backrefmax);
766 nigel 63
767     if (namecount > 0)
768     {
769     fprintf(outfile, "Named capturing subpatterns:\n");
770     while (namecount-- > 0)
771     {
772     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
773     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
774     GET2(nametable, 0));
775     nametable += nameentrysize;
776     }
777     }
778    
779 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
780 nigel 49 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
781 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
782     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
783     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
784     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
785     ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
786     ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
787     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
788     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
789     ((get_options & PCRE_UTF8) != 0)? " utf8" : "");
790 nigel 43
791     if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
792     fprintf(outfile, "Case state changes\n");
793    
794     if (first_char == -1)
795     {
796     fprintf(outfile, "First char at start or follows \\n\n");
797     }
798     else if (first_char < 0)
799     {
800     fprintf(outfile, "No first char\n");
801     }
802     else
803     {
804 nigel 63 int ch = first_char & 255;
805 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
806 nigel 63 "" : " (caseless)";
807     if (isprint(ch))
808     fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
809 nigel 3 else
810 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
811 nigel 43 }
812 nigel 37
813 nigel 43 if (need_char < 0)
814     {
815     fprintf(outfile, "No need char\n");
816 nigel 3 }
817 nigel 43 else
818     {
819 nigel 63 int ch = need_char & 255;
820 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
821 nigel 63 "" : " (caseless)";
822     if (isprint(ch))
823     fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
824 nigel 43 else
825 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
826 nigel 43 }
827 nigel 3 }
828    
829     /* If /S was present, study the regexp to generate additional info to
830     help with the matching. */
831    
832     if (do_study)
833     {
834     if (timeit)
835     {
836     register int i;
837     clock_t time_taken;
838     clock_t start_time = clock();
839 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
840 nigel 3 extra = pcre_study(re, study_options, &error);
841     time_taken = clock() - start_time;
842     if (extra != NULL) free(extra);
843 nigel 27 fprintf(outfile, " Study time %.3f milliseconds\n",
844 nigel 63 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
845     (double)CLOCKS_PER_SEC);
846 nigel 3 }
847    
848     extra = pcre_study(re, study_options, &error);
849     if (error != NULL)
850     fprintf(outfile, "Failed to study: %s\n", error);
851     else if (extra == NULL)
852     fprintf(outfile, "Study returned NULL\n");
853    
854 nigel 25 else if (do_showinfo)
855 nigel 3 {
856 nigel 63 size_t size;
857 nigel 43 uschar *start_bits = NULL;
858 nigel 63 new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);
859 nigel 43 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
860 nigel 63 fprintf(outfile, "Study size = %d\n", size);
861 nigel 43 if (start_bits == NULL)
862 nigel 3 fprintf(outfile, "No starting character set\n");
863     else
864     {
865     int i;
866     int c = 24;
867     fprintf(outfile, "Starting character set: ");
868     for (i = 0; i < 256; i++)
869     {
870 nigel 43 if ((start_bits[i/8] & (1<<(i%8))) != 0)
871 nigel 3 {
872     if (c > 75)
873     {
874     fprintf(outfile, "\n ");
875     c = 2;
876     }
877     if (isprint(i) && i != ' ')
878     {
879     fprintf(outfile, "%c ", i);
880     c += 2;
881     }
882     else
883     {
884     fprintf(outfile, "\\x%02x ", i);
885     c += 5;
886     }
887     }
888     }
889     fprintf(outfile, "\n");
890     }
891     }
892     }
893     }
894    
895     /* Read data lines and test them */
896    
897     for (;;)
898     {
899 nigel 9 unsigned char *q;
900 nigel 35 unsigned char *bptr = dbuffer;
901 nigel 57 int *use_offsets = offsets;
902 nigel 53 int use_size_offsets = size_offsets;
903 nigel 63 int callout_data = 0;
904     int callout_data_set = 0;
905 nigel 3 int count, c;
906 nigel 29 int copystrings = 0;
907 nigel 63 int find_match_limit = 0;
908 nigel 29 int getstrings = 0;
909     int getlist = 0;
910 nigel 39 int gmatched = 0;
911 nigel 35 int start_offset = 0;
912 nigel 41 int g_notempty = 0;
913 nigel 3
914     options = 0;
915    
916 nigel 63 pcre_callout = callout;
917     first_callout = 1;
918     callout_extra = 0;
919     callout_count = 0;
920     callout_fail_count = 999999;
921     callout_fail_id = -1;
922    
923 nigel 35 if (infile == stdin) printf("data> ");
924 nigel 11 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
925     {
926     done = 1;
927     goto CONTINUE;
928     }
929 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
930 nigel 3
931     len = (int)strlen((char *)buffer);
932     while (len > 0 && isspace(buffer[len-1])) len--;
933     buffer[len] = 0;
934     if (len == 0) break;
935    
936     p = buffer;
937     while (isspace(*p)) p++;
938    
939 nigel 9 q = dbuffer;
940 nigel 3 while ((c = *p++) != 0)
941     {
942     int i = 0;
943     int n = 0;
944 nigel 63
945 nigel 3 if (c == '\\') switch ((c = *p++))
946     {
947     case 'a': c = 7; break;
948     case 'b': c = '\b'; break;
949     case 'e': c = 27; break;
950     case 'f': c = '\f'; break;
951     case 'n': c = '\n'; break;
952     case 'r': c = '\r'; break;
953     case 't': c = '\t'; break;
954     case 'v': c = '\v'; break;
955    
956     case '0': case '1': case '2': case '3':
957     case '4': case '5': case '6': case '7':
958     c -= '0';
959     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
960     c = c * 8 + *p++ - '0';
961     break;
962    
963     case 'x':
964 nigel 49
965     /* Handle \x{..} specially - new Perl thing for utf8 */
966    
967     if (*p == '{')
968     {
969     unsigned char *pt = p;
970     c = 0;
971     while (isxdigit(*(++pt)))
972     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
973     if (*pt == '}')
974     {
975 nigel 67 unsigned char buff8[8];
976 nigel 49 int ii, utn;
977 nigel 67 utn = ord2utf8(c, buff8);
978     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
979     c = buff8[ii]; /* Last byte */
980 nigel 49 p = pt + 1;
981     break;
982     }
983     /* Not correct form; fall through */
984     }
985    
986     /* Ordinary \x */
987    
988 nigel 3 c = 0;
989     while (i++ < 2 && isxdigit(*p))
990     {
991     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
992     p++;
993     }
994     break;
995    
996     case 0: /* Allows for an empty line */
997     p--;
998     continue;
999    
1000     case 'A': /* Option setting */
1001     options |= PCRE_ANCHORED;
1002     continue;
1003    
1004     case 'B':
1005     options |= PCRE_NOTBOL;
1006     continue;
1007    
1008 nigel 29 case 'C':
1009 nigel 63 if (isdigit(*p)) /* Set copy string */
1010     {
1011     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1012     copystrings |= 1 << n;
1013     }
1014     else if (isalnum(*p))
1015     {
1016     uschar name[256];
1017 nigel 67 uschar *npp = name;
1018     while (isalnum(*p)) *npp++ = *p++;
1019     *npp = 0;
1020 nigel 65 n = pcre_get_stringnumber(re, (char *)name);
1021 nigel 63 if (n < 0)
1022     fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1023     else copystrings |= 1 << n;
1024     }
1025     else if (*p == '+')
1026     {
1027     callout_extra = 1;
1028     p++;
1029     }
1030     else if (*p == '-')
1031     {
1032     pcre_callout = NULL;
1033     p++;
1034     }
1035     else if (*p == '!')
1036     {
1037     callout_fail_id = 0;
1038     p++;
1039     while(isdigit(*p))
1040     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1041     callout_fail_count = 0;
1042     if (*p == '!')
1043     {
1044     p++;
1045     while(isdigit(*p))
1046     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1047     }
1048     }
1049     else if (*p == '*')
1050     {
1051     int sign = 1;
1052     callout_data = 0;
1053     if (*(++p) == '-') { sign = -1; p++; }
1054     while(isdigit(*p))
1055     callout_data = callout_data * 10 + *p++ - '0';
1056     callout_data *= sign;
1057     callout_data_set = 1;
1058     }
1059 nigel 29 continue;
1060    
1061     case 'G':
1062 nigel 63 if (isdigit(*p))
1063     {
1064     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1065     getstrings |= 1 << n;
1066     }
1067     else if (isalnum(*p))
1068     {
1069     uschar name[256];
1070 nigel 67 uschar *npp = name;
1071     while (isalnum(*p)) *npp++ = *p++;
1072     *npp = 0;
1073 nigel 65 n = pcre_get_stringnumber(re, (char *)name);
1074 nigel 63 if (n < 0)
1075     fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1076     else getstrings |= 1 << n;
1077     }
1078 nigel 29 continue;
1079    
1080     case 'L':
1081     getlist = 1;
1082     continue;
1083    
1084 nigel 63 case 'M':
1085     find_match_limit = 1;
1086     continue;
1087    
1088 nigel 37 case 'N':
1089     options |= PCRE_NOTEMPTY;
1090     continue;
1091    
1092 nigel 3 case 'O':
1093     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1094 nigel 53 if (n > size_offsets_max)
1095     {
1096     size_offsets_max = n;
1097 nigel 57 free(offsets);
1098     use_offsets = offsets = malloc(size_offsets_max * sizeof(int));
1099 nigel 53 if (offsets == NULL)
1100     {
1101     printf("** Failed to get %d bytes of memory for offsets vector\n",
1102     size_offsets_max * sizeof(int));
1103     return 1;
1104     }
1105     }
1106     use_size_offsets = n;
1107 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1108 nigel 3 continue;
1109    
1110     case 'Z':
1111     options |= PCRE_NOTEOL;
1112     continue;
1113     }
1114 nigel 9 *q++ = c;
1115 nigel 3 }
1116 nigel 9 *q = 0;
1117     len = q - dbuffer;
1118 nigel 3
1119     /* Handle matching via the POSIX interface, which does not
1120 nigel 63 support timing or playing with the match limit or callout data. */
1121 nigel 3
1122 nigel 37 #if !defined NOPOSIX
1123 nigel 3 if (posix || do_posix)
1124     {
1125     int rc;
1126     int eflags = 0;
1127 nigel 63 regmatch_t *pmatch = NULL;
1128     if (use_size_offsets > 0)
1129     pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
1130 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1131     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1132    
1133 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1134 nigel 3
1135     if (rc != 0)
1136     {
1137     (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
1138     fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1139     }
1140     else
1141     {
1142 nigel 7 size_t i;
1143 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
1144 nigel 3 {
1145     if (pmatch[i].rm_so >= 0)
1146     {
1147 nigel 23 fprintf(outfile, "%2d: ", (int)i);
1148 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
1149     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1150 nigel 3 fprintf(outfile, "\n");
1151 nigel 35 if (i == 0 && do_showrest)
1152     {
1153     fprintf(outfile, " 0+ ");
1154 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1155     outfile);
1156 nigel 35 fprintf(outfile, "\n");
1157     }
1158 nigel 3 }
1159     }
1160     }
1161 nigel 53 free(pmatch);
1162 nigel 3 }
1163    
1164 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
1165 nigel 3
1166 nigel 37 else
1167     #endif /* !defined NOPOSIX */
1168    
1169 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
1170 nigel 3 {
1171     if (timeit)
1172     {
1173     register int i;
1174     clock_t time_taken;
1175     clock_t start_time = clock();
1176 nigel 27 for (i = 0; i < LOOPREPEAT; i++)
1177 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
1178 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1179 nigel 3 time_taken = clock() - start_time;
1180 nigel 27 fprintf(outfile, "Execute time %.3f milliseconds\n",
1181 nigel 63 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1182     (double)CLOCKS_PER_SEC);
1183 nigel 3 }
1184    
1185 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
1186     varying limits in order to find the minimum value. */
1187    
1188     if (find_match_limit)
1189     {
1190     int min = 0;
1191     int mid = 64;
1192     int max = -1;
1193    
1194     if (extra == NULL)
1195     {
1196     extra = malloc(sizeof(pcre_extra));
1197     extra->flags = 0;
1198     }
1199     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1200    
1201     for (;;)
1202     {
1203     extra->match_limit = mid;
1204     count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1205     options | g_notempty, use_offsets, use_size_offsets);
1206     if (count == PCRE_ERROR_MATCHLIMIT)
1207     {
1208     /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1209     min = mid;
1210     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1211     }
1212     else if (count >= 0 || count == PCRE_ERROR_NOMATCH)
1213     {
1214     if (mid == min + 1)
1215     {
1216     fprintf(outfile, "Minimum match limit = %d\n", mid);
1217     break;
1218     }
1219     /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1220     max = mid;
1221     mid = (min + mid)/2;
1222     }
1223     else break; /* Some other error */
1224     }
1225    
1226     extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1227     }
1228    
1229     /* If callout_data is set, use the interface with additional data */
1230    
1231     else if (callout_data_set)
1232     {
1233     if (extra == NULL)
1234     {
1235     extra = malloc(sizeof(pcre_extra));
1236     extra->flags = 0;
1237     }
1238     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1239     extra->callout_data = (void *)callout_data;
1240     count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1241     options | g_notempty, use_offsets, use_size_offsets);
1242     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1243     }
1244    
1245     /* The normal case is just to do the match once, with the default
1246     value of match_limit. */
1247    
1248     else count = pcre_exec(re, extra, (char *)bptr, len,
1249 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1250 nigel 3
1251     if (count == 0)
1252     {
1253     fprintf(outfile, "Matched, but too many substrings\n");
1254 nigel 53 count = use_size_offsets/3;
1255 nigel 3 }
1256    
1257 nigel 39 /* Matched */
1258    
1259 nigel 3 if (count >= 0)
1260     {
1261     int i;
1262 nigel 29 for (i = 0; i < count * 2; i += 2)
1263 nigel 3 {
1264 nigel 57 if (use_offsets[i] < 0)
1265 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
1266     else
1267     {
1268     fprintf(outfile, "%2d: ", i/2);
1269 nigel 63 (void)pchars(bptr + use_offsets[i],
1270     use_offsets[i+1] - use_offsets[i], outfile);
1271 nigel 3 fprintf(outfile, "\n");
1272 nigel 35 if (i == 0)
1273     {
1274     if (do_showrest)
1275     {
1276     fprintf(outfile, " 0+ ");
1277 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1278     outfile);
1279 nigel 35 fprintf(outfile, "\n");
1280     }
1281     }
1282 nigel 3 }
1283     }
1284 nigel 29
1285     for (i = 0; i < 32; i++)
1286     {
1287     if ((copystrings & (1 << i)) != 0)
1288     {
1289 nigel 37 char copybuffer[16];
1290 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1291 nigel 37 i, copybuffer, sizeof(copybuffer));
1292 nigel 29 if (rc < 0)
1293     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1294     else
1295 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1296 nigel 29 }
1297     }
1298    
1299     for (i = 0; i < 32; i++)
1300     {
1301     if ((getstrings & (1 << i)) != 0)
1302     {
1303     const char *substring;
1304 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1305 nigel 29 i, &substring);
1306     if (rc < 0)
1307     fprintf(outfile, "get substring %d failed %d\n", i, rc);
1308     else
1309     {
1310     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1311 nigel 49 /* free((void *)substring); */
1312     pcre_free_substring(substring);
1313 nigel 29 }
1314     }
1315     }
1316    
1317     if (getlist)
1318     {
1319     const char **stringlist;
1320 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1321 nigel 29 &stringlist);
1322     if (rc < 0)
1323     fprintf(outfile, "get substring list failed %d\n", rc);
1324     else
1325     {
1326     for (i = 0; i < count; i++)
1327     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1328     if (stringlist[i] != NULL)
1329     fprintf(outfile, "string list not terminated by NULL\n");
1330 nigel 49 /* free((void *)stringlist); */
1331     pcre_free_substring_list(stringlist);
1332 nigel 29 }
1333     }
1334 nigel 39 }
1335 nigel 29
1336 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
1337 nigel 47 g_notempty after a null match, this is not necessarily the end.
1338 nigel 41 We want to advance the start offset, and continue. Fudge the offset
1339     values to achieve this. We won't be at the end of the string - that
1340 nigel 47 was checked before setting g_notempty. */
1341 nigel 39
1342 nigel 3 else
1343     {
1344 nigel 41 if (g_notempty != 0)
1345 nigel 35 {
1346 nigel 57 use_offsets[0] = start_offset;
1347     use_offsets[1] = start_offset + 1;
1348 nigel 35 }
1349 nigel 41 else
1350     {
1351     if (gmatched == 0) /* Error if no previous matches */
1352     {
1353     if (count == -1) fprintf(outfile, "No match\n");
1354     else fprintf(outfile, "Error %d\n", count);
1355     }
1356     break; /* Out of the /g loop */
1357     }
1358 nigel 3 }
1359 nigel 35
1360 nigel 39 /* If not /g or /G we are done */
1361    
1362     if (!do_g && !do_G) break;
1363    
1364 nigel 41 /* If we have matched an empty string, first check to see if we are at
1365     the end of the subject. If so, the /g loop is over. Otherwise, mimic
1366     what Perl's /g option does. This turns out to be rather cunning. First
1367 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1368     same point. If this fails (picked up above) we advance to the next
1369     character. */
1370 nigel 39
1371 nigel 41 g_notempty = 0;
1372 nigel 57 if (use_offsets[0] == use_offsets[1])
1373 nigel 41 {
1374 nigel 57 if (use_offsets[0] == len) break;
1375 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1376 nigel 41 }
1377 nigel 39
1378     /* For /g, update the start offset, leaving the rest alone */
1379    
1380 nigel 57 if (do_g) start_offset = use_offsets[1];
1381 nigel 39
1382     /* For /G, update the pointer and length */
1383    
1384     else
1385 nigel 35 {
1386 nigel 57 bptr += use_offsets[1];
1387     len -= use_offsets[1];
1388 nigel 35 }
1389 nigel 39 } /* End of loop for /g and /G */
1390     } /* End of loop for data lines */
1391 nigel 3
1392 nigel 11 CONTINUE:
1393 nigel 37
1394     #if !defined NOPOSIX
1395 nigel 3 if (posix || do_posix) regfree(&preg);
1396 nigel 37 #endif
1397    
1398 nigel 3 if (re != NULL) free(re);
1399     if (extra != NULL) free(extra);
1400 nigel 25 if (tables != NULL)
1401     {
1402     free((void *)tables);
1403     setlocale(LC_CTYPE, "C");
1404     }
1405 nigel 3 }
1406    
1407     fprintf(outfile, "\n");
1408     return 0;
1409     }
1410    
1411     /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12