/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 71 - (hide annotations) (download)
Sat Feb 24 21:40:24 2007 UTC (7 years, 7 months ago) by nigel
File MIME type: text/plain
File size: 39984 byte(s)
Load pcre-4.4 into code/trunk.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7     been extended and consequently is now rather untidy in places. */
8    
9 nigel 3 #include <ctype.h>
10     #include <stdio.h>
11     #include <string.h>
12     #include <stdlib.h>
13     #include <time.h>
14 nigel 25 #include <locale.h>
15 nigel 3
16 nigel 63 /* We need the internal info for displaying the results of pcre_study(). Also
17     for getting the opcodes for showing compiled code. */
18 nigel 3
19 nigel 63 #define PCRE_SPY /* For Win32 build, import data, not export */
20 nigel 3 #include "internal.h"
21 nigel 37
22     /* It is possible to compile this test program without including support for
23     testing the POSIX interface, though this is not available via the standard
24     Makefile. */
25    
26     #if !defined NOPOSIX
27 nigel 3 #include "pcreposix.h"
28 nigel 37 #endif
29 nigel 3
30     #ifndef CLOCKS_PER_SEC
31     #ifdef CLK_TCK
32     #define CLOCKS_PER_SEC CLK_TCK
33     #else
34     #define CLOCKS_PER_SEC 100
35     #endif
36     #endif
37    
38 nigel 63 #define LOOPREPEAT 50000
39 nigel 3
40 nigel 69 #define BUFFER_SIZE 30000
41     #define DBUFFER_SIZE 1024
42 nigel 23
43 nigel 69
/* Where results are written: stdout unless an output file is opened. */
static FILE *outfile;

/* Size of the most recent request made through new_malloc(). */
static size_t gotten_store;

/* Non-zero when the code-space size is to be shown after a compile. */
static int log_store = 0;

/* Non-zero when the current pattern was given the '8' (UTF-8) option;
consulted by pchars() when printing strings. */
static int use_utf8;

/* State shared with the callout() function. */
static int first_callout;        /* cleared once callout() has run */
static int callout_extra;        /* non-zero => callout() also lists captures */
static int callout_count;        /* callouts seen that match callout_fail_id */
static int callout_fail_id;      /* callout number that is counted */
static int callout_fail_count;   /* count at which callout() starts yielding 1 */
53 nigel 3
54    
/* Largest character value that fits in an encoding of (index + 1) bytes. */
static const int utf8_table1[] = {
  0x7f,          /* 1 byte */
  0x7ff,         /* 2 bytes */
  0xffff,        /* 3 bytes */
  0x1fffff,      /* 4 bytes */
  0x3ffffff,     /* 5 bytes */
  0x7fffffff     /* 6 bytes */
};

/* Bits OR-ed into the first byte of an encoding, indexed by the number of
additional bytes that follow it. */
static const int utf8_table2[] = {
  0x00,
  0xc0,
  0xe0,
  0xf0,
  0xf8,
  0xfc
};

/* Mask that extracts the data bits from the first byte of an encoding,
indexed by the number of additional bytes that follow it. */
static const int utf8_table3[] = {
  0xff,
  0x1f,
  0x0f,
  0x07,
  0x03,
  0x01
};
63    
64    
65 nigel 63
66 nigel 49 /*************************************************
67 nigel 63 * Print compiled regex *
68     *************************************************/
69    
70     /* The code for doing this is held in a separate file that is also included in
71     pcre.c when it is compiled with the debug switch. It defines a function called
72     print_internals(), which uses a table of opcode lengths defined by the macro
73     OP_LENGTHS, whose name must be OP_lengths. */
74    
75     static uschar OP_lengths[] = { OP_LENGTHS };
76    
77     #include "printint.c"
78    
79    
80    
81     /*************************************************
82     * Read number from string *
83     *************************************************/
84    
/* Decode a decimal number by hand. strtoul() is deliberately not used because
SunOS4 does not have it, and a hand-written loop is simpler than conditional
compilation. The only use is unpicking the -o argument, so nothing clever is
attempted: leading white space is skipped and digits are accumulated until the
first non-digit.

Arguments:
  str         string to be converted
  endptr      where to put a pointer to the first character not consumed

Returns:      the decoded value as an int (zero if no digits were found)
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
unsigned char *cursor = str;
int value = 0;

while (*cursor != 0 && isspace(*cursor)) cursor++;

for (; isdigit(*cursor); cursor++)
  value = value * 10 + (int)(*cursor - '0');

*endptr = cursor;
return value;
}
105    
106    
107    
108     /*************************************************
109 nigel 49 * Convert character value to UTF-8 *
110     *************************************************/
111    
112     /* This function takes an integer value in the range 0 - 0x7fffffff
113     and encodes it as a UTF-8 character in 0 to 6 bytes.
114    
115     Arguments:
116     cvalue the character value
117     buffer pointer to buffer for result - at least 6 bytes long
118    
119     Returns: number of characters placed in the buffer
120     -1 if input character is negative
121     0 if input character is positive but too big (only when
122     int is longer than 32 bits)
123     */
124    
125     static int
126     ord2utf8(int cvalue, unsigned char *buffer)
127     {
128     register int i, j;
129     for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
130     if (cvalue <= utf8_table1[i]) break;
131     if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
132     if (cvalue < 0) return -1;
133 nigel 59
134     buffer += i;
135     for (j = i; j > 0; j--)
136     {
137     *buffer-- = 0x80 | (cvalue & 0x3f);
138     cvalue >>= 6;
139     }
140     *buffer = utf8_table2[i] | cvalue;
141 nigel 49 return i + 1;
142     }
143    
144    
145     /*************************************************
146     * Convert UTF-8 string to value *
147     *************************************************/
148    
149     /* This function takes one or more bytes that represents a UTF-8 character,
150     and returns the value of the character.
151    
152     Argument:
153     buffer a pointer to the byte vector
154     vptr a pointer to an int to receive the value
155    
156     Returns: > 0 => the number of bytes consumed
157     -6 to 0 => malformed UTF-8 character at offset = (-return)
158     */
159    
160 nigel 67 static int
161 nigel 49 utf82ord(unsigned char *buffer, int *vptr)
162     {
163     int c = *buffer++;
164     int d = c;
165     int i, j, s;
166    
167     for (i = -1; i < 6; i++) /* i is number of additional bytes */
168     {
169     if ((d & 0x80) == 0) break;
170     d <<= 1;
171     }
172    
173     if (i == -1) { *vptr = c; return 1; } /* ascii character */
174     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
175    
176     /* i now has a value in the range 1-5 */
177    
178 nigel 59 s = 6*i;
179     d = (c & utf8_table3[i]) << s;
180 nigel 49
181     for (j = 0; j < i; j++)
182     {
183     c = *buffer++;
184     if ((c & 0xc0) != 0x80) return -(j+1);
185 nigel 59 s -= 6;
186 nigel 49 d |= (c & 0x3f) << s;
187     }
188    
189     /* Check that encoding was the correct unique one */
190    
191     for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
192     if (d <= utf8_table1[j]) break;
193     if (j != i) return -(i+1);
194    
195     /* Valid value */
196    
197     *vptr = d;
198     return i+1;
199     }
200    
201    
202    
203 nigel 63 /*************************************************
204     * Print character string *
205     *************************************************/
206 nigel 49
207 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
208     mode. Yields number of characters printed. If handed a NULL file, just counts
209     chars without printing. */
210 nigel 49
211 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
212 nigel 3 {
213 nigel 63 int c;
214     int yield = 0;
215 nigel 3
216 nigel 63 while (length-- > 0)
217 nigel 3 {
218 nigel 67 if (use_utf8)
219 nigel 63 {
220     int rc = utf82ord(p, &c);
221 nigel 3
222 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
223     {
224     length -= rc - 1;
225     p += rc;
226     if (c < 256 && isprint(c))
227     {
228     if (f != NULL) fprintf(f, "%c", c);
229     yield++;
230     }
231     else
232     {
233     int n;
234     if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
235     yield += n;
236     }
237     continue;
238     }
239     }
240 nigel 3
241 nigel 63 /* Not UTF-8, or malformed UTF-8 */
242    
243     if (isprint(c = *(p++)))
244 nigel 3 {
245 nigel 63 if (f != NULL) fprintf(f, "%c", c);
246     yield++;
247 nigel 3 }
248 nigel 63 else
249 nigel 3 {
250 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
251     yield += 4;
252     }
253     }
254 nigel 3
255 nigel 63 return yield;
256     }
257 nigel 23
258 nigel 3
259 nigel 23
260 nigel 63 /*************************************************
261     * Callout function *
262     *************************************************/
263 nigel 3
264 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
265     the match. Yield zero unless more callouts than the fail count, or the callout
266     data is not zero. */
267 nigel 3
268 nigel 63 static int callout(pcre_callout_block *cb)
269     {
270     FILE *f = (first_callout | callout_extra)? outfile : NULL;
271     int i, pre_start, post_start;
272 nigel 3
273 nigel 63 if (callout_extra)
274     {
275     fprintf(f, "Callout %d: last capture = %d\n",
276     cb->callout_number, cb->capture_last);
277 nigel 3
278 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
279     {
280     if (cb->offset_vector[i] < 0)
281     fprintf(f, "%2d: <unset>\n", i/2);
282     else
283     {
284     fprintf(f, "%2d: ", i/2);
285     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
286     cb->offset_vector[i+1] - cb->offset_vector[i], f);
287     fprintf(f, "\n");
288     }
289     }
290     }
291 nigel 3
292 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
293     datails. On subsequent calls in the same match, we use pchars just to find the
294     printed lengths of the substrings. */
295 nigel 3
296 nigel 63 if (f != NULL) fprintf(f, "--->");
297 nigel 3
298 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
299     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
300     cb->current_position - cb->start_match, f);
301 nigel 3
302 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
303     cb->subject_length - cb->current_position, f);
304 nigel 3
305 nigel 63 if (f != NULL) fprintf(f, "\n");
306 nigel 9
307 nigel 63 /* Always print appropriate indicators, with callout number if not already
308     shown */
309 nigel 3
310 nigel 63 if (callout_extra) fprintf(outfile, " ");
311     else fprintf(outfile, "%3d ", cb->callout_number);
312 nigel 3
313 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
314     fprintf(outfile, "^");
315 nigel 3
316 nigel 63 if (post_start > 0)
317     {
318     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
319     fprintf(outfile, "^");
320 nigel 3 }
321    
322 nigel 63 fprintf(outfile, "\n");
323     first_callout = 0;
324 nigel 3
325 nigel 71 if (cb->callout_data != NULL)
326 nigel 49 {
327 nigel 71 int callout_data = *((int *)(cb->callout_data));
328     if (callout_data != 0)
329     {
330     fprintf(outfile, "Callout data = %d\n", callout_data);
331     return callout_data;
332     }
333 nigel 63 }
334 nigel 49
335 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
336     (++callout_count >= callout_fail_count)? 1 : 0;
337 nigel 3 }
338    
339    
340 nigel 63 /*************************************************
341     * Local malloc function *
342     *************************************************/
343 nigel 3
344     /* Alternative malloc function, to test functionality and show the size of the
345     compiled re. */
346    
347     static void *new_malloc(size_t size)
348     {
349 nigel 43 gotten_store = size;
350 nigel 3 return malloc(size);
351     }
352    
353    
354    
355 nigel 63 /*************************************************
356     * Call pcre_fullinfo() *
357     *************************************************/
358 nigel 43
359     /* Get one piece of information from the pcre_fullinfo() function */
360    
361     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
362     {
363     int rc;
364     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
365     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
366     }
367    
368    
369    
370 nigel 63 /*************************************************
371     * Main Program *
372     *************************************************/
373 nigel 43
374 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
375     consist of a regular expression, in delimiters and optionally followed by
376     options, followed by a set of test data, terminated by an empty line. */
377    
378     int main(int argc, char **argv)
379     {
380     FILE *infile = stdin;
381     int options = 0;
382     int study_options = 0;
383     int op = 1;
384     int timeit = 0;
385     int showinfo = 0;
386 nigel 31 int showstore = 0;
387 nigel 53 int size_offsets = 45;
388     int size_offsets_max;
389     int *offsets;
390     #if !defined NOPOSIX
391 nigel 3 int posix = 0;
392 nigel 53 #endif
393 nigel 3 int debug = 0;
394 nigel 11 int done = 0;
395 nigel 3
396 nigel 69 unsigned char *buffer;
397     unsigned char *dbuffer;
398    
399     /* Get buffers from malloc() so that Electric Fence will check their misuse
400     when I am debugging. */
401    
402 nigel 71 buffer = (unsigned char *)malloc(BUFFER_SIZE);
403     dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
404 nigel 69
405 nigel 3 /* Static so that new_malloc can use it. */
406    
407     outfile = stdout;
408    
409     /* Scan options */
410    
411     while (argc > 1 && argv[op][0] == '-')
412     {
413 nigel 63 unsigned char *endptr;
414 nigel 53
415 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
416     showstore = 1;
417 nigel 3 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
418     else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
419     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
420 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
421 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
422     *endptr == 0))
423 nigel 53 {
424     op++;
425     argc--;
426     }
427     #if !defined NOPOSIX
428 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
429 nigel 53 #endif
430 nigel 63 else if (strcmp(argv[op], "-C") == 0)
431     {
432     int rc;
433     printf("PCRE version %s\n", pcre_version());
434     printf("Compiled with\n");
435     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
436     printf(" %sUTF-8 support\n", rc? "" : "No ");
437     (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
438     printf(" Newline character is %s\n", (rc == '\r')? "CR" : "LF");
439     (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
440     printf(" Internal link size = %d\n", rc);
441     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
442     printf(" POSIX malloc threshold = %d\n", rc);
443     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
444     printf(" Default match limit = %d\n", rc);
445     exit(0);
446     }
447 nigel 3 else
448     {
449 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
450     printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
451 nigel 63 printf(" -C show PCRE compile-time options and exit\n");
452 nigel 53 printf(" -d debug: show compiled code; implies -i\n"
453     " -i show information about compiled pattern\n"
454     " -o <n> set size of offsets vector to <n>\n");
455     #if !defined NOPOSIX
456     printf(" -p use POSIX interface\n");
457     #endif
458     printf(" -s output store information\n"
459     " -t time compilation and execution\n");
460 nigel 3 return 1;
461     }
462     op++;
463     argc--;
464     }
465    
466 nigel 53 /* Get the store for the offsets vector, and remember what it was */
467    
468     size_offsets_max = size_offsets;
469 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
470 nigel 53 if (offsets == NULL)
471     {
472     printf("** Failed to get %d bytes of memory for offsets vector\n",
473     size_offsets_max * sizeof(int));
474     return 1;
475     }
476    
477 nigel 3 /* Sort out the input and output files */
478    
479     if (argc > 1)
480     {
481     infile = fopen(argv[op], "r");
482     if (infile == NULL)
483     {
484     printf("** Failed to open %s\n", argv[op]);
485     return 1;
486     }
487     }
488    
489     if (argc > 2)
490     {
491     outfile = fopen(argv[op+1], "w");
492     if (outfile == NULL)
493     {
494     printf("** Failed to open %s\n", argv[op+1]);
495     return 1;
496     }
497     }
498    
499     /* Set alternative malloc function */
500    
501     pcre_malloc = new_malloc;
502    
503 nigel 23 /* Heading line, then prompt for first regex if stdin */
504 nigel 3
505     fprintf(outfile, "PCRE version %s\n\n", pcre_version());
506    
507     /* Main loop */
508    
509 nigel 11 while (!done)
510 nigel 3 {
511     pcre *re = NULL;
512     pcre_extra *extra = NULL;
513 nigel 37
514     #if !defined NOPOSIX /* There are still compilers that require no indent */
515 nigel 3 regex_t preg;
516 nigel 45 int do_posix = 0;
517 nigel 37 #endif
518    
519 nigel 7 const char *error;
520 nigel 25 unsigned char *p, *pp, *ppp;
521 nigel 53 const unsigned char *tables = NULL;
522 nigel 3 int do_study = 0;
523 nigel 25 int do_debug = debug;
524 nigel 35 int do_G = 0;
525     int do_g = 0;
526 nigel 25 int do_showinfo = showinfo;
527 nigel 35 int do_showrest = 0;
528 nigel 3 int erroroffset, len, delimiter;
529    
530 nigel 67 use_utf8 = 0;
531 nigel 63
532 nigel 3 if (infile == stdin) printf(" re> ");
533 nigel 69 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
534 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
535 nigel 63 fflush(outfile);
536 nigel 3
537     p = buffer;
538     while (isspace(*p)) p++;
539     if (*p == 0) continue;
540    
541     /* Get the delimiter and seek the end of the pattern; if it isn't
542     complete, read more. */
543    
544     delimiter = *p++;
545    
546 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
547 nigel 3 {
548 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
549 nigel 3 goto SKIP_DATA;
550     }
551    
552     pp = p;
553    
554     for(;;)
555     {
556 nigel 29 while (*pp != 0)
557     {
558     if (*pp == '\\' && pp[1] != 0) pp++;
559     else if (*pp == delimiter) break;
560     pp++;
561     }
562 nigel 3 if (*pp != 0) break;
563    
564 nigel 69 len = BUFFER_SIZE - (pp - buffer);
565 nigel 3 if (len < 256)
566     {
567     fprintf(outfile, "** Expression too long - missing delimiter?\n");
568     goto SKIP_DATA;
569     }
570    
571     if (infile == stdin) printf(" > ");
572     if (fgets((char *)pp, len, infile) == NULL)
573     {
574     fprintf(outfile, "** Unexpected EOF\n");
575 nigel 11 done = 1;
576     goto CONTINUE;
577 nigel 3 }
578 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
579 nigel 3 }
580    
581 nigel 29 /* If the first character after the delimiter is backslash, make
582     the pattern end with backslash. This is purely to provide a way
583     of testing for the error message when a pattern ends with backslash. */
584    
585     if (pp[1] == '\\') *pp++ = '\\';
586    
587 nigel 3 /* Terminate the pattern at the delimiter */
588    
589     *pp++ = 0;
590    
591     /* Look for options after final delimiter */
592    
593     options = 0;
594     study_options = 0;
595 nigel 31 log_store = showstore; /* default from command line */
596    
597 nigel 3 while (*pp != 0)
598     {
599     switch (*pp++)
600     {
601 nigel 35 case 'g': do_g = 1; break;
602 nigel 3 case 'i': options |= PCRE_CASELESS; break;
603     case 'm': options |= PCRE_MULTILINE; break;
604     case 's': options |= PCRE_DOTALL; break;
605     case 'x': options |= PCRE_EXTENDED; break;
606 nigel 25
607 nigel 35 case '+': do_showrest = 1; break;
608 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
609 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
610 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
611 nigel 35 case 'G': do_G = 1; break;
612 nigel 25 case 'I': do_showinfo = 1; break;
613 nigel 31 case 'M': log_store = 1; break;
614 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
615 nigel 37
616     #if !defined NOPOSIX
617 nigel 3 case 'P': do_posix = 1; break;
618 nigel 37 #endif
619    
620 nigel 3 case 'S': do_study = 1; break;
621 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
622 nigel 3 case 'X': options |= PCRE_EXTRA; break;
623 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
624 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
625 nigel 25
626     case 'L':
627     ppp = pp;
628     while (*ppp != '\n' && *ppp != ' ') ppp++;
629     *ppp = 0;
630     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
631     {
632     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
633     goto SKIP_DATA;
634     }
635     tables = pcre_maketables();
636     pp = ppp;
637     break;
638    
639 nigel 3 case '\n': case ' ': break;
640     default:
641     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
642     goto SKIP_DATA;
643     }
644     }
645    
646 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
647 nigel 25 timing, showing, or debugging options, nor the ability to pass over
648     local character tables. */
649 nigel 3
650 nigel 37 #if !defined NOPOSIX
651 nigel 3 if (posix || do_posix)
652     {
653     int rc;
654     int cflags = 0;
655     if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
656     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
657     rc = regcomp(&preg, (char *)p, cflags);
658    
659     /* Compilation failed; go back for another re, skipping to blank line
660     if non-interactive. */
661    
662     if (rc != 0)
663     {
664 nigel 69 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
665 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
666     goto SKIP_DATA;
667     }
668     }
669    
670     /* Handle compiling via the native interface */
671    
672     else
673 nigel 37 #endif /* !defined NOPOSIX */
674    
675 nigel 3 {
676     if (timeit)
677     {
678     register int i;
679     clock_t time_taken;
680     clock_t start_time = clock();
681 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
682 nigel 3 {
683 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
684 nigel 3 if (re != NULL) free(re);
685     }
686     time_taken = clock() - start_time;
687 nigel 27 fprintf(outfile, "Compile time %.3f milliseconds\n",
688 nigel 63 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
689     (double)CLOCKS_PER_SEC);
690 nigel 3 }
691    
692 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
693 nigel 3
694     /* Compilation failed; go back for another re, skipping to blank line
695     if non-interactive. */
696    
697     if (re == NULL)
698     {
699     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
700     SKIP_DATA:
701     if (infile != stdin)
702     {
703     for (;;)
704     {
705 nigel 69 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
706 nigel 11 {
707     done = 1;
708     goto CONTINUE;
709     }
710 nigel 3 len = (int)strlen((char *)buffer);
711     while (len > 0 && isspace(buffer[len-1])) len--;
712     if (len == 0) break;
713     }
714     fprintf(outfile, "\n");
715     }
716 nigel 25 goto CONTINUE;
717 nigel 3 }
718    
719 nigel 43 /* Compilation succeeded; print data if required. There are now two
720     info-returning functions. The old one has a limited interface and
721     returns only limited data. Check that it agrees with the newer one. */
722 nigel 3
723 nigel 63 if (log_store)
724     fprintf(outfile, "Memory allocation (code space): %d\n",
725     (int)(gotten_store -
726     sizeof(real_pcre) -
727     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
728    
729 nigel 25 if (do_showinfo)
730 nigel 3 {
731 nigel 53 unsigned long int get_options;
732 nigel 43 int old_first_char, old_options, old_count;
733     int count, backrefmax, first_char, need_char;
734 nigel 63 int nameentrysize, namecount;
735     const uschar *nametable;
736 nigel 43 size_t size;
737 nigel 3
738 nigel 63 if (do_debug)
739     {
740     fprintf(outfile, "------------------------------------------------------------------\n");
741     print_internals(re, outfile);
742     }
743 nigel 3
744 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
745 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
746     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
747     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
748 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
749 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
750 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
751     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
752 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
753 nigel 43
754     old_count = pcre_info(re, &old_options, &old_first_char);
755 nigel 3 if (count < 0) fprintf(outfile,
756 nigel 43 "Error %d from pcre_info()\n", count);
757 nigel 3 else
758     {
759 nigel 43 if (old_count != count) fprintf(outfile,
760     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
761     old_count);
762 nigel 37
763 nigel 43 if (old_first_char != first_char) fprintf(outfile,
764     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
765     first_char, old_first_char);
766 nigel 37
767 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
768     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
769     get_options, old_options);
770 nigel 43 }
771    
772     if (size != gotten_store) fprintf(outfile,
773     "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
774     size, gotten_store);
775    
776     fprintf(outfile, "Capturing subpattern count = %d\n", count);
777     if (backrefmax > 0)
778     fprintf(outfile, "Max back reference = %d\n", backrefmax);
779 nigel 63
780     if (namecount > 0)
781     {
782     fprintf(outfile, "Named capturing subpatterns:\n");
783     while (namecount-- > 0)
784     {
785     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
786     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
787     GET2(nametable, 0));
788     nametable += nameentrysize;
789     }
790     }
791    
792 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
793 nigel 71 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",
794 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
795     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
796     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
797     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
798     ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
799     ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
800     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
801     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
802 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
803     ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
804 nigel 43
805     if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
806     fprintf(outfile, "Case state changes\n");
807    
808     if (first_char == -1)
809     {
810     fprintf(outfile, "First char at start or follows \\n\n");
811     }
812     else if (first_char < 0)
813     {
814     fprintf(outfile, "No first char\n");
815     }
816     else
817     {
818 nigel 63 int ch = first_char & 255;
819 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
820 nigel 63 "" : " (caseless)";
821     if (isprint(ch))
822     fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
823 nigel 3 else
824 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
825 nigel 43 }
826 nigel 37
827 nigel 43 if (need_char < 0)
828     {
829     fprintf(outfile, "No need char\n");
830 nigel 3 }
831 nigel 43 else
832     {
833 nigel 63 int ch = need_char & 255;
834 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
835 nigel 63 "" : " (caseless)";
836     if (isprint(ch))
837     fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
838 nigel 43 else
839 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
840 nigel 43 }
841 nigel 3 }
842    
843     /* If /S was present, study the regexp to generate additional info to
844     help with the matching. */
845    
846     if (do_study)
847     {
848     if (timeit)
849     {
850     register int i;
851     clock_t time_taken;
852     clock_t start_time = clock();
853 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
854 nigel 3 extra = pcre_study(re, study_options, &error);
855     time_taken = clock() - start_time;
856     if (extra != NULL) free(extra);
857 nigel 27 fprintf(outfile, " Study time %.3f milliseconds\n",
858 nigel 63 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
859     (double)CLOCKS_PER_SEC);
860 nigel 3 }
861    
862     extra = pcre_study(re, study_options, &error);
863     if (error != NULL)
864     fprintf(outfile, "Failed to study: %s\n", error);
865     else if (extra == NULL)
866     fprintf(outfile, "Study returned NULL\n");
867    
868 nigel 71 /* Don't output study size; at present it is in any case a fixed
869     value, but it varies, depending on the computer architecture, and
870     so messes up the test suite. */
871    
872 nigel 25 else if (do_showinfo)
873 nigel 3 {
874 nigel 63 size_t size;
875 nigel 43 uschar *start_bits = NULL;
876 nigel 63 new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);
877 nigel 43 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
878 nigel 71 /* fprintf(outfile, "Study size = %d\n", size); */
879 nigel 43 if (start_bits == NULL)
880 nigel 3 fprintf(outfile, "No starting character set\n");
881     else
882     {
883     int i;
884     int c = 24;
885     fprintf(outfile, "Starting character set: ");
886     for (i = 0; i < 256; i++)
887     {
888 nigel 43 if ((start_bits[i/8] & (1<<(i%8))) != 0)
889 nigel 3 {
890     if (c > 75)
891     {
892     fprintf(outfile, "\n ");
893     c = 2;
894     }
895     if (isprint(i) && i != ' ')
896     {
897     fprintf(outfile, "%c ", i);
898     c += 2;
899     }
900     else
901     {
902     fprintf(outfile, "\\x%02x ", i);
903     c += 5;
904     }
905     }
906     }
907     fprintf(outfile, "\n");
908     }
909     }
910     }
911     }
912    
913     /* Read data lines and test them */
914    
915     for (;;)
916     {
917 nigel 9 unsigned char *q;
918 nigel 35 unsigned char *bptr = dbuffer;
919 nigel 57 int *use_offsets = offsets;
920 nigel 53 int use_size_offsets = size_offsets;
921 nigel 63 int callout_data = 0;
922     int callout_data_set = 0;
923 nigel 3 int count, c;
924 nigel 29 int copystrings = 0;
925 nigel 63 int find_match_limit = 0;
926 nigel 29 int getstrings = 0;
927     int getlist = 0;
928 nigel 39 int gmatched = 0;
929 nigel 35 int start_offset = 0;
930 nigel 41 int g_notempty = 0;
931 nigel 3
932     options = 0;
933    
934 nigel 63 pcre_callout = callout;
935     first_callout = 1;
936     callout_extra = 0;
937     callout_count = 0;
938     callout_fail_count = 999999;
939     callout_fail_id = -1;
940    
941 nigel 35 if (infile == stdin) printf("data> ");
942 nigel 69 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
943 nigel 11 {
944     done = 1;
945     goto CONTINUE;
946     }
947 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
948 nigel 3
949     len = (int)strlen((char *)buffer);
950     while (len > 0 && isspace(buffer[len-1])) len--;
951     buffer[len] = 0;
952     if (len == 0) break;
953    
954     p = buffer;
955     while (isspace(*p)) p++;
956    
957 nigel 9 q = dbuffer;
958 nigel 3 while ((c = *p++) != 0)
959     {
960     int i = 0;
961     int n = 0;
962 nigel 63
963 nigel 3 if (c == '\\') switch ((c = *p++))
964     {
965     case 'a': c = 7; break;
966     case 'b': c = '\b'; break;
967     case 'e': c = 27; break;
968     case 'f': c = '\f'; break;
969     case 'n': c = '\n'; break;
970     case 'r': c = '\r'; break;
971     case 't': c = '\t'; break;
972     case 'v': c = '\v'; break;
973    
974     case '0': case '1': case '2': case '3':
975     case '4': case '5': case '6': case '7':
976     c -= '0';
977     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
978     c = c * 8 + *p++ - '0';
979     break;
980    
981     case 'x':
982 nigel 49
983     /* Handle \x{..} specially - new Perl thing for utf8 */
984    
985     if (*p == '{')
986     {
987     unsigned char *pt = p;
988     c = 0;
989     while (isxdigit(*(++pt)))
990     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
991     if (*pt == '}')
992     {
993 nigel 67 unsigned char buff8[8];
994 nigel 49 int ii, utn;
995 nigel 67 utn = ord2utf8(c, buff8);
996     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
997     c = buff8[ii]; /* Last byte */
998 nigel 49 p = pt + 1;
999     break;
1000     }
1001     /* Not correct form; fall through */
1002     }
1003    
1004     /* Ordinary \x */
1005    
1006 nigel 3 c = 0;
1007     while (i++ < 2 && isxdigit(*p))
1008     {
1009     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1010     p++;
1011     }
1012     break;
1013    
1014     case 0: /* Allows for an empty line */
1015     p--;
1016     continue;
1017    
1018     case 'A': /* Option setting */
1019     options |= PCRE_ANCHORED;
1020     continue;
1021    
1022     case 'B':
1023     options |= PCRE_NOTBOL;
1024     continue;
1025    
1026 nigel 29 case 'C':
1027 nigel 63 if (isdigit(*p)) /* Set copy string */
1028     {
1029     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1030     copystrings |= 1 << n;
1031     }
1032     else if (isalnum(*p))
1033     {
1034     uschar name[256];
1035 nigel 67 uschar *npp = name;
1036     while (isalnum(*p)) *npp++ = *p++;
1037     *npp = 0;
1038 nigel 65 n = pcre_get_stringnumber(re, (char *)name);
1039 nigel 63 if (n < 0)
1040     fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1041     else copystrings |= 1 << n;
1042     }
1043     else if (*p == '+')
1044     {
1045     callout_extra = 1;
1046     p++;
1047     }
1048     else if (*p == '-')
1049     {
1050     pcre_callout = NULL;
1051     p++;
1052     }
1053     else if (*p == '!')
1054     {
1055     callout_fail_id = 0;
1056     p++;
1057     while(isdigit(*p))
1058     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1059     callout_fail_count = 0;
1060     if (*p == '!')
1061     {
1062     p++;
1063     while(isdigit(*p))
1064     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1065     }
1066     }
1067     else if (*p == '*')
1068     {
1069     int sign = 1;
1070     callout_data = 0;
1071     if (*(++p) == '-') { sign = -1; p++; }
1072     while(isdigit(*p))
1073     callout_data = callout_data * 10 + *p++ - '0';
1074     callout_data *= sign;
1075     callout_data_set = 1;
1076     }
1077 nigel 29 continue;
1078    
1079     case 'G':
1080 nigel 63 if (isdigit(*p))
1081     {
1082     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1083     getstrings |= 1 << n;
1084     }
1085     else if (isalnum(*p))
1086     {
1087     uschar name[256];
1088 nigel 67 uschar *npp = name;
1089     while (isalnum(*p)) *npp++ = *p++;
1090     *npp = 0;
1091 nigel 65 n = pcre_get_stringnumber(re, (char *)name);
1092 nigel 63 if (n < 0)
1093     fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1094     else getstrings |= 1 << n;
1095     }
1096 nigel 29 continue;
1097    
1098     case 'L':
1099     getlist = 1;
1100     continue;
1101    
1102 nigel 63 case 'M':
1103     find_match_limit = 1;
1104     continue;
1105    
1106 nigel 37 case 'N':
1107     options |= PCRE_NOTEMPTY;
1108     continue;
1109    
1110 nigel 3 case 'O':
1111     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1112 nigel 53 if (n > size_offsets_max)
1113     {
1114     size_offsets_max = n;
1115 nigel 57 free(offsets);
1116 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1117 nigel 53 if (offsets == NULL)
1118     {
1119     printf("** Failed to get %d bytes of memory for offsets vector\n",
1120     size_offsets_max * sizeof(int));
1121     return 1;
1122     }
1123     }
1124     use_size_offsets = n;
1125 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1126 nigel 3 continue;
1127    
1128     case 'Z':
1129     options |= PCRE_NOTEOL;
1130     continue;
1131 nigel 71
1132     case '?':
1133     options |= PCRE_NO_UTF8_CHECK;
1134     continue;
1135 nigel 3 }
1136 nigel 9 *q++ = c;
1137 nigel 3 }
1138 nigel 9 *q = 0;
1139     len = q - dbuffer;
1140 nigel 3
1141     /* Handle matching via the POSIX interface, which does not
1142 nigel 63 support timing or playing with the match limit or callout data. */
1143 nigel 3
1144 nigel 37 #if !defined NOPOSIX
1145 nigel 3 if (posix || do_posix)
1146     {
1147     int rc;
1148     int eflags = 0;
1149 nigel 63 regmatch_t *pmatch = NULL;
1150     if (use_size_offsets > 0)
1151 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1152 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1153     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1154    
1155 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1156 nigel 3
1157     if (rc != 0)
1158     {
1159 nigel 69 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1160 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1161     }
1162     else
1163     {
1164 nigel 7 size_t i;
1165 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
1166 nigel 3 {
1167     if (pmatch[i].rm_so >= 0)
1168     {
1169 nigel 23 fprintf(outfile, "%2d: ", (int)i);
1170 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
1171     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1172 nigel 3 fprintf(outfile, "\n");
1173 nigel 35 if (i == 0 && do_showrest)
1174     {
1175     fprintf(outfile, " 0+ ");
1176 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1177     outfile);
1178 nigel 35 fprintf(outfile, "\n");
1179     }
1180 nigel 3 }
1181     }
1182     }
1183 nigel 53 free(pmatch);
1184 nigel 3 }
1185    
1186 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
1187 nigel 3
1188 nigel 37 else
1189     #endif /* !defined NOPOSIX */
1190    
1191 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
1192 nigel 3 {
1193     if (timeit)
1194     {
1195     register int i;
1196     clock_t time_taken;
1197     clock_t start_time = clock();
1198 nigel 27 for (i = 0; i < LOOPREPEAT; i++)
1199 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
1200 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1201 nigel 3 time_taken = clock() - start_time;
1202 nigel 27 fprintf(outfile, "Execute time %.3f milliseconds\n",
1203 nigel 63 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1204     (double)CLOCKS_PER_SEC);
1205 nigel 3 }
1206    
1207 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
1208     varying limits in order to find the minimum value. */
1209    
1210     if (find_match_limit)
1211     {
1212     int min = 0;
1213     int mid = 64;
1214     int max = -1;
1215    
1216     if (extra == NULL)
1217     {
1218 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1219 nigel 63 extra->flags = 0;
1220     }
1221     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1222    
1223     for (;;)
1224     {
1225     extra->match_limit = mid;
1226     count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1227     options | g_notempty, use_offsets, use_size_offsets);
1228     if (count == PCRE_ERROR_MATCHLIMIT)
1229     {
1230     /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1231     min = mid;
1232     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1233     }
1234     else if (count >= 0 || count == PCRE_ERROR_NOMATCH)
1235     {
1236     if (mid == min + 1)
1237     {
1238     fprintf(outfile, "Minimum match limit = %d\n", mid);
1239     break;
1240     }
1241     /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1242     max = mid;
1243     mid = (min + mid)/2;
1244     }
1245     else break; /* Some other error */
1246     }
1247    
1248     extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1249     }
1250    
1251     /* If callout_data is set, use the interface with additional data */
1252    
1253     else if (callout_data_set)
1254     {
1255     if (extra == NULL)
1256     {
1257 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1258 nigel 63 extra->flags = 0;
1259     }
1260     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1261 nigel 71 extra->callout_data = &callout_data;
1262 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1263     options | g_notempty, use_offsets, use_size_offsets);
1264     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1265     }
1266    
1267     /* The normal case is just to do the match once, with the default
1268     value of match_limit. */
1269    
1270     else count = pcre_exec(re, extra, (char *)bptr, len,
1271 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1272 nigel 3
1273     if (count == 0)
1274     {
1275     fprintf(outfile, "Matched, but too many substrings\n");
1276 nigel 53 count = use_size_offsets/3;
1277 nigel 3 }
1278    
1279 nigel 39 /* Matched */
1280    
1281 nigel 3 if (count >= 0)
1282     {
1283     int i;
1284 nigel 29 for (i = 0; i < count * 2; i += 2)
1285 nigel 3 {
1286 nigel 57 if (use_offsets[i] < 0)
1287 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
1288     else
1289     {
1290     fprintf(outfile, "%2d: ", i/2);
1291 nigel 63 (void)pchars(bptr + use_offsets[i],
1292     use_offsets[i+1] - use_offsets[i], outfile);
1293 nigel 3 fprintf(outfile, "\n");
1294 nigel 35 if (i == 0)
1295     {
1296     if (do_showrest)
1297     {
1298     fprintf(outfile, " 0+ ");
1299 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1300     outfile);
1301 nigel 35 fprintf(outfile, "\n");
1302     }
1303     }
1304 nigel 3 }
1305     }
1306 nigel 29
1307     for (i = 0; i < 32; i++)
1308     {
1309     if ((copystrings & (1 << i)) != 0)
1310     {
1311 nigel 37 char copybuffer[16];
1312 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1313 nigel 37 i, copybuffer, sizeof(copybuffer));
1314 nigel 29 if (rc < 0)
1315     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1316     else
1317 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1318 nigel 29 }
1319     }
1320    
1321     for (i = 0; i < 32; i++)
1322     {
1323     if ((getstrings & (1 << i)) != 0)
1324     {
1325     const char *substring;
1326 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1327 nigel 29 i, &substring);
1328     if (rc < 0)
1329     fprintf(outfile, "get substring %d failed %d\n", i, rc);
1330     else
1331     {
1332     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1333 nigel 49 /* free((void *)substring); */
1334     pcre_free_substring(substring);
1335 nigel 29 }
1336     }
1337     }
1338    
1339     if (getlist)
1340     {
1341     const char **stringlist;
1342 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1343 nigel 29 &stringlist);
1344     if (rc < 0)
1345     fprintf(outfile, "get substring list failed %d\n", rc);
1346     else
1347     {
1348     for (i = 0; i < count; i++)
1349     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1350     if (stringlist[i] != NULL)
1351     fprintf(outfile, "string list not terminated by NULL\n");
1352 nigel 49 /* free((void *)stringlist); */
1353     pcre_free_substring_list(stringlist);
1354 nigel 29 }
1355     }
1356 nigel 39 }
1357 nigel 29
1358 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
1359 nigel 47 g_notempty after a null match, this is not necessarily the end.
1360 nigel 41 We want to advance the start offset, and continue. Fudge the offset
1361     values to achieve this. We won't be at the end of the string - that
1362 nigel 47 was checked before setting g_notempty. */
1363 nigel 39
1364 nigel 3 else
1365     {
1366 nigel 41 if (g_notempty != 0)
1367 nigel 35 {
1368 nigel 57 use_offsets[0] = start_offset;
1369     use_offsets[1] = start_offset + 1;
1370 nigel 35 }
1371 nigel 41 else
1372     {
1373     if (gmatched == 0) /* Error if no previous matches */
1374     {
1375     if (count == -1) fprintf(outfile, "No match\n");
1376     else fprintf(outfile, "Error %d\n", count);
1377     }
1378     break; /* Out of the /g loop */
1379     }
1380 nigel 3 }
1381 nigel 35
1382 nigel 39 /* If not /g or /G we are done */
1383    
1384     if (!do_g && !do_G) break;
1385    
1386 nigel 41 /* If we have matched an empty string, first check to see if we are at
1387     the end of the subject. If so, the /g loop is over. Otherwise, mimic
1388     what Perl's /g option does. This turns out to be rather cunning. First
1389 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1390     same point. If this fails (picked up above) we advance to the next
1391     character. */
1392 nigel 39
1393 nigel 41 g_notempty = 0;
1394 nigel 57 if (use_offsets[0] == use_offsets[1])
1395 nigel 41 {
1396 nigel 57 if (use_offsets[0] == len) break;
1397 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1398 nigel 41 }
1399 nigel 39
1400     /* For /g, update the start offset, leaving the rest alone */
1401    
1402 nigel 57 if (do_g) start_offset = use_offsets[1];
1403 nigel 39
1404     /* For /G, update the pointer and length */
1405    
1406     else
1407 nigel 35 {
1408 nigel 57 bptr += use_offsets[1];
1409     len -= use_offsets[1];
1410 nigel 35 }
1411 nigel 39 } /* End of loop for /g and /G */
1412     } /* End of loop for data lines */
1413 nigel 3
1414 nigel 11 CONTINUE:
1415 nigel 37
1416     #if !defined NOPOSIX
1417 nigel 3 if (posix || do_posix) regfree(&preg);
1418 nigel 37 #endif
1419    
1420 nigel 3 if (re != NULL) free(re);
1421     if (extra != NULL) free(extra);
1422 nigel 25 if (tables != NULL)
1423     {
1424     free((void *)tables);
1425     setlocale(LC_CTYPE, "C");
1426     }
1427 nigel 3 }
1428    
1429     fprintf(outfile, "\n");
1430     return 0;
1431     }
1432    
1433     /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12