/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 41 - (hide annotations) (download)
Sat Feb 24 21:39:17 2007 UTC (7 years, 7 months ago) by nigel
File MIME type: text/plain
File size: 27307 byte(s)
Load pcre-2.08a into code/trunk.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5     #include <ctype.h>
6     #include <stdio.h>
7     #include <string.h>
8     #include <stdlib.h>
9     #include <time.h>
10 nigel 25 #include <locale.h>
11 nigel 3
12     /* Use the internal info for displaying the results of pcre_study(). */
13    
14     #include "internal.h"
15 nigel 37
16     /* It is possible to compile this test program without including support for
17     testing the POSIX interface, though this is not available via the standard
18     Makefile. */
19    
20     #if !defined NOPOSIX
21 nigel 3 #include "pcreposix.h"
22 nigel 37 #endif
23 nigel 3
/* Provide CLOCKS_PER_SEC when <time.h> does not: use CLK_TCK if that is
defined, otherwise fall back to 100. */

#ifndef CLOCKS_PER_SEC
#ifdef CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif

/* Number of iterations performed by each timing loop (compile, study and
execute) when timing is requested. */

#define LOOPREPEAT 20000

/* outfile is the stream that all test output is written to; main() points it
at stdout or at the named output file. log_store, when non-zero, makes
new_malloc() report each allocation it performs. */

static FILE *outfile;
static int log_store = 0;
37    
38    
39    
/* Printable names for the opcodes of the compiled form, indexed by opcode
value; used by print_internals() below. The debugging printer is the same code
as contained in pcre.c under the DEBUG macro. The table is never modified, so
both the strings and the pointer array are const. */

static const char *const OP_names[] = {
  "End", "\\A", "\\B", "\\b", "\\D", "\\d",
  "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
  "Opt", "^", "$", "Any", "chars", "not",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{",
  "class", "Ref",
  "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
  "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
  "Brazero", "Braminzero", "Bra"
};
56    
57    
58 nigel 37 static void print_internals(pcre *re)
59 nigel 3 {
60     unsigned char *code = ((real_pcre *)re)->code;
61    
62 nigel 23 fprintf(outfile, "------------------------------------------------------------------\n");
63 nigel 3
64     for(;;)
65     {
66     int c;
67     int charlength;
68    
69 nigel 23 fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));
70 nigel 3
71     if (*code >= OP_BRA)
72     {
73 nigel 23 fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
74 nigel 3 code += 2;
75     }
76    
77     else switch(*code)
78     {
79     case OP_END:
80 nigel 23 fprintf(outfile, " %s\n", OP_names[*code]);
81     fprintf(outfile, "------------------------------------------------------------------\n");
82 nigel 3 return;
83    
84 nigel 23 case OP_OPT:
85     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);
86     code++;
87     break;
88    
89     case OP_COND:
90     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);
91     code += 2;
92     break;
93    
94     case OP_CREF:
95     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);
96     code++;
97     break;
98    
99 nigel 3 case OP_CHARS:
100     charlength = *(++code);
101 nigel 23 fprintf(outfile, "%3d ", charlength);
102 nigel 3 while (charlength-- > 0)
103 nigel 23 if (isprint(c = *(++code))) fprintf(outfile, "%c", c);
104     else fprintf(outfile, "\\x%02x", c);
105 nigel 3 break;
106    
107     case OP_KETRMAX:
108     case OP_KETRMIN:
109     case OP_ALT:
110     case OP_KET:
111     case OP_ASSERT:
112     case OP_ASSERT_NOT:
113 nigel 23 case OP_ASSERTBACK:
114     case OP_ASSERTBACK_NOT:
115 nigel 3 case OP_ONCE:
116 nigel 23 fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
117 nigel 3 code += 2;
118     break;
119    
120 nigel 23 case OP_REVERSE:
121     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
122     code += 2;
123     break;
124    
125 nigel 3 case OP_STAR:
126     case OP_MINSTAR:
127     case OP_PLUS:
128     case OP_MINPLUS:
129     case OP_QUERY:
130     case OP_MINQUERY:
131     case OP_TYPESTAR:
132     case OP_TYPEMINSTAR:
133     case OP_TYPEPLUS:
134     case OP_TYPEMINPLUS:
135     case OP_TYPEQUERY:
136     case OP_TYPEMINQUERY:
137     if (*code >= OP_TYPESTAR)
138 nigel 23 fprintf(outfile, " %s", OP_names[code[1]]);
139     else if (isprint(c = code[1])) fprintf(outfile, " %c", c);
140     else fprintf(outfile, " \\x%02x", c);
141     fprintf(outfile, "%s", OP_names[*code++]);
142 nigel 3 break;
143    
144     case OP_EXACT:
145     case OP_UPTO:
146     case OP_MINUPTO:
147 nigel 23 if (isprint(c = code[3])) fprintf(outfile, " %c{", c);
148     else fprintf(outfile, " \\x%02x{", c);
149     if (*code != OP_EXACT) fprintf(outfile, ",");
150     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
151     if (*code == OP_MINUPTO) fprintf(outfile, "?");
152 nigel 3 code += 3;
153     break;
154    
155     case OP_TYPEEXACT:
156     case OP_TYPEUPTO:
157     case OP_TYPEMINUPTO:
158 nigel 23 fprintf(outfile, " %s{", OP_names[code[3]]);
159     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");
160     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
161     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");
162 nigel 3 code += 3;
163     break;
164    
165     case OP_NOT:
166 nigel 23 if (isprint(c = *(++code))) fprintf(outfile, " [^%c]", c);
167     else fprintf(outfile, " [^\\x%02x]", c);
168 nigel 3 break;
169    
170     case OP_NOTSTAR:
171     case OP_NOTMINSTAR:
172     case OP_NOTPLUS:
173     case OP_NOTMINPLUS:
174     case OP_NOTQUERY:
175     case OP_NOTMINQUERY:
176 nigel 23 if (isprint(c = code[1])) fprintf(outfile, " [^%c]", c);
177     else fprintf(outfile, " [^\\x%02x]", c);
178     fprintf(outfile, "%s", OP_names[*code++]);
179 nigel 3 break;
180    
181     case OP_NOTEXACT:
182     case OP_NOTUPTO:
183     case OP_NOTMINUPTO:
184 nigel 23 if (isprint(c = code[3])) fprintf(outfile, " [^%c]{", c);
185     else fprintf(outfile, " [^\\x%02x]{", c);
186     if (*code != OP_NOTEXACT) fprintf(outfile, ",");
187     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
188     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");
189 nigel 3 code += 3;
190     break;
191    
192     case OP_REF:
193 nigel 23 fprintf(outfile, " \\%d", *(++code));
194 nigel 9 code++;
195     goto CLASS_REF_REPEAT;
196 nigel 3
197     case OP_CLASS:
198     {
199     int i, min, max;
200 nigel 23 code++;
201     fprintf(outfile, " [");
202 nigel 3
203     for (i = 0; i < 256; i++)
204     {
205     if ((code[i/8] & (1 << (i&7))) != 0)
206     {
207     int j;
208     for (j = i+1; j < 256; j++)
209     if ((code[j/8] & (1 << (j&7))) == 0) break;
210 nigel 23 if (i == '-' || i == ']') fprintf(outfile, "\\");
211     if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);
212 nigel 3 if (--j > i)
213     {
214 nigel 23 fprintf(outfile, "-");
215     if (j == '-' || j == ']') fprintf(outfile, "\\");
216     if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);
217 nigel 3 }
218     i = j;
219     }
220     }
221 nigel 23 fprintf(outfile, "]");
222 nigel 3 code += 32;
223    
224 nigel 9 CLASS_REF_REPEAT:
225    
226 nigel 3 switch(*code)
227     {
228     case OP_CRSTAR:
229     case OP_CRMINSTAR:
230     case OP_CRPLUS:
231     case OP_CRMINPLUS:
232     case OP_CRQUERY:
233     case OP_CRMINQUERY:
234 nigel 23 fprintf(outfile, "%s", OP_names[*code]);
235 nigel 3 break;
236    
237     case OP_CRRANGE:
238     case OP_CRMINRANGE:
239     min = (code[1] << 8) + code[2];
240     max = (code[3] << 8) + code[4];
241 nigel 23 if (max == 0) fprintf(outfile, "{%d,}", min);
242     else fprintf(outfile, "{%d,%d}", min, max);
243     if (*code == OP_CRMINRANGE) fprintf(outfile, "?");
244 nigel 3 code += 4;
245     break;
246    
247     default:
248     code--;
249     }
250     }
251     break;
252    
253     /* Anything else is just a one-node item */
254    
255     default:
256 nigel 23 fprintf(outfile, " %s", OP_names[*code]);
257 nigel 3 break;
258     }
259    
260     code++;
261 nigel 23 fprintf(outfile, "\n");
262 nigel 3 }
263     }
264    
265    
266    
267     /* Character string printing function. */
268    
269     static void pchars(unsigned char *p, int length)
270     {
271     int c;
272     while (length-- > 0)
273     if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
274     else fprintf(outfile, "\\x%02x", c);
275     }
276    
277    
278    
279     /* Alternative malloc function, to test functionality and show the size of the
280     compiled re. */
281    
282     static void *new_malloc(size_t size)
283     {
284 nigel 31 if (log_store)
285 nigel 35 fprintf(outfile, "Memory allocation (code space): %d\n",
286     (int)((int)size - offsetof(real_pcre, code[0])));
287 nigel 3 return malloc(size);
288     }
289    
290    
291    
292     /* Read lines from named file or stdin and write to named file or stdout; lines
293     consist of a regular expression, in delimiters and optionally followed by
294     options, followed by a set of test data, terminated by an empty line. */
295    
296     int main(int argc, char **argv)
297     {
298     FILE *infile = stdin;
299     int options = 0;
300     int study_options = 0;
301     int op = 1;
302     int timeit = 0;
303     int showinfo = 0;
304 nigel 31 int showstore = 0;
305 nigel 3 int posix = 0;
306     int debug = 0;
307 nigel 11 int done = 0;
308 nigel 3 unsigned char buffer[30000];
309     unsigned char dbuffer[1024];
310    
311     /* Static so that new_malloc can use it. */
312    
313     outfile = stdout;
314    
315     /* Scan options */
316    
317     while (argc > 1 && argv[op][0] == '-')
318     {
319 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
320     showstore = 1;
321 nigel 3 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
322     else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
323     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
324     else if (strcmp(argv[op], "-p") == 0) posix = 1;
325     else
326     {
327     printf("*** Unknown option %s\n", argv[op]);
328 nigel 25 printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");
329     printf(" -d debug: show compiled code; implies -i\n"
330     " -i show information about compiled pattern\n"
331     " -p use POSIX interface\n"
332     " -s output store information\n"
333     " -t time compilation and execution\n");
334 nigel 3 return 1;
335     }
336     op++;
337     argc--;
338     }
339    
340     /* Sort out the input and output files */
341    
342     if (argc > 1)
343     {
344     infile = fopen(argv[op], "r");
345     if (infile == NULL)
346     {
347     printf("** Failed to open %s\n", argv[op]);
348     return 1;
349     }
350     }
351    
352     if (argc > 2)
353     {
354     outfile = fopen(argv[op+1], "w");
355     if (outfile == NULL)
356     {
357     printf("** Failed to open %s\n", argv[op+1]);
358     return 1;
359     }
360     }
361    
362     /* Set alternative malloc function */
363    
364     pcre_malloc = new_malloc;
365    
366 nigel 23 /* Heading line, then prompt for first regex if stdin */
367 nigel 3
368     fprintf(outfile, "PCRE version %s\n\n", pcre_version());
369    
370     /* Main loop */
371    
372 nigel 11 while (!done)
373 nigel 3 {
374     pcre *re = NULL;
375     pcre_extra *extra = NULL;
376 nigel 37
377     #if !defined NOPOSIX /* There are still compilers that require no indent */
378 nigel 3 regex_t preg;
379 nigel 37 #endif
380    
381 nigel 7 const char *error;
382 nigel 25 unsigned char *p, *pp, *ppp;
383     unsigned const char *tables = NULL;
384 nigel 3 int do_study = 0;
385 nigel 25 int do_debug = debug;
386 nigel 35 int do_G = 0;
387     int do_g = 0;
388 nigel 25 int do_showinfo = showinfo;
389 nigel 35 int do_showrest = 0;
390 nigel 3 int do_posix = 0;
391     int erroroffset, len, delimiter;
392    
393     if (infile == stdin) printf(" re> ");
394     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
395 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
396 nigel 3
397     p = buffer;
398     while (isspace(*p)) p++;
399     if (*p == 0) continue;
400    
401     /* Get the delimiter and seek the end of the pattern; if is isn't
402     complete, read more. */
403    
404     delimiter = *p++;
405    
406 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
407 nigel 3 {
408 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
409 nigel 3 goto SKIP_DATA;
410     }
411    
412     pp = p;
413    
414     for(;;)
415     {
416 nigel 29 while (*pp != 0)
417     {
418     if (*pp == '\\' && pp[1] != 0) pp++;
419     else if (*pp == delimiter) break;
420     pp++;
421     }
422 nigel 3 if (*pp != 0) break;
423    
424     len = sizeof(buffer) - (pp - buffer);
425     if (len < 256)
426     {
427     fprintf(outfile, "** Expression too long - missing delimiter?\n");
428     goto SKIP_DATA;
429     }
430    
431     if (infile == stdin) printf(" > ");
432     if (fgets((char *)pp, len, infile) == NULL)
433     {
434     fprintf(outfile, "** Unexpected EOF\n");
435 nigel 11 done = 1;
436     goto CONTINUE;
437 nigel 3 }
438 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
439 nigel 3 }
440    
441 nigel 29 /* If the first character after the delimiter is backslash, make
442     the pattern end with backslash. This is purely to provide a way
443     of testing for the error message when a pattern ends with backslash. */
444    
445     if (pp[1] == '\\') *pp++ = '\\';
446    
447 nigel 3 /* Terminate the pattern at the delimiter */
448    
449     *pp++ = 0;
450    
451     /* Look for options after final delimiter */
452    
453     options = 0;
454     study_options = 0;
455 nigel 31 log_store = showstore; /* default from command line */
456    
457 nigel 3 while (*pp != 0)
458     {
459     switch (*pp++)
460     {
461 nigel 35 case 'g': do_g = 1; break;
462 nigel 3 case 'i': options |= PCRE_CASELESS; break;
463     case 'm': options |= PCRE_MULTILINE; break;
464     case 's': options |= PCRE_DOTALL; break;
465     case 'x': options |= PCRE_EXTENDED; break;
466 nigel 25
467 nigel 35 case '+': do_showrest = 1; break;
468 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
469 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
470 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
471 nigel 35 case 'G': do_G = 1; break;
472 nigel 25 case 'I': do_showinfo = 1; break;
473 nigel 31 case 'M': log_store = 1; break;
474 nigel 37
475     #if !defined NOPOSIX
476 nigel 3 case 'P': do_posix = 1; break;
477 nigel 37 #endif
478    
479 nigel 3 case 'S': do_study = 1; break;
480 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
481 nigel 3 case 'X': options |= PCRE_EXTRA; break;
482 nigel 25
483     case 'L':
484     ppp = pp;
485     while (*ppp != '\n' && *ppp != ' ') ppp++;
486     *ppp = 0;
487     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
488     {
489     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
490     goto SKIP_DATA;
491     }
492     tables = pcre_maketables();
493     pp = ppp;
494     break;
495    
496 nigel 3 case '\n': case ' ': break;
497     default:
498     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
499     goto SKIP_DATA;
500     }
501     }
502    
503 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
504 nigel 25 timing, showing, or debugging options, nor the ability to pass over
505     local character tables. */
506 nigel 3
507 nigel 37 #if !defined NOPOSIX
508 nigel 3 if (posix || do_posix)
509     {
510     int rc;
511     int cflags = 0;
512     if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
513     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
514     rc = regcomp(&preg, (char *)p, cflags);
515    
516     /* Compilation failed; go back for another re, skipping to blank line
517     if non-interactive. */
518    
519     if (rc != 0)
520     {
521     (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
522     fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
523     goto SKIP_DATA;
524     }
525     }
526    
527     /* Handle compiling via the native interface */
528    
529     else
530 nigel 37 #endif /* !defined NOPOSIX */
531    
532 nigel 3 {
533     if (timeit)
534     {
535     register int i;
536     clock_t time_taken;
537     clock_t start_time = clock();
538 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
539 nigel 3 {
540 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
541 nigel 3 if (re != NULL) free(re);
542     }
543     time_taken = clock() - start_time;
544 nigel 27 fprintf(outfile, "Compile time %.3f milliseconds\n",
545     ((double)time_taken * 1000.0) /
546     ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
547 nigel 3 }
548    
549 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
550 nigel 3
551     /* Compilation failed; go back for another re, skipping to blank line
552     if non-interactive. */
553    
554     if (re == NULL)
555     {
556     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
557     SKIP_DATA:
558     if (infile != stdin)
559     {
560     for (;;)
561     {
562     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
563 nigel 11 {
564     done = 1;
565     goto CONTINUE;
566     }
567 nigel 3 len = (int)strlen((char *)buffer);
568     while (len > 0 && isspace(buffer[len-1])) len--;
569     if (len == 0) break;
570     }
571     fprintf(outfile, "\n");
572     }
573 nigel 25 goto CONTINUE;
574 nigel 3 }
575    
576     /* Compilation succeeded; print data if required */
577    
578 nigel 25 if (do_showinfo)
579 nigel 3 {
580     int first_char, count;
581    
582 nigel 37 if (do_debug) print_internals(re);
583 nigel 3
584     count = pcre_info(re, &options, &first_char);
585     if (count < 0) fprintf(outfile,
586     "Error %d while reading info\n", count);
587     else
588     {
589     fprintf(outfile, "Identifying subpattern count = %d\n", count);
590     if (options == 0) fprintf(outfile, "No options\n");
591 nigel 19 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",
592 nigel 3 ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
593     ((options & PCRE_CASELESS) != 0)? " caseless" : "",
594     ((options & PCRE_EXTENDED) != 0)? " extended" : "",
595     ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
596     ((options & PCRE_DOTALL) != 0)? " dotall" : "",
597     ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
598 nigel 19 ((options & PCRE_EXTRA) != 0)? " extra" : "",
599     ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");
600 nigel 37
601     if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
602     fprintf(outfile, "Case state changes\n");
603    
604 nigel 3 if (first_char == -1)
605     {
606     fprintf(outfile, "First char at start or follows \\n\n");
607     }
608     else if (first_char < 0)
609     {
610     fprintf(outfile, "No first char\n");
611     }
612     else
613     {
614     if (isprint(first_char))
615     fprintf(outfile, "First char = \'%c\'\n", first_char);
616     else
617     fprintf(outfile, "First char = %d\n", first_char);
618     }
619 nigel 37
620     if (((((real_pcre *)re)->options) & PCRE_REQCHSET) != 0)
621     {
622     int req_char = ((real_pcre *)re)->req_char;
623     if (isprint(req_char))
624     fprintf(outfile, "Req char = \'%c\'\n", req_char);
625     else
626     fprintf(outfile, "Req char = %d\n", req_char);
627     }
628     else fprintf(outfile, "No req char\n");
629 nigel 3 }
630     }
631    
632     /* If /S was present, study the regexp to generate additional info to
633     help with the matching. */
634    
635     if (do_study)
636     {
637     if (timeit)
638     {
639     register int i;
640     clock_t time_taken;
641     clock_t start_time = clock();
642 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
643 nigel 3 extra = pcre_study(re, study_options, &error);
644     time_taken = clock() - start_time;
645     if (extra != NULL) free(extra);
646 nigel 27 fprintf(outfile, " Study time %.3f milliseconds\n",
647     ((double)time_taken * 1000.0)/
648     ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
649 nigel 3 }
650    
651     extra = pcre_study(re, study_options, &error);
652     if (error != NULL)
653     fprintf(outfile, "Failed to study: %s\n", error);
654     else if (extra == NULL)
655     fprintf(outfile, "Study returned NULL\n");
656    
657     /* This looks at internal information. A bit kludgy to do it this
658     way, but it is useful for testing. */
659    
660 nigel 25 else if (do_showinfo)
661 nigel 3 {
662     real_pcre_extra *xx = (real_pcre_extra *)extra;
663     if ((xx->options & PCRE_STUDY_MAPPED) == 0)
664     fprintf(outfile, "No starting character set\n");
665     else
666     {
667     int i;
668     int c = 24;
669     fprintf(outfile, "Starting character set: ");
670     for (i = 0; i < 256; i++)
671     {
672     if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)
673     {
674     if (c > 75)
675     {
676     fprintf(outfile, "\n ");
677     c = 2;
678     }
679     if (isprint(i) && i != ' ')
680     {
681     fprintf(outfile, "%c ", i);
682     c += 2;
683     }
684     else
685     {
686     fprintf(outfile, "\\x%02x ", i);
687     c += 5;
688     }
689     }
690     }
691     fprintf(outfile, "\n");
692     }
693     }
694     }
695     }
696    
697     /* Read data lines and test them */
698    
699     for (;;)
700     {
701 nigel 9 unsigned char *q;
702 nigel 35 unsigned char *bptr = dbuffer;
703 nigel 3 int count, c;
704 nigel 29 int copystrings = 0;
705     int getstrings = 0;
706     int getlist = 0;
707 nigel 39 int gmatched = 0;
708 nigel 35 int start_offset = 0;
709 nigel 41 int g_notempty = 0;
710 nigel 23 int offsets[45];
711 nigel 3 int size_offsets = sizeof(offsets)/sizeof(int);
712    
713     options = 0;
714    
715 nigel 35 if (infile == stdin) printf("data> ");
716 nigel 11 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
717     {
718     done = 1;
719     goto CONTINUE;
720     }
721 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
722 nigel 3
723     len = (int)strlen((char *)buffer);
724     while (len > 0 && isspace(buffer[len-1])) len--;
725     buffer[len] = 0;
726     if (len == 0) break;
727    
728     p = buffer;
729     while (isspace(*p)) p++;
730    
731 nigel 9 q = dbuffer;
732 nigel 3 while ((c = *p++) != 0)
733     {
734     int i = 0;
735     int n = 0;
736     if (c == '\\') switch ((c = *p++))
737     {
738     case 'a': c = 7; break;
739     case 'b': c = '\b'; break;
740     case 'e': c = 27; break;
741     case 'f': c = '\f'; break;
742     case 'n': c = '\n'; break;
743     case 'r': c = '\r'; break;
744     case 't': c = '\t'; break;
745     case 'v': c = '\v'; break;
746    
747     case '0': case '1': case '2': case '3':
748     case '4': case '5': case '6': case '7':
749     c -= '0';
750     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
751     c = c * 8 + *p++ - '0';
752     break;
753    
754     case 'x':
755     c = 0;
756     while (i++ < 2 && isxdigit(*p))
757     {
758     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
759     p++;
760     }
761     break;
762    
763     case 0: /* Allows for an empty line */
764     p--;
765     continue;
766    
767     case 'A': /* Option setting */
768     options |= PCRE_ANCHORED;
769     continue;
770    
771     case 'B':
772     options |= PCRE_NOTBOL;
773     continue;
774    
775 nigel 29 case 'C':
776     while(isdigit(*p)) n = n * 10 + *p++ - '0';
777     copystrings |= 1 << n;
778     continue;
779    
780     case 'G':
781     while(isdigit(*p)) n = n * 10 + *p++ - '0';
782     getstrings |= 1 << n;
783     continue;
784    
785     case 'L':
786     getlist = 1;
787     continue;
788    
789 nigel 37 case 'N':
790     options |= PCRE_NOTEMPTY;
791     continue;
792    
793 nigel 3 case 'O':
794     while(isdigit(*p)) n = n * 10 + *p++ - '0';
795 nigel 9 if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;
796 nigel 3 continue;
797    
798     case 'Z':
799     options |= PCRE_NOTEOL;
800     continue;
801     }
802 nigel 9 *q++ = c;
803 nigel 3 }
804 nigel 9 *q = 0;
805     len = q - dbuffer;
806 nigel 3
807     /* Handle matching via the POSIX interface, which does not
808     support timing. */
809    
810 nigel 37 #if !defined NOPOSIX
811 nigel 3 if (posix || do_posix)
812     {
813     int rc;
814     int eflags = 0;
815 nigel 41 regmatch_t pmatch[sizeof(offsets)/sizeof(int)];
816 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
817     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
818    
819 nigel 41 rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);
820 nigel 3
821     if (rc != 0)
822     {
823     (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
824     fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
825     }
826     else
827     {
828 nigel 7 size_t i;
829 nigel 41 for (i = 0; i < size_offsets; i++)
830 nigel 3 {
831     if (pmatch[i].rm_so >= 0)
832     {
833 nigel 23 fprintf(outfile, "%2d: ", (int)i);
834 nigel 3 pchars(dbuffer + pmatch[i].rm_so,
835     pmatch[i].rm_eo - pmatch[i].rm_so);
836     fprintf(outfile, "\n");
837 nigel 35 if (i == 0 && do_showrest)
838     {
839     fprintf(outfile, " 0+ ");
840     pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);
841     fprintf(outfile, "\n");
842     }
843 nigel 3 }
844     }
845     }
846     }
847    
848 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
849 nigel 3
850 nigel 37 else
851     #endif /* !defined NOPOSIX */
852    
853 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
854 nigel 3 {
855     if (timeit)
856     {
857     register int i;
858     clock_t time_taken;
859     clock_t start_time = clock();
860 nigel 27 for (i = 0; i < LOOPREPEAT; i++)
861 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
862 nigel 41 start_offset, options | g_notempty, offsets, size_offsets);
863 nigel 3 time_taken = clock() - start_time;
864 nigel 27 fprintf(outfile, "Execute time %.3f milliseconds\n",
865     ((double)time_taken * 1000.0)/
866     ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
867 nigel 3 }
868    
869 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
870 nigel 41 start_offset, options | g_notempty, offsets, size_offsets);
871 nigel 3
872     if (count == 0)
873     {
874     fprintf(outfile, "Matched, but too many substrings\n");
875 nigel 23 count = size_offsets/3;
876 nigel 3 }
877    
878 nigel 39 /* Matched */
879    
880 nigel 3 if (count >= 0)
881     {
882     int i;
883 nigel 29 for (i = 0; i < count * 2; i += 2)
884 nigel 3 {
885     if (offsets[i] < 0)
886     fprintf(outfile, "%2d: <unset>\n", i/2);
887     else
888     {
889     fprintf(outfile, "%2d: ", i/2);
890 nigel 35 pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);
891 nigel 3 fprintf(outfile, "\n");
892 nigel 35 if (i == 0)
893     {
894     if (do_showrest)
895     {
896     fprintf(outfile, " 0+ ");
897     pchars(bptr + offsets[i+1], len - offsets[i+1]);
898     fprintf(outfile, "\n");
899     }
900     }
901 nigel 3 }
902     }
903 nigel 29
904     for (i = 0; i < 32; i++)
905     {
906     if ((copystrings & (1 << i)) != 0)
907     {
908 nigel 37 char copybuffer[16];
909 nigel 35 int rc = pcre_copy_substring((char *)bptr, offsets, count,
910 nigel 37 i, copybuffer, sizeof(copybuffer));
911 nigel 29 if (rc < 0)
912     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
913     else
914 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
915 nigel 29 }
916     }
917    
918     for (i = 0; i < 32; i++)
919     {
920     if ((getstrings & (1 << i)) != 0)
921     {
922     const char *substring;
923 nigel 35 int rc = pcre_get_substring((char *)bptr, offsets, count,
924 nigel 29 i, &substring);
925     if (rc < 0)
926     fprintf(outfile, "get substring %d failed %d\n", i, rc);
927     else
928     {
929     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
930     free((void *)substring);
931     }
932     }
933     }
934    
935     if (getlist)
936     {
937     const char **stringlist;
938 nigel 35 int rc = pcre_get_substring_list((char *)bptr, offsets, count,
939 nigel 29 &stringlist);
940     if (rc < 0)
941     fprintf(outfile, "get substring list failed %d\n", rc);
942     else
943     {
944     for (i = 0; i < count; i++)
945     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
946     if (stringlist[i] != NULL)
947     fprintf(outfile, "string list not terminated by NULL\n");
948     free((void *)stringlist);
949     }
950     }
951 nigel 39 }
952 nigel 29
953 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
954     PCRE_NOTEMPTY after a null match, this is not necessarily the end.
955     We want to advance the start offset, and continue. Fudge the offset
956     values to achieve this. We won't be at the end of the string - that
957     was checked before setting PCRE_NOTEMPTY. */
958 nigel 39
959 nigel 3 else
960     {
961 nigel 41 if (g_notempty != 0)
962 nigel 35 {
963 nigel 41 offsets[0] = start_offset;
964     offsets[1] = start_offset + 1;
965 nigel 35 }
966 nigel 41 else
967     {
968     if (gmatched == 0) /* Error if no previous matches */
969     {
970     if (count == -1) fprintf(outfile, "No match\n");
971     else fprintf(outfile, "Error %d\n", count);
972     }
973     break; /* Out of the /g loop */
974     }
975 nigel 3 }
976 nigel 35
977 nigel 39 /* If not /g or /G we are done */
978    
979     if (!do_g && !do_G) break;
980    
981 nigel 41 /* If we have matched an empty string, first check to see if we are at
982     the end of the subject. If so, the /g loop is over. Otherwise, mimic
983     what Perl's /g options does. This turns out to be rather cunning. First
984     we set PCRE_NOTEMPTY and try the match again at the same point. If this
985     fails (picked up above) we advance to the next character. */
986 nigel 39
987 nigel 41 g_notempty = 0;
988     if (offsets[0] == offsets[1])
989     {
990     if (offsets[0] == len) break;
991     g_notempty = PCRE_NOTEMPTY;
992     }
993 nigel 39
994     /* For /g, update the start offset, leaving the rest alone */
995    
996     if (do_g) start_offset = offsets[1];
997    
998     /* For /G, update the pointer and length */
999    
1000     else
1001 nigel 35 {
1002 nigel 39 bptr += offsets[1];
1003     len -= offsets[1];
1004 nigel 35 }
1005 nigel 39 } /* End of loop for /g and /G */
1006     } /* End of loop for data lines */
1007 nigel 3
1008 nigel 11 CONTINUE:
1009 nigel 37
1010     #if !defined NOPOSIX
1011 nigel 3 if (posix || do_posix) regfree(&preg);
1012 nigel 37 #endif
1013    
1014 nigel 3 if (re != NULL) free(re);
1015     if (extra != NULL) free(extra);
1016 nigel 25 if (tables != NULL)
1017     {
1018     free((void *)tables);
1019     setlocale(LC_CTYPE, "C");
1020     }
1021 nigel 3 }
1022    
1023     fprintf(outfile, "\n");
1024     return 0;
1025     }
1026    
1027     /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12