/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 47 - (hide annotations) (download)
Sat Feb 24 21:39:29 2007 UTC (7 years, 5 months ago) by nigel
File MIME type: text/plain
File size: 28778 byte(s)
Load pcre-3.2 into code/trunk.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5     #include <ctype.h>
6     #include <stdio.h>
7     #include <string.h>
8     #include <stdlib.h>
9     #include <time.h>
10 nigel 25 #include <locale.h>
11 nigel 3
12     /* Use the internal info for displaying the results of pcre_study(). */
13    
14     #include "internal.h"
15 nigel 37
16     /* It is possible to compile this test program without including support for
17     testing the POSIX interface, though this is not available via the standard
18     Makefile. */
19    
20     #if !defined NOPOSIX
21 nigel 3 #include "pcreposix.h"
22 nigel 37 #endif
23 nigel 3
24     #ifndef CLOCKS_PER_SEC
25     #ifdef CLK_TCK
26     #define CLOCKS_PER_SEC CLK_TCK
27     #else
28     #define CLOCKS_PER_SEC 100
29     #endif
30     #endif
31    
32 nigel 27 #define LOOPREPEAT 20000
33 nigel 3
34 nigel 23
35 nigel 3 static FILE *outfile;
36     static int log_store = 0;
37 nigel 43 static size_t gotten_store;
38 nigel 3
39    
40    
41     /* Debugging function to print the internal form of the regex. This is the same
42     code as contained in pcre.c under the DEBUG macro. */
43    
/* Printable names for the compiled opcodes. print_internals() looks entries up
as OP_names[opcode], so the order of the strings must follow the opcode
numbering. Neither the strings nor the table are ever modified, hence both
levels are const. */

static const char *const OP_names[] = {
  "End", "\\A", "\\B", "\\b", "\\D", "\\d",
  "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
  "Opt", "^", "$", "Any", "chars", "not",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{",
  "class", "Ref", "Recurse",
  "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
  "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
  "Brazero", "Braminzero", "Bra"
};
57    
58    
59 nigel 37 static void print_internals(pcre *re)
60 nigel 3 {
61     unsigned char *code = ((real_pcre *)re)->code;
62    
63 nigel 23 fprintf(outfile, "------------------------------------------------------------------\n");
64 nigel 3
65     for(;;)
66     {
67     int c;
68     int charlength;
69    
70 nigel 23 fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));
71 nigel 3
72     if (*code >= OP_BRA)
73     {
74 nigel 23 fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
75 nigel 3 code += 2;
76     }
77    
78     else switch(*code)
79     {
80     case OP_END:
81 nigel 23 fprintf(outfile, " %s\n", OP_names[*code]);
82     fprintf(outfile, "------------------------------------------------------------------\n");
83 nigel 3 return;
84    
85 nigel 23 case OP_OPT:
86     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);
87     code++;
88     break;
89    
90     case OP_COND:
91     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);
92     code += 2;
93     break;
94    
95     case OP_CREF:
96     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);
97     code++;
98     break;
99    
100 nigel 3 case OP_CHARS:
101     charlength = *(++code);
102 nigel 23 fprintf(outfile, "%3d ", charlength);
103 nigel 3 while (charlength-- > 0)
104 nigel 23 if (isprint(c = *(++code))) fprintf(outfile, "%c", c);
105     else fprintf(outfile, "\\x%02x", c);
106 nigel 3 break;
107    
108     case OP_KETRMAX:
109     case OP_KETRMIN:
110     case OP_ALT:
111     case OP_KET:
112     case OP_ASSERT:
113     case OP_ASSERT_NOT:
114 nigel 23 case OP_ASSERTBACK:
115     case OP_ASSERTBACK_NOT:
116 nigel 3 case OP_ONCE:
117 nigel 23 fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
118 nigel 3 code += 2;
119     break;
120    
121 nigel 23 case OP_REVERSE:
122     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
123     code += 2;
124     break;
125    
126 nigel 3 case OP_STAR:
127     case OP_MINSTAR:
128     case OP_PLUS:
129     case OP_MINPLUS:
130     case OP_QUERY:
131     case OP_MINQUERY:
132     case OP_TYPESTAR:
133     case OP_TYPEMINSTAR:
134     case OP_TYPEPLUS:
135     case OP_TYPEMINPLUS:
136     case OP_TYPEQUERY:
137     case OP_TYPEMINQUERY:
138     if (*code >= OP_TYPESTAR)
139 nigel 23 fprintf(outfile, " %s", OP_names[code[1]]);
140     else if (isprint(c = code[1])) fprintf(outfile, " %c", c);
141     else fprintf(outfile, " \\x%02x", c);
142     fprintf(outfile, "%s", OP_names[*code++]);
143 nigel 3 break;
144    
145     case OP_EXACT:
146     case OP_UPTO:
147     case OP_MINUPTO:
148 nigel 23 if (isprint(c = code[3])) fprintf(outfile, " %c{", c);
149     else fprintf(outfile, " \\x%02x{", c);
150     if (*code != OP_EXACT) fprintf(outfile, ",");
151     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
152     if (*code == OP_MINUPTO) fprintf(outfile, "?");
153 nigel 3 code += 3;
154     break;
155    
156     case OP_TYPEEXACT:
157     case OP_TYPEUPTO:
158     case OP_TYPEMINUPTO:
159 nigel 23 fprintf(outfile, " %s{", OP_names[code[3]]);
160     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");
161     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
162     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");
163 nigel 3 code += 3;
164     break;
165    
166     case OP_NOT:
167 nigel 23 if (isprint(c = *(++code))) fprintf(outfile, " [^%c]", c);
168     else fprintf(outfile, " [^\\x%02x]", c);
169 nigel 3 break;
170    
171     case OP_NOTSTAR:
172     case OP_NOTMINSTAR:
173     case OP_NOTPLUS:
174     case OP_NOTMINPLUS:
175     case OP_NOTQUERY:
176     case OP_NOTMINQUERY:
177 nigel 23 if (isprint(c = code[1])) fprintf(outfile, " [^%c]", c);
178     else fprintf(outfile, " [^\\x%02x]", c);
179     fprintf(outfile, "%s", OP_names[*code++]);
180 nigel 3 break;
181    
182     case OP_NOTEXACT:
183     case OP_NOTUPTO:
184     case OP_NOTMINUPTO:
185 nigel 23 if (isprint(c = code[3])) fprintf(outfile, " [^%c]{", c);
186     else fprintf(outfile, " [^\\x%02x]{", c);
187     if (*code != OP_NOTEXACT) fprintf(outfile, ",");
188     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
189     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");
190 nigel 3 code += 3;
191     break;
192    
193     case OP_REF:
194 nigel 23 fprintf(outfile, " \\%d", *(++code));
195 nigel 9 code++;
196     goto CLASS_REF_REPEAT;
197 nigel 3
198     case OP_CLASS:
199     {
200     int i, min, max;
201 nigel 23 code++;
202     fprintf(outfile, " [");
203 nigel 3
204     for (i = 0; i < 256; i++)
205     {
206     if ((code[i/8] & (1 << (i&7))) != 0)
207     {
208     int j;
209     for (j = i+1; j < 256; j++)
210     if ((code[j/8] & (1 << (j&7))) == 0) break;
211 nigel 23 if (i == '-' || i == ']') fprintf(outfile, "\\");
212     if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);
213 nigel 3 if (--j > i)
214     {
215 nigel 23 fprintf(outfile, "-");
216     if (j == '-' || j == ']') fprintf(outfile, "\\");
217     if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);
218 nigel 3 }
219     i = j;
220     }
221     }
222 nigel 23 fprintf(outfile, "]");
223 nigel 3 code += 32;
224    
225 nigel 9 CLASS_REF_REPEAT:
226    
227 nigel 3 switch(*code)
228     {
229     case OP_CRSTAR:
230     case OP_CRMINSTAR:
231     case OP_CRPLUS:
232     case OP_CRMINPLUS:
233     case OP_CRQUERY:
234     case OP_CRMINQUERY:
235 nigel 23 fprintf(outfile, "%s", OP_names[*code]);
236 nigel 3 break;
237    
238     case OP_CRRANGE:
239     case OP_CRMINRANGE:
240     min = (code[1] << 8) + code[2];
241     max = (code[3] << 8) + code[4];
242 nigel 23 if (max == 0) fprintf(outfile, "{%d,}", min);
243     else fprintf(outfile, "{%d,%d}", min, max);
244     if (*code == OP_CRMINRANGE) fprintf(outfile, "?");
245 nigel 3 code += 4;
246     break;
247    
248     default:
249     code--;
250     }
251     }
252     break;
253    
254     /* Anything else is just a one-node item */
255    
256     default:
257 nigel 23 fprintf(outfile, " %s", OP_names[*code]);
258 nigel 3 break;
259     }
260    
261     code++;
262 nigel 23 fprintf(outfile, "\n");
263 nigel 3 }
264     }
265    
266    
267    
268     /* Character string printing function. */
269    
270     static void pchars(unsigned char *p, int length)
271     {
272     int c;
273     while (length-- > 0)
274     if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
275     else fprintf(outfile, "\\x%02x", c);
276     }
277    
278    
279    
280     /* Alternative malloc function, to test functionality and show the size of the
281     compiled re. */
282    
283     static void *new_malloc(size_t size)
284     {
285 nigel 43 gotten_store = size;
286 nigel 31 if (log_store)
287 nigel 35 fprintf(outfile, "Memory allocation (code space): %d\n",
288     (int)((int)size - offsetof(real_pcre, code[0])));
289 nigel 3 return malloc(size);
290     }
291    
292    
293    
294 nigel 43
295     /* Get one piece of information from the pcre_fullinfo() function */
296    
297     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
298     {
299     int rc;
300     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
301     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
302     }
303    
304    
305    
306    
307 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
308     consist of a regular expression, in delimiters and optionally followed by
309     options, followed by a set of test data, terminated by an empty line. */
310    
311     int main(int argc, char **argv)
312     {
313     FILE *infile = stdin;
314     int options = 0;
315     int study_options = 0;
316     int op = 1;
317     int timeit = 0;
318     int showinfo = 0;
319 nigel 31 int showstore = 0;
320 nigel 3 int posix = 0;
321     int debug = 0;
322 nigel 11 int done = 0;
323 nigel 3 unsigned char buffer[30000];
324     unsigned char dbuffer[1024];
325    
326     /* Static so that new_malloc can use it. */
327    
328     outfile = stdout;
329    
330     /* Scan options */
331    
332     while (argc > 1 && argv[op][0] == '-')
333     {
334 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
335     showstore = 1;
336 nigel 3 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
337     else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
338     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
339     else if (strcmp(argv[op], "-p") == 0) posix = 1;
340     else
341     {
342     printf("*** Unknown option %s\n", argv[op]);
343 nigel 25 printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");
344     printf(" -d debug: show compiled code; implies -i\n"
345     " -i show information about compiled pattern\n"
346     " -p use POSIX interface\n"
347     " -s output store information\n"
348     " -t time compilation and execution\n");
349 nigel 3 return 1;
350     }
351     op++;
352     argc--;
353     }
354    
355     /* Sort out the input and output files */
356    
357     if (argc > 1)
358     {
359     infile = fopen(argv[op], "r");
360     if (infile == NULL)
361     {
362     printf("** Failed to open %s\n", argv[op]);
363     return 1;
364     }
365     }
366    
367     if (argc > 2)
368     {
369     outfile = fopen(argv[op+1], "w");
370     if (outfile == NULL)
371     {
372     printf("** Failed to open %s\n", argv[op+1]);
373     return 1;
374     }
375     }
376    
377     /* Set alternative malloc function */
378    
379     pcre_malloc = new_malloc;
380    
381 nigel 23 /* Heading line, then prompt for first regex if stdin */
382 nigel 3
383     fprintf(outfile, "PCRE version %s\n\n", pcre_version());
384    
385     /* Main loop */
386    
387 nigel 11 while (!done)
388 nigel 3 {
389     pcre *re = NULL;
390     pcre_extra *extra = NULL;
391 nigel 37
392     #if !defined NOPOSIX /* There are still compilers that require no indent */
393 nigel 3 regex_t preg;
394 nigel 45 int do_posix = 0;
395 nigel 37 #endif
396    
397 nigel 7 const char *error;
398 nigel 25 unsigned char *p, *pp, *ppp;
399     unsigned const char *tables = NULL;
400 nigel 3 int do_study = 0;
401 nigel 25 int do_debug = debug;
402 nigel 35 int do_G = 0;
403     int do_g = 0;
404 nigel 25 int do_showinfo = showinfo;
405 nigel 35 int do_showrest = 0;
406 nigel 3 int erroroffset, len, delimiter;
407    
408     if (infile == stdin) printf(" re> ");
409     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
410 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
411 nigel 3
412     p = buffer;
413     while (isspace(*p)) p++;
414     if (*p == 0) continue;
415    
416     /* Get the delimiter and seek the end of the pattern; if is isn't
417     complete, read more. */
418    
419     delimiter = *p++;
420    
421 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
422 nigel 3 {
423 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
424 nigel 3 goto SKIP_DATA;
425     }
426    
427     pp = p;
428    
429     for(;;)
430     {
431 nigel 29 while (*pp != 0)
432     {
433     if (*pp == '\\' && pp[1] != 0) pp++;
434     else if (*pp == delimiter) break;
435     pp++;
436     }
437 nigel 3 if (*pp != 0) break;
438    
439     len = sizeof(buffer) - (pp - buffer);
440     if (len < 256)
441     {
442     fprintf(outfile, "** Expression too long - missing delimiter?\n");
443     goto SKIP_DATA;
444     }
445    
446     if (infile == stdin) printf(" > ");
447     if (fgets((char *)pp, len, infile) == NULL)
448     {
449     fprintf(outfile, "** Unexpected EOF\n");
450 nigel 11 done = 1;
451     goto CONTINUE;
452 nigel 3 }
453 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
454 nigel 3 }
455    
456 nigel 29 /* If the first character after the delimiter is backslash, make
457     the pattern end with backslash. This is purely to provide a way
458     of testing for the error message when a pattern ends with backslash. */
459    
460     if (pp[1] == '\\') *pp++ = '\\';
461    
462 nigel 3 /* Terminate the pattern at the delimiter */
463    
464     *pp++ = 0;
465    
466     /* Look for options after final delimiter */
467    
468     options = 0;
469     study_options = 0;
470 nigel 31 log_store = showstore; /* default from command line */
471    
472 nigel 3 while (*pp != 0)
473     {
474     switch (*pp++)
475     {
476 nigel 35 case 'g': do_g = 1; break;
477 nigel 3 case 'i': options |= PCRE_CASELESS; break;
478     case 'm': options |= PCRE_MULTILINE; break;
479     case 's': options |= PCRE_DOTALL; break;
480     case 'x': options |= PCRE_EXTENDED; break;
481 nigel 25
482 nigel 35 case '+': do_showrest = 1; break;
483 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
484 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
485 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
486 nigel 35 case 'G': do_G = 1; break;
487 nigel 25 case 'I': do_showinfo = 1; break;
488 nigel 31 case 'M': log_store = 1; break;
489 nigel 37
490     #if !defined NOPOSIX
491 nigel 3 case 'P': do_posix = 1; break;
492 nigel 37 #endif
493    
494 nigel 3 case 'S': do_study = 1; break;
495 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
496 nigel 3 case 'X': options |= PCRE_EXTRA; break;
497 nigel 25
498     case 'L':
499     ppp = pp;
500     while (*ppp != '\n' && *ppp != ' ') ppp++;
501     *ppp = 0;
502     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
503     {
504     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
505     goto SKIP_DATA;
506     }
507     tables = pcre_maketables();
508     pp = ppp;
509     break;
510    
511 nigel 3 case '\n': case ' ': break;
512     default:
513     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
514     goto SKIP_DATA;
515     }
516     }
517    
518 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
519 nigel 25 timing, showing, or debugging options, nor the ability to pass over
520     local character tables. */
521 nigel 3
522 nigel 37 #if !defined NOPOSIX
523 nigel 3 if (posix || do_posix)
524     {
525     int rc;
526     int cflags = 0;
527     if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
528     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
529     rc = regcomp(&preg, (char *)p, cflags);
530    
531     /* Compilation failed; go back for another re, skipping to blank line
532     if non-interactive. */
533    
534     if (rc != 0)
535     {
536     (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
537     fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
538     goto SKIP_DATA;
539     }
540     }
541    
542     /* Handle compiling via the native interface */
543    
544     else
545 nigel 37 #endif /* !defined NOPOSIX */
546    
547 nigel 3 {
548     if (timeit)
549     {
550     register int i;
551     clock_t time_taken;
552     clock_t start_time = clock();
553 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
554 nigel 3 {
555 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
556 nigel 3 if (re != NULL) free(re);
557     }
558     time_taken = clock() - start_time;
559 nigel 27 fprintf(outfile, "Compile time %.3f milliseconds\n",
560     ((double)time_taken * 1000.0) /
561     ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
562 nigel 3 }
563    
564 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
565 nigel 3
566     /* Compilation failed; go back for another re, skipping to blank line
567     if non-interactive. */
568    
569     if (re == NULL)
570     {
571     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
572     SKIP_DATA:
573     if (infile != stdin)
574     {
575     for (;;)
576     {
577     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
578 nigel 11 {
579     done = 1;
580     goto CONTINUE;
581     }
582 nigel 3 len = (int)strlen((char *)buffer);
583     while (len > 0 && isspace(buffer[len-1])) len--;
584     if (len == 0) break;
585     }
586     fprintf(outfile, "\n");
587     }
588 nigel 25 goto CONTINUE;
589 nigel 3 }
590    
591 nigel 43 /* Compilation succeeded; print data if required. There are now two
592     info-returning functions. The old one has a limited interface and
593     returns only limited data. Check that it agrees with the newer one. */
594 nigel 3
595 nigel 25 if (do_showinfo)
596 nigel 3 {
597 nigel 43 int old_first_char, old_options, old_count;
598     int count, backrefmax, first_char, need_char;
599     size_t size;
600 nigel 3
601 nigel 37 if (do_debug) print_internals(re);
602 nigel 3
603 nigel 43 new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
604     new_info(re, NULL, PCRE_INFO_SIZE, &size);
605     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
606     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
607     new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
608     new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
609    
610     old_count = pcre_info(re, &old_options, &old_first_char);
611 nigel 3 if (count < 0) fprintf(outfile,
612 nigel 43 "Error %d from pcre_info()\n", count);
613 nigel 3 else
614     {
615 nigel 43 if (old_count != count) fprintf(outfile,
616     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
617     old_count);
618 nigel 37
619 nigel 43 if (old_first_char != first_char) fprintf(outfile,
620     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
621     first_char, old_first_char);
622 nigel 37
623 nigel 43 if (old_options != options) fprintf(outfile,
624     "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,
625     old_options);
626     }
627    
628     if (size != gotten_store) fprintf(outfile,
629     "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
630     size, gotten_store);
631    
632     fprintf(outfile, "Capturing subpattern count = %d\n", count);
633     if (backrefmax > 0)
634     fprintf(outfile, "Max back reference = %d\n", backrefmax);
635     if (options == 0) fprintf(outfile, "No options\n");
636     else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",
637     ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
638     ((options & PCRE_CASELESS) != 0)? " caseless" : "",
639     ((options & PCRE_EXTENDED) != 0)? " extended" : "",
640     ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
641     ((options & PCRE_DOTALL) != 0)? " dotall" : "",
642     ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
643     ((options & PCRE_EXTRA) != 0)? " extra" : "",
644     ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");
645    
646     if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
647     fprintf(outfile, "Case state changes\n");
648    
649     if (first_char == -1)
650     {
651     fprintf(outfile, "First char at start or follows \\n\n");
652     }
653     else if (first_char < 0)
654     {
655     fprintf(outfile, "No first char\n");
656     }
657     else
658     {
659     if (isprint(first_char))
660     fprintf(outfile, "First char = \'%c\'\n", first_char);
661 nigel 3 else
662 nigel 43 fprintf(outfile, "First char = %d\n", first_char);
663     }
664 nigel 37
665 nigel 43 if (need_char < 0)
666     {
667     fprintf(outfile, "No need char\n");
668 nigel 3 }
669 nigel 43 else
670     {
671     if (isprint(need_char))
672     fprintf(outfile, "Need char = \'%c\'\n", need_char);
673     else
674     fprintf(outfile, "Need char = %d\n", need_char);
675     }
676 nigel 3 }
677    
678     /* If /S was present, study the regexp to generate additional info to
679     help with the matching. */
680    
681     if (do_study)
682     {
683     if (timeit)
684     {
685     register int i;
686     clock_t time_taken;
687     clock_t start_time = clock();
688 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
689 nigel 3 extra = pcre_study(re, study_options, &error);
690     time_taken = clock() - start_time;
691     if (extra != NULL) free(extra);
692 nigel 27 fprintf(outfile, " Study time %.3f milliseconds\n",
693     ((double)time_taken * 1000.0)/
694     ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
695 nigel 3 }
696    
697     extra = pcre_study(re, study_options, &error);
698     if (error != NULL)
699     fprintf(outfile, "Failed to study: %s\n", error);
700     else if (extra == NULL)
701     fprintf(outfile, "Study returned NULL\n");
702    
703 nigel 25 else if (do_showinfo)
704 nigel 3 {
705 nigel 43 uschar *start_bits = NULL;
706     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
707     if (start_bits == NULL)
708 nigel 3 fprintf(outfile, "No starting character set\n");
709     else
710     {
711     int i;
712     int c = 24;
713     fprintf(outfile, "Starting character set: ");
714     for (i = 0; i < 256; i++)
715     {
716 nigel 43 if ((start_bits[i/8] & (1<<(i%8))) != 0)
717 nigel 3 {
718     if (c > 75)
719     {
720     fprintf(outfile, "\n ");
721     c = 2;
722     }
723     if (isprint(i) && i != ' ')
724     {
725     fprintf(outfile, "%c ", i);
726     c += 2;
727     }
728     else
729     {
730     fprintf(outfile, "\\x%02x ", i);
731     c += 5;
732     }
733     }
734     }
735     fprintf(outfile, "\n");
736     }
737     }
738     }
739     }
740    
741     /* Read data lines and test them */
742    
743     for (;;)
744     {
745 nigel 9 unsigned char *q;
746 nigel 35 unsigned char *bptr = dbuffer;
747 nigel 3 int count, c;
748 nigel 29 int copystrings = 0;
749     int getstrings = 0;
750     int getlist = 0;
751 nigel 39 int gmatched = 0;
752 nigel 35 int start_offset = 0;
753 nigel 41 int g_notempty = 0;
754 nigel 23 int offsets[45];
755 nigel 3 int size_offsets = sizeof(offsets)/sizeof(int);
756    
757     options = 0;
758    
759 nigel 35 if (infile == stdin) printf("data> ");
760 nigel 11 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
761     {
762     done = 1;
763     goto CONTINUE;
764     }
765 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
766 nigel 3
767     len = (int)strlen((char *)buffer);
768     while (len > 0 && isspace(buffer[len-1])) len--;
769     buffer[len] = 0;
770     if (len == 0) break;
771    
772     p = buffer;
773     while (isspace(*p)) p++;
774    
775 nigel 9 q = dbuffer;
776 nigel 3 while ((c = *p++) != 0)
777     {
778     int i = 0;
779     int n = 0;
780     if (c == '\\') switch ((c = *p++))
781     {
782     case 'a': c = 7; break;
783     case 'b': c = '\b'; break;
784     case 'e': c = 27; break;
785     case 'f': c = '\f'; break;
786     case 'n': c = '\n'; break;
787     case 'r': c = '\r'; break;
788     case 't': c = '\t'; break;
789     case 'v': c = '\v'; break;
790    
791     case '0': case '1': case '2': case '3':
792     case '4': case '5': case '6': case '7':
793     c -= '0';
794     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
795     c = c * 8 + *p++ - '0';
796     break;
797    
798     case 'x':
799     c = 0;
800     while (i++ < 2 && isxdigit(*p))
801     {
802     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
803     p++;
804     }
805     break;
806    
807     case 0: /* Allows for an empty line */
808     p--;
809     continue;
810    
811     case 'A': /* Option setting */
812     options |= PCRE_ANCHORED;
813     continue;
814    
815     case 'B':
816     options |= PCRE_NOTBOL;
817     continue;
818    
819 nigel 29 case 'C':
820     while(isdigit(*p)) n = n * 10 + *p++ - '0';
821     copystrings |= 1 << n;
822     continue;
823    
824     case 'G':
825     while(isdigit(*p)) n = n * 10 + *p++ - '0';
826     getstrings |= 1 << n;
827     continue;
828    
829     case 'L':
830     getlist = 1;
831     continue;
832    
833 nigel 37 case 'N':
834     options |= PCRE_NOTEMPTY;
835     continue;
836    
837 nigel 3 case 'O':
838     while(isdigit(*p)) n = n * 10 + *p++ - '0';
839 nigel 9 if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;
840 nigel 3 continue;
841    
842     case 'Z':
843     options |= PCRE_NOTEOL;
844     continue;
845     }
846 nigel 9 *q++ = c;
847 nigel 3 }
848 nigel 9 *q = 0;
849     len = q - dbuffer;
850 nigel 3
851     /* Handle matching via the POSIX interface, which does not
852     support timing. */
853    
854 nigel 37 #if !defined NOPOSIX
855 nigel 3 if (posix || do_posix)
856     {
857     int rc;
858     int eflags = 0;
859 nigel 41 regmatch_t pmatch[sizeof(offsets)/sizeof(int)];
860 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
861     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
862    
863 nigel 41 rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);
864 nigel 3
865     if (rc != 0)
866     {
867     (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
868     fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
869     }
870     else
871     {
872 nigel 7 size_t i;
873 nigel 41 for (i = 0; i < size_offsets; i++)
874 nigel 3 {
875     if (pmatch[i].rm_so >= 0)
876     {
877 nigel 23 fprintf(outfile, "%2d: ", (int)i);
878 nigel 3 pchars(dbuffer + pmatch[i].rm_so,
879     pmatch[i].rm_eo - pmatch[i].rm_so);
880     fprintf(outfile, "\n");
881 nigel 35 if (i == 0 && do_showrest)
882     {
883     fprintf(outfile, " 0+ ");
884     pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);
885     fprintf(outfile, "\n");
886     }
887 nigel 3 }
888     }
889     }
890     }
891    
892 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
893 nigel 3
894 nigel 37 else
895     #endif /* !defined NOPOSIX */
896    
897 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
898 nigel 3 {
899     if (timeit)
900     {
901     register int i;
902     clock_t time_taken;
903     clock_t start_time = clock();
904 nigel 27 for (i = 0; i < LOOPREPEAT; i++)
905 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
906 nigel 41 start_offset, options | g_notempty, offsets, size_offsets);
907 nigel 3 time_taken = clock() - start_time;
908 nigel 27 fprintf(outfile, "Execute time %.3f milliseconds\n",
909     ((double)time_taken * 1000.0)/
910     ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
911 nigel 3 }
912    
913 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
914 nigel 41 start_offset, options | g_notempty, offsets, size_offsets);
915 nigel 3
916     if (count == 0)
917     {
918     fprintf(outfile, "Matched, but too many substrings\n");
919 nigel 23 count = size_offsets/3;
920 nigel 3 }
921    
922 nigel 39 /* Matched */
923    
924 nigel 3 if (count >= 0)
925     {
926     int i;
927 nigel 29 for (i = 0; i < count * 2; i += 2)
928 nigel 3 {
929     if (offsets[i] < 0)
930     fprintf(outfile, "%2d: <unset>\n", i/2);
931     else
932     {
933     fprintf(outfile, "%2d: ", i/2);
934 nigel 35 pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);
935 nigel 3 fprintf(outfile, "\n");
936 nigel 35 if (i == 0)
937     {
938     if (do_showrest)
939     {
940     fprintf(outfile, " 0+ ");
941     pchars(bptr + offsets[i+1], len - offsets[i+1]);
942     fprintf(outfile, "\n");
943     }
944     }
945 nigel 3 }
946     }
947 nigel 29
948     for (i = 0; i < 32; i++)
949     {
950     if ((copystrings & (1 << i)) != 0)
951     {
952 nigel 37 char copybuffer[16];
953 nigel 35 int rc = pcre_copy_substring((char *)bptr, offsets, count,
954 nigel 37 i, copybuffer, sizeof(copybuffer));
955 nigel 29 if (rc < 0)
956     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
957     else
958 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
959 nigel 29 }
960     }
961    
962     for (i = 0; i < 32; i++)
963     {
964     if ((getstrings & (1 << i)) != 0)
965     {
966     const char *substring;
967 nigel 35 int rc = pcre_get_substring((char *)bptr, offsets, count,
968 nigel 29 i, &substring);
969     if (rc < 0)
970     fprintf(outfile, "get substring %d failed %d\n", i, rc);
971     else
972     {
973     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
974     free((void *)substring);
975     }
976     }
977     }
978    
979     if (getlist)
980     {
981     const char **stringlist;
982 nigel 35 int rc = pcre_get_substring_list((char *)bptr, offsets, count,
983 nigel 29 &stringlist);
984     if (rc < 0)
985     fprintf(outfile, "get substring list failed %d\n", rc);
986     else
987     {
988     for (i = 0; i < count; i++)
989     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
990     if (stringlist[i] != NULL)
991     fprintf(outfile, "string list not terminated by NULL\n");
992     free((void *)stringlist);
993     }
994     }
995 nigel 39 }
996 nigel 29
997 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
998 nigel 47 g_notempty after a null match, this is not necessarily the end.
999 nigel 41 We want to advance the start offset, and continue. Fudge the offset
1000     values to achieve this. We won't be at the end of the string - that
1001 nigel 47 was checked before setting g_notempty. */
1002 nigel 39
1003 nigel 3 else
1004     {
1005 nigel 41 if (g_notempty != 0)
1006 nigel 35 {
1007 nigel 41 offsets[0] = start_offset;
1008     offsets[1] = start_offset + 1;
1009 nigel 35 }
1010 nigel 41 else
1011     {
1012     if (gmatched == 0) /* Error if no previous matches */
1013     {
1014     if (count == -1) fprintf(outfile, "No match\n");
1015     else fprintf(outfile, "Error %d\n", count);
1016     }
1017     break; /* Out of the /g loop */
1018     }
1019 nigel 3 }
1020 nigel 35
1021 nigel 39 /* If not /g or /G we are done */
1022    
1023     if (!do_g && !do_G) break;
1024    
1025 nigel 41 /* If we have matched an empty string, first check to see if we are at
1026     the end of the subject. If so, the /g loop is over. Otherwise, mimic
1027     what Perl's /g options does. This turns out to be rather cunning. First
1028 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1029     same point. If this fails (picked up above) we advance to the next
1030     character. */
1031 nigel 39
1032 nigel 41 g_notempty = 0;
1033     if (offsets[0] == offsets[1])
1034     {
1035     if (offsets[0] == len) break;
1036 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1037 nigel 41 }
1038 nigel 39
1039     /* For /g, update the start offset, leaving the rest alone */
1040    
1041     if (do_g) start_offset = offsets[1];
1042    
1043     /* For /G, update the pointer and length */
1044    
1045     else
1046 nigel 35 {
1047 nigel 39 bptr += offsets[1];
1048     len -= offsets[1];
1049 nigel 35 }
1050 nigel 39 } /* End of loop for /g and /G */
1051     } /* End of loop for data lines */
1052 nigel 3
1053 nigel 11 CONTINUE:
1054 nigel 37
1055     #if !defined NOPOSIX
1056 nigel 3 if (posix || do_posix) regfree(&preg);
1057 nigel 37 #endif
1058    
1059 nigel 3 if (re != NULL) free(re);
1060     if (extra != NULL) free(extra);
1061 nigel 25 if (tables != NULL)
1062     {
1063     free((void *)tables);
1064     setlocale(LC_CTYPE, "C");
1065     }
1066 nigel 3 }
1067    
1068     fprintf(outfile, "\n");
1069     return 0;
1070     }
1071    
1072     /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12