/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 35 - (hide annotations) (download)
Sat Feb 24 21:39:05 2007 UTC (7 years, 6 months ago) by nigel
File MIME type: text/plain
File size: 24927 byte(s)
Load pcre-2.06 into code/trunk.

/*************************************************
*             PCRE testing program               *
*************************************************/

#include <ctype.h>
#include <locale.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/* Use the internal info for displaying the results of pcre_study(). */

#include "internal.h"
#include "pcreposix.h"

/* Fallback for libraries that do not define CLOCKS_PER_SEC: use CLK_TCK when
it is available, otherwise assume 100 clock ticks per second. */

#ifndef CLOCKS_PER_SEC
#ifdef CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif

/* Number of iterations executed by each of the timing loops in main(). */

#define LOOPREPEAT 20000

/* Stream that receives all test output; file-scope so that pchars() and
new_malloc() can write to it. */

static FILE *outfile;

/* When non-zero, new_malloc() reports the size of each allocation. */

static int log_store = 0;



/* Debugging function to print the internal form of the regex. This is the same
code as contained in pcre.c under the DEBUG macro.

OP_names holds the printable name of each opcode and is indexed directly by
the opcode byte in print_internals(), so the order of the entries must match
the opcode numbering used by the compiled pattern. */

static const char *OP_names[] = {
  "End", "\\A", "\\B", "\\b", "\\D", "\\d",
  "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
  "Opt", "^", "$", "Any", "chars", "not",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{",
  "class", "Ref",
  "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
  "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
  "Brazero", "Braminzero", "Bra"
};


51 nigel 23 static void print_internals(pcre *re, FILE *outfile)
52 nigel 3 {
53     unsigned char *code = ((real_pcre *)re)->code;
54    
55 nigel 23 fprintf(outfile, "------------------------------------------------------------------\n");
56 nigel 3
57     for(;;)
58     {
59     int c;
60     int charlength;
61    
62 nigel 23 fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));
63 nigel 3
64     if (*code >= OP_BRA)
65     {
66 nigel 23 fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
67 nigel 3 code += 2;
68     }
69    
70     else switch(*code)
71     {
72     case OP_END:
73 nigel 23 fprintf(outfile, " %s\n", OP_names[*code]);
74     fprintf(outfile, "------------------------------------------------------------------\n");
75 nigel 3 return;
76    
77 nigel 23 case OP_OPT:
78     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);
79     code++;
80     break;
81    
82     case OP_COND:
83     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);
84     code += 2;
85     break;
86    
87     case OP_CREF:
88     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);
89     code++;
90     break;
91    
92 nigel 3 case OP_CHARS:
93     charlength = *(++code);
94 nigel 23 fprintf(outfile, "%3d ", charlength);
95 nigel 3 while (charlength-- > 0)
96 nigel 23 if (isprint(c = *(++code))) fprintf(outfile, "%c", c);
97     else fprintf(outfile, "\\x%02x", c);
98 nigel 3 break;
99    
100     case OP_KETRMAX:
101     case OP_KETRMIN:
102     case OP_ALT:
103     case OP_KET:
104     case OP_ASSERT:
105     case OP_ASSERT_NOT:
106 nigel 23 case OP_ASSERTBACK:
107     case OP_ASSERTBACK_NOT:
108 nigel 3 case OP_ONCE:
109 nigel 23 fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
110 nigel 3 code += 2;
111     break;
112    
113 nigel 23 case OP_REVERSE:
114     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
115     code += 2;
116     break;
117    
118 nigel 3 case OP_STAR:
119     case OP_MINSTAR:
120     case OP_PLUS:
121     case OP_MINPLUS:
122     case OP_QUERY:
123     case OP_MINQUERY:
124     case OP_TYPESTAR:
125     case OP_TYPEMINSTAR:
126     case OP_TYPEPLUS:
127     case OP_TYPEMINPLUS:
128     case OP_TYPEQUERY:
129     case OP_TYPEMINQUERY:
130     if (*code >= OP_TYPESTAR)
131 nigel 23 fprintf(outfile, " %s", OP_names[code[1]]);
132     else if (isprint(c = code[1])) fprintf(outfile, " %c", c);
133     else fprintf(outfile, " \\x%02x", c);
134     fprintf(outfile, "%s", OP_names[*code++]);
135 nigel 3 break;
136    
137     case OP_EXACT:
138     case OP_UPTO:
139     case OP_MINUPTO:
140 nigel 23 if (isprint(c = code[3])) fprintf(outfile, " %c{", c);
141     else fprintf(outfile, " \\x%02x{", c);
142     if (*code != OP_EXACT) fprintf(outfile, ",");
143     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
144     if (*code == OP_MINUPTO) fprintf(outfile, "?");
145 nigel 3 code += 3;
146     break;
147    
148     case OP_TYPEEXACT:
149     case OP_TYPEUPTO:
150     case OP_TYPEMINUPTO:
151 nigel 23 fprintf(outfile, " %s{", OP_names[code[3]]);
152     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");
153     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
154     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");
155 nigel 3 code += 3;
156     break;
157    
158     case OP_NOT:
159 nigel 23 if (isprint(c = *(++code))) fprintf(outfile, " [^%c]", c);
160     else fprintf(outfile, " [^\\x%02x]", c);
161 nigel 3 break;
162    
163     case OP_NOTSTAR:
164     case OP_NOTMINSTAR:
165     case OP_NOTPLUS:
166     case OP_NOTMINPLUS:
167     case OP_NOTQUERY:
168     case OP_NOTMINQUERY:
169 nigel 23 if (isprint(c = code[1])) fprintf(outfile, " [^%c]", c);
170     else fprintf(outfile, " [^\\x%02x]", c);
171     fprintf(outfile, "%s", OP_names[*code++]);
172 nigel 3 break;
173    
174     case OP_NOTEXACT:
175     case OP_NOTUPTO:
176     case OP_NOTMINUPTO:
177 nigel 23 if (isprint(c = code[3])) fprintf(outfile, " [^%c]{", c);
178     else fprintf(outfile, " [^\\x%02x]{", c);
179     if (*code != OP_NOTEXACT) fprintf(outfile, ",");
180     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
181     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");
182 nigel 3 code += 3;
183     break;
184    
185     case OP_REF:
186 nigel 23 fprintf(outfile, " \\%d", *(++code));
187 nigel 9 code++;
188     goto CLASS_REF_REPEAT;
189 nigel 3
190     case OP_CLASS:
191     {
192     int i, min, max;
193 nigel 23 code++;
194     fprintf(outfile, " [");
195 nigel 3
196     for (i = 0; i < 256; i++)
197     {
198     if ((code[i/8] & (1 << (i&7))) != 0)
199     {
200     int j;
201     for (j = i+1; j < 256; j++)
202     if ((code[j/8] & (1 << (j&7))) == 0) break;
203 nigel 23 if (i == '-' || i == ']') fprintf(outfile, "\\");
204     if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);
205 nigel 3 if (--j > i)
206     {
207 nigel 23 fprintf(outfile, "-");
208     if (j == '-' || j == ']') fprintf(outfile, "\\");
209     if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);
210 nigel 3 }
211     i = j;
212     }
213     }
214 nigel 23 fprintf(outfile, "]");
215 nigel 3 code += 32;
216    
217 nigel 9 CLASS_REF_REPEAT:
218    
219 nigel 3 switch(*code)
220     {
221     case OP_CRSTAR:
222     case OP_CRMINSTAR:
223     case OP_CRPLUS:
224     case OP_CRMINPLUS:
225     case OP_CRQUERY:
226     case OP_CRMINQUERY:
227 nigel 23 fprintf(outfile, "%s", OP_names[*code]);
228 nigel 3 break;
229    
230     case OP_CRRANGE:
231     case OP_CRMINRANGE:
232     min = (code[1] << 8) + code[2];
233     max = (code[3] << 8) + code[4];
234 nigel 23 if (max == 0) fprintf(outfile, "{%d,}", min);
235     else fprintf(outfile, "{%d,%d}", min, max);
236     if (*code == OP_CRMINRANGE) fprintf(outfile, "?");
237 nigel 3 code += 4;
238     break;
239    
240     default:
241     code--;
242     }
243     }
244     break;
245    
246     /* Anything else is just a one-node item */
247    
248     default:
249 nigel 23 fprintf(outfile, " %s", OP_names[*code]);
250 nigel 3 break;
251     }
252    
253     code++;
254 nigel 23 fprintf(outfile, "\n");
255 nigel 3 }
256     }
257    
258    
259    
260     /* Character string printing function. */
261    
262     static void pchars(unsigned char *p, int length)
263     {
264     int c;
265     while (length-- > 0)
266     if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
267     else fprintf(outfile, "\\x%02x", c);
268     }
269    
270    
271    
272     /* Alternative malloc function, to test functionality and show the size of the
273     compiled re. */
274    
275     static void *new_malloc(size_t size)
276     {
277 nigel 31 if (log_store)
278 nigel 35 fprintf(outfile, "Memory allocation (code space): %d\n",
279     (int)((int)size - offsetof(real_pcre, code[0])));
280 nigel 3 return malloc(size);
281     }
282    
283    
284    
285     /* Read lines from named file or stdin and write to named file or stdout; lines
286     consist of a regular expression, in delimiters and optionally followed by
287     options, followed by a set of test data, terminated by an empty line. */
288    
289     int main(int argc, char **argv)
290     {
291     FILE *infile = stdin;
292     int options = 0;
293     int study_options = 0;
294     int op = 1;
295     int timeit = 0;
296     int showinfo = 0;
297 nigel 31 int showstore = 0;
298 nigel 3 int posix = 0;
299     int debug = 0;
300 nigel 11 int done = 0;
301 nigel 3 unsigned char buffer[30000];
302     unsigned char dbuffer[1024];
303    
304     /* Static so that new_malloc can use it. */
305    
306     outfile = stdout;
307    
308     /* Scan options */
309    
310     while (argc > 1 && argv[op][0] == '-')
311     {
312 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
313     showstore = 1;
314 nigel 3 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
315     else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
316     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
317     else if (strcmp(argv[op], "-p") == 0) posix = 1;
318     else
319     {
320     printf("*** Unknown option %s\n", argv[op]);
321 nigel 25 printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");
322     printf(" -d debug: show compiled code; implies -i\n"
323     " -i show information about compiled pattern\n"
324     " -p use POSIX interface\n"
325     " -s output store information\n"
326     " -t time compilation and execution\n");
327 nigel 3 return 1;
328     }
329     op++;
330     argc--;
331     }
332    
333     /* Sort out the input and output files */
334    
335     if (argc > 1)
336     {
337     infile = fopen(argv[op], "r");
338     if (infile == NULL)
339     {
340     printf("** Failed to open %s\n", argv[op]);
341     return 1;
342     }
343     }
344    
345     if (argc > 2)
346     {
347     outfile = fopen(argv[op+1], "w");
348     if (outfile == NULL)
349     {
350     printf("** Failed to open %s\n", argv[op+1]);
351     return 1;
352     }
353     }
354    
355     /* Set alternative malloc function */
356    
357     pcre_malloc = new_malloc;
358    
359 nigel 23 /* Heading line, then prompt for first regex if stdin */
360 nigel 3
361     fprintf(outfile, "PCRE version %s\n\n", pcre_version());
362    
363     /* Main loop */
364    
365 nigel 11 while (!done)
366 nigel 3 {
367     pcre *re = NULL;
368     pcre_extra *extra = NULL;
369     regex_t preg;
370 nigel 7 const char *error;
371 nigel 25 unsigned char *p, *pp, *ppp;
372     unsigned const char *tables = NULL;
373 nigel 3 int do_study = 0;
374 nigel 25 int do_debug = debug;
375 nigel 35 int do_G = 0;
376     int do_g = 0;
377 nigel 25 int do_showinfo = showinfo;
378 nigel 35 int do_showrest = 0;
379 nigel 3 int do_posix = 0;
380     int erroroffset, len, delimiter;
381    
382     if (infile == stdin) printf(" re> ");
383     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
384 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
385 nigel 3
386     p = buffer;
387     while (isspace(*p)) p++;
388     if (*p == 0) continue;
389    
390     /* Get the delimiter and seek the end of the pattern; if is isn't
391     complete, read more. */
392    
393     delimiter = *p++;
394    
395 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
396 nigel 3 {
397 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
398 nigel 3 goto SKIP_DATA;
399     }
400    
401     pp = p;
402    
403     for(;;)
404     {
405 nigel 29 while (*pp != 0)
406     {
407     if (*pp == '\\' && pp[1] != 0) pp++;
408     else if (*pp == delimiter) break;
409     pp++;
410     }
411 nigel 3 if (*pp != 0) break;
412    
413     len = sizeof(buffer) - (pp - buffer);
414     if (len < 256)
415     {
416     fprintf(outfile, "** Expression too long - missing delimiter?\n");
417     goto SKIP_DATA;
418     }
419    
420     if (infile == stdin) printf(" > ");
421     if (fgets((char *)pp, len, infile) == NULL)
422     {
423     fprintf(outfile, "** Unexpected EOF\n");
424 nigel 11 done = 1;
425     goto CONTINUE;
426 nigel 3 }
427 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
428 nigel 3 }
429    
430 nigel 29 /* If the first character after the delimiter is backslash, make
431     the pattern end with backslash. This is purely to provide a way
432     of testing for the error message when a pattern ends with backslash. */
433    
434     if (pp[1] == '\\') *pp++ = '\\';
435    
436 nigel 3 /* Terminate the pattern at the delimiter */
437    
438     *pp++ = 0;
439    
440     /* Look for options after final delimiter */
441    
442     options = 0;
443     study_options = 0;
444 nigel 31 log_store = showstore; /* default from command line */
445    
446 nigel 3 while (*pp != 0)
447     {
448     switch (*pp++)
449     {
450 nigel 35 case 'g': do_g = 1; break;
451 nigel 3 case 'i': options |= PCRE_CASELESS; break;
452     case 'm': options |= PCRE_MULTILINE; break;
453     case 's': options |= PCRE_DOTALL; break;
454     case 'x': options |= PCRE_EXTENDED; break;
455 nigel 25
456 nigel 35 case '+': do_showrest = 1; break;
457 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
458 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
459 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
460 nigel 35 case 'G': do_G = 1; break;
461 nigel 25 case 'I': do_showinfo = 1; break;
462 nigel 31 case 'M': log_store = 1; break;
463 nigel 3 case 'P': do_posix = 1; break;
464     case 'S': do_study = 1; break;
465 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
466 nigel 3 case 'X': options |= PCRE_EXTRA; break;
467 nigel 25
468     case 'L':
469     ppp = pp;
470     while (*ppp != '\n' && *ppp != ' ') ppp++;
471     *ppp = 0;
472     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
473     {
474     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
475     goto SKIP_DATA;
476     }
477     tables = pcre_maketables();
478     pp = ppp;
479     break;
480    
481 nigel 3 case '\n': case ' ': break;
482     default:
483     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
484     goto SKIP_DATA;
485     }
486     }
487    
488 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
489 nigel 25 timing, showing, or debugging options, nor the ability to pass over
490     local character tables. */
491 nigel 3
492     if (posix || do_posix)
493     {
494     int rc;
495     int cflags = 0;
496     if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
497     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
498     rc = regcomp(&preg, (char *)p, cflags);
499    
500     /* Compilation failed; go back for another re, skipping to blank line
501     if non-interactive. */
502    
503     if (rc != 0)
504     {
505     (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
506     fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
507     goto SKIP_DATA;
508     }
509     }
510    
511     /* Handle compiling via the native interface */
512    
513     else
514     {
515     if (timeit)
516     {
517     register int i;
518     clock_t time_taken;
519     clock_t start_time = clock();
520 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
521 nigel 3 {
522 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
523 nigel 3 if (re != NULL) free(re);
524     }
525     time_taken = clock() - start_time;
526 nigel 27 fprintf(outfile, "Compile time %.3f milliseconds\n",
527     ((double)time_taken * 1000.0) /
528     ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
529 nigel 3 }
530    
531 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
532 nigel 3
533     /* Compilation failed; go back for another re, skipping to blank line
534     if non-interactive. */
535    
536     if (re == NULL)
537     {
538     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
539     SKIP_DATA:
540     if (infile != stdin)
541     {
542     for (;;)
543     {
544     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
545 nigel 11 {
546     done = 1;
547     goto CONTINUE;
548     }
549 nigel 3 len = (int)strlen((char *)buffer);
550     while (len > 0 && isspace(buffer[len-1])) len--;
551     if (len == 0) break;
552     }
553     fprintf(outfile, "\n");
554     }
555 nigel 25 goto CONTINUE;
556 nigel 3 }
557    
558     /* Compilation succeeded; print data if required */
559    
560 nigel 25 if (do_showinfo)
561 nigel 3 {
562     int first_char, count;
563    
564 nigel 25 if (do_debug) print_internals(re, outfile);
565 nigel 3
566     count = pcre_info(re, &options, &first_char);
567     if (count < 0) fprintf(outfile,
568     "Error %d while reading info\n", count);
569     else
570     {
571     fprintf(outfile, "Identifying subpattern count = %d\n", count);
572     if (options == 0) fprintf(outfile, "No options\n");
573 nigel 19 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",
574 nigel 3 ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
575     ((options & PCRE_CASELESS) != 0)? " caseless" : "",
576     ((options & PCRE_EXTENDED) != 0)? " extended" : "",
577     ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
578     ((options & PCRE_DOTALL) != 0)? " dotall" : "",
579     ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
580 nigel 19 ((options & PCRE_EXTRA) != 0)? " extra" : "",
581     ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");
582 nigel 3 if (first_char == -1)
583     {
584     fprintf(outfile, "First char at start or follows \\n\n");
585     }
586     else if (first_char < 0)
587     {
588     fprintf(outfile, "No first char\n");
589     }
590     else
591     {
592     if (isprint(first_char))
593     fprintf(outfile, "First char = \'%c\'\n", first_char);
594     else
595     fprintf(outfile, "First char = %d\n", first_char);
596     }
597     }
598     }
599    
600     /* If /S was present, study the regexp to generate additional info to
601     help with the matching. */
602    
603     if (do_study)
604     {
605     if (timeit)
606     {
607     register int i;
608     clock_t time_taken;
609     clock_t start_time = clock();
610 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
611 nigel 3 extra = pcre_study(re, study_options, &error);
612     time_taken = clock() - start_time;
613     if (extra != NULL) free(extra);
614 nigel 27 fprintf(outfile, " Study time %.3f milliseconds\n",
615     ((double)time_taken * 1000.0)/
616     ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
617 nigel 3 }
618    
619     extra = pcre_study(re, study_options, &error);
620     if (error != NULL)
621     fprintf(outfile, "Failed to study: %s\n", error);
622     else if (extra == NULL)
623     fprintf(outfile, "Study returned NULL\n");
624    
625     /* This looks at internal information. A bit kludgy to do it this
626     way, but it is useful for testing. */
627    
628 nigel 25 else if (do_showinfo)
629 nigel 3 {
630     real_pcre_extra *xx = (real_pcre_extra *)extra;
631     if ((xx->options & PCRE_STUDY_MAPPED) == 0)
632     fprintf(outfile, "No starting character set\n");
633     else
634     {
635     int i;
636     int c = 24;
637     fprintf(outfile, "Starting character set: ");
638     for (i = 0; i < 256; i++)
639     {
640     if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)
641     {
642     if (c > 75)
643     {
644     fprintf(outfile, "\n ");
645     c = 2;
646     }
647     if (isprint(i) && i != ' ')
648     {
649     fprintf(outfile, "%c ", i);
650     c += 2;
651     }
652     else
653     {
654     fprintf(outfile, "\\x%02x ", i);
655     c += 5;
656     }
657     }
658     }
659     fprintf(outfile, "\n");
660     }
661     }
662     }
663     }
664    
665     /* Read data lines and test them */
666    
667     for (;;)
668     {
669 nigel 9 unsigned char *q;
670 nigel 35 unsigned char *bptr = dbuffer;
671 nigel 3 int count, c;
672 nigel 29 int copystrings = 0;
673     int getstrings = 0;
674     int getlist = 0;
675 nigel 35 int start_offset = 0;
676 nigel 23 int offsets[45];
677 nigel 3 int size_offsets = sizeof(offsets)/sizeof(int);
678    
679     options = 0;
680    
681 nigel 35 if (infile == stdin) printf("data> ");
682 nigel 11 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
683     {
684     done = 1;
685     goto CONTINUE;
686     }
687 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
688 nigel 3
689     len = (int)strlen((char *)buffer);
690     while (len > 0 && isspace(buffer[len-1])) len--;
691     buffer[len] = 0;
692     if (len == 0) break;
693    
694     p = buffer;
695     while (isspace(*p)) p++;
696    
697 nigel 9 q = dbuffer;
698 nigel 3 while ((c = *p++) != 0)
699     {
700     int i = 0;
701     int n = 0;
702     if (c == '\\') switch ((c = *p++))
703     {
704     case 'a': c = 7; break;
705     case 'b': c = '\b'; break;
706     case 'e': c = 27; break;
707     case 'f': c = '\f'; break;
708     case 'n': c = '\n'; break;
709     case 'r': c = '\r'; break;
710     case 't': c = '\t'; break;
711     case 'v': c = '\v'; break;
712    
713     case '0': case '1': case '2': case '3':
714     case '4': case '5': case '6': case '7':
715     c -= '0';
716     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
717     c = c * 8 + *p++ - '0';
718     break;
719    
720     case 'x':
721     c = 0;
722     while (i++ < 2 && isxdigit(*p))
723     {
724     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
725     p++;
726     }
727     break;
728    
729     case 0: /* Allows for an empty line */
730     p--;
731     continue;
732    
733     case 'A': /* Option setting */
734     options |= PCRE_ANCHORED;
735     continue;
736    
737     case 'B':
738     options |= PCRE_NOTBOL;
739     continue;
740    
741 nigel 29 case 'C':
742     while(isdigit(*p)) n = n * 10 + *p++ - '0';
743     copystrings |= 1 << n;
744     continue;
745    
746     case 'G':
747     while(isdigit(*p)) n = n * 10 + *p++ - '0';
748     getstrings |= 1 << n;
749     continue;
750    
751     case 'L':
752     getlist = 1;
753     continue;
754    
755 nigel 3 case 'O':
756     while(isdigit(*p)) n = n * 10 + *p++ - '0';
757 nigel 9 if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;
758 nigel 3 continue;
759    
760     case 'Z':
761     options |= PCRE_NOTEOL;
762     continue;
763     }
764 nigel 9 *q++ = c;
765 nigel 3 }
766 nigel 9 *q = 0;
767     len = q - dbuffer;
768 nigel 3
769     /* Handle matching via the POSIX interface, which does not
770     support timing. */
771    
772     if (posix || do_posix)
773     {
774     int rc;
775     int eflags = 0;
776     regmatch_t pmatch[30];
777     if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
778     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
779    
780 nigel 35 rc = regexec(&preg, (unsigned char *)bptr,
781     sizeof(pmatch)/sizeof(regmatch_t), pmatch, eflags);
782 nigel 3
783     if (rc != 0)
784     {
785     (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
786     fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
787     }
788     else
789     {
790 nigel 7 size_t i;
791 nigel 3 for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)
792     {
793     if (pmatch[i].rm_so >= 0)
794     {
795 nigel 23 fprintf(outfile, "%2d: ", (int)i);
796 nigel 3 pchars(dbuffer + pmatch[i].rm_so,
797     pmatch[i].rm_eo - pmatch[i].rm_so);
798     fprintf(outfile, "\n");
799 nigel 35 if (i == 0 && do_showrest)
800     {
801     fprintf(outfile, " 0+ ");
802     pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);
803     fprintf(outfile, "\n");
804     }
805 nigel 3 }
806     }
807     }
808     }
809    
810 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
811 nigel 3
812 nigel 35 else for (;;)
813 nigel 3 {
814     if (timeit)
815     {
816     register int i;
817     clock_t time_taken;
818     clock_t start_time = clock();
819 nigel 27 for (i = 0; i < LOOPREPEAT; i++)
820 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
821     (do_g? start_offset : 0), options, offsets, size_offsets);
822 nigel 3 time_taken = clock() - start_time;
823 nigel 27 fprintf(outfile, "Execute time %.3f milliseconds\n",
824     ((double)time_taken * 1000.0)/
825     ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
826 nigel 3 }
827    
828 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
829     (do_g? start_offset : 0), options, offsets, size_offsets);
830 nigel 3
831     if (count == 0)
832     {
833     fprintf(outfile, "Matched, but too many substrings\n");
834 nigel 23 count = size_offsets/3;
835 nigel 3 }
836    
837     if (count >= 0)
838     {
839     int i;
840 nigel 29 for (i = 0; i < count * 2; i += 2)
841 nigel 3 {
842     if (offsets[i] < 0)
843     fprintf(outfile, "%2d: <unset>\n", i/2);
844     else
845     {
846     fprintf(outfile, "%2d: ", i/2);
847 nigel 35 pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);
848 nigel 3 fprintf(outfile, "\n");
849 nigel 35 if (i == 0)
850     {
851     start_offset = offsets[1];
852     if (do_showrest)
853     {
854     fprintf(outfile, " 0+ ");
855     pchars(bptr + offsets[i+1], len - offsets[i+1]);
856     fprintf(outfile, "\n");
857     }
858     }
859 nigel 3 }
860     }
861 nigel 29
862     for (i = 0; i < 32; i++)
863     {
864     if ((copystrings & (1 << i)) != 0)
865     {
866     char buffer[16];
867 nigel 35 int rc = pcre_copy_substring((char *)bptr, offsets, count,
868 nigel 29 i, buffer, sizeof(buffer));
869     if (rc < 0)
870     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
871     else
872     fprintf(outfile, "%2dC %s (%d)\n", i, buffer, rc);
873     }
874     }
875    
876     for (i = 0; i < 32; i++)
877     {
878     if ((getstrings & (1 << i)) != 0)
879     {
880     const char *substring;
881 nigel 35 int rc = pcre_get_substring((char *)bptr, offsets, count,
882 nigel 29 i, &substring);
883     if (rc < 0)
884     fprintf(outfile, "get substring %d failed %d\n", i, rc);
885     else
886     {
887     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
888     free((void *)substring);
889     }
890     }
891     }
892    
893     if (getlist)
894     {
895     const char **stringlist;
896 nigel 35 int rc = pcre_get_substring_list((char *)bptr, offsets, count,
897 nigel 29 &stringlist);
898     if (rc < 0)
899     fprintf(outfile, "get substring list failed %d\n", rc);
900     else
901     {
902     for (i = 0; i < count; i++)
903     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
904     if (stringlist[i] != NULL)
905     fprintf(outfile, "string list not terminated by NULL\n");
906     free((void *)stringlist);
907     }
908     }
909    
910 nigel 3 }
911     else
912     {
913 nigel 35 if (start_offset == 0)
914     {
915     if (count == -1) fprintf(outfile, "No match\n");
916     else fprintf(outfile, "Error %d\n", count);
917     }
918     start_offset = -1;
919 nigel 3 }
920 nigel 35
921     if ((!do_g && !do_G) || start_offset <= 0) break;
922     if (do_G)
923     {
924     bptr += start_offset;
925     len -= start_offset;
926     }
927 nigel 3 }
928     }
929    
930 nigel 11 CONTINUE:
931 nigel 3 if (posix || do_posix) regfree(&preg);
932     if (re != NULL) free(re);
933     if (extra != NULL) free(extra);
934 nigel 25 if (tables != NULL)
935     {
936     free((void *)tables);
937     setlocale(LC_CTYPE, "C");
938     }
939 nigel 3 }
940    
941     fprintf(outfile, "\n");
942     return 0;
943     }
944    
945     /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12