/[pcre]/code/tags/pcre-6.4/pcretest.c
ViewVC logotype

Contents of /code/tags/pcre-6.4/pcretest.c

Parent Directory | Revision Log


Revision 49 - (hide annotations) (download)
Sat Feb 24 21:39:33 2007 UTC (7 years, 1 month ago) by nigel
Original Path: code/trunk/pcretest.c
File MIME type: text/plain
File size: 32558 byte(s)
Load pcre-3.3 into code/trunk.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5     #include <ctype.h>
6     #include <stdio.h>
7     #include <string.h>
8     #include <stdlib.h>
9     #include <time.h>
10 nigel 25 #include <locale.h>
11 nigel 3
12     /* Use the internal info for displaying the results of pcre_study(). */
13    
14     #include "internal.h"
15 nigel 37
16     /* It is possible to compile this test program without including support for
17     testing the POSIX interface, though this is not available via the standard
18     Makefile. */
19    
20     #if !defined NOPOSIX
21 nigel 3 #include "pcreposix.h"
22 nigel 37 #endif
23 nigel 3
24     #ifndef CLOCKS_PER_SEC
25     #ifdef CLK_TCK
26     #define CLOCKS_PER_SEC CLK_TCK
27     #else
28     #define CLOCKS_PER_SEC 100
29     #endif
30     #endif
31    
32 nigel 27 #define LOOPREPEAT 20000
33 nigel 3
34 nigel 23
35 nigel 3 static FILE *outfile;
36     static int log_store = 0;
37 nigel 43 static size_t gotten_store;
38 nigel 3
39    
40    
/* Tables used by the two UTF-8 conversion functions that follow. Entry i of
each table describes characters that are encoded in i+1 bytes. */

/* Largest character value that can be held in i+1 bytes. */

static const int utf8_table1[] = {
  0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};

/* Marker bits that are OR-ed into the first byte of an (i+1)-byte character. */

static const int utf8_table2[] = {
  0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};

/* Mask that selects the data bits held in the first byte. */

static const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

/* Number of entries in each table, as a signed value so that it can be
compared with int loop counters without a signed/unsigned mismatch. */

enum { utf8_table_len = sizeof(utf8_table1)/sizeof(utf8_table1[0]) };


/*************************************************
*       Convert character value to UTF-8         *
*************************************************/

/* This function takes an integer value in the range 0 - 0x7fffffff and
encodes it as a UTF-8 character in 1 to 6 bytes. The bytes are written in the
standard order: the first byte carries the length marker and the most
significant data bits, and each following byte (10xxxxxx) carries the next
six bits, finishing with the least significant six.

NOTE(review): the PCRE library that this program is linked with must use the
same byte order when it reads subject strings - confirm before merging.

Arguments:
  cvalue     the character value
  buffer     pointer to buffer for result - at least 6 bytes long

Returns:     number of bytes placed in the buffer
             -1 if input character is negative
             0 if input character is positive but too big (only when
             int is longer than 32 bits)
*/

static int
ord2utf8(int cvalue, unsigned char *buffer)
{
int extra, j;

if (cvalue < 0) return -1;

for (extra = 0; extra < utf8_table_len; extra++)
  if (cvalue <= utf8_table1[extra]) break;
if (extra >= utf8_table_len) return 0;

/* Fill in the continuation bytes from the end backwards, peeling six bits at
a time off the bottom of the value. What is left belongs in the first byte. */

buffer += extra;
for (j = extra; j > 0; j--)
  {
  *buffer-- = 0x80 | (cvalue & 0x3f);
  cvalue >>= 6;
  }
*buffer = utf8_table2[extra] | (cvalue & utf8_table3[extra]);

return extra + 1;
}


/*************************************************
*         Convert UTF-8 string to value          *
*************************************************/

/* This function takes one or more bytes that represent a UTF-8 character,
and returns the value of the character. The first byte supplies the most
significant bits; each continuation byte supplies the next six bits.

Arguments:
  buffer   a pointer to the byte vector
  vptr     a pointer to an int to receive the value (written only when the
             return value is greater than zero)

Returns:   >  0 => the number of bytes consumed
           -6 to 0 => malformed UTF-8 character at offset = (-return)
*/

int
utf82ord(unsigned char *buffer, int *vptr)
{
int c = *buffer++;
int d = c;
int i, j, s;

/* Count the leading 1-bits of the first byte. */

for (i = -1; i < 6; i++)               /* i is number of additional bytes */
  {
  if ((d & 0x80) == 0) break;
  d <<= 1;
  }

if (i == -1) { *vptr = c; return 1; }  /* ascii character */
if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */

/* i now has a value in the range 1-5. Start with the data bits of the first
byte shifted to the top, then slot each continuation byte in below them. */

s = 6 * i;
d = (c & utf8_table3[i]) << s;

for (j = 0; j < i; j++)
  {
  c = *buffer++;
  if ((c & 0xc0) != 0x80) return -(j+1);
  s -= 6;
  d |= (c & 0x3f) << s;
  }

/* Check that encoding was the correct unique one: the value must need
exactly i additional bytes, otherwise it is an over-long form. */

for (j = 0; j < utf8_table_len; j++)
  if (d <= utf8_table1[j]) break;
if (j != i) return -(i+1);

/* Valid value */

*vptr = d;
return i+1;
}
142    
143    
144    
145    
146    
147    
148 nigel 3 /* Debugging function to print the internal form of the regex. This is the same
149     code as contained in pcre.c under the DEBUG macro. */
150    
/* Printable names for the compiled opcodes. print_internals() indexes this
table directly with an opcode value, so the order of the entries must match
the numbering of the OP_xxx codes (defined outside this file - presumably in
internal.h; verify when adding or reordering entries). */

static const char *OP_names[] = {
  /* Simple one-node items */
  "End",
  "\\A", "\\B", "\\b", "\\D", "\\d", "\\S",
  "\\s", "\\W", "\\w", "\\Z", "\\z",
  "Opt", "^", "$", "Any", "chars", "not",

  /* Three families of nine repeat operators each */
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",

  /* A fourth family with only two "{" entries */
  "*", "*?", "+", "+?", "?", "??", "{", "{",

  "class", "Ref", "Recurse",

  /* Bracket-related items */
  "Alt", "Ket", "KetRmax", "KetRmin",
  "Assert", "Assert not", "AssertB", "AssertB not",
  "Reverse", "Once", "Cond", "Cref",
  "Brazero", "Braminzero", "Bra"
};
164    
165    
166 nigel 37 static void print_internals(pcre *re)
167 nigel 3 {
168     unsigned char *code = ((real_pcre *)re)->code;
169    
170 nigel 23 fprintf(outfile, "------------------------------------------------------------------\n");
171 nigel 3
172     for(;;)
173     {
174     int c;
175     int charlength;
176    
177 nigel 23 fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));
178 nigel 3
179     if (*code >= OP_BRA)
180     {
181 nigel 23 fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
182 nigel 3 code += 2;
183     }
184    
185     else switch(*code)
186     {
187     case OP_END:
188 nigel 23 fprintf(outfile, " %s\n", OP_names[*code]);
189     fprintf(outfile, "------------------------------------------------------------------\n");
190 nigel 3 return;
191    
192 nigel 23 case OP_OPT:
193     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);
194     code++;
195     break;
196    
197     case OP_COND:
198     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);
199     code += 2;
200     break;
201    
202     case OP_CREF:
203     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);
204     code++;
205     break;
206    
207 nigel 3 case OP_CHARS:
208     charlength = *(++code);
209 nigel 23 fprintf(outfile, "%3d ", charlength);
210 nigel 3 while (charlength-- > 0)
211 nigel 23 if (isprint(c = *(++code))) fprintf(outfile, "%c", c);
212     else fprintf(outfile, "\\x%02x", c);
213 nigel 3 break;
214    
215     case OP_KETRMAX:
216     case OP_KETRMIN:
217     case OP_ALT:
218     case OP_KET:
219     case OP_ASSERT:
220     case OP_ASSERT_NOT:
221 nigel 23 case OP_ASSERTBACK:
222     case OP_ASSERTBACK_NOT:
223 nigel 3 case OP_ONCE:
224 nigel 23 fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
225 nigel 3 code += 2;
226     break;
227    
228 nigel 23 case OP_REVERSE:
229     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
230     code += 2;
231     break;
232    
233 nigel 3 case OP_STAR:
234     case OP_MINSTAR:
235     case OP_PLUS:
236     case OP_MINPLUS:
237     case OP_QUERY:
238     case OP_MINQUERY:
239     case OP_TYPESTAR:
240     case OP_TYPEMINSTAR:
241     case OP_TYPEPLUS:
242     case OP_TYPEMINPLUS:
243     case OP_TYPEQUERY:
244     case OP_TYPEMINQUERY:
245     if (*code >= OP_TYPESTAR)
246 nigel 23 fprintf(outfile, " %s", OP_names[code[1]]);
247     else if (isprint(c = code[1])) fprintf(outfile, " %c", c);
248     else fprintf(outfile, " \\x%02x", c);
249     fprintf(outfile, "%s", OP_names[*code++]);
250 nigel 3 break;
251    
252     case OP_EXACT:
253     case OP_UPTO:
254     case OP_MINUPTO:
255 nigel 23 if (isprint(c = code[3])) fprintf(outfile, " %c{", c);
256     else fprintf(outfile, " \\x%02x{", c);
257     if (*code != OP_EXACT) fprintf(outfile, ",");
258     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
259     if (*code == OP_MINUPTO) fprintf(outfile, "?");
260 nigel 3 code += 3;
261     break;
262    
263     case OP_TYPEEXACT:
264     case OP_TYPEUPTO:
265     case OP_TYPEMINUPTO:
266 nigel 23 fprintf(outfile, " %s{", OP_names[code[3]]);
267     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");
268     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
269     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");
270 nigel 3 code += 3;
271     break;
272    
273     case OP_NOT:
274 nigel 23 if (isprint(c = *(++code))) fprintf(outfile, " [^%c]", c);
275     else fprintf(outfile, " [^\\x%02x]", c);
276 nigel 3 break;
277    
278     case OP_NOTSTAR:
279     case OP_NOTMINSTAR:
280     case OP_NOTPLUS:
281     case OP_NOTMINPLUS:
282     case OP_NOTQUERY:
283     case OP_NOTMINQUERY:
284 nigel 23 if (isprint(c = code[1])) fprintf(outfile, " [^%c]", c);
285     else fprintf(outfile, " [^\\x%02x]", c);
286     fprintf(outfile, "%s", OP_names[*code++]);
287 nigel 3 break;
288    
289     case OP_NOTEXACT:
290     case OP_NOTUPTO:
291     case OP_NOTMINUPTO:
292 nigel 23 if (isprint(c = code[3])) fprintf(outfile, " [^%c]{", c);
293     else fprintf(outfile, " [^\\x%02x]{", c);
294     if (*code != OP_NOTEXACT) fprintf(outfile, ",");
295     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
296     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");
297 nigel 3 code += 3;
298     break;
299    
300     case OP_REF:
301 nigel 23 fprintf(outfile, " \\%d", *(++code));
302 nigel 9 code++;
303     goto CLASS_REF_REPEAT;
304 nigel 3
305     case OP_CLASS:
306     {
307     int i, min, max;
308 nigel 23 code++;
309     fprintf(outfile, " [");
310 nigel 3
311     for (i = 0; i < 256; i++)
312     {
313     if ((code[i/8] & (1 << (i&7))) != 0)
314     {
315     int j;
316     for (j = i+1; j < 256; j++)
317     if ((code[j/8] & (1 << (j&7))) == 0) break;
318 nigel 23 if (i == '-' || i == ']') fprintf(outfile, "\\");
319     if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);
320 nigel 3 if (--j > i)
321     {
322 nigel 23 fprintf(outfile, "-");
323     if (j == '-' || j == ']') fprintf(outfile, "\\");
324     if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);
325 nigel 3 }
326     i = j;
327     }
328     }
329 nigel 23 fprintf(outfile, "]");
330 nigel 3 code += 32;
331    
332 nigel 9 CLASS_REF_REPEAT:
333    
334 nigel 3 switch(*code)
335     {
336     case OP_CRSTAR:
337     case OP_CRMINSTAR:
338     case OP_CRPLUS:
339     case OP_CRMINPLUS:
340     case OP_CRQUERY:
341     case OP_CRMINQUERY:
342 nigel 23 fprintf(outfile, "%s", OP_names[*code]);
343 nigel 3 break;
344    
345     case OP_CRRANGE:
346     case OP_CRMINRANGE:
347     min = (code[1] << 8) + code[2];
348     max = (code[3] << 8) + code[4];
349 nigel 23 if (max == 0) fprintf(outfile, "{%d,}", min);
350     else fprintf(outfile, "{%d,%d}", min, max);
351     if (*code == OP_CRMINRANGE) fprintf(outfile, "?");
352 nigel 3 code += 4;
353     break;
354    
355     default:
356     code--;
357     }
358     }
359     break;
360    
361     /* Anything else is just a one-node item */
362    
363     default:
364 nigel 23 fprintf(outfile, " %s", OP_names[*code]);
365 nigel 3 break;
366     }
367    
368     code++;
369 nigel 23 fprintf(outfile, "\n");
370 nigel 3 }
371     }
372    
373    
374    
375 nigel 49 /* Character string printing function. A "normal" and a UTF-8 version. */
376 nigel 3
377 nigel 49 static void pchars(unsigned char *p, int length, int utf8)
378 nigel 3 {
379     int c;
380     while (length-- > 0)
381 nigel 49 {
382     if (utf8)
383     {
384     int rc = utf82ord(p, &c);
385     if (rc > 0)
386     {
387     length -= rc - 1;
388     p += rc;
389     if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);
390     else fprintf(outfile, "\\x{%02x}", c);
391     continue;
392     }
393     }
394    
395     /* Not UTF-8, or malformed UTF-8 */
396    
397 nigel 3 if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
398     else fprintf(outfile, "\\x%02x", c);
399 nigel 49 }
400 nigel 3 }
401    
402    
403    
404     /* Alternative malloc function, to test functionality and show the size of the
405     compiled re. */
406    
407     static void *new_malloc(size_t size)
408     {
409 nigel 43 gotten_store = size;
410 nigel 31 if (log_store)
411 nigel 35 fprintf(outfile, "Memory allocation (code space): %d\n",
412     (int)((int)size - offsetof(real_pcre, code[0])));
413 nigel 3 return malloc(size);
414     }
415    
416    
417    
418 nigel 43
419     /* Get one piece of information from the pcre_fullinfo() function */
420    
421     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
422     {
423     int rc;
424     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
425     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
426     }
427    
428    
429    
430    
431 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
432     consist of a regular expression, in delimiters and optionally followed by
433     options, followed by a set of test data, terminated by an empty line. */
434    
435     int main(int argc, char **argv)
436     {
437     FILE *infile = stdin;
438     int options = 0;
439     int study_options = 0;
440     int op = 1;
441     int timeit = 0;
442     int showinfo = 0;
443 nigel 31 int showstore = 0;
444 nigel 3 int posix = 0;
445     int debug = 0;
446 nigel 11 int done = 0;
447 nigel 3 unsigned char buffer[30000];
448     unsigned char dbuffer[1024];
449    
450     /* Static so that new_malloc can use it. */
451    
452     outfile = stdout;
453    
454     /* Scan options */
455    
456     while (argc > 1 && argv[op][0] == '-')
457     {
458 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
459     showstore = 1;
460 nigel 3 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
461     else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
462     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
463     else if (strcmp(argv[op], "-p") == 0) posix = 1;
464     else
465     {
466     printf("*** Unknown option %s\n", argv[op]);
467 nigel 25 printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");
468     printf(" -d debug: show compiled code; implies -i\n"
469     " -i show information about compiled pattern\n"
470     " -p use POSIX interface\n"
471     " -s output store information\n"
472     " -t time compilation and execution\n");
473 nigel 3 return 1;
474     }
475     op++;
476     argc--;
477     }
478    
479     /* Sort out the input and output files */
480    
481     if (argc > 1)
482     {
483     infile = fopen(argv[op], "r");
484     if (infile == NULL)
485     {
486     printf("** Failed to open %s\n", argv[op]);
487     return 1;
488     }
489     }
490    
491     if (argc > 2)
492     {
493     outfile = fopen(argv[op+1], "w");
494     if (outfile == NULL)
495     {
496     printf("** Failed to open %s\n", argv[op+1]);
497     return 1;
498     }
499     }
500    
501     /* Set alternative malloc function */
502    
503     pcre_malloc = new_malloc;
504    
505 nigel 23 /* Heading line, then prompt for first regex if stdin */
506 nigel 3
507     fprintf(outfile, "PCRE version %s\n\n", pcre_version());
508    
509     /* Main loop */
510    
511 nigel 11 while (!done)
512 nigel 3 {
513     pcre *re = NULL;
514     pcre_extra *extra = NULL;
515 nigel 37
516     #if !defined NOPOSIX /* There are still compilers that require no indent */
517 nigel 3 regex_t preg;
518 nigel 45 int do_posix = 0;
519 nigel 37 #endif
520    
521 nigel 7 const char *error;
522 nigel 25 unsigned char *p, *pp, *ppp;
523     unsigned const char *tables = NULL;
524 nigel 3 int do_study = 0;
525 nigel 25 int do_debug = debug;
526 nigel 35 int do_G = 0;
527     int do_g = 0;
528 nigel 25 int do_showinfo = showinfo;
529 nigel 35 int do_showrest = 0;
530 nigel 49 int utf8 = 0;
531 nigel 3 int erroroffset, len, delimiter;
532    
533     if (infile == stdin) printf(" re> ");
534     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
535 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
536 nigel 3
537     p = buffer;
538     while (isspace(*p)) p++;
539     if (*p == 0) continue;
540    
541     /* Get the delimiter and seek the end of the pattern; if is isn't
542     complete, read more. */
543    
544     delimiter = *p++;
545    
546 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
547 nigel 3 {
548 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
549 nigel 3 goto SKIP_DATA;
550     }
551    
552     pp = p;
553    
554     for(;;)
555     {
556 nigel 29 while (*pp != 0)
557     {
558     if (*pp == '\\' && pp[1] != 0) pp++;
559     else if (*pp == delimiter) break;
560     pp++;
561     }
562 nigel 3 if (*pp != 0) break;
563    
564     len = sizeof(buffer) - (pp - buffer);
565     if (len < 256)
566     {
567     fprintf(outfile, "** Expression too long - missing delimiter?\n");
568     goto SKIP_DATA;
569     }
570    
571     if (infile == stdin) printf(" > ");
572     if (fgets((char *)pp, len, infile) == NULL)
573     {
574     fprintf(outfile, "** Unexpected EOF\n");
575 nigel 11 done = 1;
576     goto CONTINUE;
577 nigel 3 }
578 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
579 nigel 3 }
580    
581 nigel 29 /* If the first character after the delimiter is backslash, make
582     the pattern end with backslash. This is purely to provide a way
583     of testing for the error message when a pattern ends with backslash. */
584    
585     if (pp[1] == '\\') *pp++ = '\\';
586    
587 nigel 3 /* Terminate the pattern at the delimiter */
588    
589     *pp++ = 0;
590    
591     /* Look for options after final delimiter */
592    
593     options = 0;
594     study_options = 0;
595 nigel 31 log_store = showstore; /* default from command line */
596    
597 nigel 3 while (*pp != 0)
598     {
599     switch (*pp++)
600     {
601 nigel 35 case 'g': do_g = 1; break;
602 nigel 3 case 'i': options |= PCRE_CASELESS; break;
603     case 'm': options |= PCRE_MULTILINE; break;
604     case 's': options |= PCRE_DOTALL; break;
605     case 'x': options |= PCRE_EXTENDED; break;
606 nigel 25
607 nigel 35 case '+': do_showrest = 1; break;
608 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
609 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
610 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
611 nigel 35 case 'G': do_G = 1; break;
612 nigel 25 case 'I': do_showinfo = 1; break;
613 nigel 31 case 'M': log_store = 1; break;
614 nigel 37
615     #if !defined NOPOSIX
616 nigel 3 case 'P': do_posix = 1; break;
617 nigel 37 #endif
618    
619 nigel 3 case 'S': do_study = 1; break;
620 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
621 nigel 3 case 'X': options |= PCRE_EXTRA; break;
622 nigel 49 case '8': options |= PCRE_UTF8; utf8 = 1; break;
623 nigel 25
624     case 'L':
625     ppp = pp;
626     while (*ppp != '\n' && *ppp != ' ') ppp++;
627     *ppp = 0;
628     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
629     {
630     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
631     goto SKIP_DATA;
632     }
633     tables = pcre_maketables();
634     pp = ppp;
635     break;
636    
637 nigel 3 case '\n': case ' ': break;
638     default:
639     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
640     goto SKIP_DATA;
641     }
642     }
643    
644 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
645 nigel 25 timing, showing, or debugging options, nor the ability to pass over
646     local character tables. */
647 nigel 3
648 nigel 37 #if !defined NOPOSIX
649 nigel 3 if (posix || do_posix)
650     {
651     int rc;
652     int cflags = 0;
653     if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
654     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
655     rc = regcomp(&preg, (char *)p, cflags);
656    
657     /* Compilation failed; go back for another re, skipping to blank line
658     if non-interactive. */
659    
660     if (rc != 0)
661     {
662     (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
663     fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
664     goto SKIP_DATA;
665     }
666     }
667    
668     /* Handle compiling via the native interface */
669    
670     else
671 nigel 37 #endif /* !defined NOPOSIX */
672    
673 nigel 3 {
674     if (timeit)
675     {
676     register int i;
677     clock_t time_taken;
678     clock_t start_time = clock();
679 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
680 nigel 3 {
681 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
682 nigel 3 if (re != NULL) free(re);
683     }
684     time_taken = clock() - start_time;
685 nigel 27 fprintf(outfile, "Compile time %.3f milliseconds\n",
686     ((double)time_taken * 1000.0) /
687     ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
688 nigel 3 }
689    
690 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
691 nigel 3
692     /* Compilation failed; go back for another re, skipping to blank line
693     if non-interactive. */
694    
695     if (re == NULL)
696     {
697     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
698     SKIP_DATA:
699     if (infile != stdin)
700     {
701     for (;;)
702     {
703     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
704 nigel 11 {
705     done = 1;
706     goto CONTINUE;
707     }
708 nigel 3 len = (int)strlen((char *)buffer);
709     while (len > 0 && isspace(buffer[len-1])) len--;
710     if (len == 0) break;
711     }
712     fprintf(outfile, "\n");
713     }
714 nigel 25 goto CONTINUE;
715 nigel 3 }
716    
717 nigel 43 /* Compilation succeeded; print data if required. There are now two
718     info-returning functions. The old one has a limited interface and
719     returns only limited data. Check that it agrees with the newer one. */
720 nigel 3
721 nigel 25 if (do_showinfo)
722 nigel 3 {
723 nigel 43 int old_first_char, old_options, old_count;
724     int count, backrefmax, first_char, need_char;
725     size_t size;
726 nigel 3
727 nigel 37 if (do_debug) print_internals(re);
728 nigel 3
729 nigel 43 new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
730     new_info(re, NULL, PCRE_INFO_SIZE, &size);
731     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
732     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
733     new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
734     new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
735    
736     old_count = pcre_info(re, &old_options, &old_first_char);
737 nigel 3 if (count < 0) fprintf(outfile,
738 nigel 43 "Error %d from pcre_info()\n", count);
739 nigel 3 else
740     {
741 nigel 43 if (old_count != count) fprintf(outfile,
742     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
743     old_count);
744 nigel 37
745 nigel 43 if (old_first_char != first_char) fprintf(outfile,
746     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
747     first_char, old_first_char);
748 nigel 37
749 nigel 43 if (old_options != options) fprintf(outfile,
750     "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,
751     old_options);
752     }
753    
754     if (size != gotten_store) fprintf(outfile,
755     "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
756     size, gotten_store);
757    
758     fprintf(outfile, "Capturing subpattern count = %d\n", count);
759     if (backrefmax > 0)
760     fprintf(outfile, "Max back reference = %d\n", backrefmax);
761     if (options == 0) fprintf(outfile, "No options\n");
762 nigel 49 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
763 nigel 43 ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
764     ((options & PCRE_CASELESS) != 0)? " caseless" : "",
765     ((options & PCRE_EXTENDED) != 0)? " extended" : "",
766     ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
767     ((options & PCRE_DOTALL) != 0)? " dotall" : "",
768     ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
769     ((options & PCRE_EXTRA) != 0)? " extra" : "",
770 nigel 49 ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
771     ((options & PCRE_UTF8) != 0)? " utf8" : "");
772 nigel 43
773     if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
774     fprintf(outfile, "Case state changes\n");
775    
776     if (first_char == -1)
777     {
778     fprintf(outfile, "First char at start or follows \\n\n");
779     }
780     else if (first_char < 0)
781     {
782     fprintf(outfile, "No first char\n");
783     }
784     else
785     {
786     if (isprint(first_char))
787     fprintf(outfile, "First char = \'%c\'\n", first_char);
788 nigel 3 else
789 nigel 43 fprintf(outfile, "First char = %d\n", first_char);
790     }
791 nigel 37
792 nigel 43 if (need_char < 0)
793     {
794     fprintf(outfile, "No need char\n");
795 nigel 3 }
796 nigel 43 else
797     {
798     if (isprint(need_char))
799     fprintf(outfile, "Need char = \'%c\'\n", need_char);
800     else
801     fprintf(outfile, "Need char = %d\n", need_char);
802     }
803 nigel 3 }
804    
805     /* If /S was present, study the regexp to generate additional info to
806     help with the matching. */
807    
808     if (do_study)
809     {
810     if (timeit)
811     {
812     register int i;
813     clock_t time_taken;
814     clock_t start_time = clock();
815 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
816 nigel 3 extra = pcre_study(re, study_options, &error);
817     time_taken = clock() - start_time;
818     if (extra != NULL) free(extra);
819 nigel 27 fprintf(outfile, " Study time %.3f milliseconds\n",
820     ((double)time_taken * 1000.0)/
821     ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
822 nigel 3 }
823    
824     extra = pcre_study(re, study_options, &error);
825     if (error != NULL)
826     fprintf(outfile, "Failed to study: %s\n", error);
827     else if (extra == NULL)
828     fprintf(outfile, "Study returned NULL\n");
829    
830 nigel 25 else if (do_showinfo)
831 nigel 3 {
832 nigel 43 uschar *start_bits = NULL;
833     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
834     if (start_bits == NULL)
835 nigel 3 fprintf(outfile, "No starting character set\n");
836     else
837     {
838     int i;
839     int c = 24;
840     fprintf(outfile, "Starting character set: ");
841     for (i = 0; i < 256; i++)
842     {
843 nigel 43 if ((start_bits[i/8] & (1<<(i%8))) != 0)
844 nigel 3 {
845     if (c > 75)
846     {
847     fprintf(outfile, "\n ");
848     c = 2;
849     }
850     if (isprint(i) && i != ' ')
851     {
852     fprintf(outfile, "%c ", i);
853     c += 2;
854     }
855     else
856     {
857     fprintf(outfile, "\\x%02x ", i);
858     c += 5;
859     }
860     }
861     }
862     fprintf(outfile, "\n");
863     }
864     }
865     }
866     }
867    
868     /* Read data lines and test them */
869    
870     for (;;)
871     {
872 nigel 9 unsigned char *q;
873 nigel 35 unsigned char *bptr = dbuffer;
874 nigel 3 int count, c;
875 nigel 29 int copystrings = 0;
876     int getstrings = 0;
877     int getlist = 0;
878 nigel 39 int gmatched = 0;
879 nigel 35 int start_offset = 0;
880 nigel 41 int g_notempty = 0;
881 nigel 23 int offsets[45];
882 nigel 3 int size_offsets = sizeof(offsets)/sizeof(int);
883    
884     options = 0;
885    
886 nigel 35 if (infile == stdin) printf("data> ");
887 nigel 11 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
888     {
889     done = 1;
890     goto CONTINUE;
891     }
892 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
893 nigel 3
894     len = (int)strlen((char *)buffer);
895     while (len > 0 && isspace(buffer[len-1])) len--;
896     buffer[len] = 0;
897     if (len == 0) break;
898    
899     p = buffer;
900     while (isspace(*p)) p++;
901    
902 nigel 9 q = dbuffer;
903 nigel 3 while ((c = *p++) != 0)
904     {
905     int i = 0;
906     int n = 0;
907     if (c == '\\') switch ((c = *p++))
908     {
909     case 'a': c = 7; break;
910     case 'b': c = '\b'; break;
911     case 'e': c = 27; break;
912     case 'f': c = '\f'; break;
913     case 'n': c = '\n'; break;
914     case 'r': c = '\r'; break;
915     case 't': c = '\t'; break;
916     case 'v': c = '\v'; break;
917    
918     case '0': case '1': case '2': case '3':
919     case '4': case '5': case '6': case '7':
920     c -= '0';
921     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
922     c = c * 8 + *p++ - '0';
923     break;
924    
925     case 'x':
926 nigel 49
927     /* Handle \x{..} specially - new Perl thing for utf8 */
928    
929     if (*p == '{')
930     {
931     unsigned char *pt = p;
932     c = 0;
933     while (isxdigit(*(++pt)))
934     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
935     if (*pt == '}')
936     {
937     unsigned char buffer[8];
938     int ii, utn;
939     utn = ord2utf8(c, buffer);
940     for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];
941     c = buffer[ii]; /* Last byte */
942     p = pt + 1;
943     break;
944     }
945     /* Not correct form; fall through */
946     }
947    
948     /* Ordinary \x */
949    
950 nigel 3 c = 0;
951     while (i++ < 2 && isxdigit(*p))
952     {
953     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
954     p++;
955     }
956     break;
957    
958     case 0: /* Allows for an empty line */
959     p--;
960     continue;
961    
962     case 'A': /* Option setting */
963     options |= PCRE_ANCHORED;
964     continue;
965    
966     case 'B':
967     options |= PCRE_NOTBOL;
968     continue;
969    
970 nigel 29 case 'C':
971     while(isdigit(*p)) n = n * 10 + *p++ - '0';
972     copystrings |= 1 << n;
973     continue;
974    
975     case 'G':
976     while(isdigit(*p)) n = n * 10 + *p++ - '0';
977     getstrings |= 1 << n;
978     continue;
979    
980     case 'L':
981     getlist = 1;
982     continue;
983    
984 nigel 37 case 'N':
985     options |= PCRE_NOTEMPTY;
986     continue;
987    
988 nigel 3 case 'O':
989     while(isdigit(*p)) n = n * 10 + *p++ - '0';
990 nigel 9 if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;
991 nigel 3 continue;
992    
993     case 'Z':
994     options |= PCRE_NOTEOL;
995     continue;
996     }
997 nigel 9 *q++ = c;
998 nigel 3 }
999 nigel 9 *q = 0;
1000     len = q - dbuffer;
1001 nigel 3
1002     /* Handle matching via the POSIX interface, which does not
1003     support timing. */
1004    
1005 nigel 37 #if !defined NOPOSIX
1006 nigel 3 if (posix || do_posix)
1007     {
1008     int rc;
1009     int eflags = 0;
1010 nigel 41 regmatch_t pmatch[sizeof(offsets)/sizeof(int)];
1011 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1012     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1013    
1014 nigel 41 rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);
1015 nigel 3
1016     if (rc != 0)
1017     {
1018     (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
1019     fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1020     }
1021     else
1022     {
1023 nigel 7 size_t i;
1024 nigel 41 for (i = 0; i < size_offsets; i++)
1025 nigel 3 {
1026     if (pmatch[i].rm_so >= 0)
1027     {
1028 nigel 23 fprintf(outfile, "%2d: ", (int)i);
1029 nigel 3 pchars(dbuffer + pmatch[i].rm_so,
1030 nigel 49 pmatch[i].rm_eo - pmatch[i].rm_so, utf8);
1031 nigel 3 fprintf(outfile, "\n");
1032 nigel 35 if (i == 0 && do_showrest)
1033     {
1034     fprintf(outfile, " 0+ ");
1035 nigel 49 pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);
1036 nigel 35 fprintf(outfile, "\n");
1037     }
1038 nigel 3 }
1039     }
1040     }
1041     }
1042    
1043 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
1044 nigel 3
1045 nigel 37 else
1046     #endif /* !defined NOPOSIX */
1047    
1048 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
1049 nigel 3 {
1050     if (timeit)
1051     {
1052     register int i;
1053     clock_t time_taken;
1054     clock_t start_time = clock();
1055 nigel 27 for (i = 0; i < LOOPREPEAT; i++)
1056 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
1057 nigel 41 start_offset, options | g_notempty, offsets, size_offsets);
1058 nigel 3 time_taken = clock() - start_time;
1059 nigel 27 fprintf(outfile, "Execute time %.3f milliseconds\n",
1060     ((double)time_taken * 1000.0)/
1061     ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
1062 nigel 3 }
1063    
1064 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
1065 nigel 41 start_offset, options | g_notempty, offsets, size_offsets);
1066 nigel 3
1067     if (count == 0)
1068     {
1069     fprintf(outfile, "Matched, but too many substrings\n");
1070 nigel 23 count = size_offsets/3;
1071 nigel 3 }
1072    
1073 nigel 39 /* Matched */
1074    
1075 nigel 3 if (count >= 0)
1076     {
1077     int i;
1078 nigel 29 for (i = 0; i < count * 2; i += 2)
1079 nigel 3 {
1080     if (offsets[i] < 0)
1081     fprintf(outfile, "%2d: <unset>\n", i/2);
1082     else
1083     {
1084     fprintf(outfile, "%2d: ", i/2);
1085 nigel 49 pchars(bptr + offsets[i], offsets[i+1] - offsets[i], utf8);
1086 nigel 3 fprintf(outfile, "\n");
1087 nigel 35 if (i == 0)
1088     {
1089     if (do_showrest)
1090     {
1091     fprintf(outfile, " 0+ ");
1092 nigel 49 pchars(bptr + offsets[i+1], len - offsets[i+1], utf8);
1093 nigel 35 fprintf(outfile, "\n");
1094     }
1095     }
1096 nigel 3 }
1097     }
1098 nigel 29
1099     for (i = 0; i < 32; i++)
1100     {
1101     if ((copystrings & (1 << i)) != 0)
1102     {
1103 nigel 37 char copybuffer[16];
1104 nigel 35 int rc = pcre_copy_substring((char *)bptr, offsets, count,
1105 nigel 37 i, copybuffer, sizeof(copybuffer));
1106 nigel 29 if (rc < 0)
1107     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1108     else
1109 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1110 nigel 29 }
1111     }
1112    
1113     for (i = 0; i < 32; i++)
1114     {
1115     if ((getstrings & (1 << i)) != 0)
1116     {
1117     const char *substring;
1118 nigel 35 int rc = pcre_get_substring((char *)bptr, offsets, count,
1119 nigel 29 i, &substring);
1120     if (rc < 0)
1121     fprintf(outfile, "get substring %d failed %d\n", i, rc);
1122     else
1123     {
1124     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1125 nigel 49 /* free((void *)substring); */
1126     pcre_free_substring(substring);
1127 nigel 29 }
1128     }
1129     }
1130    
1131     if (getlist)
1132     {
1133     const char **stringlist;
1134 nigel 35 int rc = pcre_get_substring_list((char *)bptr, offsets, count,
1135 nigel 29 &stringlist);
1136     if (rc < 0)
1137     fprintf(outfile, "get substring list failed %d\n", rc);
1138     else
1139     {
1140     for (i = 0; i < count; i++)
1141     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1142     if (stringlist[i] != NULL)
1143     fprintf(outfile, "string list not terminated by NULL\n");
1144 nigel 49 /* free((void *)stringlist); */
1145     pcre_free_substring_list(stringlist);
1146 nigel 29 }
1147     }
1148 nigel 39 }
1149 nigel 29
1150 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
1151 nigel 47 g_notempty after a null match, this is not necessarily the end.
1152 nigel 41 We want to advance the start offset, and continue. Fudge the offset
1153     values to achieve this. We won't be at the end of the string - that
1154 nigel 47 was checked before setting g_notempty. */
1155 nigel 39
1156 nigel 3 else
1157     {
1158 nigel 41 if (g_notempty != 0)
1159 nigel 35 {
1160 nigel 41 offsets[0] = start_offset;
1161     offsets[1] = start_offset + 1;
1162 nigel 35 }
1163 nigel 41 else
1164     {
1165     if (gmatched == 0) /* Error if no previous matches */
1166     {
1167     if (count == -1) fprintf(outfile, "No match\n");
1168     else fprintf(outfile, "Error %d\n", count);
1169     }
1170     break; /* Out of the /g loop */
1171     }
1172 nigel 3 }
1173 nigel 35
1174 nigel 39 /* If not /g or /G we are done */
1175    
1176     if (!do_g && !do_G) break;
1177    
1178 nigel 41 /* If we have matched an empty string, first check to see if we are at
1179     the end of the subject. If so, the /g loop is over. Otherwise, mimic
1180     what Perl's /g option does. This turns out to be rather cunning. First
1181 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1182     same point. If this fails (picked up above) we advance to the next
1183     character. */
1184 nigel 39
1185 nigel 41 g_notempty = 0;
1186     if (offsets[0] == offsets[1])
1187     {
1188     if (offsets[0] == len) break;
1189 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1190 nigel 41 }
1191 nigel 39
1192     /* For /g, update the start offset, leaving the rest alone */
1193    
1194     if (do_g) start_offset = offsets[1];
1195    
1196     /* For /G, update the pointer and length */
1197    
1198     else
1199 nigel 35 {
1200 nigel 39 bptr += offsets[1];
1201     len -= offsets[1];
1202 nigel 35 }
1203 nigel 39 } /* End of loop for /g and /G */
1204     } /* End of loop for data lines */
1205 nigel 3
1206 nigel 11 CONTINUE:
1207 nigel 37
1208     #if !defined NOPOSIX
1209 nigel 3 if (posix || do_posix) regfree(&preg);
1210 nigel 37 #endif
1211    
1212 nigel 3 if (re != NULL) free(re);
1213     if (extra != NULL) free(extra);
1214 nigel 25 if (tables != NULL)
1215     {
1216     free((void *)tables);
1217     setlocale(LC_CTYPE, "C");
1218     }
1219 nigel 3 }
1220    
1221     fprintf(outfile, "\n");
1222     return 0;
1223     }
1224    
1225     /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12