/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 53 - (hide annotations) (download)
Sat Feb 24 21:39:42 2007 UTC (7 years, 5 months ago) by nigel
File MIME type: text/plain
File size: 33642 byte(s)
Load pcre-3.5 into code/trunk.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5     #include <ctype.h>
6     #include <stdio.h>
7     #include <string.h>
8     #include <stdlib.h>
9     #include <time.h>
10 nigel 25 #include <locale.h>
11 nigel 3
12     /* Use the internal info for displaying the results of pcre_study(). */
13    
14     #include "internal.h"
15 nigel 37
16     /* It is possible to compile this test program without including support for
17     testing the POSIX interface, though this is not available via the standard
18     Makefile. */
19    
20     #if !defined NOPOSIX
21 nigel 3 #include "pcreposix.h"
22 nigel 37 #endif
23 nigel 3
24     #ifndef CLOCKS_PER_SEC
25     #ifdef CLK_TCK
26     #define CLOCKS_PER_SEC CLK_TCK
27     #else
28     #define CLOCKS_PER_SEC 100
29     #endif
30     #endif
31    
32 nigel 27 #define LOOPREPEAT 20000
33 nigel 3
34 nigel 23
/* File-scope state shared between main() and the helper functions. */

/* Size passed to the most recent new_malloc() call. */
static size_t gotten_store = 0;

/* When non-zero, new_malloc() reports each allocation on outfile. */
static int log_store = 0;

/* Stream that receives all test output; main() points it at stdout or at the
named output file before anything is written. */
static FILE *outfile = NULL;
38 nigel 3
39    
40    
/* Tables used by the UTF-8 conversion functions below. All three are indexed
by the number of continuation bytes (0-5) in an encoded character. */

/* Largest character value that fits in a sequence with that many
continuation bytes. */
static const int utf8_table1[] = {
  0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};

/* Marker bits that are ORed into the lead byte. */
static const int utf8_table2[] = {
  0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};

/* Mask selecting the data bits held in the lead byte. */
static const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};


/*************************************************
*       Convert character value to UTF-8         *
*************************************************/

/* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 1 to 6 bytes. The most significant
bits go into the lead byte and each following continuation byte carries the
next six bits, most significant group first, as UTF-8 requires.

Arguments:
  cvalue     the character value
  buffer     pointer to buffer for result - at least 6 bytes long

Returns:     number of bytes placed in the buffer
             -1 if input character is negative
             0 if input character is positive but too big (only when
             int is longer than 32 bits)
*/

static int
ord2utf8(int cvalue, unsigned char *buffer)
{
const int entries = (int)(sizeof(utf8_table1)/sizeof(utf8_table1[0]));
register int i, j;

if (cvalue < 0) return -1;

/* Find the shortest sequence that can hold the value. */

for (i = 0; i < entries; i++)
  if (cvalue <= utf8_table1[i]) break;
if (i >= entries) return 0;

/* Fill in the continuation bytes from the last one backwards, peeling six
bits off the bottom of the value each time; whatever is left over belongs
in the lead byte. */

buffer += i;
for (j = i; j > 0; j--)
  {
  *buffer-- = 0x80 | (cvalue & 0x3f);
  cvalue >>= 6;
  }
*buffer = utf8_table2[i] | (cvalue & utf8_table3[i]);
return i + 1;
}


/*************************************************
*            Convert UTF-8 string to value       *
*************************************************/

/* This function takes one or more bytes that represents a UTF-8 character,
and returns the value of the character. The lead byte supplies the most
significant bits and each continuation byte the next six bits, which is the
inverse of ord2utf8() above.

Argument:
  buffer   a pointer to the byte vector
  vptr     a pointer to an int to receive the value

Returns:   >  0 => the number of bytes consumed
           -6 to 0 => malformed UTF-8 character at offset = (-return)
*/

int
utf82ord(unsigned char *buffer, int *vptr)
{
const int entries = (int)(sizeof(utf8_table1)/sizeof(utf8_table1[0]));
int c = *buffer++;
int d = c;
int i, j, s;

/* Count the leading one bits of the first byte. */

for (i = -1; i < 6; i++)               /* i is number of additional bytes */
  {
  if ((d & 0x80) == 0) break;
  d <<= 1;
  }

if (i == -1) { *vptr = c; return 1; }  /* ascii character */
if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */

/* i now has a value in the range 1-5 */

s = 6*i;
d = (c & utf8_table3[i]) << s;

for (j = 0; j < i; j++)
  {
  c = *buffer++;
  if ((c & 0xc0) != 0x80) return -(j+1);
  s -= 6;
  d |= (c & 0x3f) << s;
  }

/* Check that encoding was the correct unique one */

for (j = 0; j < entries; j++)
  if (d <= utf8_table1[j]) break;
if (j != i) return -(i+1);

/* Valid value */

*vptr = d;
return i+1;
}
142    
143    
144    
145    
146    
147    
148 nigel 3 /* Debugging function to print the internal form of the regex. This is the same
149     code as contained in pcre.c under the DEBUG macro. */
150    
151 nigel 7 static const char *OP_names[] = {
152     "End", "\\A", "\\B", "\\b", "\\D", "\\d",
153 nigel 23 "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
154     "Opt", "^", "$", "Any", "chars", "not",
155 nigel 3 "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
156     "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
157     "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
158     "*", "*?", "+", "+?", "?", "??", "{", "{",
159 nigel 43 "class", "Ref", "Recurse",
160 nigel 23 "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
161     "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
162 nigel 53 "Brazero", "Braminzero", "Branumber", "Bra"
163 nigel 3 };
164    
165    
166 nigel 37 static void print_internals(pcre *re)
167 nigel 3 {
168     unsigned char *code = ((real_pcre *)re)->code;
169    
170 nigel 23 fprintf(outfile, "------------------------------------------------------------------\n");
171 nigel 3
172     for(;;)
173     {
174     int c;
175     int charlength;
176    
177 nigel 23 fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));
178 nigel 3
179     if (*code >= OP_BRA)
180     {
181 nigel 53 if (*code - OP_BRA > EXTRACT_BASIC_MAX)
182     fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);
183     else
184     fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
185 nigel 3 code += 2;
186     }
187    
188     else switch(*code)
189     {
190     case OP_END:
191 nigel 23 fprintf(outfile, " %s\n", OP_names[*code]);
192     fprintf(outfile, "------------------------------------------------------------------\n");
193 nigel 3 return;
194    
195 nigel 23 case OP_OPT:
196     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);
197     code++;
198     break;
199    
200 nigel 3 case OP_CHARS:
201     charlength = *(++code);
202 nigel 23 fprintf(outfile, "%3d ", charlength);
203 nigel 3 while (charlength-- > 0)
204 nigel 23 if (isprint(c = *(++code))) fprintf(outfile, "%c", c);
205     else fprintf(outfile, "\\x%02x", c);
206 nigel 3 break;
207    
208     case OP_KETRMAX:
209     case OP_KETRMIN:
210     case OP_ALT:
211     case OP_KET:
212     case OP_ASSERT:
213     case OP_ASSERT_NOT:
214 nigel 23 case OP_ASSERTBACK:
215     case OP_ASSERTBACK_NOT:
216 nigel 3 case OP_ONCE:
217 nigel 53 case OP_COND:
218     case OP_BRANUMBER:
219 nigel 23 case OP_REVERSE:
220 nigel 53 case OP_CREF:
221 nigel 23 fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
222     code += 2;
223     break;
224    
225 nigel 3 case OP_STAR:
226     case OP_MINSTAR:
227     case OP_PLUS:
228     case OP_MINPLUS:
229     case OP_QUERY:
230     case OP_MINQUERY:
231     case OP_TYPESTAR:
232     case OP_TYPEMINSTAR:
233     case OP_TYPEPLUS:
234     case OP_TYPEMINPLUS:
235     case OP_TYPEQUERY:
236     case OP_TYPEMINQUERY:
237     if (*code >= OP_TYPESTAR)
238 nigel 23 fprintf(outfile, " %s", OP_names[code[1]]);
239     else if (isprint(c = code[1])) fprintf(outfile, " %c", c);
240     else fprintf(outfile, " \\x%02x", c);
241     fprintf(outfile, "%s", OP_names[*code++]);
242 nigel 3 break;
243    
244     case OP_EXACT:
245     case OP_UPTO:
246     case OP_MINUPTO:
247 nigel 23 if (isprint(c = code[3])) fprintf(outfile, " %c{", c);
248     else fprintf(outfile, " \\x%02x{", c);
249     if (*code != OP_EXACT) fprintf(outfile, ",");
250     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
251     if (*code == OP_MINUPTO) fprintf(outfile, "?");
252 nigel 3 code += 3;
253     break;
254    
255     case OP_TYPEEXACT:
256     case OP_TYPEUPTO:
257     case OP_TYPEMINUPTO:
258 nigel 23 fprintf(outfile, " %s{", OP_names[code[3]]);
259     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");
260     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
261     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");
262 nigel 3 code += 3;
263     break;
264    
265     case OP_NOT:
266 nigel 23 if (isprint(c = *(++code))) fprintf(outfile, " [^%c]", c);
267     else fprintf(outfile, " [^\\x%02x]", c);
268 nigel 3 break;
269    
270     case OP_NOTSTAR:
271     case OP_NOTMINSTAR:
272     case OP_NOTPLUS:
273     case OP_NOTMINPLUS:
274     case OP_NOTQUERY:
275     case OP_NOTMINQUERY:
276 nigel 23 if (isprint(c = code[1])) fprintf(outfile, " [^%c]", c);
277     else fprintf(outfile, " [^\\x%02x]", c);
278     fprintf(outfile, "%s", OP_names[*code++]);
279 nigel 3 break;
280    
281     case OP_NOTEXACT:
282     case OP_NOTUPTO:
283     case OP_NOTMINUPTO:
284 nigel 23 if (isprint(c = code[3])) fprintf(outfile, " [^%c]{", c);
285     else fprintf(outfile, " [^\\x%02x]{", c);
286     if (*code != OP_NOTEXACT) fprintf(outfile, ",");
287     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
288     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");
289 nigel 3 code += 3;
290     break;
291    
292     case OP_REF:
293 nigel 53 fprintf(outfile, " \\%d", (code[1] << 8) | code[2]);
294     code += 3;
295 nigel 9 goto CLASS_REF_REPEAT;
296 nigel 3
297     case OP_CLASS:
298     {
299     int i, min, max;
300 nigel 23 code++;
301     fprintf(outfile, " [");
302 nigel 3
303     for (i = 0; i < 256; i++)
304     {
305     if ((code[i/8] & (1 << (i&7))) != 0)
306     {
307     int j;
308     for (j = i+1; j < 256; j++)
309     if ((code[j/8] & (1 << (j&7))) == 0) break;
310 nigel 23 if (i == '-' || i == ']') fprintf(outfile, "\\");
311     if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);
312 nigel 3 if (--j > i)
313     {
314 nigel 23 fprintf(outfile, "-");
315     if (j == '-' || j == ']') fprintf(outfile, "\\");
316     if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);
317 nigel 3 }
318     i = j;
319     }
320     }
321 nigel 23 fprintf(outfile, "]");
322 nigel 3 code += 32;
323    
324 nigel 9 CLASS_REF_REPEAT:
325    
326 nigel 3 switch(*code)
327     {
328     case OP_CRSTAR:
329     case OP_CRMINSTAR:
330     case OP_CRPLUS:
331     case OP_CRMINPLUS:
332     case OP_CRQUERY:
333     case OP_CRMINQUERY:
334 nigel 23 fprintf(outfile, "%s", OP_names[*code]);
335 nigel 3 break;
336    
337     case OP_CRRANGE:
338     case OP_CRMINRANGE:
339     min = (code[1] << 8) + code[2];
340     max = (code[3] << 8) + code[4];
341 nigel 23 if (max == 0) fprintf(outfile, "{%d,}", min);
342     else fprintf(outfile, "{%d,%d}", min, max);
343     if (*code == OP_CRMINRANGE) fprintf(outfile, "?");
344 nigel 3 code += 4;
345     break;
346    
347     default:
348     code--;
349     }
350     }
351     break;
352    
353     /* Anything else is just a one-node item */
354    
355     default:
356 nigel 23 fprintf(outfile, " %s", OP_names[*code]);
357 nigel 3 break;
358     }
359    
360     code++;
361 nigel 23 fprintf(outfile, "\n");
362 nigel 3 }
363     }
364    
365    
366    
367 nigel 49 /* Character string printing function. A "normal" and a UTF-8 version. */
368 nigel 3
369 nigel 49 static void pchars(unsigned char *p, int length, int utf8)
370 nigel 3 {
371     int c;
372     while (length-- > 0)
373 nigel 49 {
374     if (utf8)
375     {
376     int rc = utf82ord(p, &c);
377     if (rc > 0)
378     {
379     length -= rc - 1;
380     p += rc;
381     if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);
382     else fprintf(outfile, "\\x{%02x}", c);
383     continue;
384     }
385     }
386    
387     /* Not UTF-8, or malformed UTF-8 */
388    
389 nigel 3 if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
390     else fprintf(outfile, "\\x%02x", c);
391 nigel 49 }
392 nigel 3 }
393    
394    
395    
396     /* Alternative malloc function, to test functionality and show the size of the
397     compiled re. */
398    
399     static void *new_malloc(size_t size)
400     {
401 nigel 43 gotten_store = size;
402 nigel 31 if (log_store)
403 nigel 35 fprintf(outfile, "Memory allocation (code space): %d\n",
404     (int)((int)size - offsetof(real_pcre, code[0])));
405 nigel 3 return malloc(size);
406     }
407    
408    
409    
410 nigel 43
411     /* Get one piece of information from the pcre_fullinfo() function */
412    
413     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
414     {
415     int rc;
416     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
417     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
418     }
419    
420    
421    
422    
423 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
424     consist of a regular expression, in delimiters and optionally followed by
425     options, followed by a set of test data, terminated by an empty line. */
426    
427     int main(int argc, char **argv)
428     {
429     FILE *infile = stdin;
430     int options = 0;
431     int study_options = 0;
432     int op = 1;
433     int timeit = 0;
434     int showinfo = 0;
435 nigel 31 int showstore = 0;
436 nigel 53 int size_offsets = 45;
437     int size_offsets_max;
438     int *offsets;
439     #if !defined NOPOSIX
440 nigel 3 int posix = 0;
441 nigel 53 #endif
442 nigel 3 int debug = 0;
443 nigel 11 int done = 0;
444 nigel 3 unsigned char buffer[30000];
445     unsigned char dbuffer[1024];
446    
447     /* Static so that new_malloc can use it. */
448    
449     outfile = stdout;
450    
451     /* Scan options */
452    
453     while (argc > 1 && argv[op][0] == '-')
454     {
455 nigel 53 char *endptr;
456    
457 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
458     showstore = 1;
459 nigel 3 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
460     else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
461     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
462 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
463     ((size_offsets = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
464     {
465     op++;
466     argc--;
467     }
468     #if !defined NOPOSIX
469 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
470 nigel 53 #endif
471 nigel 3 else
472     {
473 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
474     printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
475     printf(" -d debug: show compiled code; implies -i\n"
476     " -i show information about compiled pattern\n"
477     " -o <n> set size of offsets vector to <n>\n");
478     #if !defined NOPOSIX
479     printf(" -p use POSIX interface\n");
480     #endif
481     printf(" -s output store information\n"
482     " -t time compilation and execution\n");
483 nigel 3 return 1;
484     }
485     op++;
486     argc--;
487     }
488    
489 nigel 53 /* Get the store for the offsets vector, and remember what it was */
490    
491     size_offsets_max = size_offsets;
492     offsets = malloc(size_offsets_max * sizeof(int));
493     if (offsets == NULL)
494     {
495     printf("** Failed to get %d bytes of memory for offsets vector\n",
496     size_offsets_max * sizeof(int));
497     return 1;
498     }
499    
500 nigel 3 /* Sort out the input and output files */
501    
502     if (argc > 1)
503     {
504     infile = fopen(argv[op], "r");
505     if (infile == NULL)
506     {
507     printf("** Failed to open %s\n", argv[op]);
508     return 1;
509     }
510     }
511    
512     if (argc > 2)
513     {
514     outfile = fopen(argv[op+1], "w");
515     if (outfile == NULL)
516     {
517     printf("** Failed to open %s\n", argv[op+1]);
518     return 1;
519     }
520     }
521    
522     /* Set alternative malloc function */
523    
524     pcre_malloc = new_malloc;
525    
526 nigel 23 /* Heading line, then prompt for first regex if stdin */
527 nigel 3
528     fprintf(outfile, "PCRE version %s\n\n", pcre_version());
529    
530     /* Main loop */
531    
532 nigel 11 while (!done)
533 nigel 3 {
534     pcre *re = NULL;
535     pcre_extra *extra = NULL;
536 nigel 37
537     #if !defined NOPOSIX /* There are still compilers that require no indent */
538 nigel 3 regex_t preg;
539 nigel 45 int do_posix = 0;
540 nigel 37 #endif
541    
542 nigel 7 const char *error;
543 nigel 25 unsigned char *p, *pp, *ppp;
544 nigel 53 const unsigned char *tables = NULL;
545 nigel 3 int do_study = 0;
546 nigel 25 int do_debug = debug;
547 nigel 35 int do_G = 0;
548     int do_g = 0;
549 nigel 25 int do_showinfo = showinfo;
550 nigel 35 int do_showrest = 0;
551 nigel 49 int utf8 = 0;
552 nigel 3 int erroroffset, len, delimiter;
553    
554     if (infile == stdin) printf(" re> ");
555     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
556 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
557 nigel 3
558     p = buffer;
559     while (isspace(*p)) p++;
560     if (*p == 0) continue;
561    
562     /* Get the delimiter and seek the end of the pattern; if it isn't
563     complete, read more. */
564    
565     delimiter = *p++;
566    
567 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
568 nigel 3 {
569 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
570 nigel 3 goto SKIP_DATA;
571     }
572    
573     pp = p;
574    
575     for(;;)
576     {
577 nigel 29 while (*pp != 0)
578     {
579     if (*pp == '\\' && pp[1] != 0) pp++;
580     else if (*pp == delimiter) break;
581     pp++;
582     }
583 nigel 3 if (*pp != 0) break;
584    
585     len = sizeof(buffer) - (pp - buffer);
586     if (len < 256)
587     {
588     fprintf(outfile, "** Expression too long - missing delimiter?\n");
589     goto SKIP_DATA;
590     }
591    
592     if (infile == stdin) printf(" > ");
593     if (fgets((char *)pp, len, infile) == NULL)
594     {
595     fprintf(outfile, "** Unexpected EOF\n");
596 nigel 11 done = 1;
597     goto CONTINUE;
598 nigel 3 }
599 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
600 nigel 3 }
601    
602 nigel 29 /* If the first character after the delimiter is backslash, make
603     the pattern end with backslash. This is purely to provide a way
604     of testing for the error message when a pattern ends with backslash. */
605    
606     if (pp[1] == '\\') *pp++ = '\\';
607    
608 nigel 3 /* Terminate the pattern at the delimiter */
609    
610     *pp++ = 0;
611    
612     /* Look for options after final delimiter */
613    
614     options = 0;
615     study_options = 0;
616 nigel 31 log_store = showstore; /* default from command line */
617    
618 nigel 3 while (*pp != 0)
619     {
620     switch (*pp++)
621     {
622 nigel 35 case 'g': do_g = 1; break;
623 nigel 3 case 'i': options |= PCRE_CASELESS; break;
624     case 'm': options |= PCRE_MULTILINE; break;
625     case 's': options |= PCRE_DOTALL; break;
626     case 'x': options |= PCRE_EXTENDED; break;
627 nigel 25
628 nigel 35 case '+': do_showrest = 1; break;
629 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
630 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
631 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
632 nigel 35 case 'G': do_G = 1; break;
633 nigel 25 case 'I': do_showinfo = 1; break;
634 nigel 31 case 'M': log_store = 1; break;
635 nigel 37
636     #if !defined NOPOSIX
637 nigel 3 case 'P': do_posix = 1; break;
638 nigel 37 #endif
639    
640 nigel 3 case 'S': do_study = 1; break;
641 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
642 nigel 3 case 'X': options |= PCRE_EXTRA; break;
643 nigel 49 case '8': options |= PCRE_UTF8; utf8 = 1; break;
644 nigel 25
645     case 'L':
646     ppp = pp;
647     while (*ppp != '\n' && *ppp != ' ') ppp++;
648     *ppp = 0;
649     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
650     {
651     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
652     goto SKIP_DATA;
653     }
654     tables = pcre_maketables();
655     pp = ppp;
656     break;
657    
658 nigel 3 case '\n': case ' ': break;
659     default:
660     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
661     goto SKIP_DATA;
662     }
663     }
664    
665 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
666 nigel 25 timing, showing, or debugging options, nor the ability to pass over
667     local character tables. */
668 nigel 3
669 nigel 37 #if !defined NOPOSIX
670 nigel 3 if (posix || do_posix)
671     {
672     int rc;
673     int cflags = 0;
674     if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
675     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
676     rc = regcomp(&preg, (char *)p, cflags);
677    
678     /* Compilation failed; go back for another re, skipping to blank line
679     if non-interactive. */
680    
681     if (rc != 0)
682     {
683     (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
684     fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
685     goto SKIP_DATA;
686     }
687     }
688    
689     /* Handle compiling via the native interface */
690    
691     else
692 nigel 37 #endif /* !defined NOPOSIX */
693    
694 nigel 3 {
695     if (timeit)
696     {
697     register int i;
698     clock_t time_taken;
699     clock_t start_time = clock();
700 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
701 nigel 3 {
702 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
703 nigel 3 if (re != NULL) free(re);
704     }
705     time_taken = clock() - start_time;
706 nigel 27 fprintf(outfile, "Compile time %.3f milliseconds\n",
707     ((double)time_taken * 1000.0) /
708     ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
709 nigel 3 }
710    
711 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
712 nigel 3
713     /* Compilation failed; go back for another re, skipping to blank line
714     if non-interactive. */
715    
716     if (re == NULL)
717     {
718     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
719     SKIP_DATA:
720     if (infile != stdin)
721     {
722     for (;;)
723     {
724     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
725 nigel 11 {
726     done = 1;
727     goto CONTINUE;
728     }
729 nigel 3 len = (int)strlen((char *)buffer);
730     while (len > 0 && isspace(buffer[len-1])) len--;
731     if (len == 0) break;
732     }
733     fprintf(outfile, "\n");
734     }
735 nigel 25 goto CONTINUE;
736 nigel 3 }
737    
738 nigel 43 /* Compilation succeeded; print data if required. There are now two
739     info-returning functions. The old one has a limited interface and
740     returns only limited data. Check that it agrees with the newer one. */
741 nigel 3
742 nigel 25 if (do_showinfo)
743 nigel 3 {
744 nigel 53 unsigned long int get_options;
745 nigel 43 int old_first_char, old_options, old_count;
746     int count, backrefmax, first_char, need_char;
747     size_t size;
748 nigel 3
749 nigel 37 if (do_debug) print_internals(re);
750 nigel 3
751 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
752 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
753     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
754     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
755     new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
756     new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
757    
758     old_count = pcre_info(re, &old_options, &old_first_char);
759 nigel 3 if (count < 0) fprintf(outfile,
760 nigel 43 "Error %d from pcre_info()\n", count);
761 nigel 3 else
762     {
763 nigel 43 if (old_count != count) fprintf(outfile,
764     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
765     old_count);
766 nigel 37
767 nigel 43 if (old_first_char != first_char) fprintf(outfile,
768     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
769     first_char, old_first_char);
770 nigel 37
771 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
772     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
773     get_options, old_options);
774 nigel 43 }
775    
776     if (size != gotten_store) fprintf(outfile,
777     "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
778     size, gotten_store);
779    
780     fprintf(outfile, "Capturing subpattern count = %d\n", count);
781     if (backrefmax > 0)
782     fprintf(outfile, "Max back reference = %d\n", backrefmax);
783 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
784 nigel 49 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
785 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
786     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
787     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
788     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
789     ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
790     ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
791     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
792     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
793     ((get_options & PCRE_UTF8) != 0)? " utf8" : "");
794 nigel 43
795     if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
796     fprintf(outfile, "Case state changes\n");
797    
798     if (first_char == -1)
799     {
800     fprintf(outfile, "First char at start or follows \\n\n");
801     }
802     else if (first_char < 0)
803     {
804     fprintf(outfile, "No first char\n");
805     }
806     else
807     {
808     if (isprint(first_char))
809     fprintf(outfile, "First char = \'%c\'\n", first_char);
810 nigel 3 else
811 nigel 43 fprintf(outfile, "First char = %d\n", first_char);
812     }
813 nigel 37
814 nigel 43 if (need_char < 0)
815     {
816     fprintf(outfile, "No need char\n");
817 nigel 3 }
818 nigel 43 else
819     {
820     if (isprint(need_char))
821     fprintf(outfile, "Need char = \'%c\'\n", need_char);
822     else
823     fprintf(outfile, "Need char = %d\n", need_char);
824     }
825 nigel 3 }
826    
827     /* If /S was present, study the regexp to generate additional info to
828     help with the matching. */
829    
830     if (do_study)
831     {
832     if (timeit)
833     {
834     register int i;
835     clock_t time_taken;
836     clock_t start_time = clock();
837 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
838 nigel 3 extra = pcre_study(re, study_options, &error);
839     time_taken = clock() - start_time;
840     if (extra != NULL) free(extra);
841 nigel 27 fprintf(outfile, " Study time %.3f milliseconds\n",
842     ((double)time_taken * 1000.0)/
843     ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
844 nigel 3 }
845    
846     extra = pcre_study(re, study_options, &error);
847     if (error != NULL)
848     fprintf(outfile, "Failed to study: %s\n", error);
849     else if (extra == NULL)
850     fprintf(outfile, "Study returned NULL\n");
851    
852 nigel 25 else if (do_showinfo)
853 nigel 3 {
854 nigel 43 uschar *start_bits = NULL;
855     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
856     if (start_bits == NULL)
857 nigel 3 fprintf(outfile, "No starting character set\n");
858     else
859     {
860     int i;
861     int c = 24;
862     fprintf(outfile, "Starting character set: ");
863     for (i = 0; i < 256; i++)
864     {
865 nigel 43 if ((start_bits[i/8] & (1<<(i%8))) != 0)
866 nigel 3 {
867     if (c > 75)
868     {
869     fprintf(outfile, "\n ");
870     c = 2;
871     }
872     if (isprint(i) && i != ' ')
873     {
874     fprintf(outfile, "%c ", i);
875     c += 2;
876     }
877     else
878     {
879     fprintf(outfile, "\\x%02x ", i);
880     c += 5;
881     }
882     }
883     }
884     fprintf(outfile, "\n");
885     }
886     }
887     }
888     }
889    
890     /* Read data lines and test them */
891    
892     for (;;)
893     {
894 nigel 9 unsigned char *q;
895 nigel 35 unsigned char *bptr = dbuffer;
896 nigel 53 int use_size_offsets = size_offsets;
897 nigel 3 int count, c;
898 nigel 29 int copystrings = 0;
899     int getstrings = 0;
900     int getlist = 0;
901 nigel 39 int gmatched = 0;
902 nigel 35 int start_offset = 0;
903 nigel 41 int g_notempty = 0;
904 nigel 3
905     options = 0;
906    
907 nigel 35 if (infile == stdin) printf("data> ");
908 nigel 11 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
909     {
910     done = 1;
911     goto CONTINUE;
912     }
913 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
914 nigel 3
915     len = (int)strlen((char *)buffer);
916     while (len > 0 && isspace(buffer[len-1])) len--;
917     buffer[len] = 0;
918     if (len == 0) break;
919    
920     p = buffer;
921     while (isspace(*p)) p++;
922    
923 nigel 9 q = dbuffer;
924 nigel 3 while ((c = *p++) != 0)
925     {
926     int i = 0;
927     int n = 0;
928     if (c == '\\') switch ((c = *p++))
929     {
930     case 'a': c = 7; break;
931     case 'b': c = '\b'; break;
932     case 'e': c = 27; break;
933     case 'f': c = '\f'; break;
934     case 'n': c = '\n'; break;
935     case 'r': c = '\r'; break;
936     case 't': c = '\t'; break;
937     case 'v': c = '\v'; break;
938    
939     case '0': case '1': case '2': case '3':
940     case '4': case '5': case '6': case '7':
941     c -= '0';
942     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
943     c = c * 8 + *p++ - '0';
944     break;
945    
946     case 'x':
947 nigel 49
948     /* Handle \x{..} specially - new Perl thing for utf8 */
949    
950     if (*p == '{')
951     {
952     unsigned char *pt = p;
953     c = 0;
954     while (isxdigit(*(++pt)))
955     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
956     if (*pt == '}')
957     {
958     unsigned char buffer[8];
959     int ii, utn;
960     utn = ord2utf8(c, buffer);
961     for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];
962     c = buffer[ii]; /* Last byte */
963     p = pt + 1;
964     break;
965     }
966     /* Not correct form; fall through */
967     }
968    
969     /* Ordinary \x */
970    
971 nigel 3 c = 0;
972     while (i++ < 2 && isxdigit(*p))
973     {
974     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
975     p++;
976     }
977     break;
978    
979     case 0: /* Allows for an empty line */
980     p--;
981     continue;
982    
983     case 'A': /* Option setting */
984     options |= PCRE_ANCHORED;
985     continue;
986    
987     case 'B':
988     options |= PCRE_NOTBOL;
989     continue;
990    
991 nigel 29 case 'C':
992     while(isdigit(*p)) n = n * 10 + *p++ - '0';
993     copystrings |= 1 << n;
994     continue;
995    
996     case 'G':
997     while(isdigit(*p)) n = n * 10 + *p++ - '0';
998     getstrings |= 1 << n;
999     continue;
1000    
1001     case 'L':
1002     getlist = 1;
1003     continue;
1004    
1005 nigel 37 case 'N':
1006     options |= PCRE_NOTEMPTY;
1007     continue;
1008    
1009 nigel 3 case 'O':
1010     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1011 nigel 53 if (n > size_offsets_max)
1012     {
1013     free(offsets);
1014     size_offsets_max = n;
1015     offsets = malloc(size_offsets_max * sizeof(int));
1016     if (offsets == NULL)
1017     {
1018     printf("** Failed to get %d bytes of memory for offsets vector\n",
1019     size_offsets_max * sizeof(int));
1020     return 1;
1021     }
1022     }
1023     use_size_offsets = n;
1024 nigel 3 continue;
1025    
1026     case 'Z':
1027     options |= PCRE_NOTEOL;
1028     continue;
1029     }
1030 nigel 9 *q++ = c;
1031 nigel 3 }
1032 nigel 9 *q = 0;
1033     len = q - dbuffer;
1034 nigel 3
1035     /* Handle matching via the POSIX interface, which does not
1036     support timing. */
1037    
1038 nigel 37 #if !defined NOPOSIX
1039 nigel 3 if (posix || do_posix)
1040     {
1041     int rc;
1042     int eflags = 0;
1043 nigel 53 regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
1044 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1045     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1046    
1047 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1048 nigel 3
1049     if (rc != 0)
1050     {
1051     (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
1052     fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1053     }
1054     else
1055     {
1056 nigel 7 size_t i;
1057 nigel 53 for (i = 0; i < use_size_offsets; i++)
1058 nigel 3 {
1059     if (pmatch[i].rm_so >= 0)
1060     {
1061 nigel 23 fprintf(outfile, "%2d: ", (int)i);
1062 nigel 3 pchars(dbuffer + pmatch[i].rm_so,
1063 nigel 49 pmatch[i].rm_eo - pmatch[i].rm_so, utf8);
1064 nigel 3 fprintf(outfile, "\n");
1065 nigel 35 if (i == 0 && do_showrest)
1066     {
1067     fprintf(outfile, " 0+ ");
1068 nigel 49 pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);
1069 nigel 35 fprintf(outfile, "\n");
1070     }
1071 nigel 3 }
1072     }
1073     }
1074 nigel 53 free(pmatch);
1075 nigel 3 }
1076    
1077 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
1078 nigel 3
1079 nigel 37 else
1080     #endif /* !defined NOPOSIX */
1081    
1082 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
1083 nigel 3 {
1084     if (timeit)
1085     {
1086     register int i;
1087     clock_t time_taken;
1088     clock_t start_time = clock();
1089 nigel 27 for (i = 0; i < LOOPREPEAT; i++)
1090 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
1091 nigel 53 start_offset, options | g_notempty, offsets, use_size_offsets);
1092 nigel 3 time_taken = clock() - start_time;
1093 nigel 27 fprintf(outfile, "Execute time %.3f milliseconds\n",
1094     ((double)time_taken * 1000.0)/
1095     ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
1096 nigel 3 }
1097    
1098 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
1099 nigel 53 start_offset, options | g_notempty, offsets, use_size_offsets);
1100 nigel 3
1101     if (count == 0)
1102     {
1103     fprintf(outfile, "Matched, but too many substrings\n");
1104 nigel 53 count = use_size_offsets/3;
1105 nigel 3 }
1106    
1107 nigel 39 /* Matched */
1108    
1109 nigel 3 if (count >= 0)
1110     {
1111     int i;
1112 nigel 29 for (i = 0; i < count * 2; i += 2)
1113 nigel 3 {
1114     if (offsets[i] < 0)
1115     fprintf(outfile, "%2d: <unset>\n", i/2);
1116     else
1117     {
1118     fprintf(outfile, "%2d: ", i/2);
1119 nigel 49 pchars(bptr + offsets[i], offsets[i+1] - offsets[i], utf8);
1120 nigel 3 fprintf(outfile, "\n");
1121 nigel 35 if (i == 0)
1122     {
1123     if (do_showrest)
1124     {
1125     fprintf(outfile, " 0+ ");
1126 nigel 49 pchars(bptr + offsets[i+1], len - offsets[i+1], utf8);
1127 nigel 35 fprintf(outfile, "\n");
1128     }
1129     }
1130 nigel 3 }
1131     }
1132 nigel 29
1133     for (i = 0; i < 32; i++)
1134     {
1135     if ((copystrings & (1 << i)) != 0)
1136     {
1137 nigel 37 char copybuffer[16];
1138 nigel 35 int rc = pcre_copy_substring((char *)bptr, offsets, count,
1139 nigel 37 i, copybuffer, sizeof(copybuffer));
1140 nigel 29 if (rc < 0)
1141     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1142     else
1143 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1144 nigel 29 }
1145     }
1146    
1147     for (i = 0; i < 32; i++)
1148     {
1149     if ((getstrings & (1 << i)) != 0)
1150     {
1151     const char *substring;
1152 nigel 35 int rc = pcre_get_substring((char *)bptr, offsets, count,
1153 nigel 29 i, &substring);
1154     if (rc < 0)
1155     fprintf(outfile, "get substring %d failed %d\n", i, rc);
1156     else
1157     {
1158     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1159 nigel 49 /* free((void *)substring); */
1160     pcre_free_substring(substring);
1161 nigel 29 }
1162     }
1163     }
1164    
1165     if (getlist)
1166     {
1167     const char **stringlist;
1168 nigel 35 int rc = pcre_get_substring_list((char *)bptr, offsets, count,
1169 nigel 29 &stringlist);
1170     if (rc < 0)
1171     fprintf(outfile, "get substring list failed %d\n", rc);
1172     else
1173     {
1174     for (i = 0; i < count; i++)
1175     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1176     if (stringlist[i] != NULL)
1177     fprintf(outfile, "string list not terminated by NULL\n");
1178 nigel 49 /* free((void *)stringlist); */
1179     pcre_free_substring_list(stringlist);
1180 nigel 29 }
1181     }
1182 nigel 39 }
1183 nigel 29
1184 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
1185 nigel 47 g_notempty after a null match, this is not necessarily the end.
1186 nigel 41 We want to advance the start offset, and continue. Fudge the offset
1187     values to achieve this. We won't be at the end of the string - that
1188 nigel 47 was checked before setting g_notempty. */
1189 nigel 39
1190 nigel 3 else
1191     {
1192 nigel 41 if (g_notempty != 0)
1193 nigel 35 {
1194 nigel 41 offsets[0] = start_offset;
1195     offsets[1] = start_offset + 1;
1196 nigel 35 }
1197 nigel 41 else
1198     {
1199     if (gmatched == 0) /* Error if no previous matches */
1200     {
1201     if (count == -1) fprintf(outfile, "No match\n");
1202     else fprintf(outfile, "Error %d\n", count);
1203     }
1204     break; /* Out of the /g loop */
1205     }
1206 nigel 3 }
1207 nigel 35
1208 nigel 39 /* If not /g or /G we are done */
1209    
1210     if (!do_g && !do_G) break;
1211    
1212 nigel 41 /* If we have matched an empty string, first check to see if we are at
1213     the end of the subject. If so, the /g loop is over. Otherwise, mimic
1214     what Perl's /g option does. This turns out to be rather cunning. First
1215 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1216     same point. If this fails (picked up above) we advance to the next
1217     character. */
1218 nigel 39
1219 nigel 41 g_notempty = 0;
1220     if (offsets[0] == offsets[1])
1221     {
1222     if (offsets[0] == len) break;
1223 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1224 nigel 41 }
1225 nigel 39
1226     /* For /g, update the start offset, leaving the rest alone */
1227    
1228     if (do_g) start_offset = offsets[1];
1229    
1230     /* For /G, update the pointer and length */
1231    
1232     else
1233 nigel 35 {
1234 nigel 39 bptr += offsets[1];
1235     len -= offsets[1];
1236 nigel 35 }
1237 nigel 39 } /* End of loop for /g and /G */
1238     } /* End of loop for data lines */
1239 nigel 3
1240 nigel 11 CONTINUE:
1241 nigel 37
1242     #if !defined NOPOSIX
1243 nigel 3 if (posix || do_posix) regfree(&preg);
1244 nigel 37 #endif
1245    
1246 nigel 3 if (re != NULL) free(re);
1247     if (extra != NULL) free(extra);
1248 nigel 25 if (tables != NULL)
1249     {
1250     free((void *)tables);
1251     setlocale(LC_CTYPE, "C");
1252     }
1253 nigel 3 }
1254    
1255     fprintf(outfile, "\n");
1256     return 0;
1257     }
1258    
1259     /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12