/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory | Revision Log


Revision 55 - (hide annotations) (download)
Sat Feb 24 21:39:46 2007 UTC (7 years, 8 months ago) by nigel
File MIME type: text/plain
File size: 33746 byte(s)
Load pcre-3.6 into code/trunk.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5     #include <ctype.h>
6     #include <stdio.h>
7     #include <string.h>
8     #include <stdlib.h>
9     #include <time.h>
10 nigel 25 #include <locale.h>
11 nigel 3
12     /* Use the internal info for displaying the results of pcre_study(). */
13    
14     #include "internal.h"
15 nigel 37
16     /* It is possible to compile this test program without including support for
17     testing the POSIX interface, though this is not available via the standard
18     Makefile. */
19    
20     #if !defined NOPOSIX
21 nigel 3 #include "pcreposix.h"
22 nigel 37 #endif
23 nigel 3
/* Make sure CLOCKS_PER_SEC exists for the timing code: if the C library does
not supply it, use CLK_TCK when that is defined, and otherwise assume 100
ticks per second. */

#if !defined(CLOCKS_PER_SEC)
#if defined(CLK_TCK)
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif

/* Number of iterations used by the timing loops. */

#define LOOPREPEAT 20000

/* File-scope state shared between main() and the helper functions. */

static FILE *outfile;         /* stream that receives all test output */
static int log_store = 0;     /* non-zero => new_malloc() reports each request */
static size_t gotten_store;   /* size passed to the latest new_malloc() call */
38 nigel 3
39    
40    
/* Tables used by the UTF-8 conversion functions. The index is the number of
additional (continuation) bytes that follow the lead byte. */

/* Largest character value that fits in 1 + index bytes. */

static int utf8_table1[] = {
  0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};

/* Marker bits that are ORed into the lead byte. */

static int utf8_table2[] = {
  0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};

/* Mask that extracts the data bits from the lead byte. */

static int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};


/*************************************************
*       Convert character value to UTF-8         *
*************************************************/

/* This function takes an integer value in the range 0 - 0x7fffffff and
encodes it as a UTF-8 character in 1 to 6 bytes. The most significant bits of
the value go into the lead byte and each following byte carries the next six
bits, most significant group first, which is the byte order UTF-8 requires.

Arguments:
  cvalue     the character value
  buffer     pointer to buffer for result - at least 6 bytes long

Returns:     number of bytes placed in the buffer
             -1 if input character is negative
             0 if input character is positive but too big (only when
               int is longer than 32 bits)
*/

static int
ord2utf8(int cvalue, unsigned char *buffer)
{
const int table_size = (int)(sizeof(utf8_table1)/sizeof(utf8_table1[0]));
int i, j;

if (cvalue < 0) return -1;

/* Find the shortest encoding that can hold the value; i becomes the number
of continuation bytes. */

for (i = 0; i < table_size; i++)
  if (cvalue <= utf8_table1[i]) break;
if (i >= table_size) return 0;

/* Fill the continuation bytes from the last one backwards, so that the least
significant six bits end up in the final byte. */

buffer += i;
for (j = i; j > 0; j--)
  {
  *buffer-- = (unsigned char)(0x80 | (cvalue & 0x3f));
  cvalue >>= 6;
  }

/* What is left of the value is the data part of the lead byte. */

*buffer = (unsigned char)(utf8_table2[i] | cvalue);
return i + 1;
}


/*************************************************
*         Convert UTF-8 string to value          *
*************************************************/

/* This function takes one or more bytes that represent a UTF-8 character,
and returns the value of the character. There is no length argument: bytes
are read until the character is complete or a byte that is not a continuation
byte is seen, so the input must be terminated (for example by a zero byte).

Arguments:
  buffer     a pointer to the byte vector
  vptr       a pointer to an int to receive the value

Returns:     >  0 => the number of bytes consumed
             -6 to 0 => malformed UTF-8 character at offset = (-return)
*/

int
utf82ord(unsigned char *buffer, int *vptr)
{
const int table_size = (int)(sizeof(utf8_table1)/sizeof(utf8_table1[0]));
int c = *buffer++;
int d = c;
int i, j, s;

/* Count the leading 1-bits of the first byte. */

for (i = -1; i < 6; i++)               /* i is number of additional bytes */
  {
  if ((d & 0x80) == 0) break;
  d <<= 1;
  }

if (i == -1) { *vptr = c; return 1; }  /* ascii character */
if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */

/* i now has a value in the range 1-5. The lead byte holds the most
significant bits, so it is shifted up past all the continuation bytes. */

s = 6*i;
d = (c & utf8_table3[i]) << s;

for (j = 0; j < i; j++)
  {
  c = *buffer++;
  if ((c & 0xc0) != 0x80) return -(j+1);
  s -= 6;
  d |= (c & 0x3f) << s;
  }

/* Check that encoding was the correct unique one: reject a value that would
have fitted into fewer bytes. */

for (j = 0; j < table_size; j++)
  if (d <= utf8_table1[j]) break;
if (j != i) return -(i+1);

/* Valid value */

*vptr = d;
return i+1;
}
142    
143    
144    
145    
146    
147    
148 nigel 3 /* Debugging function to print the internal form of the regex. This is the same
149     code as contained in pcre.c under the DEBUG macro. */
150    
/* Printable names for the opcodes; print_internals() indexes this table
directly with an opcode value, so the order has to follow the opcode
numbering. The group labels below are presumed from the names and from how
print_internals() uses them - confirm against the opcode list in internal.h. */

static const char *OP_names[] = {
  /* end marker and simple assertions */
  "End", "\\A", "\\B", "\\b",
  /* character types */
  "\\D", "\\d", "\\S", "\\s", "\\W", "\\w",
  /* more assertions */
  "\\Z", "\\z",
  /* option setting, anchors, any char, literal string, negated char */
  "Opt", "^", "$", "Any", "chars", "not",
  /* three groups of nine repeat operators */
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  /* repeats that follow a class or a back reference */
  "*", "*?", "+", "+?", "?", "??", "{", "{",
  /* class, back reference, recursion */
  "class", "Ref", "Recurse",
  /* group structure and assertions */
  "Alt", "Ket", "KetRmax", "KetRmin",
  "Assert", "Assert not", "AssertB", "AssertB not",
  "Reverse", "Once", "Cond", "Cref",
  /* brackets */
  "Brazero", "Braminzero", "Branumber", "Bra"
};
164    
165    
166 nigel 37 static void print_internals(pcre *re)
167 nigel 3 {
168     unsigned char *code = ((real_pcre *)re)->code;
169    
170 nigel 23 fprintf(outfile, "------------------------------------------------------------------\n");
171 nigel 3
172     for(;;)
173     {
174     int c;
175     int charlength;
176    
177 nigel 23 fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));
178 nigel 3
179     if (*code >= OP_BRA)
180     {
181 nigel 53 if (*code - OP_BRA > EXTRACT_BASIC_MAX)
182     fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);
183     else
184     fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
185 nigel 3 code += 2;
186     }
187    
188     else switch(*code)
189     {
190     case OP_END:
191 nigel 23 fprintf(outfile, " %s\n", OP_names[*code]);
192     fprintf(outfile, "------------------------------------------------------------------\n");
193 nigel 3 return;
194    
195 nigel 23 case OP_OPT:
196     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);
197     code++;
198     break;
199    
200 nigel 3 case OP_CHARS:
201     charlength = *(++code);
202 nigel 23 fprintf(outfile, "%3d ", charlength);
203 nigel 3 while (charlength-- > 0)
204 nigel 23 if (isprint(c = *(++code))) fprintf(outfile, "%c", c);
205     else fprintf(outfile, "\\x%02x", c);
206 nigel 3 break;
207    
208     case OP_KETRMAX:
209     case OP_KETRMIN:
210     case OP_ALT:
211     case OP_KET:
212     case OP_ASSERT:
213     case OP_ASSERT_NOT:
214 nigel 23 case OP_ASSERTBACK:
215     case OP_ASSERTBACK_NOT:
216 nigel 3 case OP_ONCE:
217 nigel 53 case OP_COND:
218     case OP_BRANUMBER:
219 nigel 23 case OP_REVERSE:
220 nigel 53 case OP_CREF:
221 nigel 23 fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
222     code += 2;
223     break;
224    
225 nigel 3 case OP_STAR:
226     case OP_MINSTAR:
227     case OP_PLUS:
228     case OP_MINPLUS:
229     case OP_QUERY:
230     case OP_MINQUERY:
231     case OP_TYPESTAR:
232     case OP_TYPEMINSTAR:
233     case OP_TYPEPLUS:
234     case OP_TYPEMINPLUS:
235     case OP_TYPEQUERY:
236     case OP_TYPEMINQUERY:
237     if (*code >= OP_TYPESTAR)
238 nigel 23 fprintf(outfile, " %s", OP_names[code[1]]);
239     else if (isprint(c = code[1])) fprintf(outfile, " %c", c);
240     else fprintf(outfile, " \\x%02x", c);
241     fprintf(outfile, "%s", OP_names[*code++]);
242 nigel 3 break;
243    
244     case OP_EXACT:
245     case OP_UPTO:
246     case OP_MINUPTO:
247 nigel 23 if (isprint(c = code[3])) fprintf(outfile, " %c{", c);
248     else fprintf(outfile, " \\x%02x{", c);
249     if (*code != OP_EXACT) fprintf(outfile, ",");
250     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
251     if (*code == OP_MINUPTO) fprintf(outfile, "?");
252 nigel 3 code += 3;
253     break;
254    
255     case OP_TYPEEXACT:
256     case OP_TYPEUPTO:
257     case OP_TYPEMINUPTO:
258 nigel 23 fprintf(outfile, " %s{", OP_names[code[3]]);
259     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");
260     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
261     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");
262 nigel 3 code += 3;
263     break;
264    
265     case OP_NOT:
266 nigel 23 if (isprint(c = *(++code))) fprintf(outfile, " [^%c]", c);
267     else fprintf(outfile, " [^\\x%02x]", c);
268 nigel 3 break;
269    
270     case OP_NOTSTAR:
271     case OP_NOTMINSTAR:
272     case OP_NOTPLUS:
273     case OP_NOTMINPLUS:
274     case OP_NOTQUERY:
275     case OP_NOTMINQUERY:
276 nigel 23 if (isprint(c = code[1])) fprintf(outfile, " [^%c]", c);
277     else fprintf(outfile, " [^\\x%02x]", c);
278     fprintf(outfile, "%s", OP_names[*code++]);
279 nigel 3 break;
280    
281     case OP_NOTEXACT:
282     case OP_NOTUPTO:
283     case OP_NOTMINUPTO:
284 nigel 23 if (isprint(c = code[3])) fprintf(outfile, " [^%c]{", c);
285     else fprintf(outfile, " [^\\x%02x]{", c);
286     if (*code != OP_NOTEXACT) fprintf(outfile, ",");
287     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
288     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");
289 nigel 3 code += 3;
290     break;
291    
292     case OP_REF:
293 nigel 53 fprintf(outfile, " \\%d", (code[1] << 8) | code[2]);
294     code += 3;
295 nigel 9 goto CLASS_REF_REPEAT;
296 nigel 3
297     case OP_CLASS:
298     {
299     int i, min, max;
300 nigel 23 code++;
301     fprintf(outfile, " [");
302 nigel 3
303     for (i = 0; i < 256; i++)
304     {
305     if ((code[i/8] & (1 << (i&7))) != 0)
306     {
307     int j;
308     for (j = i+1; j < 256; j++)
309     if ((code[j/8] & (1 << (j&7))) == 0) break;
310 nigel 23 if (i == '-' || i == ']') fprintf(outfile, "\\");
311     if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);
312 nigel 3 if (--j > i)
313     {
314 nigel 23 fprintf(outfile, "-");
315     if (j == '-' || j == ']') fprintf(outfile, "\\");
316     if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);
317 nigel 3 }
318     i = j;
319     }
320     }
321 nigel 23 fprintf(outfile, "]");
322 nigel 3 code += 32;
323    
324 nigel 9 CLASS_REF_REPEAT:
325    
326 nigel 3 switch(*code)
327     {
328     case OP_CRSTAR:
329     case OP_CRMINSTAR:
330     case OP_CRPLUS:
331     case OP_CRMINPLUS:
332     case OP_CRQUERY:
333     case OP_CRMINQUERY:
334 nigel 23 fprintf(outfile, "%s", OP_names[*code]);
335 nigel 3 break;
336    
337     case OP_CRRANGE:
338     case OP_CRMINRANGE:
339     min = (code[1] << 8) + code[2];
340     max = (code[3] << 8) + code[4];
341 nigel 23 if (max == 0) fprintf(outfile, "{%d,}", min);
342     else fprintf(outfile, "{%d,%d}", min, max);
343     if (*code == OP_CRMINRANGE) fprintf(outfile, "?");
344 nigel 3 code += 4;
345     break;
346    
347     default:
348     code--;
349     }
350     }
351     break;
352    
353     /* Anything else is just a one-node item */
354    
355     default:
356 nigel 23 fprintf(outfile, " %s", OP_names[*code]);
357 nigel 3 break;
358     }
359    
360     code++;
361 nigel 23 fprintf(outfile, "\n");
362 nigel 3 }
363     }
364    
365    
366    
367 nigel 49 /* Character string printing function. A "normal" and a UTF-8 version. */
368 nigel 3
369 nigel 49 static void pchars(unsigned char *p, int length, int utf8)
370 nigel 3 {
371     int c;
372     while (length-- > 0)
373 nigel 49 {
374     if (utf8)
375     {
376     int rc = utf82ord(p, &c);
377     if (rc > 0)
378     {
379     length -= rc - 1;
380     p += rc;
381     if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);
382     else fprintf(outfile, "\\x{%02x}", c);
383     continue;
384     }
385     }
386    
387     /* Not UTF-8, or malformed UTF-8 */
388    
389 nigel 3 if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
390     else fprintf(outfile, "\\x%02x", c);
391 nigel 49 }
392 nigel 3 }
393    
394    
395    
396     /* Alternative malloc function, to test functionality and show the size of the
397     compiled re. */
398    
399     static void *new_malloc(size_t size)
400     {
401 nigel 43 gotten_store = size;
402 nigel 31 if (log_store)
403 nigel 35 fprintf(outfile, "Memory allocation (code space): %d\n",
404     (int)((int)size - offsetof(real_pcre, code[0])));
405 nigel 3 return malloc(size);
406     }
407    
408    
409    
410 nigel 43
411     /* Get one piece of information from the pcre_fullinfo() function */
412    
413     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
414     {
415     int rc;
416     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
417     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
418     }
419    
420    
421    
422    
423 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
424     consist of a regular expression, in delimiters and optionally followed by
425     options, followed by a set of test data, terminated by an empty line. */
426    
427     int main(int argc, char **argv)
428     {
429     FILE *infile = stdin;
430     int options = 0;
431     int study_options = 0;
432     int op = 1;
433     int timeit = 0;
434     int showinfo = 0;
435 nigel 31 int showstore = 0;
436 nigel 53 int size_offsets = 45;
437     int size_offsets_max;
438     int *offsets;
439     #if !defined NOPOSIX
440 nigel 3 int posix = 0;
441 nigel 53 #endif
442 nigel 3 int debug = 0;
443 nigel 11 int done = 0;
444 nigel 3 unsigned char buffer[30000];
445     unsigned char dbuffer[1024];
446    
447     /* Static so that new_malloc can use it. */
448    
449     outfile = stdout;
450    
451     /* Scan options */
452    
453     while (argc > 1 && argv[op][0] == '-')
454     {
455 nigel 53 char *endptr;
456    
457 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
458     showstore = 1;
459 nigel 3 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
460     else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
461     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
462 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
463     ((size_offsets = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
464     {
465     op++;
466     argc--;
467     }
468     #if !defined NOPOSIX
469 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
470 nigel 53 #endif
471 nigel 3 else
472     {
473 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
474     printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
475     printf(" -d debug: show compiled code; implies -i\n"
476     " -i show information about compiled pattern\n"
477     " -o <n> set size of offsets vector to <n>\n");
478     #if !defined NOPOSIX
479     printf(" -p use POSIX interface\n");
480     #endif
481     printf(" -s output store information\n"
482     " -t time compilation and execution\n");
483 nigel 3 return 1;
484     }
485     op++;
486     argc--;
487     }
488    
489 nigel 53 /* Get the store for the offsets vector, and remember what it was */
490    
491     size_offsets_max = size_offsets;
492     offsets = malloc(size_offsets_max * sizeof(int));
493     if (offsets == NULL)
494     {
495     printf("** Failed to get %d bytes of memory for offsets vector\n",
496     size_offsets_max * sizeof(int));
497     return 1;
498     }
499    
500 nigel 3 /* Sort out the input and output files */
501    
502     if (argc > 1)
503     {
504     infile = fopen(argv[op], "r");
505     if (infile == NULL)
506     {
507     printf("** Failed to open %s\n", argv[op]);
508     return 1;
509     }
510     }
511    
512     if (argc > 2)
513     {
514     outfile = fopen(argv[op+1], "w");
515     if (outfile == NULL)
516     {
517     printf("** Failed to open %s\n", argv[op+1]);
518     return 1;
519     }
520     }
521    
522     /* Set alternative malloc function */
523    
524     pcre_malloc = new_malloc;
525    
526 nigel 23 /* Heading line, then prompt for first regex if stdin */
527 nigel 3
528     fprintf(outfile, "PCRE version %s\n\n", pcre_version());
529    
530     /* Main loop */
531    
532 nigel 11 while (!done)
533 nigel 3 {
534     pcre *re = NULL;
535     pcre_extra *extra = NULL;
536 nigel 37
537     #if !defined NOPOSIX /* There are still compilers that require no indent */
538 nigel 3 regex_t preg;
539 nigel 45 int do_posix = 0;
540 nigel 37 #endif
541    
542 nigel 7 const char *error;
543 nigel 25 unsigned char *p, *pp, *ppp;
544 nigel 53 const unsigned char *tables = NULL;
545 nigel 3 int do_study = 0;
546 nigel 25 int do_debug = debug;
547 nigel 35 int do_G = 0;
548     int do_g = 0;
549 nigel 25 int do_showinfo = showinfo;
550 nigel 35 int do_showrest = 0;
551 nigel 49 int utf8 = 0;
552 nigel 3 int erroroffset, len, delimiter;
553    
554     if (infile == stdin) printf(" re> ");
555     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
556 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
557 nigel 3
558     p = buffer;
559     while (isspace(*p)) p++;
560     if (*p == 0) continue;
561    
562     /* Get the delimiter and seek the end of the pattern; if it isn't
563     complete, read more. */
564    
565     delimiter = *p++;
566    
567 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
568 nigel 3 {
569 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
570 nigel 3 goto SKIP_DATA;
571     }
572    
573     pp = p;
574    
575     for(;;)
576     {
577 nigel 29 while (*pp != 0)
578     {
579     if (*pp == '\\' && pp[1] != 0) pp++;
580     else if (*pp == delimiter) break;
581     pp++;
582     }
583 nigel 3 if (*pp != 0) break;
584    
585     len = sizeof(buffer) - (pp - buffer);
586     if (len < 256)
587     {
588     fprintf(outfile, "** Expression too long - missing delimiter?\n");
589     goto SKIP_DATA;
590     }
591    
592     if (infile == stdin) printf(" > ");
593     if (fgets((char *)pp, len, infile) == NULL)
594     {
595     fprintf(outfile, "** Unexpected EOF\n");
596 nigel 11 done = 1;
597     goto CONTINUE;
598 nigel 3 }
599 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
600 nigel 3 }
601    
602 nigel 29 /* If the first character after the delimiter is backslash, make
603     the pattern end with backslash. This is purely to provide a way
604     of testing for the error message when a pattern ends with backslash. */
605    
606     if (pp[1] == '\\') *pp++ = '\\';
607    
608 nigel 3 /* Terminate the pattern at the delimiter */
609    
610     *pp++ = 0;
611    
612     /* Look for options after final delimiter */
613    
614     options = 0;
615     study_options = 0;
616 nigel 31 log_store = showstore; /* default from command line */
617    
618 nigel 3 while (*pp != 0)
619     {
620     switch (*pp++)
621     {
622 nigel 35 case 'g': do_g = 1; break;
623 nigel 3 case 'i': options |= PCRE_CASELESS; break;
624     case 'm': options |= PCRE_MULTILINE; break;
625     case 's': options |= PCRE_DOTALL; break;
626     case 'x': options |= PCRE_EXTENDED; break;
627 nigel 25
628 nigel 35 case '+': do_showrest = 1; break;
629 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
630 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
631 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
632 nigel 35 case 'G': do_G = 1; break;
633 nigel 25 case 'I': do_showinfo = 1; break;
634 nigel 31 case 'M': log_store = 1; break;
635 nigel 37
636     #if !defined NOPOSIX
637 nigel 3 case 'P': do_posix = 1; break;
638 nigel 37 #endif
639    
640 nigel 3 case 'S': do_study = 1; break;
641 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
642 nigel 3 case 'X': options |= PCRE_EXTRA; break;
643 nigel 49 case '8': options |= PCRE_UTF8; utf8 = 1; break;
644 nigel 25
645     case 'L':
646     ppp = pp;
647     while (*ppp != '\n' && *ppp != ' ') ppp++;
648     *ppp = 0;
649     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
650     {
651     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
652     goto SKIP_DATA;
653     }
654     tables = pcre_maketables();
655     pp = ppp;
656     break;
657    
658 nigel 3 case '\n': case ' ': break;
659     default:
660     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
661     goto SKIP_DATA;
662     }
663     }
664    
665 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
666 nigel 25 timing, showing, or debugging options, nor the ability to pass over
667     local character tables. */
668 nigel 3
669 nigel 37 #if !defined NOPOSIX
670 nigel 3 if (posix || do_posix)
671     {
672     int rc;
673     int cflags = 0;
674     if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
675     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
676     rc = regcomp(&preg, (char *)p, cflags);
677    
678     /* Compilation failed; go back for another re, skipping to blank line
679     if non-interactive. */
680    
681     if (rc != 0)
682     {
683     (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
684     fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
685     goto SKIP_DATA;
686     }
687     }
688    
689     /* Handle compiling via the native interface */
690    
691     else
692 nigel 37 #endif /* !defined NOPOSIX */
693    
694 nigel 3 {
695     if (timeit)
696     {
697     register int i;
698     clock_t time_taken;
699     clock_t start_time = clock();
700 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
701 nigel 3 {
702 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
703 nigel 3 if (re != NULL) free(re);
704     }
705     time_taken = clock() - start_time;
706 nigel 27 fprintf(outfile, "Compile time %.3f milliseconds\n",
707     ((double)time_taken * 1000.0) /
708     ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
709 nigel 3 }
710    
711 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
712 nigel 3
713     /* Compilation failed; go back for another re, skipping to blank line
714     if non-interactive. */
715    
716     if (re == NULL)
717     {
718     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
719     SKIP_DATA:
720     if (infile != stdin)
721     {
722     for (;;)
723     {
724     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
725 nigel 11 {
726     done = 1;
727     goto CONTINUE;
728     }
729 nigel 3 len = (int)strlen((char *)buffer);
730     while (len > 0 && isspace(buffer[len-1])) len--;
731     if (len == 0) break;
732     }
733     fprintf(outfile, "\n");
734     }
735 nigel 25 goto CONTINUE;
736 nigel 3 }
737    
738 nigel 43 /* Compilation succeeded; print data if required. There are now two
739     info-returning functions. The old one has a limited interface and
740     returns only limited data. Check that it agrees with the newer one. */
741 nigel 3
742 nigel 25 if (do_showinfo)
743 nigel 3 {
744 nigel 53 unsigned long int get_options;
745 nigel 43 int old_first_char, old_options, old_count;
746     int count, backrefmax, first_char, need_char;
747     size_t size;
748 nigel 3
749 nigel 37 if (do_debug) print_internals(re);
750 nigel 3
751 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
752 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
753     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
754     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
755     new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
756     new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
757    
758     old_count = pcre_info(re, &old_options, &old_first_char);
759 nigel 3 if (count < 0) fprintf(outfile,
760 nigel 43 "Error %d from pcre_info()\n", count);
761 nigel 3 else
762     {
763 nigel 43 if (old_count != count) fprintf(outfile,
764     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
765     old_count);
766 nigel 37
767 nigel 43 if (old_first_char != first_char) fprintf(outfile,
768     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
769     first_char, old_first_char);
770 nigel 37
771 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
772     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
773     get_options, old_options);
774 nigel 43 }
775    
776     if (size != gotten_store) fprintf(outfile,
777     "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
778     size, gotten_store);
779    
780     fprintf(outfile, "Capturing subpattern count = %d\n", count);
781     if (backrefmax > 0)
782     fprintf(outfile, "Max back reference = %d\n", backrefmax);
783 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
784 nigel 49 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
785 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
786     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
787     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
788     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
789     ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
790     ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
791     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
792     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
793     ((get_options & PCRE_UTF8) != 0)? " utf8" : "");
794 nigel 43
795     if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
796     fprintf(outfile, "Case state changes\n");
797    
798     if (first_char == -1)
799     {
800     fprintf(outfile, "First char at start or follows \\n\n");
801     }
802     else if (first_char < 0)
803     {
804     fprintf(outfile, "No first char\n");
805     }
806     else
807     {
808     if (isprint(first_char))
809     fprintf(outfile, "First char = \'%c\'\n", first_char);
810 nigel 3 else
811 nigel 43 fprintf(outfile, "First char = %d\n", first_char);
812     }
813 nigel 37
814 nigel 43 if (need_char < 0)
815     {
816     fprintf(outfile, "No need char\n");
817 nigel 3 }
818 nigel 43 else
819     {
820     if (isprint(need_char))
821     fprintf(outfile, "Need char = \'%c\'\n", need_char);
822     else
823     fprintf(outfile, "Need char = %d\n", need_char);
824     }
825 nigel 3 }
826    
827     /* If /S was present, study the regexp to generate additional info to
828     help with the matching. */
829    
830     if (do_study)
831     {
832     if (timeit)
833     {
834     register int i;
835     clock_t time_taken;
836     clock_t start_time = clock();
837 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
838 nigel 3 extra = pcre_study(re, study_options, &error);
839     time_taken = clock() - start_time;
840     if (extra != NULL) free(extra);
841 nigel 27 fprintf(outfile, " Study time %.3f milliseconds\n",
842     ((double)time_taken * 1000.0)/
843     ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
844 nigel 3 }
845    
846     extra = pcre_study(re, study_options, &error);
847     if (error != NULL)
848     fprintf(outfile, "Failed to study: %s\n", error);
849     else if (extra == NULL)
850     fprintf(outfile, "Study returned NULL\n");
851    
852 nigel 25 else if (do_showinfo)
853 nigel 3 {
854 nigel 43 uschar *start_bits = NULL;
855     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
856     if (start_bits == NULL)
857 nigel 3 fprintf(outfile, "No starting character set\n");
858     else
859     {
860     int i;
861     int c = 24;
862     fprintf(outfile, "Starting character set: ");
863     for (i = 0; i < 256; i++)
864     {
865 nigel 43 if ((start_bits[i/8] & (1<<(i%8))) != 0)
866 nigel 3 {
867     if (c > 75)
868     {
869     fprintf(outfile, "\n ");
870     c = 2;
871     }
872     if (isprint(i) && i != ' ')
873     {
874     fprintf(outfile, "%c ", i);
875     c += 2;
876     }
877     else
878     {
879     fprintf(outfile, "\\x%02x ", i);
880     c += 5;
881     }
882     }
883     }
884     fprintf(outfile, "\n");
885     }
886     }
887     }
888     }
889    
890     /* Read data lines and test them */
891    
892     for (;;)
893     {
894 nigel 9 unsigned char *q;
895 nigel 35 unsigned char *bptr = dbuffer;
896 nigel 53 int use_size_offsets = size_offsets;
897 nigel 3 int count, c;
898 nigel 29 int copystrings = 0;
899     int getstrings = 0;
900     int getlist = 0;
901 nigel 39 int gmatched = 0;
902 nigel 35 int start_offset = 0;
903 nigel 41 int g_notempty = 0;
904 nigel 3
905     options = 0;
906    
907 nigel 35 if (infile == stdin) printf("data> ");
908 nigel 11 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
909     {
910     done = 1;
911     goto CONTINUE;
912     }
913 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
914 nigel 3
915     len = (int)strlen((char *)buffer);
916     while (len > 0 && isspace(buffer[len-1])) len--;
917     buffer[len] = 0;
918     if (len == 0) break;
919    
920     p = buffer;
921     while (isspace(*p)) p++;
922    
923 nigel 9 q = dbuffer;
924 nigel 3 while ((c = *p++) != 0)
925     {
926     int i = 0;
927     int n = 0;
928     if (c == '\\') switch ((c = *p++))
929     {
930     case 'a': c = 7; break;
931     case 'b': c = '\b'; break;
932     case 'e': c = 27; break;
933     case 'f': c = '\f'; break;
934     case 'n': c = '\n'; break;
935     case 'r': c = '\r'; break;
936     case 't': c = '\t'; break;
937     case 'v': c = '\v'; break;
938    
939     case '0': case '1': case '2': case '3':
940     case '4': case '5': case '6': case '7':
941     c -= '0';
942     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
943     c = c * 8 + *p++ - '0';
944     break;
945    
946     case 'x':
947 nigel 49
948     /* Handle \x{..} specially - new Perl thing for utf8 */
949    
950     if (*p == '{')
951     {
952     unsigned char *pt = p;
953     c = 0;
954     while (isxdigit(*(++pt)))
955     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
956     if (*pt == '}')
957     {
958     unsigned char buffer[8];
959     int ii, utn;
960     utn = ord2utf8(c, buffer);
961     for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];
962     c = buffer[ii]; /* Last byte */
963     p = pt + 1;
964     break;
965     }
966     /* Not correct form; fall through */
967     }
968    
969     /* Ordinary \x */
970    
971 nigel 3 c = 0;
972     while (i++ < 2 && isxdigit(*p))
973     {
974     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
975     p++;
976     }
977     break;
978    
979     case 0: /* Allows for an empty line */
980     p--;
981     continue;
982    
983     case 'A': /* Option setting */
984     options |= PCRE_ANCHORED;
985     continue;
986    
987     case 'B':
988     options |= PCRE_NOTBOL;
989     continue;
990    
991 nigel 29 case 'C':
992     while(isdigit(*p)) n = n * 10 + *p++ - '0';
993     copystrings |= 1 << n;
994     continue;
995    
996     case 'G':
997     while(isdigit(*p)) n = n * 10 + *p++ - '0';
998     getstrings |= 1 << n;
999     continue;
1000    
1001     case 'L':
1002     getlist = 1;
1003     continue;
1004    
1005 nigel 37 case 'N':
1006     options |= PCRE_NOTEMPTY;
1007     continue;
1008    
1009 nigel 3 case 'O':
1010     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1011 nigel 53 if (n > size_offsets_max)
1012     {
1013 nigel 55
1014     if (offsets != NULL)
1015    
1016 nigel 53 free(offsets);
1017     size_offsets_max = n;
1018     offsets = malloc(size_offsets_max * sizeof(int));
1019     if (offsets == NULL)
1020     {
1021     printf("** Failed to get %d bytes of memory for offsets vector\n",
1022     size_offsets_max * sizeof(int));
1023     return 1;
1024     }
1025     }
1026     use_size_offsets = n;
1027 nigel 55
1028     if (n == 0)
1029     {
1030     free(offsets);
1031     offsets = NULL;
1032     size_offsets_max = 0;
1033     }
1034    
1035 nigel 3 continue;
1036    
1037     case 'Z':
1038     options |= PCRE_NOTEOL;
1039     continue;
1040     }
1041 nigel 9 *q++ = c;
1042 nigel 3 }
1043 nigel 9 *q = 0;
1044     len = q - dbuffer;
1045 nigel 3
1046     /* Handle matching via the POSIX interface, which does not
1047     support timing. */
1048    
1049 nigel 37 #if !defined NOPOSIX
1050 nigel 3 if (posix || do_posix)
1051     {
1052     int rc;
1053     int eflags = 0;
1054 nigel 53 regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
1055 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1056     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1057    
1058 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1059 nigel 3
1060     if (rc != 0)
1061     {
1062     (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
1063     fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1064     }
1065     else
1066     {
1067 nigel 7 size_t i;
1068 nigel 53 for (i = 0; i < use_size_offsets; i++)
1069 nigel 3 {
1070     if (pmatch[i].rm_so >= 0)
1071     {
1072 nigel 23 fprintf(outfile, "%2d: ", (int)i);
1073 nigel 3 pchars(dbuffer + pmatch[i].rm_so,
1074 nigel 49 pmatch[i].rm_eo - pmatch[i].rm_so, utf8);
1075 nigel 3 fprintf(outfile, "\n");
1076 nigel 35 if (i == 0 && do_showrest)
1077     {
1078     fprintf(outfile, " 0+ ");
1079 nigel 49 pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);
1080 nigel 35 fprintf(outfile, "\n");
1081     }
1082 nigel 3 }
1083     }
1084     }
1085 nigel 53 free(pmatch);
1086 nigel 3 }
1087    
1088 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
1089 nigel 3
1090 nigel 37 else
1091     #endif /* !defined NOPOSIX */
1092    
1093 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
1094 nigel 3 {
1095     if (timeit)
1096     {
1097     register int i;
1098     clock_t time_taken;
1099     clock_t start_time = clock();
1100 nigel 27 for (i = 0; i < LOOPREPEAT; i++)
1101 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
1102 nigel 53 start_offset, options | g_notempty, offsets, use_size_offsets);
1103 nigel 3 time_taken = clock() - start_time;
1104 nigel 27 fprintf(outfile, "Execute time %.3f milliseconds\n",
1105     ((double)time_taken * 1000.0)/
1106     ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
1107 nigel 3 }
1108    
1109 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
1110 nigel 53 start_offset, options | g_notempty, offsets, use_size_offsets);
1111 nigel 3
1112     if (count == 0)
1113     {
1114     fprintf(outfile, "Matched, but too many substrings\n");
1115 nigel 53 count = use_size_offsets/3;
1116 nigel 3 }
1117    
1118 nigel 39 /* Matched */
1119    
1120 nigel 3 if (count >= 0)
1121     {
1122     int i;
1123 nigel 29 for (i = 0; i < count * 2; i += 2)
1124 nigel 3 {
1125     if (offsets[i] < 0)
1126     fprintf(outfile, "%2d: <unset>\n", i/2);
1127     else
1128     {
1129     fprintf(outfile, "%2d: ", i/2);
1130 nigel 49 pchars(bptr + offsets[i], offsets[i+1] - offsets[i], utf8);
1131 nigel 3 fprintf(outfile, "\n");
1132 nigel 35 if (i == 0)
1133     {
1134     if (do_showrest)
1135     {
1136     fprintf(outfile, " 0+ ");
1137 nigel 49 pchars(bptr + offsets[i+1], len - offsets[i+1], utf8);
1138 nigel 35 fprintf(outfile, "\n");
1139     }
1140     }
1141 nigel 3 }
1142     }
1143 nigel 29
1144     for (i = 0; i < 32; i++)
1145     {
1146     if ((copystrings & (1 << i)) != 0)
1147     {
1148 nigel 37 char copybuffer[16];
1149 nigel 35 int rc = pcre_copy_substring((char *)bptr, offsets, count,
1150 nigel 37 i, copybuffer, sizeof(copybuffer));
1151 nigel 29 if (rc < 0)
1152     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1153     else
1154 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1155 nigel 29 }
1156     }
1157    
1158     for (i = 0; i < 32; i++)
1159     {
1160     if ((getstrings & (1 << i)) != 0)
1161     {
1162     const char *substring;
1163 nigel 35 int rc = pcre_get_substring((char *)bptr, offsets, count,
1164 nigel 29 i, &substring);
1165     if (rc < 0)
1166     fprintf(outfile, "get substring %d failed %d\n", i, rc);
1167     else
1168     {
1169     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1170 nigel 49 /* free((void *)substring); */
1171     pcre_free_substring(substring);
1172 nigel 29 }
1173     }
1174     }
1175    
1176     if (getlist)
1177     {
1178     const char **stringlist;
1179 nigel 35 int rc = pcre_get_substring_list((char *)bptr, offsets, count,
1180 nigel 29 &stringlist);
1181     if (rc < 0)
1182     fprintf(outfile, "get substring list failed %d\n", rc);
1183     else
1184     {
1185     for (i = 0; i < count; i++)
1186     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1187     if (stringlist[i] != NULL)
1188     fprintf(outfile, "string list not terminated by NULL\n");
1189 nigel 49 /* free((void *)stringlist); */
1190     pcre_free_substring_list(stringlist);
1191 nigel 29 }
1192     }
1193 nigel 39 }
1194 nigel 29
1195 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
1196 nigel 47 g_notempty after a null match, this is not necessarily the end.
1197 nigel 41 We want to advance the start offset, and continue. Fudge the offset
1198     values to achieve this. We won't be at the end of the string - that
1199 nigel 47 was checked before setting g_notempty. */
1200 nigel 39
1201 nigel 3 else
1202     {
1203 nigel 41 if (g_notempty != 0)
1204 nigel 35 {
1205 nigel 41 offsets[0] = start_offset;
1206     offsets[1] = start_offset + 1;
1207 nigel 35 }
1208 nigel 41 else
1209     {
1210     if (gmatched == 0) /* Error if no previous matches */
1211     {
1212     if (count == -1) fprintf(outfile, "No match\n");
1213     else fprintf(outfile, "Error %d\n", count);
1214     }
1215     break; /* Out of the /g loop */
1216     }
1217 nigel 3 }
1218 nigel 35
1219 nigel 39 /* If not /g or /G we are done */
1220    
1221     if (!do_g && !do_G) break;
1222    
1223 nigel 41 /* If we have matched an empty string, first check to see if we are at
1224     the end of the subject. If so, the /g loop is over. Otherwise, mimic
1225     what Perl's /g option does. This turns out to be rather cunning. First
1226 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1227     same point. If this fails (picked up above) we advance to the next
1228     character. */
1229 nigel 39
1230 nigel 41 g_notempty = 0;
1231     if (offsets[0] == offsets[1])
1232     {
1233     if (offsets[0] == len) break;
1234 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1235 nigel 41 }
1236 nigel 39
1237     /* For /g, update the start offset, leaving the rest alone */
1238    
1239     if (do_g) start_offset = offsets[1];
1240    
1241     /* For /G, update the pointer and length */
1242    
1243     else
1244 nigel 35 {
1245 nigel 39 bptr += offsets[1];
1246     len -= offsets[1];
1247 nigel 35 }
1248 nigel 39 } /* End of loop for /g and /G */
1249     } /* End of loop for data lines */
1250 nigel 3
1251 nigel 11 CONTINUE:
1252 nigel 37
1253     #if !defined NOPOSIX
1254 nigel 3 if (posix || do_posix) regfree(&preg);
1255 nigel 37 #endif
1256    
1257 nigel 3 if (re != NULL) free(re);
1258     if (extra != NULL) free(extra);
1259 nigel 25 if (tables != NULL)
1260     {
1261     free((void *)tables);
1262     setlocale(LC_CTYPE, "C");
1263     }
1264 nigel 3 }
1265    
1266     fprintf(outfile, "\n");
1267     return 0;
1268     }
1269    
1270     /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12