/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 59 - (hide annotations) (download)
Sat Feb 24 21:39:54 2007 UTC (7 years, 2 months ago) by nigel
File MIME type: text/plain
File size: 33785 byte(s)
Load pcre-3.8 into code/trunk.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5     #include <ctype.h>
6     #include <stdio.h>
7     #include <string.h>
8     #include <stdlib.h>
9     #include <time.h>
10 nigel 25 #include <locale.h>
11 nigel 3
12     /* Use the internal info for displaying the results of pcre_study(). */
13    
14     #include "internal.h"
15 nigel 37
16     /* It is possible to compile this test program without including support for
17     testing the POSIX interface, though this is not available via the standard
18     Makefile. */
19    
20     #if !defined NOPOSIX
21 nigel 3 #include "pcreposix.h"
22 nigel 37 #endif
23 nigel 3
24     #ifndef CLOCKS_PER_SEC
25     #ifdef CLK_TCK
26     #define CLOCKS_PER_SEC CLK_TCK
27     #else
28     #define CLOCKS_PER_SEC 100
29     #endif
30     #endif
31    
32 nigel 27 #define LOOPREPEAT 20000
33 nigel 3
34 nigel 23
35 nigel 3 static FILE *outfile;
36     static int log_store = 0;
37 nigel 43 static size_t gotten_store;
38 nigel 3
39    
40    
/*************************************************
*        Tables for the UTF-8 helpers            *
*************************************************/

/* In all three tables, index n describes an encoding that consists of one
leading byte followed by n additional bytes. The tables are only read, so
they are declared const. */

/* Largest character value that fits in an encoding with n additional bytes. */

static const int utf8_table1[] = {
  0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};

/* Marker bits that are ORed into the leading byte of such an encoding. */

static const int utf8_table2[] = {
  0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};

/* Mask that selects the data bits of the leading byte of such an encoding. */

static const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};


/*************************************************
*       Convert character value to UTF-8         *
*************************************************/

/* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 1 to 6 bytes.

Arguments:
  cvalue     the character value
  buffer     pointer to buffer for result - at least 6 bytes long

Returns:     number of bytes placed in the buffer (1 to 6)
             -1 if input character is negative
             0 if input character is positive but too big (only when
             int is longer than 32 bits)

Nothing is written to the buffer when the result is -1 or 0.
*/

static int
ord2utf8(int cvalue, unsigned char *buffer)
{
const int table_size = (int)(sizeof(utf8_table1)/sizeof(utf8_table1[0]));
int extra, k;

if (cvalue < 0) return -1;

/* Find the shortest encoding that can hold the value */

for (extra = 0; extra < table_size; extra++)
  if (cvalue <= utf8_table1[extra]) break;
if (extra >= table_size) return 0;

/* Fill in the additional bytes from the last one backwards, six data bits
at a time; whatever is left over goes into the leading byte. */

buffer += extra;
for (k = extra; k > 0; k--)
  {
  *buffer-- = (unsigned char)(0x80 | (cvalue & 0x3f));
  cvalue >>= 6;
  }
*buffer = (unsigned char)(utf8_table2[extra] | cvalue);
return extra + 1;
}


/*************************************************
*            Convert UTF-8 string to value       *
*************************************************/

/* This function takes one or more bytes that represents a UTF-8 character,
and returns the value of the character.

There is no length argument: the function keeps reading additional bytes
until it has them all or meets a byte that is not of the form 10xxxxxx, so
the caller must make sure the data is terminated by such a byte (a zero byte
will do).

Arguments:
  buffer   a pointer to the byte vector
  vptr     a pointer to an int to receive the value; it is left untouched
           when the function fails

Returns:   >  0 => the number of bytes consumed
           -6 to 0 => malformed UTF-8 character at offset = (-return):
              0         the first byte cannot start a character (it is
                        10xxxxxx, 0xfe or 0xff)
              -(k+1)    the k-th additional byte (counting from 0) is not
                        of the form 10xxxxxx
              -(n+1)    all n additional bytes were read, but the value
                        could have been encoded in a different number of
                        bytes
*/

int
utf82ord(unsigned char *buffer, int *vptr)
{
const int table_size = (int)(sizeof(utf8_table1)/sizeof(utf8_table1[0]));
int first = *buffer++;
int bits = first;
int extra, k, shift, value;

/* Count the leading 1 bits of the first byte; one less than that count is
the number of additional bytes. */

for (extra = -1; extra < 6; extra++)
  {
  if ((bits & 0x80) == 0) break;
  bits <<= 1;
  }

if (extra == -1) { *vptr = first; return 1; }   /* ascii character */
if (extra == 0 || extra == 6) return 0;         /* invalid UTF-8 */

/* extra now has a value in the range 1-5 */

shift = 6*extra;
value = (first & utf8_table3[extra]) << shift;

for (k = 0; k < extra; k++)
  {
  int next = *buffer++;
  if ((next & 0xc0) != 0x80) return -(k+1);
  shift -= 6;
  value |= (next & 0x3f) << shift;
  }

/* Check that encoding was the correct unique one */

for (k = 0; k < table_size; k++)
  if (value <= utf8_table1[k]) break;
if (k != extra) return -(extra+1);

/* Valid value */

*vptr = value;
return extra+1;
}
143    
144    
145    
146    
147    
148    
149 nigel 3 /* Debugging function to print the internal form of the regex. This is the same
150     code as contained in pcre.c under the DEBUG macro. */
151    
/* Printable names of the opcodes. print_internals() indexes this table
directly with opcode bytes taken from the compiled pattern, so the entries
must stay in opcode order (presumably the order of the OP_xxx definitions in
internal.h - confirm there before adding or moving an entry). Neither the
strings nor the pointers are ever modified, hence the double const. */

static const char * const OP_names[] = {
  /* Assertions and character types */
  "End", "\\A", "\\B", "\\b", "\\D", "\\d",
  "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
  "Opt", "^", "$", "Any", "chars", "not",
  /* Four runs of repeat operators; the last run has no third "{" entry */
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{",
  "class", "Ref", "Recurse",
  /* Group structure */
  "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
  "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
  "Brazero", "Braminzero", "Branumber", "Bra"
};
165    
166    
167 nigel 37 static void print_internals(pcre *re)
168 nigel 3 {
169     unsigned char *code = ((real_pcre *)re)->code;
170    
171 nigel 23 fprintf(outfile, "------------------------------------------------------------------\n");
172 nigel 3
173     for(;;)
174     {
175     int c;
176     int charlength;
177    
178 nigel 23 fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));
179 nigel 3
180     if (*code >= OP_BRA)
181     {
182 nigel 53 if (*code - OP_BRA > EXTRACT_BASIC_MAX)
183     fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);
184     else
185     fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
186 nigel 3 code += 2;
187     }
188    
189     else switch(*code)
190     {
191     case OP_END:
192 nigel 23 fprintf(outfile, " %s\n", OP_names[*code]);
193     fprintf(outfile, "------------------------------------------------------------------\n");
194 nigel 3 return;
195    
196 nigel 23 case OP_OPT:
197     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);
198     code++;
199     break;
200    
201 nigel 3 case OP_CHARS:
202     charlength = *(++code);
203 nigel 23 fprintf(outfile, "%3d ", charlength);
204 nigel 3 while (charlength-- > 0)
205 nigel 23 if (isprint(c = *(++code))) fprintf(outfile, "%c", c);
206     else fprintf(outfile, "\\x%02x", c);
207 nigel 3 break;
208    
209     case OP_KETRMAX:
210     case OP_KETRMIN:
211     case OP_ALT:
212     case OP_KET:
213     case OP_ASSERT:
214     case OP_ASSERT_NOT:
215 nigel 23 case OP_ASSERTBACK:
216     case OP_ASSERTBACK_NOT:
217 nigel 3 case OP_ONCE:
218 nigel 53 case OP_COND:
219     case OP_BRANUMBER:
220 nigel 23 case OP_REVERSE:
221 nigel 53 case OP_CREF:
222 nigel 23 fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
223     code += 2;
224     break;
225    
226 nigel 3 case OP_STAR:
227     case OP_MINSTAR:
228     case OP_PLUS:
229     case OP_MINPLUS:
230     case OP_QUERY:
231     case OP_MINQUERY:
232     case OP_TYPESTAR:
233     case OP_TYPEMINSTAR:
234     case OP_TYPEPLUS:
235     case OP_TYPEMINPLUS:
236     case OP_TYPEQUERY:
237     case OP_TYPEMINQUERY:
238     if (*code >= OP_TYPESTAR)
239 nigel 23 fprintf(outfile, " %s", OP_names[code[1]]);
240     else if (isprint(c = code[1])) fprintf(outfile, " %c", c);
241     else fprintf(outfile, " \\x%02x", c);
242     fprintf(outfile, "%s", OP_names[*code++]);
243 nigel 3 break;
244    
245     case OP_EXACT:
246     case OP_UPTO:
247     case OP_MINUPTO:
248 nigel 23 if (isprint(c = code[3])) fprintf(outfile, " %c{", c);
249     else fprintf(outfile, " \\x%02x{", c);
250     if (*code != OP_EXACT) fprintf(outfile, ",");
251     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
252     if (*code == OP_MINUPTO) fprintf(outfile, "?");
253 nigel 3 code += 3;
254     break;
255    
256     case OP_TYPEEXACT:
257     case OP_TYPEUPTO:
258     case OP_TYPEMINUPTO:
259 nigel 23 fprintf(outfile, " %s{", OP_names[code[3]]);
260     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");
261     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
262     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");
263 nigel 3 code += 3;
264     break;
265    
266     case OP_NOT:
267 nigel 23 if (isprint(c = *(++code))) fprintf(outfile, " [^%c]", c);
268     else fprintf(outfile, " [^\\x%02x]", c);
269 nigel 3 break;
270    
271     case OP_NOTSTAR:
272     case OP_NOTMINSTAR:
273     case OP_NOTPLUS:
274     case OP_NOTMINPLUS:
275     case OP_NOTQUERY:
276     case OP_NOTMINQUERY:
277 nigel 23 if (isprint(c = code[1])) fprintf(outfile, " [^%c]", c);
278     else fprintf(outfile, " [^\\x%02x]", c);
279     fprintf(outfile, "%s", OP_names[*code++]);
280 nigel 3 break;
281    
282     case OP_NOTEXACT:
283     case OP_NOTUPTO:
284     case OP_NOTMINUPTO:
285 nigel 23 if (isprint(c = code[3])) fprintf(outfile, " [^%c]{", c);
286     else fprintf(outfile, " [^\\x%02x]{", c);
287     if (*code != OP_NOTEXACT) fprintf(outfile, ",");
288     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
289     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");
290 nigel 3 code += 3;
291     break;
292    
293     case OP_REF:
294 nigel 53 fprintf(outfile, " \\%d", (code[1] << 8) | code[2]);
295     code += 3;
296 nigel 9 goto CLASS_REF_REPEAT;
297 nigel 3
298     case OP_CLASS:
299     {
300     int i, min, max;
301 nigel 23 code++;
302     fprintf(outfile, " [");
303 nigel 3
304     for (i = 0; i < 256; i++)
305     {
306     if ((code[i/8] & (1 << (i&7))) != 0)
307     {
308     int j;
309     for (j = i+1; j < 256; j++)
310     if ((code[j/8] & (1 << (j&7))) == 0) break;
311 nigel 23 if (i == '-' || i == ']') fprintf(outfile, "\\");
312     if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);
313 nigel 3 if (--j > i)
314     {
315 nigel 23 fprintf(outfile, "-");
316     if (j == '-' || j == ']') fprintf(outfile, "\\");
317     if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);
318 nigel 3 }
319     i = j;
320     }
321     }
322 nigel 23 fprintf(outfile, "]");
323 nigel 3 code += 32;
324    
325 nigel 9 CLASS_REF_REPEAT:
326    
327 nigel 3 switch(*code)
328     {
329     case OP_CRSTAR:
330     case OP_CRMINSTAR:
331     case OP_CRPLUS:
332     case OP_CRMINPLUS:
333     case OP_CRQUERY:
334     case OP_CRMINQUERY:
335 nigel 23 fprintf(outfile, "%s", OP_names[*code]);
336 nigel 3 break;
337    
338     case OP_CRRANGE:
339     case OP_CRMINRANGE:
340     min = (code[1] << 8) + code[2];
341     max = (code[3] << 8) + code[4];
342 nigel 23 if (max == 0) fprintf(outfile, "{%d,}", min);
343     else fprintf(outfile, "{%d,%d}", min, max);
344     if (*code == OP_CRMINRANGE) fprintf(outfile, "?");
345 nigel 3 code += 4;
346     break;
347    
348     default:
349     code--;
350     }
351     }
352     break;
353    
354     /* Anything else is just a one-node item */
355    
356     default:
357 nigel 23 fprintf(outfile, " %s", OP_names[*code]);
358 nigel 3 break;
359     }
360    
361     code++;
362 nigel 23 fprintf(outfile, "\n");
363 nigel 3 }
364     }
365    
366    
367    
368 nigel 49 /* Character string printing function. A "normal" and a UTF-8 version. */
369 nigel 3
370 nigel 49 static void pchars(unsigned char *p, int length, int utf8)
371 nigel 3 {
372     int c;
373     while (length-- > 0)
374 nigel 49 {
375     if (utf8)
376     {
377     int rc = utf82ord(p, &c);
378     if (rc > 0)
379     {
380     length -= rc - 1;
381     p += rc;
382     if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);
383     else fprintf(outfile, "\\x{%02x}", c);
384     continue;
385     }
386     }
387    
388     /* Not UTF-8, or malformed UTF-8 */
389    
390 nigel 3 if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
391     else fprintf(outfile, "\\x%02x", c);
392 nigel 49 }
393 nigel 3 }
394    
395    
396    
397     /* Alternative malloc function, to test functionality and show the size of the
398     compiled re. */
399    
400     static void *new_malloc(size_t size)
401     {
402 nigel 43 gotten_store = size;
403 nigel 31 if (log_store)
404 nigel 35 fprintf(outfile, "Memory allocation (code space): %d\n",
405     (int)((int)size - offsetof(real_pcre, code[0])));
406 nigel 3 return malloc(size);
407     }
408    
409    
410    
411 nigel 43
412     /* Get one piece of information from the pcre_fullinfo() function */
413    
414     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
415     {
416     int rc;
417     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
418     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
419     }
420    
421    
422    
423    
424 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
425     consist of a regular expression, in delimiters and optionally followed by
426     options, followed by a set of test data, terminated by an empty line. */
427    
428     int main(int argc, char **argv)
429     {
430     FILE *infile = stdin;
431     int options = 0;
432     int study_options = 0;
433     int op = 1;
434     int timeit = 0;
435     int showinfo = 0;
436 nigel 31 int showstore = 0;
437 nigel 53 int size_offsets = 45;
438     int size_offsets_max;
439     int *offsets;
440     #if !defined NOPOSIX
441 nigel 3 int posix = 0;
442 nigel 53 #endif
443 nigel 3 int debug = 0;
444 nigel 11 int done = 0;
445 nigel 3 unsigned char buffer[30000];
446     unsigned char dbuffer[1024];
447    
448     /* Static so that new_malloc can use it. */
449    
450     outfile = stdout;
451    
452     /* Scan options */
453    
454     while (argc > 1 && argv[op][0] == '-')
455     {
456 nigel 53 char *endptr;
457    
458 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
459     showstore = 1;
460 nigel 3 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
461     else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
462     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
463 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
464 nigel 59 ((size_offsets = (int)strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
465 nigel 53 {
466     op++;
467     argc--;
468     }
469     #if !defined NOPOSIX
470 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
471 nigel 53 #endif
472 nigel 3 else
473     {
474 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
475     printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
476     printf(" -d debug: show compiled code; implies -i\n"
477     " -i show information about compiled pattern\n"
478     " -o <n> set size of offsets vector to <n>\n");
479     #if !defined NOPOSIX
480     printf(" -p use POSIX interface\n");
481     #endif
482     printf(" -s output store information\n"
483     " -t time compilation and execution\n");
484 nigel 3 return 1;
485     }
486     op++;
487     argc--;
488     }
489    
490 nigel 53 /* Get the store for the offsets vector, and remember what it was */
491    
492     size_offsets_max = size_offsets;
493     offsets = malloc(size_offsets_max * sizeof(int));
494     if (offsets == NULL)
495     {
496     printf("** Failed to get %d bytes of memory for offsets vector\n",
497     size_offsets_max * sizeof(int));
498     return 1;
499     }
500    
501 nigel 3 /* Sort out the input and output files */
502    
503     if (argc > 1)
504     {
505     infile = fopen(argv[op], "r");
506     if (infile == NULL)
507     {
508     printf("** Failed to open %s\n", argv[op]);
509     return 1;
510     }
511     }
512    
513     if (argc > 2)
514     {
515     outfile = fopen(argv[op+1], "w");
516     if (outfile == NULL)
517     {
518     printf("** Failed to open %s\n", argv[op+1]);
519     return 1;
520     }
521     }
522    
523     /* Set alternative malloc function */
524    
525     pcre_malloc = new_malloc;
526    
527 nigel 23 /* Heading line, then prompt for first regex if stdin */
528 nigel 3
529     fprintf(outfile, "PCRE version %s\n\n", pcre_version());
530    
531     /* Main loop */
532    
533 nigel 11 while (!done)
534 nigel 3 {
535     pcre *re = NULL;
536     pcre_extra *extra = NULL;
537 nigel 37
538     #if !defined NOPOSIX /* There are still compilers that require no indent */
539 nigel 3 regex_t preg;
540 nigel 45 int do_posix = 0;
541 nigel 37 #endif
542    
543 nigel 7 const char *error;
544 nigel 25 unsigned char *p, *pp, *ppp;
545 nigel 53 const unsigned char *tables = NULL;
546 nigel 3 int do_study = 0;
547 nigel 25 int do_debug = debug;
548 nigel 35 int do_G = 0;
549     int do_g = 0;
550 nigel 25 int do_showinfo = showinfo;
551 nigel 35 int do_showrest = 0;
552 nigel 49 int utf8 = 0;
553 nigel 3 int erroroffset, len, delimiter;
554    
555     if (infile == stdin) printf(" re> ");
556     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
557 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
558 nigel 3
559     p = buffer;
560     while (isspace(*p)) p++;
561     if (*p == 0) continue;
562    
563     /* Get the delimiter and seek the end of the pattern; if it isn't
564     complete, read more. */
565    
566     delimiter = *p++;
567    
568 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
569 nigel 3 {
570 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
571 nigel 3 goto SKIP_DATA;
572     }
573    
574     pp = p;
575    
576     for(;;)
577     {
578 nigel 29 while (*pp != 0)
579     {
580     if (*pp == '\\' && pp[1] != 0) pp++;
581     else if (*pp == delimiter) break;
582     pp++;
583     }
584 nigel 3 if (*pp != 0) break;
585    
586     len = sizeof(buffer) - (pp - buffer);
587     if (len < 256)
588     {
589     fprintf(outfile, "** Expression too long - missing delimiter?\n");
590     goto SKIP_DATA;
591     }
592    
593     if (infile == stdin) printf(" > ");
594     if (fgets((char *)pp, len, infile) == NULL)
595     {
596     fprintf(outfile, "** Unexpected EOF\n");
597 nigel 11 done = 1;
598     goto CONTINUE;
599 nigel 3 }
600 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
601 nigel 3 }
602    
603 nigel 29 /* If the first character after the delimiter is backslash, make
604     the pattern end with backslash. This is purely to provide a way
605     of testing for the error message when a pattern ends with backslash. */
606    
607     if (pp[1] == '\\') *pp++ = '\\';
608    
609 nigel 3 /* Terminate the pattern at the delimiter */
610    
611     *pp++ = 0;
612    
613     /* Look for options after final delimiter */
614    
615     options = 0;
616     study_options = 0;
617 nigel 31 log_store = showstore; /* default from command line */
618    
619 nigel 3 while (*pp != 0)
620     {
621     switch (*pp++)
622     {
623 nigel 35 case 'g': do_g = 1; break;
624 nigel 3 case 'i': options |= PCRE_CASELESS; break;
625     case 'm': options |= PCRE_MULTILINE; break;
626     case 's': options |= PCRE_DOTALL; break;
627     case 'x': options |= PCRE_EXTENDED; break;
628 nigel 25
629 nigel 35 case '+': do_showrest = 1; break;
630 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
631 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
632 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
633 nigel 35 case 'G': do_G = 1; break;
634 nigel 25 case 'I': do_showinfo = 1; break;
635 nigel 31 case 'M': log_store = 1; break;
636 nigel 37
637     #if !defined NOPOSIX
638 nigel 3 case 'P': do_posix = 1; break;
639 nigel 37 #endif
640    
641 nigel 3 case 'S': do_study = 1; break;
642 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
643 nigel 3 case 'X': options |= PCRE_EXTRA; break;
644 nigel 49 case '8': options |= PCRE_UTF8; utf8 = 1; break;
645 nigel 25
646     case 'L':
647     ppp = pp;
648     while (*ppp != '\n' && *ppp != ' ') ppp++;
649     *ppp = 0;
650     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
651     {
652     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
653     goto SKIP_DATA;
654     }
655     tables = pcre_maketables();
656     pp = ppp;
657     break;
658    
659 nigel 3 case '\n': case ' ': break;
660     default:
661     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
662     goto SKIP_DATA;
663     }
664     }
665    
666 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
667 nigel 25 timing, showing, or debugging options, nor the ability to pass over
668     local character tables. */
669 nigel 3
670 nigel 37 #if !defined NOPOSIX
671 nigel 3 if (posix || do_posix)
672     {
673     int rc;
674     int cflags = 0;
675     if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
676     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
677     rc = regcomp(&preg, (char *)p, cflags);
678    
679     /* Compilation failed; go back for another re, skipping to blank line
680     if non-interactive. */
681    
682     if (rc != 0)
683     {
684     (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
685     fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
686     goto SKIP_DATA;
687     }
688     }
689    
690     /* Handle compiling via the native interface */
691    
692     else
693 nigel 37 #endif /* !defined NOPOSIX */
694    
695 nigel 3 {
696     if (timeit)
697     {
698     register int i;
699     clock_t time_taken;
700     clock_t start_time = clock();
701 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
702 nigel 3 {
703 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
704 nigel 3 if (re != NULL) free(re);
705     }
706     time_taken = clock() - start_time;
707 nigel 27 fprintf(outfile, "Compile time %.3f milliseconds\n",
708     ((double)time_taken * 1000.0) /
709     ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
710 nigel 3 }
711    
712 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
713 nigel 3
714     /* Compilation failed; go back for another re, skipping to blank line
715     if non-interactive. */
716    
717     if (re == NULL)
718     {
719     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
720     SKIP_DATA:
721     if (infile != stdin)
722     {
723     for (;;)
724     {
725     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
726 nigel 11 {
727     done = 1;
728     goto CONTINUE;
729     }
730 nigel 3 len = (int)strlen((char *)buffer);
731     while (len > 0 && isspace(buffer[len-1])) len--;
732     if (len == 0) break;
733     }
734     fprintf(outfile, "\n");
735     }
736 nigel 25 goto CONTINUE;
737 nigel 3 }
738    
739 nigel 43 /* Compilation succeeded; print data if required. There are now two
740     info-returning functions. The old one has a limited interface and
741     returns only limited data. Check that it agrees with the newer one. */
742 nigel 3
743 nigel 25 if (do_showinfo)
744 nigel 3 {
745 nigel 53 unsigned long int get_options;
746 nigel 43 int old_first_char, old_options, old_count;
747     int count, backrefmax, first_char, need_char;
748     size_t size;
749 nigel 3
750 nigel 37 if (do_debug) print_internals(re);
751 nigel 3
752 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
753 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
754     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
755     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
756     new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
757     new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
758    
759     old_count = pcre_info(re, &old_options, &old_first_char);
760 nigel 3 if (count < 0) fprintf(outfile,
761 nigel 43 "Error %d from pcre_info()\n", count);
762 nigel 3 else
763     {
764 nigel 43 if (old_count != count) fprintf(outfile,
765     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
766     old_count);
767 nigel 37
768 nigel 43 if (old_first_char != first_char) fprintf(outfile,
769     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
770     first_char, old_first_char);
771 nigel 37
772 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
773     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
774     get_options, old_options);
775 nigel 43 }
776    
777     if (size != gotten_store) fprintf(outfile,
778     "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
779     size, gotten_store);
780    
781     fprintf(outfile, "Capturing subpattern count = %d\n", count);
782     if (backrefmax > 0)
783     fprintf(outfile, "Max back reference = %d\n", backrefmax);
784 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
785 nigel 49 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
786 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
787     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
788     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
789     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
790     ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
791     ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
792     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
793     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
794     ((get_options & PCRE_UTF8) != 0)? " utf8" : "");
795 nigel 43
796     if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
797     fprintf(outfile, "Case state changes\n");
798    
799     if (first_char == -1)
800     {
801     fprintf(outfile, "First char at start or follows \\n\n");
802     }
803     else if (first_char < 0)
804     {
805     fprintf(outfile, "No first char\n");
806     }
807     else
808     {
809     if (isprint(first_char))
810     fprintf(outfile, "First char = \'%c\'\n", first_char);
811 nigel 3 else
812 nigel 43 fprintf(outfile, "First char = %d\n", first_char);
813     }
814 nigel 37
815 nigel 43 if (need_char < 0)
816     {
817     fprintf(outfile, "No need char\n");
818 nigel 3 }
819 nigel 43 else
820     {
821     if (isprint(need_char))
822     fprintf(outfile, "Need char = \'%c\'\n", need_char);
823     else
824     fprintf(outfile, "Need char = %d\n", need_char);
825     }
826 nigel 3 }
827    
828     /* If /S was present, study the regexp to generate additional info to
829     help with the matching. */
830    
831     if (do_study)
832     {
833     if (timeit)
834     {
835     register int i;
836     clock_t time_taken;
837     clock_t start_time = clock();
838 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
839 nigel 3 extra = pcre_study(re, study_options, &error);
840     time_taken = clock() - start_time;
841     if (extra != NULL) free(extra);
842 nigel 27 fprintf(outfile, " Study time %.3f milliseconds\n",
843     ((double)time_taken * 1000.0)/
844     ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
845 nigel 3 }
846    
847     extra = pcre_study(re, study_options, &error);
848     if (error != NULL)
849     fprintf(outfile, "Failed to study: %s\n", error);
850     else if (extra == NULL)
851     fprintf(outfile, "Study returned NULL\n");
852    
853 nigel 25 else if (do_showinfo)
854 nigel 3 {
855 nigel 43 uschar *start_bits = NULL;
856     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
857     if (start_bits == NULL)
858 nigel 3 fprintf(outfile, "No starting character set\n");
859     else
860     {
861     int i;
862     int c = 24;
863     fprintf(outfile, "Starting character set: ");
864     for (i = 0; i < 256; i++)
865     {
866 nigel 43 if ((start_bits[i/8] & (1<<(i%8))) != 0)
867 nigel 3 {
868     if (c > 75)
869     {
870     fprintf(outfile, "\n ");
871     c = 2;
872     }
873     if (isprint(i) && i != ' ')
874     {
875     fprintf(outfile, "%c ", i);
876     c += 2;
877     }
878     else
879     {
880     fprintf(outfile, "\\x%02x ", i);
881     c += 5;
882     }
883     }
884     }
885     fprintf(outfile, "\n");
886     }
887     }
888     }
889     }
890    
891     /* Read data lines and test them */
892    
893     for (;;)
894     {
895 nigel 9 unsigned char *q;
896 nigel 35 unsigned char *bptr = dbuffer;
897 nigel 57 int *use_offsets = offsets;
898 nigel 53 int use_size_offsets = size_offsets;
899 nigel 3 int count, c;
900 nigel 29 int copystrings = 0;
901     int getstrings = 0;
902     int getlist = 0;
903 nigel 39 int gmatched = 0;
904 nigel 35 int start_offset = 0;
905 nigel 41 int g_notempty = 0;
906 nigel 3
907     options = 0;
908    
909 nigel 35 if (infile == stdin) printf("data> ");
910 nigel 11 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
911     {
912     done = 1;
913     goto CONTINUE;
914     }
915 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
916 nigel 3
917     len = (int)strlen((char *)buffer);
918     while (len > 0 && isspace(buffer[len-1])) len--;
919     buffer[len] = 0;
920     if (len == 0) break;
921    
922     p = buffer;
923     while (isspace(*p)) p++;
924    
925 nigel 9 q = dbuffer;
926 nigel 3 while ((c = *p++) != 0)
927     {
928     int i = 0;
929     int n = 0;
930     if (c == '\\') switch ((c = *p++))
931     {
932     case 'a': c = 7; break;
933     case 'b': c = '\b'; break;
934     case 'e': c = 27; break;
935     case 'f': c = '\f'; break;
936     case 'n': c = '\n'; break;
937     case 'r': c = '\r'; break;
938     case 't': c = '\t'; break;
939     case 'v': c = '\v'; break;
940    
941     case '0': case '1': case '2': case '3':
942     case '4': case '5': case '6': case '7':
943     c -= '0';
944     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
945     c = c * 8 + *p++ - '0';
946     break;
947    
948     case 'x':
949 nigel 49
950     /* Handle \x{..} specially - new Perl thing for utf8 */
951    
952     if (*p == '{')
953     {
954     unsigned char *pt = p;
955     c = 0;
956     while (isxdigit(*(++pt)))
957     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
958     if (*pt == '}')
959     {
960     unsigned char buffer[8];
961     int ii, utn;
962     utn = ord2utf8(c, buffer);
963     for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];
964     c = buffer[ii]; /* Last byte */
965     p = pt + 1;
966     break;
967     }
968     /* Not correct form; fall through */
969     }
970    
971     /* Ordinary \x */
972    
973 nigel 3 c = 0;
974     while (i++ < 2 && isxdigit(*p))
975     {
976     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
977     p++;
978     }
979     break;
980    
981     case 0: /* Allows for an empty line */
982     p--;
983     continue;
984    
985     case 'A': /* Option setting */
986     options |= PCRE_ANCHORED;
987     continue;
988    
989     case 'B':
990     options |= PCRE_NOTBOL;
991     continue;
992    
993 nigel 29 case 'C':
994     while(isdigit(*p)) n = n * 10 + *p++ - '0';
995     copystrings |= 1 << n;
996     continue;
997    
998     case 'G':
999     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1000     getstrings |= 1 << n;
1001     continue;
1002    
1003     case 'L':
1004     getlist = 1;
1005     continue;
1006    
1007 nigel 37 case 'N':
1008     options |= PCRE_NOTEMPTY;
1009     continue;
1010    
1011 nigel 3 case 'O':
1012     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1013 nigel 53 if (n > size_offsets_max)
1014     {
1015     size_offsets_max = n;
1016 nigel 57 free(offsets);
1017     use_offsets = offsets = malloc(size_offsets_max * sizeof(int));
1018 nigel 53 if (offsets == NULL)
1019     {
1020     printf("** Failed to get %d bytes of memory for offsets vector\n",
1021     size_offsets_max * sizeof(int));
1022     return 1;
1023     }
1024     }
1025     use_size_offsets = n;
1026 nigel 57 if (n == 0) use_offsets = NULL;
1027 nigel 3 continue;
1028    
1029     case 'Z':
1030     options |= PCRE_NOTEOL;
1031     continue;
1032     }
1033 nigel 9 *q++ = c;
1034 nigel 3 }
1035 nigel 9 *q = 0;
1036     len = q - dbuffer;
1037 nigel 3
1038     /* Handle matching via the POSIX interface, which does not
1039     support timing. */
1040    
1041 nigel 37 #if !defined NOPOSIX
1042 nigel 3 if (posix || do_posix)
1043     {
1044     int rc;
1045     int eflags = 0;
1046 nigel 53 regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
1047 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1048     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1049    
1050 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1051 nigel 3
1052     if (rc != 0)
1053     {
1054     (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
1055     fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1056     }
1057     else
1058     {
1059 nigel 7 size_t i;
1060 nigel 53 for (i = 0; i < use_size_offsets; i++)
1061 nigel 3 {
1062     if (pmatch[i].rm_so >= 0)
1063     {
1064 nigel 23 fprintf(outfile, "%2d: ", (int)i);
1065 nigel 3 pchars(dbuffer + pmatch[i].rm_so,
1066 nigel 49 pmatch[i].rm_eo - pmatch[i].rm_so, utf8);
1067 nigel 3 fprintf(outfile, "\n");
1068 nigel 35 if (i == 0 && do_showrest)
1069     {
1070     fprintf(outfile, " 0+ ");
1071 nigel 49 pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);
1072 nigel 35 fprintf(outfile, "\n");
1073     }
1074 nigel 3 }
1075     }
1076     }
1077 nigel 53 free(pmatch);
1078 nigel 3 }
1079    
1080 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
1081 nigel 3
1082 nigel 37 else
1083     #endif /* !defined NOPOSIX */
1084    
1085 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
1086 nigel 3 {
1087     if (timeit)
1088     {
1089     register int i;
1090     clock_t time_taken;
1091     clock_t start_time = clock();
1092 nigel 27 for (i = 0; i < LOOPREPEAT; i++)
1093 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
1094 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1095 nigel 3 time_taken = clock() - start_time;
1096 nigel 27 fprintf(outfile, "Execute time %.3f milliseconds\n",
1097     ((double)time_taken * 1000.0)/
1098     ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
1099 nigel 3 }
1100    
1101 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
1102 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1103 nigel 3
1104     if (count == 0)
1105     {
1106     fprintf(outfile, "Matched, but too many substrings\n");
1107 nigel 53 count = use_size_offsets/3;
1108 nigel 3 }
1109    
1110 nigel 39 /* Matched */
1111    
1112 nigel 3 if (count >= 0)
1113     {
1114     int i;
1115 nigel 29 for (i = 0; i < count * 2; i += 2)
1116 nigel 3 {
1117 nigel 57 if (use_offsets[i] < 0)
1118 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
1119     else
1120     {
1121     fprintf(outfile, "%2d: ", i/2);
1122 nigel 57 pchars(bptr + use_offsets[i], use_offsets[i+1] - use_offsets[i], utf8);
1123 nigel 3 fprintf(outfile, "\n");
1124 nigel 35 if (i == 0)
1125     {
1126     if (do_showrest)
1127     {
1128     fprintf(outfile, " 0+ ");
1129 nigel 57 pchars(bptr + use_offsets[i+1], len - use_offsets[i+1], utf8);
1130 nigel 35 fprintf(outfile, "\n");
1131     }
1132     }
1133 nigel 3 }
1134     }
1135 nigel 29
1136     for (i = 0; i < 32; i++)
1137     {
1138     if ((copystrings & (1 << i)) != 0)
1139     {
1140 nigel 37 char copybuffer[16];
1141 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1142 nigel 37 i, copybuffer, sizeof(copybuffer));
1143 nigel 29 if (rc < 0)
1144     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1145     else
1146 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1147 nigel 29 }
1148     }
1149    
1150     for (i = 0; i < 32; i++)
1151     {
1152     if ((getstrings & (1 << i)) != 0)
1153     {
1154     const char *substring;
1155 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1156 nigel 29 i, &substring);
1157     if (rc < 0)
1158     fprintf(outfile, "get substring %d failed %d\n", i, rc);
1159     else
1160     {
1161     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1162 nigel 49 /* free((void *)substring); */
1163     pcre_free_substring(substring);
1164 nigel 29 }
1165     }
1166     }
1167    
1168     if (getlist)
1169     {
1170     const char **stringlist;
1171 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1172 nigel 29 &stringlist);
1173     if (rc < 0)
1174     fprintf(outfile, "get substring list failed %d\n", rc);
1175     else
1176     {
1177     for (i = 0; i < count; i++)
1178     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1179     if (stringlist[i] != NULL)
1180     fprintf(outfile, "string list not terminated by NULL\n");
1181 nigel 49 /* free((void *)stringlist); */
1182     pcre_free_substring_list(stringlist);
1183 nigel 29 }
1184     }
1185 nigel 39 }
1186 nigel 29
1187 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
1188 nigel 47 g_notempty after a null match, this is not necessarily the end.
1189 nigel 41 We want to advance the start offset, and continue. Fudge the offset
1190     values to achieve this. We won't be at the end of the string - that
1191 nigel 47 was checked before setting g_notempty. */
1192 nigel 39
1193 nigel 3 else
1194     {
1195 nigel 41 if (g_notempty != 0)
1196 nigel 35 {
1197 nigel 57 use_offsets[0] = start_offset;
1198     use_offsets[1] = start_offset + 1;
1199 nigel 35 }
1200 nigel 41 else
1201     {
1202     if (gmatched == 0) /* Error if no previous matches */
1203     {
1204     if (count == -1) fprintf(outfile, "No match\n");
1205     else fprintf(outfile, "Error %d\n", count);
1206     }
1207     break; /* Out of the /g loop */
1208     }
1209 nigel 3 }
1210 nigel 35
1211 nigel 39 /* If not /g or /G we are done */
1212    
1213     if (!do_g && !do_G) break;
1214    
1215 nigel 41 /* If we have matched an empty string, first check to see if we are at
1216     the end of the subject. If so, the /g loop is over. Otherwise, mimic
1217     what Perl's /g option does. This turns out to be rather cunning. First
1218 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1219     same point. If this fails (picked up above) we advance to the next
1220     character. */
1221 nigel 39
1222 nigel 41 g_notempty = 0;
1223 nigel 57 if (use_offsets[0] == use_offsets[1])
1224 nigel 41 {
1225 nigel 57 if (use_offsets[0] == len) break;
1226 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1227 nigel 41 }
1228 nigel 39
1229     /* For /g, update the start offset, leaving the rest alone */
1230    
1231 nigel 57 if (do_g) start_offset = use_offsets[1];
1232 nigel 39
1233     /* For /G, update the pointer and length */
1234    
1235     else
1236 nigel 35 {
1237 nigel 57 bptr += use_offsets[1];
1238     len -= use_offsets[1];
1239 nigel 35 }
1240 nigel 39 } /* End of loop for /g and /G */
1241     } /* End of loop for data lines */
1242 nigel 3
1243 nigel 11 CONTINUE:
1244 nigel 37
1245     #if !defined NOPOSIX
1246 nigel 3 if (posix || do_posix) regfree(&preg);
1247 nigel 37 #endif
1248    
1249 nigel 3 if (re != NULL) free(re);
1250     if (extra != NULL) free(extra);
1251 nigel 25 if (tables != NULL)
1252     {
1253     free((void *)tables);
1254     setlocale(LC_CTYPE, "C");
1255     }
1256 nigel 3 }
1257    
1258     fprintf(outfile, "\n");
1259     return 0;
1260     }
1261    
1262     /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12