/[pcre]/code/tags/pcre-6.2/pcretest.c
ViewVC logotype

Contents of /code/tags/pcre-6.2/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 57 - (hide annotations) (download)
Sat Feb 24 21:39:50 2007 UTC (7 years, 5 months ago) by nigel
Original Path: code/trunk/pcretest.c
File MIME type: text/plain
File size: 33804 byte(s)
Load pcre-3.7 into code/trunk.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5     #include <ctype.h>
6     #include <stdio.h>
7     #include <string.h>
8     #include <stdlib.h>
9     #include <time.h>
10 nigel 25 #include <locale.h>
11 nigel 3
12     /* Use the internal info for displaying the results of pcre_study(). */
13    
14     #include "internal.h"
15 nigel 37
16     /* It is possible to compile this test program without including support for
17     testing the POSIX interface, though this is not available via the standard
18     Makefile. */
19    
20     #if !defined NOPOSIX
21 nigel 3 #include "pcreposix.h"
22 nigel 37 #endif
23 nigel 3
24     #ifndef CLOCKS_PER_SEC
25     #ifdef CLK_TCK
26     #define CLOCKS_PER_SEC CLK_TCK
27     #else
28     #define CLOCKS_PER_SEC 100
29     #endif
30     #endif
31    
32 nigel 27 #define LOOPREPEAT 20000
33 nigel 3
34 nigel 23
35 nigel 3 static FILE *outfile;
36     static int log_store = 0;
37 nigel 43 static size_t gotten_store;
38 nigel 3
39    
40    
/* Tables shared by the UTF-8 encoder and decoder below. Each is indexed by the
number of continuation bytes (0 to 5) that follow the lead byte:

  utf8_table1  the largest character value that fits in that many bytes
  utf8_table2  the marker bits that are set in the lead byte
  utf8_table3  the mask for the data bits carried in the lead byte */

static int utf8_table1[] = {
  0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};

static int utf8_table2[] = {
  0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};

static int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};


/*************************************************
*       Convert character value to UTF-8         *
*************************************************/

/* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 1 to 6 bytes. The most significant
bits go into the lead byte and each following byte carries the next six
bits, most significant group first.

Arguments:
  cvalue     the character value
  buffer     pointer to buffer for result - at least 6 bytes long

Returns:     number of bytes placed in the buffer
             -1 if input character is negative
             0 if input character is positive but too big (only when
             int is longer than 32 bits)
*/

static int
ord2utf8(int cvalue, unsigned char *buffer)
{
register int i, j;
const int ntable = (int)(sizeof(utf8_table1)/sizeof(int));

if (cvalue < 0) return -1;

/* Find the smallest encoding length that can hold the value */

for (i = 0; i < ntable; i++)
  if (cvalue <= utf8_table1[i]) break;
if (i >= ntable) return 0;

/* Fill in the continuation bytes from the last one backwards, so that the
low-order six bits end up in the final byte, then put whatever is left into
the lead byte along with its length marker. */

buffer += i;
for (j = i; j > 0; j--)
  {
  *buffer-- = 0x80 | (cvalue & 0x3f);
  cvalue >>= 6;
  }
*buffer = utf8_table2[i] | (cvalue & utf8_table3[i]);
return i + 1;
}


/*************************************************
*          Convert UTF-8 string to value         *
*************************************************/

/* This function takes one or more bytes that represent a UTF-8 character,
and returns the value of the character.

Argument:
  buffer   a pointer to the byte vector
  vptr     a pointer to an int to receive the value

Returns:   >  0 => the number of bytes consumed
           < 0  => malformed UTF-8 character; the magnitude identifies the
                   byte at which the problem was detected (1 = lead byte)
*/

int
utf82ord(unsigned char *buffer, int *vptr)
{
int c = *buffer++;
int d = c;
int i, j, s;

/* Count the leading one-bits of the first byte */

for (i = -1; i < 6; i++)               /* i is number of additional bytes */
  {
  if ((d & 0x80) == 0) break;
  d <<= 1;
  }

if (i == -1) { *vptr = c; return 1; }  /* ascii character */

/* A lone continuation byte, or 0xfe/0xff. This is reported as a negative
value like every other malformation so that callers can rely on a
positive return meaning success. */

if (i == 0 || i == 6) return -1;

/* i now has a value in the range 1-5. The lead byte holds the most
significant bits, so it is shifted up by six bits for every continuation
byte that follows. */

s = 6*i;
d = (c & utf8_table3[i]) << s;

for (j = 0; j < i; j++)
  {
  c = *buffer++;
  if ((c & 0xc0) != 0x80) return -(j+1);
  s -= 6;
  d |= (c & 0x3f) << s;
  }

/* Check that encoding was the correct unique one, that is, the value could
not have been expressed in fewer bytes. */

for (j = 0; j < (int)(sizeof(utf8_table1)/sizeof(int)); j++)
  if (d <= utf8_table1[j]) break;
if (j != i) return -(i+1);

/* Valid value */

*vptr = d;
return i+1;
}
142    
143    
144    
145    
146    
147    
148 nigel 3 /* Debugging function to print the internal form of the regex. This is the same
149     code as contained in pcre.c under the DEBUG macro. */
150    
/* Printable names for the compiled opcodes. print_internals() indexes this
table directly with opcode bytes taken from the compiled pattern, so the
order of the entries must not be disturbed. NOTE(review): the order is
presumably that of the opcode definitions in internal.h - confirm before
adding or removing an entry. */

static const char *OP_names[] = {
  "End",
  "\\A", "\\B", "\\b", "\\D", "\\d", "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
  "Opt", "^", "$", "Any", "chars", "not",

  /* Three families of nine repeat names followed by one family of eight */

  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{",

  "class", "Ref", "Recurse",
  "Alt", "Ket", "KetRmax", "KetRmin",
  "Assert", "Assert not", "AssertB", "AssertB not",
  "Reverse", "Once", "Cond", "Cref",
  "Brazero", "Braminzero", "Branumber", "Bra"
};
164    
165    
166 nigel 37 static void print_internals(pcre *re)
167 nigel 3 {
168     unsigned char *code = ((real_pcre *)re)->code;
169    
170 nigel 23 fprintf(outfile, "------------------------------------------------------------------\n");
171 nigel 3
172     for(;;)
173     {
174     int c;
175     int charlength;
176    
177 nigel 23 fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));
178 nigel 3
179     if (*code >= OP_BRA)
180     {
181 nigel 53 if (*code - OP_BRA > EXTRACT_BASIC_MAX)
182     fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);
183     else
184     fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
185 nigel 3 code += 2;
186     }
187    
188     else switch(*code)
189     {
190     case OP_END:
191 nigel 23 fprintf(outfile, " %s\n", OP_names[*code]);
192     fprintf(outfile, "------------------------------------------------------------------\n");
193 nigel 3 return;
194    
195 nigel 23 case OP_OPT:
196     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);
197     code++;
198     break;
199    
200 nigel 3 case OP_CHARS:
201     charlength = *(++code);
202 nigel 23 fprintf(outfile, "%3d ", charlength);
203 nigel 3 while (charlength-- > 0)
204 nigel 23 if (isprint(c = *(++code))) fprintf(outfile, "%c", c);
205     else fprintf(outfile, "\\x%02x", c);
206 nigel 3 break;
207    
208     case OP_KETRMAX:
209     case OP_KETRMIN:
210     case OP_ALT:
211     case OP_KET:
212     case OP_ASSERT:
213     case OP_ASSERT_NOT:
214 nigel 23 case OP_ASSERTBACK:
215     case OP_ASSERTBACK_NOT:
216 nigel 3 case OP_ONCE:
217 nigel 53 case OP_COND:
218     case OP_BRANUMBER:
219 nigel 23 case OP_REVERSE:
220 nigel 53 case OP_CREF:
221 nigel 23 fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
222     code += 2;
223     break;
224    
225 nigel 3 case OP_STAR:
226     case OP_MINSTAR:
227     case OP_PLUS:
228     case OP_MINPLUS:
229     case OP_QUERY:
230     case OP_MINQUERY:
231     case OP_TYPESTAR:
232     case OP_TYPEMINSTAR:
233     case OP_TYPEPLUS:
234     case OP_TYPEMINPLUS:
235     case OP_TYPEQUERY:
236     case OP_TYPEMINQUERY:
237     if (*code >= OP_TYPESTAR)
238 nigel 23 fprintf(outfile, " %s", OP_names[code[1]]);
239     else if (isprint(c = code[1])) fprintf(outfile, " %c", c);
240     else fprintf(outfile, " \\x%02x", c);
241     fprintf(outfile, "%s", OP_names[*code++]);
242 nigel 3 break;
243    
244     case OP_EXACT:
245     case OP_UPTO:
246     case OP_MINUPTO:
247 nigel 23 if (isprint(c = code[3])) fprintf(outfile, " %c{", c);
248     else fprintf(outfile, " \\x%02x{", c);
249     if (*code != OP_EXACT) fprintf(outfile, ",");
250     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
251     if (*code == OP_MINUPTO) fprintf(outfile, "?");
252 nigel 3 code += 3;
253     break;
254    
255     case OP_TYPEEXACT:
256     case OP_TYPEUPTO:
257     case OP_TYPEMINUPTO:
258 nigel 23 fprintf(outfile, " %s{", OP_names[code[3]]);
259     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");
260     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
261     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");
262 nigel 3 code += 3;
263     break;
264    
265     case OP_NOT:
266 nigel 23 if (isprint(c = *(++code))) fprintf(outfile, " [^%c]", c);
267     else fprintf(outfile, " [^\\x%02x]", c);
268 nigel 3 break;
269    
270     case OP_NOTSTAR:
271     case OP_NOTMINSTAR:
272     case OP_NOTPLUS:
273     case OP_NOTMINPLUS:
274     case OP_NOTQUERY:
275     case OP_NOTMINQUERY:
276 nigel 23 if (isprint(c = code[1])) fprintf(outfile, " [^%c]", c);
277     else fprintf(outfile, " [^\\x%02x]", c);
278     fprintf(outfile, "%s", OP_names[*code++]);
279 nigel 3 break;
280    
281     case OP_NOTEXACT:
282     case OP_NOTUPTO:
283     case OP_NOTMINUPTO:
284 nigel 23 if (isprint(c = code[3])) fprintf(outfile, " [^%c]{", c);
285     else fprintf(outfile, " [^\\x%02x]{", c);
286     if (*code != OP_NOTEXACT) fprintf(outfile, ",");
287     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
288     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");
289 nigel 3 code += 3;
290     break;
291    
292     case OP_REF:
293 nigel 53 fprintf(outfile, " \\%d", (code[1] << 8) | code[2]);
294     code += 3;
295 nigel 9 goto CLASS_REF_REPEAT;
296 nigel 3
297     case OP_CLASS:
298     {
299     int i, min, max;
300 nigel 23 code++;
301     fprintf(outfile, " [");
302 nigel 3
303     for (i = 0; i < 256; i++)
304     {
305     if ((code[i/8] & (1 << (i&7))) != 0)
306     {
307     int j;
308     for (j = i+1; j < 256; j++)
309     if ((code[j/8] & (1 << (j&7))) == 0) break;
310 nigel 23 if (i == '-' || i == ']') fprintf(outfile, "\\");
311     if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);
312 nigel 3 if (--j > i)
313     {
314 nigel 23 fprintf(outfile, "-");
315     if (j == '-' || j == ']') fprintf(outfile, "\\");
316     if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);
317 nigel 3 }
318     i = j;
319     }
320     }
321 nigel 23 fprintf(outfile, "]");
322 nigel 3 code += 32;
323    
324 nigel 9 CLASS_REF_REPEAT:
325    
326 nigel 3 switch(*code)
327     {
328     case OP_CRSTAR:
329     case OP_CRMINSTAR:
330     case OP_CRPLUS:
331     case OP_CRMINPLUS:
332     case OP_CRQUERY:
333     case OP_CRMINQUERY:
334 nigel 23 fprintf(outfile, "%s", OP_names[*code]);
335 nigel 3 break;
336    
337     case OP_CRRANGE:
338     case OP_CRMINRANGE:
339     min = (code[1] << 8) + code[2];
340     max = (code[3] << 8) + code[4];
341 nigel 23 if (max == 0) fprintf(outfile, "{%d,}", min);
342     else fprintf(outfile, "{%d,%d}", min, max);
343     if (*code == OP_CRMINRANGE) fprintf(outfile, "?");
344 nigel 3 code += 4;
345     break;
346    
347     default:
348     code--;
349     }
350     }
351     break;
352    
353     /* Anything else is just a one-node item */
354    
355     default:
356 nigel 23 fprintf(outfile, " %s", OP_names[*code]);
357 nigel 3 break;
358     }
359    
360     code++;
361 nigel 23 fprintf(outfile, "\n");
362 nigel 3 }
363     }
364    
365    
366    
367 nigel 49 /* Character string printing function. A "normal" and a UTF-8 version. */
368 nigel 3
369 nigel 49 static void pchars(unsigned char *p, int length, int utf8)
370 nigel 3 {
371     int c;
372     while (length-- > 0)
373 nigel 49 {
374     if (utf8)
375     {
376     int rc = utf82ord(p, &c);
377     if (rc > 0)
378     {
379     length -= rc - 1;
380     p += rc;
381     if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);
382     else fprintf(outfile, "\\x{%02x}", c);
383     continue;
384     }
385     }
386    
387     /* Not UTF-8, or malformed UTF-8 */
388    
389 nigel 3 if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
390     else fprintf(outfile, "\\x%02x", c);
391 nigel 49 }
392 nigel 3 }
393    
394    
395    
396     /* Alternative malloc function, to test functionality and show the size of the
397     compiled re. */
398    
399     static void *new_malloc(size_t size)
400     {
401 nigel 43 gotten_store = size;
402 nigel 31 if (log_store)
403 nigel 35 fprintf(outfile, "Memory allocation (code space): %d\n",
404     (int)((int)size - offsetof(real_pcre, code[0])));
405 nigel 3 return malloc(size);
406     }
407    
408    
409    
410 nigel 43
411     /* Get one piece of information from the pcre_fullinfo() function */
412    
413     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
414     {
415     int rc;
416     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
417     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
418     }
419    
420    
421    
422    
423 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
424     consist of a regular expression, in delimiters and optionally followed by
425     options, followed by a set of test data, terminated by an empty line. */
426    
427     int main(int argc, char **argv)
428     {
429     FILE *infile = stdin;
430     int options = 0;
431     int study_options = 0;
432     int op = 1;
433     int timeit = 0;
434     int showinfo = 0;
435 nigel 31 int showstore = 0;
436 nigel 53 int size_offsets = 45;
437     int size_offsets_max;
438     int *offsets;
439     #if !defined NOPOSIX
440 nigel 3 int posix = 0;
441 nigel 53 #endif
442 nigel 3 int debug = 0;
443 nigel 11 int done = 0;
444 nigel 3 unsigned char buffer[30000];
445     unsigned char dbuffer[1024];
446    
447     /* Static so that new_malloc can use it. */
448    
449     outfile = stdout;
450    
451     /* Scan options */
452    
453     while (argc > 1 && argv[op][0] == '-')
454     {
455 nigel 53 char *endptr;
456    
457 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
458     showstore = 1;
459 nigel 3 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
460     else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
461     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
462 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
463     ((size_offsets = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
464     {
465     op++;
466     argc--;
467     }
468     #if !defined NOPOSIX
469 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
470 nigel 53 #endif
471 nigel 3 else
472     {
473 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
474     printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
475     printf(" -d debug: show compiled code; implies -i\n"
476     " -i show information about compiled pattern\n"
477     " -o <n> set size of offsets vector to <n>\n");
478     #if !defined NOPOSIX
479     printf(" -p use POSIX interface\n");
480     #endif
481     printf(" -s output store information\n"
482     " -t time compilation and execution\n");
483 nigel 3 return 1;
484     }
485     op++;
486     argc--;
487     }
488    
489 nigel 53 /* Get the store for the offsets vector, and remember what it was */
490    
491     size_offsets_max = size_offsets;
492     offsets = malloc(size_offsets_max * sizeof(int));
493     if (offsets == NULL)
494     {
495     printf("** Failed to get %d bytes of memory for offsets vector\n",
496     size_offsets_max * sizeof(int));
497     return 1;
498     }
499    
500 nigel 3 /* Sort out the input and output files */
501    
502     if (argc > 1)
503     {
504     infile = fopen(argv[op], "r");
505     if (infile == NULL)
506     {
507     printf("** Failed to open %s\n", argv[op]);
508     return 1;
509     }
510     }
511    
512     if (argc > 2)
513     {
514     outfile = fopen(argv[op+1], "w");
515     if (outfile == NULL)
516     {
517     printf("** Failed to open %s\n", argv[op+1]);
518     return 1;
519     }
520     }
521    
522     /* Set alternative malloc function */
523    
524     pcre_malloc = new_malloc;
525    
526 nigel 23 /* Heading line, then prompt for first regex if stdin */
527 nigel 3
528     fprintf(outfile, "PCRE version %s\n\n", pcre_version());
529    
530     /* Main loop */
531    
532 nigel 11 while (!done)
533 nigel 3 {
534     pcre *re = NULL;
535     pcre_extra *extra = NULL;
536 nigel 37
537     #if !defined NOPOSIX /* There are still compilers that require no indent */
538 nigel 3 regex_t preg;
539 nigel 45 int do_posix = 0;
540 nigel 37 #endif
541    
542 nigel 7 const char *error;
543 nigel 25 unsigned char *p, *pp, *ppp;
544 nigel 53 const unsigned char *tables = NULL;
545 nigel 3 int do_study = 0;
546 nigel 25 int do_debug = debug;
547 nigel 35 int do_G = 0;
548     int do_g = 0;
549 nigel 25 int do_showinfo = showinfo;
550 nigel 35 int do_showrest = 0;
551 nigel 49 int utf8 = 0;
552 nigel 3 int erroroffset, len, delimiter;
553    
554     if (infile == stdin) printf(" re> ");
555     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
556 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
557 nigel 3
558     p = buffer;
559     while (isspace(*p)) p++;
560     if (*p == 0) continue;
561    
562     /* Get the delimiter and seek the end of the pattern; if is isn't
563     complete, read more. */
564    
565     delimiter = *p++;
566    
567 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
568 nigel 3 {
569 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
570 nigel 3 goto SKIP_DATA;
571     }
572    
573     pp = p;
574    
575     for(;;)
576     {
577 nigel 29 while (*pp != 0)
578     {
579     if (*pp == '\\' && pp[1] != 0) pp++;
580     else if (*pp == delimiter) break;
581     pp++;
582     }
583 nigel 3 if (*pp != 0) break;
584    
585     len = sizeof(buffer) - (pp - buffer);
586     if (len < 256)
587     {
588     fprintf(outfile, "** Expression too long - missing delimiter?\n");
589     goto SKIP_DATA;
590     }
591    
592     if (infile == stdin) printf(" > ");
593     if (fgets((char *)pp, len, infile) == NULL)
594     {
595     fprintf(outfile, "** Unexpected EOF\n");
596 nigel 11 done = 1;
597     goto CONTINUE;
598 nigel 3 }
599 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
600 nigel 3 }
601    
602 nigel 29 /* If the first character after the delimiter is backslash, make
603     the pattern end with backslash. This is purely to provide a way
604     of testing for the error message when a pattern ends with backslash. */
605    
606     if (pp[1] == '\\') *pp++ = '\\';
607    
608 nigel 3 /* Terminate the pattern at the delimiter */
609    
610     *pp++ = 0;
611    
612     /* Look for options after final delimiter */
613    
614     options = 0;
615     study_options = 0;
616 nigel 31 log_store = showstore; /* default from command line */
617    
618 nigel 3 while (*pp != 0)
619     {
620     switch (*pp++)
621     {
622 nigel 35 case 'g': do_g = 1; break;
623 nigel 3 case 'i': options |= PCRE_CASELESS; break;
624     case 'm': options |= PCRE_MULTILINE; break;
625     case 's': options |= PCRE_DOTALL; break;
626     case 'x': options |= PCRE_EXTENDED; break;
627 nigel 25
628 nigel 35 case '+': do_showrest = 1; break;
629 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
630 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
631 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
632 nigel 35 case 'G': do_G = 1; break;
633 nigel 25 case 'I': do_showinfo = 1; break;
634 nigel 31 case 'M': log_store = 1; break;
635 nigel 37
636     #if !defined NOPOSIX
637 nigel 3 case 'P': do_posix = 1; break;
638 nigel 37 #endif
639    
640 nigel 3 case 'S': do_study = 1; break;
641 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
642 nigel 3 case 'X': options |= PCRE_EXTRA; break;
643 nigel 49 case '8': options |= PCRE_UTF8; utf8 = 1; break;
644 nigel 25
645     case 'L':
646     ppp = pp;
647     while (*ppp != '\n' && *ppp != ' ') ppp++;
648     *ppp = 0;
649     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
650     {
651     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
652     goto SKIP_DATA;
653     }
654     tables = pcre_maketables();
655     pp = ppp;
656     break;
657    
658 nigel 3 case '\n': case ' ': break;
659     default:
660     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
661     goto SKIP_DATA;
662     }
663     }
664    
665 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
666 nigel 25 timing, showing, or debugging options, nor the ability to pass over
667     local character tables. */
668 nigel 3
669 nigel 37 #if !defined NOPOSIX
670 nigel 3 if (posix || do_posix)
671     {
672     int rc;
673     int cflags = 0;
674     if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
675     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
676     rc = regcomp(&preg, (char *)p, cflags);
677    
678     /* Compilation failed; go back for another re, skipping to blank line
679     if non-interactive. */
680    
681     if (rc != 0)
682     {
683     (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
684     fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
685     goto SKIP_DATA;
686     }
687     }
688    
689     /* Handle compiling via the native interface */
690    
691     else
692 nigel 37 #endif /* !defined NOPOSIX */
693    
694 nigel 3 {
695     if (timeit)
696     {
697     register int i;
698     clock_t time_taken;
699     clock_t start_time = clock();
700 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
701 nigel 3 {
702 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
703 nigel 3 if (re != NULL) free(re);
704     }
705     time_taken = clock() - start_time;
706 nigel 27 fprintf(outfile, "Compile time %.3f milliseconds\n",
707     ((double)time_taken * 1000.0) /
708     ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
709 nigel 3 }
710    
711 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
712 nigel 3
713     /* Compilation failed; go back for another re, skipping to blank line
714     if non-interactive. */
715    
716     if (re == NULL)
717     {
718     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
719     SKIP_DATA:
720     if (infile != stdin)
721     {
722     for (;;)
723     {
724     if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
725 nigel 11 {
726     done = 1;
727     goto CONTINUE;
728     }
729 nigel 3 len = (int)strlen((char *)buffer);
730     while (len > 0 && isspace(buffer[len-1])) len--;
731     if (len == 0) break;
732     }
733     fprintf(outfile, "\n");
734     }
735 nigel 25 goto CONTINUE;
736 nigel 3 }
737    
738 nigel 43 /* Compilation succeeded; print data if required. There are now two
739     info-returning functions. The old one has a limited interface and
740     returns only limited data. Check that it agrees with the newer one. */
741 nigel 3
742 nigel 25 if (do_showinfo)
743 nigel 3 {
744 nigel 53 unsigned long int get_options;
745 nigel 43 int old_first_char, old_options, old_count;
746     int count, backrefmax, first_char, need_char;
747     size_t size;
748 nigel 3
749 nigel 37 if (do_debug) print_internals(re);
750 nigel 3
751 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
752 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
753     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
754     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
755     new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
756     new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
757    
758     old_count = pcre_info(re, &old_options, &old_first_char);
759 nigel 3 if (count < 0) fprintf(outfile,
760 nigel 43 "Error %d from pcre_info()\n", count);
761 nigel 3 else
762     {
763 nigel 43 if (old_count != count) fprintf(outfile,
764     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
765     old_count);
766 nigel 37
767 nigel 43 if (old_first_char != first_char) fprintf(outfile,
768     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
769     first_char, old_first_char);
770 nigel 37
771 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
772     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
773     get_options, old_options);
774 nigel 43 }
775    
776     if (size != gotten_store) fprintf(outfile,
777     "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
778     size, gotten_store);
779    
780     fprintf(outfile, "Capturing subpattern count = %d\n", count);
781     if (backrefmax > 0)
782     fprintf(outfile, "Max back reference = %d\n", backrefmax);
783 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
784 nigel 49 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
785 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
786     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
787     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
788     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
789     ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
790     ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
791     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
792     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
793     ((get_options & PCRE_UTF8) != 0)? " utf8" : "");
794 nigel 43
795     if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
796     fprintf(outfile, "Case state changes\n");
797    
798     if (first_char == -1)
799     {
800     fprintf(outfile, "First char at start or follows \\n\n");
801     }
802     else if (first_char < 0)
803     {
804     fprintf(outfile, "No first char\n");
805     }
806     else
807     {
808     if (isprint(first_char))
809     fprintf(outfile, "First char = \'%c\'\n", first_char);
810 nigel 3 else
811 nigel 43 fprintf(outfile, "First char = %d\n", first_char);
812     }
813 nigel 37
814 nigel 43 if (need_char < 0)
815     {
816     fprintf(outfile, "No need char\n");
817 nigel 3 }
818 nigel 43 else
819     {
820     if (isprint(need_char))
821     fprintf(outfile, "Need char = \'%c\'\n", need_char);
822     else
823     fprintf(outfile, "Need char = %d\n", need_char);
824     }
825 nigel 3 }
826    
827     /* If /S was present, study the regexp to generate additional info to
828     help with the matching. */
829    
830     if (do_study)
831     {
832     if (timeit)
833     {
834     register int i;
835     clock_t time_taken;
836     clock_t start_time = clock();
837 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
838 nigel 3 extra = pcre_study(re, study_options, &error);
839     time_taken = clock() - start_time;
840     if (extra != NULL) free(extra);
841 nigel 27 fprintf(outfile, " Study time %.3f milliseconds\n",
842     ((double)time_taken * 1000.0)/
843     ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
844 nigel 3 }
845    
846     extra = pcre_study(re, study_options, &error);
847     if (error != NULL)
848     fprintf(outfile, "Failed to study: %s\n", error);
849     else if (extra == NULL)
850     fprintf(outfile, "Study returned NULL\n");
851    
852 nigel 25 else if (do_showinfo)
853 nigel 3 {
854 nigel 43 uschar *start_bits = NULL;
855     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
856     if (start_bits == NULL)
857 nigel 3 fprintf(outfile, "No starting character set\n");
858     else
859     {
860     int i;
861     int c = 24;
862     fprintf(outfile, "Starting character set: ");
863     for (i = 0; i < 256; i++)
864     {
865 nigel 43 if ((start_bits[i/8] & (1<<(i%8))) != 0)
866 nigel 3 {
867     if (c > 75)
868     {
869     fprintf(outfile, "\n ");
870     c = 2;
871     }
872     if (isprint(i) && i != ' ')
873     {
874     fprintf(outfile, "%c ", i);
875     c += 2;
876     }
877     else
878     {
879     fprintf(outfile, "\\x%02x ", i);
880     c += 5;
881     }
882     }
883     }
884     fprintf(outfile, "\n");
885     }
886     }
887     }
888     }
889    
890     /* Read data lines and test them */
891    
892     for (;;)
893     {
894 nigel 9 unsigned char *q;
895 nigel 35 unsigned char *bptr = dbuffer;
896 nigel 57 int *use_offsets = offsets;
897 nigel 53 int use_size_offsets = size_offsets;
898 nigel 3 int count, c;
899 nigel 29 int copystrings = 0;
900     int getstrings = 0;
901     int getlist = 0;
902 nigel 39 int gmatched = 0;
903 nigel 35 int start_offset = 0;
904 nigel 41 int g_notempty = 0;
905 nigel 3
906     options = 0;
907    
908 nigel 35 if (infile == stdin) printf("data> ");
909 nigel 11 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
910     {
911     done = 1;
912     goto CONTINUE;
913     }
914 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
915 nigel 3
916     len = (int)strlen((char *)buffer);
917     while (len > 0 && isspace(buffer[len-1])) len--;
918     buffer[len] = 0;
919     if (len == 0) break;
920    
921     p = buffer;
922     while (isspace(*p)) p++;
923    
924 nigel 9 q = dbuffer;
925 nigel 3 while ((c = *p++) != 0)
926     {
927     int i = 0;
928     int n = 0;
929     if (c == '\\') switch ((c = *p++))
930     {
931     case 'a': c = 7; break;
932     case 'b': c = '\b'; break;
933     case 'e': c = 27; break;
934     case 'f': c = '\f'; break;
935     case 'n': c = '\n'; break;
936     case 'r': c = '\r'; break;
937     case 't': c = '\t'; break;
938     case 'v': c = '\v'; break;
939    
940     case '0': case '1': case '2': case '3':
941     case '4': case '5': case '6': case '7':
942     c -= '0';
943     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
944     c = c * 8 + *p++ - '0';
945     break;
946    
947     case 'x':
948 nigel 49
949     /* Handle \x{..} specially - new Perl thing for utf8 */
950    
951     if (*p == '{')
952     {
953     unsigned char *pt = p;
954     c = 0;
955     while (isxdigit(*(++pt)))
956     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
957     if (*pt == '}')
958     {
959     unsigned char buffer[8];
960     int ii, utn;
961     utn = ord2utf8(c, buffer);
962     for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];
963     c = buffer[ii]; /* Last byte */
964     p = pt + 1;
965     break;
966     }
967     /* Not correct form; fall through */
968     }
969    
970     /* Ordinary \x */
971    
972 nigel 3 c = 0;
973     while (i++ < 2 && isxdigit(*p))
974     {
975     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
976     p++;
977     }
978     break;
979    
980     case 0: /* Allows for an empty line */
981     p--;
982     continue;
983    
984     case 'A': /* Option setting */
985     options |= PCRE_ANCHORED;
986     continue;
987    
988     case 'B':
989     options |= PCRE_NOTBOL;
990     continue;
991    
992 nigel 29 case 'C':
993     while(isdigit(*p)) n = n * 10 + *p++ - '0';
994     copystrings |= 1 << n;
995     continue;
996    
997     case 'G':
998     while(isdigit(*p)) n = n * 10 + *p++ - '0';
999     getstrings |= 1 << n;
1000     continue;
1001    
1002     case 'L':
1003     getlist = 1;
1004     continue;
1005    
1006 nigel 37 case 'N':
1007     options |= PCRE_NOTEMPTY;
1008     continue;
1009    
1010 nigel 3 case 'O':
1011     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1012 nigel 53 if (n > size_offsets_max)
1013     {
1014     size_offsets_max = n;
1015 nigel 57 free(offsets);
1016     use_offsets = offsets = malloc(size_offsets_max * sizeof(int));
1017 nigel 53 if (offsets == NULL)
1018     {
1019     printf("** Failed to get %d bytes of memory for offsets vector\n",
1020     size_offsets_max * sizeof(int));
1021     return 1;
1022     }
1023     }
1024     use_size_offsets = n;
1025 nigel 57 if (n == 0) use_offsets = NULL;
1026 nigel 3 continue;
1027    
1028     case 'Z':
1029     options |= PCRE_NOTEOL;
1030     continue;
1031     }
1032 nigel 9 *q++ = c;
1033 nigel 3 }
1034 nigel 9 *q = 0;
1035     len = q - dbuffer;
1036 nigel 3
1037     /* Handle matching via the POSIX interface, which does not
1038     support timing. */
1039    
1040 nigel 37 #if !defined NOPOSIX
1041 nigel 3 if (posix || do_posix)
1042     {
1043     int rc;
1044     int eflags = 0;
1045 nigel 53 regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
1046 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1047     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1048    
1049 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1050 nigel 3
1051     if (rc != 0)
1052     {
1053     (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
1054     fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1055     }
1056     else
1057     {
1058 nigel 7 size_t i;
1059 nigel 53 for (i = 0; i < use_size_offsets; i++)
1060 nigel 3 {
1061     if (pmatch[i].rm_so >= 0)
1062     {
1063 nigel 23 fprintf(outfile, "%2d: ", (int)i);
1064 nigel 3 pchars(dbuffer + pmatch[i].rm_so,
1065 nigel 49 pmatch[i].rm_eo - pmatch[i].rm_so, utf8);
1066 nigel 3 fprintf(outfile, "\n");
1067 nigel 35 if (i == 0 && do_showrest)
1068     {
1069     fprintf(outfile, " 0+ ");
1070 nigel 49 pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);
1071 nigel 35 fprintf(outfile, "\n");
1072     }
1073 nigel 3 }
1074     }
1075     }
1076 nigel 53 free(pmatch);
1077 nigel 3 }
1078    
1079 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
1080 nigel 3
1081 nigel 37 else
1082     #endif /* !defined NOPOSIX */
1083    
1084 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
1085 nigel 3 {
1086     if (timeit)
1087     {
1088     register int i;
1089     clock_t time_taken;
1090     clock_t start_time = clock();
1091 nigel 27 for (i = 0; i < LOOPREPEAT; i++)
1092 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
1093 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1094 nigel 3 time_taken = clock() - start_time;
1095 nigel 27 fprintf(outfile, "Execute time %.3f milliseconds\n",
1096     ((double)time_taken * 1000.0)/
1097     ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
1098 nigel 3 }
1099    
1100 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
1101 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1102 nigel 3
1103     if (count == 0)
1104     {
1105     fprintf(outfile, "Matched, but too many substrings\n");
1106 nigel 53 count = use_size_offsets/3;
1107 nigel 3 }
1108    
1109 nigel 39 /* Matched */
1110    
1111 nigel 3 if (count >= 0)
1112     {
1113     int i;
1114 nigel 29 for (i = 0; i < count * 2; i += 2)
1115 nigel 3 {
1116 nigel 57 if (use_offsets[i] < 0)
1117 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
1118     else
1119     {
1120     fprintf(outfile, "%2d: ", i/2);
1121 nigel 57 pchars(bptr + use_offsets[i], use_offsets[i+1] - use_offsets[i], utf8);
1122 nigel 3 fprintf(outfile, "\n");
1123 nigel 35 if (i == 0)
1124     {
1125     if (do_showrest)
1126     {
1127     fprintf(outfile, " 0+ ");
1128 nigel 57 pchars(bptr + use_offsets[i+1], len - use_offsets[i+1], utf8);
1129 nigel 35 fprintf(outfile, "\n");
1130     }
1131     }
1132 nigel 3 }
1133     }
1134 nigel 29
1135     for (i = 0; i < 32; i++)
1136     {
1137     if ((copystrings & (1 << i)) != 0)
1138     {
1139 nigel 37 char copybuffer[16];
1140 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1141 nigel 37 i, copybuffer, sizeof(copybuffer));
1142 nigel 29 if (rc < 0)
1143     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1144     else
1145 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1146 nigel 29 }
1147     }
1148    
1149     for (i = 0; i < 32; i++)
1150     {
1151     if ((getstrings & (1 << i)) != 0)
1152     {
1153     const char *substring;
1154 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1155 nigel 29 i, &substring);
1156     if (rc < 0)
1157     fprintf(outfile, "get substring %d failed %d\n", i, rc);
1158     else
1159     {
1160     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1161 nigel 49 /* free((void *)substring); */
1162     pcre_free_substring(substring);
1163 nigel 29 }
1164     }
1165     }
1166    
1167     if (getlist)
1168     {
1169     const char **stringlist;
1170 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1171 nigel 29 &stringlist);
1172     if (rc < 0)
1173     fprintf(outfile, "get substring list failed %d\n", rc);
1174     else
1175     {
1176     for (i = 0; i < count; i++)
1177     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1178     if (stringlist[i] != NULL)
1179     fprintf(outfile, "string list not terminated by NULL\n");
1180 nigel 49 /* free((void *)stringlist); */
1181     pcre_free_substring_list(stringlist);
1182 nigel 29 }
1183     }
1184 nigel 39 }
1185 nigel 29
1186 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
1187 nigel 47 g_notempty after a null match, this is not necessarily the end.
1188 nigel 41 We want to advance the start offset, and continue. Fudge the offset
1189     values to achieve this. We won't be at the end of the string - that
1190 nigel 47 was checked before setting g_notempty. */
1191 nigel 39
1192 nigel 3 else
1193     {
1194 nigel 41 if (g_notempty != 0)
1195 nigel 35 {
1196 nigel 57 use_offsets[0] = start_offset;
1197     use_offsets[1] = start_offset + 1;
1198 nigel 35 }
1199 nigel 41 else
1200     {
1201     if (gmatched == 0) /* Error if no previous matches */
1202     {
1203     if (count == -1) fprintf(outfile, "No match\n");
1204     else fprintf(outfile, "Error %d\n", count);
1205     }
1206     break; /* Out of the /g loop */
1207     }
1208 nigel 3 }
1209 nigel 35
1210 nigel 39 /* If not /g or /G we are done */
1211    
1212     if (!do_g && !do_G) break;
1213    
1214 nigel 41 /* If we have matched an empty string, first check to see if we are at
1215     the end of the subject. If so, the /g loop is over. Otherwise, mimic
1216     what Perl's /g option does. This turns out to be rather cunning. First
1217 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1218     same point. If this fails (picked up above) we advance to the next
1219     character. */
1220 nigel 39
1221 nigel 41 g_notempty = 0;
1222 nigel 57 if (use_offsets[0] == use_offsets[1])
1223 nigel 41 {
1224 nigel 57 if (use_offsets[0] == len) break;
1225 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1226 nigel 41 }
1227 nigel 39
1228     /* For /g, update the start offset, leaving the rest alone */
1229    
1230 nigel 57 if (do_g) start_offset = use_offsets[1];
1231 nigel 39
1232     /* For /G, update the pointer and length */
1233    
1234     else
1235 nigel 35 {
1236 nigel 57 bptr += use_offsets[1];
1237     len -= use_offsets[1];
1238 nigel 35 }
1239 nigel 39 } /* End of loop for /g and /G */
1240     } /* End of loop for data lines */
1241 nigel 3
1242 nigel 11 CONTINUE:
1243 nigel 37
1244     #if !defined NOPOSIX
1245 nigel 3 if (posix || do_posix) regfree(&preg);
1246 nigel 37 #endif
1247    
1248 nigel 3 if (re != NULL) free(re);
1249     if (extra != NULL) free(extra);
1250 nigel 25 if (tables != NULL)
1251     {
1252     free((void *)tables);
1253     setlocale(LC_CTYPE, "C");
1254     }
1255 nigel 3 }
1256    
1257     fprintf(outfile, "\n");
1258     return 0;
1259     }
1260    
1261     /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12