/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 77 - (hide annotations) (download)
Sat Feb 24 21:40:45 2007 UTC (7 years, 9 months ago) by nigel
File MIME type: text/plain
File size: 52292 byte(s)
Load pcre-6.0 into code/trunk.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 nigel 3 #include <ctype.h>
40     #include <stdio.h>
41     #include <string.h>
42     #include <stdlib.h>
43     #include <time.h>
44 nigel 25 #include <locale.h>
45 nigel 75 #include <errno.h>
46 nigel 3
47 nigel 63 #define PCRE_SPY /* For Win32 build, import data, not export */
48 nigel 37
49 nigel 77 /* We need the internal info for displaying the results of pcre_study() and
50     other internal data; pcretest also uses some of the fixed tables, and generally
51     has "inside information" compared to a program that strictly follows the PCRE
52     API. */
53    
54     #include "pcre_internal.h"
55    
56    
57 nigel 37 /* It is possible to compile this test program without including support for
58     testing the POSIX interface, though this is not available via the standard
59     Makefile. */
60    
61     #if !defined NOPOSIX
62 nigel 3 #include "pcreposix.h"
63 nigel 37 #endif
64 nigel 3
65     #ifndef CLOCKS_PER_SEC
66     #ifdef CLK_TCK
67     #define CLOCKS_PER_SEC CLK_TCK
68     #else
69     #define CLOCKS_PER_SEC 100
70     #endif
71     #endif
72    
73 nigel 75 #define LOOPREPEAT 500000
74 nigel 3
75 nigel 69 #define BUFFER_SIZE 30000
76 nigel 75 #define PBUFFER_SIZE BUFFER_SIZE
77 nigel 73 #define DBUFFER_SIZE BUFFER_SIZE
78 nigel 23
79 nigel 69
/* File-scope state shared between main() and the helper functions below. */

/* Stream that all test output is written to; main() points it at stdout or at
the named output file. */
80 nigel 3 static FILE *outfile;
/* When non-zero, main() reports the memory used by each compiled pattern. */
81     static int log_store = 0;
/* Number of callouts so far whose number matched callout_fail_id. */
82 nigel 63 static int callout_count;
/* When non-zero, callout() also prints the current capture values. */
83     static int callout_extra;
/* callout() starts returning 1 once callout_count reaches this value. */
84     static int callout_fail_count;
/* The callout number that is counted towards callout_fail_count. */
85     static int callout_fail_id;
/* Non-zero until callout() has run; callout() clears it on every call. */
86     static int first_callout;
/* When non-zero, the malloc/free wrappers log each call to outfile. */
87 nigel 73 static int show_malloc;
/* Non-zero when the current pattern uses UTF-8; pchars() then decodes UTF-8. */
88 nigel 67 static int use_utf8;
/* Size of the most recent request made through new_malloc(). */
89 nigel 43 static size_t gotten_store;
90 nigel 3
/* Copy of the current pattern text, used by callout() to show the next item. */
91 nigel 75 static uschar *pbuffer = NULL;
92 nigel 3
93 nigel 75
94 nigel 49
95     /*************************************************
96 nigel 63 * Read number from string *
97     *************************************************/
98    
99     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
100     around with conditional compilation, just do the job by hand. It is only used
101     for unpicking the -o argument, so just keep it simple.
102    
103     Arguments:
104     str string to be converted
105     endptr where to put the end pointer
106    
107     Returns: the value as an int
108     */
109    
/* Parse an unsigned decimal number from a string.

Leading white space is skipped, then as many decimal digits as are present are
accumulated. A value that is too large for an int saturates at the largest int
instead of overflowing, because signed overflow is undefined behaviour.

Arguments:
  str       string to be converted
  endptr    receives a pointer to the first character after the digits

Returns:    the converted value; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
/* Largest int, computed here so that <limits.h> is not needed. */
const int max_value = (int)(~0u >> 1);
int result = 0;

while (*str != 0 && isspace(*str)) str++;

while (isdigit(*str))
  {
  int digit = *str++ - '0';

  /* result * 10 + digit fits in an int only when this test is false. */
  if (result > (max_value - digit) / 10)
    result = max_value;
  else
    result = result * 10 + digit;
  }

*endptr = str;
return result;
}
119    
120    
121    
122 nigel 49
123     /*************************************************
124     * Convert UTF-8 string to value *
125     *************************************************/
126    
127     /* This function takes one or more bytes that represents a UTF-8 character,
128     and returns the value of the character.
129    
130     Argument:
131     buffer a pointer to the byte vector
132     vptr a pointer to an int to receive the value
133    
134     Returns: > 0 => the number of bytes consumed
135     -6 to 0 => malformed UTF-8 character at offset = (-return)
136     */
137    
138 nigel 67 static int
139 nigel 49 utf82ord(unsigned char *buffer, int *vptr)
140     {
141     int c = *buffer++;
142     int d = c;
143     int i, j, s;
144    
145     for (i = -1; i < 6; i++) /* i is number of additional bytes */
146     {
147     if ((d & 0x80) == 0) break;
148     d <<= 1;
149     }
150    
151     if (i == -1) { *vptr = c; return 1; } /* ascii character */
152     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
153    
154     /* i now has a value in the range 1-5 */
155    
156 nigel 59 s = 6*i;
157 nigel 77 d = (c & _pcre_utf8_table3[i]) << s;
158 nigel 49
159     for (j = 0; j < i; j++)
160     {
161     c = *buffer++;
162     if ((c & 0xc0) != 0x80) return -(j+1);
163 nigel 59 s -= 6;
164 nigel 49 d |= (c & 0x3f) << s;
165     }
166    
167     /* Check that encoding was the correct unique one */
168    
169 nigel 77 for (j = 0; j < _pcre_utf8_table1_size; j++)
170     if (d <= _pcre_utf8_table1[j]) break;
171 nigel 49 if (j != i) return -(i+1);
172    
173     /* Valid value */
174    
175     *vptr = d;
176     return i+1;
177     }
178    
179    
180    
181 nigel 63 /*************************************************
182     * Print character string *
183     *************************************************/
184 nigel 49
185 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
186     mode. Yields number of characters printed. If handed a NULL file, just counts
187     chars without printing. */
188 nigel 49
189 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
190 nigel 3 {
191 nigel 63 int c;
192     int yield = 0;
193 nigel 3
194 nigel 63 while (length-- > 0)
195 nigel 3 {
196 nigel 67 if (use_utf8)
197 nigel 63 {
198     int rc = utf82ord(p, &c);
199 nigel 3
200 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
201     {
202     length -= rc - 1;
203     p += rc;
204     if (c < 256 && isprint(c))
205     {
206     if (f != NULL) fprintf(f, "%c", c);
207     yield++;
208     }
209     else
210     {
211     int n;
212     if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
213     yield += n;
214     }
215     continue;
216     }
217     }
218 nigel 3
219 nigel 63 /* Not UTF-8, or malformed UTF-8 */
220    
221     if (isprint(c = *(p++)))
222 nigel 3 {
223 nigel 63 if (f != NULL) fprintf(f, "%c", c);
224     yield++;
225 nigel 3 }
226 nigel 63 else
227 nigel 3 {
228 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
229     yield += 4;
230     }
231     }
232 nigel 3
233 nigel 63 return yield;
234     }
235 nigel 23
236 nigel 3
237 nigel 23
238 nigel 63 /*************************************************
239     * Callout function *
240     *************************************************/
241 nigel 3
242 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
243     the match. Yield zero unless more callouts than the fail count, or the callout
244     data is not zero. */
245 nigel 3
246 nigel 63 static int callout(pcre_callout_block *cb)
247     {
248     FILE *f = (first_callout | callout_extra)? outfile : NULL;
249 nigel 75 int i, pre_start, post_start, subject_length;
250 nigel 3
251 nigel 63 if (callout_extra)
252     {
253     fprintf(f, "Callout %d: last capture = %d\n",
254     cb->callout_number, cb->capture_last);
255 nigel 3
256 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
257     {
258     if (cb->offset_vector[i] < 0)
259     fprintf(f, "%2d: <unset>\n", i/2);
260     else
261     {
262     fprintf(f, "%2d: ", i/2);
263     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
264     cb->offset_vector[i+1] - cb->offset_vector[i], f);
265     fprintf(f, "\n");
266     }
267     }
268     }
269 nigel 3
270 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
271     datails. On subsequent calls in the same match, we use pchars just to find the
272     printed lengths of the substrings. */
273 nigel 3
274 nigel 63 if (f != NULL) fprintf(f, "--->");
275 nigel 3
276 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
277     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
278     cb->current_position - cb->start_match, f);
279 nigel 3
280 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
281    
282 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
283     cb->subject_length - cb->current_position, f);
284 nigel 3
285 nigel 63 if (f != NULL) fprintf(f, "\n");
286 nigel 9
287 nigel 63 /* Always print appropriate indicators, with callout number if not already
288 nigel 75 shown. For automatic callouts, show the pattern offset. */
289 nigel 3
290 nigel 75 if (cb->callout_number == 255)
291     {
292     fprintf(outfile, "%+3d ", cb->pattern_position);
293     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
294     }
295     else
296     {
297     if (callout_extra) fprintf(outfile, " ");
298     else fprintf(outfile, "%3d ", cb->callout_number);
299     }
300 nigel 3
301 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
302     fprintf(outfile, "^");
303 nigel 3
304 nigel 63 if (post_start > 0)
305     {
306     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
307     fprintf(outfile, "^");
308 nigel 3 }
309    
310 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
311     fprintf(outfile, " ");
312    
313     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
314     pbuffer + cb->pattern_position);
315    
316 nigel 63 fprintf(outfile, "\n");
317     first_callout = 0;
318 nigel 3
319 nigel 71 if (cb->callout_data != NULL)
320 nigel 49 {
321 nigel 71 int callout_data = *((int *)(cb->callout_data));
322     if (callout_data != 0)
323     {
324     fprintf(outfile, "Callout data = %d\n", callout_data);
325     return callout_data;
326     }
327 nigel 63 }
328 nigel 49
329 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
330     (++callout_count >= callout_fail_count)? 1 : 0;
331 nigel 3 }
332    
333    
334 nigel 63 /*************************************************
335 nigel 73 * Local malloc functions *
336 nigel 63 *************************************************/
337 nigel 3
338     /* Alternative malloc function, to test functionality and show the size of the
339     compiled re. */
340    
341     static void *new_malloc(size_t size)
342     {
343 nigel 73 void *block = malloc(size);
344 nigel 43 gotten_store = size;
345 nigel 73 if (show_malloc)
346 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
347 nigel 73 return block;
348 nigel 3 }
349    
350 nigel 73 static void new_free(void *block)
351     {
352     if (show_malloc)
353     fprintf(outfile, "free %p\n", block);
354     free(block);
355     }
356 nigel 3
357    
358 nigel 73 /* For recursion malloc/free, to test stacking calls */
359    
360     static void *stack_malloc(size_t size)
361     {
362     void *block = malloc(size);
363     if (show_malloc)
364 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
365 nigel 73 return block;
366     }
367    
368     static void stack_free(void *block)
369     {
370     if (show_malloc)
371     fprintf(outfile, "stack_free %p\n", block);
372     free(block);
373     }
374    
375    
376 nigel 63 /*************************************************
377     * Call pcre_fullinfo() *
378     *************************************************/
379 nigel 43
380     /* Get one piece of information from the pcre_fullinfo() function */
381    
382     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
383     {
384     int rc;
385     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
386     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
387     }
388    
389    
390    
391 nigel 63 /*************************************************
392 nigel 75 * Byte flipping function *
393     *************************************************/
394    
/* Reverse the byte order of an integer field.

The work is done in unsigned arithmetic. Shifting a signed long left into its
sign bit, as (value & 0xff) << 24 does when long is 32 bits wide, is undefined
behaviour.

Arguments:
  value     the value to flip
  n         width of the field in bytes: 2 swaps the two low-order bytes; any
            other width is handled as a 4-byte field

Returns:    the flipped value
*/

static long int
byteflip(long int value, int n)
{
unsigned long int v = (unsigned long int)value;
unsigned long int flipped;

if (n == 2)
  {
  flipped = ((v & 0x00ffUL) << 8) | ((v & 0xff00UL) >> 8);
  }
else
  {
  flipped = ((v & 0x000000ffUL) << 24) |
            ((v & 0x0000ff00UL) <<  8) |
            ((v & 0x00ff0000UL) >>  8) |
            ((v & 0xff000000UL) >> 24);
  }

return (long int)flipped;
}
404    
405    
406    
407    
408     /*************************************************
409 nigel 63 * Main Program *
410     *************************************************/
411 nigel 43
412 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
413     consist of a regular expression, in delimiters and optionally followed by
414     options, followed by a set of test data, terminated by an empty line. */
415    
416     int main(int argc, char **argv)
417     {
418     FILE *infile = stdin;
419     int options = 0;
420     int study_options = 0;
421     int op = 1;
422     int timeit = 0;
423     int showinfo = 0;
424 nigel 31 int showstore = 0;
425 nigel 53 int size_offsets = 45;
426     int size_offsets_max;
427 nigel 77 int *offsets = NULL;
428 nigel 53 #if !defined NOPOSIX
429 nigel 3 int posix = 0;
430 nigel 53 #endif
431 nigel 3 int debug = 0;
432 nigel 11 int done = 0;
433 nigel 77 int all_use_dfa = 0;
434     int yield = 0;
435 nigel 3
436 nigel 69 unsigned char *buffer;
437     unsigned char *dbuffer;
438    
439     /* Get buffers from malloc() so that Electric Fence will check their misuse
440     when I am debugging. */
441    
442 nigel 71 buffer = (unsigned char *)malloc(BUFFER_SIZE);
443     dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
444 nigel 75 pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
445 nigel 69
446 nigel 75 /* The outfile variable is static so that new_malloc can use it. The _setmode()
447     stuff is some magic that I don't understand, but which apparently does good
448     things in Windows. It's related to line terminations. */
449 nigel 3
450 nigel 75 #if defined(_WIN32) || defined(WIN32)
451     _setmode( _fileno( stdout ), 0x8000 );
452     #endif /* defined(_WIN32) || defined(WIN32) */
453    
454 nigel 3 outfile = stdout;
455    
456     /* Scan options */
457    
458     while (argc > 1 && argv[op][0] == '-')
459     {
460 nigel 63 unsigned char *endptr;
461 nigel 53
462 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
463     showstore = 1;
464 nigel 3 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
465     else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
466     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
467 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
468 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
469 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
470     *endptr == 0))
471 nigel 53 {
472     op++;
473     argc--;
474     }
475     #if !defined NOPOSIX
476 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
477 nigel 53 #endif
478 nigel 63 else if (strcmp(argv[op], "-C") == 0)
479     {
480     int rc;
481     printf("PCRE version %s\n", pcre_version());
482     printf("Compiled with\n");
483     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
484     printf(" %sUTF-8 support\n", rc? "" : "No ");
485 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
486     printf(" %sUnicode properties support\n", rc? "" : "No ");
487 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
488     printf(" Newline character is %s\n", (rc == '\r')? "CR" : "LF");
489     (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
490     printf(" Internal link size = %d\n", rc);
491     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
492     printf(" POSIX malloc threshold = %d\n", rc);
493     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
494     printf(" Default match limit = %d\n", rc);
495 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
496     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
497 nigel 63 exit(0);
498     }
499 nigel 3 else
500     {
501 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
502     printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
503 nigel 63 printf(" -C show PCRE compile-time options and exit\n");
504 nigel 77 printf(" -d debug: show compiled code; implies -i\n");
505     printf(" -dfa force DFA matching for all subjects\n");
506     printf(" -i show information about compiled pattern\n"
507 nigel 75 " -m output memory used information\n"
508 nigel 53 " -o <n> set size of offsets vector to <n>\n");
509     #if !defined NOPOSIX
510     printf(" -p use POSIX interface\n");
511     #endif
512 nigel 75 printf(" -s output store (memory) used information\n"
513 nigel 53 " -t time compilation and execution\n");
514 nigel 77 yield = 1;
515     goto EXIT;
516 nigel 3 }
517     op++;
518     argc--;
519     }
520    
521 nigel 53 /* Get the store for the offsets vector, and remember what it was */
522    
523     size_offsets_max = size_offsets;
524 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
525 nigel 53 if (offsets == NULL)
526     {
527     printf("** Failed to get %d bytes of memory for offsets vector\n",
528     size_offsets_max * sizeof(int));
529 nigel 77 yield = 1;
530     goto EXIT;
531 nigel 53 }
532    
533 nigel 3 /* Sort out the input and output files */
534    
535     if (argc > 1)
536     {
537 nigel 75 infile = fopen(argv[op], "rb");
538 nigel 3 if (infile == NULL)
539     {
540     printf("** Failed to open %s\n", argv[op]);
541 nigel 77 yield = 1;
542     goto EXIT;
543 nigel 3 }
544     }
545    
546     if (argc > 2)
547     {
548 nigel 75 outfile = fopen(argv[op+1], "wb");
549 nigel 3 if (outfile == NULL)
550     {
551     printf("** Failed to open %s\n", argv[op+1]);
552 nigel 77 yield = 1;
553     goto EXIT;
554 nigel 3 }
555     }
556    
557     /* Set alternative malloc function */
558    
559     pcre_malloc = new_malloc;
560 nigel 73 pcre_free = new_free;
561     pcre_stack_malloc = stack_malloc;
562     pcre_stack_free = stack_free;
563 nigel 3
564 nigel 23 /* Heading line, then prompt for first regex if stdin */
565 nigel 3
566     fprintf(outfile, "PCRE version %s\n\n", pcre_version());
567    
568     /* Main loop */
569    
570 nigel 11 while (!done)
571 nigel 3 {
572     pcre *re = NULL;
573     pcre_extra *extra = NULL;
574 nigel 37
575     #if !defined NOPOSIX /* There are still compilers that require no indent */
576 nigel 3 regex_t preg;
577 nigel 45 int do_posix = 0;
578 nigel 37 #endif
579    
580 nigel 7 const char *error;
581 nigel 25 unsigned char *p, *pp, *ppp;
582 nigel 75 unsigned char *to_file = NULL;
583 nigel 53 const unsigned char *tables = NULL;
584 nigel 75 unsigned long int true_size, true_study_size = 0;
585     size_t size, regex_gotten_store;
586 nigel 3 int do_study = 0;
587 nigel 25 int do_debug = debug;
588 nigel 35 int do_G = 0;
589     int do_g = 0;
590 nigel 25 int do_showinfo = showinfo;
591 nigel 35 int do_showrest = 0;
592 nigel 75 int do_flip = 0;
593 nigel 3 int erroroffset, len, delimiter;
594    
595 nigel 67 use_utf8 = 0;
596 nigel 63
597 nigel 3 if (infile == stdin) printf(" re> ");
598 nigel 69 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
599 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
600 nigel 63 fflush(outfile);
601 nigel 3
602     p = buffer;
603     while (isspace(*p)) p++;
604     if (*p == 0) continue;
605    
606 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
607 nigel 3
608 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
609     {
610     unsigned long int magic;
611     uschar sbuf[8];
612     FILE *f;
613    
614     p++;
615     pp = p + (int)strlen((char *)p);
616     while (isspace(pp[-1])) pp--;
617     *pp = 0;
618    
619     f = fopen((char *)p, "rb");
620     if (f == NULL)
621     {
622     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
623     continue;
624     }
625    
626     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
627    
628     true_size =
629     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
630     true_study_size =
631     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
632    
633     re = (real_pcre *)new_malloc(true_size);
634     regex_gotten_store = gotten_store;
635    
636     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
637    
638     magic = ((real_pcre *)re)->magic_number;
639     if (magic != MAGIC_NUMBER)
640     {
641     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
642     {
643     do_flip = 1;
644     }
645     else
646     {
647     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
648     fclose(f);
649     continue;
650     }
651     }
652    
653     fprintf(outfile, "Compiled regex%s loaded from %s\n",
654     do_flip? " (byte-inverted)" : "", p);
655    
656     /* Need to know if UTF-8 for printing data strings */
657    
658     new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
659     use_utf8 = (options & PCRE_UTF8) != 0;
660    
661     /* Now see if there is any following study data */
662    
663     if (true_study_size != 0)
664     {
665     pcre_study_data *psd;
666    
667     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
668     extra->flags = PCRE_EXTRA_STUDY_DATA;
669    
670     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
671     extra->study_data = psd;
672    
673     if (fread(psd, 1, true_study_size, f) != true_study_size)
674     {
675     FAIL_READ:
676     fprintf(outfile, "Failed to read data from %s\n", p);
677     if (extra != NULL) new_free(extra);
678     if (re != NULL) new_free(re);
679     fclose(f);
680     continue;
681     }
682     fprintf(outfile, "Study data loaded from %s\n", p);
683     do_study = 1; /* To get the data output if requested */
684     }
685     else fprintf(outfile, "No study data\n");
686    
687     fclose(f);
688     goto SHOW_INFO;
689     }
690    
691     /* In-line pattern (the usual case). Get the delimiter and seek the end of
692     the pattern; if it isn't complete, read more. */
693    
694 nigel 3 delimiter = *p++;
695    
696 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
697 nigel 3 {
698 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
699 nigel 3 goto SKIP_DATA;
700     }
701    
702     pp = p;
703    
704     for(;;)
705     {
706 nigel 29 while (*pp != 0)
707     {
708     if (*pp == '\\' && pp[1] != 0) pp++;
709     else if (*pp == delimiter) break;
710     pp++;
711     }
712 nigel 3 if (*pp != 0) break;
713    
714 nigel 69 len = BUFFER_SIZE - (pp - buffer);
715 nigel 3 if (len < 256)
716     {
717     fprintf(outfile, "** Expression too long - missing delimiter?\n");
718     goto SKIP_DATA;
719     }
720    
721     if (infile == stdin) printf(" > ");
722     if (fgets((char *)pp, len, infile) == NULL)
723     {
724     fprintf(outfile, "** Unexpected EOF\n");
725 nigel 11 done = 1;
726     goto CONTINUE;
727 nigel 3 }
728 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
729 nigel 3 }
730    
731 nigel 29 /* If the first character after the delimiter is backslash, make
732     the pattern end with backslash. This is purely to provide a way
733     of testing for the error message when a pattern ends with backslash. */
734    
735     if (pp[1] == '\\') *pp++ = '\\';
736    
737 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
738     for callouts. */
739 nigel 3
740     *pp++ = 0;
741 nigel 75 strcpy((char *)pbuffer, (char *)p);
742 nigel 3
743     /* Look for options after final delimiter */
744    
745     options = 0;
746     study_options = 0;
747 nigel 31 log_store = showstore; /* default from command line */
748    
749 nigel 3 while (*pp != 0)
750     {
751     switch (*pp++)
752     {
753 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
754 nigel 35 case 'g': do_g = 1; break;
755 nigel 3 case 'i': options |= PCRE_CASELESS; break;
756     case 'm': options |= PCRE_MULTILINE; break;
757     case 's': options |= PCRE_DOTALL; break;
758     case 'x': options |= PCRE_EXTENDED; break;
759 nigel 25
760 nigel 35 case '+': do_showrest = 1; break;
761 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
762 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
763 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
764 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
765 nigel 75 case 'F': do_flip = 1; break;
766 nigel 35 case 'G': do_G = 1; break;
767 nigel 25 case 'I': do_showinfo = 1; break;
768 nigel 31 case 'M': log_store = 1; break;
769 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
770 nigel 37
771     #if !defined NOPOSIX
772 nigel 3 case 'P': do_posix = 1; break;
773 nigel 37 #endif
774    
775 nigel 3 case 'S': do_study = 1; break;
776 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
777 nigel 3 case 'X': options |= PCRE_EXTRA; break;
778 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
779 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
780 nigel 25
781     case 'L':
782     ppp = pp;
783 nigel 77 /* The '\r' test here is so that it works on Windows */
784     while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
785 nigel 25 *ppp = 0;
786     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
787     {
788     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
789     goto SKIP_DATA;
790     }
791     tables = pcre_maketables();
792     pp = ppp;
793     break;
794    
795 nigel 75 case '>':
796     to_file = pp;
797     while (*pp != 0) pp++;
798     while (isspace(pp[-1])) pp--;
799     *pp = 0;
800     break;
801    
802 nigel 77 case '\r': /* So that it works in Windows */
803     case '\n':
804     case ' ':
805     break;
806 nigel 75
807 nigel 3 default:
808     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
809     goto SKIP_DATA;
810     }
811     }
812    
813 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
814 nigel 25 timing, showing, or debugging options, nor the ability to pass over
815     local character tables. */
816 nigel 3
817 nigel 37 #if !defined NOPOSIX
818 nigel 3 if (posix || do_posix)
819     {
820     int rc;
821     int cflags = 0;
822 nigel 75
823 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
824     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
825 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
826 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
827    
828     /* Compilation failed; go back for another re, skipping to blank line
829     if non-interactive. */
830    
831     if (rc != 0)
832     {
833 nigel 69 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
834 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
835     goto SKIP_DATA;
836     }
837     }
838    
839     /* Handle compiling via the native interface */
840    
841     else
842 nigel 37 #endif /* !defined NOPOSIX */
843    
844 nigel 3 {
845     if (timeit)
846     {
847     register int i;
848     clock_t time_taken;
849     clock_t start_time = clock();
850 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
851 nigel 3 {
852 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
853 nigel 3 if (re != NULL) free(re);
854     }
855     time_taken = clock() - start_time;
856 nigel 27 fprintf(outfile, "Compile time %.3f milliseconds\n",
857 nigel 63 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
858     (double)CLOCKS_PER_SEC);
859 nigel 3 }
860    
861 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
862 nigel 3
863     /* Compilation failed; go back for another re, skipping to blank line
864     if non-interactive. */
865    
866     if (re == NULL)
867     {
868     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
869     SKIP_DATA:
870     if (infile != stdin)
871     {
872     for (;;)
873     {
874 nigel 69 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
875 nigel 11 {
876     done = 1;
877     goto CONTINUE;
878     }
879 nigel 3 len = (int)strlen((char *)buffer);
880     while (len > 0 && isspace(buffer[len-1])) len--;
881     if (len == 0) break;
882     }
883     fprintf(outfile, "\n");
884     }
885 nigel 25 goto CONTINUE;
886 nigel 3 }
887    
888 nigel 43 /* Compilation succeeded; print data if required. There are now two
889     info-returning functions. The old one has a limited interface and
890     returns only limited data. Check that it agrees with the newer one. */
891 nigel 3
892 nigel 63 if (log_store)
893     fprintf(outfile, "Memory allocation (code space): %d\n",
894     (int)(gotten_store -
895     sizeof(real_pcre) -
896     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
897    
898 nigel 75 /* Extract the size for possible writing before possibly flipping it,
899     and remember the store that was got. */
900    
901     true_size = ((real_pcre *)re)->size;
902     regex_gotten_store = gotten_store;
903    
904     /* If /S was present, study the regexp to generate additional info to
905     help with the matching. */
906    
907     if (do_study)
908     {
909     if (timeit)
910     {
911     register int i;
912     clock_t time_taken;
913     clock_t start_time = clock();
914     for (i = 0; i < LOOPREPEAT; i++)
915     extra = pcre_study(re, study_options, &error);
916     time_taken = clock() - start_time;
917     if (extra != NULL) free(extra);
918     fprintf(outfile, " Study time %.3f milliseconds\n",
919     (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
920     (double)CLOCKS_PER_SEC);
921     }
922     extra = pcre_study(re, study_options, &error);
923     if (error != NULL)
924     fprintf(outfile, "Failed to study: %s\n", error);
925     else if (extra != NULL)
926     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
927     }
928    
929     /* If the 'F' option was present, we flip the bytes of all the integer
930     fields in the regex data block and the study block. This is to make it
931     possible to test PCRE's handling of byte-flipped patterns, e.g. those
932     compiled on a different architecture. */
933    
934     if (do_flip)
935     {
936     real_pcre *rre = (real_pcre *)re;
937     rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
938     rre->size = byteflip(rre->size, sizeof(rre->size));
939     rre->options = byteflip(rre->options, sizeof(rre->options));
940     rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
941     rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
942     rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
943     rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
944     rre->name_table_offset = byteflip(rre->name_table_offset,
945     sizeof(rre->name_table_offset));
946     rre->name_entry_size = byteflip(rre->name_entry_size,
947     sizeof(rre->name_entry_size));
948     rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
949    
950     if (extra != NULL)
951     {
952     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
953     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
954     rsd->options = byteflip(rsd->options, sizeof(rsd->options));
955     }
956     }
957    
958     /* Extract information from the compiled data if required */
959    
960     SHOW_INFO:
961    
962 nigel 25 if (do_showinfo)
963 nigel 3 {
964 nigel 75 unsigned long int get_options, all_options;
965 nigel 43 int old_first_char, old_options, old_count;
966     int count, backrefmax, first_char, need_char;
967 nigel 63 int nameentrysize, namecount;
968     const uschar *nametable;
969 nigel 3
970 nigel 63 if (do_debug)
971     {
972     fprintf(outfile, "------------------------------------------------------------------\n");
973 nigel 77 _pcre_printint(re, outfile);
974 nigel 63 }
975 nigel 3
976 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
977 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
978     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
979     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
980 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
981 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
982 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
983     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
984 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
985 nigel 43
986     old_count = pcre_info(re, &old_options, &old_first_char);
987 nigel 3 if (count < 0) fprintf(outfile,
988 nigel 43 "Error %d from pcre_info()\n", count);
989 nigel 3 else
990     {
991 nigel 43 if (old_count != count) fprintf(outfile,
992     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
993     old_count);
994 nigel 37
995 nigel 43 if (old_first_char != first_char) fprintf(outfile,
996     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
997     first_char, old_first_char);
998 nigel 37
999 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1000     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1001     get_options, old_options);
1002 nigel 43 }
1003    
1004 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1005 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1006 nigel 77 (int)size, (int)regex_gotten_store);
1007 nigel 43
1008     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1009     if (backrefmax > 0)
1010     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1011 nigel 63
1012     if (namecount > 0)
1013     {
1014     fprintf(outfile, "Named capturing subpatterns:\n");
1015     while (namecount-- > 0)
1016     {
1017     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1018     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1019     GET2(nametable, 0));
1020     nametable += nameentrysize;
1021     }
1022     }
1023    
1024 nigel 75 /* The NOPARTIAL bit is a private bit in the options, so we have
1025     to fish it out via our back door */
1026    
1027     all_options = ((real_pcre *)re)->options;
1028     if (do_flip)
1029     {
1030     all_options = byteflip(all_options, sizeof(all_options));
1031     }
1032    
1033     if ((all_options & PCRE_NOPARTIAL) != 0)
1034     fprintf(outfile, "Partial matching not supported\n");
1035    
1036 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1037 nigel 77 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",
1038 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1039     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1040     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1041     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1042 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1043 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1044     ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1045     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1046     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1047 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1048     ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1049 nigel 43
1050     if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1051     fprintf(outfile, "Case state changes\n");
1052    
1053     if (first_char == -1)
1054     {
1055     fprintf(outfile, "First char at start or follows \\n\n");
1056     }
1057     else if (first_char < 0)
1058     {
1059     fprintf(outfile, "No first char\n");
1060     }
1061     else
1062     {
1063 nigel 63 int ch = first_char & 255;
1064 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1065 nigel 63 "" : " (caseless)";
1066     if (isprint(ch))
1067     fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1068 nigel 3 else
1069 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1070 nigel 43 }
1071 nigel 37
1072 nigel 43 if (need_char < 0)
1073     {
1074     fprintf(outfile, "No need char\n");
1075 nigel 3 }
1076 nigel 43 else
1077     {
1078 nigel 63 int ch = need_char & 255;
1079 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1080 nigel 63 "" : " (caseless)";
1081     if (isprint(ch))
1082     fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1083 nigel 43 else
1084 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1085 nigel 43 }
1086 nigel 75
1087     /* Don't output study size; at present it is in any case a fixed
1088     value, but it varies, depending on the computer architecture, and
1089     so messes up the test suite. (And with the /F option, it might be
1090     flipped.) */
1091    
1092     if (do_study)
1093     {
1094     if (extra == NULL)
1095     fprintf(outfile, "Study returned NULL\n");
1096     else
1097     {
1098     uschar *start_bits = NULL;
1099     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1100    
1101     if (start_bits == NULL)
1102     fprintf(outfile, "No starting byte set\n");
1103     else
1104     {
1105     int i;
1106     int c = 24;
1107     fprintf(outfile, "Starting byte set: ");
1108     for (i = 0; i < 256; i++)
1109     {
1110     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1111     {
1112     if (c > 75)
1113     {
1114     fprintf(outfile, "\n ");
1115     c = 2;
1116     }
1117     if (isprint(i) && i != ' ')
1118     {
1119     fprintf(outfile, "%c ", i);
1120     c += 2;
1121     }
1122     else
1123     {
1124     fprintf(outfile, "\\x%02x ", i);
1125     c += 5;
1126     }
1127     }
1128     }
1129     fprintf(outfile, "\n");
1130     }
1131     }
1132     }
1133 nigel 3 }
1134    
1135 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1136     that is all. The first 8 bytes of the file are the regex length and then
1137     the study length, in big-endian order. */
1138 nigel 3
1139 nigel 75 if (to_file != NULL)
1140 nigel 3 {
1141 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1142     if (f == NULL)
1143 nigel 3 {
1144 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1145 nigel 3 }
1146 nigel 75 else
1147     {
1148     uschar sbuf[8];
1149     sbuf[0] = (true_size >> 24) & 255;
1150     sbuf[1] = (true_size >> 16) & 255;
1151     sbuf[2] = (true_size >> 8) & 255;
1152     sbuf[3] = (true_size) & 255;
1153 nigel 3
1154 nigel 75 sbuf[4] = (true_study_size >> 24) & 255;
1155     sbuf[5] = (true_study_size >> 16) & 255;
1156     sbuf[6] = (true_study_size >> 8) & 255;
1157     sbuf[7] = (true_study_size) & 255;
1158 nigel 3
1159 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1160     fwrite(re, 1, true_size, f) < true_size)
1161     {
1162     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1163     }
1164 nigel 3 else
1165     {
1166 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1167     if (extra != NULL)
1168 nigel 3 {
1169 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1170     true_study_size)
1171 nigel 3 {
1172 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1173     strerror(errno));
1174 nigel 3 }
1175 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1176 nigel 3 }
1177     }
1178 nigel 75 fclose(f);
1179 nigel 3 }
1180 nigel 77
1181     new_free(re);
1182     if (extra != NULL) new_free(extra);
1183     if (tables != NULL) new_free((void *)tables);
1184 nigel 75 continue; /* With next regex */
1185 nigel 3 }
1186 nigel 75 } /* End of non-POSIX compile */
1187 nigel 3
1188     /* Read data lines and test them */
1189    
1190     for (;;)
1191     {
1192 nigel 9 unsigned char *q;
1193 nigel 35 unsigned char *bptr = dbuffer;
1194 nigel 57 int *use_offsets = offsets;
1195 nigel 53 int use_size_offsets = size_offsets;
1196 nigel 63 int callout_data = 0;
1197     int callout_data_set = 0;
1198 nigel 3 int count, c;
1199 nigel 29 int copystrings = 0;
1200 nigel 63 int find_match_limit = 0;
1201 nigel 29 int getstrings = 0;
1202     int getlist = 0;
1203 nigel 39 int gmatched = 0;
1204 nigel 35 int start_offset = 0;
1205 nigel 41 int g_notempty = 0;
1206 nigel 77 int use_dfa = 0;
1207 nigel 3
1208     options = 0;
1209    
1210 nigel 63 pcre_callout = callout;
1211     first_callout = 1;
1212     callout_extra = 0;
1213     callout_count = 0;
1214     callout_fail_count = 999999;
1215     callout_fail_id = -1;
1216 nigel 73 show_malloc = 0;
1217 nigel 63
1218 nigel 35 if (infile == stdin) printf("data> ");
1219 nigel 69 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1220 nigel 11 {
1221     done = 1;
1222     goto CONTINUE;
1223     }
1224 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1225 nigel 3
1226     len = (int)strlen((char *)buffer);
1227     while (len > 0 && isspace(buffer[len-1])) len--;
1228     buffer[len] = 0;
1229     if (len == 0) break;
1230    
1231     p = buffer;
1232     while (isspace(*p)) p++;
1233    
1234 nigel 9 q = dbuffer;
1235 nigel 3 while ((c = *p++) != 0)
1236     {
1237     int i = 0;
1238     int n = 0;
1239 nigel 63
1240 nigel 3 if (c == '\\') switch ((c = *p++))
1241     {
1242     case 'a': c = 7; break;
1243     case 'b': c = '\b'; break;
1244     case 'e': c = 27; break;
1245     case 'f': c = '\f'; break;
1246     case 'n': c = '\n'; break;
1247     case 'r': c = '\r'; break;
1248     case 't': c = '\t'; break;
1249     case 'v': c = '\v'; break;
1250    
1251     case '0': case '1': case '2': case '3':
1252     case '4': case '5': case '6': case '7':
1253     c -= '0';
1254     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1255     c = c * 8 + *p++ - '0';
1256     break;
1257    
1258     case 'x':
1259 nigel 49
1260     /* Handle \x{..} specially - new Perl thing for utf8 */
1261    
1262     if (*p == '{')
1263     {
1264     unsigned char *pt = p;
1265     c = 0;
1266     while (isxdigit(*(++pt)))
1267     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1268     if (*pt == '}')
1269     {
1270 nigel 67 unsigned char buff8[8];
1271 nigel 49 int ii, utn;
1272 nigel 77 utn = _pcre_ord2utf8(c, buff8);
1273 nigel 67 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1274     c = buff8[ii]; /* Last byte */
1275 nigel 49 p = pt + 1;
1276     break;
1277     }
1278     /* Not correct form; fall through */
1279     }
1280    
1281     /* Ordinary \x */
1282    
1283 nigel 3 c = 0;
1284     while (i++ < 2 && isxdigit(*p))
1285     {
1286     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1287     p++;
1288     }
1289     break;
1290    
1291 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1292 nigel 3 p--;
1293     continue;
1294    
1295 nigel 75 case '>':
1296     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1297     continue;
1298    
1299 nigel 3 case 'A': /* Option setting */
1300     options |= PCRE_ANCHORED;
1301     continue;
1302    
1303     case 'B':
1304     options |= PCRE_NOTBOL;
1305     continue;
1306    
1307 nigel 29 case 'C':
1308 nigel 63 if (isdigit(*p)) /* Set copy string */
1309     {
1310     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1311     copystrings |= 1 << n;
1312     }
1313     else if (isalnum(*p))
1314     {
1315     uschar name[256];
1316 nigel 67 uschar *npp = name;
1317     while (isalnum(*p)) *npp++ = *p++;
1318     *npp = 0;
1319 nigel 65 n = pcre_get_stringnumber(re, (char *)name);
1320 nigel 63 if (n < 0)
1321     fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1322     else copystrings |= 1 << n;
1323     }
1324     else if (*p == '+')
1325     {
1326     callout_extra = 1;
1327     p++;
1328     }
1329     else if (*p == '-')
1330     {
1331     pcre_callout = NULL;
1332     p++;
1333     }
1334     else if (*p == '!')
1335     {
1336     callout_fail_id = 0;
1337     p++;
1338     while(isdigit(*p))
1339     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1340     callout_fail_count = 0;
1341     if (*p == '!')
1342     {
1343     p++;
1344     while(isdigit(*p))
1345     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1346     }
1347     }
1348     else if (*p == '*')
1349     {
1350     int sign = 1;
1351     callout_data = 0;
1352     if (*(++p) == '-') { sign = -1; p++; }
1353     while(isdigit(*p))
1354     callout_data = callout_data * 10 + *p++ - '0';
1355     callout_data *= sign;
1356     callout_data_set = 1;
1357     }
1358 nigel 29 continue;
1359    
1360 nigel 77 case 'D':
1361     if (posix || do_posix)
1362     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1363     else
1364     use_dfa = 1;
1365     continue;
1366    
1367     case 'F':
1368     options |= PCRE_DFA_SHORTEST;
1369     continue;
1370    
1371 nigel 29 case 'G':
1372 nigel 63 if (isdigit(*p))
1373     {
1374     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1375     getstrings |= 1 << n;
1376     }
1377     else if (isalnum(*p))
1378     {
1379     uschar name[256];
1380 nigel 67 uschar *npp = name;
1381     while (isalnum(*p)) *npp++ = *p++;
1382     *npp = 0;
1383 nigel 65 n = pcre_get_stringnumber(re, (char *)name);
1384 nigel 63 if (n < 0)
1385     fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1386     else getstrings |= 1 << n;
1387     }
1388 nigel 29 continue;
1389    
1390     case 'L':
1391     getlist = 1;
1392     continue;
1393    
1394 nigel 63 case 'M':
1395     find_match_limit = 1;
1396     continue;
1397    
1398 nigel 37 case 'N':
1399     options |= PCRE_NOTEMPTY;
1400     continue;
1401    
1402 nigel 3 case 'O':
1403     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1404 nigel 53 if (n > size_offsets_max)
1405     {
1406     size_offsets_max = n;
1407 nigel 57 free(offsets);
1408 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1409 nigel 53 if (offsets == NULL)
1410     {
1411     printf("** Failed to get %d bytes of memory for offsets vector\n",
1412     size_offsets_max * sizeof(int));
1413 nigel 77 yield = 1;
1414     goto EXIT;
1415 nigel 53 }
1416     }
1417     use_size_offsets = n;
1418 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1419 nigel 3 continue;
1420    
1421 nigel 75 case 'P':
1422     options |= PCRE_PARTIAL;
1423     continue;
1424    
1425 nigel 77 case 'R':
1426     options |= PCRE_DFA_RESTART;
1427     continue;
1428    
1429 nigel 73 case 'S':
1430     show_malloc = 1;
1431     continue;
1432    
1433 nigel 3 case 'Z':
1434     options |= PCRE_NOTEOL;
1435     continue;
1436 nigel 71
1437     case '?':
1438     options |= PCRE_NO_UTF8_CHECK;
1439     continue;
1440 nigel 3 }
1441 nigel 9 *q++ = c;
1442 nigel 3 }
1443 nigel 9 *q = 0;
1444     len = q - dbuffer;
1445 nigel 3
1446 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
1447     {
1448     printf("**Match limit not relevant for DFA matching: ignored\n");
1449     find_match_limit = 0;
1450     }
1451    
1452 nigel 3 /* Handle matching via the POSIX interface, which does not
1453 nigel 63 support timing or playing with the match limit or callout data. */
1454 nigel 3
1455 nigel 37 #if !defined NOPOSIX
1456 nigel 3 if (posix || do_posix)
1457     {
1458     int rc;
1459     int eflags = 0;
1460 nigel 63 regmatch_t *pmatch = NULL;
1461     if (use_size_offsets > 0)
1462 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1463 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1464     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1465    
1466 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1467 nigel 3
1468     if (rc != 0)
1469     {
1470 nigel 69 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1471 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1472     }
1473     else
1474     {
1475 nigel 7 size_t i;
1476 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
1477 nigel 3 {
1478     if (pmatch[i].rm_so >= 0)
1479     {
1480 nigel 23 fprintf(outfile, "%2d: ", (int)i);
1481 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
1482     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1483 nigel 3 fprintf(outfile, "\n");
1484 nigel 35 if (i == 0 && do_showrest)
1485     {
1486     fprintf(outfile, " 0+ ");
1487 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1488     outfile);
1489 nigel 35 fprintf(outfile, "\n");
1490     }
1491 nigel 3 }
1492     }
1493     }
1494 nigel 53 free(pmatch);
1495 nigel 3 }
1496    
1497 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
1498 nigel 3
1499 nigel 37 else
1500     #endif /* !defined NOPOSIX */
1501    
1502 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
1503 nigel 3 {
1504     if (timeit)
1505     {
1506     register int i;
1507     clock_t time_taken;
1508     clock_t start_time = clock();
1509 nigel 77
1510     if (all_use_dfa || use_dfa)
1511     {
1512     int workspace[1000];
1513     for (i = 0; i < LOOPREPEAT; i++)
1514     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1515     options | g_notempty, use_offsets, use_size_offsets, workspace,
1516     sizeof(workspace)/sizeof(int));
1517     }
1518     else
1519    
1520 nigel 27 for (i = 0; i < LOOPREPEAT; i++)
1521 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
1522 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1523 nigel 77
1524 nigel 3 time_taken = clock() - start_time;
1525 nigel 27 fprintf(outfile, "Execute time %.3f milliseconds\n",
1526 nigel 63 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1527     (double)CLOCKS_PER_SEC);
1528 nigel 3 }
1529    
1530 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
1531     varying limits in order to find the minimum value. */
1532    
1533     if (find_match_limit)
1534     {
1535     int min = 0;
1536     int mid = 64;
1537     int max = -1;
1538    
1539     if (extra == NULL)
1540     {
1541 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1542 nigel 63 extra->flags = 0;
1543     }
1544     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1545    
1546     for (;;)
1547     {
1548     extra->match_limit = mid;
1549     count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1550     options | g_notempty, use_offsets, use_size_offsets);
1551     if (count == PCRE_ERROR_MATCHLIMIT)
1552     {
1553     /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1554     min = mid;
1555     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1556     }
1557 nigel 75 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1558     count == PCRE_ERROR_PARTIAL)
1559 nigel 63 {
1560     if (mid == min + 1)
1561     {
1562     fprintf(outfile, "Minimum match limit = %d\n", mid);
1563     break;
1564     }
1565     /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1566     max = mid;
1567     mid = (min + mid)/2;
1568     }
1569     else break; /* Some other error */
1570     }
1571    
1572     extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1573     }
1574    
1575     /* If callout_data is set, use the interface with additional data */
1576    
1577     else if (callout_data_set)
1578     {
1579     if (extra == NULL)
1580     {
1581 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1582 nigel 63 extra->flags = 0;
1583     }
1584     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1585 nigel 71 extra->callout_data = &callout_data;
1586 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1587     options | g_notempty, use_offsets, use_size_offsets);
1588     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1589     }
1590    
1591     /* The normal case is just to do the match once, with the default
1592     value of match_limit. */
1593    
1594 nigel 77 else if (all_use_dfa || use_dfa)
1595     {
1596     int workspace[1000];
1597     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1598     options | g_notempty, use_offsets, use_size_offsets, workspace,
1599     sizeof(workspace)/sizeof(int));
1600     if (count == 0)
1601     {
1602     fprintf(outfile, "Matched, but too many subsidiary matches\n");
1603     count = use_size_offsets/2;
1604     }
1605     }
1606    
1607 nigel 75 else
1608     {
1609     count = pcre_exec(re, extra, (char *)bptr, len,
1610     start_offset, options | g_notempty, use_offsets, use_size_offsets);
1611 nigel 77 if (count == 0)
1612     {
1613     fprintf(outfile, "Matched, but too many substrings\n");
1614     count = use_size_offsets/3;
1615     }
1616 nigel 75 }
1617 nigel 3
1618 nigel 39 /* Matched */
1619    
1620 nigel 3 if (count >= 0)
1621     {
1622     int i;
1623 nigel 29 for (i = 0; i < count * 2; i += 2)
1624 nigel 3 {
1625 nigel 57 if (use_offsets[i] < 0)
1626 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
1627     else
1628     {
1629     fprintf(outfile, "%2d: ", i/2);
1630 nigel 63 (void)pchars(bptr + use_offsets[i],
1631     use_offsets[i+1] - use_offsets[i], outfile);
1632 nigel 3 fprintf(outfile, "\n");
1633 nigel 35 if (i == 0)
1634     {
1635     if (do_showrest)
1636     {
1637     fprintf(outfile, " 0+ ");
1638 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1639     outfile);
1640 nigel 35 fprintf(outfile, "\n");
1641     }
1642     }
1643 nigel 3 }
1644     }
1645 nigel 29
1646     for (i = 0; i < 32; i++)
1647     {
1648     if ((copystrings & (1 << i)) != 0)
1649     {
1650 nigel 37 char copybuffer[16];
1651 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1652 nigel 37 i, copybuffer, sizeof(copybuffer));
1653 nigel 29 if (rc < 0)
1654     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1655     else
1656 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1657 nigel 29 }
1658     }
1659    
1660     for (i = 0; i < 32; i++)
1661     {
1662     if ((getstrings & (1 << i)) != 0)
1663     {
1664     const char *substring;
1665 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1666 nigel 29 i, &substring);
1667     if (rc < 0)
1668     fprintf(outfile, "get substring %d failed %d\n", i, rc);
1669     else
1670     {
1671     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1672 nigel 49 /* free((void *)substring); */
1673     pcre_free_substring(substring);
1674 nigel 29 }
1675     }
1676     }
1677    
1678     if (getlist)
1679     {
1680     const char **stringlist;
1681 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1682 nigel 29 &stringlist);
1683     if (rc < 0)
1684     fprintf(outfile, "get substring list failed %d\n", rc);
1685     else
1686     {
1687     for (i = 0; i < count; i++)
1688     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1689     if (stringlist[i] != NULL)
1690     fprintf(outfile, "string list not terminated by NULL\n");
1691 nigel 49 /* free((void *)stringlist); */
1692     pcre_free_substring_list(stringlist);
1693 nigel 29 }
1694     }
1695 nigel 39 }
1696 nigel 29
1697 nigel 75 /* There was a partial match */
1698    
1699     else if (count == PCRE_ERROR_PARTIAL)
1700     {
1701 nigel 77 fprintf(outfile, "Partial match");
1702     if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
1703     fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
1704     bptr + use_offsets[0]);
1705     fprintf(outfile, "\n");
1706 nigel 75 break; /* Out of the /g loop */
1707     }
1708    
1709 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
1710 nigel 47 g_notempty after a null match, this is not necessarily the end.
1711 nigel 73 We want to advance the start offset, and continue. In the case of UTF-8
1712     matching, the advance must be one character, not one byte. Fudge the
1713     offset values to achieve this. We won't be at the end of the string -
1714     that was checked before setting g_notempty. */
1715 nigel 39
1716 nigel 3 else
1717     {
1718 nigel 41 if (g_notempty != 0)
1719 nigel 35 {
1720 nigel 73 int onechar = 1;
1721 nigel 57 use_offsets[0] = start_offset;
1722 nigel 73 if (use_utf8)
1723     {
1724     while (start_offset + onechar < len)
1725     {
1726     int tb = bptr[start_offset+onechar];
1727     if (tb <= 127) break;
1728     tb &= 0xc0;
1729     if (tb != 0 && tb != 0xc0) onechar++;
1730     }
1731     }
1732     use_offsets[1] = start_offset + onechar;
1733 nigel 35 }
1734 nigel 41 else
1735     {
1736 nigel 73 if (count == PCRE_ERROR_NOMATCH)
1737 nigel 41 {
1738 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
1739 nigel 41 }
1740 nigel 73 else fprintf(outfile, "Error %d\n", count);
1741 nigel 41 break; /* Out of the /g loop */
1742     }
1743 nigel 3 }
1744 nigel 35
1745 nigel 39 /* If not /g or /G we are done */
1746    
1747     if (!do_g && !do_G) break;
1748    
1749 nigel 41 /* If we have matched an empty string, first check to see if we are at
1750     the end of the subject. If so, the /g loop is over. Otherwise, mimic
1751     what Perl's /g option does. This turns out to be rather cunning. First
1752 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1753     same point. If this fails (picked up above) we advance to the next
1754     character. */
1755 nigel 39
1756 nigel 41 g_notempty = 0;
1757 nigel 57 if (use_offsets[0] == use_offsets[1])
1758 nigel 41 {
1759 nigel 57 if (use_offsets[0] == len) break;
1760 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1761 nigel 41 }
1762 nigel 39
1763     /* For /g, update the start offset, leaving the rest alone */
1764    
1765 nigel 57 if (do_g) start_offset = use_offsets[1];
1766 nigel 39
1767     /* For /G, update the pointer and length */
1768    
1769     else
1770 nigel 35 {
1771 nigel 57 bptr += use_offsets[1];
1772     len -= use_offsets[1];
1773 nigel 35 }
1774 nigel 39 } /* End of loop for /g and /G */
1775     } /* End of loop for data lines */
1776 nigel 3
1777 nigel 11 CONTINUE:
1778 nigel 37
1779     #if !defined NOPOSIX
1780 nigel 3 if (posix || do_posix) regfree(&preg);
1781 nigel 37 #endif
1782    
1783 nigel 77 if (re != NULL) new_free(re);
1784     if (extra != NULL) new_free(extra);
1785 nigel 25 if (tables != NULL)
1786     {
1787 nigel 77 new_free((void *)tables);
1788 nigel 25 setlocale(LC_CTYPE, "C");
1789     }
1790 nigel 3 }
1791    
1792 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
1793 nigel 77
1794     EXIT:
1795    
1796     if (infile != NULL && infile != stdin) fclose(infile);
1797     if (outfile != NULL && outfile != stdout) fclose(outfile);
1798    
1799     free(buffer);
1800     free(dbuffer);
1801     free(pbuffer);
1802     free(offsets);
1803    
1804     return yield;
1805 nigel 3 }
1806    
1807 nigel 77 /* End of pcretest.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12