/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 79 - (hide annotations) (download)
Sat Feb 24 21:40:52 2007 UTC (7 years, 4 months ago) by nigel
File MIME type: text/plain
File size: 52916 byte(s)
Load pcre-6.1 into code/trunk.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 nigel 3 #include <ctype.h>
40     #include <stdio.h>
41     #include <string.h>
42     #include <stdlib.h>
43     #include <time.h>
44 nigel 25 #include <locale.h>
45 nigel 75 #include <errno.h>
46 nigel 3
47 nigel 63 #define PCRE_SPY /* For Win32 build, import data, not export */
48 nigel 37
49 nigel 77 /* We need the internal info for displaying the results of pcre_study() and
50     other internal data; pcretest also uses some of the fixed tables, and generally
51     has "inside information" compared to a program that strictly follows the PCRE
52     API. */
53    
54     #include "pcre_internal.h"
55    
56    
57 nigel 37 /* It is possible to compile this test program without including support for
58     testing the POSIX interface, though this is not available via the standard
59     Makefile. */
60    
61     #if !defined NOPOSIX
62 nigel 3 #include "pcreposix.h"
63 nigel 37 #endif
64 nigel 3
65 nigel 79 /* It is also possible, for the benefit of the version imported into Exim, to
66     build pcretest without support for UTF8 (define NOUTF8), without the interface
67     to the DFA matcher (NODFA), and without the doublecheck of the old "info"
68     function (define NOINFOCHECK). */
69    
70    
71 nigel 3 #ifndef CLOCKS_PER_SEC
72     #ifdef CLK_TCK
73     #define CLOCKS_PER_SEC CLK_TCK
74     #else
75     #define CLOCKS_PER_SEC 100
76     #endif
77     #endif
78    
79 nigel 75 #define LOOPREPEAT 500000
80 nigel 3
81 nigel 69 #define BUFFER_SIZE 30000
82 nigel 75 #define PBUFFER_SIZE BUFFER_SIZE
83 nigel 73 #define DBUFFER_SIZE BUFFER_SIZE
84 nigel 23
85 nigel 69
86 nigel 3 static FILE *outfile;
87     static int log_store = 0;
88 nigel 63 static int callout_count;
89     static int callout_extra;
90     static int callout_fail_count;
91     static int callout_fail_id;
92     static int first_callout;
93 nigel 73 static int show_malloc;
94 nigel 67 static int use_utf8;
95 nigel 43 static size_t gotten_store;
96 nigel 3
97 nigel 75 static uschar *pbuffer = NULL;
98 nigel 3
99 nigel 75
100 nigel 49
101     /*************************************************
102 nigel 63 * Read number from string *
103     *************************************************/
104    
105     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
106     around with conditional compilation, just do the job by hand. It is only used
107     for unpicking the -o argument, so just keep it simple.
108    
109     Arguments:
110     str string to be converted
111     endptr where to put the end pointer
112    
113     Returns: the converted value, as an int
114     */
115    
/* Parse an unsigned decimal number from the start of a string.

Leading characters for which isspace() is true are skipped, then every
consecutive isdigit() character is folded into the result. No sign handling
and no overflow detection is performed.

Arguments:
  str       string to be converted
  endptr    receives a pointer to the first character that was not consumed

Returns:    the accumulated value (0 if no digits were found)
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  unsigned char *cursor = str;
  int value = 0;

  /* Step over leading white space, but never past the terminating zero. */
  for (; *cursor != 0 && isspace(*cursor); cursor++)
    ;

  /* Accumulate decimal digits, most significant first. */
  for (; isdigit(*cursor); cursor++)
    value = value * 10 + (int)(*cursor - '0');

  *endptr = cursor;
  return value;
}
125    
126    
127    
128 nigel 49
129     /*************************************************
130     * Convert UTF-8 string to value *
131     *************************************************/
132    
133     /* This function takes one or more bytes that represents a UTF-8 character,
134     and returns the value of the character.
135    
136     Argument:
137     buffer a pointer to the byte vector
138     vptr a pointer to an int to receive the value
139    
140     Returns: > 0 => the number of bytes consumed
141     -6 to 0 => malformed UTF-8 character at offset = (-return)
142     */
143    
144 nigel 79 #if !defined NOUTF8
145    
146 nigel 67 static int
147 nigel 49 utf82ord(unsigned char *buffer, int *vptr)
148     {
149     int c = *buffer++;
150     int d = c;
151     int i, j, s;
152    
153     for (i = -1; i < 6; i++) /* i is number of additional bytes */
154     {
155     if ((d & 0x80) == 0) break;
156     d <<= 1;
157     }
158    
159     if (i == -1) { *vptr = c; return 1; } /* ascii character */
160     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
161    
162     /* i now has a value in the range 1-5 */
163    
164 nigel 59 s = 6*i;
165 nigel 77 d = (c & _pcre_utf8_table3[i]) << s;
166 nigel 49
167     for (j = 0; j < i; j++)
168     {
169     c = *buffer++;
170     if ((c & 0xc0) != 0x80) return -(j+1);
171 nigel 59 s -= 6;
172 nigel 49 d |= (c & 0x3f) << s;
173     }
174    
175     /* Check that encoding was the correct unique one */
176    
177 nigel 77 for (j = 0; j < _pcre_utf8_table1_size; j++)
178     if (d <= _pcre_utf8_table1[j]) break;
179 nigel 49 if (j != i) return -(i+1);
180    
181     /* Valid value */
182    
183     *vptr = d;
184     return i+1;
185     }
186    
187 nigel 79 #endif
188 nigel 49
189    
190 nigel 79
191 nigel 63 /*************************************************
192     * Print character string *
193     *************************************************/
194 nigel 49
195 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
196     mode. Yields number of characters printed. If handed a NULL file, just counts
197     chars without printing. */
198 nigel 49
199 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
200 nigel 3 {
201 nigel 63 int c;
202     int yield = 0;
203 nigel 3
204 nigel 63 while (length-- > 0)
205 nigel 3 {
206 nigel 79 #if !defined NOUTF8
207 nigel 67 if (use_utf8)
208 nigel 63 {
209     int rc = utf82ord(p, &c);
210 nigel 3
211 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
212     {
213     length -= rc - 1;
214     p += rc;
215     if (c < 256 && isprint(c))
216     {
217     if (f != NULL) fprintf(f, "%c", c);
218     yield++;
219     }
220     else
221     {
222     int n;
223     if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
224     yield += n;
225     }
226     continue;
227     }
228     }
229 nigel 79 #endif
230 nigel 3
231 nigel 63 /* Not UTF-8, or malformed UTF-8 */
232    
233     if (isprint(c = *(p++)))
234 nigel 3 {
235 nigel 63 if (f != NULL) fprintf(f, "%c", c);
236     yield++;
237 nigel 3 }
238 nigel 63 else
239 nigel 3 {
240 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
241     yield += 4;
242     }
243     }
244 nigel 3
245 nigel 63 return yield;
246     }
247 nigel 23
248 nigel 3
249 nigel 23
250 nigel 63 /*************************************************
251     * Callout function *
252     *************************************************/
253 nigel 3
254 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
255     the match. Yield zero unless more callouts than the fail count, or the callout
256     data is not zero. */
257 nigel 3
258 nigel 63 static int callout(pcre_callout_block *cb)
259     {
260     FILE *f = (first_callout | callout_extra)? outfile : NULL;
261 nigel 75 int i, pre_start, post_start, subject_length;
262 nigel 3
263 nigel 63 if (callout_extra)
264     {
265     fprintf(f, "Callout %d: last capture = %d\n",
266     cb->callout_number, cb->capture_last);
267 nigel 3
268 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
269     {
270     if (cb->offset_vector[i] < 0)
271     fprintf(f, "%2d: <unset>\n", i/2);
272     else
273     {
274     fprintf(f, "%2d: ", i/2);
275     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
276     cb->offset_vector[i+1] - cb->offset_vector[i], f);
277     fprintf(f, "\n");
278     }
279     }
280     }
281 nigel 3
282 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
283     datails. On subsequent calls in the same match, we use pchars just to find the
284     printed lengths of the substrings. */
285 nigel 3
286 nigel 63 if (f != NULL) fprintf(f, "--->");
287 nigel 3
288 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
289     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
290     cb->current_position - cb->start_match, f);
291 nigel 3
292 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
293    
294 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
295     cb->subject_length - cb->current_position, f);
296 nigel 3
297 nigel 63 if (f != NULL) fprintf(f, "\n");
298 nigel 9
299 nigel 63 /* Always print appropriate indicators, with callout number if not already
300 nigel 75 shown. For automatic callouts, show the pattern offset. */
301 nigel 3
302 nigel 75 if (cb->callout_number == 255)
303     {
304     fprintf(outfile, "%+3d ", cb->pattern_position);
305     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
306     }
307     else
308     {
309     if (callout_extra) fprintf(outfile, " ");
310     else fprintf(outfile, "%3d ", cb->callout_number);
311     }
312 nigel 3
313 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
314     fprintf(outfile, "^");
315 nigel 3
316 nigel 63 if (post_start > 0)
317     {
318     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
319     fprintf(outfile, "^");
320 nigel 3 }
321    
322 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
323     fprintf(outfile, " ");
324    
325     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
326     pbuffer + cb->pattern_position);
327    
328 nigel 63 fprintf(outfile, "\n");
329     first_callout = 0;
330 nigel 3
331 nigel 71 if (cb->callout_data != NULL)
332 nigel 49 {
333 nigel 71 int callout_data = *((int *)(cb->callout_data));
334     if (callout_data != 0)
335     {
336     fprintf(outfile, "Callout data = %d\n", callout_data);
337     return callout_data;
338     }
339 nigel 63 }
340 nigel 49
341 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
342     (++callout_count >= callout_fail_count)? 1 : 0;
343 nigel 3 }
344    
345    
346 nigel 63 /*************************************************
347 nigel 73 * Local malloc functions *
348 nigel 63 *************************************************/
349 nigel 3
350     /* Alternative malloc function, to test functionality and show the size of the
351     compiled re. */
352    
353     static void *new_malloc(size_t size)
354     {
355 nigel 73 void *block = malloc(size);
356 nigel 43 gotten_store = size;
357 nigel 73 if (show_malloc)
358 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
359 nigel 73 return block;
360 nigel 3 }
361    
362 nigel 73 static void new_free(void *block)
363     {
364     if (show_malloc)
365     fprintf(outfile, "free %p\n", block);
366     free(block);
367     }
368 nigel 3
369    
370 nigel 73 /* For recursion malloc/free, to test stacking calls */
371    
372     static void *stack_malloc(size_t size)
373     {
374     void *block = malloc(size);
375     if (show_malloc)
376 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
377 nigel 73 return block;
378     }
379    
380     static void stack_free(void *block)
381     {
382     if (show_malloc)
383     fprintf(outfile, "stack_free %p\n", block);
384     free(block);
385     }
386    
387    
388 nigel 63 /*************************************************
389     * Call pcre_fullinfo() *
390     *************************************************/
391 nigel 43
392     /* Get one piece of information from the pcre_fullinfo() function */
393    
394     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
395     {
396     int rc;
397     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
398     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
399     }
400    
401    
402    
403 nigel 63 /*************************************************
404 nigel 75 * Byte flipping function *
405     *************************************************/
406    
/* Reverse the byte order of an integer field.

The masking and shifting is done on an unsigned copy of the value. Done in
signed arithmetic, moving the low byte up by 24 bits overflows a 32-bit long
whenever that byte is 0x80 or more, which is undefined behaviour.

Arguments:
  value     the value to flip
  n         width selector: 2 swaps the two low-order bytes; any other value
            reverses the four low-order bytes

Returns:    the flipped value; bytes above those handled come back as zero
*/

static long int
byteflip(long int value, int n)
{
  unsigned long int v = (unsigned long int)value;

  if (n == 2)
    return (long int)(((v & 0x00ffUL) << 8) | ((v & 0xff00UL) >> 8));

  return (long int)(((v & 0x000000ffUL) << 24) |
                    ((v & 0x0000ff00UL) <<  8) |
                    ((v & 0x00ff0000UL) >>  8) |
                    ((v & 0xff000000UL) >> 24));
}
416    
417    
418    
419    
420     /*************************************************
421 nigel 63 * Main Program *
422     *************************************************/
423 nigel 43
424 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
425     consist of a regular expression, in delimiters and optionally followed by
426     options, followed by a set of test data, terminated by an empty line. */
427    
428     int main(int argc, char **argv)
429     {
430     FILE *infile = stdin;
431     int options = 0;
432     int study_options = 0;
433     int op = 1;
434     int timeit = 0;
435     int showinfo = 0;
436 nigel 31 int showstore = 0;
437 nigel 53 int size_offsets = 45;
438     int size_offsets_max;
439 nigel 77 int *offsets = NULL;
440 nigel 53 #if !defined NOPOSIX
441 nigel 3 int posix = 0;
442 nigel 53 #endif
443 nigel 3 int debug = 0;
444 nigel 11 int done = 0;
445 nigel 77 int all_use_dfa = 0;
446     int yield = 0;
447 nigel 3
448 nigel 69 unsigned char *buffer;
449     unsigned char *dbuffer;
450    
451     /* Get buffers from malloc() so that Electric Fence will check their misuse
452     when I am debugging. */
453    
454 nigel 71 buffer = (unsigned char *)malloc(BUFFER_SIZE);
455     dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
456 nigel 75 pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
457 nigel 69
458 nigel 75 /* The outfile variable is static so that new_malloc can use it. The _setmode()
459     stuff is some magic that I don't understand, but which apparently does good
460     things in Windows. It's related to line terminations. */
461 nigel 3
462 nigel 75 #if defined(_WIN32) || defined(WIN32)
463     _setmode( _fileno( stdout ), 0x8000 );
464     #endif /* defined(_WIN32) || defined(WIN32) */
465    
466 nigel 3 outfile = stdout;
467    
468     /* Scan options */
469    
470     while (argc > 1 && argv[op][0] == '-')
471     {
472 nigel 63 unsigned char *endptr;
473 nigel 53
474 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
475     showstore = 1;
476 nigel 3 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
477     else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
478     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
479 nigel 79 #if !defined NODFA
480 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
481 nigel 79 #endif
482 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
483 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
484     *endptr == 0))
485 nigel 53 {
486     op++;
487     argc--;
488     }
489     #if !defined NOPOSIX
490 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
491 nigel 53 #endif
492 nigel 63 else if (strcmp(argv[op], "-C") == 0)
493     {
494     int rc;
495     printf("PCRE version %s\n", pcre_version());
496     printf("Compiled with\n");
497     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
498     printf(" %sUTF-8 support\n", rc? "" : "No ");
499 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
500     printf(" %sUnicode properties support\n", rc? "" : "No ");
501 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
502     printf(" Newline character is %s\n", (rc == '\r')? "CR" : "LF");
503     (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
504     printf(" Internal link size = %d\n", rc);
505     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
506     printf(" POSIX malloc threshold = %d\n", rc);
507     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
508     printf(" Default match limit = %d\n", rc);
509 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
510     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
511 nigel 63 exit(0);
512     }
513 nigel 3 else
514     {
515 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
516     printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
517 nigel 63 printf(" -C show PCRE compile-time options and exit\n");
518 nigel 77 printf(" -d debug: show compiled code; implies -i\n");
519 nigel 79 #if !defined NODFA
520 nigel 77 printf(" -dfa force DFA matching for all subjects\n");
521 nigel 79 #endif
522 nigel 77 printf(" -i show information about compiled pattern\n"
523 nigel 75 " -m output memory used information\n"
524 nigel 53 " -o <n> set size of offsets vector to <n>\n");
525     #if !defined NOPOSIX
526     printf(" -p use POSIX interface\n");
527     #endif
528 nigel 75 printf(" -s output store (memory) used information\n"
529 nigel 53 " -t time compilation and execution\n");
530 nigel 77 yield = 1;
531     goto EXIT;
532 nigel 3 }
533     op++;
534     argc--;
535     }
536    
537 nigel 53 /* Get the store for the offsets vector, and remember what it was */
538    
539     size_offsets_max = size_offsets;
540 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
541 nigel 53 if (offsets == NULL)
542     {
543     printf("** Failed to get %d bytes of memory for offsets vector\n",
544     size_offsets_max * sizeof(int));
545 nigel 77 yield = 1;
546     goto EXIT;
547 nigel 53 }
548    
549 nigel 3 /* Sort out the input and output files */
550    
551     if (argc > 1)
552     {
553 nigel 75 infile = fopen(argv[op], "rb");
554 nigel 3 if (infile == NULL)
555     {
556     printf("** Failed to open %s\n", argv[op]);
557 nigel 77 yield = 1;
558     goto EXIT;
559 nigel 3 }
560     }
561    
562     if (argc > 2)
563     {
564 nigel 75 outfile = fopen(argv[op+1], "wb");
565 nigel 3 if (outfile == NULL)
566     {
567     printf("** Failed to open %s\n", argv[op+1]);
568 nigel 77 yield = 1;
569     goto EXIT;
570 nigel 3 }
571     }
572    
573     /* Set alternative malloc function */
574    
575     pcre_malloc = new_malloc;
576 nigel 73 pcre_free = new_free;
577     pcre_stack_malloc = stack_malloc;
578     pcre_stack_free = stack_free;
579 nigel 3
580 nigel 23 /* Heading line, then prompt for first regex if stdin */
581 nigel 3
582     fprintf(outfile, "PCRE version %s\n\n", pcre_version());
583    
584     /* Main loop */
585    
586 nigel 11 while (!done)
587 nigel 3 {
588     pcre *re = NULL;
589     pcre_extra *extra = NULL;
590 nigel 37
591     #if !defined NOPOSIX /* There are still compilers that require no indent */
592 nigel 3 regex_t preg;
593 nigel 45 int do_posix = 0;
594 nigel 37 #endif
595    
596 nigel 7 const char *error;
597 nigel 25 unsigned char *p, *pp, *ppp;
598 nigel 75 unsigned char *to_file = NULL;
599 nigel 53 const unsigned char *tables = NULL;
600 nigel 75 unsigned long int true_size, true_study_size = 0;
601     size_t size, regex_gotten_store;
602 nigel 3 int do_study = 0;
603 nigel 25 int do_debug = debug;
604 nigel 35 int do_G = 0;
605     int do_g = 0;
606 nigel 25 int do_showinfo = showinfo;
607 nigel 35 int do_showrest = 0;
608 nigel 75 int do_flip = 0;
609 nigel 3 int erroroffset, len, delimiter;
610    
611 nigel 67 use_utf8 = 0;
612 nigel 63
613 nigel 3 if (infile == stdin) printf(" re> ");
614 nigel 69 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
615 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
616 nigel 63 fflush(outfile);
617 nigel 3
618     p = buffer;
619     while (isspace(*p)) p++;
620     if (*p == 0) continue;
621    
622 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
623 nigel 3
624 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
625     {
626     unsigned long int magic;
627     uschar sbuf[8];
628     FILE *f;
629    
630     p++;
631     pp = p + (int)strlen((char *)p);
632     while (isspace(pp[-1])) pp--;
633     *pp = 0;
634    
635     f = fopen((char *)p, "rb");
636     if (f == NULL)
637     {
638     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
639     continue;
640     }
641    
642     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
643    
644     true_size =
645     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
646     true_study_size =
647     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
648    
649     re = (real_pcre *)new_malloc(true_size);
650     regex_gotten_store = gotten_store;
651    
652     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
653    
654     magic = ((real_pcre *)re)->magic_number;
655     if (magic != MAGIC_NUMBER)
656     {
657     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
658     {
659     do_flip = 1;
660     }
661     else
662     {
663     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
664     fclose(f);
665     continue;
666     }
667     }
668    
669     fprintf(outfile, "Compiled regex%s loaded from %s\n",
670     do_flip? " (byte-inverted)" : "", p);
671    
672     /* Need to know if UTF-8 for printing data strings */
673    
674     new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
675     use_utf8 = (options & PCRE_UTF8) != 0;
676    
677     /* Now see if there is any following study data */
678    
679     if (true_study_size != 0)
680     {
681     pcre_study_data *psd;
682    
683     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
684     extra->flags = PCRE_EXTRA_STUDY_DATA;
685    
686     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
687     extra->study_data = psd;
688    
689     if (fread(psd, 1, true_study_size, f) != true_study_size)
690     {
691     FAIL_READ:
692     fprintf(outfile, "Failed to read data from %s\n", p);
693     if (extra != NULL) new_free(extra);
694     if (re != NULL) new_free(re);
695     fclose(f);
696     continue;
697     }
698     fprintf(outfile, "Study data loaded from %s\n", p);
699     do_study = 1; /* To get the data output if requested */
700     }
701     else fprintf(outfile, "No study data\n");
702    
703     fclose(f);
704     goto SHOW_INFO;
705     }
706    
707     /* In-line pattern (the usual case). Get the delimiter and seek the end of
708     the pattern; if it isn't complete, read more. */
709    
710 nigel 3 delimiter = *p++;
711    
712 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
713 nigel 3 {
714 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
715 nigel 3 goto SKIP_DATA;
716     }
717    
718     pp = p;
719    
720     for(;;)
721     {
722 nigel 29 while (*pp != 0)
723     {
724     if (*pp == '\\' && pp[1] != 0) pp++;
725     else if (*pp == delimiter) break;
726     pp++;
727     }
728 nigel 3 if (*pp != 0) break;
729    
730 nigel 69 len = BUFFER_SIZE - (pp - buffer);
731 nigel 3 if (len < 256)
732     {
733     fprintf(outfile, "** Expression too long - missing delimiter?\n");
734     goto SKIP_DATA;
735     }
736    
737     if (infile == stdin) printf(" > ");
738     if (fgets((char *)pp, len, infile) == NULL)
739     {
740     fprintf(outfile, "** Unexpected EOF\n");
741 nigel 11 done = 1;
742     goto CONTINUE;
743 nigel 3 }
744 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
745 nigel 3 }
746    
747 nigel 29 /* If the first character after the delimiter is backslash, make
748     the pattern end with backslash. This is purely to provide a way
749     of testing for the error message when a pattern ends with backslash. */
750    
751     if (pp[1] == '\\') *pp++ = '\\';
752    
753 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
754     for callouts. */
755 nigel 3
756     *pp++ = 0;
757 nigel 75 strcpy((char *)pbuffer, (char *)p);
758 nigel 3
759     /* Look for options after final delimiter */
760    
761     options = 0;
762     study_options = 0;
763 nigel 31 log_store = showstore; /* default from command line */
764    
765 nigel 3 while (*pp != 0)
766     {
767     switch (*pp++)
768     {
769 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
770 nigel 35 case 'g': do_g = 1; break;
771 nigel 3 case 'i': options |= PCRE_CASELESS; break;
772     case 'm': options |= PCRE_MULTILINE; break;
773     case 's': options |= PCRE_DOTALL; break;
774     case 'x': options |= PCRE_EXTENDED; break;
775 nigel 25
776 nigel 35 case '+': do_showrest = 1; break;
777 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
778 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
779 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
780 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
781 nigel 75 case 'F': do_flip = 1; break;
782 nigel 35 case 'G': do_G = 1; break;
783 nigel 25 case 'I': do_showinfo = 1; break;
784 nigel 31 case 'M': log_store = 1; break;
785 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
786 nigel 37
787     #if !defined NOPOSIX
788 nigel 3 case 'P': do_posix = 1; break;
789 nigel 37 #endif
790    
791 nigel 3 case 'S': do_study = 1; break;
792 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
793 nigel 3 case 'X': options |= PCRE_EXTRA; break;
794 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
795 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
796 nigel 25
797     case 'L':
798     ppp = pp;
799 nigel 77 /* The '\r' test here is so that it works on Windows */
800     while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
801 nigel 25 *ppp = 0;
802     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
803     {
804     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
805     goto SKIP_DATA;
806     }
807     tables = pcre_maketables();
808     pp = ppp;
809     break;
810    
811 nigel 75 case '>':
812     to_file = pp;
813     while (*pp != 0) pp++;
814     while (isspace(pp[-1])) pp--;
815     *pp = 0;
816     break;
817    
818 nigel 77 case '\r': /* So that it works in Windows */
819     case '\n':
820     case ' ':
821     break;
822 nigel 75
823 nigel 3 default:
824     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
825     goto SKIP_DATA;
826     }
827     }
828    
829 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
830 nigel 25 timing, showing, or debugging options, nor the ability to pass over
831     local character tables. */
832 nigel 3
833 nigel 37 #if !defined NOPOSIX
834 nigel 3 if (posix || do_posix)
835     {
836     int rc;
837     int cflags = 0;
838 nigel 75
839 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
840     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
841 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
842 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
843    
844     /* Compilation failed; go back for another re, skipping to blank line
845     if non-interactive. */
846    
847     if (rc != 0)
848     {
849 nigel 69 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
850 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
851     goto SKIP_DATA;
852     }
853     }
854    
855     /* Handle compiling via the native interface */
856    
857     else
858 nigel 37 #endif /* !defined NOPOSIX */
859    
860 nigel 3 {
861     if (timeit)
862     {
863     register int i;
864     clock_t time_taken;
865     clock_t start_time = clock();
866 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
867 nigel 3 {
868 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
869 nigel 3 if (re != NULL) free(re);
870     }
871     time_taken = clock() - start_time;
872 nigel 27 fprintf(outfile, "Compile time %.3f milliseconds\n",
873 nigel 63 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
874     (double)CLOCKS_PER_SEC);
875 nigel 3 }
876    
877 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
878 nigel 3
879     /* Compilation failed; go back for another re, skipping to blank line
880     if non-interactive. */
881    
882     if (re == NULL)
883     {
884     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
885     SKIP_DATA:
886     if (infile != stdin)
887     {
888     for (;;)
889     {
890 nigel 69 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
891 nigel 11 {
892     done = 1;
893     goto CONTINUE;
894     }
895 nigel 3 len = (int)strlen((char *)buffer);
896     while (len > 0 && isspace(buffer[len-1])) len--;
897     if (len == 0) break;
898     }
899     fprintf(outfile, "\n");
900     }
901 nigel 25 goto CONTINUE;
902 nigel 3 }
903    
904 nigel 43 /* Compilation succeeded; print data if required. There are now two
905     info-returning functions. The old one has a limited interface and
906     returns only limited data. Check that it agrees with the newer one. */
907 nigel 3
908 nigel 63 if (log_store)
909     fprintf(outfile, "Memory allocation (code space): %d\n",
910     (int)(gotten_store -
911     sizeof(real_pcre) -
912     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
913    
914 nigel 75 /* Extract the size for possible writing before possibly flipping it,
915     and remember the store that was got. */
916    
917     true_size = ((real_pcre *)re)->size;
918     regex_gotten_store = gotten_store;
919    
920     /* If /S was present, study the regexp to generate additional info to
921     help with the matching. */
922    
923     if (do_study)
924     {
925     if (timeit)
926     {
927     register int i;
928     clock_t time_taken;
929     clock_t start_time = clock();
930     for (i = 0; i < LOOPREPEAT; i++)
931     extra = pcre_study(re, study_options, &error);
932     time_taken = clock() - start_time;
933     if (extra != NULL) free(extra);
934     fprintf(outfile, " Study time %.3f milliseconds\n",
935     (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
936     (double)CLOCKS_PER_SEC);
937     }
938     extra = pcre_study(re, study_options, &error);
939     if (error != NULL)
940     fprintf(outfile, "Failed to study: %s\n", error);
941     else if (extra != NULL)
942     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
943     }
944    
945     /* If the 'F' option was present, we flip the bytes of all the integer
946     fields in the regex data block and the study block. This is to make it
947     possible to test PCRE's handling of byte-flipped patterns, e.g. those
948     compiled on a different architecture. */
949    
950     if (do_flip)
951     {
952     real_pcre *rre = (real_pcre *)re;
953     rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
954     rre->size = byteflip(rre->size, sizeof(rre->size));
955     rre->options = byteflip(rre->options, sizeof(rre->options));
956     rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
957     rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
958     rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
959     rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
960     rre->name_table_offset = byteflip(rre->name_table_offset,
961     sizeof(rre->name_table_offset));
962     rre->name_entry_size = byteflip(rre->name_entry_size,
963     sizeof(rre->name_entry_size));
964     rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
965    
966     if (extra != NULL)
967     {
968     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
969     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
970     rsd->options = byteflip(rsd->options, sizeof(rsd->options));
971     }
972     }
973    
974     /* Extract information from the compiled data if required */
975    
976     SHOW_INFO:
977    
978 nigel 25 if (do_showinfo)
979 nigel 3 {
980 nigel 75 unsigned long int get_options, all_options;
981 nigel 79 #if !defined NOINFOCHECK
982 nigel 43 int old_first_char, old_options, old_count;
983 nigel 79 #endif
984 nigel 43 int count, backrefmax, first_char, need_char;
985 nigel 63 int nameentrysize, namecount;
986     const uschar *nametable;
987 nigel 3
988 nigel 63 if (do_debug)
989     {
990     fprintf(outfile, "------------------------------------------------------------------\n");
991 nigel 77 _pcre_printint(re, outfile);
992 nigel 63 }
993 nigel 3
994 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
995 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
996     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
997     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
998 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
999 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1000 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1001     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1002 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1003 nigel 43
1004 nigel 79 #if !defined NOINFOCHECK
1005 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1006 nigel 3 if (count < 0) fprintf(outfile,
1007 nigel 43 "Error %d from pcre_info()\n", count);
1008 nigel 3 else
1009     {
1010 nigel 43 if (old_count != count) fprintf(outfile,
1011     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1012     old_count);
1013 nigel 37
1014 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1015     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1016     first_char, old_first_char);
1017 nigel 37
1018 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1019     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1020     get_options, old_options);
1021 nigel 43 }
1022 nigel 79 #endif
1023 nigel 43
1024 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1025 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1026 nigel 77 (int)size, (int)regex_gotten_store);
1027 nigel 43
1028     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1029     if (backrefmax > 0)
1030     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1031 nigel 63
1032     if (namecount > 0)
1033     {
1034     fprintf(outfile, "Named capturing subpatterns:\n");
1035     while (namecount-- > 0)
1036     {
1037     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1038     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1039     GET2(nametable, 0));
1040     nametable += nameentrysize;
1041     }
1042     }
1043    
1044 nigel 75 /* The NOPARTIAL bit is a private bit in the options, so we have
1045     to fish it out via our back door */
1046    
1047     all_options = ((real_pcre *)re)->options;
1048     if (do_flip)
1049     {
1050     all_options = byteflip(all_options, sizeof(all_options));
1051     }
1052    
1053     if ((all_options & PCRE_NOPARTIAL) != 0)
1054     fprintf(outfile, "Partial matching not supported\n");
1055    
1056 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1057 nigel 77 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",
1058 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1059     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1060     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1061     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1062 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1063 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1064     ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1065     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1066     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1067 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1068     ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1069 nigel 43
1070     if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1071     fprintf(outfile, "Case state changes\n");
1072    
1073     if (first_char == -1)
1074     {
1075     fprintf(outfile, "First char at start or follows \\n\n");
1076     }
1077     else if (first_char < 0)
1078     {
1079     fprintf(outfile, "No first char\n");
1080     }
1081     else
1082     {
1083 nigel 63 int ch = first_char & 255;
1084 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1085 nigel 63 "" : " (caseless)";
1086     if (isprint(ch))
1087     fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1088 nigel 3 else
1089 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1090 nigel 43 }
1091 nigel 37
1092 nigel 43 if (need_char < 0)
1093     {
1094     fprintf(outfile, "No need char\n");
1095 nigel 3 }
1096 nigel 43 else
1097     {
1098 nigel 63 int ch = need_char & 255;
1099 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1100 nigel 63 "" : " (caseless)";
1101     if (isprint(ch))
1102     fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1103 nigel 43 else
1104 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1105 nigel 43 }
1106 nigel 75
1107     /* Don't output study size; at present it is in any case a fixed
1108     value, but it varies, depending on the computer architecture, and
1109     so messes up the test suite. (And with the /F option, it might be
1110     flipped.) */
1111    
1112     if (do_study)
1113     {
1114     if (extra == NULL)
1115     fprintf(outfile, "Study returned NULL\n");
1116     else
1117     {
1118     uschar *start_bits = NULL;
1119     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1120    
1121     if (start_bits == NULL)
1122     fprintf(outfile, "No starting byte set\n");
1123     else
1124     {
1125     int i;
1126     int c = 24;
1127     fprintf(outfile, "Starting byte set: ");
1128     for (i = 0; i < 256; i++)
1129     {
1130     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1131     {
1132     if (c > 75)
1133     {
1134     fprintf(outfile, "\n ");
1135     c = 2;
1136     }
1137     if (isprint(i) && i != ' ')
1138     {
1139     fprintf(outfile, "%c ", i);
1140     c += 2;
1141     }
1142     else
1143     {
1144     fprintf(outfile, "\\x%02x ", i);
1145     c += 5;
1146     }
1147     }
1148     }
1149     fprintf(outfile, "\n");
1150     }
1151     }
1152     }
1153 nigel 3 }
1154    
1155 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1156     that is all. The first 8 bytes of the file are the regex length and then
1157     the study length, in big-endian order. */
1158 nigel 3
1159 nigel 75 if (to_file != NULL)
1160 nigel 3 {
1161 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1162     if (f == NULL)
1163 nigel 3 {
1164 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1165 nigel 3 }
1166 nigel 75 else
1167     {
1168     uschar sbuf[8];
1169     sbuf[0] = (true_size >> 24) & 255;
1170     sbuf[1] = (true_size >> 16) & 255;
1171     sbuf[2] = (true_size >> 8) & 255;
1172     sbuf[3] = (true_size) & 255;
1173 nigel 3
1174 nigel 75 sbuf[4] = (true_study_size >> 24) & 255;
1175     sbuf[5] = (true_study_size >> 16) & 255;
1176     sbuf[6] = (true_study_size >> 8) & 255;
1177     sbuf[7] = (true_study_size) & 255;
1178 nigel 3
1179 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1180     fwrite(re, 1, true_size, f) < true_size)
1181     {
1182     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1183     }
1184 nigel 3 else
1185     {
1186 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1187     if (extra != NULL)
1188 nigel 3 {
1189 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1190     true_study_size)
1191 nigel 3 {
1192 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1193     strerror(errno));
1194 nigel 3 }
1195 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1196 nigel 3 }
1197     }
1198 nigel 75 fclose(f);
1199 nigel 3 }
1200 nigel 77
1201     new_free(re);
1202     if (extra != NULL) new_free(extra);
1203     if (tables != NULL) new_free((void *)tables);
1204 nigel 75 continue; /* With next regex */
1205 nigel 3 }
1206 nigel 75 } /* End of non-POSIX compile */
1207 nigel 3
1208     /* Read data lines and test them */
1209    
1210     for (;;)
1211     {
1212 nigel 9 unsigned char *q;
1213 nigel 35 unsigned char *bptr = dbuffer;
1214 nigel 57 int *use_offsets = offsets;
1215 nigel 53 int use_size_offsets = size_offsets;
1216 nigel 63 int callout_data = 0;
1217     int callout_data_set = 0;
1218 nigel 3 int count, c;
1219 nigel 29 int copystrings = 0;
1220 nigel 63 int find_match_limit = 0;
1221 nigel 29 int getstrings = 0;
1222     int getlist = 0;
1223 nigel 39 int gmatched = 0;
1224 nigel 35 int start_offset = 0;
1225 nigel 41 int g_notempty = 0;
1226 nigel 77 int use_dfa = 0;
1227 nigel 3
1228     options = 0;
1229    
1230 nigel 63 pcre_callout = callout;
1231     first_callout = 1;
1232     callout_extra = 0;
1233     callout_count = 0;
1234     callout_fail_count = 999999;
1235     callout_fail_id = -1;
1236 nigel 73 show_malloc = 0;
1237 nigel 63
1238 nigel 35 if (infile == stdin) printf("data> ");
1239 nigel 69 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1240 nigel 11 {
1241     done = 1;
1242     goto CONTINUE;
1243     }
1244 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1245 nigel 3
1246     len = (int)strlen((char *)buffer);
1247     while (len > 0 && isspace(buffer[len-1])) len--;
1248     buffer[len] = 0;
1249     if (len == 0) break;
1250    
1251     p = buffer;
1252     while (isspace(*p)) p++;
1253    
1254 nigel 9 q = dbuffer;
1255 nigel 3 while ((c = *p++) != 0)
1256     {
1257     int i = 0;
1258     int n = 0;
1259 nigel 63
1260 nigel 3 if (c == '\\') switch ((c = *p++))
1261     {
1262     case 'a': c = 7; break;
1263     case 'b': c = '\b'; break;
1264     case 'e': c = 27; break;
1265     case 'f': c = '\f'; break;
1266     case 'n': c = '\n'; break;
1267     case 'r': c = '\r'; break;
1268     case 't': c = '\t'; break;
1269     case 'v': c = '\v'; break;
1270    
1271     case '0': case '1': case '2': case '3':
1272     case '4': case '5': case '6': case '7':
1273     c -= '0';
1274     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1275     c = c * 8 + *p++ - '0';
1276     break;
1277    
1278     case 'x':
1279 nigel 49
1280     /* Handle \x{..} specially - new Perl thing for utf8 */
1281    
1282 nigel 79 #if !defined NOUTF8
1283 nigel 49 if (*p == '{')
1284     {
1285     unsigned char *pt = p;
1286     c = 0;
1287     while (isxdigit(*(++pt)))
1288     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1289     if (*pt == '}')
1290     {
1291 nigel 67 unsigned char buff8[8];
1292 nigel 49 int ii, utn;
1293 nigel 77 utn = _pcre_ord2utf8(c, buff8);
1294 nigel 67 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1295     c = buff8[ii]; /* Last byte */
1296 nigel 49 p = pt + 1;
1297     break;
1298     }
1299     /* Not correct form; fall through */
1300     }
1301 nigel 79 #endif
1302 nigel 49
1303     /* Ordinary \x */
1304    
1305 nigel 3 c = 0;
1306     while (i++ < 2 && isxdigit(*p))
1307     {
1308     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1309     p++;
1310     }
1311     break;
1312    
1313 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1314 nigel 3 p--;
1315     continue;
1316    
1317 nigel 75 case '>':
1318     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1319     continue;
1320    
1321 nigel 3 case 'A': /* Option setting */
1322     options |= PCRE_ANCHORED;
1323     continue;
1324    
1325     case 'B':
1326     options |= PCRE_NOTBOL;
1327     continue;
1328    
1329 nigel 29 case 'C':
1330 nigel 63 if (isdigit(*p)) /* Set copy string */
1331     {
1332     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1333     copystrings |= 1 << n;
1334     }
1335     else if (isalnum(*p))
1336     {
1337     uschar name[256];
1338 nigel 67 uschar *npp = name;
1339     while (isalnum(*p)) *npp++ = *p++;
1340     *npp = 0;
1341 nigel 65 n = pcre_get_stringnumber(re, (char *)name);
1342 nigel 63 if (n < 0)
1343     fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1344     else copystrings |= 1 << n;
1345     }
1346     else if (*p == '+')
1347     {
1348     callout_extra = 1;
1349     p++;
1350     }
1351     else if (*p == '-')
1352     {
1353     pcre_callout = NULL;
1354     p++;
1355     }
1356     else if (*p == '!')
1357     {
1358     callout_fail_id = 0;
1359     p++;
1360     while(isdigit(*p))
1361     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1362     callout_fail_count = 0;
1363     if (*p == '!')
1364     {
1365     p++;
1366     while(isdigit(*p))
1367     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1368     }
1369     }
1370     else if (*p == '*')
1371     {
1372     int sign = 1;
1373     callout_data = 0;
1374     if (*(++p) == '-') { sign = -1; p++; }
1375     while(isdigit(*p))
1376     callout_data = callout_data * 10 + *p++ - '0';
1377     callout_data *= sign;
1378     callout_data_set = 1;
1379     }
1380 nigel 29 continue;
1381    
1382 nigel 79 #if !defined NODFA
1383 nigel 77 case 'D':
1384 nigel 79 #if !defined NOPOSIX
1385 nigel 77 if (posix || do_posix)
1386     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1387     else
1388 nigel 79 #endif
1389 nigel 77 use_dfa = 1;
1390     continue;
1391    
1392     case 'F':
1393     options |= PCRE_DFA_SHORTEST;
1394     continue;
1395 nigel 79 #endif
1396 nigel 77
1397 nigel 29 case 'G':
1398 nigel 63 if (isdigit(*p))
1399     {
1400     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1401     getstrings |= 1 << n;
1402     }
1403     else if (isalnum(*p))
1404     {
1405     uschar name[256];
1406 nigel 67 uschar *npp = name;
1407     while (isalnum(*p)) *npp++ = *p++;
1408     *npp = 0;
1409 nigel 65 n = pcre_get_stringnumber(re, (char *)name);
1410 nigel 63 if (n < 0)
1411     fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1412     else getstrings |= 1 << n;
1413     }
1414 nigel 29 continue;
1415    
1416     case 'L':
1417     getlist = 1;
1418     continue;
1419    
1420 nigel 63 case 'M':
1421     find_match_limit = 1;
1422     continue;
1423    
1424 nigel 37 case 'N':
1425     options |= PCRE_NOTEMPTY;
1426     continue;
1427    
1428 nigel 3 case 'O':
1429     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1430 nigel 53 if (n > size_offsets_max)
1431     {
1432     size_offsets_max = n;
1433 nigel 57 free(offsets);
1434 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1435 nigel 53 if (offsets == NULL)
1436     {
1437     printf("** Failed to get %d bytes of memory for offsets vector\n",
1438     size_offsets_max * sizeof(int));
1439 nigel 77 yield = 1;
1440     goto EXIT;
1441 nigel 53 }
1442     }
1443     use_size_offsets = n;
1444 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1445 nigel 3 continue;
1446    
1447 nigel 75 case 'P':
1448     options |= PCRE_PARTIAL;
1449     continue;
1450    
1451 nigel 79 #if !defined NODFA
1452 nigel 77 case 'R':
1453     options |= PCRE_DFA_RESTART;
1454     continue;
1455 nigel 79 #endif
1456 nigel 77
1457 nigel 73 case 'S':
1458     show_malloc = 1;
1459     continue;
1460    
1461 nigel 3 case 'Z':
1462     options |= PCRE_NOTEOL;
1463     continue;
1464 nigel 71
1465     case '?':
1466     options |= PCRE_NO_UTF8_CHECK;
1467     continue;
1468 nigel 3 }
1469 nigel 9 *q++ = c;
1470 nigel 3 }
1471 nigel 9 *q = 0;
1472     len = q - dbuffer;
1473 nigel 3
1474 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
1475     {
1476     printf("**Match limit not relevant for DFA matching: ignored\n");
1477     find_match_limit = 0;
1478     }
1479    
1480 nigel 3 /* Handle matching via the POSIX interface, which does not
1481 nigel 63 support timing or playing with the match limit or callout data. */
1482 nigel 3
1483 nigel 37 #if !defined NOPOSIX
1484 nigel 3 if (posix || do_posix)
1485     {
1486     int rc;
1487     int eflags = 0;
1488 nigel 63 regmatch_t *pmatch = NULL;
1489     if (use_size_offsets > 0)
1490 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1491 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1492     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1493    
1494 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1495 nigel 3
1496     if (rc != 0)
1497     {
1498 nigel 69 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1499 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1500     }
1501     else
1502     {
1503 nigel 7 size_t i;
1504 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
1505 nigel 3 {
1506     if (pmatch[i].rm_so >= 0)
1507     {
1508 nigel 23 fprintf(outfile, "%2d: ", (int)i);
1509 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
1510     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1511 nigel 3 fprintf(outfile, "\n");
1512 nigel 35 if (i == 0 && do_showrest)
1513     {
1514     fprintf(outfile, " 0+ ");
1515 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1516     outfile);
1517 nigel 35 fprintf(outfile, "\n");
1518     }
1519 nigel 3 }
1520     }
1521     }
1522 nigel 53 free(pmatch);
1523 nigel 3 }
1524    
1525 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
1526 nigel 3
1527 nigel 37 else
1528     #endif /* !defined NOPOSIX */
1529    
1530 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
1531 nigel 3 {
1532     if (timeit)
1533     {
1534     register int i;
1535     clock_t time_taken;
1536     clock_t start_time = clock();
1537 nigel 77
1538 nigel 79 #if !defined NODFA
1539 nigel 77 if (all_use_dfa || use_dfa)
1540     {
1541     int workspace[1000];
1542     for (i = 0; i < LOOPREPEAT; i++)
1543     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1544     options | g_notempty, use_offsets, use_size_offsets, workspace,
1545     sizeof(workspace)/sizeof(int));
1546     }
1547     else
1548 nigel 79 #endif
1549 nigel 77
1550 nigel 27 for (i = 0; i < LOOPREPEAT; i++)
1551 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
1552 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1553 nigel 77
1554 nigel 3 time_taken = clock() - start_time;
1555 nigel 27 fprintf(outfile, "Execute time %.3f milliseconds\n",
1556 nigel 63 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1557     (double)CLOCKS_PER_SEC);
1558 nigel 3 }
1559    
1560 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
1561     varying limits in order to find the minimum value. */
1562    
1563     if (find_match_limit)
1564     {
1565     int min = 0;
1566     int mid = 64;
1567     int max = -1;
1568    
1569     if (extra == NULL)
1570     {
1571 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1572 nigel 63 extra->flags = 0;
1573     }
1574     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1575    
1576     for (;;)
1577     {
1578     extra->match_limit = mid;
1579     count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1580     options | g_notempty, use_offsets, use_size_offsets);
1581     if (count == PCRE_ERROR_MATCHLIMIT)
1582     {
1583     /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1584     min = mid;
1585     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1586     }
1587 nigel 75 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1588     count == PCRE_ERROR_PARTIAL)
1589 nigel 63 {
1590     if (mid == min + 1)
1591     {
1592     fprintf(outfile, "Minimum match limit = %d\n", mid);
1593     break;
1594     }
1595     /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1596     max = mid;
1597     mid = (min + mid)/2;
1598     }
1599     else break; /* Some other error */
1600     }
1601    
1602     extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1603     }
1604    
1605     /* If callout_data is set, use the interface with additional data */
1606    
1607     else if (callout_data_set)
1608     {
1609     if (extra == NULL)
1610     {
1611 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1612 nigel 63 extra->flags = 0;
1613     }
1614     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1615 nigel 71 extra->callout_data = &callout_data;
1616 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1617     options | g_notempty, use_offsets, use_size_offsets);
1618     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1619     }
1620    
1621     /* The normal case is just to do the match once, with the default
1622     value of match_limit. */
1623    
1624 nigel 79 #if !defined NODFA
1625 nigel 77 else if (all_use_dfa || use_dfa)
1626     {
1627     int workspace[1000];
1628     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1629     options | g_notempty, use_offsets, use_size_offsets, workspace,
1630     sizeof(workspace)/sizeof(int));
1631     if (count == 0)
1632     {
1633     fprintf(outfile, "Matched, but too many subsidiary matches\n");
1634     count = use_size_offsets/2;
1635     }
1636     }
1637 nigel 79 #endif
1638 nigel 77
1639 nigel 75 else
1640     {
1641     count = pcre_exec(re, extra, (char *)bptr, len,
1642     start_offset, options | g_notempty, use_offsets, use_size_offsets);
1643 nigel 77 if (count == 0)
1644     {
1645     fprintf(outfile, "Matched, but too many substrings\n");
1646     count = use_size_offsets/3;
1647     }
1648 nigel 75 }
1649 nigel 3
1650 nigel 39 /* Matched */
1651    
1652 nigel 3 if (count >= 0)
1653     {
1654     int i;
1655 nigel 29 for (i = 0; i < count * 2; i += 2)
1656 nigel 3 {
1657 nigel 57 if (use_offsets[i] < 0)
1658 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
1659     else
1660     {
1661     fprintf(outfile, "%2d: ", i/2);
1662 nigel 63 (void)pchars(bptr + use_offsets[i],
1663     use_offsets[i+1] - use_offsets[i], outfile);
1664 nigel 3 fprintf(outfile, "\n");
1665 nigel 35 if (i == 0)
1666     {
1667     if (do_showrest)
1668     {
1669     fprintf(outfile, " 0+ ");
1670 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1671     outfile);
1672 nigel 35 fprintf(outfile, "\n");
1673     }
1674     }
1675 nigel 3 }
1676     }
1677 nigel 29
1678     for (i = 0; i < 32; i++)
1679     {
1680     if ((copystrings & (1 << i)) != 0)
1681     {
1682 nigel 37 char copybuffer[16];
1683 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1684 nigel 37 i, copybuffer, sizeof(copybuffer));
1685 nigel 29 if (rc < 0)
1686     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1687     else
1688 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1689 nigel 29 }
1690     }
1691    
1692     for (i = 0; i < 32; i++)
1693     {
1694     if ((getstrings & (1 << i)) != 0)
1695     {
1696     const char *substring;
1697 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1698 nigel 29 i, &substring);
1699     if (rc < 0)
1700     fprintf(outfile, "get substring %d failed %d\n", i, rc);
1701     else
1702     {
1703     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1704 nigel 49 /* free((void *)substring); */
1705     pcre_free_substring(substring);
1706 nigel 29 }
1707     }
1708     }
1709    
1710     if (getlist)
1711     {
1712     const char **stringlist;
1713 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1714 nigel 29 &stringlist);
1715     if (rc < 0)
1716     fprintf(outfile, "get substring list failed %d\n", rc);
1717     else
1718     {
1719     for (i = 0; i < count; i++)
1720     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1721     if (stringlist[i] != NULL)
1722     fprintf(outfile, "string list not terminated by NULL\n");
1723 nigel 49 /* free((void *)stringlist); */
1724     pcre_free_substring_list(stringlist);
1725 nigel 29 }
1726     }
1727 nigel 39 }
1728 nigel 29
1729 nigel 75 /* There was a partial match */
1730    
1731     else if (count == PCRE_ERROR_PARTIAL)
1732     {
1733 nigel 77 fprintf(outfile, "Partial match");
1734 nigel 79 #if !defined NODFA
1735 nigel 77 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
1736     fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
1737     bptr + use_offsets[0]);
1738 nigel 79 #endif
1739 nigel 77 fprintf(outfile, "\n");
1740 nigel 75 break; /* Out of the /g loop */
1741     }
1742    
1743 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
1744 nigel 47 g_notempty after a null match, this is not necessarily the end.
1745 nigel 73 We want to advance the start offset, and continue. In the case of UTF-8
1746     matching, the advance must be one character, not one byte. Fudge the
1747     offset values to achieve this. We won't be at the end of the string -
1748     that was checked before setting g_notempty. */
1749 nigel 39
1750 nigel 3 else
1751     {
1752 nigel 41 if (g_notempty != 0)
1753 nigel 35 {
1754 nigel 73 int onechar = 1;
1755 nigel 57 use_offsets[0] = start_offset;
1756 nigel 73 if (use_utf8)
1757     {
1758     while (start_offset + onechar < len)
1759     {
1760     int tb = bptr[start_offset+onechar];
1761     if (tb <= 127) break;
1762     tb &= 0xc0;
1763     if (tb != 0 && tb != 0xc0) onechar++;
1764     }
1765     }
1766     use_offsets[1] = start_offset + onechar;
1767 nigel 35 }
1768 nigel 41 else
1769     {
1770 nigel 73 if (count == PCRE_ERROR_NOMATCH)
1771 nigel 41 {
1772 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
1773 nigel 41 }
1774 nigel 73 else fprintf(outfile, "Error %d\n", count);
1775 nigel 41 break; /* Out of the /g loop */
1776     }
1777 nigel 3 }
1778 nigel 35
1779 nigel 39 /* If not /g or /G we are done */
1780    
1781     if (!do_g && !do_G) break;
1782    
1783 nigel 41 /* If we have matched an empty string, first check to see if we are at
1784     the end of the subject. If so, the /g loop is over. Otherwise, mimic
1785     what Perl's /g option does. This turns out to be rather cunning. First
1786 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1787     same point. If this fails (picked up above) we advance to the next
1788     character. */
1789 nigel 39
1790 nigel 41 g_notempty = 0;
1791 nigel 57 if (use_offsets[0] == use_offsets[1])
1792 nigel 41 {
1793 nigel 57 if (use_offsets[0] == len) break;
1794 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1795 nigel 41 }
1796 nigel 39
1797     /* For /g, update the start offset, leaving the rest alone */
1798    
1799 nigel 57 if (do_g) start_offset = use_offsets[1];
1800 nigel 39
1801     /* For /G, update the pointer and length */
1802    
1803     else
1804 nigel 35 {
1805 nigel 57 bptr += use_offsets[1];
1806     len -= use_offsets[1];
1807 nigel 35 }
1808 nigel 39 } /* End of loop for /g and /G */
1809     } /* End of loop for data lines */
1810 nigel 3
1811 nigel 11 CONTINUE:
1812 nigel 37
1813     #if !defined NOPOSIX
1814 nigel 3 if (posix || do_posix) regfree(&preg);
1815 nigel 37 #endif
1816    
1817 nigel 77 if (re != NULL) new_free(re);
1818     if (extra != NULL) new_free(extra);
1819 nigel 25 if (tables != NULL)
1820     {
1821 nigel 77 new_free((void *)tables);
1822 nigel 25 setlocale(LC_CTYPE, "C");
1823     }
1824 nigel 3 }
1825    
1826 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
1827 nigel 77
1828     EXIT:
1829    
1830     if (infile != NULL && infile != stdin) fclose(infile);
1831     if (outfile != NULL && outfile != stdout) fclose(outfile);
1832    
1833     free(buffer);
1834     free(dbuffer);
1835     free(pbuffer);
1836     free(offsets);
1837    
1838     return yield;
1839 nigel 3 }
1840    
1841 nigel 77 /* End of pcretest.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12