/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 75 - (hide annotations) (download)
Sat Feb 24 21:40:37 2007 UTC (7 years, 4 months ago) by nigel
File MIME type: text/plain
File size: 51609 byte(s)
Load pcre-5.0 into code/trunk.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 75 been extended and consequently is now rather untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 nigel 3 #include <ctype.h>
40     #include <stdio.h>
41     #include <string.h>
42     #include <stdlib.h>
43     #include <time.h>
44 nigel 25 #include <locale.h>
45 nigel 75 #include <errno.h>
46 nigel 3
47 nigel 63 /* We need the internal info for displaying the results of pcre_study(). Also
48     for getting the opcodes for showing compiled code. */
49 nigel 3
50 nigel 63 #define PCRE_SPY /* For Win32 build, import data, not export */
51 nigel 3 #include "internal.h"
52 nigel 37
53     /* It is possible to compile this test program without including support for
54     testing the POSIX interface, though this is not available via the standard
55     Makefile. */
56    
57     #if !defined NOPOSIX
58 nigel 3 #include "pcreposix.h"
59 nigel 37 #endif
60 nigel 3
/* Fallback for systems whose <time.h> does not define CLOCKS_PER_SEC: use
CLK_TCK when that is available, otherwise assume 100 ticks per second. */

#if !defined(CLOCKS_PER_SEC)
#  if defined(CLK_TCK)
#    define CLOCKS_PER_SEC CLK_TCK
#  else
#    define CLOCKS_PER_SEC 100
#  endif
#endif

/* Number of iterations used by the timing loops. */

#define LOOPREPEAT 500000

/* Size of the input line buffer; the pattern and data buffers are the same
size. */

#define BUFFER_SIZE 30000
#define PBUFFER_SIZE BUFFER_SIZE
#define DBUFFER_SIZE BUFFER_SIZE
74 nigel 23
75 nigel 69
76 nigel 3 static FILE *outfile;
77     static int log_store = 0;
78 nigel 63 static int callout_count;
79     static int callout_extra;
80     static int callout_fail_count;
81     static int callout_fail_id;
82     static int first_callout;
83 nigel 73 static int show_malloc;
84 nigel 67 static int use_utf8;
85 nigel 43 static size_t gotten_store;
86 nigel 3
87 nigel 75 static uschar *pbuffer = NULL;
88 nigel 3
89 nigel 75
/* Largest character value that can be encoded in 1, 2, ... 6 bytes. */

static const int utf8_table1[] = {
  0x0000007f,
  0x000007ff,
  0x0000ffff,
  0x001fffff,
  0x03ffffff,
  0x7fffffff
};

/* Marker bits that are ORed into the first byte of a sequence, indexed by the
number of additional bytes. */

static const int utf8_table2[] = {
  0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
};

/* Mask that extracts the data bits from the first byte of a sequence, indexed
by the number of additional bytes. */

static const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01
};
98    
99    
100 nigel 63
101 nigel 49 /*************************************************
102 nigel 63 * Print compiled regex *
103     *************************************************/
104    
105     /* The code for doing this is held in a separate file that is also included in
106     pcre.c when it is compiled with the debug switch. It defines a function called
107     print_internals(), which uses a table of opcode lengths defined by the macro
108 nigel 75 OP_LENGTHS, whose name must be OP_lengths. It also uses a table that translates
109     Unicode property names to numbers; this is kept in a separate file. */
110 nigel 63
111     static uschar OP_lengths[] = { OP_LENGTHS };
112    
113 nigel 75 #include "ucp.h"
114     #include "ucptypetable.c"
115 nigel 63 #include "printint.c"
116    
117    
118    
119     /*************************************************
120     * Read number from string *
121     *************************************************/
122    
123     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
124     around with conditional compilation, just do the job by hand. It is only used
125     for unpicking the -o argument, so just keep it simple.
126    
127     Arguments:
128     str string to be converted
129     endptr where to put the end pointer
130    
131     Returns: the value as an int (0 if no digits are present)
132     */
133    
/* Skip leading white space, then accumulate a run of decimal digits.

Arguments:
  str         string to be converted
  endptr      receives a pointer to the first character after the digits

Returns:      the accumulated value; zero when no digit was found
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
unsigned char *cursor = str;
int total = 0;

/* Step over white space, but never past the terminating zero */

for (; *cursor != 0; cursor++)
  {
  if (!isspace(*cursor)) break;
  }

/* Accumulate the digits, most significant first */

for (; isdigit(*cursor); cursor++)
  {
  total = total * 10 + (int)(*cursor - '0');
  }

*endptr = cursor;
return total;
}
143    
144    
145    
146     /*************************************************
147 nigel 49 * Convert character value to UTF-8 *
148     *************************************************/
149    
150     /* This function takes an integer value in the range 0 - 0x7fffffff
151     and encodes it as a UTF-8 character in 0 to 6 bytes.
152    
153     Arguments:
154     cvalue the character value
155     buffer pointer to buffer for result - at least 6 bytes long
156    
157     Returns: number of characters placed in the buffer
158     -1 if input character is negative
159     0 if input character is positive but too big (only when
160     int is longer than 32 bits)
161     */
162    
163     static int
164     ord2utf8(int cvalue, unsigned char *buffer)
165     {
166     register int i, j;
167     for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
168     if (cvalue <= utf8_table1[i]) break;
169     if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
170     if (cvalue < 0) return -1;
171 nigel 59
172     buffer += i;
173     for (j = i; j > 0; j--)
174     {
175     *buffer-- = 0x80 | (cvalue & 0x3f);
176     cvalue >>= 6;
177     }
178     *buffer = utf8_table2[i] | cvalue;
179 nigel 49 return i + 1;
180     }
181    
182    
183     /*************************************************
184     * Convert UTF-8 string to value *
185     *************************************************/
186    
187     /* This function takes one or more bytes that represents a UTF-8 character,
188     and returns the value of the character.
189    
190     Argument:
191     buffer a pointer to the byte vector
192     vptr a pointer to an int to receive the value
193    
194     Returns: > 0 => the number of bytes consumed
195     -6 to 0 => malformed UTF-8 character at offset = (-return)
196     */
197    
198 nigel 67 static int
199 nigel 49 utf82ord(unsigned char *buffer, int *vptr)
200     {
201     int c = *buffer++;
202     int d = c;
203     int i, j, s;
204    
205     for (i = -1; i < 6; i++) /* i is number of additional bytes */
206     {
207     if ((d & 0x80) == 0) break;
208     d <<= 1;
209     }
210    
211     if (i == -1) { *vptr = c; return 1; } /* ascii character */
212     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
213    
214     /* i now has a value in the range 1-5 */
215    
216 nigel 59 s = 6*i;
217     d = (c & utf8_table3[i]) << s;
218 nigel 49
219     for (j = 0; j < i; j++)
220     {
221     c = *buffer++;
222     if ((c & 0xc0) != 0x80) return -(j+1);
223 nigel 59 s -= 6;
224 nigel 49 d |= (c & 0x3f) << s;
225     }
226    
227     /* Check that encoding was the correct unique one */
228    
229     for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
230     if (d <= utf8_table1[j]) break;
231     if (j != i) return -(i+1);
232    
233     /* Valid value */
234    
235     *vptr = d;
236     return i+1;
237     }
238    
239    
240    
241 nigel 63 /*************************************************
242     * Print character string *
243     *************************************************/
244 nigel 49
245 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
246     mode. Yields number of characters printed. If handed a NULL file, just counts
247     chars without printing. */
248 nigel 49
249 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
250 nigel 3 {
251 nigel 63 int c;
252     int yield = 0;
253 nigel 3
254 nigel 63 while (length-- > 0)
255 nigel 3 {
256 nigel 67 if (use_utf8)
257 nigel 63 {
258     int rc = utf82ord(p, &c);
259 nigel 3
260 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
261     {
262     length -= rc - 1;
263     p += rc;
264     if (c < 256 && isprint(c))
265     {
266     if (f != NULL) fprintf(f, "%c", c);
267     yield++;
268     }
269     else
270     {
271     int n;
272     if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
273     yield += n;
274     }
275     continue;
276     }
277     }
278 nigel 3
279 nigel 63 /* Not UTF-8, or malformed UTF-8 */
280    
281     if (isprint(c = *(p++)))
282 nigel 3 {
283 nigel 63 if (f != NULL) fprintf(f, "%c", c);
284     yield++;
285 nigel 3 }
286 nigel 63 else
287 nigel 3 {
288 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
289     yield += 4;
290     }
291     }
292 nigel 3
293 nigel 63 return yield;
294     }
295 nigel 23
296 nigel 3
297 nigel 23
298 nigel 63 /*************************************************
299     * Callout function *
300     *************************************************/
301 nigel 3
302 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
303     the match. Yield zero unless more callouts than the fail count, or the callout
304     data is not zero. */
305 nigel 3
306 nigel 63 static int callout(pcre_callout_block *cb)
307     {
308     FILE *f = (first_callout | callout_extra)? outfile : NULL;
309 nigel 75 int i, pre_start, post_start, subject_length;
310 nigel 3
311 nigel 63 if (callout_extra)
312     {
313     fprintf(f, "Callout %d: last capture = %d\n",
314     cb->callout_number, cb->capture_last);
315 nigel 3
316 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
317     {
318     if (cb->offset_vector[i] < 0)
319     fprintf(f, "%2d: <unset>\n", i/2);
320     else
321     {
322     fprintf(f, "%2d: ", i/2);
323     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
324     cb->offset_vector[i+1] - cb->offset_vector[i], f);
325     fprintf(f, "\n");
326     }
327     }
328     }
329 nigel 3
330 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
331     datails. On subsequent calls in the same match, we use pchars just to find the
332     printed lengths of the substrings. */
333 nigel 3
334 nigel 63 if (f != NULL) fprintf(f, "--->");
335 nigel 3
336 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
337     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
338     cb->current_position - cb->start_match, f);
339 nigel 3
340 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
341    
342 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
343     cb->subject_length - cb->current_position, f);
344 nigel 3
345 nigel 63 if (f != NULL) fprintf(f, "\n");
346 nigel 9
347 nigel 63 /* Always print appropriate indicators, with callout number if not already
348 nigel 75 shown. For automatic callouts, show the pattern offset. */
349 nigel 3
350 nigel 75 if (cb->callout_number == 255)
351     {
352     fprintf(outfile, "%+3d ", cb->pattern_position);
353     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
354     }
355     else
356     {
357     if (callout_extra) fprintf(outfile, " ");
358     else fprintf(outfile, "%3d ", cb->callout_number);
359     }
360 nigel 3
361 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
362     fprintf(outfile, "^");
363 nigel 3
364 nigel 63 if (post_start > 0)
365     {
366     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
367     fprintf(outfile, "^");
368 nigel 3 }
369    
370 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
371     fprintf(outfile, " ");
372    
373     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
374     pbuffer + cb->pattern_position);
375    
376 nigel 63 fprintf(outfile, "\n");
377     first_callout = 0;
378 nigel 3
379 nigel 71 if (cb->callout_data != NULL)
380 nigel 49 {
381 nigel 71 int callout_data = *((int *)(cb->callout_data));
382     if (callout_data != 0)
383     {
384     fprintf(outfile, "Callout data = %d\n", callout_data);
385     return callout_data;
386     }
387 nigel 63 }
388 nigel 49
389 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
390     (++callout_count >= callout_fail_count)? 1 : 0;
391 nigel 3 }
392    
393    
394 nigel 63 /*************************************************
395 nigel 73 * Local malloc functions *
396 nigel 63 *************************************************/
397 nigel 3
398     /* Alternative malloc function, to test functionality and show the size of the
399     compiled re. */
400    
401     static void *new_malloc(size_t size)
402     {
403 nigel 73 void *block = malloc(size);
404 nigel 43 gotten_store = size;
405 nigel 73 if (show_malloc)
406     fprintf(outfile, "malloc %3d %p\n", size, block);
407     return block;
408 nigel 3 }
409    
410 nigel 73 static void new_free(void *block)
411     {
412     if (show_malloc)
413     fprintf(outfile, "free %p\n", block);
414     free(block);
415     }
416 nigel 3
417    
418 nigel 73 /* For recursion malloc/free, to test stacking calls */
419    
420     static void *stack_malloc(size_t size)
421     {
422     void *block = malloc(size);
423     if (show_malloc)
424     fprintf(outfile, "stack_malloc %3d %p\n", size, block);
425     return block;
426     }
427    
428     static void stack_free(void *block)
429     {
430     if (show_malloc)
431     fprintf(outfile, "stack_free %p\n", block);
432     free(block);
433     }
434    
435    
436 nigel 63 /*************************************************
437     * Call pcre_fullinfo() *
438     *************************************************/
439 nigel 43
440     /* Get one piece of information from the pcre_fullinfo() function */
441    
442     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
443     {
444     int rc;
445     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
446     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
447     }
448    
449    
450    
451 nigel 63 /*************************************************
452 nigel 75 * Byte flipping function *
453     *************************************************/
454    
/* Reverse the order of the low-order n bytes of a value.

Callers pass sizeof(field) as n. A width of 2, or the full width of a long,
is honoured; any other width is treated as 4, which is what this function has
always done for n != 2. The work is done in unsigned arithmetic so that no bit
is ever shifted into or out of a sign bit.

Arguments:
  value      the value to flip
  n          the width of the value in bytes

Returns:     the value with its low-order n bytes in reverse order; any
             higher-order bytes are zero
*/

static long int
byteflip(long int value, int n)
{
unsigned long int in = (unsigned long int)value;
unsigned long int out = 0;
int i;

if (n != 2 && n != (int)sizeof(unsigned long int)) n = 4;

/* Move bytes from the low end of the input to the low end of the output, so
that the first byte taken ends up as the most significant. */

for (i = 0; i < n; i++)
  {
  out = (out << 8) | (in & 0xff);
  in >>= 8;
  }

return (long int)out;
}
464    
465    
466    
467    
468     /*************************************************
469 nigel 63 * Main Program *
470     *************************************************/
471 nigel 43
472 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
473     consist of a regular expression, in delimiters and optionally followed by
474     options, followed by a set of test data, terminated by an empty line. */
475    
476     int main(int argc, char **argv)
477     {
478     FILE *infile = stdin;
479     int options = 0;
480     int study_options = 0;
481     int op = 1;
482     int timeit = 0;
483     int showinfo = 0;
484 nigel 31 int showstore = 0;
485 nigel 53 int size_offsets = 45;
486     int size_offsets_max;
487     int *offsets;
488     #if !defined NOPOSIX
489 nigel 3 int posix = 0;
490 nigel 53 #endif
491 nigel 3 int debug = 0;
492 nigel 11 int done = 0;
493 nigel 3
494 nigel 69 unsigned char *buffer;
495     unsigned char *dbuffer;
496    
497     /* Get buffers from malloc() so that Electric Fence will check their misuse
498     when I am debugging. */
499    
500 nigel 71 buffer = (unsigned char *)malloc(BUFFER_SIZE);
501     dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
502 nigel 75 pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
503 nigel 69
504 nigel 75 /* The outfile variable is static so that new_malloc can use it. The _setmode()
505     stuff is some magic that I don't understand, but which apparently does good
506     things in Windows. It's related to line terminations. */
507 nigel 3
508 nigel 75 #if defined(_WIN32) || defined(WIN32)
509     _setmode( _fileno( stdout ), 0x8000 );
510     #endif /* defined(_WIN32) || defined(WIN32) */
511    
512 nigel 3 outfile = stdout;
513    
514     /* Scan options */
515    
516     while (argc > 1 && argv[op][0] == '-')
517     {
518 nigel 63 unsigned char *endptr;
519 nigel 53
520 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
521     showstore = 1;
522 nigel 3 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
523     else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
524     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
525 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
526 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
527     *endptr == 0))
528 nigel 53 {
529     op++;
530     argc--;
531     }
532     #if !defined NOPOSIX
533 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
534 nigel 53 #endif
535 nigel 63 else if (strcmp(argv[op], "-C") == 0)
536     {
537     int rc;
538     printf("PCRE version %s\n", pcre_version());
539     printf("Compiled with\n");
540     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
541     printf(" %sUTF-8 support\n", rc? "" : "No ");
542 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
543     printf(" %sUnicode properties support\n", rc? "" : "No ");
544 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
545     printf(" Newline character is %s\n", (rc == '\r')? "CR" : "LF");
546     (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
547     printf(" Internal link size = %d\n", rc);
548     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
549     printf(" POSIX malloc threshold = %d\n", rc);
550     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
551     printf(" Default match limit = %d\n", rc);
552 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
553     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
554 nigel 63 exit(0);
555     }
556 nigel 3 else
557     {
558 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
559     printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
560 nigel 63 printf(" -C show PCRE compile-time options and exit\n");
561 nigel 53 printf(" -d debug: show compiled code; implies -i\n"
562     " -i show information about compiled pattern\n"
563 nigel 75 " -m output memory used information\n"
564 nigel 53 " -o <n> set size of offsets vector to <n>\n");
565     #if !defined NOPOSIX
566     printf(" -p use POSIX interface\n");
567     #endif
568 nigel 75 printf(" -s output store (memory) used information\n"
569 nigel 53 " -t time compilation and execution\n");
570 nigel 3 return 1;
571     }
572     op++;
573     argc--;
574     }
575    
576 nigel 53 /* Get the store for the offsets vector, and remember what it was */
577    
578     size_offsets_max = size_offsets;
579 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
580 nigel 53 if (offsets == NULL)
581     {
582     printf("** Failed to get %d bytes of memory for offsets vector\n",
583     size_offsets_max * sizeof(int));
584     return 1;
585     }
586    
587 nigel 3 /* Sort out the input and output files */
588    
589     if (argc > 1)
590     {
591 nigel 75 infile = fopen(argv[op], "rb");
592 nigel 3 if (infile == NULL)
593     {
594     printf("** Failed to open %s\n", argv[op]);
595     return 1;
596     }
597     }
598    
599     if (argc > 2)
600     {
601 nigel 75 outfile = fopen(argv[op+1], "wb");
602 nigel 3 if (outfile == NULL)
603     {
604     printf("** Failed to open %s\n", argv[op+1]);
605     return 1;
606     }
607     }
608    
609     /* Set alternative malloc function */
610    
611     pcre_malloc = new_malloc;
612 nigel 73 pcre_free = new_free;
613     pcre_stack_malloc = stack_malloc;
614     pcre_stack_free = stack_free;
615 nigel 3
616 nigel 23 /* Heading line, then prompt for first regex if stdin */
617 nigel 3
618     fprintf(outfile, "PCRE version %s\n\n", pcre_version());
619    
620     /* Main loop */
621    
622 nigel 11 while (!done)
623 nigel 3 {
624     pcre *re = NULL;
625     pcre_extra *extra = NULL;
626 nigel 37
627     #if !defined NOPOSIX /* There are still compilers that require no indent */
628 nigel 3 regex_t preg;
629 nigel 45 int do_posix = 0;
630 nigel 37 #endif
631    
632 nigel 7 const char *error;
633 nigel 25 unsigned char *p, *pp, *ppp;
634 nigel 75 unsigned char *to_file = NULL;
635 nigel 53 const unsigned char *tables = NULL;
636 nigel 75 unsigned long int true_size, true_study_size = 0;
637     size_t size, regex_gotten_store;
638 nigel 3 int do_study = 0;
639 nigel 25 int do_debug = debug;
640 nigel 35 int do_G = 0;
641     int do_g = 0;
642 nigel 25 int do_showinfo = showinfo;
643 nigel 35 int do_showrest = 0;
644 nigel 75 int do_flip = 0;
645 nigel 3 int erroroffset, len, delimiter;
646    
647 nigel 67 use_utf8 = 0;
648 nigel 63
649 nigel 3 if (infile == stdin) printf(" re> ");
650 nigel 69 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
651 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
652 nigel 63 fflush(outfile);
653 nigel 3
654     p = buffer;
655     while (isspace(*p)) p++;
656     if (*p == 0) continue;
657    
658 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
659 nigel 3
660 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
661     {
662     unsigned long int magic;
663     uschar sbuf[8];
664     FILE *f;
665    
666     p++;
667     pp = p + (int)strlen((char *)p);
668     while (isspace(pp[-1])) pp--;
669     *pp = 0;
670    
671     f = fopen((char *)p, "rb");
672     if (f == NULL)
673     {
674     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
675     continue;
676     }
677    
678     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
679    
680     true_size =
681     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
682     true_study_size =
683     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
684    
685     re = (real_pcre *)new_malloc(true_size);
686     regex_gotten_store = gotten_store;
687    
688     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
689    
690     magic = ((real_pcre *)re)->magic_number;
691     if (magic != MAGIC_NUMBER)
692     {
693     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
694     {
695     do_flip = 1;
696     }
697     else
698     {
699     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
700     fclose(f);
701     continue;
702     }
703     }
704    
705     fprintf(outfile, "Compiled regex%s loaded from %s\n",
706     do_flip? " (byte-inverted)" : "", p);
707    
708     /* Need to know if UTF-8 for printing data strings */
709    
710     new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
711     use_utf8 = (options & PCRE_UTF8) != 0;
712    
713     /* Now see if there is any following study data */
714    
715     if (true_study_size != 0)
716     {
717     pcre_study_data *psd;
718    
719     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
720     extra->flags = PCRE_EXTRA_STUDY_DATA;
721    
722     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
723     extra->study_data = psd;
724    
725     if (fread(psd, 1, true_study_size, f) != true_study_size)
726     {
727     FAIL_READ:
728     fprintf(outfile, "Failed to read data from %s\n", p);
729     if (extra != NULL) new_free(extra);
730     if (re != NULL) new_free(re);
731     fclose(f);
732     continue;
733     }
734     fprintf(outfile, "Study data loaded from %s\n", p);
735     do_study = 1; /* To get the data output if requested */
736     }
737     else fprintf(outfile, "No study data\n");
738    
739     fclose(f);
740     goto SHOW_INFO;
741     }
742    
743     /* In-line pattern (the usual case). Get the delimiter and seek the end of
744     the pattern; if it isn't complete, read more. */
745    
746 nigel 3 delimiter = *p++;
747    
748 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
749 nigel 3 {
750 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
751 nigel 3 goto SKIP_DATA;
752     }
753    
754     pp = p;
755    
756     for(;;)
757     {
758 nigel 29 while (*pp != 0)
759     {
760     if (*pp == '\\' && pp[1] != 0) pp++;
761     else if (*pp == delimiter) break;
762     pp++;
763     }
764 nigel 3 if (*pp != 0) break;
765    
766 nigel 69 len = BUFFER_SIZE - (pp - buffer);
767 nigel 3 if (len < 256)
768     {
769     fprintf(outfile, "** Expression too long - missing delimiter?\n");
770     goto SKIP_DATA;
771     }
772    
773     if (infile == stdin) printf(" > ");
774     if (fgets((char *)pp, len, infile) == NULL)
775     {
776     fprintf(outfile, "** Unexpected EOF\n");
777 nigel 11 done = 1;
778     goto CONTINUE;
779 nigel 3 }
780 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
781 nigel 3 }
782    
783 nigel 29 /* If the first character after the delimiter is backslash, make
784     the pattern end with backslash. This is purely to provide a way
785     of testing for the error message when a pattern ends with backslash. */
786    
787     if (pp[1] == '\\') *pp++ = '\\';
788    
789 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
790     for callouts. */
791 nigel 3
792     *pp++ = 0;
793 nigel 75 strcpy((char *)pbuffer, (char *)p);
794 nigel 3
795     /* Look for options after final delimiter */
796    
797     options = 0;
798     study_options = 0;
799 nigel 31 log_store = showstore; /* default from command line */
800    
801 nigel 3 while (*pp != 0)
802     {
803     switch (*pp++)
804     {
805 nigel 35 case 'g': do_g = 1; break;
806 nigel 3 case 'i': options |= PCRE_CASELESS; break;
807     case 'm': options |= PCRE_MULTILINE; break;
808     case 's': options |= PCRE_DOTALL; break;
809     case 'x': options |= PCRE_EXTENDED; break;
810 nigel 25
811 nigel 35 case '+': do_showrest = 1; break;
812 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
813 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
814 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
815 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
816 nigel 75 case 'F': do_flip = 1; break;
817 nigel 35 case 'G': do_G = 1; break;
818 nigel 25 case 'I': do_showinfo = 1; break;
819 nigel 31 case 'M': log_store = 1; break;
820 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
821 nigel 37
822     #if !defined NOPOSIX
823 nigel 3 case 'P': do_posix = 1; break;
824 nigel 37 #endif
825    
826 nigel 3 case 'S': do_study = 1; break;
827 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
828 nigel 3 case 'X': options |= PCRE_EXTRA; break;
829 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
830 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
831 nigel 25
832     case 'L':
833     ppp = pp;
834     while (*ppp != '\n' && *ppp != ' ') ppp++;
835     *ppp = 0;
836     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
837     {
838     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
839     goto SKIP_DATA;
840     }
841     tables = pcre_maketables();
842     pp = ppp;
843     break;
844    
845 nigel 75 case '>':
846     to_file = pp;
847     while (*pp != 0) pp++;
848     while (isspace(pp[-1])) pp--;
849     *pp = 0;
850     break;
851    
852 nigel 3 case '\n': case ' ': break;
853 nigel 75
854 nigel 3 default:
855     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
856     goto SKIP_DATA;
857     }
858     }
859    
860 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
861 nigel 25 timing, showing, or debugging options, nor the ability to pass over
862     local character tables. */
863 nigel 3
864 nigel 37 #if !defined NOPOSIX
865 nigel 3 if (posix || do_posix)
866     {
867     int rc;
868     int cflags = 0;
869 nigel 75
870 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
871     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
872     rc = regcomp(&preg, (char *)p, cflags);
873    
874     /* Compilation failed; go back for another re, skipping to blank line
875     if non-interactive. */
876    
877     if (rc != 0)
878     {
879 nigel 69 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
880 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
881     goto SKIP_DATA;
882     }
883     }
884    
885     /* Handle compiling via the native interface */
886    
887     else
888 nigel 37 #endif /* !defined NOPOSIX */
889    
890 nigel 3 {
891     if (timeit)
892     {
893     register int i;
894     clock_t time_taken;
895     clock_t start_time = clock();
896 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
897 nigel 3 {
898 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
899 nigel 3 if (re != NULL) free(re);
900     }
901     time_taken = clock() - start_time;
902 nigel 27 fprintf(outfile, "Compile time %.3f milliseconds\n",
903 nigel 63 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
904     (double)CLOCKS_PER_SEC);
905 nigel 3 }
906    
907 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
908 nigel 3
909     /* Compilation failed; go back for another re, skipping to blank line
910     if non-interactive. */
911    
912     if (re == NULL)
913     {
914     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
915     SKIP_DATA:
916     if (infile != stdin)
917     {
918     for (;;)
919     {
920 nigel 69 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
921 nigel 11 {
922     done = 1;
923     goto CONTINUE;
924     }
925 nigel 3 len = (int)strlen((char *)buffer);
926     while (len > 0 && isspace(buffer[len-1])) len--;
927     if (len == 0) break;
928     }
929     fprintf(outfile, "\n");
930     }
931 nigel 25 goto CONTINUE;
932 nigel 3 }
933    
934 nigel 43 /* Compilation succeeded; print data if required. There are now two
935     info-returning functions. The old one has a limited interface and
936     returns only limited data. Check that it agrees with the newer one. */
937 nigel 3
938 nigel 63 if (log_store)
939     fprintf(outfile, "Memory allocation (code space): %d\n",
940     (int)(gotten_store -
941     sizeof(real_pcre) -
942     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
943    
944 nigel 75 /* Extract the size for possible writing before possibly flipping it,
945     and remember the store that was got. */
946    
947     true_size = ((real_pcre *)re)->size;
948     regex_gotten_store = gotten_store;
949    
950     /* If /S was present, study the regexp to generate additional info to
951     help with the matching. */
952    
953     if (do_study)
954     {
955     if (timeit)
956     {
957     register int i;
958     clock_t time_taken;
959     clock_t start_time = clock();
960     for (i = 0; i < LOOPREPEAT; i++)
961     extra = pcre_study(re, study_options, &error);
962     time_taken = clock() - start_time;
963     if (extra != NULL) free(extra);
964     fprintf(outfile, " Study time %.3f milliseconds\n",
965     (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
966     (double)CLOCKS_PER_SEC);
967     }
968     extra = pcre_study(re, study_options, &error);
969     if (error != NULL)
970     fprintf(outfile, "Failed to study: %s\n", error);
971     else if (extra != NULL)
972     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
973     }
974    
975     /* If the 'F' option was present, we flip the bytes of all the integer
976     fields in the regex data block and the study block. This is to make it
977     possible to test PCRE's handling of byte-flipped patterns, e.g. those
978     compiled on a different architecture. */
979    
980     if (do_flip)
981     {
982     real_pcre *rre = (real_pcre *)re;
983     rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
984     rre->size = byteflip(rre->size, sizeof(rre->size));
985     rre->options = byteflip(rre->options, sizeof(rre->options));
986     rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
987     rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
988     rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
989     rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
990     rre->name_table_offset = byteflip(rre->name_table_offset,
991     sizeof(rre->name_table_offset));
992     rre->name_entry_size = byteflip(rre->name_entry_size,
993     sizeof(rre->name_entry_size));
994     rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
995    
996     if (extra != NULL)
997     {
998     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
999     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1000     rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1001     }
1002     }
1003    
1004     /* Extract information from the compiled data if required */
1005    
1006     SHOW_INFO:
1007    
1008 nigel 25 if (do_showinfo)
1009 nigel 3 {
1010 nigel 75 unsigned long int get_options, all_options;
1011 nigel 43 int old_first_char, old_options, old_count;
1012     int count, backrefmax, first_char, need_char;
1013 nigel 63 int nameentrysize, namecount;
1014     const uschar *nametable;
1015 nigel 3
1016 nigel 63 if (do_debug)
1017     {
1018     fprintf(outfile, "------------------------------------------------------------------\n");
1019     print_internals(re, outfile);
1020     }
1021 nigel 3
1022 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1023 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1024     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1025     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1026 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1027 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1028 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1029     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1030 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1031 nigel 43
1032     old_count = pcre_info(re, &old_options, &old_first_char);
1033 nigel 3 if (count < 0) fprintf(outfile,
1034 nigel 43 "Error %d from pcre_info()\n", count);
1035 nigel 3 else
1036     {
1037 nigel 43 if (old_count != count) fprintf(outfile,
1038     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1039     old_count);
1040 nigel 37
1041 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1042     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1043     first_char, old_first_char);
1044 nigel 37
1045 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1046     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1047     get_options, old_options);
1048 nigel 43 }
1049    
1050 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1051 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1052 nigel 75 size, regex_gotten_store);
1053 nigel 43
1054     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1055     if (backrefmax > 0)
1056     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1057 nigel 63
1058     if (namecount > 0)
1059     {
1060     fprintf(outfile, "Named capturing subpatterns:\n");
1061     while (namecount-- > 0)
1062     {
1063     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1064     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1065     GET2(nametable, 0));
1066     nametable += nameentrysize;
1067     }
1068     }
1069    
1070 nigel 75 /* The NOPARTIAL bit is a private bit in the options, so we have
1071     to fish it out via our back door */
1072    
1073     all_options = ((real_pcre *)re)->options;
1074     if (do_flip)
1075     {
1076     all_options = byteflip(all_options, sizeof(all_options));
1077     }
1078    
1079     if ((all_options & PCRE_NOPARTIAL) != 0)
1080     fprintf(outfile, "Partial matching not supported\n");
1081    
1082 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1083 nigel 71 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",
1084 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1085     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1086     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1087     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1088     ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1089     ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1090     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1091     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1092 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1093     ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1094 nigel 43
1095     if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1096     fprintf(outfile, "Case state changes\n");
1097    
1098     if (first_char == -1)
1099     {
1100     fprintf(outfile, "First char at start or follows \\n\n");
1101     }
1102     else if (first_char < 0)
1103     {
1104     fprintf(outfile, "No first char\n");
1105     }
1106     else
1107     {
1108 nigel 63 int ch = first_char & 255;
1109 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1110 nigel 63 "" : " (caseless)";
1111     if (isprint(ch))
1112     fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1113 nigel 3 else
1114 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1115 nigel 43 }
1116 nigel 37
1117 nigel 43 if (need_char < 0)
1118     {
1119     fprintf(outfile, "No need char\n");
1120 nigel 3 }
1121 nigel 43 else
1122     {
1123 nigel 63 int ch = need_char & 255;
1124 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1125 nigel 63 "" : " (caseless)";
1126     if (isprint(ch))
1127     fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1128 nigel 43 else
1129 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1130 nigel 43 }
1131 nigel 75
1132     /* Don't output study size; at present it is in any case a fixed
1133     value, but it varies, depending on the computer architecture, and
1134     so messes up the test suite. (And with the /F option, it might be
1135     flipped.) */
1136    
1137     if (do_study)
1138     {
1139     if (extra == NULL)
1140     fprintf(outfile, "Study returned NULL\n");
1141     else
1142     {
1143     uschar *start_bits = NULL;
1144     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1145    
1146     if (start_bits == NULL)
1147     fprintf(outfile, "No starting byte set\n");
1148     else
1149     {
1150     int i;
1151     int c = 24;
1152     fprintf(outfile, "Starting byte set: ");
1153     for (i = 0; i < 256; i++)
1154     {
1155     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1156     {
1157     if (c > 75)
1158     {
1159     fprintf(outfile, "\n ");
1160     c = 2;
1161     }
1162     if (isprint(i) && i != ' ')
1163     {
1164     fprintf(outfile, "%c ", i);
1165     c += 2;
1166     }
1167     else
1168     {
1169     fprintf(outfile, "\\x%02x ", i);
1170     c += 5;
1171     }
1172     }
1173     }
1174     fprintf(outfile, "\n");
1175     }
1176     }
1177     }
1178 nigel 3 }
1179    
1180 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1181     that is all. The first 8 bytes of the file are the regex length and then
1182     the study length, in big-endian order. */
1183 nigel 3
1184 nigel 75 if (to_file != NULL)
1185 nigel 3 {
1186 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1187     if (f == NULL)
1188 nigel 3 {
1189 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1190 nigel 3 }
1191 nigel 75 else
1192     {
1193     uschar sbuf[8];
1194     sbuf[0] = (true_size >> 24) & 255;
1195     sbuf[1] = (true_size >> 16) & 255;
1196     sbuf[2] = (true_size >> 8) & 255;
1197     sbuf[3] = (true_size) & 255;
1198 nigel 3
1199 nigel 75 sbuf[4] = (true_study_size >> 24) & 255;
1200     sbuf[5] = (true_study_size >> 16) & 255;
1201     sbuf[6] = (true_study_size >> 8) & 255;
1202     sbuf[7] = (true_study_size) & 255;
1203 nigel 3
1204 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1205     fwrite(re, 1, true_size, f) < true_size)
1206     {
1207     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1208     }
1209 nigel 3 else
1210     {
1211 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1212     if (extra != NULL)
1213 nigel 3 {
1214 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1215     true_study_size)
1216 nigel 3 {
1217 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1218     strerror(errno));
1219 nigel 3 }
1220 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1221 nigel 3 }
1222     }
1223 nigel 75 fclose(f);
1224 nigel 3 }
1225 nigel 75 continue; /* With next regex */
1226 nigel 3 }
1227 nigel 75 } /* End of non-POSIX compile */
1228 nigel 3
1229     /* Read data lines and test them */
1230    
1231     for (;;)
1232     {
1233 nigel 9 unsigned char *q;
1234 nigel 35 unsigned char *bptr = dbuffer;
1235 nigel 57 int *use_offsets = offsets;
1236 nigel 53 int use_size_offsets = size_offsets;
1237 nigel 63 int callout_data = 0;
1238     int callout_data_set = 0;
1239 nigel 3 int count, c;
1240 nigel 29 int copystrings = 0;
1241 nigel 63 int find_match_limit = 0;
1242 nigel 29 int getstrings = 0;
1243     int getlist = 0;
1244 nigel 39 int gmatched = 0;
1245 nigel 35 int start_offset = 0;
1246 nigel 41 int g_notempty = 0;
1247 nigel 3
1248     options = 0;
1249    
1250 nigel 63 pcre_callout = callout;
1251     first_callout = 1;
1252     callout_extra = 0;
1253     callout_count = 0;
1254     callout_fail_count = 999999;
1255     callout_fail_id = -1;
1256 nigel 73 show_malloc = 0;
1257 nigel 63
1258 nigel 35 if (infile == stdin) printf("data> ");
1259 nigel 69 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1260 nigel 11 {
1261     done = 1;
1262     goto CONTINUE;
1263     }
1264 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1265 nigel 3
1266     len = (int)strlen((char *)buffer);
1267     while (len > 0 && isspace(buffer[len-1])) len--;
1268     buffer[len] = 0;
1269     if (len == 0) break;
1270    
1271     p = buffer;
1272     while (isspace(*p)) p++;
1273    
1274 nigel 9 q = dbuffer;
1275 nigel 3 while ((c = *p++) != 0)
1276     {
1277     int i = 0;
1278     int n = 0;
1279 nigel 63
1280 nigel 3 if (c == '\\') switch ((c = *p++))
1281     {
1282     case 'a': c = 7; break;
1283     case 'b': c = '\b'; break;
1284     case 'e': c = 27; break;
1285     case 'f': c = '\f'; break;
1286     case 'n': c = '\n'; break;
1287     case 'r': c = '\r'; break;
1288     case 't': c = '\t'; break;
1289     case 'v': c = '\v'; break;
1290    
1291     case '0': case '1': case '2': case '3':
1292     case '4': case '5': case '6': case '7':
1293     c -= '0';
1294     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1295     c = c * 8 + *p++ - '0';
1296     break;
1297    
1298     case 'x':
1299 nigel 49
1300     /* Handle \x{..} specially - new Perl thing for utf8 */
1301    
1302     if (*p == '{')
1303     {
1304     unsigned char *pt = p;
1305     c = 0;
1306     while (isxdigit(*(++pt)))
1307     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1308     if (*pt == '}')
1309     {
1310 nigel 67 unsigned char buff8[8];
1311 nigel 49 int ii, utn;
1312 nigel 67 utn = ord2utf8(c, buff8);
1313     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1314     c = buff8[ii]; /* Last byte */
1315 nigel 49 p = pt + 1;
1316     break;
1317     }
1318     /* Not correct form; fall through */
1319     }
1320    
1321     /* Ordinary \x */
1322    
1323 nigel 3 c = 0;
1324     while (i++ < 2 && isxdigit(*p))
1325     {
1326     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1327     p++;
1328     }
1329     break;
1330    
1331 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1332 nigel 3 p--;
1333     continue;
1334    
1335 nigel 75 case '>':
1336     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1337     continue;
1338    
1339 nigel 3 case 'A': /* Option setting */
1340     options |= PCRE_ANCHORED;
1341     continue;
1342    
1343     case 'B':
1344     options |= PCRE_NOTBOL;
1345     continue;
1346    
1347 nigel 29 case 'C':
1348 nigel 63 if (isdigit(*p)) /* Set copy string */
1349     {
1350     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1351     copystrings |= 1 << n;
1352     }
1353     else if (isalnum(*p))
1354     {
1355     uschar name[256];
1356 nigel 67 uschar *npp = name;
1357     while (isalnum(*p)) *npp++ = *p++;
1358     *npp = 0;
1359 nigel 65 n = pcre_get_stringnumber(re, (char *)name);
1360 nigel 63 if (n < 0)
1361     fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1362     else copystrings |= 1 << n;
1363     }
1364     else if (*p == '+')
1365     {
1366     callout_extra = 1;
1367     p++;
1368     }
1369     else if (*p == '-')
1370     {
1371     pcre_callout = NULL;
1372     p++;
1373     }
1374     else if (*p == '!')
1375     {
1376     callout_fail_id = 0;
1377     p++;
1378     while(isdigit(*p))
1379     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1380     callout_fail_count = 0;
1381     if (*p == '!')
1382     {
1383     p++;
1384     while(isdigit(*p))
1385     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1386     }
1387     }
1388     else if (*p == '*')
1389     {
1390     int sign = 1;
1391     callout_data = 0;
1392     if (*(++p) == '-') { sign = -1; p++; }
1393     while(isdigit(*p))
1394     callout_data = callout_data * 10 + *p++ - '0';
1395     callout_data *= sign;
1396     callout_data_set = 1;
1397     }
1398 nigel 29 continue;
1399    
1400     case 'G':
1401 nigel 63 if (isdigit(*p))
1402     {
1403     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1404     getstrings |= 1 << n;
1405     }
1406     else if (isalnum(*p))
1407     {
1408     uschar name[256];
1409 nigel 67 uschar *npp = name;
1410     while (isalnum(*p)) *npp++ = *p++;
1411     *npp = 0;
1412 nigel 65 n = pcre_get_stringnumber(re, (char *)name);
1413 nigel 63 if (n < 0)
1414     fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1415     else getstrings |= 1 << n;
1416     }
1417 nigel 29 continue;
1418    
1419     case 'L':
1420     getlist = 1;
1421     continue;
1422    
1423 nigel 63 case 'M':
1424     find_match_limit = 1;
1425     continue;
1426    
1427 nigel 37 case 'N':
1428     options |= PCRE_NOTEMPTY;
1429     continue;
1430    
1431 nigel 3 case 'O':
1432     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1433 nigel 53 if (n > size_offsets_max)
1434     {
1435     size_offsets_max = n;
1436 nigel 57 free(offsets);
1437 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1438 nigel 53 if (offsets == NULL)
1439     {
1440     printf("** Failed to get %d bytes of memory for offsets vector\n",
1441     size_offsets_max * sizeof(int));
1442     return 1;
1443     }
1444     }
1445     use_size_offsets = n;
1446 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1447 nigel 3 continue;
1448    
1449 nigel 75 case 'P':
1450     options |= PCRE_PARTIAL;
1451     continue;
1452    
1453 nigel 73 case 'S':
1454     show_malloc = 1;
1455     continue;
1456    
1457 nigel 3 case 'Z':
1458     options |= PCRE_NOTEOL;
1459     continue;
1460 nigel 71
1461     case '?':
1462     options |= PCRE_NO_UTF8_CHECK;
1463     continue;
1464 nigel 3 }
1465 nigel 9 *q++ = c;
1466 nigel 3 }
1467 nigel 9 *q = 0;
1468     len = q - dbuffer;
1469 nigel 3
1470     /* Handle matching via the POSIX interface, which does not
1471 nigel 63 support timing or playing with the match limit or callout data. */
1472 nigel 3
1473 nigel 37 #if !defined NOPOSIX
1474 nigel 3 if (posix || do_posix)
1475     {
1476     int rc;
1477     int eflags = 0;
1478 nigel 63 regmatch_t *pmatch = NULL;
1479     if (use_size_offsets > 0)
1480 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1481 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1482     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1483    
1484 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1485 nigel 3
1486     if (rc != 0)
1487     {
1488 nigel 69 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1489 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1490     }
1491     else
1492     {
1493 nigel 7 size_t i;
1494 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
1495 nigel 3 {
1496     if (pmatch[i].rm_so >= 0)
1497     {
1498 nigel 23 fprintf(outfile, "%2d: ", (int)i);
1499 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
1500     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1501 nigel 3 fprintf(outfile, "\n");
1502 nigel 35 if (i == 0 && do_showrest)
1503     {
1504     fprintf(outfile, " 0+ ");
1505 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1506     outfile);
1507 nigel 35 fprintf(outfile, "\n");
1508     }
1509 nigel 3 }
1510     }
1511     }
1512 nigel 53 free(pmatch);
1513 nigel 3 }
1514    
1515 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
1516 nigel 3
1517 nigel 37 else
1518     #endif /* !defined NOPOSIX */
1519    
1520 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
1521 nigel 3 {
1522     if (timeit)
1523     {
1524     register int i;
1525     clock_t time_taken;
1526     clock_t start_time = clock();
1527 nigel 27 for (i = 0; i < LOOPREPEAT; i++)
1528 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
1529 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1530 nigel 3 time_taken = clock() - start_time;
1531 nigel 27 fprintf(outfile, "Execute time %.3f milliseconds\n",
1532 nigel 63 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1533     (double)CLOCKS_PER_SEC);
1534 nigel 3 }
1535    
1536 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
1537     varying limits in order to find the minimum value. */
1538    
1539     if (find_match_limit)
1540     {
1541     int min = 0;
1542     int mid = 64;
1543     int max = -1;
1544    
1545     if (extra == NULL)
1546     {
1547 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1548 nigel 63 extra->flags = 0;
1549     }
1550     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1551    
1552     for (;;)
1553     {
1554     extra->match_limit = mid;
1555     count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1556     options | g_notempty, use_offsets, use_size_offsets);
1557     if (count == PCRE_ERROR_MATCHLIMIT)
1558     {
1559     /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1560     min = mid;
1561     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1562     }
1563 nigel 75 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1564     count == PCRE_ERROR_PARTIAL)
1565 nigel 63 {
1566     if (mid == min + 1)
1567     {
1568     fprintf(outfile, "Minimum match limit = %d\n", mid);
1569     break;
1570     }
1571     /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1572     max = mid;
1573     mid = (min + mid)/2;
1574     }
1575     else break; /* Some other error */
1576     }
1577    
1578     extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1579     }
1580    
1581     /* If callout_data is set, use the interface with additional data */
1582    
1583     else if (callout_data_set)
1584     {
1585     if (extra == NULL)
1586     {
1587 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1588 nigel 63 extra->flags = 0;
1589     }
1590     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1591 nigel 71 extra->callout_data = &callout_data;
1592 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1593     options | g_notempty, use_offsets, use_size_offsets);
1594     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1595     }
1596    
1597     /* The normal case is just to do the match once, with the default
1598     value of match_limit. */
1599    
1600 nigel 75 else
1601     {
1602     count = pcre_exec(re, extra, (char *)bptr, len,
1603     start_offset, options | g_notempty, use_offsets, use_size_offsets);
1604     }
1605 nigel 3
1606     if (count == 0)
1607     {
1608     fprintf(outfile, "Matched, but too many substrings\n");
1609 nigel 53 count = use_size_offsets/3;
1610 nigel 3 }
1611    
1612 nigel 39 /* Matched */
1613    
1614 nigel 3 if (count >= 0)
1615     {
1616     int i;
1617 nigel 29 for (i = 0; i < count * 2; i += 2)
1618 nigel 3 {
1619 nigel 57 if (use_offsets[i] < 0)
1620 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
1621     else
1622     {
1623     fprintf(outfile, "%2d: ", i/2);
1624 nigel 63 (void)pchars(bptr + use_offsets[i],
1625     use_offsets[i+1] - use_offsets[i], outfile);
1626 nigel 3 fprintf(outfile, "\n");
1627 nigel 35 if (i == 0)
1628     {
1629     if (do_showrest)
1630     {
1631     fprintf(outfile, " 0+ ");
1632 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1633     outfile);
1634 nigel 35 fprintf(outfile, "\n");
1635     }
1636     }
1637 nigel 3 }
1638     }
1639 nigel 29
1640     for (i = 0; i < 32; i++)
1641     {
1642     if ((copystrings & (1 << i)) != 0)
1643     {
1644 nigel 37 char copybuffer[16];
1645 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1646 nigel 37 i, copybuffer, sizeof(copybuffer));
1647 nigel 29 if (rc < 0)
1648     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1649     else
1650 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1651 nigel 29 }
1652     }
1653    
1654     for (i = 0; i < 32; i++)
1655     {
1656     if ((getstrings & (1 << i)) != 0)
1657     {
1658     const char *substring;
1659 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1660 nigel 29 i, &substring);
1661     if (rc < 0)
1662     fprintf(outfile, "get substring %d failed %d\n", i, rc);
1663     else
1664     {
1665     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1666 nigel 49 /* free((void *)substring); */
1667     pcre_free_substring(substring);
1668 nigel 29 }
1669     }
1670     }
1671    
1672     if (getlist)
1673     {
1674     const char **stringlist;
1675 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1676 nigel 29 &stringlist);
1677     if (rc < 0)
1678     fprintf(outfile, "get substring list failed %d\n", rc);
1679     else
1680     {
1681     for (i = 0; i < count; i++)
1682     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1683     if (stringlist[i] != NULL)
1684     fprintf(outfile, "string list not terminated by NULL\n");
1685 nigel 49 /* free((void *)stringlist); */
1686     pcre_free_substring_list(stringlist);
1687 nigel 29 }
1688     }
1689 nigel 39 }
1690 nigel 29
1691 nigel 75 /* There was a partial match */
1692    
1693     else if (count == PCRE_ERROR_PARTIAL)
1694     {
1695     fprintf(outfile, "Partial match\n");
1696     break; /* Out of the /g loop */
1697     }
1698    
1699 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
1700 nigel 47 g_notempty after a null match, this is not necessarily the end.
1701 nigel 73 We want to advance the start offset, and continue. In the case of UTF-8
1702     matching, the advance must be one character, not one byte. Fudge the
1703     offset values to achieve this. We won't be at the end of the string -
1704     that was checked before setting g_notempty. */
1705 nigel 39
1706 nigel 3 else
1707     {
1708 nigel 41 if (g_notempty != 0)
1709 nigel 35 {
1710 nigel 73 int onechar = 1;
1711 nigel 57 use_offsets[0] = start_offset;
1712 nigel 73 if (use_utf8)
1713     {
1714     while (start_offset + onechar < len)
1715     {
1716     int tb = bptr[start_offset+onechar];
1717     if (tb <= 127) break;
1718     tb &= 0xc0;
1719     if (tb != 0 && tb != 0xc0) onechar++;
1720     }
1721     }
1722     use_offsets[1] = start_offset + onechar;
1723 nigel 35 }
1724 nigel 41 else
1725     {
1726 nigel 73 if (count == PCRE_ERROR_NOMATCH)
1727 nigel 41 {
1728 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
1729 nigel 41 }
1730 nigel 73 else fprintf(outfile, "Error %d\n", count);
1731 nigel 41 break; /* Out of the /g loop */
1732     }
1733 nigel 3 }
1734 nigel 35
1735 nigel 39 /* If not /g or /G we are done */
1736    
1737     if (!do_g && !do_G) break;
1738    
1739 nigel 41 /* If we have matched an empty string, first check to see if we are at
1740     the end of the subject. If so, the /g loop is over. Otherwise, mimic
1741     what Perl's /g option does. This turns out to be rather cunning. First
1742 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1743     same point. If this fails (picked up above) we advance to the next
1744     character. */
1745 nigel 39
1746 nigel 41 g_notempty = 0;
1747 nigel 57 if (use_offsets[0] == use_offsets[1])
1748 nigel 41 {
1749 nigel 57 if (use_offsets[0] == len) break;
1750 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1751 nigel 41 }
1752 nigel 39
1753     /* For /g, update the start offset, leaving the rest alone */
1754    
1755 nigel 57 if (do_g) start_offset = use_offsets[1];
1756 nigel 39
1757     /* For /G, update the pointer and length */
1758    
1759     else
1760 nigel 35 {
1761 nigel 57 bptr += use_offsets[1];
1762     len -= use_offsets[1];
1763 nigel 35 }
1764 nigel 39 } /* End of loop for /g and /G */
1765     } /* End of loop for data lines */
1766 nigel 3
1767 nigel 11 CONTINUE:
1768 nigel 37
1769     #if !defined NOPOSIX
1770 nigel 3 if (posix || do_posix) regfree(&preg);
1771 nigel 37 #endif
1772    
1773 nigel 3 if (re != NULL) free(re);
1774     if (extra != NULL) free(extra);
1775 nigel 25 if (tables != NULL)
1776     {
1777     free((void *)tables);
1778     setlocale(LC_CTYPE, "C");
1779     }
1780 nigel 3 }
1781    
1782 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
1783 nigel 3 return 0;
1784     }
1785    
1786     /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12