/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 87 - (hide annotations) (download)
Sat Feb 24 21:41:21 2007 UTC (7 years, 7 months ago) by nigel
File MIME type: text/plain
File size: 55784 byte(s)
Load pcre-6.5 into code/trunk.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 nigel 3 #include <ctype.h>
40     #include <stdio.h>
41     #include <string.h>
42     #include <stdlib.h>
43     #include <time.h>
44 nigel 25 #include <locale.h>
45 nigel 75 #include <errno.h>
46 nigel 3
47 nigel 63 #define PCRE_SPY /* For Win32 build, import data, not export */
48 nigel 37
49 nigel 85 /* We include pcre_internal.h because we need the internal info for displaying
50     the results of pcre_study() and we also need to know about the internal
51     macros, structures, and other internal data values; pcretest has "inside
52     information" compared to a program that strictly follows the PCRE API. */
53 nigel 77
54     #include "pcre_internal.h"
55    
56 nigel 85 /* We need access to the data tables that PCRE uses. So as not to have to keep
57     two copies, we include the source file here, changing the names of the external
58     symbols to prevent clashes. */
59 nigel 77
60 nigel 85 #define _pcre_utf8_table1 utf8_table1
61     #define _pcre_utf8_table1_size utf8_table1_size
62     #define _pcre_utf8_table2 utf8_table2
63     #define _pcre_utf8_table3 utf8_table3
64     #define _pcre_utf8_table4 utf8_table4
65     #define _pcre_utt utt
66     #define _pcre_utt_size utt_size
67     #define _pcre_OP_lengths OP_lengths
68    
69     #include "pcre_tables.c"
70    
71     /* We also need the pcre_printint() function for printing out compiled
72     patterns. This function is in a separate file so that it can be included in
73     pcre_compile.c when that module is compiled with debugging enabled. */
74    
75     #include "pcre_printint.src"
76    
77    
78 nigel 37 /* It is possible to compile this test program without including support for
79     testing the POSIX interface, though this is not available via the standard
80     Makefile. */
81    
82     #if !defined NOPOSIX
83 nigel 3 #include "pcreposix.h"
84 nigel 37 #endif
85 nigel 3
86 nigel 79 /* It is also possible, for the benefit of the version imported into Exim, to
87     build pcretest without support for UTF8 (define NOUTF8), without the interface
88     to the DFA matcher (NODFA), and without the doublecheck of the old "info"
89     function (define NOINFOCHECK). */
90    
91    
92 nigel 85 /* Other parameters */
93    
94 nigel 3 #ifndef CLOCKS_PER_SEC
95     #ifdef CLK_TCK
96     #define CLOCKS_PER_SEC CLK_TCK
97     #else
98     #define CLOCKS_PER_SEC 100
99     #endif
100     #endif
101    
102 nigel 75 #define LOOPREPEAT 500000
103 nigel 3
104 nigel 69 #define BUFFER_SIZE 30000
105 nigel 75 #define PBUFFER_SIZE BUFFER_SIZE
106 nigel 73 #define DBUFFER_SIZE BUFFER_SIZE
107 nigel 23
108 nigel 69
109 nigel 85 /* Static variables */
110    
111 nigel 3 static FILE *outfile; /* Output stream; file-scope so the malloc wrappers and callout() can write to it */
112     static int log_store = 0; /* Set for each pattern from the -s/-m command-line setting or the /M modifier */
113 nigel 63 static int callout_count; /* Incremented by callout() for each callout whose number equals callout_fail_id */
114     static int callout_extra; /* Non-zero makes callout() print the capture details as well */
115     static int callout_fail_count; /* callout() yields 1 once callout_count reaches this value */
116     static int callout_fail_id; /* The callout number that is counted towards callout_fail_count */
117     static int first_callout; /* Non-zero makes callout() echo the subject; cleared by callout() itself */
118 nigel 73 static int show_malloc; /* Non-zero makes the malloc/free wrappers log each call to outfile */
119 nigel 67 static int use_utf8; /* Non-zero makes pchars() decode its input as UTF-8 */
120 nigel 43 static size_t gotten_store; /* Size passed to the most recent new_malloc() call */
121 nigel 3
122 nigel 75 static uschar *pbuffer = NULL; /* Copy of the current pattern; callout() prints the next item from it */
123 nigel 3
124 nigel 75
125 nigel 49
126     /*************************************************
127 nigel 63 * Read number from string *
128     *************************************************/
129    
130     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
131     around with conditional compilation, just do the job by hand. It is only used
132     for unpicking the -o argument, so just keep it simple.
133    
134     Arguments:
135     str string to be converted
136     endptr where to put the end pointer
137    
138     Returns: the converted value, as an int
139     */
140    
/* Skip leading white space, then accumulate a run of decimal digits. The
position of the first character that is not part of the number is handed back
through endptr. No overflow checking is done. */

static int
get_value(unsigned char *str, unsigned char **endptr)
{
unsigned char *cursor = str;
int total = 0;

for (; *cursor != 0 && isspace(*cursor); cursor++) ;   /* skip white space */

for (; isdigit(*cursor); cursor++)
  total = total * 10 + (int)(*cursor - '0');

*endptr = cursor;
return total;
}
150    
151    
152    
153 nigel 49
154     /*************************************************
155     * Convert UTF-8 string to value *
156     *************************************************/
157    
158     /* This function takes one or more bytes that represents a UTF-8 character,
159     and returns the value of the character.
160    
161     Argument:
162     buffer a pointer to the byte vector
163     vptr a pointer to an int to receive the value
164    
165     Returns: > 0 => the number of bytes consumed
166     -6 to 0 => malformed UTF-8 character at offset = (-return)
167     */
168    
169 nigel 79 #if !defined NOUTF8
170    
171 nigel 67 static int
172 nigel 49 utf82ord(unsigned char *buffer, int *vptr)
173     {
174     int c = *buffer++;
175     int d = c;
176     int i, j, s;
177    
178     for (i = -1; i < 6; i++) /* i is number of additional bytes */
179     {
180     if ((d & 0x80) == 0) break;
181     d <<= 1;
182     }
183    
184     if (i == -1) { *vptr = c; return 1; } /* ascii character */
185     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
186    
187     /* i now has a value in the range 1-5 */
188    
189 nigel 59 s = 6*i;
190 nigel 85 d = (c & utf8_table3[i]) << s;
191 nigel 49
192     for (j = 0; j < i; j++)
193     {
194     c = *buffer++;
195     if ((c & 0xc0) != 0x80) return -(j+1);
196 nigel 59 s -= 6;
197 nigel 49 d |= (c & 0x3f) << s;
198     }
199    
200     /* Check that encoding was the correct unique one */
201    
202 nigel 85 for (j = 0; j < utf8_table1_size; j++)
203     if (d <= utf8_table1[j]) break;
204 nigel 49 if (j != i) return -(i+1);
205    
206     /* Valid value */
207    
208     *vptr = d;
209     return i+1;
210     }
211    
212 nigel 79 #endif
213 nigel 49
214    
215 nigel 79
216 nigel 63 /*************************************************
217 nigel 85 * Convert character value to UTF-8 *
218     *************************************************/
219    
220     /* This function takes an integer value in the range 0 - 0x7fffffff
221     and encodes it as a UTF-8 character in 1 to 6 bytes.
222    
223     Arguments:
224     cvalue the character value
225     buffer pointer to buffer for result - at least 6 bytes long
226    
227     Returns: number of bytes placed in the buffer
228     */
229    
230     static int
231     ord2utf8(int cvalue, uschar *buffer)
232     {
233     register int i, j;
234     for (i = 0; i < utf8_table1_size; i++)
235     if (cvalue <= utf8_table1[i]) break;
236     buffer += i;
237     for (j = i; j > 0; j--)
238     {
239     *buffer-- = 0x80 | (cvalue & 0x3f);
240     cvalue >>= 6;
241     }
242     *buffer = utf8_table2[i] | cvalue;
243     return i + 1;
244     }
245    
246    
247    
248     /*************************************************
249 nigel 63 * Print character string *
250     *************************************************/
251 nigel 49
252 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
253     mode. Yields number of characters printed. If handed a NULL file, just counts
254     chars without printing. */
255 nigel 49
256 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
257 nigel 3 {
258 nigel 85 int c = 0;
259 nigel 63 int yield = 0;
260 nigel 3
261 nigel 63 while (length-- > 0)
262 nigel 3 {
263 nigel 79 #if !defined NOUTF8
264 nigel 67 if (use_utf8)
265 nigel 63 {
266     int rc = utf82ord(p, &c);
267 nigel 3
268 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
269     {
270     length -= rc - 1;
271     p += rc;
272     if (c < 256 && isprint(c))
273     {
274     if (f != NULL) fprintf(f, "%c", c);
275     yield++;
276     }
277     else
278     {
279     int n;
280     if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
281     yield += n;
282     }
283     continue;
284     }
285     }
286 nigel 79 #endif
287 nigel 3
288 nigel 63 /* Not UTF-8, or malformed UTF-8 */
289    
290     if (isprint(c = *(p++)))
291 nigel 3 {
292 nigel 63 if (f != NULL) fprintf(f, "%c", c);
293     yield++;
294 nigel 3 }
295 nigel 63 else
296 nigel 3 {
297 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
298     yield += 4;
299     }
300     }
301 nigel 3
302 nigel 63 return yield;
303     }
304 nigel 23
305 nigel 3
306 nigel 23
307 nigel 63 /*************************************************
308     * Callout function *
309     *************************************************/
310 nigel 3
311 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
312     the match. Yield zero unless more callouts than the fail count, or the callout
313     data is not zero. */
314 nigel 3
315 nigel 63 static int callout(pcre_callout_block *cb)
316     {
317     FILE *f = (first_callout | callout_extra)? outfile : NULL;
318 nigel 75 int i, pre_start, post_start, subject_length;
319 nigel 3
320 nigel 63 if (callout_extra)
321     {
322     fprintf(f, "Callout %d: last capture = %d\n",
323     cb->callout_number, cb->capture_last);
324 nigel 3
325 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
326     {
327     if (cb->offset_vector[i] < 0)
328     fprintf(f, "%2d: <unset>\n", i/2);
329     else
330     {
331     fprintf(f, "%2d: ", i/2);
332     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
333     cb->offset_vector[i+1] - cb->offset_vector[i], f);
334     fprintf(f, "\n");
335     }
336     }
337     }
338 nigel 3
339 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
340     datails. On subsequent calls in the same match, we use pchars just to find the
341     printed lengths of the substrings. */
342 nigel 3
343 nigel 63 if (f != NULL) fprintf(f, "--->");
344 nigel 3
345 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
346     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
347     cb->current_position - cb->start_match, f);
348 nigel 3
349 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
350    
351 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
352     cb->subject_length - cb->current_position, f);
353 nigel 3
354 nigel 63 if (f != NULL) fprintf(f, "\n");
355 nigel 9
356 nigel 63 /* Always print appropriate indicators, with callout number if not already
357 nigel 75 shown. For automatic callouts, show the pattern offset. */
358 nigel 3
359 nigel 75 if (cb->callout_number == 255)
360     {
361     fprintf(outfile, "%+3d ", cb->pattern_position);
362     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
363     }
364     else
365     {
366     if (callout_extra) fprintf(outfile, " ");
367     else fprintf(outfile, "%3d ", cb->callout_number);
368     }
369 nigel 3
370 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
371     fprintf(outfile, "^");
372 nigel 3
373 nigel 63 if (post_start > 0)
374     {
375     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
376     fprintf(outfile, "^");
377 nigel 3 }
378    
379 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
380     fprintf(outfile, " ");
381    
382     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
383     pbuffer + cb->pattern_position);
384    
385 nigel 63 fprintf(outfile, "\n");
386     first_callout = 0;
387 nigel 3
388 nigel 71 if (cb->callout_data != NULL)
389 nigel 49 {
390 nigel 71 int callout_data = *((int *)(cb->callout_data));
391     if (callout_data != 0)
392     {
393     fprintf(outfile, "Callout data = %d\n", callout_data);
394     return callout_data;
395     }
396 nigel 63 }
397 nigel 49
398 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
399     (++callout_count >= callout_fail_count)? 1 : 0;
400 nigel 3 }
401    
402    
403 nigel 63 /*************************************************
404 nigel 73 * Local malloc functions *
405 nigel 63 *************************************************/
406 nigel 3
407     /* Alternative malloc function, to test functionality and show the size of the
408     compiled re. */
409    
410     static void *new_malloc(size_t size)
411     {
412 nigel 73 void *block = malloc(size);
413 nigel 43 gotten_store = size;
414 nigel 73 if (show_malloc)
415 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
416 nigel 73 return block;
417 nigel 3 }
418    
419 nigel 73 static void new_free(void *block)
420     {
421     if (show_malloc)
422     fprintf(outfile, "free %p\n", block);
423     free(block);
424     }
425 nigel 3
426    
427 nigel 73 /* For recursion malloc/free, to test stacking calls */
428    
429     static void *stack_malloc(size_t size)
430     {
431     void *block = malloc(size);
432     if (show_malloc)
433 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
434 nigel 73 return block;
435     }
436    
437     static void stack_free(void *block)
438     {
439     if (show_malloc)
440     fprintf(outfile, "stack_free %p\n", block);
441     free(block);
442     }
443    
444    
445 nigel 63 /*************************************************
446     * Call pcre_fullinfo() *
447     *************************************************/
448 nigel 43
449     /* Get one piece of information from the pcre_fullinfo() function */
450    
451     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
452     {
453     int rc;
454     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
455     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
456     }
457    
458    
459    
460 nigel 63 /*************************************************
461 nigel 75 * Byte flipping function *
462     *************************************************/
463    
/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value    the value to be flipped
  n        2 to swap the low two bytes; any other value swaps the low four

Returns:   the flipped value; bits above those being swapped are discarded

The work is done in unsigned arithmetic. Shifting a byte into the top bit of a
signed long is undefined when long is only 32 bits wide. */

static long int
byteflip(long int value, int n)
{
unsigned long int v = (unsigned long int)value;

if (n == 2)
  return (long int)(((v & 0x00ffUL) << 8) | ((v & 0xff00UL) >> 8));

return (long int)(((v & 0x000000ffUL) << 24) |
                  ((v & 0x0000ff00UL) <<  8) |
                  ((v & 0x00ff0000UL) >>  8) |
                  ((v & 0xff000000UL) >> 24));
}
473    
474    
475    
476    
477     /*************************************************
478 nigel 87 * Check match or recursion limit *
479     *************************************************/
480    
481     static int
482     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
483     int start_offset, int options, int *use_offsets, int use_size_offsets,
484     int flag, unsigned long int *limit, int errnumber, const char *msg)
485     {
486     int count;
487     int min = 0;
488     int mid = 64;
489     int max = -1;
490    
491     extra->flags |= flag;
492    
493     for (;;)
494     {
495     *limit = mid;
496    
497     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
498     use_offsets, use_size_offsets);
499    
500     if (count == errnumber)
501     {
502     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
503     min = mid;
504     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
505     }
506    
507     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
508     count == PCRE_ERROR_PARTIAL)
509     {
510     if (mid == min + 1)
511     {
512     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
513     break;
514     }
515     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
516     max = mid;
517     mid = (min + mid)/2;
518     }
519     else break; /* Some other error */
520     }
521    
522     extra->flags &= ~flag;
523     return count;
524     }
525    
526    
527    
528     /*************************************************
529 nigel 63 * Main Program *
530     *************************************************/
531 nigel 43
532 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
533     consist of a regular expression, in delimiters and optionally followed by
534     options, followed by a set of test data, terminated by an empty line. */
535    
536     int main(int argc, char **argv)
537     {
538     FILE *infile = stdin;
539     int options = 0;
540     int study_options = 0;
541     int op = 1;
542     int timeit = 0;
543     int showinfo = 0;
544 nigel 31 int showstore = 0;
545 nigel 87 int quiet = 0;
546 nigel 53 int size_offsets = 45;
547     int size_offsets_max;
548 nigel 77 int *offsets = NULL;
549 nigel 53 #if !defined NOPOSIX
550 nigel 3 int posix = 0;
551 nigel 53 #endif
552 nigel 3 int debug = 0;
553 nigel 11 int done = 0;
554 nigel 77 int all_use_dfa = 0;
555     int yield = 0;
556 nigel 3
557 nigel 69 unsigned char *buffer;
558     unsigned char *dbuffer;
559    
560     /* Get buffers from malloc() so that Electric Fence will check their misuse
561     when I am debugging. */
562    
563 nigel 71 buffer = (unsigned char *)malloc(BUFFER_SIZE);
564     dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
565 nigel 75 pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
566 nigel 69
567 nigel 75 /* The outfile variable is static so that new_malloc can use it. The _setmode()
568     stuff is some magic that I don't understand, but which apparently does good
569     things in Windows. It's related to line terminations. */
570 nigel 3
571 nigel 75 #if defined(_WIN32) || defined(WIN32)
572     _setmode( _fileno( stdout ), 0x8000 );
573     #endif /* defined(_WIN32) || defined(WIN32) */
574    
575 nigel 3 outfile = stdout;
576    
577     /* Scan options */
578    
579     while (argc > 1 && argv[op][0] == '-')
580     {
581 nigel 63 unsigned char *endptr;
582 nigel 53
583 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
584     showstore = 1;
585 nigel 3 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
586 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
587 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
588     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
589 nigel 79 #if !defined NODFA
590 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
591 nigel 79 #endif
592 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
593 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
594     *endptr == 0))
595 nigel 53 {
596     op++;
597     argc--;
598     }
599     #if !defined NOPOSIX
600 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
601 nigel 53 #endif
602 nigel 63 else if (strcmp(argv[op], "-C") == 0)
603     {
604     int rc;
605     printf("PCRE version %s\n", pcre_version());
606     printf("Compiled with\n");
607     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
608     printf(" %sUTF-8 support\n", rc? "" : "No ");
609 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
610     printf(" %sUnicode properties support\n", rc? "" : "No ");
611 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
612     printf(" Newline character is %s\n", (rc == '\r')? "CR" : "LF");
613     (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
614     printf(" Internal link size = %d\n", rc);
615     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
616     printf(" POSIX malloc threshold = %d\n", rc);
617     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
618     printf(" Default match limit = %d\n", rc);
619 nigel 87 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
620     printf(" Default recursion depth limit = %d\n", rc);
621 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
622     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
623 nigel 63 exit(0);
624     }
625 nigel 3 else
626     {
627 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
628     printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
629 nigel 63 printf(" -C show PCRE compile-time options and exit\n");
630 nigel 77 printf(" -d debug: show compiled code; implies -i\n");
631 nigel 79 #if !defined NODFA
632 nigel 77 printf(" -dfa force DFA matching for all subjects\n");
633 nigel 79 #endif
634 nigel 77 printf(" -i show information about compiled pattern\n"
635 nigel 75 " -m output memory used information\n"
636 nigel 53 " -o <n> set size of offsets vector to <n>\n");
637     #if !defined NOPOSIX
638     printf(" -p use POSIX interface\n");
639     #endif
640 nigel 75 printf(" -s output store (memory) used information\n"
641 nigel 53 " -t time compilation and execution\n");
642 nigel 77 yield = 1;
643     goto EXIT;
644 nigel 3 }
645     op++;
646     argc--;
647     }
648    
649 nigel 53 /* Get the store for the offsets vector, and remember what it was */
650    
651     size_offsets_max = size_offsets;
652 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
653 nigel 53 if (offsets == NULL)
654     {
655     printf("** Failed to get %d bytes of memory for offsets vector\n",
656     size_offsets_max * sizeof(int));
657 nigel 77 yield = 1;
658     goto EXIT;
659 nigel 53 }
660    
661 nigel 3 /* Sort out the input and output files */
662    
663     if (argc > 1)
664     {
665 nigel 75 infile = fopen(argv[op], "rb");
666 nigel 3 if (infile == NULL)
667     {
668     printf("** Failed to open %s\n", argv[op]);
669 nigel 77 yield = 1;
670     goto EXIT;
671 nigel 3 }
672     }
673    
674     if (argc > 2)
675     {
676 nigel 75 outfile = fopen(argv[op+1], "wb");
677 nigel 3 if (outfile == NULL)
678     {
679     printf("** Failed to open %s\n", argv[op+1]);
680 nigel 77 yield = 1;
681     goto EXIT;
682 nigel 3 }
683     }
684    
685     /* Set alternative malloc function */
686    
687     pcre_malloc = new_malloc;
688 nigel 73 pcre_free = new_free;
689     pcre_stack_malloc = stack_malloc;
690     pcre_stack_free = stack_free;
691 nigel 3
692 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
693 nigel 3
694 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
695 nigel 3
696     /* Main loop */
697    
698 nigel 11 while (!done)
699 nigel 3 {
700     pcre *re = NULL;
701     pcre_extra *extra = NULL;
702 nigel 37
703     #if !defined NOPOSIX /* There are still compilers that require no indent */
704 nigel 3 regex_t preg;
705 nigel 45 int do_posix = 0;
706 nigel 37 #endif
707    
708 nigel 7 const char *error;
709 nigel 25 unsigned char *p, *pp, *ppp;
710 nigel 75 unsigned char *to_file = NULL;
711 nigel 53 const unsigned char *tables = NULL;
712 nigel 75 unsigned long int true_size, true_study_size = 0;
713     size_t size, regex_gotten_store;
714 nigel 3 int do_study = 0;
715 nigel 25 int do_debug = debug;
716 nigel 35 int do_G = 0;
717     int do_g = 0;
718 nigel 25 int do_showinfo = showinfo;
719 nigel 35 int do_showrest = 0;
720 nigel 75 int do_flip = 0;
721 nigel 3 int erroroffset, len, delimiter;
722    
723 nigel 67 use_utf8 = 0;
724 nigel 63
725 nigel 3 if (infile == stdin) printf(" re> ");
726 nigel 69 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
727 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
728 nigel 63 fflush(outfile);
729 nigel 3
730     p = buffer;
731     while (isspace(*p)) p++;
732     if (*p == 0) continue;
733    
734 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
735 nigel 3
736 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
737     {
738     unsigned long int magic;
739     uschar sbuf[8];
740     FILE *f;
741    
742     p++;
743     pp = p + (int)strlen((char *)p);
744     while (isspace(pp[-1])) pp--;
745     *pp = 0;
746    
747     f = fopen((char *)p, "rb");
748     if (f == NULL)
749     {
750     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
751     continue;
752     }
753    
754     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
755    
756     true_size =
757     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
758     true_study_size =
759     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
760    
761     re = (real_pcre *)new_malloc(true_size);
762     regex_gotten_store = gotten_store;
763    
764     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
765    
766     magic = ((real_pcre *)re)->magic_number;
767     if (magic != MAGIC_NUMBER)
768     {
769     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
770     {
771     do_flip = 1;
772     }
773     else
774     {
775     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
776     fclose(f);
777     continue;
778     }
779     }
780    
781     fprintf(outfile, "Compiled regex%s loaded from %s\n",
782     do_flip? " (byte-inverted)" : "", p);
783    
784     /* Need to know if UTF-8 for printing data strings */
785    
786     new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
787     use_utf8 = (options & PCRE_UTF8) != 0;
788    
789     /* Now see if there is any following study data */
790    
791     if (true_study_size != 0)
792     {
793     pcre_study_data *psd;
794    
795     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
796     extra->flags = PCRE_EXTRA_STUDY_DATA;
797    
798     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
799     extra->study_data = psd;
800    
801     if (fread(psd, 1, true_study_size, f) != true_study_size)
802     {
803     FAIL_READ:
804     fprintf(outfile, "Failed to read data from %s\n", p);
805     if (extra != NULL) new_free(extra);
806     if (re != NULL) new_free(re);
807     fclose(f);
808     continue;
809     }
810     fprintf(outfile, "Study data loaded from %s\n", p);
811     do_study = 1; /* To get the data output if requested */
812     }
813     else fprintf(outfile, "No study data\n");
814    
815     fclose(f);
816     goto SHOW_INFO;
817     }
818    
819     /* In-line pattern (the usual case). Get the delimiter and seek the end of
820     the pattern; if it isn't complete, read more. */
821    
822 nigel 3 delimiter = *p++;
823    
824 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
825 nigel 3 {
826 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
827 nigel 3 goto SKIP_DATA;
828     }
829    
830     pp = p;
831    
832     for(;;)
833     {
834 nigel 29 while (*pp != 0)
835     {
836     if (*pp == '\\' && pp[1] != 0) pp++;
837     else if (*pp == delimiter) break;
838     pp++;
839     }
840 nigel 3 if (*pp != 0) break;
841    
842 nigel 69 len = BUFFER_SIZE - (pp - buffer);
843 nigel 3 if (len < 256)
844     {
845     fprintf(outfile, "** Expression too long - missing delimiter?\n");
846     goto SKIP_DATA;
847     }
848    
849     if (infile == stdin) printf(" > ");
850     if (fgets((char *)pp, len, infile) == NULL)
851     {
852     fprintf(outfile, "** Unexpected EOF\n");
853 nigel 11 done = 1;
854     goto CONTINUE;
855 nigel 3 }
856 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
857 nigel 3 }
858    
859 nigel 29 /* If the first character after the delimiter is backslash, make
860     the pattern end with backslash. This is purely to provide a way
861     of testing for the error message when a pattern ends with backslash. */
862    
863     if (pp[1] == '\\') *pp++ = '\\';
864    
865 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
866     for callouts. */
867 nigel 3
868     *pp++ = 0;
869 nigel 75 strcpy((char *)pbuffer, (char *)p);
870 nigel 3
871     /* Look for options after final delimiter */
872    
873     options = 0;
874     study_options = 0;
875 nigel 31 log_store = showstore; /* default from command line */
876    
877 nigel 3 while (*pp != 0)
878     {
879     switch (*pp++)
880     {
881 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
882 nigel 35 case 'g': do_g = 1; break;
883 nigel 3 case 'i': options |= PCRE_CASELESS; break;
884     case 'm': options |= PCRE_MULTILINE; break;
885     case 's': options |= PCRE_DOTALL; break;
886     case 'x': options |= PCRE_EXTENDED; break;
887 nigel 25
888 nigel 35 case '+': do_showrest = 1; break;
889 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
890 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
891 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
892 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
893 nigel 75 case 'F': do_flip = 1; break;
894 nigel 35 case 'G': do_G = 1; break;
895 nigel 25 case 'I': do_showinfo = 1; break;
896 nigel 31 case 'M': log_store = 1; break;
897 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
898 nigel 37
899     #if !defined NOPOSIX
900 nigel 3 case 'P': do_posix = 1; break;
901 nigel 37 #endif
902    
903 nigel 3 case 'S': do_study = 1; break;
904 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
905 nigel 3 case 'X': options |= PCRE_EXTRA; break;
906 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
907 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
908 nigel 25
909     case 'L':
910     ppp = pp;
911 nigel 77 /* The '\r' test here is so that it works on Windows */
912     while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
913 nigel 25 *ppp = 0;
914     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
915     {
916     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
917     goto SKIP_DATA;
918     }
919     tables = pcre_maketables();
920     pp = ppp;
921     break;
922    
923 nigel 75 case '>':
924     to_file = pp;
925     while (*pp != 0) pp++;
926     while (isspace(pp[-1])) pp--;
927     *pp = 0;
928     break;
929    
930 nigel 77 case '\r': /* So that it works in Windows */
931     case '\n':
932     case ' ':
933     break;
934 nigel 75
935 nigel 3 default:
936     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
937     goto SKIP_DATA;
938     }
939     }
940    
941 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
942 nigel 25 timing, showing, or debugging options, nor the ability to pass over
943     local character tables. */
944 nigel 3
945 nigel 37 #if !defined NOPOSIX
946 nigel 3 if (posix || do_posix)
947     {
948     int rc;
949     int cflags = 0;
950 nigel 75
951 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
952     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
953 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
954 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
955     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
956    
957 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
958    
959     /* Compilation failed; go back for another re, skipping to blank line
960     if non-interactive. */
961    
962     if (rc != 0)
963     {
964 nigel 69 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
965 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
966     goto SKIP_DATA;
967     }
968     }
969    
970     /* Handle compiling via the native interface */
971    
972     else
973 nigel 37 #endif /* !defined NOPOSIX */
974    
975 nigel 3 {
976     if (timeit)
977     {
978     register int i;
979     clock_t time_taken;
980     clock_t start_time = clock();
981 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
982 nigel 3 {
983 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
984 nigel 3 if (re != NULL) free(re);
985     }
986     time_taken = clock() - start_time;
987 nigel 27 fprintf(outfile, "Compile time %.3f milliseconds\n",
988 nigel 63 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
989     (double)CLOCKS_PER_SEC);
990 nigel 3 }
991    
992 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
993 nigel 3
994     /* Compilation failed; go back for another re, skipping to blank line
995     if non-interactive. */
996    
997     if (re == NULL)
998     {
999     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1000     SKIP_DATA:
1001     if (infile != stdin)
1002     {
1003     for (;;)
1004     {
1005 nigel 69 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1006 nigel 11 {
1007     done = 1;
1008     goto CONTINUE;
1009     }
1010 nigel 3 len = (int)strlen((char *)buffer);
1011     while (len > 0 && isspace(buffer[len-1])) len--;
1012     if (len == 0) break;
1013     }
1014     fprintf(outfile, "\n");
1015     }
1016 nigel 25 goto CONTINUE;
1017 nigel 3 }
1018    
1019 nigel 43 /* Compilation succeeded; print data if required. There are now two
1020     info-returning functions. The old one has a limited interface and
1021     returns only limited data. Check that it agrees with the newer one. */
1022 nigel 3
1023 nigel 63 if (log_store)
1024     fprintf(outfile, "Memory allocation (code space): %d\n",
1025     (int)(gotten_store -
1026     sizeof(real_pcre) -
1027     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1028    
1029 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1030     and remember the store that was got. */
1031    
1032     true_size = ((real_pcre *)re)->size;
1033     regex_gotten_store = gotten_store;
1034    
1035     /* If /S was present, study the regexp to generate additional info to
1036     help with the matching. */
1037    
1038     if (do_study)
1039     {
1040     if (timeit)
1041     {
1042     register int i;
1043     clock_t time_taken;
1044     clock_t start_time = clock();
1045     for (i = 0; i < LOOPREPEAT; i++)
1046     extra = pcre_study(re, study_options, &error);
1047     time_taken = clock() - start_time;
1048     if (extra != NULL) free(extra);
1049     fprintf(outfile, " Study time %.3f milliseconds\n",
1050     (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1051     (double)CLOCKS_PER_SEC);
1052     }
1053     extra = pcre_study(re, study_options, &error);
1054     if (error != NULL)
1055     fprintf(outfile, "Failed to study: %s\n", error);
1056     else if (extra != NULL)
1057     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1058     }
1059    
1060     /* If the 'F' option was present, we flip the bytes of all the integer
1061     fields in the regex data block and the study block. This is to make it
1062     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1063     compiled on a different architecture. */
1064    
1065     if (do_flip)
1066     {
1067     real_pcre *rre = (real_pcre *)re;
1068     rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1069     rre->size = byteflip(rre->size, sizeof(rre->size));
1070     rre->options = byteflip(rre->options, sizeof(rre->options));
1071     rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1072     rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1073     rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1074     rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1075     rre->name_table_offset = byteflip(rre->name_table_offset,
1076     sizeof(rre->name_table_offset));
1077     rre->name_entry_size = byteflip(rre->name_entry_size,
1078     sizeof(rre->name_entry_size));
1079     rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1080    
1081     if (extra != NULL)
1082     {
1083     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1084     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1085     rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1086     }
1087     }
1088    
1089     /* Extract information from the compiled data if required */
1090    
1091     SHOW_INFO:
1092    
1093 nigel 25 if (do_showinfo)
1094 nigel 3 {
1095 nigel 75 unsigned long int get_options, all_options;
1096 nigel 79 #if !defined NOINFOCHECK
1097 nigel 43 int old_first_char, old_options, old_count;
1098 nigel 79 #endif
1099 nigel 43 int count, backrefmax, first_char, need_char;
1100 nigel 63 int nameentrysize, namecount;
1101     const uschar *nametable;
1102 nigel 3
1103 nigel 63 if (do_debug)
1104     {
1105     fprintf(outfile, "------------------------------------------------------------------\n");
1106 nigel 85 pcre_printint(re, outfile);
1107 nigel 63 }
1108 nigel 3
1109 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1110 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1111     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1112     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1113 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1114 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1115 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1116     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1117 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1118 nigel 43
1119 nigel 79 #if !defined NOINFOCHECK
1120 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1121 nigel 3 if (count < 0) fprintf(outfile,
1122 nigel 43 "Error %d from pcre_info()\n", count);
1123 nigel 3 else
1124     {
1125 nigel 43 if (old_count != count) fprintf(outfile,
1126     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1127     old_count);
1128 nigel 37
1129 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1130     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1131     first_char, old_first_char);
1132 nigel 37
1133 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1134     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1135     get_options, old_options);
1136 nigel 43 }
1137 nigel 79 #endif
1138 nigel 43
1139 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1140 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1141 nigel 77 (int)size, (int)regex_gotten_store);
1142 nigel 43
1143     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1144     if (backrefmax > 0)
1145     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1146 nigel 63
1147     if (namecount > 0)
1148     {
1149     fprintf(outfile, "Named capturing subpatterns:\n");
1150     while (namecount-- > 0)
1151     {
1152     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1153     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1154     GET2(nametable, 0));
1155     nametable += nameentrysize;
1156     }
1157     }
1158    
1159 nigel 75 /* The NOPARTIAL bit is a private bit in the options, so we have
1160     to fish it out via our back door */
1161    
1162     all_options = ((real_pcre *)re)->options;
1163     if (do_flip)
1164     {
1165     all_options = byteflip(all_options, sizeof(all_options));
1166     }
1167    
1168     if ((all_options & PCRE_NOPARTIAL) != 0)
1169     fprintf(outfile, "Partial matching not supported\n");
1170    
1171 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1172 nigel 87 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s\n",
1173 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1174     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1175     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1176     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1177 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1178 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1179     ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1180     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1181     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1182 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1183 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1184     ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1185 nigel 43
1186     if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1187     fprintf(outfile, "Case state changes\n");
1188    
1189     if (first_char == -1)
1190     {
1191     fprintf(outfile, "First char at start or follows \\n\n");
1192     }
1193     else if (first_char < 0)
1194     {
1195     fprintf(outfile, "No first char\n");
1196     }
1197     else
1198     {
1199 nigel 63 int ch = first_char & 255;
1200 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1201 nigel 63 "" : " (caseless)";
1202     if (isprint(ch))
1203     fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1204 nigel 3 else
1205 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1206 nigel 43 }
1207 nigel 37
1208 nigel 43 if (need_char < 0)
1209     {
1210     fprintf(outfile, "No need char\n");
1211 nigel 3 }
1212 nigel 43 else
1213     {
1214 nigel 63 int ch = need_char & 255;
1215 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1216 nigel 63 "" : " (caseless)";
1217     if (isprint(ch))
1218     fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1219 nigel 43 else
1220 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1221 nigel 43 }
1222 nigel 75
1223     /* Don't output study size; at present it is in any case a fixed
1224     value, but it varies, depending on the computer architecture, and
1225     so messes up the test suite. (And with the /F option, it might be
1226     flipped.) */
1227    
1228     if (do_study)
1229     {
1230     if (extra == NULL)
1231     fprintf(outfile, "Study returned NULL\n");
1232     else
1233     {
1234     uschar *start_bits = NULL;
1235     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1236    
1237     if (start_bits == NULL)
1238     fprintf(outfile, "No starting byte set\n");
1239     else
1240     {
1241     int i;
1242     int c = 24;
1243     fprintf(outfile, "Starting byte set: ");
1244     for (i = 0; i < 256; i++)
1245     {
1246     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1247     {
1248     if (c > 75)
1249     {
1250     fprintf(outfile, "\n ");
1251     c = 2;
1252     }
1253     if (isprint(i) && i != ' ')
1254     {
1255     fprintf(outfile, "%c ", i);
1256     c += 2;
1257     }
1258     else
1259     {
1260     fprintf(outfile, "\\x%02x ", i);
1261     c += 5;
1262     }
1263     }
1264     }
1265     fprintf(outfile, "\n");
1266     }
1267     }
1268     }
1269 nigel 3 }
1270    
1271 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1272     that is all. The first 8 bytes of the file are the regex length and then
1273     the study length, in big-endian order. */
1274 nigel 3
1275 nigel 75 if (to_file != NULL)
1276 nigel 3 {
1277 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1278     if (f == NULL)
1279 nigel 3 {
1280 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1281 nigel 3 }
1282 nigel 75 else
1283     {
1284     uschar sbuf[8];
1285     sbuf[0] = (true_size >> 24) & 255;
1286     sbuf[1] = (true_size >> 16) & 255;
1287     sbuf[2] = (true_size >> 8) & 255;
1288     sbuf[3] = (true_size) & 255;
1289 nigel 3
1290 nigel 75 sbuf[4] = (true_study_size >> 24) & 255;
1291     sbuf[5] = (true_study_size >> 16) & 255;
1292     sbuf[6] = (true_study_size >> 8) & 255;
1293     sbuf[7] = (true_study_size) & 255;
1294 nigel 3
1295 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1296     fwrite(re, 1, true_size, f) < true_size)
1297     {
1298     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1299     }
1300 nigel 3 else
1301     {
1302 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1303     if (extra != NULL)
1304 nigel 3 {
1305 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1306     true_study_size)
1307 nigel 3 {
1308 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1309     strerror(errno));
1310 nigel 3 }
1311 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1312 nigel 3 }
1313     }
1314 nigel 75 fclose(f);
1315 nigel 3 }
1316 nigel 77
1317     new_free(re);
1318     if (extra != NULL) new_free(extra);
1319     if (tables != NULL) new_free((void *)tables);
1320 nigel 75 continue; /* With next regex */
1321 nigel 3 }
1322 nigel 75 } /* End of non-POSIX compile */
1323 nigel 3
1324     /* Read data lines and test them */
1325    
1326     for (;;)
1327     {
1328 nigel 87 uschar *q;
1329     uschar *bptr = dbuffer;
1330 nigel 57 int *use_offsets = offsets;
1331 nigel 53 int use_size_offsets = size_offsets;
1332 nigel 63 int callout_data = 0;
1333     int callout_data_set = 0;
1334 nigel 3 int count, c;
1335 nigel 29 int copystrings = 0;
1336 nigel 63 int find_match_limit = 0;
1337 nigel 29 int getstrings = 0;
1338     int getlist = 0;
1339 nigel 39 int gmatched = 0;
1340 nigel 35 int start_offset = 0;
1341 nigel 41 int g_notempty = 0;
1342 nigel 77 int use_dfa = 0;
1343 nigel 3
1344     options = 0;
1345    
1346 nigel 63 pcre_callout = callout;
1347     first_callout = 1;
1348     callout_extra = 0;
1349     callout_count = 0;
1350     callout_fail_count = 999999;
1351     callout_fail_id = -1;
1352 nigel 73 show_malloc = 0;
1353 nigel 63
1354 nigel 35 if (infile == stdin) printf("data> ");
1355 nigel 69 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1356 nigel 11 {
1357     done = 1;
1358     goto CONTINUE;
1359     }
1360 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1361 nigel 3
1362     len = (int)strlen((char *)buffer);
1363     while (len > 0 && isspace(buffer[len-1])) len--;
1364     buffer[len] = 0;
1365     if (len == 0) break;
1366    
1367     p = buffer;
1368     while (isspace(*p)) p++;
1369    
1370 nigel 9 q = dbuffer;
1371 nigel 3 while ((c = *p++) != 0)
1372     {
1373     int i = 0;
1374     int n = 0;
1375 nigel 63
1376 nigel 3 if (c == '\\') switch ((c = *p++))
1377     {
1378     case 'a': c = 7; break;
1379     case 'b': c = '\b'; break;
1380     case 'e': c = 27; break;
1381     case 'f': c = '\f'; break;
1382     case 'n': c = '\n'; break;
1383     case 'r': c = '\r'; break;
1384     case 't': c = '\t'; break;
1385     case 'v': c = '\v'; break;
1386    
1387     case '0': case '1': case '2': case '3':
1388     case '4': case '5': case '6': case '7':
1389     c -= '0';
1390     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1391     c = c * 8 + *p++ - '0';
1392     break;
1393    
1394     case 'x':
1395 nigel 49
1396     /* Handle \x{..} specially - new Perl thing for utf8 */
1397    
1398 nigel 79 #if !defined NOUTF8
1399 nigel 49 if (*p == '{')
1400     {
1401     unsigned char *pt = p;
1402     c = 0;
1403     while (isxdigit(*(++pt)))
1404     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1405     if (*pt == '}')
1406     {
1407 nigel 67 unsigned char buff8[8];
1408 nigel 49 int ii, utn;
1409 nigel 85 utn = ord2utf8(c, buff8);
1410 nigel 67 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1411     c = buff8[ii]; /* Last byte */
1412 nigel 49 p = pt + 1;
1413     break;
1414     }
1415     /* Not correct form; fall through */
1416     }
1417 nigel 79 #endif
1418 nigel 49
1419     /* Ordinary \x */
1420    
1421 nigel 3 c = 0;
1422     while (i++ < 2 && isxdigit(*p))
1423     {
1424     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1425     p++;
1426     }
1427     break;
1428    
1429 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1430 nigel 3 p--;
1431     continue;
1432    
1433 nigel 75 case '>':
1434     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1435     continue;
1436    
1437 nigel 3 case 'A': /* Option setting */
1438     options |= PCRE_ANCHORED;
1439     continue;
1440    
1441     case 'B':
1442     options |= PCRE_NOTBOL;
1443     continue;
1444    
1445 nigel 29 case 'C':
1446 nigel 63 if (isdigit(*p)) /* Set copy string */
1447     {
1448     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1449     copystrings |= 1 << n;
1450     }
1451     else if (isalnum(*p))
1452     {
1453     uschar name[256];
1454 nigel 67 uschar *npp = name;
1455     while (isalnum(*p)) *npp++ = *p++;
1456     *npp = 0;
1457 nigel 65 n = pcre_get_stringnumber(re, (char *)name);
1458 nigel 63 if (n < 0)
1459     fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1460     else copystrings |= 1 << n;
1461     }
1462     else if (*p == '+')
1463     {
1464     callout_extra = 1;
1465     p++;
1466     }
1467     else if (*p == '-')
1468     {
1469     pcre_callout = NULL;
1470     p++;
1471     }
1472     else if (*p == '!')
1473     {
1474     callout_fail_id = 0;
1475     p++;
1476     while(isdigit(*p))
1477     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1478     callout_fail_count = 0;
1479     if (*p == '!')
1480     {
1481     p++;
1482     while(isdigit(*p))
1483     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1484     }
1485     }
1486     else if (*p == '*')
1487     {
1488     int sign = 1;
1489     callout_data = 0;
1490     if (*(++p) == '-') { sign = -1; p++; }
1491     while(isdigit(*p))
1492     callout_data = callout_data * 10 + *p++ - '0';
1493     callout_data *= sign;
1494     callout_data_set = 1;
1495     }
1496 nigel 29 continue;
1497    
1498 nigel 79 #if !defined NODFA
1499 nigel 77 case 'D':
1500 nigel 79 #if !defined NOPOSIX
1501 nigel 77 if (posix || do_posix)
1502     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1503     else
1504 nigel 79 #endif
1505 nigel 77 use_dfa = 1;
1506     continue;
1507    
1508     case 'F':
1509     options |= PCRE_DFA_SHORTEST;
1510     continue;
1511 nigel 79 #endif
1512 nigel 77
1513 nigel 29 case 'G':
1514 nigel 63 if (isdigit(*p))
1515     {
1516     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1517     getstrings |= 1 << n;
1518     }
1519     else if (isalnum(*p))
1520     {
1521     uschar name[256];
1522 nigel 67 uschar *npp = name;
1523     while (isalnum(*p)) *npp++ = *p++;
1524     *npp = 0;
1525 nigel 65 n = pcre_get_stringnumber(re, (char *)name);
1526 nigel 63 if (n < 0)
1527     fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1528     else getstrings |= 1 << n;
1529     }
1530 nigel 29 continue;
1531    
1532     case 'L':
1533     getlist = 1;
1534     continue;
1535    
1536 nigel 63 case 'M':
1537     find_match_limit = 1;
1538     continue;
1539    
1540 nigel 37 case 'N':
1541     options |= PCRE_NOTEMPTY;
1542     continue;
1543    
1544 nigel 3 case 'O':
1545     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1546 nigel 53 if (n > size_offsets_max)
1547     {
1548     size_offsets_max = n;
1549 nigel 57 free(offsets);
1550 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1551 nigel 53 if (offsets == NULL)
1552     {
1553     printf("** Failed to get %d bytes of memory for offsets vector\n",
1554     size_offsets_max * sizeof(int));
1555 nigel 77 yield = 1;
1556     goto EXIT;
1557 nigel 53 }
1558     }
1559     use_size_offsets = n;
1560 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1561 nigel 3 continue;
1562    
1563 nigel 75 case 'P':
1564     options |= PCRE_PARTIAL;
1565     continue;
1566    
1567 nigel 79 #if !defined NODFA
1568 nigel 77 case 'R':
1569     options |= PCRE_DFA_RESTART;
1570     continue;
1571 nigel 79 #endif
1572 nigel 77
1573 nigel 73 case 'S':
1574     show_malloc = 1;
1575     continue;
1576    
1577 nigel 3 case 'Z':
1578     options |= PCRE_NOTEOL;
1579     continue;
1580 nigel 71
1581     case '?':
1582     options |= PCRE_NO_UTF8_CHECK;
1583     continue;
1584 nigel 3 }
1585 nigel 9 *q++ = c;
1586 nigel 3 }
1587 nigel 9 *q = 0;
1588     len = q - dbuffer;
1589 nigel 3
1590 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
1591     {
1592     printf("**Match limit not relevant for DFA matching: ignored\n");
1593     find_match_limit = 0;
1594     }
1595    
1596 nigel 3 /* Handle matching via the POSIX interface, which does not
1597 nigel 63 support timing or playing with the match limit or callout data. */
1598 nigel 3
1599 nigel 37 #if !defined NOPOSIX
1600 nigel 3 if (posix || do_posix)
1601     {
1602     int rc;
1603     int eflags = 0;
1604 nigel 63 regmatch_t *pmatch = NULL;
1605     if (use_size_offsets > 0)
1606 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1607 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1608     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1609    
1610 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1611 nigel 3
1612     if (rc != 0)
1613     {
1614 nigel 69 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1615 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1616     }
1617 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1618     != 0)
1619     {
1620     fprintf(outfile, "Matched with REG_NOSUB\n");
1621     }
1622 nigel 3 else
1623     {
1624 nigel 7 size_t i;
1625 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
1626 nigel 3 {
1627     if (pmatch[i].rm_so >= 0)
1628     {
1629 nigel 23 fprintf(outfile, "%2d: ", (int)i);
1630 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
1631     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1632 nigel 3 fprintf(outfile, "\n");
1633 nigel 35 if (i == 0 && do_showrest)
1634     {
1635     fprintf(outfile, " 0+ ");
1636 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1637     outfile);
1638 nigel 35 fprintf(outfile, "\n");
1639     }
1640 nigel 3 }
1641     }
1642     }
1643 nigel 53 free(pmatch);
1644 nigel 3 }
1645    
1646 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
1647 nigel 3
1648 nigel 37 else
1649     #endif /* !defined NOPOSIX */
1650    
1651 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
1652 nigel 3 {
1653     if (timeit)
1654     {
1655     register int i;
1656     clock_t time_taken;
1657     clock_t start_time = clock();
1658 nigel 77
1659 nigel 79 #if !defined NODFA
1660 nigel 77 if (all_use_dfa || use_dfa)
1661     {
1662     int workspace[1000];
1663     for (i = 0; i < LOOPREPEAT; i++)
1664     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1665     options | g_notempty, use_offsets, use_size_offsets, workspace,
1666     sizeof(workspace)/sizeof(int));
1667     }
1668     else
1669 nigel 79 #endif
1670 nigel 77
1671 nigel 27 for (i = 0; i < LOOPREPEAT; i++)
1672 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
1673 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1674 nigel 77
1675 nigel 3 time_taken = clock() - start_time;
1676 nigel 27 fprintf(outfile, "Execute time %.3f milliseconds\n",
1677 nigel 63 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1678     (double)CLOCKS_PER_SEC);
1679 nigel 3 }
1680    
1681 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
1682 nigel 87 varying limits in order to find the minimum value for the match limit and
1683     for the recursion limit. */
1684 nigel 63
1685     if (find_match_limit)
1686     {
1687     if (extra == NULL)
1688     {
1689 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1690 nigel 63 extra->flags = 0;
1691     }
1692    
1693 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
1694     options|g_notempty, use_offsets, use_size_offsets,
1695     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
1696     PCRE_ERROR_MATCHLIMIT, "match()");
1697 nigel 63
1698 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
1699     options|g_notempty, use_offsets, use_size_offsets,
1700     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
1701     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
1702 nigel 63 }
1703    
1704     /* If callout_data is set, use the interface with additional data */
1705    
1706     else if (callout_data_set)
1707     {
1708     if (extra == NULL)
1709     {
1710 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1711 nigel 63 extra->flags = 0;
1712     }
1713     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1714 nigel 71 extra->callout_data = &callout_data;
1715 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1716     options | g_notempty, use_offsets, use_size_offsets);
1717     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1718     }
1719    
1720     /* The normal case is just to do the match once, with the default
1721     value of match_limit. */
1722    
1723 nigel 79 #if !defined NODFA
1724 nigel 77 else if (all_use_dfa || use_dfa)
1725     {
1726     int workspace[1000];
1727     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1728     options | g_notempty, use_offsets, use_size_offsets, workspace,
1729     sizeof(workspace)/sizeof(int));
1730     if (count == 0)
1731     {
1732     fprintf(outfile, "Matched, but too many subsidiary matches\n");
1733     count = use_size_offsets/2;
1734     }
1735     }
1736 nigel 79 #endif
1737 nigel 77
1738 nigel 75 else
1739     {
1740     count = pcre_exec(re, extra, (char *)bptr, len,
1741     start_offset, options | g_notempty, use_offsets, use_size_offsets);
1742 nigel 77 if (count == 0)
1743     {
1744     fprintf(outfile, "Matched, but too many substrings\n");
1745     count = use_size_offsets/3;
1746     }
1747 nigel 75 }
1748 nigel 3
1749 nigel 39 /* Matched */
1750    
1751 nigel 3 if (count >= 0)
1752     {
1753     int i;
1754 nigel 29 for (i = 0; i < count * 2; i += 2)
1755 nigel 3 {
1756 nigel 57 if (use_offsets[i] < 0)
1757 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
1758     else
1759     {
1760     fprintf(outfile, "%2d: ", i/2);
1761 nigel 63 (void)pchars(bptr + use_offsets[i],
1762     use_offsets[i+1] - use_offsets[i], outfile);
1763 nigel 3 fprintf(outfile, "\n");
1764 nigel 35 if (i == 0)
1765     {
1766     if (do_showrest)
1767     {
1768     fprintf(outfile, " 0+ ");
1769 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1770     outfile);
1771 nigel 35 fprintf(outfile, "\n");
1772     }
1773     }
1774 nigel 3 }
1775     }
1776 nigel 29
1777     for (i = 0; i < 32; i++)
1778     {
1779     if ((copystrings & (1 << i)) != 0)
1780     {
1781 nigel 37 char copybuffer[16];
1782 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1783 nigel 37 i, copybuffer, sizeof(copybuffer));
1784 nigel 29 if (rc < 0)
1785     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1786     else
1787 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1788 nigel 29 }
1789     }
1790    
1791     for (i = 0; i < 32; i++)
1792     {
1793     if ((getstrings & (1 << i)) != 0)
1794     {
1795     const char *substring;
1796 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1797 nigel 29 i, &substring);
1798     if (rc < 0)
1799     fprintf(outfile, "get substring %d failed %d\n", i, rc);
1800     else
1801     {
1802     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1803 nigel 49 /* free((void *)substring); */
1804     pcre_free_substring(substring);
1805 nigel 29 }
1806     }
1807     }
1808    
1809     if (getlist)
1810     {
1811     const char **stringlist;
1812 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1813 nigel 29 &stringlist);
1814     if (rc < 0)
1815     fprintf(outfile, "get substring list failed %d\n", rc);
1816     else
1817     {
1818     for (i = 0; i < count; i++)
1819     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1820     if (stringlist[i] != NULL)
1821     fprintf(outfile, "string list not terminated by NULL\n");
1822 nigel 49 /* free((void *)stringlist); */
1823     pcre_free_substring_list(stringlist);
1824 nigel 29 }
1825     }
1826 nigel 39 }
1827 nigel 29
1828 nigel 75 /* There was a partial match */
1829    
1830     else if (count == PCRE_ERROR_PARTIAL)
1831     {
1832 nigel 77 fprintf(outfile, "Partial match");
1833 nigel 79 #if !defined NODFA
1834 nigel 77 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
1835     fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
1836     bptr + use_offsets[0]);
1837 nigel 79 #endif
1838 nigel 77 fprintf(outfile, "\n");
1839 nigel 75 break; /* Out of the /g loop */
1840     }
1841    
1842 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
1843 nigel 47 g_notempty after a null match, this is not necessarily the end.
1844 nigel 73 We want to advance the start offset, and continue. In the case of UTF-8
1845     matching, the advance must be one character, not one byte. Fudge the
1846     offset values to achieve this. We won't be at the end of the string -
1847     that was checked before setting g_notempty. */
1848 nigel 39
1849 nigel 3 else
1850     {
1851 nigel 41 if (g_notempty != 0)
1852 nigel 35 {
1853 nigel 73 int onechar = 1;
1854 nigel 57 use_offsets[0] = start_offset;
1855 nigel 73 if (use_utf8)
1856     {
1857     while (start_offset + onechar < len)
1858     {
1859     int tb = bptr[start_offset+onechar];
1860     if (tb <= 127) break;
1861     tb &= 0xc0;
1862     if (tb != 0 && tb != 0xc0) onechar++;
1863     }
1864     }
1865     use_offsets[1] = start_offset + onechar;
1866 nigel 35 }
1867 nigel 41 else
1868     {
1869 nigel 73 if (count == PCRE_ERROR_NOMATCH)
1870 nigel 41 {
1871 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
1872 nigel 41 }
1873 nigel 73 else fprintf(outfile, "Error %d\n", count);
1874 nigel 41 break; /* Out of the /g loop */
1875     }
1876 nigel 3 }
1877 nigel 35
1878 nigel 39 /* If not /g or /G we are done */
1879    
1880     if (!do_g && !do_G) break;
1881    
1882 nigel 41 /* If we have matched an empty string, first check to see if we are at
1883     the end of the subject. If so, the /g loop is over. Otherwise, mimic
1884     what Perl's /g option does. This turns out to be rather cunning. First
1885 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1886     same point. If this fails (picked up above) we advance to the next
1887     character. */
1888 nigel 39
1889 nigel 41 g_notempty = 0;
1890 nigel 57 if (use_offsets[0] == use_offsets[1])
1891 nigel 41 {
1892 nigel 57 if (use_offsets[0] == len) break;
1893 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1894 nigel 41 }
1895 nigel 39
1896     /* For /g, update the start offset, leaving the rest alone */
1897    
1898 nigel 57 if (do_g) start_offset = use_offsets[1];
1899 nigel 39
1900     /* For /G, update the pointer and length */
1901    
1902     else
1903 nigel 35 {
1904 nigel 57 bptr += use_offsets[1];
1905     len -= use_offsets[1];
1906 nigel 35 }
1907 nigel 39 } /* End of loop for /g and /G */
1908     } /* End of loop for data lines */
1909 nigel 3
1910 nigel 11 CONTINUE:
1911 nigel 37
1912     #if !defined NOPOSIX
1913 nigel 3 if (posix || do_posix) regfree(&preg);
1914 nigel 37 #endif
1915    
1916 nigel 77 if (re != NULL) new_free(re);
1917     if (extra != NULL) new_free(extra);
1918 nigel 25 if (tables != NULL)
1919     {
1920 nigel 77 new_free((void *)tables);
1921 nigel 25 setlocale(LC_CTYPE, "C");
1922     }
1923 nigel 3 }
1924    
1925 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
1926 nigel 77
1927     EXIT:
1928    
1929     if (infile != NULL && infile != stdin) fclose(infile);
1930     if (outfile != NULL && outfile != stdout) fclose(outfile);
1931    
1932     free(buffer);
1933     free(dbuffer);
1934     free(pbuffer);
1935     free(offsets);
1936    
1937     return yield;
1938 nigel 3 }
1939    
1940 nigel 77 /* End of pcretest.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12