/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 392 - (hide annotations) (download)
Tue Mar 17 21:30:30 2009 UTC (5 years, 5 months ago) by ph10
File MIME type: text/plain
File size: 73076 byte(s)
Update after detrailing for a test release.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50 nigel 3
51 ph10 287 #ifdef SUPPORT_LIBREADLINE
52 ph10 343 #ifdef HAVE_UNISTD_H
53 ph10 287 #include <unistd.h>
54 ph10 343 #endif
55 ph10 287 #include <readline/readline.h>
56     #include <readline/history.h>
57     #endif
58 nigel 93
59 ph10 287
60 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
61     input and output without "b"; then I was told that "b" was needed in some
62     environments, so it was added for release 5.0 to both the input and output. (It
63     makes no difference on Unix-like systems.) Later I was told that it is wrong
64     for the input on Windows. I've now abstracted the modes into two macros that
65     are set here, to make it easier to fiddle with them, and removed "b" from the
66     input mode under Windows. */
67    
68     #if defined(_WIN32) || defined(WIN32)
69     #include <io.h> /* For _setmode() */
70     #include <fcntl.h> /* For _O_BINARY */
71     #define INPUT_MODE "r"
72     #define OUTPUT_MODE "wb"
73    
74 ph10 343 #define isatty _isatty /* This is what Windows calls them, I'm told */
75     #define fileno _fileno
76    
77 nigel 93 #else
78     #include <sys/time.h> /* These two includes are needed */
79     #include <sys/resource.h> /* for setrlimit(). */
80     #define INPUT_MODE "rb"
81     #define OUTPUT_MODE "wb"
82 nigel 91 #endif
83    
84 nigel 93
85 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
86     displaying the results of pcre_study() and we also need to know about the
87     internal macros, structures, and other internal data values; pcretest has
88     "inside information" compared to a program that strictly follows the PCRE API.
89 nigel 37
90 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
91     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
92     appropriately for an application, not for building PCRE. */
93 nigel 77
94 ph10 145 #include "pcre.h"
95 nigel 77 #include "pcre_internal.h"
96    
97 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
98     to keep two copies, we include the source file here, changing the names of the
99     external symbols to prevent clashes. */
100 nigel 77
101 ph10 351 #define _pcre_ucp_gentype ucp_gentype
102 nigel 85 #define _pcre_utf8_table1 utf8_table1
103     #define _pcre_utf8_table1_size utf8_table1_size
104     #define _pcre_utf8_table2 utf8_table2
105     #define _pcre_utf8_table3 utf8_table3
106     #define _pcre_utf8_table4 utf8_table4
107     #define _pcre_utt utt
108     #define _pcre_utt_size utt_size
109 ph10 240 #define _pcre_utt_names utt_names
110 nigel 85 #define _pcre_OP_lengths OP_lengths
111    
112     #include "pcre_tables.c"
113    
114     /* We also need the pcre_printint() function for printing out compiled
115     patterns. This function is in a separate file so that it can be included in
116 nigel 93 pcre_compile.c when that module is compiled with debugging enabled.
117 nigel 85
118 nigel 93 The definition of the macro PRINTABLE, which determines whether to print an
119     output character as-is or as a hex value when showing compiled patterns, is
120     contained in this file. We use it here also, in cases when the locale has not
121     been explicitly changed, so as to get consistent output from systems that
122     differ in their output from isprint() even in the "C" locale. */
123    
124 nigel 85 #include "pcre_printint.src"
125    
126 nigel 93 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
127 nigel 85
128 nigel 93
129 nigel 37 /* It is possible to compile this test program without including support for
130     testing the POSIX interface, though this is not available via the standard
131     Makefile. */
132    
133     #if !defined NOPOSIX
134 nigel 3 #include "pcreposix.h"
135 nigel 37 #endif
136 nigel 3
137 ph10 107 /* It is also possible, for the benefit of the version currently imported into
138     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
139     interface to the DFA matcher (NODFA), and without the doublecheck of the old
140     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
141     UTF8 support if PCRE is built without it. */
142 nigel 79
143 ph10 107 #ifndef SUPPORT_UTF8
144     #ifndef NOUTF8
145     #define NOUTF8
146     #endif
147     #endif
148 nigel 79
149 ph10 107
150 nigel 85 /* Other parameters */
151    
152 nigel 3 #ifndef CLOCKS_PER_SEC
153     #ifdef CLK_TCK
154     #define CLOCKS_PER_SEC CLK_TCK
155     #else
156     #define CLOCKS_PER_SEC 100
157     #endif
158     #endif
159    
160 nigel 93 /* This is the default loop count for timing. */
161    
162 nigel 75 #define LOOPREPEAT 500000
163 nigel 3
164 nigel 85 /* Static variables */
165    
166 nigel 3 static FILE *outfile;
167     static int log_store = 0;
168 nigel 63 static int callout_count;
169     static int callout_extra;
170     static int callout_fail_count;
171     static int callout_fail_id;
172 ph10 210 static int debug_lengths;
173 nigel 63 static int first_callout;
174 nigel 93 static int locale_set = 0;
175 nigel 73 static int show_malloc;
176 nigel 67 static int use_utf8;
177 nigel 43 static size_t gotten_store;
178 nigel 3
179 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
180    
181     static int buffer_size = 50000;
182     static uschar *buffer = NULL;
183     static uschar *dbuffer = NULL;
184 nigel 75 static uschar *pbuffer = NULL;
185 nigel 3
186 nigel 75
187 nigel 49
188     /*************************************************
189 nigel 91 * Read or extend an input line *
190     *************************************************/
191    
192     /* Input lines are read into buffer, but both patterns and data lines can be
193     continued over multiple input lines. In addition, if the buffer fills up, we
194     want to automatically expand it so as to be able to handle extremely large
195     lines that are needed for certain stress tests. When the input buffer is
196     expanded, the other two buffers must also be expanded likewise, and the
197     contents of pbuffer, which are a copy of the input for callouts, must be
198     preserved (for when expansion happens for a data line). This is not the most
199     optimal way of handling this, but hey, this is just a test program!
200    
201     Arguments:
202     f the file to read
203     start where in buffer to start (this *must* be within buffer)
204 ph10 287 prompt for stdin or readline()
205 nigel 91
206     Returns: pointer to the start of new data
207     could be a copy of start, or could be moved
208     NULL if no data read and EOF reached
209     */
210    
211     static uschar *
212 ph10 287 extend_inputline(FILE *f, uschar *start, const char *prompt)
213 nigel 91 {
214     uschar *here = start;
215    
216     for (;;)
217     {
218     int rlen = buffer_size - (here - buffer);
219 nigel 93
220 nigel 91 if (rlen > 1000)
221     {
222     int dlen;
223 ph10 289
224 ph10 287 /* If libreadline support is required, use readline() to read a line if the
225     input is a terminal. Note that readline() removes the trailing newline, so
226     we must put it back again, to be compatible with fgets(). */
227 ph10 289
228 ph10 287 #ifdef SUPPORT_LIBREADLINE
229     if (isatty(fileno(f)))
230     {
231 ph10 289 size_t len;
232 ph10 287 char *s = readline(prompt);
233     if (s == NULL) return (here == start)? NULL : start;
234     len = strlen(s);
235 ph10 289 if (len > 0) add_history(s);
236 ph10 287 if (len > rlen - 1) len = rlen - 1;
237     memcpy(here, s, len);
238     here[len] = '\n';
239 ph10 289 here[len+1] = 0;
240     free(s);
241 ph10 287 }
242 ph10 289 else
243     #endif
244    
245 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
246 ph10 289
247 ph10 287 {
248 ph10 289 if (f == stdin) printf(prompt);
249 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
250     return (here == start)? NULL : start;
251 ph10 289 }
252    
253 nigel 91 dlen = (int)strlen((char *)here);
254     if (dlen > 0 && here[dlen - 1] == '\n') return start;
255     here += dlen;
256     }
257    
258     else
259     {
260     int new_buffer_size = 2*buffer_size;
261     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
262     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
263     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
264    
265     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
266     {
267     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
268     exit(1);
269     }
270    
271     memcpy(new_buffer, buffer, buffer_size);
272     memcpy(new_pbuffer, pbuffer, buffer_size);
273    
274     buffer_size = new_buffer_size;
275    
276     start = new_buffer + (start - buffer);
277     here = new_buffer + (here - buffer);
278    
279     free(buffer);
280     free(dbuffer);
281     free(pbuffer);
282    
283     buffer = new_buffer;
284     dbuffer = new_dbuffer;
285     pbuffer = new_pbuffer;
286     }
287     }
288    
289     return NULL; /* Control never gets here */
290     }
291    
292    
293    
294    
295    
296    
297    
/*************************************************
*          Read number from string               *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then a run of decimal digits is converted. A
value too large for an int is clamped to INT_MAX instead of overflowing. If
there are no digits the result is 0 and *endptr points at the first
non-space character.

Arguments:
  str        string to be converted
  endptr     where to put the end pointer (first character not consumed)

Returns:     the value, as a non-negative int
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int result = 0;

  while (isspace(*str)) str++;

  while (isdigit(*str))
    {
    int digit = *str++ - '0';

    /* result*10 + digit fits in an int only if result <= (INT_MAX-digit)/10;
    once clamped, the value stays at INT_MAX for any further digits. */

    if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
    }

  *endptr = str;
  return result;
}
322    
323    
324    
325 nigel 49
326     /*************************************************
327     * Convert UTF-8 string to value *
328     *************************************************/
329    
330     /* This function takes one or more bytes that represents a UTF-8 character,
331     and returns the value of the character.
332    
333     Argument:
334 nigel 91 utf8bytes a pointer to the byte vector
335     vptr a pointer to an int to receive the value
336 nigel 49
337 nigel 91 Returns: > 0 => the number of bytes consumed
338     -6 to 0 => malformed UTF-8 character at offset = (-return)
339 nigel 49 */
340    
341 nigel 79 #if !defined NOUTF8
342    
343 nigel 67 static int
344 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
345 nigel 49 {
346 nigel 91 int c = *utf8bytes++;
347 nigel 49 int d = c;
348     int i, j, s;
349    
350     for (i = -1; i < 6; i++) /* i is number of additional bytes */
351     {
352     if ((d & 0x80) == 0) break;
353     d <<= 1;
354     }
355    
356     if (i == -1) { *vptr = c; return 1; } /* ascii character */
357     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
358    
359     /* i now has a value in the range 1-5 */
360    
361 nigel 59 s = 6*i;
362 nigel 85 d = (c & utf8_table3[i]) << s;
363 nigel 49
364     for (j = 0; j < i; j++)
365     {
366 nigel 91 c = *utf8bytes++;
367 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
368 nigel 59 s -= 6;
369 nigel 49 d |= (c & 0x3f) << s;
370     }
371    
372     /* Check that encoding was the correct unique one */
373    
374 nigel 85 for (j = 0; j < utf8_table1_size; j++)
375     if (d <= utf8_table1[j]) break;
376 nigel 49 if (j != i) return -(i+1);
377    
378     /* Valid value */
379    
380     *vptr = d;
381     return i+1;
382     }
383    
384 nigel 79 #endif
385 nigel 49
386    
387 nigel 79
388 nigel 63 /*************************************************
389 nigel 85 * Convert character value to UTF-8 *
390     *************************************************/
391    
392     /* This function takes an integer value in the range 0 - 0x7fffffff
393     and encodes it as a UTF-8 character in 0 to 6 bytes.
394    
395     Arguments:
396     cvalue the character value
397 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
398 nigel 85
399     Returns: number of characters placed in the buffer
400     */
401    
402 nigel 93 #if !defined NOUTF8
403    
404 nigel 85 static int
405 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
406 nigel 85 {
407     register int i, j;
408     for (i = 0; i < utf8_table1_size; i++)
409     if (cvalue <= utf8_table1[i]) break;
410 nigel 91 utf8bytes += i;
411 nigel 85 for (j = i; j > 0; j--)
412     {
413 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
414 nigel 85 cvalue >>= 6;
415     }
416 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
417 nigel 85 return i + 1;
418     }
419    
420 nigel 93 #endif
421 nigel 85
422    
423 nigel 93
424 nigel 85 /*************************************************
425 nigel 63 * Print character string *
426     *************************************************/
427 nigel 49
428 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
429     mode. Yields number of characters printed. If handed a NULL file, just counts
430     chars without printing. */
431 nigel 49
432 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
433 nigel 3 {
434 nigel 85 int c = 0;
435 nigel 63 int yield = 0;
436 nigel 3
437 nigel 63 while (length-- > 0)
438 nigel 3 {
439 nigel 79 #if !defined NOUTF8
440 nigel 67 if (use_utf8)
441 nigel 63 {
442     int rc = utf82ord(p, &c);
443 nigel 3
444 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
445     {
446     length -= rc - 1;
447     p += rc;
448 nigel 93 if (PRINTHEX(c))
449 nigel 63 {
450     if (f != NULL) fprintf(f, "%c", c);
451     yield++;
452     }
453     else
454     {
455 nigel 93 int n = 4;
456     if (f != NULL) fprintf(f, "\\x{%02x}", c);
457     yield += (n <= 0x000000ff)? 2 :
458     (n <= 0x00000fff)? 3 :
459     (n <= 0x0000ffff)? 4 :
460     (n <= 0x000fffff)? 5 : 6;
461 nigel 63 }
462     continue;
463     }
464     }
465 nigel 79 #endif
466 nigel 3
467 nigel 63 /* Not UTF-8, or malformed UTF-8 */
468    
469 nigel 93 c = *p++;
470     if (PRINTHEX(c))
471 nigel 3 {
472 nigel 63 if (f != NULL) fprintf(f, "%c", c);
473     yield++;
474 nigel 3 }
475 nigel 63 else
476 nigel 3 {
477 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
478     yield += 4;
479     }
480     }
481 nigel 3
482 nigel 63 return yield;
483     }
484 nigel 23
485 nigel 3
486 nigel 23
487 nigel 63 /*************************************************
488     * Callout function *
489     *************************************************/
490 nigel 3
491 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
492     the match. Yield zero unless more callouts than the fail count, or the callout
493     data is not zero. */
494 nigel 3
495 nigel 63 static int callout(pcre_callout_block *cb)
496     {
497     FILE *f = (first_callout | callout_extra)? outfile : NULL;
498 nigel 75 int i, pre_start, post_start, subject_length;
499 nigel 3
500 nigel 63 if (callout_extra)
501     {
502     fprintf(f, "Callout %d: last capture = %d\n",
503     cb->callout_number, cb->capture_last);
504 nigel 3
505 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
506     {
507     if (cb->offset_vector[i] < 0)
508     fprintf(f, "%2d: <unset>\n", i/2);
509     else
510     {
511     fprintf(f, "%2d: ", i/2);
512     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
513     cb->offset_vector[i+1] - cb->offset_vector[i], f);
514     fprintf(f, "\n");
515     }
516     }
517     }
518 nigel 3
519 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
520     datails. On subsequent calls in the same match, we use pchars just to find the
521     printed lengths of the substrings. */
522 nigel 3
523 nigel 63 if (f != NULL) fprintf(f, "--->");
524 nigel 3
525 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
526     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
527     cb->current_position - cb->start_match, f);
528 nigel 3
529 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
530    
531 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
532     cb->subject_length - cb->current_position, f);
533 nigel 3
534 nigel 63 if (f != NULL) fprintf(f, "\n");
535 nigel 9
536 nigel 63 /* Always print appropriate indicators, with callout number if not already
537 nigel 75 shown. For automatic callouts, show the pattern offset. */
538 nigel 3
539 nigel 75 if (cb->callout_number == 255)
540     {
541     fprintf(outfile, "%+3d ", cb->pattern_position);
542     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
543     }
544     else
545     {
546     if (callout_extra) fprintf(outfile, " ");
547     else fprintf(outfile, "%3d ", cb->callout_number);
548     }
549 nigel 3
550 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
551     fprintf(outfile, "^");
552 nigel 3
553 nigel 63 if (post_start > 0)
554     {
555     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
556     fprintf(outfile, "^");
557 nigel 3 }
558    
559 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
560     fprintf(outfile, " ");
561    
562     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
563     pbuffer + cb->pattern_position);
564    
565 nigel 63 fprintf(outfile, "\n");
566     first_callout = 0;
567 nigel 3
568 nigel 71 if (cb->callout_data != NULL)
569 nigel 49 {
570 nigel 71 int callout_data = *((int *)(cb->callout_data));
571     if (callout_data != 0)
572     {
573     fprintf(outfile, "Callout data = %d\n", callout_data);
574     return callout_data;
575     }
576 nigel 63 }
577 nigel 49
578 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
579     (++callout_count >= callout_fail_count)? 1 : 0;
580 nigel 3 }
581    
582    
583 nigel 63 /*************************************************
584 nigel 73 * Local malloc functions *
585 nigel 63 *************************************************/
586 nigel 3
587     /* Alternative malloc function, to test functionality and show the size of the
588     compiled re. */
589    
590     static void *new_malloc(size_t size)
591     {
592 nigel 73 void *block = malloc(size);
593 nigel 43 gotten_store = size;
594 nigel 73 if (show_malloc)
595 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
596 nigel 73 return block;
597 nigel 3 }
598    
599 nigel 73 static void new_free(void *block)
600     {
601     if (show_malloc)
602     fprintf(outfile, "free %p\n", block);
603     free(block);
604     }
605 nigel 3
606    
607 nigel 73 /* For recursion malloc/free, to test stacking calls */
608    
609     static void *stack_malloc(size_t size)
610     {
611     void *block = malloc(size);
612     if (show_malloc)
613 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
614 nigel 73 return block;
615     }
616    
617     static void stack_free(void *block)
618     {
619     if (show_malloc)
620     fprintf(outfile, "stack_free %p\n", block);
621     free(block);
622     }
623    
624    
625 nigel 63 /*************************************************
626     * Call pcre_fullinfo() *
627     *************************************************/
628 nigel 43
629     /* Get one piece of information from the pcre_fullinfo() function */
630    
631     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
632     {
633     int rc;
634     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
635     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
636     }
637    
638    
639    
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value    the value to flip; only its low-order 2 or 4 bytes are used
  n        2 for a 2-byte value; anything else means a 4-byte value

Returns:   the value with those bytes in the opposite order
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int b0 = value & 0xff;            /* lowest byte */
  unsigned long int b1 = (value >> 8) & 0xff;
  unsigned long int b2, b3;

  if (n == 2) return (b0 << 8) | b1;

  b2 = (value >> 16) & 0xff;
  b3 = (value >> 24) & 0xff;                      /* highest of the four */

  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
653    
654    
655    
656    
657     /*************************************************
658 nigel 87 * Check match or recursion limit *
659     *************************************************/
660    
661     static int
662     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
663     int start_offset, int options, int *use_offsets, int use_size_offsets,
664     int flag, unsigned long int *limit, int errnumber, const char *msg)
665     {
666     int count;
667     int min = 0;
668     int mid = 64;
669     int max = -1;
670    
671     extra->flags |= flag;
672    
673     for (;;)
674     {
675     *limit = mid;
676    
677     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
678     use_offsets, use_size_offsets);
679    
680     if (count == errnumber)
681     {
682     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
683     min = mid;
684     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
685     }
686    
687     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
688     count == PCRE_ERROR_PARTIAL)
689     {
690     if (mid == min + 1)
691     {
692     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
693     break;
694     }
695     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
696     max = mid;
697     mid = (min + mid)/2;
698     }
699     else break; /* Some other error */
700     }
701    
702     extra->flags &= ~flag;
703     return count;
704     }
705    
706    
707    
708     /*************************************************
709 ph10 227 * Case-independent strncmp() function *
710     *************************************************/
711    
712     /*
713     Arguments:
714     s first string
715     t second string
716     n number of characters to compare
717    
718     Returns: < 0, = 0, or > 0, according to the comparison
719     */
720    
721     static int
722     strncmpic(uschar *s, uschar *t, int n)
723     {
724     while (n--)
725     {
726     int c = tolower(*s++) - tolower(*t++);
727     if (c) return c;
728     }
729     return 0;
730     }
731    
732    
733    
734     /*************************************************
735 nigel 91 * Check newline indicator *
736     *************************************************/
737    
738     /* This is used both at compile and run-time to check for <xxx> escapes, where
739 ph10 149 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
740     no match.
741 nigel 91
742     Arguments:
743     p points after the leading '<'
744     f file for error message
745    
746     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
747     */
748    
749     static int
750     check_newline(uschar *p, FILE *f)
751     {
752 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
753     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
754     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
755     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
756     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
757 ph10 231 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
758     if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
759 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
760     return 0;
761     }
762    
763    
764    
/*************************************************
*                Usage function                  *
*************************************************/

/* Write the command line summary to the standard output. The readline note
and the lines for -dfa and -p depend on how the program was built. None of the
text contains a format directive, so it is written with fputs(). */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
  fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

  fputs("\nOptions:\n"
        " -b show compiled code (bytecode)\n"
        " -C show PCRE compile-time options and exit\n"
        " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -M find MATCH_LIMIT minimum for each subject\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif

  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s output store (memory) used information\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
802    
803    
804    
805     /*************************************************
806 nigel 63 * Main Program *
807     *************************************************/
808 nigel 43
809 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
810     consist of a regular expression, in delimiters and optionally followed by
811     options, followed by a set of test data, terminated by an empty line. */
812    
813     int main(int argc, char **argv)
814     {
815     FILE *infile = stdin;
816     int options = 0;
817     int study_options = 0;
818 ph10 386 int default_find_match_limit = FALSE;
819 nigel 3 int op = 1;
820     int timeit = 0;
821 nigel 93 int timeitm = 0;
822 nigel 3 int showinfo = 0;
823 nigel 31 int showstore = 0;
824 nigel 87 int quiet = 0;
825 nigel 53 int size_offsets = 45;
826     int size_offsets_max;
827 nigel 77 int *offsets = NULL;
828 nigel 53 #if !defined NOPOSIX
829 nigel 3 int posix = 0;
830 nigel 53 #endif
831 nigel 3 int debug = 0;
832 nigel 11 int done = 0;
833 nigel 77 int all_use_dfa = 0;
834     int yield = 0;
835 nigel 91 int stack_size;
836 nigel 3
837 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
838     that 1024 is plenty long enough for the few names we'll be testing. */
839 nigel 69
840 nigel 91 uschar copynames[1024];
841     uschar getnames[1024];
842    
843     uschar *copynamesptr;
844     uschar *getnamesptr;
845    
846 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
847 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
848 nigel 69
849 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
850     dbuffer = (unsigned char *)malloc(buffer_size);
851     pbuffer = (unsigned char *)malloc(buffer_size);
852 nigel 69
853 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
854 nigel 3
855 nigel 93 outfile = stdout;
856    
857     /* The following _setmode() stuff is some Windows magic that tells its runtime
858     library to translate CRLF into a single LF character. At least, that's what
859     I've been told: never having used Windows I take this all on trust. Originally
860     it set 0x8000, but then I was advised that _O_BINARY was better. */
861    
862 nigel 75 #if defined(_WIN32) || defined(WIN32)
863 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
864     #endif
865 nigel 75
866 nigel 3 /* Scan options */
867    
868     while (argc > 1 && argv[op][0] == '-')
869     {
870 nigel 63 unsigned char *endptr;
871 nigel 53
872 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
873     showstore = 1;
874 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
875 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
876 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
877     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
878 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
879 nigel 79 #if !defined NODFA
880 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
881 nigel 79 #endif
882 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
883 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
884     *endptr == 0))
885 nigel 53 {
886     op++;
887     argc--;
888     }
889 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
890     {
891     int both = argv[op][2] == 0;
892     int temp;
893     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
894     *endptr == 0))
895     {
896     timeitm = temp;
897     op++;
898     argc--;
899     }
900     else timeitm = LOOPREPEAT;
901     if (both) timeit = timeitm;
902     }
903 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
904     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
905     *endptr == 0))
906     {
907 nigel 93 #if defined(_WIN32) || defined(WIN32)
908 nigel 91 printf("PCRE: -S not supported on this OS\n");
909     exit(1);
910     #else
911     int rc;
912     struct rlimit rlim;
913     getrlimit(RLIMIT_STACK, &rlim);
914     rlim.rlim_cur = stack_size * 1024 * 1024;
915     rc = setrlimit(RLIMIT_STACK, &rlim);
916     if (rc != 0)
917     {
918     printf("PCRE: setrlimit() failed with error %d\n", rc);
919     exit(1);
920     }
921     op++;
922     argc--;
923     #endif
924     }
925 nigel 53 #if !defined NOPOSIX
926 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
927 nigel 53 #endif
928 nigel 63 else if (strcmp(argv[op], "-C") == 0)
929     {
930     int rc;
931 ph10 392 unsigned long int lrc;
932 nigel 63 printf("PCRE version %s\n", pcre_version());
933     printf("Compiled with\n");
934     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
935     printf(" %sUTF-8 support\n", rc? "" : "No ");
936 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
937     printf(" %sUnicode properties support\n", rc? "" : "No ");
938 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
939 ph10 391 /* Note that these values are always the ASCII values, even
940 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
941 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
942     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
943 ph10 150 (rc == -2)? "ANYCRLF" :
944 nigel 93 (rc == -1)? "ANY" : "???");
945 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
946     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
947     "all Unicode newlines");
948 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
949     printf(" Internal link size = %d\n", rc);
950     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
951     printf(" POSIX malloc threshold = %d\n", rc);
952 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
953     printf(" Default match limit = %ld\n", lrc);
954     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
955     printf(" Default recursion depth limit = %ld\n", lrc);
956 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
957     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
958 ph10 121 goto EXIT;
959 nigel 63 }
960 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
961     strcmp(argv[op], "--help") == 0)
962     {
963     usage();
964     goto EXIT;
965     }
966 nigel 3 else
967     {
968 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
969 nigel 93 usage();
970 nigel 77 yield = 1;
971     goto EXIT;
972 nigel 3 }
973     op++;
974     argc--;
975     }
976    
977 nigel 53 /* Get the store for the offsets vector, and remember what it was */
978    
979     size_offsets_max = size_offsets;
980 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
981 nigel 53 if (offsets == NULL)
982     {
983     printf("** Failed to get %d bytes of memory for offsets vector\n",
984 ph10 151 (int)(size_offsets_max * sizeof(int)));
985 nigel 77 yield = 1;
986     goto EXIT;
987 nigel 53 }
988    
989 nigel 3 /* Sort out the input and output files */
990    
991     if (argc > 1)
992     {
993 nigel 93 infile = fopen(argv[op], INPUT_MODE);
994 nigel 3 if (infile == NULL)
995     {
996     printf("** Failed to open %s\n", argv[op]);
997 nigel 77 yield = 1;
998     goto EXIT;
999 nigel 3 }
1000     }
1001    
1002     if (argc > 2)
1003     {
1004 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
1005 nigel 3 if (outfile == NULL)
1006     {
1007     printf("** Failed to open %s\n", argv[op+1]);
1008 nigel 77 yield = 1;
1009     goto EXIT;
1010 nigel 3 }
1011     }
1012    
1013     /* Set alternative malloc function */
1014    
1015     pcre_malloc = new_malloc;
1016 nigel 73 pcre_free = new_free;
1017     pcre_stack_malloc = stack_malloc;
1018     pcre_stack_free = stack_free;
1019 nigel 3
1020 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1021 nigel 3
1022 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1023 nigel 3
1024     /* Main loop */
1025    
1026 nigel 11 while (!done)
1027 nigel 3 {
1028     pcre *re = NULL;
1029     pcre_extra *extra = NULL;
1030 nigel 37
1031     #if !defined NOPOSIX /* There are still compilers that require no indent */
1032 nigel 3 regex_t preg;
1033 nigel 45 int do_posix = 0;
1034 nigel 37 #endif
1035    
1036 nigel 7 const char *error;
1037 nigel 25 unsigned char *p, *pp, *ppp;
1038 nigel 75 unsigned char *to_file = NULL;
1039 nigel 53 const unsigned char *tables = NULL;
1040 nigel 75 unsigned long int true_size, true_study_size = 0;
1041     size_t size, regex_gotten_store;
1042 nigel 3 int do_study = 0;
1043 nigel 25 int do_debug = debug;
1044 nigel 35 int do_G = 0;
1045     int do_g = 0;
1046 nigel 25 int do_showinfo = showinfo;
1047 nigel 35 int do_showrest = 0;
1048 nigel 75 int do_flip = 0;
1049 nigel 93 int erroroffset, len, delimiter, poffset;
1050 nigel 3
1051 nigel 67 use_utf8 = 0;
1052 ph10 211 debug_lengths = 1;
1053 nigel 63
1054 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1055 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1056 nigel 63 fflush(outfile);
1057 nigel 3
1058     p = buffer;
1059     while (isspace(*p)) p++;
1060     if (*p == 0) continue;
1061    
1062 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1063 nigel 3
1064 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1065     {
1066 nigel 91 unsigned long int magic, get_options;
1067 nigel 75 uschar sbuf[8];
1068     FILE *f;
1069    
1070     p++;
1071     pp = p + (int)strlen((char *)p);
1072     while (isspace(pp[-1])) pp--;
1073     *pp = 0;
1074    
1075     f = fopen((char *)p, "rb");
1076     if (f == NULL)
1077     {
1078     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1079     continue;
1080     }
1081    
1082     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1083    
1084     true_size =
1085     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1086     true_study_size =
1087     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1088    
1089     re = (real_pcre *)new_malloc(true_size);
1090     regex_gotten_store = gotten_store;
1091    
1092     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1093    
1094     magic = ((real_pcre *)re)->magic_number;
1095     if (magic != MAGIC_NUMBER)
1096     {
1097     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1098     {
1099     do_flip = 1;
1100     }
1101     else
1102     {
1103     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1104     fclose(f);
1105     continue;
1106     }
1107     }
1108    
1109     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1110     do_flip? " (byte-inverted)" : "", p);
1111    
1112     /* Need to know if UTF-8 for printing data strings */
1113    
1114 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1115     use_utf8 = (get_options & PCRE_UTF8) != 0;
1116 nigel 75
1117     /* Now see if there is any following study data */
1118    
1119     if (true_study_size != 0)
1120     {
1121     pcre_study_data *psd;
1122    
1123     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1124     extra->flags = PCRE_EXTRA_STUDY_DATA;
1125    
1126     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1127     extra->study_data = psd;
1128    
1129     if (fread(psd, 1, true_study_size, f) != true_study_size)
1130     {
1131     FAIL_READ:
1132     fprintf(outfile, "Failed to read data from %s\n", p);
1133     if (extra != NULL) new_free(extra);
1134     if (re != NULL) new_free(re);
1135     fclose(f);
1136     continue;
1137     }
1138     fprintf(outfile, "Study data loaded from %s\n", p);
1139     do_study = 1; /* To get the data output if requested */
1140     }
1141     else fprintf(outfile, "No study data\n");
1142    
1143     fclose(f);
1144     goto SHOW_INFO;
1145     }
1146    
1147     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1148     the pattern; if it isn't complete, read more. */
1149    
1150 nigel 3 delimiter = *p++;
1151    
1152 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1153 nigel 3 {
1154 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1155 nigel 3 goto SKIP_DATA;
1156     }
1157    
1158     pp = p;
1159 nigel 93 poffset = p - buffer;
1160 nigel 3
1161     for(;;)
1162     {
1163 nigel 29 while (*pp != 0)
1164     {
1165     if (*pp == '\\' && pp[1] != 0) pp++;
1166     else if (*pp == delimiter) break;
1167     pp++;
1168     }
1169 nigel 3 if (*pp != 0) break;
1170 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1171 nigel 3 {
1172     fprintf(outfile, "** Unexpected EOF\n");
1173 nigel 11 done = 1;
1174     goto CONTINUE;
1175 nigel 3 }
1176 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1177 nigel 3 }
1178    
1179 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1180     pointer to the correct relative point in the buffer. */
1181    
1182     p = buffer + poffset;
1183    
1184 nigel 29 /* If the first character after the delimiter is backslash, make
1185     the pattern end with backslash. This is purely to provide a way
1186     of testing for the error message when a pattern ends with backslash. */
1187    
1188     if (pp[1] == '\\') *pp++ = '\\';
1189    
1190 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1191     for callouts. */
1192 nigel 3
1193     *pp++ = 0;
1194 nigel 75 strcpy((char *)pbuffer, (char *)p);
1195 nigel 3
1196     /* Look for options after final delimiter */
1197    
1198     options = 0;
1199     study_options = 0;
1200 nigel 31 log_store = showstore; /* default from command line */
1201    
1202 nigel 3 while (*pp != 0)
1203     {
1204     switch (*pp++)
1205     {
1206 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1207 nigel 35 case 'g': do_g = 1; break;
1208 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1209     case 'm': options |= PCRE_MULTILINE; break;
1210     case 's': options |= PCRE_DOTALL; break;
1211     case 'x': options |= PCRE_EXTENDED; break;
1212 nigel 25
1213 nigel 35 case '+': do_showrest = 1; break;
1214 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1215 nigel 93 case 'B': do_debug = 1; break;
1216 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1217 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1218 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1219 nigel 75 case 'F': do_flip = 1; break;
1220 nigel 35 case 'G': do_G = 1; break;
1221 nigel 25 case 'I': do_showinfo = 1; break;
1222 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1223 nigel 31 case 'M': log_store = 1; break;
1224 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1225 nigel 37
1226     #if !defined NOPOSIX
1227 nigel 3 case 'P': do_posix = 1; break;
1228 nigel 37 #endif
1229    
1230 nigel 3 case 'S': do_study = 1; break;
1231 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1232 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1233 ph10 126 case 'Z': debug_lengths = 0; break;
1234 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1235 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1236 nigel 25
1237     case 'L':
1238     ppp = pp;
1239 nigel 93 /* The '\r' test here is so that it works on Windows. */
1240     /* The '0' test is just in case this is an unterminated line. */
1241     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1242 nigel 25 *ppp = 0;
1243     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1244     {
1245     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1246     goto SKIP_DATA;
1247     }
1248 nigel 93 locale_set = 1;
1249 nigel 25 tables = pcre_maketables();
1250     pp = ppp;
1251     break;
1252    
1253 nigel 75 case '>':
1254     to_file = pp;
1255     while (*pp != 0) pp++;
1256     while (isspace(pp[-1])) pp--;
1257     *pp = 0;
1258     break;
1259    
1260 nigel 91 case '<':
1261     {
1262 ph10 336 if (strncmp((char *)pp, "JS>", 3) == 0)
1263     {
1264     options |= PCRE_JAVASCRIPT_COMPAT;
1265 ph10 345 pp += 3;
1266 ph10 336 }
1267     else
1268 ph10 345 {
1269 ph10 336 int x = check_newline(pp, outfile);
1270     if (x == 0) goto SKIP_DATA;
1271     options |= x;
1272     while (*pp++ != '>');
1273 ph10 345 }
1274 nigel 91 }
1275     break;
1276    
1277 nigel 77 case '\r': /* So that it works in Windows */
1278     case '\n':
1279     case ' ':
1280     break;
1281 nigel 75
1282 nigel 3 default:
1283     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1284     goto SKIP_DATA;
1285     }
1286     }
1287    
1288 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1289 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1290     local character tables. */
1291 nigel 3
1292 nigel 37 #if !defined NOPOSIX
1293 nigel 3 if (posix || do_posix)
1294     {
1295     int rc;
1296     int cflags = 0;
1297 nigel 75
1298 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1299     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1300 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1301 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1302     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1303    
1304 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1305    
1306     /* Compilation failed; go back for another re, skipping to blank line
1307     if non-interactive. */
1308    
1309     if (rc != 0)
1310     {
1311 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1312 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1313     goto SKIP_DATA;
1314     }
1315     }
1316    
1317     /* Handle compiling via the native interface */
1318    
1319     else
1320 nigel 37 #endif /* !defined NOPOSIX */
1321    
1322 nigel 3 {
1323 nigel 93 if (timeit > 0)
1324 nigel 3 {
1325     register int i;
1326     clock_t time_taken;
1327     clock_t start_time = clock();
1328 nigel 93 for (i = 0; i < timeit; i++)
1329 nigel 3 {
1330 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1331 nigel 3 if (re != NULL) free(re);
1332     }
1333     time_taken = clock() - start_time;
1334 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1335     (((double)time_taken * 1000.0) / (double)timeit) /
1336 nigel 63 (double)CLOCKS_PER_SEC);
1337 nigel 3 }
1338    
1339 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1340 nigel 3
1341     /* Compilation failed; go back for another re, skipping to blank line
1342     if non-interactive. */
1343    
1344     if (re == NULL)
1345     {
1346     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1347     SKIP_DATA:
1348     if (infile != stdin)
1349     {
1350     for (;;)
1351     {
1352 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
1353 nigel 11 {
1354     done = 1;
1355     goto CONTINUE;
1356     }
1357 nigel 3 len = (int)strlen((char *)buffer);
1358     while (len > 0 && isspace(buffer[len-1])) len--;
1359     if (len == 0) break;
1360     }
1361     fprintf(outfile, "\n");
1362     }
1363 nigel 25 goto CONTINUE;
1364 nigel 3 }
1365    
1366 nigel 43 /* Compilation succeeded; print data if required. There are now two
1367     info-returning functions. The old one has a limited interface and
1368     returns only limited data. Check that it agrees with the newer one. */
1369 nigel 3
1370 nigel 63 if (log_store)
1371     fprintf(outfile, "Memory allocation (code space): %d\n",
1372     (int)(gotten_store -
1373     sizeof(real_pcre) -
1374     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1375    
1376 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1377     and remember the store that was got. */
1378    
1379     true_size = ((real_pcre *)re)->size;
1380     regex_gotten_store = gotten_store;
1381    
1382     /* If /S was present, study the regexp to generate additional info to
1383     help with the matching. */
1384    
1385     if (do_study)
1386     {
1387 nigel 93 if (timeit > 0)
1388 nigel 75 {
1389     register int i;
1390     clock_t time_taken;
1391     clock_t start_time = clock();
1392 nigel 93 for (i = 0; i < timeit; i++)
1393 nigel 75 extra = pcre_study(re, study_options, &error);
1394     time_taken = clock() - start_time;
1395     if (extra != NULL) free(extra);
1396 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1397     (((double)time_taken * 1000.0) / (double)timeit) /
1398 nigel 75 (double)CLOCKS_PER_SEC);
1399     }
1400     extra = pcre_study(re, study_options, &error);
1401     if (error != NULL)
1402     fprintf(outfile, "Failed to study: %s\n", error);
1403     else if (extra != NULL)
1404     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1405     }
1406    
1407     /* If the 'F' option was present, we flip the bytes of all the integer
1408     fields in the regex data block and the study block. This is to make it
1409     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1410     compiled on a different architecture. */
1411    
1412     if (do_flip)
1413     {
1414     real_pcre *rre = (real_pcre *)re;
1415 ph10 259 rre->magic_number =
1416 ph10 255 byteflip(rre->magic_number, sizeof(rre->magic_number));
1417 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
1418     rre->options = byteflip(rre->options, sizeof(rre->options));
1419 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1420 ph10 259 rre->top_bracket =
1421 ph10 255 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1422 ph10 259 rre->top_backref =
1423 ph10 255 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1424 ph10 259 rre->first_byte =
1425 ph10 255 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1426 ph10 259 rre->req_byte =
1427 ph10 255 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1428     rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1429 nigel 75 sizeof(rre->name_table_offset));
1430 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1431 nigel 75 sizeof(rre->name_entry_size));
1432 ph10 259 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1433 ph10 255 sizeof(rre->name_count));
1434 nigel 75
1435     if (extra != NULL)
1436     {
1437     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1438     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1439     rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1440     }
1441     }
1442    
1443     /* Extract information from the compiled data if required */
1444    
1445     SHOW_INFO:
1446    
1447 nigel 93 if (do_debug)
1448     {
1449     fprintf(outfile, "------------------------------------------------------------------\n");
1450 ph10 116 pcre_printint(re, outfile, debug_lengths);
1451 nigel 93 }
1452    
1453 nigel 25 if (do_showinfo)
1454 nigel 3 {
1455 nigel 75 unsigned long int get_options, all_options;
1456 nigel 79 #if !defined NOINFOCHECK
1457 nigel 43 int old_first_char, old_options, old_count;
1458 nigel 79 #endif
1459 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1460 ph10 227 hascrorlf;
1461 nigel 63 int nameentrysize, namecount;
1462     const uschar *nametable;
1463 nigel 3
1464 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1465 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1466     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1467     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1468 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1469 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1470 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1471     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1472 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1473 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1474     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1475 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1476 nigel 43
1477 nigel 79 #if !defined NOINFOCHECK
1478 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1479 nigel 3 if (count < 0) fprintf(outfile,
1480 nigel 43 "Error %d from pcre_info()\n", count);
1481 nigel 3 else
1482     {
1483 nigel 43 if (old_count != count) fprintf(outfile,
1484     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1485     old_count);
1486 nigel 37
1487 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1488     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1489     first_char, old_first_char);
1490 nigel 37
1491 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1492     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1493     get_options, old_options);
1494 nigel 43 }
1495 nigel 79 #endif
1496 nigel 43
1497 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1498 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1499 nigel 77 (int)size, (int)regex_gotten_store);
1500 nigel 43
1501     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1502     if (backrefmax > 0)
1503     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1504 nigel 63
1505     if (namecount > 0)
1506     {
1507     fprintf(outfile, "Named capturing subpatterns:\n");
1508     while (namecount-- > 0)
1509     {
1510     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1511     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1512     GET2(nametable, 0));
1513     nametable += nameentrysize;
1514     }
1515     }
1516 ph10 172
1517 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1518 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1519 nigel 63
1520 nigel 75 all_options = ((real_pcre *)re)->options;
1521 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1522 nigel 75
1523 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1524 ph10 231 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1525 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1526     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1527     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1528     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1529 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1530 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1531 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1532     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1533 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1534     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1535     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1536 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1537 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1538 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1539     ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1540 ph10 172
1541 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1542 nigel 43
1543 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1544 nigel 91 {
1545     case PCRE_NEWLINE_CR:
1546     fprintf(outfile, "Forced newline sequence: CR\n");
1547     break;
1548 nigel 43
1549 nigel 91 case PCRE_NEWLINE_LF:
1550     fprintf(outfile, "Forced newline sequence: LF\n");
1551     break;
1552    
1553     case PCRE_NEWLINE_CRLF:
1554     fprintf(outfile, "Forced newline sequence: CRLF\n");
1555     break;
1556    
1557 ph10 149 case PCRE_NEWLINE_ANYCRLF:
1558     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1559     break;
1560    
1561 nigel 93 case PCRE_NEWLINE_ANY:
1562     fprintf(outfile, "Forced newline sequence: ANY\n");
1563     break;
1564    
1565 nigel 91 default:
1566     break;
1567     }
1568    
1569 nigel 43 if (first_char == -1)
1570     {
1571 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1572 nigel 43 }
1573     else if (first_char < 0)
1574     {
1575     fprintf(outfile, "No first char\n");
1576     }
1577     else
1578     {
1579 nigel 63 int ch = first_char & 255;
1580 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1581 nigel 63 "" : " (caseless)";
1582 nigel 93 if (PRINTHEX(ch))
1583 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1584 nigel 3 else
1585 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1586 nigel 43 }
1587 nigel 37
1588 nigel 43 if (need_char < 0)
1589     {
1590     fprintf(outfile, "No need char\n");
1591 nigel 3 }
1592 nigel 43 else
1593     {
1594 nigel 63 int ch = need_char & 255;
1595 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1596 nigel 63 "" : " (caseless)";
1597 nigel 93 if (PRINTHEX(ch))
1598 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1599 nigel 43 else
1600 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1601 nigel 43 }
1602 nigel 75
1603     /* Don't output study size; at present it is in any case a fixed
1604     value, but it varies, depending on the computer architecture, and
1605     so messes up the test suite. (And with the /F option, it might be
1606     flipped.) */
1607    
1608     if (do_study)
1609     {
1610     if (extra == NULL)
1611     fprintf(outfile, "Study returned NULL\n");
1612     else
1613     {
1614     uschar *start_bits = NULL;
1615     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1616    
1617     if (start_bits == NULL)
1618     fprintf(outfile, "No starting byte set\n");
1619     else
1620     {
1621     int i;
1622     int c = 24;
1623     fprintf(outfile, "Starting byte set: ");
1624     for (i = 0; i < 256; i++)
1625     {
1626     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1627     {
1628     if (c > 75)
1629     {
1630     fprintf(outfile, "\n ");
1631     c = 2;
1632     }
1633 nigel 93 if (PRINTHEX(i) && i != ' ')
1634 nigel 75 {
1635     fprintf(outfile, "%c ", i);
1636     c += 2;
1637     }
1638     else
1639     {
1640     fprintf(outfile, "\\x%02x ", i);
1641     c += 5;
1642     }
1643     }
1644     }
1645     fprintf(outfile, "\n");
1646     }
1647     }
1648     }
1649 nigel 3 }
1650    
1651 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1652     that is all. The first 8 bytes of the file are the regex length and then
1653     the study length, in big-endian order. */
1654 nigel 3
1655 nigel 75 if (to_file != NULL)
1656 nigel 3 {
1657 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1658     if (f == NULL)
1659 nigel 3 {
1660 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1661 nigel 3 }
1662 nigel 75 else
1663     {
1664     uschar sbuf[8];
1665 ph10 255 sbuf[0] = (uschar)((true_size >> 24) & 255);
1666     sbuf[1] = (uschar)((true_size >> 16) & 255);
1667     sbuf[2] = (uschar)((true_size >> 8) & 255);
1668     sbuf[3] = (uschar)((true_size) & 255);
1669 ph10 259
1670 ph10 255 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1671     sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1672     sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1673     sbuf[7] = (uschar)((true_study_size) & 255);
1674 nigel 3
1675 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1676     fwrite(re, 1, true_size, f) < true_size)
1677     {
1678     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1679     }
1680 nigel 3 else
1681     {
1682 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1683     if (extra != NULL)
1684 nigel 3 {
1685 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1686     true_study_size)
1687 nigel 3 {
1688 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1689     strerror(errno));
1690 nigel 3 }
1691 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1692 nigel 93
1693 nigel 3 }
1694     }
1695 nigel 75 fclose(f);
1696 nigel 3 }
1697 nigel 77
1698     new_free(re);
1699     if (extra != NULL) new_free(extra);
1700     if (tables != NULL) new_free((void *)tables);
1701 nigel 75 continue; /* With next regex */
1702 nigel 3 }
1703 nigel 75 } /* End of non-POSIX compile */
1704 nigel 3
1705     /* Read data lines and test them */
1706    
1707     for (;;)
1708     {
1709 nigel 87 uschar *q;
1710 ph10 147 uschar *bptr;
1711 nigel 57 int *use_offsets = offsets;
1712 nigel 53 int use_size_offsets = size_offsets;
1713 nigel 63 int callout_data = 0;
1714     int callout_data_set = 0;
1715 nigel 3 int count, c;
1716 nigel 29 int copystrings = 0;
1717 ph10 386 int find_match_limit = default_find_match_limit;
1718 nigel 29 int getstrings = 0;
1719     int getlist = 0;
1720 nigel 39 int gmatched = 0;
1721 nigel 35 int start_offset = 0;
1722 nigel 41 int g_notempty = 0;
1723 nigel 77 int use_dfa = 0;
1724 nigel 3
1725     options = 0;
1726    
1727 nigel 91 *copynames = 0;
1728     *getnames = 0;
1729    
1730     copynamesptr = copynames;
1731     getnamesptr = getnames;
1732    
1733 nigel 63 pcre_callout = callout;
1734     first_callout = 1;
1735     callout_extra = 0;
1736     callout_count = 0;
1737     callout_fail_count = 999999;
1738     callout_fail_id = -1;
1739 nigel 73 show_malloc = 0;
1740 nigel 63
1741 nigel 91 if (extra != NULL) extra->flags &=
1742     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1743    
1744     len = 0;
1745     for (;;)
1746 nigel 11 {
1747 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1748 nigel 91 {
1749     if (len > 0) break;
1750     done = 1;
1751     goto CONTINUE;
1752     }
1753     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1754     len = (int)strlen((char *)buffer);
1755     if (buffer[len-1] == '\n') break;
1756 nigel 11 }
1757 nigel 3
1758     while (len > 0 && isspace(buffer[len-1])) len--;
1759     buffer[len] = 0;
1760     if (len == 0) break;
1761    
1762     p = buffer;
1763     while (isspace(*p)) p++;
1764    
1765 ph10 147 bptr = q = dbuffer;
1766 nigel 3 while ((c = *p++) != 0)
1767     {
1768     int i = 0;
1769     int n = 0;
1770 nigel 63
1771 nigel 3 if (c == '\\') switch ((c = *p++))
1772     {
1773     case 'a': c = 7; break;
1774     case 'b': c = '\b'; break;
1775     case 'e': c = 27; break;
1776     case 'f': c = '\f'; break;
1777     case 'n': c = '\n'; break;
1778     case 'r': c = '\r'; break;
1779     case 't': c = '\t'; break;
1780     case 'v': c = '\v'; break;
1781    
1782     case '0': case '1': case '2': case '3':
1783     case '4': case '5': case '6': case '7':
1784     c -= '0';
1785     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1786     c = c * 8 + *p++ - '0';
1787 nigel 91
1788     #if !defined NOUTF8
1789     if (use_utf8 && c > 255)
1790     {
1791     unsigned char buff8[8];
1792     int ii, utn;
1793     utn = ord2utf8(c, buff8);
1794     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1795     c = buff8[ii]; /* Last byte */
1796     }
1797     #endif
1798 nigel 3 break;
1799    
1800     case 'x':
1801 nigel 49
1802     /* Handle \x{..} specially - new Perl thing for utf8 */
1803    
1804 nigel 79 #if !defined NOUTF8
1805 nigel 49 if (*p == '{')
1806     {
1807     unsigned char *pt = p;
1808     c = 0;
1809     while (isxdigit(*(++pt)))
1810     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1811     if (*pt == '}')
1812     {
1813 nigel 67 unsigned char buff8[8];
1814 nigel 49 int ii, utn;
1815 ph10 355 if (use_utf8)
1816 ph10 358 {
1817 ph10 355 utn = ord2utf8(c, buff8);
1818     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1819     c = buff8[ii]; /* Last byte */
1820     }
1821     else
1822     {
1823 ph10 358 if (c > 255)
1824 ph10 355 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1825     "UTF-8 mode is not enabled.\n"
1826     "** Truncation will probably give the wrong result.\n", c);
1827 ph10 358 }
1828 nigel 49 p = pt + 1;
1829     break;
1830     }
1831     /* Not correct form; fall through */
1832     }
1833 nigel 79 #endif
1834 nigel 49
1835     /* Ordinary \x */
1836    
1837 nigel 3 c = 0;
1838     while (i++ < 2 && isxdigit(*p))
1839     {
1840     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1841     p++;
1842     }
1843     break;
1844    
1845 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1846 nigel 3 p--;
1847     continue;
1848    
1849 nigel 75 case '>':
1850     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1851     continue;
1852    
1853 nigel 3 case 'A': /* Option setting */
1854     options |= PCRE_ANCHORED;
1855     continue;
1856    
1857     case 'B':
1858     options |= PCRE_NOTBOL;
1859     continue;
1860    
1861 nigel 29 case 'C':
1862 nigel 63 if (isdigit(*p)) /* Set copy string */
1863     {
1864     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1865     copystrings |= 1 << n;
1866     }
1867     else if (isalnum(*p))
1868     {
1869 nigel 91 uschar *npp = copynamesptr;
1870 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1871 nigel 91 *npp++ = 0;
1872 nigel 67 *npp = 0;
1873 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1874 nigel 63 if (n < 0)
1875 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1876     copynamesptr = npp;
1877 nigel 63 }
1878     else if (*p == '+')
1879     {
1880     callout_extra = 1;
1881     p++;
1882     }
1883     else if (*p == '-')
1884     {
1885     pcre_callout = NULL;
1886     p++;
1887     }
1888     else if (*p == '!')
1889     {
1890     callout_fail_id = 0;
1891     p++;
1892     while(isdigit(*p))
1893     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1894     callout_fail_count = 0;
1895     if (*p == '!')
1896     {
1897     p++;
1898     while(isdigit(*p))
1899     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1900     }
1901     }
1902     else if (*p == '*')
1903     {
1904     int sign = 1;
1905     callout_data = 0;
1906     if (*(++p) == '-') { sign = -1; p++; }
1907     while(isdigit(*p))
1908     callout_data = callout_data * 10 + *p++ - '0';
1909     callout_data *= sign;
1910     callout_data_set = 1;
1911     }
1912 nigel 29 continue;
1913    
1914 nigel 79 #if !defined NODFA
1915 nigel 77 case 'D':
1916 nigel 79 #if !defined NOPOSIX
1917 nigel 77 if (posix || do_posix)
1918     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1919     else
1920 nigel 79 #endif
1921 nigel 77 use_dfa = 1;
1922     continue;
1923    
1924     case 'F':
1925     options |= PCRE_DFA_SHORTEST;
1926     continue;
1927 nigel 79 #endif
1928 nigel 77
1929 nigel 29 case 'G':
1930 nigel 63 if (isdigit(*p))
1931     {
1932     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1933     getstrings |= 1 << n;
1934     }
1935     else if (isalnum(*p))
1936     {
1937 nigel 91 uschar *npp = getnamesptr;
1938 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1939 nigel 91 *npp++ = 0;
1940 nigel 67 *npp = 0;
1941 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1942 nigel 63 if (n < 0)
1943 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1944     getnamesptr = npp;
1945 nigel 63 }
1946 nigel 29 continue;
1947    
1948     case 'L':
1949     getlist = 1;
1950     continue;
1951    
1952 nigel 63 case 'M':
1953     find_match_limit = 1;
1954     continue;
1955    
1956 nigel 37 case 'N':
1957     options |= PCRE_NOTEMPTY;
1958     continue;
1959    
1960 nigel 3 case 'O':
1961     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1962 nigel 53 if (n > size_offsets_max)
1963     {
1964     size_offsets_max = n;
1965 nigel 57 free(offsets);
1966 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1967 nigel 53 if (offsets == NULL)
1968     {
1969     printf("** Failed to get %d bytes of memory for offsets vector\n",
1970 ph10 151 (int)(size_offsets_max * sizeof(int)));
1971 nigel 77 yield = 1;
1972     goto EXIT;
1973 nigel 53 }
1974     }
1975     use_size_offsets = n;
1976 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1977 nigel 3 continue;
1978    
1979 nigel 75 case 'P':
1980     options |= PCRE_PARTIAL;
1981     continue;
1982    
1983 nigel 91 case 'Q':
1984     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1985     if (extra == NULL)
1986     {
1987     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1988     extra->flags = 0;
1989     }
1990     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1991     extra->match_limit_recursion = n;
1992     continue;
1993    
1994     case 'q':
1995     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1996     if (extra == NULL)
1997     {
1998     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1999     extra->flags = 0;
2000     }
2001     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2002     extra->match_limit = n;
2003     continue;
2004    
2005 nigel 79 #if !defined NODFA
2006 nigel 77 case 'R':
2007     options |= PCRE_DFA_RESTART;
2008     continue;
2009 nigel 79 #endif
2010 nigel 77
2011 nigel 73 case 'S':
2012     show_malloc = 1;
2013     continue;
2014 ph10 392
2015 ph10 389 case 'Y':
2016     options |= PCRE_NO_START_OPTIMIZE;
2017 ph10 392 continue;
2018 nigel 73
2019 nigel 3 case 'Z':
2020     options |= PCRE_NOTEOL;
2021     continue;
2022 nigel 71
2023     case '?':
2024     options |= PCRE_NO_UTF8_CHECK;
2025     continue;
2026 nigel 91
2027     case '<':
2028     {
2029     int x = check_newline(p, outfile);
2030     if (x == 0) goto NEXT_DATA;
2031     options |= x;
2032     while (*p++ != '>');
2033     }
2034     continue;
2035 nigel 3 }
2036 nigel 9 *q++ = c;
2037 nigel 3 }
2038 nigel 9 *q = 0;
2039     len = q - dbuffer;
2040 ph10 371
2041 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
2042 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2043 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
2044 ph10 371
2045 ph10 363 #if !defined NOPOSIX
2046     if (posix || do_posix)
2047     {
2048     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2049 ph10 371 bptr += buffer_size - len - 1;
2050 ph10 363 }
2051 ph10 371 else
2052     #endif
2053 ph10 363 {
2054     memmove(bptr + buffer_size - len, bptr, len);
2055 ph10 371 bptr += buffer_size - len;
2056     }
2057 nigel 3
2058 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
2059     {
2060     printf("**Match limit not relevant for DFA matching: ignored\n");
2061     find_match_limit = 0;
2062     }
2063    
2064 nigel 3 /* Handle matching via the POSIX interface, which does not
2065 nigel 63 support timing or playing with the match limit or callout data. */
2066 nigel 3
2067 nigel 37 #if !defined NOPOSIX
2068 nigel 3 if (posix || do_posix)
2069     {
2070     int rc;
2071     int eflags = 0;
2072 nigel 63 regmatch_t *pmatch = NULL;
2073     if (use_size_offsets > 0)
2074 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2075 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2076     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2077 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2078 nigel 3
2079 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2080 nigel 3
2081     if (rc != 0)
2082     {
2083 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2084 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2085     }
2086 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2087     != 0)
2088     {
2089     fprintf(outfile, "Matched with REG_NOSUB\n");
2090     }
2091 nigel 3 else
2092     {
2093 nigel 7 size_t i;
2094 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
2095 nigel 3 {
2096     if (pmatch[i].rm_so >= 0)
2097     {
2098 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2099 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2100     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2101 nigel 3 fprintf(outfile, "\n");
2102 nigel 35 if (i == 0 && do_showrest)
2103     {
2104     fprintf(outfile, " 0+ ");
2105 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2106     outfile);
2107 nigel 35 fprintf(outfile, "\n");
2108     }
2109 nigel 3 }
2110     }
2111     }
2112 nigel 53 free(pmatch);
2113 nigel 3 }
2114    
2115 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2116 nigel 3
2117 nigel 37 else
2118     #endif /* !defined NOPOSIX */
2119    
2120 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2121 nigel 3 {
2122 nigel 93 if (timeitm > 0)
2123 nigel 3 {
2124     register int i;
2125     clock_t time_taken;
2126     clock_t start_time = clock();
2127 nigel 77
2128 nigel 79 #if !defined NODFA
2129 nigel 77 if (all_use_dfa || use_dfa)
2130     {
2131     int workspace[1000];
2132 nigel 93 for (i = 0; i < timeitm; i++)
2133 nigel 77 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2134     options | g_notempty, use_offsets, use_size_offsets, workspace,
2135     sizeof(workspace)/sizeof(int));
2136     }
2137     else
2138 nigel 79 #endif
2139 nigel 77
2140 nigel 93 for (i = 0; i < timeitm; i++)
2141 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2142 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2143 nigel 77
2144 nigel 3 time_taken = clock() - start_time;
2145 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2146     (((double)time_taken * 1000.0) / (double)timeitm) /
2147 nigel 63 (double)CLOCKS_PER_SEC);
2148 nigel 3 }
2149    
2150 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2151 nigel 87 varying limits in order to find the minimum value for the match limit and
2152     for the recursion limit. */
2153 nigel 63
2154     if (find_match_limit)
2155     {
2156     if (extra == NULL)
2157     {
2158 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2159 nigel 63 extra->flags = 0;
2160     }
2161    
2162 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2163 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2164     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2165     PCRE_ERROR_MATCHLIMIT, "match()");
2166 nigel 63
2167 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2168     options|g_notempty, use_offsets, use_size_offsets,
2169     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2170     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2171 nigel 63 }
2172    
2173     /* If callout_data is set, use the interface with additional data */
2174    
2175     else if (callout_data_set)
2176     {
2177     if (extra == NULL)
2178     {
2179 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2180 nigel 63 extra->flags = 0;
2181     }
2182     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2183 nigel 71 extra->callout_data = &callout_data;
2184 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2185     options | g_notempty, use_offsets, use_size_offsets);
2186     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2187     }
2188    
2189     /* The normal case is just to do the match once, with the default
2190     value of match_limit. */
2191    
2192 nigel 79 #if !defined NODFA
2193 nigel 77 else if (all_use_dfa || use_dfa)
2194     {
2195     int workspace[1000];
2196     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2197     options | g_notempty, use_offsets, use_size_offsets, workspace,
2198     sizeof(workspace)/sizeof(int));
2199     if (count == 0)
2200     {
2201     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2202     count = use_size_offsets/2;
2203     }
2204     }
2205 nigel 79 #endif
2206 nigel 77
2207 nigel 75 else
2208     {
2209     count = pcre_exec(re, extra, (char *)bptr, len,
2210     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2211 nigel 77 if (count == 0)
2212     {
2213     fprintf(outfile, "Matched, but too many substrings\n");
2214     count = use_size_offsets/3;
2215     }
2216 nigel 75 }
2217 nigel 3
2218 nigel 39 /* Matched */
2219    
2220 nigel 3 if (count >= 0)
2221     {
2222 nigel 93 int i, maxcount;
2223    
2224     #if !defined NODFA
2225     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2226     #endif
2227     maxcount = use_size_offsets/3;
2228    
2229     /* This is a check against a lunatic return value. */
2230    
2231     if (count > maxcount)
2232     {
2233     fprintf(outfile,
2234     "** PCRE error: returned count %d is too big for offset size %d\n",
2235     count, use_size_offsets);
2236     count = use_size_offsets/3;
2237     if (do_g || do_G)
2238     {
2239     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2240     do_g = do_G = FALSE; /* Break g/G loop */
2241     }
2242     }
2243    
2244 nigel 29 for (i = 0; i < count * 2; i += 2)
2245 nigel 3 {
2246 nigel 57 if (use_offsets[i] < 0)
2247 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2248     else
2249     {
2250     fprintf(outfile, "%2d: ", i/2);
2251 nigel 63 (void)pchars(bptr + use_offsets[i],
2252     use_offsets[i+1] - use_offsets[i], outfile);
2253 nigel 3 fprintf(outfile, "\n");
2254 nigel 35 if (i == 0)
2255     {
2256     if (do_showrest)
2257     {
2258     fprintf(outfile, " 0+ ");
2259 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2260     outfile);
2261 nigel 35 fprintf(outfile, "\n");
2262     }
2263     }
2264 nigel 3 }
2265     }
2266 nigel 29
2267     for (i = 0; i < 32; i++)
2268     {
2269     if ((copystrings & (1 << i)) != 0)
2270     {
2271 nigel 91 char copybuffer[256];
2272 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2273 nigel 37 i, copybuffer, sizeof(copybuffer));
2274 nigel 29 if (rc < 0)
2275     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2276     else
2277 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2278 nigel 29 }
2279     }
2280    
2281 nigel 91 for (copynamesptr = copynames;
2282     *copynamesptr != 0;
2283     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2284     {
2285     char copybuffer[256];
2286     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2287     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2288     if (rc < 0)
2289     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2290     else
2291     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2292     }
2293    
2294 nigel 29 for (i = 0; i < 32; i++)
2295     {
2296     if ((getstrings & (1 << i)) != 0)
2297     {
2298     const char *substring;
2299 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2300 nigel 29 i, &substring);
2301     if (rc < 0)
2302     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2303     else
2304     {
2305     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2306 nigel 49 pcre_free_substring(substring);
2307 nigel 29 }
2308     }
2309     }
2310    
2311 nigel 91 for (getnamesptr = getnames;
2312     *getnamesptr != 0;
2313     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2314     {
2315     const char *substring;
2316     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2317     count, (char *)getnamesptr, &substring);
2318     if (rc < 0)
2319     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2320     else
2321     {
2322     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2323     pcre_free_substring(substring);
2324     }
2325     }
2326    
2327 nigel 29 if (getlist)
2328     {
2329     const char **stringlist;
2330 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2331 nigel 29 &stringlist);
2332     if (rc < 0)
2333     fprintf(outfile, "get substring list failed %d\n", rc);
2334     else
2335     {
2336     for (i = 0; i < count; i++)
2337     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2338     if (stringlist[i] != NULL)
2339     fprintf(outfile, "string list not terminated by NULL\n");
2340 nigel 49 /* free((void *)stringlist); */
2341     pcre_free_substring_list(stringlist);
2342 nigel 29 }
2343     }
2344 nigel 39 }
2345 nigel 29
2346 nigel 75 /* There was a partial match */
2347    
2348     else if (count == PCRE_ERROR_PARTIAL)
2349     {
2350 nigel 77 fprintf(outfile, "Partial match");
2351 nigel 79 #if !defined NODFA
2352 nigel 77 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2353     fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2354     bptr + use_offsets[0]);
2355 nigel 79 #endif
2356 nigel 77 fprintf(outfile, "\n");
2357 nigel 75 break; /* Out of the /g loop */
2358     }
2359    
2360 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2361 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2362     to advance the start offset, and continue. We won't be at the end of the
2363     string - that was checked before setting g_notempty.
2364 nigel 39
2365 ph10 150 Complication arises in the case when the newline option is "any" or
2366 ph10 149 "anycrlf". If the previous match was at the end of a line terminated by
2367     CRLF, an advance of one character just passes the \r, whereas we should
2368     prefer the longer newline sequence, as does the code in pcre_exec().
2369     Fudge the offset value to achieve this.
2370 ph10 144
2371 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2372     character, not one byte. */
2373    
2374 nigel 3 else
2375     {
2376 nigel 41 if (g_notempty != 0)
2377 nigel 35 {
2378 nigel 73 int onechar = 1;
2379 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2380 nigel 57 use_offsets[0] = start_offset;
2381 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2382     {
2383     int d;
2384     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2385 ph10 391 /* Note that these values are always the ASCII ones, even in
2386     EBCDIC environments. CR = 13, NL = 10. */
2387     obits = (d == 13)? PCRE_NEWLINE_CR :
2388     (d == 10)? PCRE_NEWLINE_LF :
2389     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2390 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2391 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2392     }
2393 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2394 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2395 ph10 149 &&
2396 ph10 143 start_offset < len - 1 &&
2397     bptr[start_offset] == '\r' &&
2398     bptr[start_offset+1] == '\n')
2399 ph10 144 onechar++;
2400 ph10 143 else if (use_utf8)
2401 nigel 73 {
2402     while (start_offset + onechar < len)
2403     {
2404     int tb = bptr[start_offset+onechar];
2405     if (tb <= 127) break;
2406     tb &= 0xc0;
2407     if (tb != 0 && tb != 0xc0) onechar++;
2408     }
2409     }
2410     use_offsets[1] = start_offset + onechar;
2411 nigel 35 }
2412 nigel 41 else
2413     {
2414 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2415 nigel 41 {
2416 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
2417 nigel 41 }
2418 nigel 73 else fprintf(outfile, "Error %d\n", count);
2419 nigel 41 break; /* Out of the /g loop */
2420     }
2421 nigel 3 }
2422 nigel 35
2423 nigel 39 /* If not /g or /G we are done */
2424    
2425     if (!do_g && !do_G) break;
2426    
2427 nigel 41 /* If we have matched an empty string, first check to see if we are at
2428     the end of the subject. If so, the /g loop is over. Otherwise, mimic
2429     what Perl's /g option does. This turns out to be rather cunning. First
2430 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2431     same point. If this fails (picked up above) we advance to the next
2432 ph10 143 character. */
2433 ph10 142
2434 nigel 41 g_notempty = 0;
2435 ph10 142
2436 nigel 57 if (use_offsets[0] == use_offsets[1])
2437 nigel 41 {
2438 nigel 57 if (use_offsets[0] == len) break;
2439 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2440 nigel 41 }
2441 nigel 39
2442     /* For /g, update the start offset, leaving the rest alone */
2443    
2444 ph10 143 if (do_g) start_offset = use_offsets[1];
2445 nigel 39
2446     /* For /G, update the pointer and length */
2447    
2448     else
2449 nigel 35 {
2450 ph10 143 bptr += use_offsets[1];
2451     len -= use_offsets[1];
2452 nigel 35 }
2453 nigel 39 } /* End of loop for /g and /G */
2454 nigel 91
2455     NEXT_DATA: continue;
2456 nigel 39 } /* End of loop for data lines */
2457 nigel 3
2458 nigel 11 CONTINUE:
2459 nigel 37
2460     #if !defined NOPOSIX
2461 nigel 3 if (posix || do_posix) regfree(&preg);
2462 nigel 37 #endif
2463    
2464 nigel 77 if (re != NULL) new_free(re);
2465     if (extra != NULL) new_free(extra);
2466 nigel 25 if (tables != NULL)
2467     {
2468 nigel 77 new_free((void *)tables);
2469 nigel 25 setlocale(LC_CTYPE, "C");
2470 nigel 93 locale_set = 0;
2471 nigel 25 }
2472 nigel 3 }
2473    
2474 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2475 nigel 77
2476     EXIT:
2477    
2478     if (infile != NULL && infile != stdin) fclose(infile);
2479     if (outfile != NULL && outfile != stdout) fclose(outfile);
2480    
2481     free(buffer);
2482     free(dbuffer);
2483     free(pbuffer);
2484     free(offsets);
2485    
2486     return yield;
2487 nigel 3 }
2488    
2489 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12