/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Contents of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 411 - (hide annotations) (download)
Fri Apr 10 15:40:21 2009 UTC (5 years, 7 months ago) by ph10
Original Path: code/trunk/pcretest.c
File MIME type: text/plain
File size: 73274 byte(s)
Wrap fileno/isatty defs for Windows in #ifndefs

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50 nigel 3
51 ph10 287 #ifdef SUPPORT_LIBREADLINE
52 ph10 343 #ifdef HAVE_UNISTD_H
53 ph10 287 #include <unistd.h>
54 ph10 343 #endif
55 ph10 287 #include <readline/readline.h>
56     #include <readline/history.h>
57     #endif
58 nigel 93
59 ph10 287
/* A number of things vary for Windows builds. Originally, pcretest opened its
input and output without "b"; then I was told that "b" was needed in some
environments, so it was added for release 5.0 to both the input and output. (It
makes no difference on Unix-like systems.) Later I was told that it is wrong
for the input on Windows. The modes are therefore abstracted into the two
macros INPUT_MODE and OUTPUT_MODE, set here, to make it easier to fiddle with
them; "b" is not used in the input mode under Windows. */

#if defined(_WIN32) || defined(WIN32)
#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

/* isatty and fileno are what Windows calls _isatty and _fileno, I'm told,
though in some environments they seem to be already defined, hence the
#ifndefs. */

#ifndef isatty
#define isatty _isatty
#endif

#ifndef fileno
#define fileno _fileno
#endif

#else
#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */
#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"
#endif
88    
89 nigel 93
90 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
91     displaying the results of pcre_study() and we also need to know about the
92     internal macros, structures, and other internal data values; pcretest has
93     "inside information" compared to a program that strictly follows the PCRE API.
94 nigel 37
95 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
96     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
97     appropriately for an application, not for building PCRE. */
98 nigel 77
99 ph10 145 #include "pcre.h"
100 nigel 77 #include "pcre_internal.h"
101    
102 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
103     to keep two copies, we include the source file here, changing the names of the
104     external symbols to prevent clashes. */
105 nigel 77
106 ph10 351 #define _pcre_ucp_gentype ucp_gentype
107 nigel 85 #define _pcre_utf8_table1 utf8_table1
108     #define _pcre_utf8_table1_size utf8_table1_size
109     #define _pcre_utf8_table2 utf8_table2
110     #define _pcre_utf8_table3 utf8_table3
111     #define _pcre_utf8_table4 utf8_table4
112     #define _pcre_utt utt
113     #define _pcre_utt_size utt_size
114 ph10 240 #define _pcre_utt_names utt_names
115 nigel 85 #define _pcre_OP_lengths OP_lengths
116    
117     #include "pcre_tables.c"
118    
119     /* We also need the pcre_printint() function for printing out compiled
120     patterns. This function is in a separate file so that it can be included in
121 nigel 93 pcre_compile.c when that module is compiled with debugging enabled.
122 nigel 85
123 nigel 93 The definition of the macro PRINTABLE, which determines whether to print an
124     output character as-is or as a hex value when showing compiled patterns, is
125     contained in this file. We uses it here also, in cases when the locale has not
126     been explicitly changed, so as to get consistent output from systems that
127     differ in their output from isprint() even in the "C" locale. */
128    
129 nigel 85 #include "pcre_printint.src"
130    
131 nigel 93 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
132 nigel 85
133 nigel 93
134 nigel 37 /* It is possible to compile this test program without including support for
135     testing the POSIX interface, though this is not available via the standard
136     Makefile. */
137    
138     #if !defined NOPOSIX
139 nigel 3 #include "pcreposix.h"
140 nigel 37 #endif
141 nigel 3
142 ph10 107 /* It is also possible, for the benefit of the version currently imported into
143     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
144     interface to the DFA matcher (NODFA), and without the doublecheck of the old
145     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
146     UTF8 support if PCRE is built without it. */
147 nigel 79
148 ph10 107 #ifndef SUPPORT_UTF8
149     #ifndef NOUTF8
150     #define NOUTF8
151     #endif
152     #endif
153 nigel 79
154 ph10 107
155 nigel 85 /* Other parameters */
156    
157 nigel 3 #ifndef CLOCKS_PER_SEC
158     #ifdef CLK_TCK
159     #define CLOCKS_PER_SEC CLK_TCK
160     #else
161     #define CLOCKS_PER_SEC 100
162     #endif
163     #endif
164    
165 nigel 93 /* This is the default loop count for timing. */
166    
167 nigel 75 #define LOOPREPEAT 500000
168 nigel 3
169 nigel 85 /* Static variables */
170    
171 nigel 3 static FILE *outfile;
172     static int log_store = 0;
173 nigel 63 static int callout_count;
174     static int callout_extra;
175     static int callout_fail_count;
176     static int callout_fail_id;
177 ph10 210 static int debug_lengths;
178 nigel 63 static int first_callout;
179 nigel 93 static int locale_set = 0;
180 nigel 73 static int show_malloc;
181 nigel 67 static int use_utf8;
182 nigel 43 static size_t gotten_store;
183 nigel 3
184 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
185    
186     static int buffer_size = 50000;
187     static uschar *buffer = NULL;
188     static uschar *dbuffer = NULL;
189 nigel 75 static uschar *pbuffer = NULL;
190 nigel 3
191 nigel 75
192 nigel 49
193     /*************************************************
194 nigel 91 * Read or extend an input line *
195     *************************************************/
196    
197     /* Input lines are read into buffer, but both patterns and data lines can be
198     continued over multiple input lines. In addition, if the buffer fills up, we
199     want to automatically expand it so as to be able to handle extremely large
200     lines that are needed for certain stress tests. When the input buffer is
201     expanded, the other two buffers must also be expanded likewise, and the
202     contents of pbuffer, which are a copy of the input for callouts, must be
203     preserved (for when expansion happens for a data line). This is not the most
204     optimal way of handling this, but hey, this is just a test program!
205    
206     Arguments:
207     f the file to read
208     start where in buffer to start (this *must* be within buffer)
209 ph10 287 prompt for stdin or readline()
210 nigel 91
211     Returns: pointer to the start of new data
212     could be a copy of start, or could be moved
213     NULL if no data read and EOF reached
214     */
215    
216     static uschar *
217 ph10 287 extend_inputline(FILE *f, uschar *start, const char *prompt)
218 nigel 91 {
219     uschar *here = start;
220    
221     for (;;)
222     {
223     int rlen = buffer_size - (here - buffer);
224 nigel 93
225 nigel 91 if (rlen > 1000)
226     {
227     int dlen;
228 ph10 289
229 ph10 287 /* If libreadline support is required, use readline() to read a line if the
230     input is a terminal. Note that readline() removes the trailing newline, so
231     we must put it back again, to be compatible with fgets(). */
232 ph10 289
233 ph10 287 #ifdef SUPPORT_LIBREADLINE
234     if (isatty(fileno(f)))
235     {
236 ph10 289 size_t len;
237 ph10 287 char *s = readline(prompt);
238     if (s == NULL) return (here == start)? NULL : start;
239     len = strlen(s);
240 ph10 289 if (len > 0) add_history(s);
241 ph10 287 if (len > rlen - 1) len = rlen - 1;
242     memcpy(here, s, len);
243     here[len] = '\n';
244 ph10 289 here[len+1] = 0;
245     free(s);
246 ph10 287 }
247 ph10 289 else
248     #endif
249    
250 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
251 ph10 289
252 ph10 287 {
253 ph10 289 if (f == stdin) printf(prompt);
254 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
255     return (here == start)? NULL : start;
256 ph10 289 }
257    
258 nigel 91 dlen = (int)strlen((char *)here);
259     if (dlen > 0 && here[dlen - 1] == '\n') return start;
260     here += dlen;
261     }
262    
263     else
264     {
265     int new_buffer_size = 2*buffer_size;
266     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
267     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
268     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
269    
270     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
271     {
272     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
273     exit(1);
274     }
275    
276     memcpy(new_buffer, buffer, buffer_size);
277     memcpy(new_pbuffer, pbuffer, buffer_size);
278    
279     buffer_size = new_buffer_size;
280    
281     start = new_buffer + (start - buffer);
282     here = new_buffer + (here - buffer);
283    
284     free(buffer);
285     free(dbuffer);
286     free(pbuffer);
287    
288     buffer = new_buffer;
289     dbuffer = new_dbuffer;
290     pbuffer = new_pbuffer;
291     }
292     }
293    
294     return NULL; /* Control never gets here */
295     }
296    
297    
298    
299    
300    
301    
302    
/*************************************************
*         Read number from string                *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then decimal digits are accumulated. A digit
string too large for an int yields INT_MAX instead of overflowing; all of its
digits are still consumed, so *endptr ends up in the same place either way.

Arguments:
  str        string to be converted
  endptr     where to put the end pointer (first character not consumed)

Returns:     the value as a non-negative int (0 if there are no digits)
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (*str != 0 && isspace(*str)) str++;
while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* result * 10 + digit fits in an int only if result <= (INT_MAX-digit)/10 */

  if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
  }
*endptr = str;
return result;
}
327    
328    
329    
330 nigel 49
331     /*************************************************
332     * Convert UTF-8 string to value *
333     *************************************************/
334    
335     /* This function takes one or more bytes that represents a UTF-8 character,
336     and returns the value of the character.
337    
338     Argument:
339 nigel 91 utf8bytes a pointer to the byte vector
340     vptr a pointer to an int to receive the value
341 nigel 49
342 nigel 91 Returns: > 0 => the number of bytes consumed
343     -6 to 0 => malformed UTF-8 character at offset = (-return)
344 nigel 49 */
345    
346 nigel 79 #if !defined NOUTF8
347    
348 nigel 67 static int
349 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
350 nigel 49 {
351 nigel 91 int c = *utf8bytes++;
352 nigel 49 int d = c;
353     int i, j, s;
354    
355     for (i = -1; i < 6; i++) /* i is number of additional bytes */
356     {
357     if ((d & 0x80) == 0) break;
358     d <<= 1;
359     }
360    
361     if (i == -1) { *vptr = c; return 1; } /* ascii character */
362     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
363    
364     /* i now has a value in the range 1-5 */
365    
366 nigel 59 s = 6*i;
367 nigel 85 d = (c & utf8_table3[i]) << s;
368 nigel 49
369     for (j = 0; j < i; j++)
370     {
371 nigel 91 c = *utf8bytes++;
372 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
373 nigel 59 s -= 6;
374 nigel 49 d |= (c & 0x3f) << s;
375     }
376    
377     /* Check that encoding was the correct unique one */
378    
379 nigel 85 for (j = 0; j < utf8_table1_size; j++)
380     if (d <= utf8_table1[j]) break;
381 nigel 49 if (j != i) return -(i+1);
382    
383     /* Valid value */
384    
385     *vptr = d;
386     return i+1;
387     }
388    
389 nigel 79 #endif
390 nigel 49
391    
392 nigel 79
393 nigel 63 /*************************************************
394 nigel 85 * Convert character value to UTF-8 *
395     *************************************************/
396    
397     /* This function takes an integer value in the range 0 - 0x7fffffff
398     and encodes it as a UTF-8 character in 0 to 6 bytes.
399    
400     Arguments:
401     cvalue the character value
402 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
403 nigel 85
404     Returns: number of characters placed in the buffer
405     */
406    
407 nigel 93 #if !defined NOUTF8
408    
409 nigel 85 static int
410 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
411 nigel 85 {
412     register int i, j;
413     for (i = 0; i < utf8_table1_size; i++)
414     if (cvalue <= utf8_table1[i]) break;
415 nigel 91 utf8bytes += i;
416 nigel 85 for (j = i; j > 0; j--)
417     {
418 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
419 nigel 85 cvalue >>= 6;
420     }
421 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
422 nigel 85 return i + 1;
423     }
424    
425 nigel 93 #endif
426 nigel 85
427    
428 nigel 93
429 nigel 85 /*************************************************
430 nigel 63 * Print character string *
431     *************************************************/
432 nigel 49
433 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
434     mode. Yields number of characters printed. If handed a NULL file, just counts
435     chars without printing. */
436 nigel 49
437 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
438 nigel 3 {
439 nigel 85 int c = 0;
440 nigel 63 int yield = 0;
441 nigel 3
442 nigel 63 while (length-- > 0)
443 nigel 3 {
444 nigel 79 #if !defined NOUTF8
445 nigel 67 if (use_utf8)
446 nigel 63 {
447     int rc = utf82ord(p, &c);
448 nigel 3
449 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
450     {
451     length -= rc - 1;
452     p += rc;
453 nigel 93 if (PRINTHEX(c))
454 nigel 63 {
455     if (f != NULL) fprintf(f, "%c", c);
456     yield++;
457     }
458     else
459     {
460 nigel 93 int n = 4;
461     if (f != NULL) fprintf(f, "\\x{%02x}", c);
462     yield += (n <= 0x000000ff)? 2 :
463     (n <= 0x00000fff)? 3 :
464     (n <= 0x0000ffff)? 4 :
465     (n <= 0x000fffff)? 5 : 6;
466 nigel 63 }
467     continue;
468     }
469     }
470 nigel 79 #endif
471 nigel 3
472 nigel 63 /* Not UTF-8, or malformed UTF-8 */
473    
474 nigel 93 c = *p++;
475     if (PRINTHEX(c))
476 nigel 3 {
477 nigel 63 if (f != NULL) fprintf(f, "%c", c);
478     yield++;
479 nigel 3 }
480 nigel 63 else
481 nigel 3 {
482 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
483     yield += 4;
484     }
485     }
486 nigel 3
487 nigel 63 return yield;
488     }
489 nigel 23
490 nigel 3
491 nigel 23
492 nigel 63 /*************************************************
493     * Callout function *
494     *************************************************/
495 nigel 3
496 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
497     the match. Yield zero unless more callouts than the fail count, or the callout
498     data is not zero. */
499 nigel 3
500 nigel 63 static int callout(pcre_callout_block *cb)
501     {
502     FILE *f = (first_callout | callout_extra)? outfile : NULL;
503 nigel 75 int i, pre_start, post_start, subject_length;
504 nigel 3
505 nigel 63 if (callout_extra)
506     {
507     fprintf(f, "Callout %d: last capture = %d\n",
508     cb->callout_number, cb->capture_last);
509 nigel 3
510 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
511     {
512     if (cb->offset_vector[i] < 0)
513     fprintf(f, "%2d: <unset>\n", i/2);
514     else
515     {
516     fprintf(f, "%2d: ", i/2);
517     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
518     cb->offset_vector[i+1] - cb->offset_vector[i], f);
519     fprintf(f, "\n");
520     }
521     }
522     }
523 nigel 3
524 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
525     datails. On subsequent calls in the same match, we use pchars just to find the
526     printed lengths of the substrings. */
527 nigel 3
528 nigel 63 if (f != NULL) fprintf(f, "--->");
529 nigel 3
530 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
531     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
532     cb->current_position - cb->start_match, f);
533 nigel 3
534 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
535    
536 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
537     cb->subject_length - cb->current_position, f);
538 nigel 3
539 nigel 63 if (f != NULL) fprintf(f, "\n");
540 nigel 9
541 nigel 63 /* Always print appropriate indicators, with callout number if not already
542 nigel 75 shown. For automatic callouts, show the pattern offset. */
543 nigel 3
544 nigel 75 if (cb->callout_number == 255)
545     {
546     fprintf(outfile, "%+3d ", cb->pattern_position);
547     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
548     }
549     else
550     {
551     if (callout_extra) fprintf(outfile, " ");
552     else fprintf(outfile, "%3d ", cb->callout_number);
553     }
554 nigel 3
555 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
556     fprintf(outfile, "^");
557 nigel 3
558 nigel 63 if (post_start > 0)
559     {
560     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
561     fprintf(outfile, "^");
562 nigel 3 }
563    
564 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
565     fprintf(outfile, " ");
566    
567     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
568     pbuffer + cb->pattern_position);
569    
570 nigel 63 fprintf(outfile, "\n");
571     first_callout = 0;
572 nigel 3
573 nigel 71 if (cb->callout_data != NULL)
574 nigel 49 {
575 nigel 71 int callout_data = *((int *)(cb->callout_data));
576     if (callout_data != 0)
577     {
578     fprintf(outfile, "Callout data = %d\n", callout_data);
579     return callout_data;
580     }
581 nigel 63 }
582 nigel 49
583 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
584     (++callout_count >= callout_fail_count)? 1 : 0;
585 nigel 3 }
586    
587    
588 nigel 63 /*************************************************
589 nigel 73 * Local malloc functions *
590 nigel 63 *************************************************/
591 nigel 3
592     /* Alternative malloc function, to test functionality and show the size of the
593     compiled re. */
594    
595     static void *new_malloc(size_t size)
596     {
597 nigel 73 void *block = malloc(size);
598 nigel 43 gotten_store = size;
599 nigel 73 if (show_malloc)
600 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
601 nigel 73 return block;
602 nigel 3 }
603    
604 nigel 73 static void new_free(void *block)
605     {
606     if (show_malloc)
607     fprintf(outfile, "free %p\n", block);
608     free(block);
609     }
610 nigel 3
611    
612 nigel 73 /* For recursion malloc/free, to test stacking calls */
613    
614     static void *stack_malloc(size_t size)
615     {
616     void *block = malloc(size);
617     if (show_malloc)
618 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
619 nigel 73 return block;
620     }
621    
622     static void stack_free(void *block)
623     {
624     if (show_malloc)
625     fprintf(outfile, "stack_free %p\n", block);
626     free(block);
627     }
628    
629    
630 nigel 63 /*************************************************
631     * Call pcre_fullinfo() *
632     *************************************************/
633 nigel 43
634     /* Get one piece of information from the pcre_fullinfo() function */
635    
636     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
637     {
638     int rc;
639     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
640     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
641     }
642    
643    
644    
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverse the byte order of a 2-byte or a 4-byte quantity.

Arguments:
  value      the value to flip
  n          2 to swap the two low-order bytes; any other value reverses
               the four low-order bytes

Returns:     the flipped value; bits above the bytes handled are dropped
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
return ((value & 0x000000ff) << 24) |
       ((value & 0x0000ff00) <<  8) |
       ((value & 0x00ff0000) >>  8) |
       ((value & 0xff000000) >> 24);
}
658    
659    
660    
661    
662     /*************************************************
663 nigel 87 * Check match or recursion limit *
664     *************************************************/
665    
666     static int
667     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
668     int start_offset, int options, int *use_offsets, int use_size_offsets,
669     int flag, unsigned long int *limit, int errnumber, const char *msg)
670     {
671     int count;
672     int min = 0;
673     int mid = 64;
674     int max = -1;
675    
676     extra->flags |= flag;
677    
678     for (;;)
679     {
680     *limit = mid;
681    
682     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
683     use_offsets, use_size_offsets);
684    
685     if (count == errnumber)
686     {
687     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
688     min = mid;
689     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
690     }
691    
692     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
693     count == PCRE_ERROR_PARTIAL)
694     {
695     if (mid == min + 1)
696     {
697     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
698     break;
699     }
700     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
701     max = mid;
702     mid = (min + mid)/2;
703     }
704     else break; /* Some other error */
705     }
706    
707     extra->flags &= ~flag;
708     return count;
709     }
710    
711    
712    
713     /*************************************************
714 ph10 227 * Case-independent strncmp() function *
715     *************************************************/
716    
717     /*
718     Arguments:
719     s first string
720     t second string
721     n number of characters to compare
722    
723     Returns: < 0, = 0, or > 0, according to the comparison
724     */
725    
726     static int
727     strncmpic(uschar *s, uschar *t, int n)
728     {
729     while (n--)
730     {
731     int c = tolower(*s++) - tolower(*t++);
732     if (c) return c;
733     }
734     return 0;
735     }
736    
737    
738    
739     /*************************************************
740 nigel 91 * Check newline indicator *
741     *************************************************/
742    
743     /* This is used both at compile and run-time to check for <xxx> escapes, where
744 ph10 149 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
745     no match.
746 nigel 91
747     Arguments:
748     p points after the leading '<'
749     f file for error message
750    
751     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
752     */
753    
754     static int
755     check_newline(uschar *p, FILE *f)
756     {
757 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
758     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
759     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
760     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
761     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
762 ph10 231 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
763     if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
764 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
765     return 0;
766     }
767    
768    
769    
/*************************************************
*             Usage function                     *
*************************************************/

/* Write the command-line summary to stdout. The readline sentence and the
-dfa and -p lines depend on how pcretest was built.

NOTE(review): the column padding inside these literals looks collapsed in this
copy of the file - confirm against upstream before relying on the layout. */

static void
usage(void)
{
printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
printf("Input and output default to stdin and stdout.\n");
#ifdef SUPPORT_LIBREADLINE
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif
printf("\nOptions:\n");
printf(" -b show compiled code (bytecode)\n");
printf(" -C show PCRE compile-time options and exit\n");
printf(" -d debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
printf(" -dfa force DFA matching for all subjects\n");
#endif
printf(" -help show usage information\n");
printf(" -i show information about compiled patterns\n"
       " -M find MATCH_LIMIT minimum for each subject\n"
       " -m output memory used information\n"
       " -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf(" -p use POSIX interface\n");
#endif
printf(" -q quiet: do not output PCRE version number at start\n");
printf(" -S <n> set stack size to <n> megabytes\n");
printf(" -s output store (memory) used information\n"
       " -t time compilation and execution\n");
printf(" -t <n> time compilation and execution, repeating <n> times\n");
printf(" -tm time execution (matching) only\n");
printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
}
807    
808    
809    
810     /*************************************************
811 nigel 63 * Main Program *
812     *************************************************/
813 nigel 43
814 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
815     consist of a regular expression, in delimiters and optionally followed by
816     options, followed by a set of test data, terminated by an empty line. */
817    
818     int main(int argc, char **argv)
819     {
820     FILE *infile = stdin;
821     int options = 0;
822     int study_options = 0;
823 ph10 386 int default_find_match_limit = FALSE;
824 nigel 3 int op = 1;
825     int timeit = 0;
826 nigel 93 int timeitm = 0;
827 nigel 3 int showinfo = 0;
828 nigel 31 int showstore = 0;
829 nigel 87 int quiet = 0;
830 nigel 53 int size_offsets = 45;
831     int size_offsets_max;
832 nigel 77 int *offsets = NULL;
833 nigel 53 #if !defined NOPOSIX
834 nigel 3 int posix = 0;
835 nigel 53 #endif
836 nigel 3 int debug = 0;
837 nigel 11 int done = 0;
838 nigel 77 int all_use_dfa = 0;
839     int yield = 0;
840 nigel 91 int stack_size;
841 nigel 3
842 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
843     that 1024 is plenty long enough for the few names we'll be testing. */
844 nigel 69
845 nigel 91 uschar copynames[1024];
846     uschar getnames[1024];
847    
848     uschar *copynamesptr;
849     uschar *getnamesptr;
850    
851 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
852 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
853 nigel 69
854 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
855     dbuffer = (unsigned char *)malloc(buffer_size);
856     pbuffer = (unsigned char *)malloc(buffer_size);
857 nigel 69
858 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
859 nigel 3
860 nigel 93 outfile = stdout;
861    
862     /* The following _setmode() stuff is some Windows magic that tells its runtime
863     library to translate CRLF into a single LF character. At least, that's what
864     I've been told: never having used Windows I take this all on trust. Originally
865     it set 0x8000, but then I was advised that _O_BINARY was better. */
866    
867 nigel 75 #if defined(_WIN32) || defined(WIN32)
868 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
869     #endif
870 nigel 75
871 nigel 3 /* Scan options */
872    
873     while (argc > 1 && argv[op][0] == '-')
874     {
875 nigel 63 unsigned char *endptr;
876 nigel 53
877 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
878     showstore = 1;
879 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
880 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
881 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
882     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
883 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
884 nigel 79 #if !defined NODFA
885 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
886 nigel 79 #endif
887 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
888 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
889     *endptr == 0))
890 nigel 53 {
891     op++;
892     argc--;
893     }
894 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
895     {
896     int both = argv[op][2] == 0;
897     int temp;
898     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
899     *endptr == 0))
900     {
901     timeitm = temp;
902     op++;
903     argc--;
904     }
905     else timeitm = LOOPREPEAT;
906     if (both) timeit = timeitm;
907     }
908 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
909     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
910     *endptr == 0))
911     {
912 nigel 93 #if defined(_WIN32) || defined(WIN32)
913 nigel 91 printf("PCRE: -S not supported on this OS\n");
914     exit(1);
915     #else
916     int rc;
917     struct rlimit rlim;
918     getrlimit(RLIMIT_STACK, &rlim);
919     rlim.rlim_cur = stack_size * 1024 * 1024;
920     rc = setrlimit(RLIMIT_STACK, &rlim);
921     if (rc != 0)
922     {
923     printf("PCRE: setrlimit() failed with error %d\n", rc);
924     exit(1);
925     }
926     op++;
927     argc--;
928     #endif
929     }
930 nigel 53 #if !defined NOPOSIX
931 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
932 nigel 53 #endif
933 nigel 63 else if (strcmp(argv[op], "-C") == 0)
934     {
935     int rc;
936 ph10 392 unsigned long int lrc;
937 nigel 63 printf("PCRE version %s\n", pcre_version());
938     printf("Compiled with\n");
939     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
940     printf(" %sUTF-8 support\n", rc? "" : "No ");
941 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
942     printf(" %sUnicode properties support\n", rc? "" : "No ");
943 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
944 ph10 391 /* Note that these values are always the ASCII values, even
945 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
946 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
947     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
948 ph10 150 (rc == -2)? "ANYCRLF" :
949 nigel 93 (rc == -1)? "ANY" : "???");
950 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
951     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
952     "all Unicode newlines");
953 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
954     printf(" Internal link size = %d\n", rc);
955     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
956     printf(" POSIX malloc threshold = %d\n", rc);
957 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
958     printf(" Default match limit = %ld\n", lrc);
959     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
960     printf(" Default recursion depth limit = %ld\n", lrc);
961 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
962     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
963 ph10 121 goto EXIT;
964 nigel 63 }
965 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
966     strcmp(argv[op], "--help") == 0)
967     {
968     usage();
969     goto EXIT;
970     }
971 nigel 3 else
972     {
973 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
974 nigel 93 usage();
975 nigel 77 yield = 1;
976     goto EXIT;
977 nigel 3 }
978     op++;
979     argc--;
980     }
981    
982 nigel 53 /* Get the store for the offsets vector, and remember what it was */
983    
984     size_offsets_max = size_offsets;
985 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
986 nigel 53 if (offsets == NULL)
987     {
988     printf("** Failed to get %d bytes of memory for offsets vector\n",
989 ph10 151 (int)(size_offsets_max * sizeof(int)));
990 nigel 77 yield = 1;
991     goto EXIT;
992 nigel 53 }
993    
994 nigel 3 /* Sort out the input and output files */
995    
996     if (argc > 1)
997     {
998 nigel 93 infile = fopen(argv[op], INPUT_MODE);
999 nigel 3 if (infile == NULL)
1000     {
1001     printf("** Failed to open %s\n", argv[op]);
1002 nigel 77 yield = 1;
1003     goto EXIT;
1004 nigel 3 }
1005     }
1006    
1007     if (argc > 2)
1008     {
1009 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
1010 nigel 3 if (outfile == NULL)
1011     {
1012     printf("** Failed to open %s\n", argv[op+1]);
1013 nigel 77 yield = 1;
1014     goto EXIT;
1015 nigel 3 }
1016     }
1017    
1018     /* Set alternative malloc function */
1019    
1020     pcre_malloc = new_malloc;
1021 nigel 73 pcre_free = new_free;
1022     pcre_stack_malloc = stack_malloc;
1023     pcre_stack_free = stack_free;
1024 nigel 3
1025 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1026 nigel 3
1027 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1028 nigel 3
1029     /* Main loop */
1030    
1031 nigel 11 while (!done)
1032 nigel 3 {
1033     pcre *re = NULL;
1034     pcre_extra *extra = NULL;
1035 nigel 37
1036     #if !defined NOPOSIX /* There are still compilers that require no indent */
1037 nigel 3 regex_t preg;
1038 nigel 45 int do_posix = 0;
1039 nigel 37 #endif
1040    
1041 nigel 7 const char *error;
1042 nigel 25 unsigned char *p, *pp, *ppp;
1043 nigel 75 unsigned char *to_file = NULL;
1044 nigel 53 const unsigned char *tables = NULL;
1045 nigel 75 unsigned long int true_size, true_study_size = 0;
1046     size_t size, regex_gotten_store;
1047 nigel 3 int do_study = 0;
1048 nigel 25 int do_debug = debug;
1049 nigel 35 int do_G = 0;
1050     int do_g = 0;
1051 nigel 25 int do_showinfo = showinfo;
1052 nigel 35 int do_showrest = 0;
1053 nigel 75 int do_flip = 0;
1054 nigel 93 int erroroffset, len, delimiter, poffset;
1055 nigel 3
1056 nigel 67 use_utf8 = 0;
1057 ph10 211 debug_lengths = 1;
1058 nigel 63
1059 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1060 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1061 nigel 63 fflush(outfile);
1062 nigel 3
1063     p = buffer;
1064     while (isspace(*p)) p++;
1065     if (*p == 0) continue;
1066    
1067 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1068 nigel 3
1069 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1070     {
1071 nigel 91 unsigned long int magic, get_options;
1072 nigel 75 uschar sbuf[8];
1073     FILE *f;
1074    
1075     p++;
1076     pp = p + (int)strlen((char *)p);
1077     while (isspace(pp[-1])) pp--;
1078     *pp = 0;
1079    
1080     f = fopen((char *)p, "rb");
1081     if (f == NULL)
1082     {
1083     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1084     continue;
1085     }
1086    
1087     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1088    
1089     true_size =
1090     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1091     true_study_size =
1092     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1093    
1094     re = (real_pcre *)new_malloc(true_size);
1095     regex_gotten_store = gotten_store;
1096    
1097     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1098    
1099     magic = ((real_pcre *)re)->magic_number;
1100     if (magic != MAGIC_NUMBER)
1101     {
1102     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1103     {
1104     do_flip = 1;
1105     }
1106     else
1107     {
1108     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1109     fclose(f);
1110     continue;
1111     }
1112     }
1113    
1114     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1115     do_flip? " (byte-inverted)" : "", p);
1116    
1117     /* Need to know if UTF-8 for printing data strings */
1118    
1119 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1120     use_utf8 = (get_options & PCRE_UTF8) != 0;
1121 nigel 75
1122     /* Now see if there is any following study data */
1123    
1124     if (true_study_size != 0)
1125     {
1126     pcre_study_data *psd;
1127    
1128     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1129     extra->flags = PCRE_EXTRA_STUDY_DATA;
1130    
1131     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1132     extra->study_data = psd;
1133    
1134     if (fread(psd, 1, true_study_size, f) != true_study_size)
1135     {
1136     FAIL_READ:
1137     fprintf(outfile, "Failed to read data from %s\n", p);
1138     if (extra != NULL) new_free(extra);
1139     if (re != NULL) new_free(re);
1140     fclose(f);
1141     continue;
1142     }
1143     fprintf(outfile, "Study data loaded from %s\n", p);
1144     do_study = 1; /* To get the data output if requested */
1145     }
1146     else fprintf(outfile, "No study data\n");
1147    
1148     fclose(f);
1149     goto SHOW_INFO;
1150     }
1151    
1152     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1153     the pattern; if it isn't complete, read more. */
1154    
1155 nigel 3 delimiter = *p++;
1156    
1157 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1158 nigel 3 {
1159 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1160 nigel 3 goto SKIP_DATA;
1161     }
1162    
1163     pp = p;
1164 nigel 93 poffset = p - buffer;
1165 nigel 3
1166     for(;;)
1167     {
1168 nigel 29 while (*pp != 0)
1169     {
1170     if (*pp == '\\' && pp[1] != 0) pp++;
1171     else if (*pp == delimiter) break;
1172     pp++;
1173     }
1174 nigel 3 if (*pp != 0) break;
1175 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1176 nigel 3 {
1177     fprintf(outfile, "** Unexpected EOF\n");
1178 nigel 11 done = 1;
1179     goto CONTINUE;
1180 nigel 3 }
1181 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1182 nigel 3 }
1183    
1184 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1185     pointer to the correct relative point in the buffer. */
1186    
1187     p = buffer + poffset;
1188    
1189 nigel 29 /* If the first character after the delimiter is backslash, make
1190     the pattern end with backslash. This is purely to provide a way
1191     of testing for the error message when a pattern ends with backslash. */
1192    
1193     if (pp[1] == '\\') *pp++ = '\\';
1194    
1195 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1196     for callouts. */
1197 nigel 3
1198     *pp++ = 0;
1199 nigel 75 strcpy((char *)pbuffer, (char *)p);
1200 nigel 3
1201     /* Look for options after final delimiter */
1202    
1203     options = 0;
1204     study_options = 0;
1205 nigel 31 log_store = showstore; /* default from command line */
1206    
1207 nigel 3 while (*pp != 0)
1208     {
1209     switch (*pp++)
1210     {
1211 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1212 nigel 35 case 'g': do_g = 1; break;
1213 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1214     case 'm': options |= PCRE_MULTILINE; break;
1215     case 's': options |= PCRE_DOTALL; break;
1216     case 'x': options |= PCRE_EXTENDED; break;
1217 nigel 25
1218 nigel 35 case '+': do_showrest = 1; break;
1219 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1220 nigel 93 case 'B': do_debug = 1; break;
1221 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1222 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1223 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1224 nigel 75 case 'F': do_flip = 1; break;
1225 nigel 35 case 'G': do_G = 1; break;
1226 nigel 25 case 'I': do_showinfo = 1; break;
1227 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1228 nigel 31 case 'M': log_store = 1; break;
1229 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1230 nigel 37
1231     #if !defined NOPOSIX
1232 nigel 3 case 'P': do_posix = 1; break;
1233 nigel 37 #endif
1234    
1235 nigel 3 case 'S': do_study = 1; break;
1236 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1237 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1238 ph10 126 case 'Z': debug_lengths = 0; break;
1239 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1240 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1241 nigel 25
1242     case 'L':
1243     ppp = pp;
1244 nigel 93 /* The '\r' test here is so that it works on Windows. */
1245     /* The 0 (NUL terminator) test is just in case this is an unterminated line. */
1246     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1247 nigel 25 *ppp = 0;
1248     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1249     {
1250     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1251     goto SKIP_DATA;
1252     }
1253 nigel 93 locale_set = 1;
1254 nigel 25 tables = pcre_maketables();
1255     pp = ppp;
1256     break;
1257    
1258 nigel 75 case '>':
1259     to_file = pp;
1260     while (*pp != 0) pp++;
1261     while (isspace(pp[-1])) pp--;
1262     *pp = 0;
1263     break;
1264    
1265 nigel 91 case '<':
1266     {
1267 ph10 336 if (strncmp((char *)pp, "JS>", 3) == 0)
1268     {
1269     options |= PCRE_JAVASCRIPT_COMPAT;
1270 ph10 345 pp += 3;
1271 ph10 336 }
1272     else
1273 ph10 345 {
1274 ph10 336 int x = check_newline(pp, outfile);
1275     if (x == 0) goto SKIP_DATA;
1276     options |= x;
1277     while (*pp++ != '>');
1278 ph10 345 }
1279 nigel 91 }
1280     break;
1281    
1282 nigel 77 case '\r': /* So that it works in Windows */
1283     case '\n':
1284     case ' ':
1285     break;
1286 nigel 75
1287 nigel 3 default:
1288     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1289     goto SKIP_DATA;
1290     }
1291     }
1292    
1293 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1294 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1295     local character tables. */
1296 nigel 3
1297 nigel 37 #if !defined NOPOSIX
1298 nigel 3 if (posix || do_posix)
1299     {
1300     int rc;
1301     int cflags = 0;
1302 nigel 75
1303 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1304     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1305 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1306 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1307     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1308    
1309 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1310    
1311     /* Compilation failed; go back for another re, skipping to blank line
1312     if non-interactive. */
1313    
1314     if (rc != 0)
1315     {
1316 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1317 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1318     goto SKIP_DATA;
1319     }
1320     }
1321    
1322     /* Handle compiling via the native interface */
1323    
1324     else
1325 nigel 37 #endif /* !defined NOPOSIX */
1326    
1327 nigel 3 {
1328 nigel 93 if (timeit > 0)
1329 nigel 3 {
1330     register int i;
1331     clock_t time_taken;
1332     clock_t start_time = clock();
1333 nigel 93 for (i = 0; i < timeit; i++)
1334 nigel 3 {
1335 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1336 nigel 3 if (re != NULL) free(re);
1337     }
1338     time_taken = clock() - start_time;
1339 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1340     (((double)time_taken * 1000.0) / (double)timeit) /
1341 nigel 63 (double)CLOCKS_PER_SEC);
1342 nigel 3 }
1343    
1344 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1345 nigel 3
1346     /* Compilation failed; go back for another re, skipping to blank line
1347     if non-interactive. */
1348    
1349     if (re == NULL)
1350     {
1351     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1352     SKIP_DATA:
1353     if (infile != stdin)
1354     {
1355     for (;;)
1356     {
1357 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
1358 nigel 11 {
1359     done = 1;
1360     goto CONTINUE;
1361     }
1362 nigel 3 len = (int)strlen((char *)buffer);
1363     while (len > 0 && isspace(buffer[len-1])) len--;
1364     if (len == 0) break;
1365     }
1366     fprintf(outfile, "\n");
1367     }
1368 nigel 25 goto CONTINUE;
1369 nigel 3 }
1370    
1371 nigel 43 /* Compilation succeeded; print data if required. There are now two
1372     info-returning functions. The old one has a limited interface and
1373     returns only limited data. Check that it agrees with the newer one. */
1374 nigel 3
1375 nigel 63 if (log_store)
1376     fprintf(outfile, "Memory allocation (code space): %d\n",
1377     (int)(gotten_store -
1378     sizeof(real_pcre) -
1379     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1380    
1381 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1382     and remember the store that was got. */
1383    
1384     true_size = ((real_pcre *)re)->size;
1385     regex_gotten_store = gotten_store;
1386    
1387     /* If /S was present, study the regexp to generate additional info to
1388     help with the matching. */
1389    
1390     if (do_study)
1391     {
1392 nigel 93 if (timeit > 0)
1393 nigel 75 {
1394     register int i;
1395     clock_t time_taken;
1396     clock_t start_time = clock();
1397 nigel 93 for (i = 0; i < timeit; i++)
1398 nigel 75 extra = pcre_study(re, study_options, &error);
1399     time_taken = clock() - start_time;
1400     if (extra != NULL) free(extra);
1401 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1402     (((double)time_taken * 1000.0) / (double)timeit) /
1403 nigel 75 (double)CLOCKS_PER_SEC);
1404     }
1405     extra = pcre_study(re, study_options, &error);
1406     if (error != NULL)
1407     fprintf(outfile, "Failed to study: %s\n", error);
1408     else if (extra != NULL)
1409     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1410     }
1411    
1412     /* If the 'F' option was present, we flip the bytes of all the integer
1413     fields in the regex data block and the study block. This is to make it
1414     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1415     compiled on a different architecture. */
1416    
1417     if (do_flip)
1418     {
1419     real_pcre *rre = (real_pcre *)re;
1420 ph10 259 rre->magic_number =
1421 ph10 255 byteflip(rre->magic_number, sizeof(rre->magic_number));
1422 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
1423     rre->options = byteflip(rre->options, sizeof(rre->options));
1424 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1425 ph10 259 rre->top_bracket =
1426 ph10 255 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1427 ph10 259 rre->top_backref =
1428 ph10 255 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1429 ph10 259 rre->first_byte =
1430 ph10 255 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1431 ph10 259 rre->req_byte =
1432 ph10 255 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1433     rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1434 nigel 75 sizeof(rre->name_table_offset));
1435 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1436 nigel 75 sizeof(rre->name_entry_size));
1437 ph10 259 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1438 ph10 255 sizeof(rre->name_count));
1439 nigel 75
1440     if (extra != NULL)
1441     {
1442     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1443     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1444     rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1445     }
1446     }
1447    
1448     /* Extract information from the compiled data if required */
1449    
1450     SHOW_INFO:
1451    
1452 nigel 93 if (do_debug)
1453     {
1454     fprintf(outfile, "------------------------------------------------------------------\n");
1455 ph10 116 pcre_printint(re, outfile, debug_lengths);
1456 nigel 93 }
1457    
1458 nigel 25 if (do_showinfo)
1459 nigel 3 {
1460 nigel 75 unsigned long int get_options, all_options;
1461 nigel 79 #if !defined NOINFOCHECK
1462 nigel 43 int old_first_char, old_options, old_count;
1463 nigel 79 #endif
1464 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1465 ph10 227 hascrorlf;
1466 nigel 63 int nameentrysize, namecount;
1467     const uschar *nametable;
1468 nigel 3
1469 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1470 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1471     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1472     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1473 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1474 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1475 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1476     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1477 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1478 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1479     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1480 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1481 nigel 43
1482 nigel 79 #if !defined NOINFOCHECK
1483 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1484 nigel 3 if (count < 0) fprintf(outfile,
1485 nigel 43 "Error %d from pcre_info()\n", count);
1486 nigel 3 else
1487     {
1488 nigel 43 if (old_count != count) fprintf(outfile,
1489     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1490     old_count);
1491 nigel 37
1492 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1493     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1494     first_char, old_first_char);
1495 nigel 37
1496 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1497     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1498     get_options, old_options);
1499 nigel 43 }
1500 nigel 79 #endif
1501 nigel 43
1502 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1503 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1504 nigel 77 (int)size, (int)regex_gotten_store);
1505 nigel 43
1506     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1507     if (backrefmax > 0)
1508     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1509 nigel 63
1510     if (namecount > 0)
1511     {
1512     fprintf(outfile, "Named capturing subpatterns:\n");
1513     while (namecount-- > 0)
1514     {
1515     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1516     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1517     GET2(nametable, 0));
1518     nametable += nameentrysize;
1519     }
1520     }
1521 ph10 172
1522 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1523 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1524 nigel 63
1525 nigel 75 all_options = ((real_pcre *)re)->options;
1526 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1527 nigel 75
1528 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1529 ph10 231 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1530 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1531     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1532     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1533     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1534 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1535 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1536 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1537     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1538 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1539     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1540     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1541 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1542 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1543 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1544     ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1545 ph10 172
1546 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1547 nigel 43
1548 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1549 nigel 91 {
1550     case PCRE_NEWLINE_CR:
1551     fprintf(outfile, "Forced newline sequence: CR\n");
1552     break;
1553 nigel 43
1554 nigel 91 case PCRE_NEWLINE_LF:
1555     fprintf(outfile, "Forced newline sequence: LF\n");
1556     break;
1557    
1558     case PCRE_NEWLINE_CRLF:
1559     fprintf(outfile, "Forced newline sequence: CRLF\n");
1560     break;
1561    
1562 ph10 149 case PCRE_NEWLINE_ANYCRLF:
1563     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1564     break;
1565    
1566 nigel 93 case PCRE_NEWLINE_ANY:
1567     fprintf(outfile, "Forced newline sequence: ANY\n");
1568     break;
1569    
1570 nigel 91 default:
1571     break;
1572     }
1573    
1574 nigel 43 if (first_char == -1)
1575     {
1576 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1577 nigel 43 }
1578     else if (first_char < 0)
1579     {
1580     fprintf(outfile, "No first char\n");
1581     }
1582     else
1583     {
1584 nigel 63 int ch = first_char & 255;
1585 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1586 nigel 63 "" : " (caseless)";
1587 nigel 93 if (PRINTHEX(ch))
1588 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1589 nigel 3 else
1590 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1591 nigel 43 }
1592 nigel 37
1593 nigel 43 if (need_char < 0)
1594     {
1595     fprintf(outfile, "No need char\n");
1596 nigel 3 }
1597 nigel 43 else
1598     {
1599 nigel 63 int ch = need_char & 255;
1600 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1601 nigel 63 "" : " (caseless)";
1602 nigel 93 if (PRINTHEX(ch))
1603 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1604 nigel 43 else
1605 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1606 nigel 43 }
1607 nigel 75
1608     /* Don't output study size; at present it is in any case a fixed
1609     value, but it varies, depending on the computer architecture, and
1610     so messes up the test suite. (And with the /F option, it might be
1611     flipped.) */
1612    
1613     if (do_study)
1614     {
1615     if (extra == NULL)
1616     fprintf(outfile, "Study returned NULL\n");
1617     else
1618     {
1619     uschar *start_bits = NULL;
1620     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1621    
1622     if (start_bits == NULL)
1623     fprintf(outfile, "No starting byte set\n");
1624     else
1625     {
1626     int i;
1627     int c = 24;
1628     fprintf(outfile, "Starting byte set: ");
1629     for (i = 0; i < 256; i++)
1630     {
1631     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1632     {
1633     if (c > 75)
1634     {
1635     fprintf(outfile, "\n ");
1636     c = 2;
1637     }
1638 nigel 93 if (PRINTHEX(i) && i != ' ')
1639 nigel 75 {
1640     fprintf(outfile, "%c ", i);
1641     c += 2;
1642     }
1643     else
1644     {
1645     fprintf(outfile, "\\x%02x ", i);
1646     c += 5;
1647     }
1648     }
1649     }
1650     fprintf(outfile, "\n");
1651     }
1652     }
1653     }
1654 nigel 3 }
1655    
1656 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1657     that is all. The first 8 bytes of the file are the regex length and then
1658     the study length, in big-endian order. */
1659 nigel 3
1660 nigel 75 if (to_file != NULL)
1661 nigel 3 {
1662 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1663     if (f == NULL)
1664 nigel 3 {
1665 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1666 nigel 3 }
1667 nigel 75 else
1668     {
1669     uschar sbuf[8];
1670 ph10 255 sbuf[0] = (uschar)((true_size >> 24) & 255);
1671     sbuf[1] = (uschar)((true_size >> 16) & 255);
1672     sbuf[2] = (uschar)((true_size >> 8) & 255);
1673     sbuf[3] = (uschar)((true_size) & 255);
1674 ph10 259
1675 ph10 255 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1676     sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1677     sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1678     sbuf[7] = (uschar)((true_study_size) & 255);
1679 nigel 3
1680 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1681     fwrite(re, 1, true_size, f) < true_size)
1682     {
1683     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1684     }
1685 nigel 3 else
1686     {
1687 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1688     if (extra != NULL)
1689 nigel 3 {
1690 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1691     true_study_size)
1692 nigel 3 {
1693 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1694     strerror(errno));
1695 nigel 3 }
1696 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1697 nigel 93
1698 nigel 3 }
1699     }
1700 nigel 75 fclose(f);
1701 nigel 3 }
1702 nigel 77
1703     new_free(re);
1704     if (extra != NULL) new_free(extra);
1705     if (tables != NULL) new_free((void *)tables);
1706 nigel 75 continue; /* With next regex */
1707 nigel 3 }
1708 nigel 75 } /* End of non-POSIX compile */
1709 nigel 3
1710     /* Read data lines and test them */
1711    
1712     for (;;)
1713     {
1714 nigel 87 uschar *q;
1715 ph10 147 uschar *bptr;
1716 nigel 57 int *use_offsets = offsets;
1717 nigel 53 int use_size_offsets = size_offsets;
1718 nigel 63 int callout_data = 0;
1719     int callout_data_set = 0;
1720 nigel 3 int count, c;
1721 nigel 29 int copystrings = 0;
1722 ph10 386 int find_match_limit = default_find_match_limit;
1723 nigel 29 int getstrings = 0;
1724     int getlist = 0;
1725 nigel 39 int gmatched = 0;
1726 nigel 35 int start_offset = 0;
1727 nigel 41 int g_notempty = 0;
1728 nigel 77 int use_dfa = 0;
1729 nigel 3
1730     options = 0;
1731    
1732 nigel 91 *copynames = 0;
1733     *getnames = 0;
1734    
1735     copynamesptr = copynames;
1736     getnamesptr = getnames;
1737    
1738 nigel 63 pcre_callout = callout;
1739     first_callout = 1;
1740     callout_extra = 0;
1741     callout_count = 0;
1742     callout_fail_count = 999999;
1743     callout_fail_id = -1;
1744 nigel 73 show_malloc = 0;
1745 nigel 63
1746 nigel 91 if (extra != NULL) extra->flags &=
1747     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1748    
1749     len = 0;
1750     for (;;)
1751 nigel 11 {
1752 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1753 nigel 91 {
1754     if (len > 0) break;
1755     done = 1;
1756     goto CONTINUE;
1757     }
1758     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1759     len = (int)strlen((char *)buffer);
1760     if (buffer[len-1] == '\n') break;
1761 nigel 11 }
1762 nigel 3
1763     while (len > 0 && isspace(buffer[len-1])) len--;
1764     buffer[len] = 0;
1765     if (len == 0) break;
1766    
1767     p = buffer;
1768     while (isspace(*p)) p++;
1769    
1770 ph10 147 bptr = q = dbuffer;
1771 nigel 3 while ((c = *p++) != 0)
1772     {
1773     int i = 0;
1774     int n = 0;
1775 nigel 63
1776 nigel 3 if (c == '\\') switch ((c = *p++))
1777     {
1778     case 'a': c = 7; break;
1779     case 'b': c = '\b'; break;
1780     case 'e': c = 27; break;
1781     case 'f': c = '\f'; break;
1782     case 'n': c = '\n'; break;
1783     case 'r': c = '\r'; break;
1784     case 't': c = '\t'; break;
1785     case 'v': c = '\v'; break;
1786    
1787     case '0': case '1': case '2': case '3':
1788     case '4': case '5': case '6': case '7':
1789     c -= '0';
1790     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1791     c = c * 8 + *p++ - '0';
1792 nigel 91
1793     #if !defined NOUTF8
1794     if (use_utf8 && c > 255)
1795     {
1796     unsigned char buff8[8];
1797     int ii, utn;
1798     utn = ord2utf8(c, buff8);
1799     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1800     c = buff8[ii]; /* Last byte */
1801     }
1802     #endif
1803 nigel 3 break;
1804    
1805     case 'x':
1806 nigel 49
1807     /* Handle \x{..} specially - new Perl thing for utf8 */
1808    
1809 nigel 79 #if !defined NOUTF8
1810 nigel 49 if (*p == '{')
1811     {
1812     unsigned char *pt = p;
1813     c = 0;
1814     while (isxdigit(*(++pt)))
1815     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1816     if (*pt == '}')
1817     {
1818 nigel 67 unsigned char buff8[8];
1819 nigel 49 int ii, utn;
1820 ph10 355 if (use_utf8)
1821 ph10 358 {
1822 ph10 355 utn = ord2utf8(c, buff8);
1823     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1824     c = buff8[ii]; /* Last byte */
1825     }
1826     else
1827     {
1828 ph10 358 if (c > 255)
1829 ph10 355 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1830     "UTF-8 mode is not enabled.\n"
1831     "** Truncation will probably give the wrong result.\n", c);
1832 ph10 358 }
1833 nigel 49 p = pt + 1;
1834     break;
1835     }
1836     /* Not correct form; fall through */
1837     }
1838 nigel 79 #endif
1839 nigel 49
1840     /* Ordinary \x */
1841    
1842 nigel 3 c = 0;
1843     while (i++ < 2 && isxdigit(*p))
1844     {
1845     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1846     p++;
1847     }
1848     break;
1849    
1850 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1851 nigel 3 p--;
1852     continue;
1853    
1854 nigel 75 case '>':
1855     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1856     continue;
1857    
1858 nigel 3 case 'A': /* Option setting */
1859     options |= PCRE_ANCHORED;
1860     continue;
1861    
1862     case 'B':
1863     options |= PCRE_NOTBOL;
1864     continue;
1865    
1866 nigel 29 case 'C':
1867 nigel 63 if (isdigit(*p)) /* Set copy string */
1868     {
1869     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1870     copystrings |= 1 << n;
1871     }
1872     else if (isalnum(*p))
1873     {
1874 nigel 91 uschar *npp = copynamesptr;
1875 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1876 nigel 91 *npp++ = 0;
1877 nigel 67 *npp = 0;
1878 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1879 nigel 63 if (n < 0)
1880 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1881     copynamesptr = npp;
1882 nigel 63 }
1883     else if (*p == '+')
1884     {
1885     callout_extra = 1;
1886     p++;
1887     }
1888     else if (*p == '-')
1889     {
1890     pcre_callout = NULL;
1891     p++;
1892     }
1893     else if (*p == '!')
1894     {
1895     callout_fail_id = 0;
1896     p++;
1897     while(isdigit(*p))
1898     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1899     callout_fail_count = 0;
1900     if (*p == '!')
1901     {
1902     p++;
1903     while(isdigit(*p))
1904     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1905     }
1906     }
1907     else if (*p == '*')
1908     {
1909     int sign = 1;
1910     callout_data = 0;
1911     if (*(++p) == '-') { sign = -1; p++; }
1912     while(isdigit(*p))
1913     callout_data = callout_data * 10 + *p++ - '0';
1914     callout_data *= sign;
1915     callout_data_set = 1;
1916     }
1917 nigel 29 continue;
1918    
1919 nigel 79 #if !defined NODFA
1920 nigel 77 case 'D':
1921 nigel 79 #if !defined NOPOSIX
1922 nigel 77 if (posix || do_posix)
1923     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1924     else
1925 nigel 79 #endif
1926 nigel 77 use_dfa = 1;
1927     continue;
1928    
1929     case 'F':
1930     options |= PCRE_DFA_SHORTEST;
1931     continue;
1932 nigel 79 #endif
1933 nigel 77
1934 nigel 29 case 'G':
1935 nigel 63 if (isdigit(*p))
1936     {
1937     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1938     getstrings |= 1 << n;
1939     }
1940     else if (isalnum(*p))
1941     {
1942 nigel 91 uschar *npp = getnamesptr;
1943 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1944 nigel 91 *npp++ = 0;
1945 nigel 67 *npp = 0;
1946 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1947 nigel 63 if (n < 0)
1948 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1949     getnamesptr = npp;
1950 nigel 63 }
1951 nigel 29 continue;
1952    
1953     case 'L':
1954     getlist = 1;
1955     continue;
1956    
1957 nigel 63 case 'M':
1958     find_match_limit = 1;
1959     continue;
1960    
1961 nigel 37 case 'N':
1962     options |= PCRE_NOTEMPTY;
1963     continue;
1964    
1965 nigel 3 case 'O':
1966     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1967 nigel 53 if (n > size_offsets_max)
1968     {
1969     size_offsets_max = n;
1970 nigel 57 free(offsets);
1971 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1972 nigel 53 if (offsets == NULL)
1973     {
1974     printf("** Failed to get %d bytes of memory for offsets vector\n",
1975 ph10 151 (int)(size_offsets_max * sizeof(int)));
1976 nigel 77 yield = 1;
1977     goto EXIT;
1978 nigel 53 }
1979     }
1980     use_size_offsets = n;
1981 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1982 nigel 3 continue;
1983    
1984 nigel 75 case 'P':
1985     options |= PCRE_PARTIAL;
1986     continue;
1987    
1988 nigel 91 case 'Q':
1989     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1990     if (extra == NULL)
1991     {
1992     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1993     extra->flags = 0;
1994     }
1995     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1996     extra->match_limit_recursion = n;
1997     continue;
1998    
1999     case 'q':
2000     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2001     if (extra == NULL)
2002     {
2003     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2004     extra->flags = 0;
2005     }
2006     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2007     extra->match_limit = n;
2008     continue;
2009    
2010 nigel 79 #if !defined NODFA
2011 nigel 77 case 'R':
2012     options |= PCRE_DFA_RESTART;
2013     continue;
2014 nigel 79 #endif
2015 nigel 77
2016 nigel 73 case 'S':
2017     show_malloc = 1;
2018     continue;
2019 ph10 392
2020 ph10 389 case 'Y':
2021     options |= PCRE_NO_START_OPTIMIZE;
2022 ph10 392 continue;
2023 nigel 73
2024 nigel 3 case 'Z':
2025     options |= PCRE_NOTEOL;
2026     continue;
2027 nigel 71
2028     case '?':
2029     options |= PCRE_NO_UTF8_CHECK;
2030     continue;
2031 nigel 91
2032     case '<':
2033     {
2034     int x = check_newline(p, outfile);
2035     if (x == 0) goto NEXT_DATA;
2036     options |= x;
2037     while (*p++ != '>');
2038     }
2039     continue;
2040 nigel 3 }
2041 nigel 9 *q++ = c;
2042 nigel 3 }
2043 nigel 9 *q = 0;
2044     len = q - dbuffer;
2045 ph10 371
2046 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
2047 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2048 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
2049 ph10 371
2050 ph10 363 #if !defined NOPOSIX
2051     if (posix || do_posix)
2052     {
2053     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2054 ph10 371 bptr += buffer_size - len - 1;
2055 ph10 363 }
2056 ph10 371 else
2057     #endif
2058 ph10 363 {
2059     memmove(bptr + buffer_size - len, bptr, len);
2060 ph10 371 bptr += buffer_size - len;
2061     }
2062 nigel 3
2063 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
2064     {
2065     printf("**Match limit not relevant for DFA matching: ignored\n");
2066     find_match_limit = 0;
2067     }
2068    
2069 nigel 3 /* Handle matching via the POSIX interface, which does not
2070 nigel 63 support timing or playing with the match limit or callout data. */
2071 nigel 3
2072 nigel 37 #if !defined NOPOSIX
2073 nigel 3 if (posix || do_posix)
2074     {
2075     int rc;
2076     int eflags = 0;
2077 nigel 63 regmatch_t *pmatch = NULL;
2078     if (use_size_offsets > 0)
2079 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2080 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2081     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2082 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2083 nigel 3
2084 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2085 nigel 3
2086     if (rc != 0)
2087     {
2088 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2089 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2090     }
2091 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2092     != 0)
2093     {
2094     fprintf(outfile, "Matched with REG_NOSUB\n");
2095     }
2096 nigel 3 else
2097     {
2098 nigel 7 size_t i;
2099 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
2100 nigel 3 {
2101     if (pmatch[i].rm_so >= 0)
2102     {
2103 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2104 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2105     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2106 nigel 3 fprintf(outfile, "\n");
2107 nigel 35 if (i == 0 && do_showrest)
2108     {
2109     fprintf(outfile, " 0+ ");
2110 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2111     outfile);
2112 nigel 35 fprintf(outfile, "\n");
2113     }
2114 nigel 3 }
2115     }
2116     }
2117 nigel 53 free(pmatch);
2118 nigel 3 }
2119    
2120 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2121 nigel 3
2122 nigel 37 else
2123     #endif /* !defined NOPOSIX */
2124    
2125 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2126 nigel 3 {
2127 nigel 93 if (timeitm > 0)
2128 nigel 3 {
2129     register int i;
2130     clock_t time_taken;
2131     clock_t start_time = clock();
2132 nigel 77
2133 nigel 79 #if !defined NODFA
2134 nigel 77 if (all_use_dfa || use_dfa)
2135     {
2136     int workspace[1000];
2137 nigel 93 for (i = 0; i < timeitm; i++)
2138 nigel 77 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2139     options | g_notempty, use_offsets, use_size_offsets, workspace,
2140     sizeof(workspace)/sizeof(int));
2141     }
2142     else
2143 nigel 79 #endif
2144 nigel 77
2145 nigel 93 for (i = 0; i < timeitm; i++)
2146 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2147 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2148 nigel 77
2149 nigel 3 time_taken = clock() - start_time;
2150 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2151     (((double)time_taken * 1000.0) / (double)timeitm) /
2152 nigel 63 (double)CLOCKS_PER_SEC);
2153 nigel 3 }
2154    
2155 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2156 nigel 87 varying limits in order to find the minimum value for the match limit and
2157     for the recursion limit. */
2158 nigel 63
2159     if (find_match_limit)
2160     {
2161     if (extra == NULL)
2162     {
2163 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2164 nigel 63 extra->flags = 0;
2165     }
2166    
2167 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2168 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2169     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2170     PCRE_ERROR_MATCHLIMIT, "match()");
2171 nigel 63
2172 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2173     options|g_notempty, use_offsets, use_size_offsets,
2174     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2175     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2176 nigel 63 }
2177    
2178     /* If callout_data is set, use the interface with additional data */
2179    
2180     else if (callout_data_set)
2181     {
2182     if (extra == NULL)
2183     {
2184 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2185 nigel 63 extra->flags = 0;
2186     }
2187     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2188 nigel 71 extra->callout_data = &callout_data;
2189 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2190     options | g_notempty, use_offsets, use_size_offsets);
2191     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2192     }
2193    
2194     /* The normal case is just to do the match once, with the default
2195     value of match_limit. */
2196    
2197 nigel 79 #if !defined NODFA
2198 nigel 77 else if (all_use_dfa || use_dfa)
2199     {
2200     int workspace[1000];
2201     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2202     options | g_notempty, use_offsets, use_size_offsets, workspace,
2203     sizeof(workspace)/sizeof(int));
2204     if (count == 0)
2205     {
2206     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2207     count = use_size_offsets/2;
2208     }
2209     }
2210 nigel 79 #endif
2211 nigel 77
2212 nigel 75 else
2213     {
2214     count = pcre_exec(re, extra, (char *)bptr, len,
2215     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2216 nigel 77 if (count == 0)
2217     {
2218     fprintf(outfile, "Matched, but too many substrings\n");
2219     count = use_size_offsets/3;
2220     }
2221 nigel 75 }
2222 nigel 3
2223 nigel 39 /* Matched */
2224    
2225 nigel 3 if (count >= 0)
2226     {
2227 nigel 93 int i, maxcount;
2228    
2229     #if !defined NODFA
2230     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2231     #endif
2232     maxcount = use_size_offsets/3;
2233    
2234     /* This is a check against a lunatic return value. */
2235    
2236     if (count > maxcount)
2237     {
2238     fprintf(outfile,
2239     "** PCRE error: returned count %d is too big for offset size %d\n",
2240     count, use_size_offsets);
2241     count = use_size_offsets/3;
2242     if (do_g || do_G)
2243     {
2244     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2245     do_g = do_G = FALSE; /* Break g/G loop */
2246     }
2247     }
2248    
2249 nigel 29 for (i = 0; i < count * 2; i += 2)
2250 nigel 3 {
2251 nigel 57 if (use_offsets[i] < 0)
2252 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2253     else
2254     {
2255     fprintf(outfile, "%2d: ", i/2);
2256 nigel 63 (void)pchars(bptr + use_offsets[i],
2257     use_offsets[i+1] - use_offsets[i], outfile);
2258 nigel 3 fprintf(outfile, "\n");
2259 nigel 35 if (i == 0)
2260     {
2261     if (do_showrest)
2262     {
2263     fprintf(outfile, " 0+ ");
2264 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2265     outfile);
2266 nigel 35 fprintf(outfile, "\n");
2267     }
2268     }
2269 nigel 3 }
2270     }
2271 nigel 29
2272     for (i = 0; i < 32; i++)
2273     {
2274     if ((copystrings & (1 << i)) != 0)
2275     {
2276 nigel 91 char copybuffer[256];
2277 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2278 nigel 37 i, copybuffer, sizeof(copybuffer));
2279 nigel 29 if (rc < 0)
2280     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2281     else
2282 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2283 nigel 29 }
2284     }
2285    
2286 nigel 91 for (copynamesptr = copynames;
2287     *copynamesptr != 0;
2288     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2289     {
2290     char copybuffer[256];
2291     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2292     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2293     if (rc < 0)
2294     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2295     else
2296     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2297     }
2298    
2299 nigel 29 for (i = 0; i < 32; i++)
2300     {
2301     if ((getstrings & (1 << i)) != 0)
2302     {
2303     const char *substring;
2304 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2305 nigel 29 i, &substring);
2306     if (rc < 0)
2307     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2308     else
2309     {
2310     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2311 nigel 49 pcre_free_substring(substring);
2312 nigel 29 }
2313     }
2314     }
2315    
2316 nigel 91 for (getnamesptr = getnames;
2317     *getnamesptr != 0;
2318     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2319     {
2320     const char *substring;
2321     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2322     count, (char *)getnamesptr, &substring);
2323     if (rc < 0)
2324     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2325     else
2326     {
2327     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2328     pcre_free_substring(substring);
2329     }
2330     }
2331    
2332 nigel 29 if (getlist)
2333     {
2334     const char **stringlist;
2335 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2336 nigel 29 &stringlist);
2337     if (rc < 0)
2338     fprintf(outfile, "get substring list failed %d\n", rc);
2339     else
2340     {
2341     for (i = 0; i < count; i++)
2342     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2343     if (stringlist[i] != NULL)
2344     fprintf(outfile, "string list not terminated by NULL\n");
2345 nigel 49 /* free((void *)stringlist); */
2346     pcre_free_substring_list(stringlist);
2347 nigel 29 }
2348     }
2349 nigel 39 }
2350 nigel 29
2351 nigel 75 /* There was a partial match */
2352    
2353     else if (count == PCRE_ERROR_PARTIAL)
2354     {
2355 nigel 77 fprintf(outfile, "Partial match");
2356 nigel 79 #if !defined NODFA
2357 nigel 77 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2358     fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2359     bptr + use_offsets[0]);
2360 nigel 79 #endif
2361 nigel 77 fprintf(outfile, "\n");
2362 nigel 75 break; /* Out of the /g loop */
2363     }
2364    
2365 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2366 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2367     to advance the start offset, and continue. We won't be at the end of the
2368     string - that was checked before setting g_notempty.
2369 nigel 39
2370 ph10 150 Complication arises in the case when the newline option is "any" or
2371 ph10 149 "anycrlf". If the previous match was at the end of a line terminated by
2372     CRLF, an advance of one character just passes the \r, whereas we should
2373     prefer the longer newline sequence, as does the code in pcre_exec().
2374     Fudge the offset value to achieve this.
2375 ph10 144
2376 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2377     character, not one byte. */
2378    
2379 nigel 3 else
2380     {
2381 nigel 41 if (g_notempty != 0)
2382 nigel 35 {
2383 nigel 73 int onechar = 1;
2384 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2385 nigel 57 use_offsets[0] = start_offset;
2386 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2387     {
2388     int d;
2389     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2390 ph10 391 /* Note that these values are always the ASCII ones, even in
2391     EBCDIC environments. CR = 13, NL = 10. */
2392     obits = (d == 13)? PCRE_NEWLINE_CR :
2393     (d == 10)? PCRE_NEWLINE_LF :
2394     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2395 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2396 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2397     }
2398 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2399 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2400 ph10 149 &&
2401 ph10 143 start_offset < len - 1 &&
2402     bptr[start_offset] == '\r' &&
2403     bptr[start_offset+1] == '\n')
2404 ph10 144 onechar++;
2405 ph10 143 else if (use_utf8)
2406 nigel 73 {
2407     while (start_offset + onechar < len)
2408     {
2409     int tb = bptr[start_offset+onechar];
2410     if (tb <= 127) break;
2411     tb &= 0xc0;
2412     if (tb != 0 && tb != 0xc0) onechar++;
2413     }
2414     }
2415     use_offsets[1] = start_offset + onechar;
2416 nigel 35 }
2417 nigel 41 else
2418     {
2419 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2420 nigel 41 {
2421 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
2422 nigel 41 }
2423 nigel 73 else fprintf(outfile, "Error %d\n", count);
2424 nigel 41 break; /* Out of the /g loop */
2425     }
2426 nigel 3 }
2427 nigel 35
2428 nigel 39 /* If not /g or /G we are done */
2429    
2430     if (!do_g && !do_G) break;
2431    
2432 nigel 41 /* If we have matched an empty string, first check to see if we are at
2433     the end of the subject. If so, the /g loop is over. Otherwise, mimic
2434     what Perl's /g option does. This turns out to be rather cunning. First
2435 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2436     same point. If this fails (picked up above) we advance to the next
2437 ph10 143 character. */
2438 ph10 142
2439 nigel 41 g_notempty = 0;
2440 ph10 142
2441 nigel 57 if (use_offsets[0] == use_offsets[1])
2442 nigel 41 {
2443 nigel 57 if (use_offsets[0] == len) break;
2444 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2445 nigel 41 }
2446 nigel 39
2447     /* For /g, update the start offset, leaving the rest alone */
2448    
2449 ph10 143 if (do_g) start_offset = use_offsets[1];
2450 nigel 39
2451     /* For /G, update the pointer and length */
2452    
2453     else
2454 nigel 35 {
2455 ph10 143 bptr += use_offsets[1];
2456     len -= use_offsets[1];
2457 nigel 35 }
2458 nigel 39 } /* End of loop for /g and /G */
2459 nigel 91
2460     NEXT_DATA: continue;
2461 nigel 39 } /* End of loop for data lines */
2462 nigel 3
2463 nigel 11 CONTINUE:
2464 nigel 37
2465     #if !defined NOPOSIX
2466 nigel 3 if (posix || do_posix) regfree(&preg);
2467 nigel 37 #endif
2468    
2469 nigel 77 if (re != NULL) new_free(re);
2470     if (extra != NULL) new_free(extra);
2471 nigel 25 if (tables != NULL)
2472     {
2473 nigel 77 new_free((void *)tables);
2474 nigel 25 setlocale(LC_CTYPE, "C");
2475 nigel 93 locale_set = 0;
2476 nigel 25 }
2477 nigel 3 }
2478    
2479 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2480 nigel 77
2481     EXIT:
2482    
2483     if (infile != NULL && infile != stdin) fclose(infile);
2484     if (outfile != NULL && outfile != stdout) fclose(outfile);
2485    
2486     free(buffer);
2487     free(dbuffer);
2488     free(pbuffer);
2489     free(offsets);
2490    
2491     return yield;
2492 nigel 3 }
2493    
2494 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12