/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 389 - (hide annotations) (download)
Sun Mar 15 18:24:05 2009 UTC (5 years, 4 months ago) by ph10
File MIME type: text/plain
File size: 72861 byte(s)
Add PCRE_NO_START_OPTIMIZE

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
43 nigel 3 #include <ctype.h>
44     #include <stdio.h>
45     #include <string.h>
46     #include <stdlib.h>
47     #include <time.h>
48 nigel 25 #include <locale.h>
49 nigel 75 #include <errno.h>
50 nigel 3
51 ph10 287 #ifdef SUPPORT_LIBREADLINE
52 ph10 343 #ifdef HAVE_UNISTD_H
53 ph10 287 #include <unistd.h>
54 ph10 343 #endif
55 ph10 287 #include <readline/readline.h>
56     #include <readline/history.h>
57     #endif
58 nigel 93
59 ph10 287
60 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
61     input and output without "b"; then I was told that "b" was needed in some
62     environments, so it was added for release 5.0 to both the input and output. (It
63     makes no difference on Unix-like systems.) Later I was told that it is wrong
64     for the input on Windows. I've now abstracted the modes into two macros that
65     are set here, to make it easier to fiddle with them, and removed "b" from the
66     input mode under Windows. */
67    
68     #if defined(_WIN32) || defined(WIN32)
69     #include <io.h> /* For _setmode() */
70     #include <fcntl.h> /* For _O_BINARY */
71     #define INPUT_MODE "r"
72     #define OUTPUT_MODE "wb"
73    
74 ph10 343 #define isatty _isatty /* This is what Windows calls them, I'm told */
75     #define fileno _fileno
76    
77 nigel 93 #else
78     #include <sys/time.h> /* These two includes are needed */
79     #include <sys/resource.h> /* for setrlimit(). */
80     #define INPUT_MODE "rb"
81     #define OUTPUT_MODE "wb"
82 nigel 91 #endif
83    
84 nigel 93
85 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
86     displaying the results of pcre_study() and we also need to know about the
87     internal macros, structures, and other internal data values; pcretest has
88     "inside information" compared to a program that strictly follows the PCRE API.
89 nigel 37
90 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
91     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
92     appropriately for an application, not for building PCRE. */
93 nigel 77
94 ph10 145 #include "pcre.h"
95 nigel 77 #include "pcre_internal.h"
96    
97 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
98     to keep two copies, we include the source file here, changing the names of the
99     external symbols to prevent clashes. */
100 nigel 77
101 ph10 351 #define _pcre_ucp_gentype ucp_gentype
102 nigel 85 #define _pcre_utf8_table1 utf8_table1
103     #define _pcre_utf8_table1_size utf8_table1_size
104     #define _pcre_utf8_table2 utf8_table2
105     #define _pcre_utf8_table3 utf8_table3
106     #define _pcre_utf8_table4 utf8_table4
107     #define _pcre_utt utt
108     #define _pcre_utt_size utt_size
109 ph10 240 #define _pcre_utt_names utt_names
110 nigel 85 #define _pcre_OP_lengths OP_lengths
111    
112     #include "pcre_tables.c"
113    
114     /* We also need the pcre_printint() function for printing out compiled
115     patterns. This function is in a separate file so that it can be included in
116 nigel 93 pcre_compile.c when that module is compiled with debugging enabled.
117 nigel 85
118 nigel 93 The definition of the macro PRINTABLE, which determines whether to print an
119     output character as-is or as a hex value when showing compiled patterns, is
120     contained in this file. We use it here also, in cases when the locale has not
121     been explicitly changed, so as to get consistent output from systems that
122     differ in their output from isprint() even in the "C" locale. */
123    
124 nigel 85 #include "pcre_printint.src"
125    
126 nigel 93 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
127 nigel 85
128 nigel 93
129 nigel 37 /* It is possible to compile this test program without including support for
130     testing the POSIX interface, though this is not available via the standard
131     Makefile. */
132    
133     #if !defined NOPOSIX
134 nigel 3 #include "pcreposix.h"
135 nigel 37 #endif
136 nigel 3
137 ph10 107 /* It is also possible, for the benefit of the version currently imported into
138     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
139     interface to the DFA matcher (NODFA), and without the doublecheck of the old
140     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
141     UTF8 support if PCRE is built without it. */
142 nigel 79
143 ph10 107 #ifndef SUPPORT_UTF8
144     #ifndef NOUTF8
145     #define NOUTF8
146     #endif
147     #endif
148 nigel 79
149 ph10 107
150 nigel 85 /* Other parameters */
151    
152 nigel 3 #ifndef CLOCKS_PER_SEC
153     #ifdef CLK_TCK
154     #define CLOCKS_PER_SEC CLK_TCK
155     #else
156     #define CLOCKS_PER_SEC 100
157     #endif
158     #endif
159    
160 nigel 93 /* This is the default loop count for timing. */
161    
162 nigel 75 #define LOOPREPEAT 500000
163 nigel 3
164 nigel 85 /* Static variables */
165    
166 nigel 3 static FILE *outfile;
167     static int log_store = 0;
168 nigel 63 static int callout_count;
169     static int callout_extra;
170     static int callout_fail_count;
171     static int callout_fail_id;
172 ph10 210 static int debug_lengths;
173 nigel 63 static int first_callout;
174 nigel 93 static int locale_set = 0;
175 nigel 73 static int show_malloc;
176 nigel 67 static int use_utf8;
177 nigel 43 static size_t gotten_store;
178 nigel 3
179 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
180    
181     static int buffer_size = 50000;
182     static uschar *buffer = NULL;
183     static uschar *dbuffer = NULL;
184 nigel 75 static uschar *pbuffer = NULL;
185 nigel 3
186 nigel 75
187 nigel 49
188     /*************************************************
189 nigel 91 * Read or extend an input line *
190     *************************************************/
191    
192     /* Input lines are read into buffer, but both patterns and data lines can be
193     continued over multiple input lines. In addition, if the buffer fills up, we
194     want to automatically expand it so as to be able to handle extremely large
195     lines that are needed for certain stress tests. When the input buffer is
196     expanded, the other two buffers must also be expanded likewise, and the
197     contents of pbuffer, which are a copy of the input for callouts, must be
198     preserved (for when expansion happens for a data line). This is not the most
199     optimal way of handling this, but hey, this is just a test program!
200    
201     Arguments:
202     f the file to read
203     start where in buffer to start (this *must* be within buffer)
204 ph10 287 prompt for stdin or readline()
205 nigel 91
206     Returns: pointer to the start of new data
207     could be a copy of start, or could be moved
208     NULL if no data read and EOF reached
209     */
210    
211     static uschar *
212 ph10 287 extend_inputline(FILE *f, uschar *start, const char *prompt)
213 nigel 91 {
214     uschar *here = start;
215    
216     for (;;)
217     {
218     int rlen = buffer_size - (here - buffer);
219 nigel 93
220 nigel 91 if (rlen > 1000)
221     {
222     int dlen;
223 ph10 289
224 ph10 287 /* If libreadline support is required, use readline() to read a line if the
225     input is a terminal. Note that readline() removes the trailing newline, so
226     we must put it back again, to be compatible with fgets(). */
227 ph10 289
228 ph10 287 #ifdef SUPPORT_LIBREADLINE
229     if (isatty(fileno(f)))
230     {
231 ph10 289 size_t len;
232 ph10 287 char *s = readline(prompt);
233     if (s == NULL) return (here == start)? NULL : start;
234     len = strlen(s);
235 ph10 289 if (len > 0) add_history(s);
236 ph10 287 if (len > rlen - 1) len = rlen - 1;
237     memcpy(here, s, len);
238     here[len] = '\n';
239 ph10 289 here[len+1] = 0;
240     free(s);
241 ph10 287 }
242 ph10 289 else
243     #endif
244    
245 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
246 ph10 289
247 ph10 287 {
248 ph10 289 if (f == stdin) printf(prompt);
249 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
250     return (here == start)? NULL : start;
251 ph10 289 }
252    
253 nigel 91 dlen = (int)strlen((char *)here);
254     if (dlen > 0 && here[dlen - 1] == '\n') return start;
255     here += dlen;
256     }
257    
258     else
259     {
260     int new_buffer_size = 2*buffer_size;
261     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
262     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
263     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
264    
265     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
266     {
267     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
268     exit(1);
269     }
270    
271     memcpy(new_buffer, buffer, buffer_size);
272     memcpy(new_pbuffer, pbuffer, buffer_size);
273    
274     buffer_size = new_buffer_size;
275    
276     start = new_buffer + (start - buffer);
277     here = new_buffer + (here - buffer);
278    
279     free(buffer);
280     free(dbuffer);
281     free(pbuffer);
282    
283     buffer = new_buffer;
284     dbuffer = new_dbuffer;
285     pbuffer = new_pbuffer;
286     }
287     }
288    
289     return NULL; /* Control never gets here */
290     }
291    
292    
293    
294    
295    
296    
297    
298     /*************************************************
299 nigel 63 * Read number from string *
300     *************************************************/
301    
302     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
303     around with conditional compilation, just do the job by hand. It is only used
304 nigel 93 for unpicking arguments, so just keep it simple.
305 nigel 63
306     Arguments:
307     str string to be converted
308     endptr where to put the end pointer
309    
310     Returns: the converted value, as an int
311     */
312    
/* Skip leading white space, then accumulate a run of decimal digits into an
int. On return, *endptr points at the first character that was not consumed. */

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int total = 0;
unsigned char *cursor = str;

while (*cursor != 0 && isspace(*cursor)) cursor++;

for (; isdigit(*cursor); cursor++)
  total = total * 10 + (int)(*cursor - '0');

*endptr = cursor;
return total;
}
322    
323    
324    
325 nigel 49
326     /*************************************************
327     * Convert UTF-8 string to value *
328     *************************************************/
329    
330     /* This function takes one or more bytes that represents a UTF-8 character,
331     and returns the value of the character.
332    
333     Argument:
334 nigel 91 utf8bytes a pointer to the byte vector
335     vptr a pointer to an int to receive the value
336 nigel 49
337 nigel 91 Returns: > 0 => the number of bytes consumed
338     -6 to 0 => malformed UTF-8 character at offset = (-return)
339 nigel 49 */
340    
341 nigel 79 #if !defined NOUTF8
342    
343 nigel 67 static int
344 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
345 nigel 49 {
346 nigel 91 int c = *utf8bytes++;
347 nigel 49 int d = c;
348     int i, j, s;
349    
350     for (i = -1; i < 6; i++) /* i is number of additional bytes */
351     {
352     if ((d & 0x80) == 0) break;
353     d <<= 1;
354     }
355    
356     if (i == -1) { *vptr = c; return 1; } /* ascii character */
357     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
358    
359     /* i now has a value in the range 1-5 */
360    
361 nigel 59 s = 6*i;
362 nigel 85 d = (c & utf8_table3[i]) << s;
363 nigel 49
364     for (j = 0; j < i; j++)
365     {
366 nigel 91 c = *utf8bytes++;
367 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
368 nigel 59 s -= 6;
369 nigel 49 d |= (c & 0x3f) << s;
370     }
371    
372     /* Check that encoding was the correct unique one */
373    
374 nigel 85 for (j = 0; j < utf8_table1_size; j++)
375     if (d <= utf8_table1[j]) break;
376 nigel 49 if (j != i) return -(i+1);
377    
378     /* Valid value */
379    
380     *vptr = d;
381     return i+1;
382     }
383    
384 nigel 79 #endif
385 nigel 49
386    
387 nigel 79
388 nigel 63 /*************************************************
389 nigel 85 * Convert character value to UTF-8 *
390     *************************************************/
391    
392     /* This function takes an integer value in the range 0 - 0x7fffffff
393     and encodes it as a UTF-8 character in 1 to 6 bytes.
394    
395     Arguments:
396     cvalue the character value
397 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
398 nigel 85
399     Returns: number of characters placed in the buffer
400     */
401    
402 nigel 93 #if !defined NOUTF8
403    
404 nigel 85 static int
405 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
406 nigel 85 {
407     register int i, j;
408     for (i = 0; i < utf8_table1_size; i++)
409     if (cvalue <= utf8_table1[i]) break;
410 nigel 91 utf8bytes += i;
411 nigel 85 for (j = i; j > 0; j--)
412     {
413 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
414 nigel 85 cvalue >>= 6;
415     }
416 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
417 nigel 85 return i + 1;
418     }
419    
420 nigel 93 #endif
421 nigel 85
422    
423 nigel 93
424 nigel 85 /*************************************************
425 nigel 63 * Print character string *
426     *************************************************/
427 nigel 49
428 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
429     mode. Yields number of characters printed. If handed a NULL file, just counts
430     chars without printing. */
431 nigel 49
432 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
433 nigel 3 {
434 nigel 85 int c = 0;
435 nigel 63 int yield = 0;
436 nigel 3
437 nigel 63 while (length-- > 0)
438 nigel 3 {
439 nigel 79 #if !defined NOUTF8
440 nigel 67 if (use_utf8)
441 nigel 63 {
442     int rc = utf82ord(p, &c);
443 nigel 3
444 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
445     {
446     length -= rc - 1;
447     p += rc;
448 nigel 93 if (PRINTHEX(c))
449 nigel 63 {
450     if (f != NULL) fprintf(f, "%c", c);
451     yield++;
452     }
453     else
454     {
455 nigel 93 int n = 4;
456     if (f != NULL) fprintf(f, "\\x{%02x}", c);
457     yield += (n <= 0x000000ff)? 2 :
458     (n <= 0x00000fff)? 3 :
459     (n <= 0x0000ffff)? 4 :
460     (n <= 0x000fffff)? 5 : 6;
461 nigel 63 }
462     continue;
463     }
464     }
465 nigel 79 #endif
466 nigel 3
467 nigel 63 /* Not UTF-8, or malformed UTF-8 */
468    
469 nigel 93 c = *p++;
470     if (PRINTHEX(c))
471 nigel 3 {
472 nigel 63 if (f != NULL) fprintf(f, "%c", c);
473     yield++;
474 nigel 3 }
475 nigel 63 else
476 nigel 3 {
477 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
478     yield += 4;
479     }
480     }
481 nigel 3
482 nigel 63 return yield;
483     }
484 nigel 23
485 nigel 3
486 nigel 23
487 nigel 63 /*************************************************
488     * Callout function *
489     *************************************************/
490 nigel 3
491 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
492     the match. Yield zero unless more callouts than the fail count, or the callout
493     data is not zero. */
494 nigel 3
495 nigel 63 static int callout(pcre_callout_block *cb)
496     {
497     FILE *f = (first_callout | callout_extra)? outfile : NULL;
498 nigel 75 int i, pre_start, post_start, subject_length;
499 nigel 3
500 nigel 63 if (callout_extra)
501     {
502     fprintf(f, "Callout %d: last capture = %d\n",
503     cb->callout_number, cb->capture_last);
504 nigel 3
505 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
506     {
507     if (cb->offset_vector[i] < 0)
508     fprintf(f, "%2d: <unset>\n", i/2);
509     else
510     {
511     fprintf(f, "%2d: ", i/2);
512     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
513     cb->offset_vector[i+1] - cb->offset_vector[i], f);
514     fprintf(f, "\n");
515     }
516     }
517     }
518 nigel 3
519 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
520     datails. On subsequent calls in the same match, we use pchars just to find the
521     printed lengths of the substrings. */
522 nigel 3
523 nigel 63 if (f != NULL) fprintf(f, "--->");
524 nigel 3
525 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
526     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
527     cb->current_position - cb->start_match, f);
528 nigel 3
529 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
530    
531 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
532     cb->subject_length - cb->current_position, f);
533 nigel 3
534 nigel 63 if (f != NULL) fprintf(f, "\n");
535 nigel 9
536 nigel 63 /* Always print appropriate indicators, with callout number if not already
537 nigel 75 shown. For automatic callouts, show the pattern offset. */
538 nigel 3
539 nigel 75 if (cb->callout_number == 255)
540     {
541     fprintf(outfile, "%+3d ", cb->pattern_position);
542     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
543     }
544     else
545     {
546     if (callout_extra) fprintf(outfile, " ");
547     else fprintf(outfile, "%3d ", cb->callout_number);
548     }
549 nigel 3
550 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
551     fprintf(outfile, "^");
552 nigel 3
553 nigel 63 if (post_start > 0)
554     {
555     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
556     fprintf(outfile, "^");
557 nigel 3 }
558    
559 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
560     fprintf(outfile, " ");
561    
562     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
563     pbuffer + cb->pattern_position);
564    
565 nigel 63 fprintf(outfile, "\n");
566     first_callout = 0;
567 nigel 3
568 nigel 71 if (cb->callout_data != NULL)
569 nigel 49 {
570 nigel 71 int callout_data = *((int *)(cb->callout_data));
571     if (callout_data != 0)
572     {
573     fprintf(outfile, "Callout data = %d\n", callout_data);
574     return callout_data;
575     }
576 nigel 63 }
577 nigel 49
578 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
579     (++callout_count >= callout_fail_count)? 1 : 0;
580 nigel 3 }
581    
582    
583 nigel 63 /*************************************************
584 nigel 73 * Local malloc functions *
585 nigel 63 *************************************************/
586 nigel 3
587     /* Alternative malloc function, to test functionality and show the size of the
588     compiled re. */
589    
590     static void *new_malloc(size_t size)
591     {
592 nigel 73 void *block = malloc(size);
593 nigel 43 gotten_store = size;
594 nigel 73 if (show_malloc)
595 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
596 nigel 73 return block;
597 nigel 3 }
598    
599 nigel 73 static void new_free(void *block)
600     {
601     if (show_malloc)
602     fprintf(outfile, "free %p\n", block);
603     free(block);
604     }
605 nigel 3
606    
607 nigel 73 /* For recursion malloc/free, to test stacking calls */
608    
609     static void *stack_malloc(size_t size)
610     {
611     void *block = malloc(size);
612     if (show_malloc)
613 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
614 nigel 73 return block;
615     }
616    
617     static void stack_free(void *block)
618     {
619     if (show_malloc)
620     fprintf(outfile, "stack_free %p\n", block);
621     free(block);
622     }
623    
624    
625 nigel 63 /*************************************************
626     * Call pcre_fullinfo() *
627     *************************************************/
628 nigel 43
629     /* Get one piece of information from the pcre_fullinfo() function */
630    
631     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
632     {
633     int rc;
634     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
635     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
636     }
637    
638    
639    
640 nigel 63 /*************************************************
641 nigel 75 * Byte flipping function *
642     *************************************************/
643    
/* Reverse the byte order of the low 2 bytes of value when n is 2, or of the
low 4 bytes for any other n. Bits above those bytes are discarded. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
653    
654    
655    
656    
657     /*************************************************
658 nigel 87 * Check match or recursion limit *
659     *************************************************/
660    
661     static int
662     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
663     int start_offset, int options, int *use_offsets, int use_size_offsets,
664     int flag, unsigned long int *limit, int errnumber, const char *msg)
665     {
666     int count;
667     int min = 0;
668     int mid = 64;
669     int max = -1;
670    
671     extra->flags |= flag;
672    
673     for (;;)
674     {
675     *limit = mid;
676    
677     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
678     use_offsets, use_size_offsets);
679    
680     if (count == errnumber)
681     {
682     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
683     min = mid;
684     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
685     }
686    
687     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
688     count == PCRE_ERROR_PARTIAL)
689     {
690     if (mid == min + 1)
691     {
692     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
693     break;
694     }
695     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
696     max = mid;
697     mid = (min + mid)/2;
698     }
699     else break; /* Some other error */
700     }
701    
702     extra->flags &= ~flag;
703     return count;
704     }
705    
706    
707    
708     /*************************************************
709 ph10 227 * Case-independent strncmp() function *
710     *************************************************/
711    
712     /*
713     Arguments:
714     s first string
715     t second string
716     n number of characters to compare
717    
718     Returns: < 0, = 0, or > 0, according to the comparison
719     */
720    
721     static int
722     strncmpic(uschar *s, uschar *t, int n)
723     {
724     while (n--)
725     {
726     int c = tolower(*s++) - tolower(*t++);
727     if (c) return c;
728     }
729     return 0;
730     }
731    
732    
733    
734     /*************************************************
735 nigel 91 * Check newline indicator *
736     *************************************************/
737    
738     /* This is used both at compile and run-time to check for <xxx> escapes, where
739 ph10 149 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
740     no match.
741 nigel 91
742     Arguments:
743     p points after the leading '<'
744     f file for error message
745    
746     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
747     */
748    
749     static int
750     check_newline(uschar *p, FILE *f)
751     {
752 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
753     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
754     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
755     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
756     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
757 ph10 231 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
758     if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
759 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
760     return 0;
761     }
762    
763    
764    
765     /*************************************************
766 nigel 93 * Usage function *
767     *************************************************/
768    
/* Write the command-line summary to stdout. Lines for optional features
(readline, DFA matching, POSIX interface) depend on the build configuration. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n", stdout);
fputs("Input and output default to stdin and stdout.\n", stdout);
#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
fputs("\nOptions:\n", stdout);
fputs(" -b show compiled code (bytecode)\n", stdout);
fputs(" -C show PCRE compile-time options and exit\n", stdout);
fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n", stdout);
fputs(" -i show information about compiled patterns\n", stdout);
fputs(" -M find MATCH_LIMIT minimum for each subject\n", stdout);
fputs(" -m output memory used information\n", stdout);
fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
fputs(" -s output store (memory) used information\n", stdout);
fputs(" -t time compilation and execution\n", stdout);
fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
fputs(" -tm time execution (matching) only\n", stdout);
fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
802    
803    
804    
805     /*************************************************
806 nigel 63 * Main Program *
807     *************************************************/
808 nigel 43
809 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
810     consist of a regular expression, in delimiters and optionally followed by
811     options, followed by a set of test data, terminated by an empty line. */
812    
813     int main(int argc, char **argv)
814     {
815     FILE *infile = stdin;
816     int options = 0;
817     int study_options = 0;
818 ph10 386 int default_find_match_limit = FALSE;
819 nigel 3 int op = 1;
820     int timeit = 0;
821 nigel 93 int timeitm = 0;
822 nigel 3 int showinfo = 0;
823 nigel 31 int showstore = 0;
824 nigel 87 int quiet = 0;
825 nigel 53 int size_offsets = 45;
826     int size_offsets_max;
827 nigel 77 int *offsets = NULL;
828 nigel 53 #if !defined NOPOSIX
829 nigel 3 int posix = 0;
830 nigel 53 #endif
831 nigel 3 int debug = 0;
832 nigel 11 int done = 0;
833 nigel 77 int all_use_dfa = 0;
834     int yield = 0;
835 nigel 91 int stack_size;
836 nigel 3
837 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
838     that 1024 is plenty long enough for the few names we'll be testing. */
839 nigel 69
840 nigel 91 uschar copynames[1024];
841     uschar getnames[1024];
842    
843     uschar *copynamesptr;
844     uschar *getnamesptr;
845    
846 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
847 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
848 nigel 69
849 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
850     dbuffer = (unsigned char *)malloc(buffer_size);
851     pbuffer = (unsigned char *)malloc(buffer_size);
852 nigel 69
853 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
854 nigel 3
855 nigel 93 outfile = stdout;
856    
857     /* The following _setmode() stuff is some Windows magic that tells its runtime
858     library to translate CRLF into a single LF character. At least, that's what
859     I've been told: never having used Windows I take this all on trust. Originally
860     it set 0x8000, but then I was advised that _O_BINARY was better. */
861    
862 nigel 75 #if defined(_WIN32) || defined(WIN32)
863 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
864     #endif
865 nigel 75
866 nigel 3 /* Scan options */
867    
868     while (argc > 1 && argv[op][0] == '-')
869     {
870 nigel 63 unsigned char *endptr;
871 nigel 53
872 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
873     showstore = 1;
874 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
875 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
876 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
877     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
878 ph10 386 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
879 nigel 79 #if !defined NODFA
880 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
881 nigel 79 #endif
882 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
883 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
884     *endptr == 0))
885 nigel 53 {
886     op++;
887     argc--;
888     }
889 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
890     {
891     int both = argv[op][2] == 0;
892     int temp;
893     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
894     *endptr == 0))
895     {
896     timeitm = temp;
897     op++;
898     argc--;
899     }
900     else timeitm = LOOPREPEAT;
901     if (both) timeit = timeitm;
902     }
903 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
904     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
905     *endptr == 0))
906     {
907 nigel 93 #if defined(_WIN32) || defined(WIN32)
908 nigel 91 printf("PCRE: -S not supported on this OS\n");
909     exit(1);
910     #else
911     int rc;
912     struct rlimit rlim;
913     getrlimit(RLIMIT_STACK, &rlim);
914     rlim.rlim_cur = stack_size * 1024 * 1024;
915     rc = setrlimit(RLIMIT_STACK, &rlim);
916     if (rc != 0)
917     {
918     printf("PCRE: setrlimit() failed with error %d\n", rc);
919     exit(1);
920     }
921     op++;
922     argc--;
923     #endif
924     }
925 nigel 53 #if !defined NOPOSIX
926 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
927 nigel 53 #endif
928 nigel 63 else if (strcmp(argv[op], "-C") == 0)
929     {
930     int rc;
931 ph10 376 unsigned long int lrc;
932 nigel 63 printf("PCRE version %s\n", pcre_version());
933     printf("Compiled with\n");
934     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
935     printf(" %sUTF-8 support\n", rc? "" : "No ");
936 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
937     printf(" %sUnicode properties support\n", rc? "" : "No ");
938 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
939 nigel 91 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
940 nigel 93 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
941 ph10 150 (rc == -2)? "ANYCRLF" :
942 nigel 93 (rc == -1)? "ANY" : "???");
943 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
944     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
945     "all Unicode newlines");
946 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
947     printf(" Internal link size = %d\n", rc);
948     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
949     printf(" POSIX malloc threshold = %d\n", rc);
950 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
951     printf(" Default match limit = %ld\n", lrc);
952     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
953     printf(" Default recursion depth limit = %ld\n", lrc);
954 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
955     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
956 ph10 121 goto EXIT;
957 nigel 63 }
958 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
959     strcmp(argv[op], "--help") == 0)
960     {
961     usage();
962     goto EXIT;
963     }
964 nigel 3 else
965     {
966 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
967 nigel 93 usage();
968 nigel 77 yield = 1;
969     goto EXIT;
970 nigel 3 }
971     op++;
972     argc--;
973     }
974    
975 nigel 53 /* Get the store for the offsets vector, and remember what it was */
976    
977     size_offsets_max = size_offsets;
978 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
979 nigel 53 if (offsets == NULL)
980     {
981     printf("** Failed to get %d bytes of memory for offsets vector\n",
982 ph10 151 (int)(size_offsets_max * sizeof(int)));
983 nigel 77 yield = 1;
984     goto EXIT;
985 nigel 53 }
986    
987 nigel 3 /* Sort out the input and output files */
988    
989     if (argc > 1)
990     {
991 nigel 93 infile = fopen(argv[op], INPUT_MODE);
992 nigel 3 if (infile == NULL)
993     {
994     printf("** Failed to open %s\n", argv[op]);
995 nigel 77 yield = 1;
996     goto EXIT;
997 nigel 3 }
998     }
999    
1000     if (argc > 2)
1001     {
1002 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
1003 nigel 3 if (outfile == NULL)
1004     {
1005     printf("** Failed to open %s\n", argv[op+1]);
1006 nigel 77 yield = 1;
1007     goto EXIT;
1008 nigel 3 }
1009     }
1010    
1011     /* Set alternative malloc function */
1012    
1013     pcre_malloc = new_malloc;
1014 nigel 73 pcre_free = new_free;
1015     pcre_stack_malloc = stack_malloc;
1016     pcre_stack_free = stack_free;
1017 nigel 3
1018 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1019 nigel 3
1020 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1021 nigel 3
1022     /* Main loop */
1023    
1024 nigel 11 while (!done)
1025 nigel 3 {
1026     pcre *re = NULL;
1027     pcre_extra *extra = NULL;
1028 nigel 37
1029     #if !defined NOPOSIX /* There are still compilers that require no indent */
1030 nigel 3 regex_t preg;
1031 nigel 45 int do_posix = 0;
1032 nigel 37 #endif
1033    
1034 nigel 7 const char *error;
1035 nigel 25 unsigned char *p, *pp, *ppp;
1036 nigel 75 unsigned char *to_file = NULL;
1037 nigel 53 const unsigned char *tables = NULL;
1038 nigel 75 unsigned long int true_size, true_study_size = 0;
1039     size_t size, regex_gotten_store;
1040 nigel 3 int do_study = 0;
1041 nigel 25 int do_debug = debug;
1042 nigel 35 int do_G = 0;
1043     int do_g = 0;
1044 nigel 25 int do_showinfo = showinfo;
1045 nigel 35 int do_showrest = 0;
1046 nigel 75 int do_flip = 0;
1047 nigel 93 int erroroffset, len, delimiter, poffset;
1048 nigel 3
1049 nigel 67 use_utf8 = 0;
1050 ph10 211 debug_lengths = 1;
1051 nigel 63
1052 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1053 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1054 nigel 63 fflush(outfile);
1055 nigel 3
1056     p = buffer;
1057     while (isspace(*p)) p++;
1058     if (*p == 0) continue;
1059    
1060 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1061 nigel 3
1062 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1063     {
1064 nigel 91 unsigned long int magic, get_options;
1065 nigel 75 uschar sbuf[8];
1066     FILE *f;
1067    
1068     p++;
1069     pp = p + (int)strlen((char *)p);
1070     while (isspace(pp[-1])) pp--;
1071     *pp = 0;
1072    
1073     f = fopen((char *)p, "rb");
1074     if (f == NULL)
1075     {
1076     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1077     continue;
1078     }
1079    
1080     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1081    
1082     true_size =
1083     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1084     true_study_size =
1085     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1086    
1087     re = (real_pcre *)new_malloc(true_size);
1088     regex_gotten_store = gotten_store;
1089    
1090     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1091    
1092     magic = ((real_pcre *)re)->magic_number;
1093     if (magic != MAGIC_NUMBER)
1094     {
1095     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1096     {
1097     do_flip = 1;
1098     }
1099     else
1100     {
1101     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1102     fclose(f);
1103     continue;
1104     }
1105     }
1106    
1107     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1108     do_flip? " (byte-inverted)" : "", p);
1109    
1110     /* Need to know if UTF-8 for printing data strings */
1111    
1112 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1113     use_utf8 = (get_options & PCRE_UTF8) != 0;
1114 nigel 75
1115     /* Now see if there is any following study data */
1116    
1117     if (true_study_size != 0)
1118     {
1119     pcre_study_data *psd;
1120    
1121     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1122     extra->flags = PCRE_EXTRA_STUDY_DATA;
1123    
1124     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1125     extra->study_data = psd;
1126    
1127     if (fread(psd, 1, true_study_size, f) != true_study_size)
1128     {
1129     FAIL_READ:
1130     fprintf(outfile, "Failed to read data from %s\n", p);
1131     if (extra != NULL) new_free(extra);
1132     if (re != NULL) new_free(re);
1133     fclose(f);
1134     continue;
1135     }
1136     fprintf(outfile, "Study data loaded from %s\n", p);
1137     do_study = 1; /* To get the data output if requested */
1138     }
1139     else fprintf(outfile, "No study data\n");
1140    
1141     fclose(f);
1142     goto SHOW_INFO;
1143     }
1144    
1145     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1146     the pattern; if it isn't complete, read more. */
1147    
1148 nigel 3 delimiter = *p++;
1149    
1150 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1151 nigel 3 {
1152 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1153 nigel 3 goto SKIP_DATA;
1154     }
1155    
1156     pp = p;
1157 nigel 93 poffset = p - buffer;
1158 nigel 3
1159     for(;;)
1160     {
1161 nigel 29 while (*pp != 0)
1162     {
1163     if (*pp == '\\' && pp[1] != 0) pp++;
1164     else if (*pp == delimiter) break;
1165     pp++;
1166     }
1167 nigel 3 if (*pp != 0) break;
1168 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1169 nigel 3 {
1170     fprintf(outfile, "** Unexpected EOF\n");
1171 nigel 11 done = 1;
1172     goto CONTINUE;
1173 nigel 3 }
1174 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1175 nigel 3 }
1176    
1177 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1178     pointer to the correct relative point in the buffer. */
1179    
1180     p = buffer + poffset;
1181    
1182 nigel 29 /* If the first character after the delimiter is backslash, make
1183     the pattern end with backslash. This is purely to provide a way
1184     of testing for the error message when a pattern ends with backslash. */
1185    
1186     if (pp[1] == '\\') *pp++ = '\\';
1187    
1188 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1189     for callouts. */
1190 nigel 3
1191     *pp++ = 0;
1192 nigel 75 strcpy((char *)pbuffer, (char *)p);
1193 nigel 3
1194     /* Look for options after final delimiter */
1195    
1196     options = 0;
1197     study_options = 0;
1198 nigel 31 log_store = showstore; /* default from command line */
1199    
1200 nigel 3 while (*pp != 0)
1201     {
1202     switch (*pp++)
1203     {
1204 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1205 nigel 35 case 'g': do_g = 1; break;
1206 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1207     case 'm': options |= PCRE_MULTILINE; break;
1208     case 's': options |= PCRE_DOTALL; break;
1209     case 'x': options |= PCRE_EXTENDED; break;
1210 nigel 25
1211 nigel 35 case '+': do_showrest = 1; break;
1212 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1213 nigel 93 case 'B': do_debug = 1; break;
1214 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1215 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1216 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1217 nigel 75 case 'F': do_flip = 1; break;
1218 nigel 35 case 'G': do_G = 1; break;
1219 nigel 25 case 'I': do_showinfo = 1; break;
1220 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1221 nigel 31 case 'M': log_store = 1; break;
1222 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1223 nigel 37
1224     #if !defined NOPOSIX
1225 nigel 3 case 'P': do_posix = 1; break;
1226 nigel 37 #endif
1227    
1228 nigel 3 case 'S': do_study = 1; break;
1229 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1230 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1231 ph10 126 case 'Z': debug_lengths = 0; break;
1232 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1233 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1234 nigel 25
1235     case 'L':
1236     ppp = pp;
1237 nigel 93 /* The '\r' test here is so that it works on Windows. */
1238     /* The 0 (NUL terminator) test is just in case this is an unterminated line. */
1239     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1240 nigel 25 *ppp = 0;
1241     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1242     {
1243     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1244     goto SKIP_DATA;
1245     }
1246 nigel 93 locale_set = 1;
1247 nigel 25 tables = pcre_maketables();
1248     pp = ppp;
1249     break;
1250    
1251 nigel 75 case '>':
1252     to_file = pp;
1253     while (*pp != 0) pp++;
1254     while (isspace(pp[-1])) pp--;
1255     *pp = 0;
1256     break;
1257    
1258 nigel 91 case '<':
1259     {
1260 ph10 336 if (strncmp((char *)pp, "JS>", 3) == 0)
1261     {
1262     options |= PCRE_JAVASCRIPT_COMPAT;
1263 ph10 345 pp += 3;
1264 ph10 336 }
1265     else
1266 ph10 345 {
1267 ph10 336 int x = check_newline(pp, outfile);
1268     if (x == 0) goto SKIP_DATA;
1269     options |= x;
1270     while (*pp++ != '>');
1271 ph10 345 }
1272 nigel 91 }
1273     break;
1274    
1275 nigel 77 case '\r': /* So that it works in Windows */
1276     case '\n':
1277     case ' ':
1278     break;
1279 nigel 75
1280 nigel 3 default:
1281     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1282     goto SKIP_DATA;
1283     }
1284     }
1285    
1286 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1287 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1288     local character tables. */
1289 nigel 3
1290 nigel 37 #if !defined NOPOSIX
1291 nigel 3 if (posix || do_posix)
1292     {
1293     int rc;
1294     int cflags = 0;
1295 nigel 75
1296 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1297     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1298 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1299 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1300     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1301    
1302 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1303    
1304     /* Compilation failed; go back for another re, skipping to blank line
1305     if non-interactive. */
1306    
1307     if (rc != 0)
1308     {
1309 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1310 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1311     goto SKIP_DATA;
1312     }
1313     }
1314    
1315     /* Handle compiling via the native interface */
1316    
1317     else
1318 nigel 37 #endif /* !defined NOPOSIX */
1319    
1320 nigel 3 {
1321 nigel 93 if (timeit > 0)
1322 nigel 3 {
1323     register int i;
1324     clock_t time_taken;
1325     clock_t start_time = clock();
1326 nigel 93 for (i = 0; i < timeit; i++)
1327 nigel 3 {
1328 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1329 nigel 3 if (re != NULL) free(re);
1330     }
1331     time_taken = clock() - start_time;
1332 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1333     (((double)time_taken * 1000.0) / (double)timeit) /
1334 nigel 63 (double)CLOCKS_PER_SEC);
1335 nigel 3 }
1336    
1337 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1338 nigel 3
1339     /* Compilation failed; go back for another re, skipping to blank line
1340     if non-interactive. */
1341    
1342     if (re == NULL)
1343     {
1344     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1345     SKIP_DATA:
1346     if (infile != stdin)
1347     {
1348     for (;;)
1349     {
1350 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
1351 nigel 11 {
1352     done = 1;
1353     goto CONTINUE;
1354     }
1355 nigel 3 len = (int)strlen((char *)buffer);
1356     while (len > 0 && isspace(buffer[len-1])) len--;
1357     if (len == 0) break;
1358     }
1359     fprintf(outfile, "\n");
1360     }
1361 nigel 25 goto CONTINUE;
1362 nigel 3 }
1363    
1364 nigel 43 /* Compilation succeeded; print data if required. There are now two
1365     info-returning functions. The old one has a limited interface and
1366     returns only limited data. Check that it agrees with the newer one. */
1367 nigel 3
1368 nigel 63 if (log_store)
1369     fprintf(outfile, "Memory allocation (code space): %d\n",
1370     (int)(gotten_store -
1371     sizeof(real_pcre) -
1372     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1373    
1374 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1375     and remember the store that was got. */
1376    
1377     true_size = ((real_pcre *)re)->size;
1378     regex_gotten_store = gotten_store;
1379    
1380     /* If /S was present, study the regexp to generate additional info to
1381     help with the matching. */
1382    
1383     if (do_study)
1384     {
1385 nigel 93 if (timeit > 0)
1386 nigel 75 {
1387     register int i;
1388     clock_t time_taken;
1389     clock_t start_time = clock();
1390 nigel 93 for (i = 0; i < timeit; i++)
1391 nigel 75 extra = pcre_study(re, study_options, &error);
1392     time_taken = clock() - start_time;
1393     if (extra != NULL) free(extra);
1394 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1395     (((double)time_taken * 1000.0) / (double)timeit) /
1396 nigel 75 (double)CLOCKS_PER_SEC);
1397     }
1398     extra = pcre_study(re, study_options, &error);
1399     if (error != NULL)
1400     fprintf(outfile, "Failed to study: %s\n", error);
1401     else if (extra != NULL)
1402     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1403     }
1404    
1405     /* If the 'F' option was present, we flip the bytes of all the integer
1406     fields in the regex data block and the study block. This is to make it
1407     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1408     compiled on a different architecture. */
1409    
1410     if (do_flip)
1411     {
1412     real_pcre *rre = (real_pcre *)re;
1413 ph10 259 rre->magic_number =
1414 ph10 255 byteflip(rre->magic_number, sizeof(rre->magic_number));
1415 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
1416     rre->options = byteflip(rre->options, sizeof(rre->options));
1417 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1418 ph10 259 rre->top_bracket =
1419 ph10 255 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1420 ph10 259 rre->top_backref =
1421 ph10 255 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1422 ph10 259 rre->first_byte =
1423 ph10 255 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1424 ph10 259 rre->req_byte =
1425 ph10 255 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1426     rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1427 nigel 75 sizeof(rre->name_table_offset));
1428 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1429 nigel 75 sizeof(rre->name_entry_size));
1430 ph10 259 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1431 ph10 255 sizeof(rre->name_count));
1432 nigel 75
1433     if (extra != NULL)
1434     {
1435     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1436     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1437     rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1438     }
1439     }
1440    
1441     /* Extract information from the compiled data if required */
1442    
1443     SHOW_INFO:
1444    
1445 nigel 93 if (do_debug)
1446     {
1447     fprintf(outfile, "------------------------------------------------------------------\n");
1448 ph10 116 pcre_printint(re, outfile, debug_lengths);
1449 nigel 93 }
1450    
1451 nigel 25 if (do_showinfo)
1452 nigel 3 {
1453 nigel 75 unsigned long int get_options, all_options;
1454 nigel 79 #if !defined NOINFOCHECK
1455 nigel 43 int old_first_char, old_options, old_count;
1456 nigel 79 #endif
1457 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1458 ph10 227 hascrorlf;
1459 nigel 63 int nameentrysize, namecount;
1460     const uschar *nametable;
1461 nigel 3
1462 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1463 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1464     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1465     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1466 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1467 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1468 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1469     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1470 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1471 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1472     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1473 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1474 nigel 43
1475 nigel 79 #if !defined NOINFOCHECK
1476 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1477 nigel 3 if (count < 0) fprintf(outfile,
1478 nigel 43 "Error %d from pcre_info()\n", count);
1479 nigel 3 else
1480     {
1481 nigel 43 if (old_count != count) fprintf(outfile,
1482     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1483     old_count);
1484 nigel 37
1485 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1486     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1487     first_char, old_first_char);
1488 nigel 37
1489 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1490     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1491     get_options, old_options);
1492 nigel 43 }
1493 nigel 79 #endif
1494 nigel 43
1495 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1496 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1497 nigel 77 (int)size, (int)regex_gotten_store);
1498 nigel 43
1499     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1500     if (backrefmax > 0)
1501     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1502 nigel 63
1503     if (namecount > 0)
1504     {
1505     fprintf(outfile, "Named capturing subpatterns:\n");
1506     while (namecount-- > 0)
1507     {
1508     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1509     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1510     GET2(nametable, 0));
1511     nametable += nameentrysize;
1512     }
1513     }
1514 ph10 172
1515 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1516 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1517 nigel 63
1518 nigel 75 all_options = ((real_pcre *)re)->options;
1519 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1520 nigel 75
1521 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1522 ph10 231 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1523 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1524     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1525     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1526     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1527 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1528 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1529 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1530     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1531 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1532     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1533     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1534 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1535 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1536 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1537     ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1538 ph10 172
1539 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1540 nigel 43
1541 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1542 nigel 91 {
1543     case PCRE_NEWLINE_CR:
1544     fprintf(outfile, "Forced newline sequence: CR\n");
1545     break;
1546 nigel 43
1547 nigel 91 case PCRE_NEWLINE_LF:
1548     fprintf(outfile, "Forced newline sequence: LF\n");
1549     break;
1550    
1551     case PCRE_NEWLINE_CRLF:
1552     fprintf(outfile, "Forced newline sequence: CRLF\n");
1553     break;
1554    
1555 ph10 149 case PCRE_NEWLINE_ANYCRLF:
1556     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1557     break;
1558    
1559 nigel 93 case PCRE_NEWLINE_ANY:
1560     fprintf(outfile, "Forced newline sequence: ANY\n");
1561     break;
1562    
1563 nigel 91 default:
1564     break;
1565     }
1566    
1567 nigel 43 if (first_char == -1)
1568     {
1569 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1570 nigel 43 }
1571     else if (first_char < 0)
1572     {
1573     fprintf(outfile, "No first char\n");
1574     }
1575     else
1576     {
1577 nigel 63 int ch = first_char & 255;
1578 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1579 nigel 63 "" : " (caseless)";
1580 nigel 93 if (PRINTHEX(ch))
1581 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1582 nigel 3 else
1583 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1584 nigel 43 }
1585 nigel 37
1586 nigel 43 if (need_char < 0)
1587     {
1588     fprintf(outfile, "No need char\n");
1589 nigel 3 }
1590 nigel 43 else
1591     {
1592 nigel 63 int ch = need_char & 255;
1593 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1594 nigel 63 "" : " (caseless)";
1595 nigel 93 if (PRINTHEX(ch))
1596 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1597 nigel 43 else
1598 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1599 nigel 43 }
1600 nigel 75
1601     /* Don't output study size; at present it is in any case a fixed
1602     value, but it varies, depending on the computer architecture, and
1603     so messes up the test suite. (And with the /F option, it might be
1604     flipped.) */
1605    
1606     if (do_study)
1607     {
1608     if (extra == NULL)
1609     fprintf(outfile, "Study returned NULL\n");
1610     else
1611     {
1612     uschar *start_bits = NULL;
1613     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1614    
1615     if (start_bits == NULL)
1616     fprintf(outfile, "No starting byte set\n");
1617     else
1618     {
1619     int i;
1620     int c = 24;
1621     fprintf(outfile, "Starting byte set: ");
1622     for (i = 0; i < 256; i++)
1623     {
1624     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1625     {
1626     if (c > 75)
1627     {
1628     fprintf(outfile, "\n ");
1629     c = 2;
1630     }
1631 nigel 93 if (PRINTHEX(i) && i != ' ')
1632 nigel 75 {
1633     fprintf(outfile, "%c ", i);
1634     c += 2;
1635     }
1636     else
1637     {
1638     fprintf(outfile, "\\x%02x ", i);
1639     c += 5;
1640     }
1641     }
1642     }
1643     fprintf(outfile, "\n");
1644     }
1645     }
1646     }
1647 nigel 3 }
1648    
1649 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1650     that is all. The first 8 bytes of the file are the regex length and then
1651     the study length, in big-endian order. */
1652 nigel 3
1653 nigel 75 if (to_file != NULL)
1654 nigel 3 {
1655 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1656     if (f == NULL)
1657 nigel 3 {
1658 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1659 nigel 3 }
1660 nigel 75 else
1661     {
1662     uschar sbuf[8];
1663 ph10 255 sbuf[0] = (uschar)((true_size >> 24) & 255);
1664     sbuf[1] = (uschar)((true_size >> 16) & 255);
1665     sbuf[2] = (uschar)((true_size >> 8) & 255);
1666     sbuf[3] = (uschar)((true_size) & 255);
1667 ph10 259
1668 ph10 255 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1669     sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1670     sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1671     sbuf[7] = (uschar)((true_study_size) & 255);
1672 nigel 3
1673 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1674     fwrite(re, 1, true_size, f) < true_size)
1675     {
1676     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1677     }
1678 nigel 3 else
1679     {
1680 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1681     if (extra != NULL)
1682 nigel 3 {
1683 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1684     true_study_size)
1685 nigel 3 {
1686 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1687     strerror(errno));
1688 nigel 3 }
1689 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1690 nigel 93
1691 nigel 3 }
1692     }
1693 nigel 75 fclose(f);
1694 nigel 3 }
1695 nigel 77
1696     new_free(re);
1697     if (extra != NULL) new_free(extra);
1698     if (tables != NULL) new_free((void *)tables);
1699 nigel 75 continue; /* With next regex */
1700 nigel 3 }
1701 nigel 75 } /* End of non-POSIX compile */
1702 nigel 3
1703     /* Read data lines and test them */
1704    
1705     for (;;)
1706     {
1707 nigel 87 uschar *q;
1708 ph10 147 uschar *bptr;
1709 nigel 57 int *use_offsets = offsets;
1710 nigel 53 int use_size_offsets = size_offsets;
1711 nigel 63 int callout_data = 0;
1712     int callout_data_set = 0;
1713 nigel 3 int count, c;
1714 nigel 29 int copystrings = 0;
1715 ph10 386 int find_match_limit = default_find_match_limit;
1716 nigel 29 int getstrings = 0;
1717     int getlist = 0;
1718 nigel 39 int gmatched = 0;
1719 nigel 35 int start_offset = 0;
1720 nigel 41 int g_notempty = 0;
1721 nigel 77 int use_dfa = 0;
1722 nigel 3
1723     options = 0;
1724    
1725 nigel 91 *copynames = 0;
1726     *getnames = 0;
1727    
1728     copynamesptr = copynames;
1729     getnamesptr = getnames;
1730    
1731 nigel 63 pcre_callout = callout;
1732     first_callout = 1;
1733     callout_extra = 0;
1734     callout_count = 0;
1735     callout_fail_count = 999999;
1736     callout_fail_id = -1;
1737 nigel 73 show_malloc = 0;
1738 nigel 63
1739 nigel 91 if (extra != NULL) extra->flags &=
1740     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1741    
1742     len = 0;
1743     for (;;)
1744 nigel 11 {
1745 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1746 nigel 91 {
1747     if (len > 0) break;
1748     done = 1;
1749     goto CONTINUE;
1750     }
1751     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1752     len = (int)strlen((char *)buffer);
1753     if (buffer[len-1] == '\n') break;
1754 nigel 11 }
1755 nigel 3
1756     while (len > 0 && isspace(buffer[len-1])) len--;
1757     buffer[len] = 0;
1758     if (len == 0) break;
1759    
1760     p = buffer;
1761     while (isspace(*p)) p++;
1762    
1763 ph10 147 bptr = q = dbuffer;
1764 nigel 3 while ((c = *p++) != 0)
1765     {
1766     int i = 0;
1767     int n = 0;
1768 nigel 63
1769 nigel 3 if (c == '\\') switch ((c = *p++))
1770     {
1771     case 'a': c = 7; break;
1772     case 'b': c = '\b'; break;
1773     case 'e': c = 27; break;
1774     case 'f': c = '\f'; break;
1775     case 'n': c = '\n'; break;
1776     case 'r': c = '\r'; break;
1777     case 't': c = '\t'; break;
1778     case 'v': c = '\v'; break;
1779    
1780     case '0': case '1': case '2': case '3':
1781     case '4': case '5': case '6': case '7':
1782     c -= '0';
1783     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1784     c = c * 8 + *p++ - '0';
1785 nigel 91
1786     #if !defined NOUTF8
1787     if (use_utf8 && c > 255)
1788     {
1789     unsigned char buff8[8];
1790     int ii, utn;
1791     utn = ord2utf8(c, buff8);
1792     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1793     c = buff8[ii]; /* Last byte */
1794     }
1795     #endif
1796 nigel 3 break;
1797    
1798     case 'x':
1799 nigel 49
1800     /* Handle \x{..} specially - new Perl thing for utf8 */
1801    
1802 nigel 79 #if !defined NOUTF8
1803 nigel 49 if (*p == '{')
1804     {
1805     unsigned char *pt = p;
1806     c = 0;
1807     while (isxdigit(*(++pt)))
1808     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1809     if (*pt == '}')
1810     {
1811 nigel 67 unsigned char buff8[8];
1812 nigel 49 int ii, utn;
1813 ph10 355 if (use_utf8)
1814 ph10 358 {
1815 ph10 355 utn = ord2utf8(c, buff8);
1816     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1817     c = buff8[ii]; /* Last byte */
1818     }
1819     else
1820     {
1821 ph10 358 if (c > 255)
1822 ph10 355 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1823     "UTF-8 mode is not enabled.\n"
1824     "** Truncation will probably give the wrong result.\n", c);
1825 ph10 358 }
1826 nigel 49 p = pt + 1;
1827     break;
1828     }
1829     /* Not correct form; fall through */
1830     }
1831 nigel 79 #endif
1832 nigel 49
1833     /* Ordinary \x */
1834    
1835 nigel 3 c = 0;
1836     while (i++ < 2 && isxdigit(*p))
1837     {
1838     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1839     p++;
1840     }
1841     break;
1842    
1843 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1844 nigel 3 p--;
1845     continue;
1846    
1847 nigel 75 case '>':
1848     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1849     continue;
1850    
1851 nigel 3 case 'A': /* Option setting */
1852     options |= PCRE_ANCHORED;
1853     continue;
1854    
1855     case 'B':
1856     options |= PCRE_NOTBOL;
1857     continue;
1858    
1859 nigel 29 case 'C':
1860 nigel 63 if (isdigit(*p)) /* Set copy string */
1861     {
1862     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1863     copystrings |= 1 << n;
1864     }
1865     else if (isalnum(*p))
1866     {
1867 nigel 91 uschar *npp = copynamesptr;
1868 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1869 nigel 91 *npp++ = 0;
1870 nigel 67 *npp = 0;
1871 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1872 nigel 63 if (n < 0)
1873 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1874     copynamesptr = npp;
1875 nigel 63 }
1876     else if (*p == '+')
1877     {
1878     callout_extra = 1;
1879     p++;
1880     }
1881     else if (*p == '-')
1882     {
1883     pcre_callout = NULL;
1884     p++;
1885     }
1886     else if (*p == '!')
1887     {
1888     callout_fail_id = 0;
1889     p++;
1890     while(isdigit(*p))
1891     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1892     callout_fail_count = 0;
1893     if (*p == '!')
1894     {
1895     p++;
1896     while(isdigit(*p))
1897     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1898     }
1899     }
1900     else if (*p == '*')
1901     {
1902     int sign = 1;
1903     callout_data = 0;
1904     if (*(++p) == '-') { sign = -1; p++; }
1905     while(isdigit(*p))
1906     callout_data = callout_data * 10 + *p++ - '0';
1907     callout_data *= sign;
1908     callout_data_set = 1;
1909     }
1910 nigel 29 continue;
1911    
1912 nigel 79 #if !defined NODFA
1913 nigel 77 case 'D':
1914 nigel 79 #if !defined NOPOSIX
1915 nigel 77 if (posix || do_posix)
1916     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1917     else
1918 nigel 79 #endif
1919 nigel 77 use_dfa = 1;
1920     continue;
1921    
1922     case 'F':
1923     options |= PCRE_DFA_SHORTEST;
1924     continue;
1925 nigel 79 #endif
1926 nigel 77
1927 nigel 29 case 'G':
1928 nigel 63 if (isdigit(*p))
1929     {
1930     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1931     getstrings |= 1 << n;
1932     }
1933     else if (isalnum(*p))
1934     {
1935 nigel 91 uschar *npp = getnamesptr;
1936 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1937 nigel 91 *npp++ = 0;
1938 nigel 67 *npp = 0;
1939 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1940 nigel 63 if (n < 0)
1941 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1942     getnamesptr = npp;
1943 nigel 63 }
1944 nigel 29 continue;
1945    
1946     case 'L':
1947     getlist = 1;
1948     continue;
1949    
1950 nigel 63 case 'M':
1951     find_match_limit = 1;
1952     continue;
1953    
1954 nigel 37 case 'N':
1955     options |= PCRE_NOTEMPTY;
1956     continue;
1957    
1958 nigel 3 case 'O':
1959     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1960 nigel 53 if (n > size_offsets_max)
1961     {
1962     size_offsets_max = n;
1963 nigel 57 free(offsets);
1964 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1965 nigel 53 if (offsets == NULL)
1966     {
1967     printf("** Failed to get %d bytes of memory for offsets vector\n",
1968 ph10 151 (int)(size_offsets_max * sizeof(int)));
1969 nigel 77 yield = 1;
1970     goto EXIT;
1971 nigel 53 }
1972     }
1973     use_size_offsets = n;
1974 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1975 nigel 3 continue;
1976    
1977 nigel 75 case 'P':
1978     options |= PCRE_PARTIAL;
1979     continue;
1980    
1981 nigel 91 case 'Q':
1982     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1983     if (extra == NULL)
1984     {
1985     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1986     extra->flags = 0;
1987     }
1988     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1989     extra->match_limit_recursion = n;
1990     continue;
1991    
1992     case 'q':
1993     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1994     if (extra == NULL)
1995     {
1996     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1997     extra->flags = 0;
1998     }
1999     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2000     extra->match_limit = n;
2001     continue;
2002    
2003 nigel 79 #if !defined NODFA
2004 nigel 77 case 'R':
2005     options |= PCRE_DFA_RESTART;
2006     continue;
2007 nigel 79 #endif
2008 nigel 77
2009 nigel 73 case 'S':
2010     show_malloc = 1;
2011     continue;
2012 ph10 389
2013     case 'Y':
2014     options |= PCRE_NO_START_OPTIMIZE;
2015     continue;
2016 nigel 73
2017 nigel 3 case 'Z':
2018     options |= PCRE_NOTEOL;
2019     continue;
2020 nigel 71
2021     case '?':
2022     options |= PCRE_NO_UTF8_CHECK;
2023     continue;
2024 nigel 91
2025     case '<':
2026     {
2027     int x = check_newline(p, outfile);
2028     if (x == 0) goto NEXT_DATA;
2029     options |= x;
2030     while (*p++ != '>');
2031     }
2032     continue;
2033 nigel 3 }
2034 nigel 9 *q++ = c;
2035 nigel 3 }
2036 nigel 9 *q = 0;
2037     len = q - dbuffer;
2038 ph10 371
2039 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
2040 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2041 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
2042 ph10 371
2043 ph10 363 #if !defined NOPOSIX
2044     if (posix || do_posix)
2045     {
2046     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2047 ph10 371 bptr += buffer_size - len - 1;
2048 ph10 363 }
2049 ph10 371 else
2050     #endif
2051 ph10 363 {
2052     memmove(bptr + buffer_size - len, bptr, len);
2053 ph10 371 bptr += buffer_size - len;
2054     }
2055 nigel 3
2056 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
2057     {
2058     printf("**Match limit not relevant for DFA matching: ignored\n");
2059     find_match_limit = 0;
2060     }
2061    
2062 nigel 3 /* Handle matching via the POSIX interface, which does not
2063 nigel 63 support timing or playing with the match limit or callout data. */
2064 nigel 3
2065 nigel 37 #if !defined NOPOSIX
2066 nigel 3 if (posix || do_posix)
2067     {
2068     int rc;
2069     int eflags = 0;
2070 nigel 63 regmatch_t *pmatch = NULL;
2071     if (use_size_offsets > 0)
2072 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2073 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2074     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2075 ph10 388 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2076 nigel 3
2077 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2078 nigel 3
2079     if (rc != 0)
2080     {
2081 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2082 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2083     }
2084 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2085     != 0)
2086     {
2087     fprintf(outfile, "Matched with REG_NOSUB\n");
2088     }
2089 nigel 3 else
2090     {
2091 nigel 7 size_t i;
2092 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
2093 nigel 3 {
2094     if (pmatch[i].rm_so >= 0)
2095     {
2096 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2097 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2098     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2099 nigel 3 fprintf(outfile, "\n");
2100 nigel 35 if (i == 0 && do_showrest)
2101     {
2102     fprintf(outfile, " 0+ ");
2103 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2104     outfile);
2105 nigel 35 fprintf(outfile, "\n");
2106     }
2107 nigel 3 }
2108     }
2109     }
2110 nigel 53 free(pmatch);
2111 nigel 3 }
2112    
2113 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2114 nigel 3
2115 nigel 37 else
2116     #endif /* !defined NOPOSIX */
2117    
2118 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2119 nigel 3 {
2120 nigel 93 if (timeitm > 0)
2121 nigel 3 {
2122     register int i;
2123     clock_t time_taken;
2124     clock_t start_time = clock();
2125 nigel 77
2126 nigel 79 #if !defined NODFA
2127 nigel 77 if (all_use_dfa || use_dfa)
2128     {
2129     int workspace[1000];
2130 nigel 93 for (i = 0; i < timeitm; i++)
2131 nigel 77 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2132     options | g_notempty, use_offsets, use_size_offsets, workspace,
2133     sizeof(workspace)/sizeof(int));
2134     }
2135     else
2136 nigel 79 #endif
2137 nigel 77
2138 nigel 93 for (i = 0; i < timeitm; i++)
2139 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2140 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2141 nigel 77
2142 nigel 3 time_taken = clock() - start_time;
2143 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2144     (((double)time_taken * 1000.0) / (double)timeitm) /
2145 nigel 63 (double)CLOCKS_PER_SEC);
2146 nigel 3 }
2147    
2148 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2149 nigel 87 varying limits in order to find the minimum value for the match limit and
2150     for the recursion limit. */
2151 nigel 63
2152     if (find_match_limit)
2153     {
2154     if (extra == NULL)
2155     {
2156 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2157 nigel 63 extra->flags = 0;
2158     }
2159    
2160 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2161 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2162     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2163     PCRE_ERROR_MATCHLIMIT, "match()");
2164 nigel 63
2165 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2166     options|g_notempty, use_offsets, use_size_offsets,
2167     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2168     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2169 nigel 63 }
2170    
2171     /* If callout_data is set, use the interface with additional data */
2172    
2173     else if (callout_data_set)
2174     {
2175     if (extra == NULL)
2176     {
2177 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2178 nigel 63 extra->flags = 0;
2179     }
2180     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2181 nigel 71 extra->callout_data = &callout_data;
2182 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2183     options | g_notempty, use_offsets, use_size_offsets);
2184     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2185     }
2186    
2187     /* The normal case is just to do the match once, with the default
2188     value of match_limit. */
2189    
2190 nigel 79 #if !defined NODFA
2191 nigel 77 else if (all_use_dfa || use_dfa)
2192     {
2193     int workspace[1000];
2194     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2195     options | g_notempty, use_offsets, use_size_offsets, workspace,
2196     sizeof(workspace)/sizeof(int));
2197     if (count == 0)
2198     {
2199     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2200     count = use_size_offsets/2;
2201     }
2202     }
2203 nigel 79 #endif
2204 nigel 77
2205 nigel 75 else
2206     {
2207     count = pcre_exec(re, extra, (char *)bptr, len,
2208     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2209 nigel 77 if (count == 0)
2210     {
2211     fprintf(outfile, "Matched, but too many substrings\n");
2212     count = use_size_offsets/3;
2213     }
2214 nigel 75 }
2215 nigel 3
2216 nigel 39 /* Matched */
2217    
2218 nigel 3 if (count >= 0)
2219     {
2220 nigel 93 int i, maxcount;
2221    
2222     #if !defined NODFA
2223     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2224     #endif
2225     maxcount = use_size_offsets/3;
2226    
2227     /* This is a check against a lunatic return value. */
2228    
2229     if (count > maxcount)
2230     {
2231     fprintf(outfile,
2232     "** PCRE error: returned count %d is too big for offset size %d\n",
2233     count, use_size_offsets);
2234     count = use_size_offsets/3;
2235     if (do_g || do_G)
2236     {
2237     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2238     do_g = do_G = FALSE; /* Break g/G loop */
2239     }
2240     }
2241    
2242 nigel 29 for (i = 0; i < count * 2; i += 2)
2243 nigel 3 {
2244 nigel 57 if (use_offsets[i] < 0)
2245 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2246     else
2247     {
2248     fprintf(outfile, "%2d: ", i/2);
2249 nigel 63 (void)pchars(bptr + use_offsets[i],
2250     use_offsets[i+1] - use_offsets[i], outfile);
2251 nigel 3 fprintf(outfile, "\n");
2252 nigel 35 if (i == 0)
2253     {
2254     if (do_showrest)
2255     {
2256     fprintf(outfile, " 0+ ");
2257 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2258     outfile);
2259 nigel 35 fprintf(outfile, "\n");
2260     }
2261     }
2262 nigel 3 }
2263     }
2264 nigel 29
2265     for (i = 0; i < 32; i++)
2266     {
2267     if ((copystrings & (1 << i)) != 0)
2268     {
2269 nigel 91 char copybuffer[256];
2270 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2271 nigel 37 i, copybuffer, sizeof(copybuffer));
2272 nigel 29 if (rc < 0)
2273     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2274     else
2275 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2276 nigel 29 }
2277     }
2278    
2279 nigel 91 for (copynamesptr = copynames;
2280     *copynamesptr != 0;
2281     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2282     {
2283     char copybuffer[256];
2284     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2285     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2286     if (rc < 0)
2287     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2288     else
2289     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2290     }
2291    
2292 nigel 29 for (i = 0; i < 32; i++)
2293     {
2294     if ((getstrings & (1 << i)) != 0)
2295     {
2296     const char *substring;
2297 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2298 nigel 29 i, &substring);
2299     if (rc < 0)
2300     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2301     else
2302     {
2303     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2304 nigel 49 pcre_free_substring(substring);
2305 nigel 29 }
2306     }
2307     }
2308    
2309 nigel 91 for (getnamesptr = getnames;
2310     *getnamesptr != 0;
2311     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2312     {
2313     const char *substring;
2314     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2315     count, (char *)getnamesptr, &substring);
2316     if (rc < 0)
2317     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2318     else
2319     {
2320     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2321     pcre_free_substring(substring);
2322     }
2323     }
2324    
2325 nigel 29 if (getlist)
2326     {
2327     const char **stringlist;
2328 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2329 nigel 29 &stringlist);
2330     if (rc < 0)
2331     fprintf(outfile, "get substring list failed %d\n", rc);
2332     else
2333     {
2334     for (i = 0; i < count; i++)
2335     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2336     if (stringlist[i] != NULL)
2337     fprintf(outfile, "string list not terminated by NULL\n");
2338 nigel 49 /* free((void *)stringlist); */
2339     pcre_free_substring_list(stringlist);
2340 nigel 29 }
2341     }
2342 nigel 39 }
2343 nigel 29
2344 nigel 75 /* There was a partial match */
2345    
2346     else if (count == PCRE_ERROR_PARTIAL)
2347     {
2348 nigel 77 fprintf(outfile, "Partial match");
2349 nigel 79 #if !defined NODFA
2350 nigel 77 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2351     fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2352     bptr + use_offsets[0]);
2353 nigel 79 #endif
2354 nigel 77 fprintf(outfile, "\n");
2355 nigel 75 break; /* Out of the /g loop */
2356     }
2357    
2358 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2359 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2360     to advance the start offset, and continue. We won't be at the end of the
2361     string - that was checked before setting g_notempty.
2362 nigel 39
2363 ph10 150 Complication arises in the case when the newline option is "any" or
2364 ph10 149 "anycrlf". If the previous match was at the end of a line terminated by
2365     CRLF, an advance of one character just passes the \r, whereas we should
2366     prefer the longer newline sequence, as does the code in pcre_exec().
2367     Fudge the offset value to achieve this.
2368 ph10 144
2369 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2370     character, not one byte. */
2371    
2372 nigel 3 else
2373     {
2374 nigel 41 if (g_notempty != 0)
2375 nigel 35 {
2376 nigel 73 int onechar = 1;
2377 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2378 nigel 57 use_offsets[0] = start_offset;
2379 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2380     {
2381     int d;
2382     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2383     obits = (d == '\r')? PCRE_NEWLINE_CR :
2384     (d == '\n')? PCRE_NEWLINE_LF :
2385     (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2386 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2387 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2388     }
2389 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2390 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2391 ph10 149 &&
2392 ph10 143 start_offset < len - 1 &&
2393     bptr[start_offset] == '\r' &&
2394     bptr[start_offset+1] == '\n')
2395 ph10 144 onechar++;
2396 ph10 143 else if (use_utf8)
2397 nigel 73 {
2398     while (start_offset + onechar < len)
2399     {
2400     int tb = bptr[start_offset+onechar];
2401     if (tb <= 127) break;
2402     tb &= 0xc0;
2403     if (tb != 0 && tb != 0xc0) onechar++;
2404     }
2405     }
2406     use_offsets[1] = start_offset + onechar;
2407 nigel 35 }
2408 nigel 41 else
2409     {
2410 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2411 nigel 41 {
2412 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
2413 nigel 41 }
2414 nigel 73 else fprintf(outfile, "Error %d\n", count);
2415 nigel 41 break; /* Out of the /g loop */
2416     }
2417 nigel 3 }
2418 nigel 35
2419 nigel 39 /* If not /g or /G we are done */
2420    
2421     if (!do_g && !do_G) break;
2422    
2423 nigel 41 /* If we have matched an empty string, first check to see if we are at
2424     the end of the subject. If so, the /g loop is over. Otherwise, mimic
2425     what Perl's /g option does. This turns out to be rather cunning. First
2426 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2427     same point. If this fails (picked up above) we advance to the next
2428 ph10 143 character. */
2429 ph10 142
2430 nigel 41 g_notempty = 0;
2431 ph10 142
2432 nigel 57 if (use_offsets[0] == use_offsets[1])
2433 nigel 41 {
2434 nigel 57 if (use_offsets[0] == len) break;
2435 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2436 nigel 41 }
2437 nigel 39
2438     /* For /g, update the start offset, leaving the rest alone */
2439    
2440 ph10 143 if (do_g) start_offset = use_offsets[1];
2441 nigel 39
2442     /* For /G, update the pointer and length */
2443    
2444     else
2445 nigel 35 {
2446 ph10 143 bptr += use_offsets[1];
2447     len -= use_offsets[1];
2448 nigel 35 }
2449 nigel 39 } /* End of loop for /g and /G */
2450 nigel 91
2451     NEXT_DATA: continue;
2452 nigel 39 } /* End of loop for data lines */
2453 nigel 3
2454 nigel 11 CONTINUE:
2455 nigel 37
2456     #if !defined NOPOSIX
2457 nigel 3 if (posix || do_posix) regfree(&preg);
2458 nigel 37 #endif
2459    
2460 nigel 77 if (re != NULL) new_free(re);
2461     if (extra != NULL) new_free(extra);
2462 nigel 25 if (tables != NULL)
2463     {
2464 nigel 77 new_free((void *)tables);
2465 nigel 25 setlocale(LC_CTYPE, "C");
2466 nigel 93 locale_set = 0;
2467 nigel 25 }
2468 nigel 3 }
2469    
2470 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2471 nigel 77
2472     EXIT:
2473    
2474     if (infile != NULL && infile != stdin) fclose(infile);
2475     if (outfile != NULL && outfile != stdout) fclose(outfile);
2476    
2477     free(buffer);
2478     free(dbuffer);
2479     free(pbuffer);
2480     free(offsets);
2481    
2482     return yield;
2483 nigel 3 }
2484    
2485 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12