/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 455 - (hide annotations) (download)
Sat Sep 26 19:12:32 2009 UTC (4 years, 6 months ago) by ph10
File MIME type: text/plain
File size: 74114 byte(s)
Added lower bound length-finding to pcre_study() and use it when matching; make 
the value available via pcre_fullinfo(); also fixed bugs connected with
pcre_study() in pcre_dfa_exec(). 

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
43 nigel 3 #include <ctype.h>
44     #include <stdio.h>
45     #include <string.h>
46     #include <stdlib.h>
47     #include <time.h>
48 nigel 25 #include <locale.h>
49 nigel 75 #include <errno.h>
50 nigel 3
51 ph10 287 #ifdef SUPPORT_LIBREADLINE
52 ph10 343 #ifdef HAVE_UNISTD_H
53 ph10 287 #include <unistd.h>
54 ph10 343 #endif
55 ph10 287 #include <readline/readline.h>
56     #include <readline/history.h>
57     #endif
58 nigel 93
59 ph10 287
60 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
61     input and output without "b"; then I was told that "b" was needed in some
62     environments, so it was added for release 5.0 to both the input and output. (It
63     makes no difference on Unix-like systems.) Later I was told that it is wrong
64     for the input on Windows. I've now abstracted the modes into two macros that
65     are set here, to make it easier to fiddle with them, and removed "b" from the
66     input mode under Windows. */
67    
68     #if defined(_WIN32) || defined(WIN32)
69     #include <io.h> /* For _setmode() */
70     #include <fcntl.h> /* For _O_BINARY */
71     #define INPUT_MODE "r"
72     #define OUTPUT_MODE "wb"
73    
74 ph10 411 #ifndef isatty
75     #define isatty _isatty /* This is what Windows calls them, I'm told, */
76     #endif /* though in some environments they seem to */
77     /* be already defined, hence the #ifndefs. */
78     #ifndef fileno
79 ph10 343 #define fileno _fileno
80 ph10 411 #endif
81 ph10 343
82 nigel 93 #else
83     #include <sys/time.h> /* These two includes are needed */
84     #include <sys/resource.h> /* for setrlimit(). */
85     #define INPUT_MODE "rb"
86     #define OUTPUT_MODE "wb"
87 nigel 91 #endif
88    
89 nigel 93
90 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
91     displaying the results of pcre_study() and we also need to know about the
92     internal macros, structures, and other internal data values; pcretest has
93     "inside information" compared to a program that strictly follows the PCRE API.
94 nigel 37
95 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
96     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
97     appropriately for an application, not for building PCRE. */
98 nigel 77
99 ph10 145 #include "pcre.h"
100 nigel 77 #include "pcre_internal.h"
101    
102 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
103     to keep two copies, we include the source file here, changing the names of the
104     external symbols to prevent clashes. */
105 nigel 77
106 ph10 351 #define _pcre_ucp_gentype ucp_gentype
107 nigel 85 #define _pcre_utf8_table1 utf8_table1
108     #define _pcre_utf8_table1_size utf8_table1_size
109     #define _pcre_utf8_table2 utf8_table2
110     #define _pcre_utf8_table3 utf8_table3
111     #define _pcre_utf8_table4 utf8_table4
112     #define _pcre_utt utt
113     #define _pcre_utt_size utt_size
114 ph10 240 #define _pcre_utt_names utt_names
115 nigel 85 #define _pcre_OP_lengths OP_lengths
116    
117     #include "pcre_tables.c"
118    
119     /* We also need the pcre_printint() function for printing out compiled
120     patterns. This function is in a separate file so that it can be included in
121 nigel 93 pcre_compile.c when that module is compiled with debugging enabled.
122 nigel 85
123 nigel 93 The definition of the macro PRINTABLE, which determines whether to print an
124     output character as-is or as a hex value when showing compiled patterns, is
125     contained in this file. We use it here also, in cases when the locale has not
126     been explicitly changed, so as to get consistent output from systems that
127     differ in their output from isprint() even in the "C" locale. */
128    
129 nigel 85 #include "pcre_printint.src"
130    
131 nigel 93 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
132 nigel 85
133 nigel 93
134 nigel 37 /* It is possible to compile this test program without including support for
135     testing the POSIX interface, though this is not available via the standard
136     Makefile. */
137    
138     #if !defined NOPOSIX
139 nigel 3 #include "pcreposix.h"
140 nigel 37 #endif
141 nigel 3
142 ph10 107 /* It is also possible, for the benefit of the version currently imported into
143     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
144     interface to the DFA matcher (NODFA), and without the doublecheck of the old
145     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
146     UTF8 support if PCRE is built without it. */
147 nigel 79
148 ph10 107 #ifndef SUPPORT_UTF8
149     #ifndef NOUTF8
150     #define NOUTF8
151     #endif
152     #endif
153 nigel 79
154 ph10 107
155 nigel 85 /* Other parameters */
156    
157 nigel 3 #ifndef CLOCKS_PER_SEC
158     #ifdef CLK_TCK
159     #define CLOCKS_PER_SEC CLK_TCK
160     #else
161     #define CLOCKS_PER_SEC 100
162     #endif
163     #endif
164    
165 nigel 93 /* This is the default loop count for timing. */
166    
167 nigel 75 #define LOOPREPEAT 500000
168 nigel 3
169 nigel 85 /* Static variables */
170    
/* Stream that receives the test output. It is file-scope so that the helper
functions in this file (callout, the malloc wrappers, ...) can write to it;
main() points it at stdout before option scanning. */
171 nigel 3 static FILE *outfile;
/* NOTE(review): no use of log_store is visible in this part of the file. */
172     static int log_store = 0;
/* Incremented by callout() each time the callout whose number equals
callout_fail_id is reached. */
173 nigel 63 static int callout_count;
/* When non-zero, callout() prints the captured substrings and always
re-prints the subject. */
174     static int callout_extra;
/* Once callout_count reaches this value, callout() returns 1 instead of 0. */
175     static int callout_fail_count;
/* The callout number whose invocations are counted against
callout_fail_count. */
176     static int callout_fail_id;
/* NOTE(review): no use of debug_lengths is visible in this part of the file. */
177 ph10 210 static int debug_lengths;
/* Non-zero until callout() has run once for the current match; callout()
resets it to 0 and uses it to decide whether to re-print the subject. */
178 nigel 63 static int first_callout;
/* Selects the printability test used by PRINTHEX(): isprint() when non-zero,
the PRINTABLE() macro otherwise. */
179 nigel 93 static int locale_set = 0;
/* When non-zero, the malloc/free wrappers below log each call to outfile. */
180 nigel 73 static int show_malloc;
/* When non-zero, pchars() decodes its input as UTF-8. */
181 nigel 67 static int use_utf8;
/* Size passed to the most recent new_malloc() call. */
182 nigel 43 static size_t gotten_store;
183 nigel 3
184 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
185    
/* Current size in bytes of each of the three buffers below; doubled by
extend_inputline() when less than about 1000 bytes remain free. */
186     static int buffer_size = 50000;
/* Input line buffer that extend_inputline() reads into. */
187     static uschar *buffer = NULL;
/* Reallocated alongside buffer; its old contents are not copied on growth.
NOTE(review): its use is outside the part of the file visible here. */
188     static uschar *dbuffer = NULL;
/* Copy of the input used by callout() to show the pattern item; its contents
are preserved when the buffers grow. */
189 nigel 75 static uschar *pbuffer = NULL;
190 nigel 3
191 nigel 75
192 nigel 49
193     /*************************************************
194 nigel 91 * Read or extend an input line *
195     *************************************************/
196    
197     /* Input lines are read into buffer, but both patterns and data lines can be
198     continued over multiple input lines. In addition, if the buffer fills up, we
199     want to automatically expand it so as to be able to handle extremely large
200     lines that are needed for certain stress tests. When the input buffer is
201     expanded, the other two buffers must also be expanded likewise, and the
202     contents of pbuffer, which are a copy of the input for callouts, must be
203     preserved (for when expansion happens for a data line). This is not the most
204     optimal way of handling this, but hey, this is just a test program!
205    
206     Arguments:
207     f the file to read
208     start where in buffer to start (this *must* be within buffer)
209 ph10 287 prompt for stdin or readline()
210 nigel 91
211     Returns: pointer to the start of new data
212     could be a copy of start, or could be moved
213     NULL if no data read and EOF reached
214     */
215    
216     static uschar *
217 ph10 287 extend_inputline(FILE *f, uschar *start, const char *prompt)
218 nigel 91 {
219     uschar *here = start;
220    
221     for (;;)
222     {
223     int rlen = buffer_size - (here - buffer);
224 nigel 93
225 nigel 91 if (rlen > 1000)
226     {
227     int dlen;
228 ph10 289
229 ph10 287 /* If libreadline support is required, use readline() to read a line if the
230     input is a terminal. Note that readline() removes the trailing newline, so
231     we must put it back again, to be compatible with fgets(). */
232 ph10 289
233 ph10 287 #ifdef SUPPORT_LIBREADLINE
234     if (isatty(fileno(f)))
235     {
236 ph10 289 size_t len;
237 ph10 287 char *s = readline(prompt);
238     if (s == NULL) return (here == start)? NULL : start;
239     len = strlen(s);
240 ph10 289 if (len > 0) add_history(s);
241 ph10 287 if (len > rlen - 1) len = rlen - 1;
242     memcpy(here, s, len);
243     here[len] = '\n';
244 ph10 289 here[len+1] = 0;
245     free(s);
246 ph10 287 }
247 ph10 289 else
248     #endif
249    
250 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
251 ph10 289
252 ph10 287 {
253 ph10 289 if (f == stdin) printf(prompt);
254 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
255     return (here == start)? NULL : start;
256 ph10 289 }
257    
258 nigel 91 dlen = (int)strlen((char *)here);
259     if (dlen > 0 && here[dlen - 1] == '\n') return start;
260     here += dlen;
261     }
262    
263     else
264     {
265     int new_buffer_size = 2*buffer_size;
266     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
267     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
268     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
269    
270     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
271     {
272     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
273     exit(1);
274     }
275    
276     memcpy(new_buffer, buffer, buffer_size);
277     memcpy(new_pbuffer, pbuffer, buffer_size);
278    
279     buffer_size = new_buffer_size;
280    
281     start = new_buffer + (start - buffer);
282     here = new_buffer + (here - buffer);
283    
284     free(buffer);
285     free(dbuffer);
286     free(pbuffer);
287    
288     buffer = new_buffer;
289     dbuffer = new_dbuffer;
290     pbuffer = new_pbuffer;
291     }
292     }
293    
294     return NULL; /* Control never gets here */
295     }
296    
297    
298    
299    
300    
301    
302    
303     /*************************************************
304 nigel 63 * Read number from string *
305     *************************************************/
306    
307     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
308     around with conditional compilation, just do the job by hand. It is only used
309 nigel 93 for unpicking arguments, so just keep it simple.
310 nigel 63
311     Arguments:
312     str string to be converted
313     endptr where to put the end pointer
314    
315     Returns: the value, as an int
316     */
317    
/* Parse an unsigned decimal number by hand: skip leading white space, then
accumulate digits. *endptr is set to the first character that was not
consumed; the accumulated value (0 if there were no digits) is returned. */

static int
get_value(unsigned char *str, unsigned char **endptr)
{
unsigned char *cursor = str;
int total = 0;

/* isspace(0) is false, so this stops at the terminating zero. */

for (; *cursor != 0 && isspace(*cursor); cursor++) ;

for (; isdigit(*cursor); cursor++)
  total = total * 10 + (int)(*cursor - '0');

*endptr = cursor;
return total;
}
327    
328    
329    
330 nigel 49
331     /*************************************************
332     * Convert UTF-8 string to value *
333     *************************************************/
334    
335     /* This function takes one or more bytes that represents a UTF-8 character,
336     and returns the value of the character.
337    
338     Argument:
339 nigel 91 utf8bytes a pointer to the byte vector
340     vptr a pointer to an int to receive the value
341 nigel 49
342 nigel 91 Returns: > 0 => the number of bytes consumed
343     -6 to 0 => malformed UTF-8 character at offset = (-return)
344 nigel 49 */
345    
346 nigel 79 #if !defined NOUTF8
347    
348 nigel 67 static int
349 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
350 nigel 49 {
351 nigel 91 int c = *utf8bytes++;
352 nigel 49 int d = c;
353     int i, j, s;
354    
355     for (i = -1; i < 6; i++) /* i is number of additional bytes */
356     {
357     if ((d & 0x80) == 0) break;
358     d <<= 1;
359     }
360    
361     if (i == -1) { *vptr = c; return 1; } /* ascii character */
362     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
363    
364     /* i now has a value in the range 1-5 */
365    
366 nigel 59 s = 6*i;
367 nigel 85 d = (c & utf8_table3[i]) << s;
368 nigel 49
369     for (j = 0; j < i; j++)
370     {
371 nigel 91 c = *utf8bytes++;
372 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
373 nigel 59 s -= 6;
374 nigel 49 d |= (c & 0x3f) << s;
375     }
376    
377     /* Check that encoding was the correct unique one */
378    
379 nigel 85 for (j = 0; j < utf8_table1_size; j++)
380     if (d <= utf8_table1[j]) break;
381 nigel 49 if (j != i) return -(i+1);
382    
383     /* Valid value */
384    
385     *vptr = d;
386     return i+1;
387     }
388    
389 nigel 79 #endif
390 nigel 49
391    
392 nigel 79
393 nigel 63 /*************************************************
394 nigel 85 * Convert character value to UTF-8 *
395     *************************************************/
396    
397     /* This function takes an integer value in the range 0 - 0x7fffffff
398     and encodes it as a UTF-8 character in 1 to 6 bytes.
399    
400     Arguments:
401     cvalue the character value
402 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
403 nigel 85
404     Returns: number of bytes placed in the buffer
405     */
406    
407 nigel 93 #if !defined NOUTF8
408    
409 nigel 85 static int
410 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
411 nigel 85 {
412     register int i, j;
413     for (i = 0; i < utf8_table1_size; i++)
414     if (cvalue <= utf8_table1[i]) break;
415 nigel 91 utf8bytes += i;
416 nigel 85 for (j = i; j > 0; j--)
417     {
418 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
419 nigel 85 cvalue >>= 6;
420     }
421 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
422 nigel 85 return i + 1;
423     }
424    
425 nigel 93 #endif
426 nigel 85
427    
428 nigel 93
429 nigel 85 /*************************************************
430 nigel 63 * Print character string *
431     *************************************************/
432 nigel 49
433 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
434     mode. Yields number of characters printed. If handed a NULL file, just counts
435     chars without printing. */
436 nigel 49
437 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
438 nigel 3 {
439 nigel 85 int c = 0;
440 nigel 63 int yield = 0;
441 nigel 3
442 nigel 63 while (length-- > 0)
443 nigel 3 {
444 nigel 79 #if !defined NOUTF8
445 nigel 67 if (use_utf8)
446 nigel 63 {
447     int rc = utf82ord(p, &c);
448 nigel 3
449 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
450     {
451     length -= rc - 1;
452     p += rc;
453 nigel 93 if (PRINTHEX(c))
454 nigel 63 {
455     if (f != NULL) fprintf(f, "%c", c);
456     yield++;
457     }
458     else
459     {
460 nigel 93 int n = 4;
461     if (f != NULL) fprintf(f, "\\x{%02x}", c);
462     yield += (n <= 0x000000ff)? 2 :
463     (n <= 0x00000fff)? 3 :
464     (n <= 0x0000ffff)? 4 :
465     (n <= 0x000fffff)? 5 : 6;
466 nigel 63 }
467     continue;
468     }
469     }
470 nigel 79 #endif
471 nigel 3
472 nigel 63 /* Not UTF-8, or malformed UTF-8 */
473    
474 nigel 93 c = *p++;
475     if (PRINTHEX(c))
476 nigel 3 {
477 nigel 63 if (f != NULL) fprintf(f, "%c", c);
478     yield++;
479 nigel 3 }
480 nigel 63 else
481 nigel 3 {
482 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
483     yield += 4;
484     }
485     }
486 nigel 3
487 nigel 63 return yield;
488     }
489 nigel 23
490 nigel 3
491 nigel 23
492 nigel 63 /*************************************************
493     * Callout function *
494     *************************************************/
495 nigel 3
496 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
497     the match. Yield zero unless more callouts than the fail count, or the callout
498     data is not zero. */
499 nigel 3
500 nigel 63 static int callout(pcre_callout_block *cb)
501     {
502     FILE *f = (first_callout | callout_extra)? outfile : NULL;
503 nigel 75 int i, pre_start, post_start, subject_length;
504 nigel 3
505 nigel 63 if (callout_extra)
506     {
507     fprintf(f, "Callout %d: last capture = %d\n",
508     cb->callout_number, cb->capture_last);
509 nigel 3
510 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
511     {
512     if (cb->offset_vector[i] < 0)
513     fprintf(f, "%2d: <unset>\n", i/2);
514     else
515     {
516     fprintf(f, "%2d: ", i/2);
517     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
518     cb->offset_vector[i+1] - cb->offset_vector[i], f);
519     fprintf(f, "\n");
520     }
521     }
522     }
523 nigel 3
524 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
525     datails. On subsequent calls in the same match, we use pchars just to find the
526     printed lengths of the substrings. */
527 nigel 3
528 nigel 63 if (f != NULL) fprintf(f, "--->");
529 nigel 3
530 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
531     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
532     cb->current_position - cb->start_match, f);
533 nigel 3
534 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
535    
536 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
537     cb->subject_length - cb->current_position, f);
538 nigel 3
539 nigel 63 if (f != NULL) fprintf(f, "\n");
540 nigel 9
541 nigel 63 /* Always print appropriate indicators, with callout number if not already
542 nigel 75 shown. For automatic callouts, show the pattern offset. */
543 nigel 3
544 nigel 75 if (cb->callout_number == 255)
545     {
546     fprintf(outfile, "%+3d ", cb->pattern_position);
547     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
548     }
549     else
550     {
551     if (callout_extra) fprintf(outfile, " ");
552     else fprintf(outfile, "%3d ", cb->callout_number);
553     }
554 nigel 3
555 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
556     fprintf(outfile, "^");
557 nigel 3
558 nigel 63 if (post_start > 0)
559     {
560     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
561     fprintf(outfile, "^");
562 nigel 3 }
563    
564 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
565     fprintf(outfile, " ");
566    
567     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
568     pbuffer + cb->pattern_position);
569    
570 nigel 63 fprintf(outfile, "\n");
571     first_callout = 0;
572 nigel 3
573 nigel 71 if (cb->callout_data != NULL)
574 nigel 49 {
575 nigel 71 int callout_data = *((int *)(cb->callout_data));
576     if (callout_data != 0)
577     {
578     fprintf(outfile, "Callout data = %d\n", callout_data);
579     return callout_data;
580     }
581 nigel 63 }
582 nigel 49
583 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
584     (++callout_count >= callout_fail_count)? 1 : 0;
585 nigel 3 }
586    
587    
588 nigel 63 /*************************************************
589 nigel 73 * Local malloc functions *
590 nigel 63 *************************************************/
591 nigel 3
592     /* Alternative malloc function, to test functionality and show the size of the
593     compiled re. */
594    
595     static void *new_malloc(size_t size)
596     {
597 nigel 73 void *block = malloc(size);
598 nigel 43 gotten_store = size;
599 nigel 73 if (show_malloc)
600 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
601 nigel 73 return block;
602 nigel 3 }
603    
604 nigel 73 static void new_free(void *block)
605     {
606     if (show_malloc)
607     fprintf(outfile, "free %p\n", block);
608     free(block);
609     }
610 nigel 3
611    
612 nigel 73 /* For recursion malloc/free, to test stacking calls */
613    
614     static void *stack_malloc(size_t size)
615     {
616     void *block = malloc(size);
617     if (show_malloc)
618 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
619 nigel 73 return block;
620     }
621    
622     static void stack_free(void *block)
623     {
624     if (show_malloc)
625     fprintf(outfile, "stack_free %p\n", block);
626     free(block);
627     }
628    
629    
630 nigel 63 /*************************************************
631     * Call pcre_fullinfo() *
632     *************************************************/
633 nigel 43
634     /* Get one piece of information from the pcre_fullinfo() function */
635    
636     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
637     {
638     int rc;
639     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
640     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
641     }
642    
643    
644    
645 nigel 63 /*************************************************
646 nigel 75 * Byte flipping function *
647     *************************************************/
648    
/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value   the value to flip; only the low 2 or 4 bytes take part
  n       2 to flip a 2-byte value; anything else flips a 4-byte value

Returns:  the value with its low-order bytes reversed
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
658    
659    
660    
661    
662     /*************************************************
663 nigel 87 * Check match or recursion limit *
664     *************************************************/
665    
666     static int
667     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
668     int start_offset, int options, int *use_offsets, int use_size_offsets,
669     int flag, unsigned long int *limit, int errnumber, const char *msg)
670     {
671     int count;
672     int min = 0;
673     int mid = 64;
674     int max = -1;
675    
676     extra->flags |= flag;
677    
678     for (;;)
679     {
680     *limit = mid;
681    
682     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
683     use_offsets, use_size_offsets);
684    
685     if (count == errnumber)
686     {
687     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
688     min = mid;
689     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
690     }
691    
692     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
693     count == PCRE_ERROR_PARTIAL)
694     {
695     if (mid == min + 1)
696     {
697     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
698     break;
699     }
700     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
701     max = mid;
702     mid = (min + mid)/2;
703     }
704     else break; /* Some other error */
705     }
706    
707     extra->flags &= ~flag;
708     return count;
709     }
710    
711    
712    
713     /*************************************************
714 ph10 227 * Case-independent strncmp() function *
715     *************************************************/
716    
717     /*
718     Arguments:
719     s first string
720     t second string
721     n number of characters to compare
722    
723     Returns: < 0, = 0, or > 0, according to the comparison
724     */
725    
726     static int
727     strncmpic(uschar *s, uschar *t, int n)
728     {
729     while (n--)
730     {
731     int c = tolower(*s++) - tolower(*t++);
732     if (c) return c;
733     }
734     return 0;
735     }
736    
737    
738    
739     /*************************************************
740 nigel 91 * Check newline indicator *
741     *************************************************/
742    
743     /* This is used both at compile and run-time to check for <xxx> escapes, where
744 ph10 149 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
745     no match.
746 nigel 91
747     Arguments:
748     p points after the leading '<'
749     f file for error message
750    
751     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
752     */
753    
754     static int
755     check_newline(uschar *p, FILE *f)
756     {
757 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
758     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
759     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
760     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
761     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
762 ph10 231 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
763     if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
764 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
765     return 0;
766     }
767    
768    
769    
770     /*************************************************
771 nigel 93 * Usage function *
772     *************************************************/
773    
/* Print the command-line help text on stdout. Lines for features that are
compiled out (readline, DFA matching, the POSIX interface) are selected or
omitted by the preprocessor. */

static void
usage(void)
{
printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
       "Input and output default to stdin and stdout.\n");

#ifdef SUPPORT_LIBREADLINE
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif

printf("\nOptions:\n"
       " -b show compiled code (bytecode)\n"
       " -C show PCRE compile-time options and exit\n"
       " -d debug: show compiled code and information (-b and -i)\n");

#if !defined NODFA
printf(" -dfa force DFA matching for all subjects\n");
#endif

printf(" -help show usage information\n"
       " -i show information about compiled patterns\n"
       " -M find MATCH_LIMIT minimum for each subject\n"
       " -m output memory used information\n"
       " -o <n> set size of offsets vector to <n>\n");

#if !defined NOPOSIX
printf(" -p use POSIX interface\n");
#endif

printf(" -q quiet: do not output PCRE version number at start\n"
       " -S <n> set stack size to <n> megabytes\n"
       " -s output store (memory) used information\n"
       " -t time compilation and execution\n"
       " -t <n> time compilation and execution, repeating <n> times\n"
       " -tm time execution (matching) only\n"
       " -tm <n> time execution (matching) only, repeating <n> times\n");
}
807    
808    
809    
810     /*************************************************
811 nigel 63 * Main Program *
812     *************************************************/
813 nigel 43
814 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
815     consist of a regular expression, in delimiters and optionally followed by
816     options, followed by a set of test data, terminated by an empty line. */
817    
818     int main(int argc, char **argv)
819     {
820     FILE *infile = stdin;
821     int options = 0;
822     int study_options = 0;
823 ph10 386 int default_find_match_limit = FALSE;
824 nigel 3 int op = 1;
825     int timeit = 0;
826 nigel 93 int timeitm = 0;
827 nigel 3 int showinfo = 0;
828 nigel 31 int showstore = 0;
829 nigel 87 int quiet = 0;
830 nigel 53 int size_offsets = 45;
831     int size_offsets_max;
832 nigel 77 int *offsets = NULL;
833 nigel 53 #if !defined NOPOSIX
834 nigel 3 int posix = 0;
835 nigel 53 #endif
836 nigel 3 int debug = 0;
837 nigel 11 int done = 0;
838 nigel 77 int all_use_dfa = 0;
839     int yield = 0;
840 nigel 91 int stack_size;
841 nigel 3
842 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
843     that 1024 is plenty long enough for the few names we'll be testing. */
844 nigel 69
845 nigel 91 uschar copynames[1024];
846     uschar getnames[1024];
847    
848     uschar *copynamesptr;
849     uschar *getnamesptr;
850    
851 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
852 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
853 nigel 69
854 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
855     dbuffer = (unsigned char *)malloc(buffer_size);
856     pbuffer = (unsigned char *)malloc(buffer_size);
857 nigel 69
858 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
859 nigel 3
860 nigel 93 outfile = stdout;
861    
862     /* The following _setmode() stuff is some Windows magic that tells its runtime
863     library to translate CRLF into a single LF character. At least, that's what
864     I've been told: never having used Windows I take this all on trust. Originally
865     it set 0x8000, but then I was advised that _O_BINARY was better. */
866    
867 nigel 75 #if defined(_WIN32) || defined(WIN32)
868 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
869     #endif
870 nigel 75
871 nigel 3 /* Scan options */
872    
873     while (argc > 1 && argv[op][0] == '-')
874     {
875 nigel 63 unsigned char *endptr;
876 nigel 53
877 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
878     showstore = 1;
879 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
880 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
881 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
882     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
883 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
884 nigel 79 #if !defined NODFA
885 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
886 nigel 79 #endif
887 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
888 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
889     *endptr == 0))
890 nigel 53 {
891     op++;
892     argc--;
893     }
894 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
895     {
896     int both = argv[op][2] == 0;
897     int temp;
898     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
899     *endptr == 0))
900     {
901     timeitm = temp;
902     op++;
903     argc--;
904     }
905     else timeitm = LOOPREPEAT;
906     if (both) timeit = timeitm;
907     }
908 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
909     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
910     *endptr == 0))
911     {
912 nigel 93 #if defined(_WIN32) || defined(WIN32)
913 nigel 91 printf("PCRE: -S not supported on this OS\n");
914     exit(1);
915     #else
916     int rc;
917     struct rlimit rlim;
918     getrlimit(RLIMIT_STACK, &rlim);
919     rlim.rlim_cur = stack_size * 1024 * 1024;
920     rc = setrlimit(RLIMIT_STACK, &rlim);
921     if (rc != 0)
922     {
923     printf("PCRE: setrlimit() failed with error %d\n", rc);
924     exit(1);
925     }
926     op++;
927     argc--;
928     #endif
929     }
930 nigel 53 #if !defined NOPOSIX
931 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
932 nigel 53 #endif
933 nigel 63 else if (strcmp(argv[op], "-C") == 0)
934     {
935     int rc;
936 ph10 392 unsigned long int lrc;
937 nigel 63 printf("PCRE version %s\n", pcre_version());
938     printf("Compiled with\n");
939     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
940     printf(" %sUTF-8 support\n", rc? "" : "No ");
941 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
942     printf(" %sUnicode properties support\n", rc? "" : "No ");
943 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
944 ph10 391 /* Note that these values are always the ASCII values, even
945 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
946 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
947     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
948 ph10 150 (rc == -2)? "ANYCRLF" :
949 nigel 93 (rc == -1)? "ANY" : "???");
950 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
951     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
952     "all Unicode newlines");
953 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
954     printf(" Internal link size = %d\n", rc);
955     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
956     printf(" POSIX malloc threshold = %d\n", rc);
957 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
958     printf(" Default match limit = %ld\n", lrc);
959     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
960     printf(" Default recursion depth limit = %ld\n", lrc);
961 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
962     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
963 ph10 121 goto EXIT;
964 nigel 63 }
965 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
966     strcmp(argv[op], "--help") == 0)
967     {
968     usage();
969     goto EXIT;
970     }
971 nigel 3 else
972     {
973 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
974 nigel 93 usage();
975 nigel 77 yield = 1;
976     goto EXIT;
977 nigel 3 }
978     op++;
979     argc--;
980     }
981    
982 nigel 53 /* Get the store for the offsets vector, and remember what it was */
983    
984     size_offsets_max = size_offsets;
985 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
986 nigel 53 if (offsets == NULL)
987     {
988     printf("** Failed to get %d bytes of memory for offsets vector\n",
989 ph10 151 (int)(size_offsets_max * sizeof(int)));
990 nigel 77 yield = 1;
991     goto EXIT;
992 nigel 53 }
993    
994 nigel 3 /* Sort out the input and output files */
995    
996     if (argc > 1)
997     {
998 nigel 93 infile = fopen(argv[op], INPUT_MODE);
999 nigel 3 if (infile == NULL)
1000     {
1001     printf("** Failed to open %s\n", argv[op]);
1002 nigel 77 yield = 1;
1003     goto EXIT;
1004 nigel 3 }
1005     }
1006    
1007     if (argc > 2)
1008     {
1009 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
1010 nigel 3 if (outfile == NULL)
1011     {
1012     printf("** Failed to open %s\n", argv[op+1]);
1013 nigel 77 yield = 1;
1014     goto EXIT;
1015 nigel 3 }
1016     }
1017    
1018     /* Set alternative malloc function */
1019    
1020     pcre_malloc = new_malloc;
1021 nigel 73 pcre_free = new_free;
1022     pcre_stack_malloc = stack_malloc;
1023     pcre_stack_free = stack_free;
1024 nigel 3
1025 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1026 nigel 3
1027 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1028 nigel 3
1029     /* Main loop */
1030    
1031 nigel 11 while (!done)
1032 nigel 3 {
1033     pcre *re = NULL;
1034     pcre_extra *extra = NULL;
1035 nigel 37
1036     #if !defined NOPOSIX /* There are still compilers that require no indent */
1037 nigel 3 regex_t preg;
1038 nigel 45 int do_posix = 0;
1039 nigel 37 #endif
1040    
1041 nigel 7 const char *error;
1042 nigel 25 unsigned char *p, *pp, *ppp;
1043 nigel 75 unsigned char *to_file = NULL;
1044 nigel 53 const unsigned char *tables = NULL;
1045 nigel 75 unsigned long int true_size, true_study_size = 0;
1046     size_t size, regex_gotten_store;
1047 nigel 3 int do_study = 0;
1048 nigel 25 int do_debug = debug;
1049 nigel 35 int do_G = 0;
1050     int do_g = 0;
1051 nigel 25 int do_showinfo = showinfo;
1052 nigel 35 int do_showrest = 0;
1053 nigel 75 int do_flip = 0;
1054 nigel 93 int erroroffset, len, delimiter, poffset;
1055 nigel 3
1056 nigel 67 use_utf8 = 0;
1057 ph10 211 debug_lengths = 1;
1058 nigel 63
1059 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1060 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1061 nigel 63 fflush(outfile);
1062 nigel 3
1063     p = buffer;
1064     while (isspace(*p)) p++;
1065     if (*p == 0) continue;
1066    
1067 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1068 nigel 3
1069 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1070     {
1071 nigel 91 unsigned long int magic, get_options;
1072 nigel 75 uschar sbuf[8];
1073     FILE *f;
1074    
1075     p++;
1076     pp = p + (int)strlen((char *)p);
1077     while (isspace(pp[-1])) pp--;
1078     *pp = 0;
1079    
1080     f = fopen((char *)p, "rb");
1081     if (f == NULL)
1082     {
1083     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1084     continue;
1085     }
1086    
1087     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1088    
1089     true_size =
1090     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1091     true_study_size =
1092     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1093    
1094     re = (real_pcre *)new_malloc(true_size);
1095     regex_gotten_store = gotten_store;
1096    
1097     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1098    
1099     magic = ((real_pcre *)re)->magic_number;
1100     if (magic != MAGIC_NUMBER)
1101     {
1102     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1103     {
1104     do_flip = 1;
1105     }
1106     else
1107     {
1108     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1109     fclose(f);
1110     continue;
1111     }
1112     }
1113    
1114     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1115     do_flip? " (byte-inverted)" : "", p);
1116    
1117     /* Need to know if UTF-8 for printing data strings */
1118    
1119 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1120     use_utf8 = (get_options & PCRE_UTF8) != 0;
1121 nigel 75
1122     /* Now see if there is any following study data */
1123    
1124     if (true_study_size != 0)
1125     {
1126     pcre_study_data *psd;
1127    
1128     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1129     extra->flags = PCRE_EXTRA_STUDY_DATA;
1130    
1131     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1132     extra->study_data = psd;
1133    
1134     if (fread(psd, 1, true_study_size, f) != true_study_size)
1135     {
1136     FAIL_READ:
1137     fprintf(outfile, "Failed to read data from %s\n", p);
1138     if (extra != NULL) new_free(extra);
1139     if (re != NULL) new_free(re);
1140     fclose(f);
1141     continue;
1142     }
1143     fprintf(outfile, "Study data loaded from %s\n", p);
1144     do_study = 1; /* To get the data output if requested */
1145     }
1146     else fprintf(outfile, "No study data\n");
1147    
1148     fclose(f);
1149     goto SHOW_INFO;
1150     }
1151    
1152     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1153     the pattern; if it isn't complete, read more. */
1154    
1155 nigel 3 delimiter = *p++;
1156    
1157 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1158 nigel 3 {
1159 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1160 nigel 3 goto SKIP_DATA;
1161     }
1162    
1163     pp = p;
1164 nigel 93 poffset = p - buffer;
1165 nigel 3
1166     for(;;)
1167     {
1168 nigel 29 while (*pp != 0)
1169     {
1170     if (*pp == '\\' && pp[1] != 0) pp++;
1171     else if (*pp == delimiter) break;
1172     pp++;
1173     }
1174 nigel 3 if (*pp != 0) break;
1175 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1176 nigel 3 {
1177     fprintf(outfile, "** Unexpected EOF\n");
1178 nigel 11 done = 1;
1179     goto CONTINUE;
1180 nigel 3 }
1181 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1182 nigel 3 }
1183    
1184 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1185     pointer to the correct relative point in the buffer. */
1186    
1187     p = buffer + poffset;
1188    
1189 nigel 29 /* If the first character after the delimiter is backslash, make
1190     the pattern end with backslash. This is purely to provide a way
1191     of testing for the error message when a pattern ends with backslash. */
1192    
1193     if (pp[1] == '\\') *pp++ = '\\';
1194    
1195 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1196     for callouts. */
1197 nigel 3
1198     *pp++ = 0;
1199 nigel 75 strcpy((char *)pbuffer, (char *)p);
1200 nigel 3
1201     /* Look for options after final delimiter */
1202    
1203     options = 0;
1204     study_options = 0;
1205 nigel 31 log_store = showstore; /* default from command line */
1206    
1207 nigel 3 while (*pp != 0)
1208     {
1209     switch (*pp++)
1210     {
1211 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1212 nigel 35 case 'g': do_g = 1; break;
1213 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1214     case 'm': options |= PCRE_MULTILINE; break;
1215     case 's': options |= PCRE_DOTALL; break;
1216     case 'x': options |= PCRE_EXTENDED; break;
1217 nigel 25
1218 nigel 35 case '+': do_showrest = 1; break;
1219 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1220 nigel 93 case 'B': do_debug = 1; break;
1221 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1222 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1223 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1224 nigel 75 case 'F': do_flip = 1; break;
1225 nigel 35 case 'G': do_G = 1; break;
1226 nigel 25 case 'I': do_showinfo = 1; break;
1227 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1228 nigel 31 case 'M': log_store = 1; break;
1229 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1230 nigel 37
1231     #if !defined NOPOSIX
1232 nigel 3 case 'P': do_posix = 1; break;
1233 nigel 37 #endif
1234    
1235 nigel 3 case 'S': do_study = 1; break;
1236 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1237 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1238 ph10 126 case 'Z': debug_lengths = 0; break;
1239 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1240 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1241 nigel 25
1242     case 'L':
1243     ppp = pp;
1244 nigel 93 /* The '\r' test here is so that it works on Windows. */
1245     /* The '0' test is just in case this is an unterminated line. */
1246     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1247 nigel 25 *ppp = 0;
1248     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1249     {
1250     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1251     goto SKIP_DATA;
1252     }
1253 nigel 93 locale_set = 1;
1254 nigel 25 tables = pcre_maketables();
1255     pp = ppp;
1256     break;
1257    
1258 nigel 75 case '>':
1259     to_file = pp;
1260     while (*pp != 0) pp++;
1261     while (isspace(pp[-1])) pp--;
1262     *pp = 0;
1263     break;
1264    
1265 nigel 91 case '<':
1266     {
1267 ph10 336 if (strncmp((char *)pp, "JS>", 3) == 0)
1268     {
1269     options |= PCRE_JAVASCRIPT_COMPAT;
1270 ph10 345 pp += 3;
1271 ph10 336 }
1272     else
1273 ph10 345 {
1274 ph10 336 int x = check_newline(pp, outfile);
1275     if (x == 0) goto SKIP_DATA;
1276     options |= x;
1277     while (*pp++ != '>');
1278 ph10 345 }
1279 nigel 91 }
1280     break;
1281    
1282 nigel 77 case '\r': /* So that it works in Windows */
1283     case '\n':
1284     case ' ':
1285     break;
1286 nigel 75
1287 nigel 3 default:
1288     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1289     goto SKIP_DATA;
1290     }
1291     }
1292    
1293 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1294 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1295     local character tables. */
1296 nigel 3
1297 nigel 37 #if !defined NOPOSIX
1298 nigel 3 if (posix || do_posix)
1299     {
1300     int rc;
1301     int cflags = 0;
1302 nigel 75
1303 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1304     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1305 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1306 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1307     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1308 ph10 432 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1309 nigel 87
1310 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1311    
1312     /* Compilation failed; go back for another re, skipping to blank line
1313     if non-interactive. */
1314    
1315     if (rc != 0)
1316     {
1317 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1318 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1319     goto SKIP_DATA;
1320     }
1321     }
1322    
1323     /* Handle compiling via the native interface */
1324    
1325     else
1326 nigel 37 #endif /* !defined NOPOSIX */
1327    
1328 nigel 3 {
1329 ph10 412 unsigned long int get_options;
1330 ph10 416
1331 nigel 93 if (timeit > 0)
1332 nigel 3 {
1333     register int i;
1334     clock_t time_taken;
1335     clock_t start_time = clock();
1336 nigel 93 for (i = 0; i < timeit; i++)
1337 nigel 3 {
1338 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1339 nigel 3 if (re != NULL) free(re);
1340     }
1341     time_taken = clock() - start_time;
1342 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1343     (((double)time_taken * 1000.0) / (double)timeit) /
1344 nigel 63 (double)CLOCKS_PER_SEC);
1345 nigel 3 }
1346    
1347 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1348 nigel 3
1349     /* Compilation failed; go back for another re, skipping to blank line
1350     if non-interactive. */
1351    
1352     if (re == NULL)
1353     {
1354     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1355     SKIP_DATA:
1356     if (infile != stdin)
1357     {
1358     for (;;)
1359     {
1360 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
1361 nigel 11 {
1362     done = 1;
1363     goto CONTINUE;
1364     }
1365 nigel 3 len = (int)strlen((char *)buffer);
1366     while (len > 0 && isspace(buffer[len-1])) len--;
1367     if (len == 0) break;
1368     }
1369     fprintf(outfile, "\n");
1370     }
1371 nigel 25 goto CONTINUE;
1372 nigel 3 }
1373 ph10 416
1374     /* Compilation succeeded. It is now possible to set the UTF-8 option from
1375     within the regex; check for this so that we know how to process the data
1376 ph10 412 lines. */
1377 ph10 416
1378 ph10 412 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1379     if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1380 nigel 3
1381 ph10 412 /* Print information if required. There are now two info-returning
1382     functions. The old one has a limited interface and returns only limited
1383     data. Check that it agrees with the newer one. */
1384 nigel 3
1385 nigel 63 if (log_store)
1386     fprintf(outfile, "Memory allocation (code space): %d\n",
1387     (int)(gotten_store -
1388     sizeof(real_pcre) -
1389     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1390    
1391 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1392     and remember the store that was got. */
1393    
1394     true_size = ((real_pcre *)re)->size;
1395     regex_gotten_store = gotten_store;
1396    
1397     /* If /S was present, study the regexp to generate additional info to
1398     help with the matching. */
1399    
1400     if (do_study)
1401     {
1402 nigel 93 if (timeit > 0)
1403 nigel 75 {
1404     register int i;
1405     clock_t time_taken;
1406     clock_t start_time = clock();
1407 nigel 93 for (i = 0; i < timeit; i++)
1408 nigel 75 extra = pcre_study(re, study_options, &error);
1409     time_taken = clock() - start_time;
1410     if (extra != NULL) free(extra);
1411 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1412     (((double)time_taken * 1000.0) / (double)timeit) /
1413 nigel 75 (double)CLOCKS_PER_SEC);
1414     }
1415     extra = pcre_study(re, study_options, &error);
1416     if (error != NULL)
1417     fprintf(outfile, "Failed to study: %s\n", error);
1418     else if (extra != NULL)
1419     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1420     }
1421    
1422     /* If the 'F' option was present, we flip the bytes of all the integer
1423     fields in the regex data block and the study block. This is to make it
1424     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1425     compiled on a different architecture. */
1426    
1427     if (do_flip)
1428     {
1429     real_pcre *rre = (real_pcre *)re;
1430 ph10 259 rre->magic_number =
1431 ph10 255 byteflip(rre->magic_number, sizeof(rre->magic_number));
1432 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
1433     rre->options = byteflip(rre->options, sizeof(rre->options));
1434 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1435 ph10 259 rre->top_bracket =
1436 ph10 255 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1437 ph10 259 rre->top_backref =
1438 ph10 255 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1439 ph10 259 rre->first_byte =
1440 ph10 255 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1441 ph10 259 rre->req_byte =
1442 ph10 255 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1443     rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1444 nigel 75 sizeof(rre->name_table_offset));
1445 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1446 nigel 75 sizeof(rre->name_entry_size));
1447 ph10 259 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1448 ph10 255 sizeof(rre->name_count));
1449 nigel 75
1450     if (extra != NULL)
1451     {
1452     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1453     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1454 ph10 455 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1455     rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1456 nigel 75 }
1457     }
1458    
1459     /* Extract information from the compiled data if required */
1460    
1461     SHOW_INFO:
1462    
1463 nigel 93 if (do_debug)
1464     {
1465     fprintf(outfile, "------------------------------------------------------------------\n");
1466 ph10 116 pcre_printint(re, outfile, debug_lengths);
1467 nigel 93 }
1468 ph10 416
1469 ph10 412 /* We already have the options in get_options (see above) */
1470 nigel 93
1471 nigel 25 if (do_showinfo)
1472 nigel 3 {
1473 ph10 412 unsigned long int all_options;
1474 nigel 79 #if !defined NOINFOCHECK
1475 nigel 43 int old_first_char, old_options, old_count;
1476 nigel 79 #endif
1477 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1478 ph10 227 hascrorlf;
1479 nigel 63 int nameentrysize, namecount;
1480     const uschar *nametable;
1481 nigel 3
1482 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1483     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1484     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1485 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1486 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1487 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1488     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1489 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1490 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1491     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1492 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1493 nigel 43
1494 nigel 79 #if !defined NOINFOCHECK
1495 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1496 nigel 3 if (count < 0) fprintf(outfile,
1497 nigel 43 "Error %d from pcre_info()\n", count);
1498 nigel 3 else
1499     {
1500 nigel 43 if (old_count != count) fprintf(outfile,
1501     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1502     old_count);
1503 nigel 37
1504 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1505     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1506     first_char, old_first_char);
1507 nigel 37
1508 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1509     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1510     get_options, old_options);
1511 nigel 43 }
1512 nigel 79 #endif
1513 nigel 43
1514 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1515 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1516 nigel 77 (int)size, (int)regex_gotten_store);
1517 nigel 43
1518     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1519     if (backrefmax > 0)
1520     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1521 nigel 63
1522     if (namecount > 0)
1523     {
1524     fprintf(outfile, "Named capturing subpatterns:\n");
1525     while (namecount-- > 0)
1526     {
1527     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1528     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1529     GET2(nametable, 0));
1530     nametable += nameentrysize;
1531     }
1532     }
1533 ph10 172
1534 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1535 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1536 nigel 63
1537 nigel 75 all_options = ((real_pcre *)re)->options;
1538 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1539 nigel 75
1540 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1541 ph10 231 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1542 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1543     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1544     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1545     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1546 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1547 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1548 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1549     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1550 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1551     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1552     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1553 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1554 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1555 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1556     ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1557 ph10 172
1558 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1559 nigel 43
1560 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1561 nigel 91 {
1562     case PCRE_NEWLINE_CR:
1563     fprintf(outfile, "Forced newline sequence: CR\n");
1564     break;
1565 nigel 43
1566 nigel 91 case PCRE_NEWLINE_LF:
1567     fprintf(outfile, "Forced newline sequence: LF\n");
1568     break;
1569    
1570     case PCRE_NEWLINE_CRLF:
1571     fprintf(outfile, "Forced newline sequence: CRLF\n");
1572     break;
1573    
1574 ph10 149 case PCRE_NEWLINE_ANYCRLF:
1575     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1576     break;
1577    
1578 nigel 93 case PCRE_NEWLINE_ANY:
1579     fprintf(outfile, "Forced newline sequence: ANY\n");
1580     break;
1581    
1582 nigel 91 default:
1583     break;
1584     }
1585    
1586 nigel 43 if (first_char == -1)
1587     {
1588 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1589 nigel 43 }
1590     else if (first_char < 0)
1591     {
1592     fprintf(outfile, "No first char\n");
1593     }
1594     else
1595     {
1596 nigel 63 int ch = first_char & 255;
1597 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1598 nigel 63 "" : " (caseless)";
1599 nigel 93 if (PRINTHEX(ch))
1600 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1601 nigel 3 else
1602 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1603 nigel 43 }
1604 nigel 37
1605 nigel 43 if (need_char < 0)
1606     {
1607     fprintf(outfile, "No need char\n");
1608 nigel 3 }
1609 nigel 43 else
1610     {
1611 nigel 63 int ch = need_char & 255;
1612 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1613 nigel 63 "" : " (caseless)";
1614 nigel 93 if (PRINTHEX(ch))
1615 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1616 nigel 43 else
1617 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1618 nigel 43 }
1619 nigel 75
1620     /* Don't output study size; at present it is in any case a fixed
1621     value, but it varies, depending on the computer architecture, and
1622     so messes up the test suite. (And with the /F option, it might be
1623     flipped.) */
1624    
1625     if (do_study)
1626     {
1627     if (extra == NULL)
1628     fprintf(outfile, "Study returned NULL\n");
1629     else
1630     {
1631     uschar *start_bits = NULL;
1632 ph10 455 int minlength;
1633    
1634     new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
1635     fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1636    
1637 nigel 75 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1638     if (start_bits == NULL)
1639 ph10 455 fprintf(outfile, "No set of starting bytes\n");
1640 nigel 75 else
1641     {
1642     int i;
1643     int c = 24;
1644     fprintf(outfile, "Starting byte set: ");
1645     for (i = 0; i < 256; i++)
1646     {
1647     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1648     {
1649     if (c > 75)
1650     {
1651     fprintf(outfile, "\n ");
1652     c = 2;
1653     }
1654 nigel 93 if (PRINTHEX(i) && i != ' ')
1655 nigel 75 {
1656     fprintf(outfile, "%c ", i);
1657     c += 2;
1658     }
1659     else
1660     {
1661     fprintf(outfile, "\\x%02x ", i);
1662     c += 5;
1663     }
1664     }
1665     }
1666     fprintf(outfile, "\n");
1667     }
1668     }
1669     }
1670 nigel 3 }
1671    
1672 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1673     that is all. The first 8 bytes of the file are the regex length and then
1674     the study length, in big-endian order. */
1675 nigel 3
1676 nigel 75 if (to_file != NULL)
1677 nigel 3 {
1678 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1679     if (f == NULL)
1680 nigel 3 {
1681 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1682 nigel 3 }
1683 nigel 75 else
1684     {
1685     uschar sbuf[8];
1686 ph10 255 sbuf[0] = (uschar)((true_size >> 24) & 255);
1687     sbuf[1] = (uschar)((true_size >> 16) & 255);
1688     sbuf[2] = (uschar)((true_size >> 8) & 255);
1689     sbuf[3] = (uschar)((true_size) & 255);
1690 ph10 259
1691 ph10 255 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1692     sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1693     sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1694     sbuf[7] = (uschar)((true_study_size) & 255);
1695 nigel 3
1696 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1697     fwrite(re, 1, true_size, f) < true_size)
1698     {
1699     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1700     }
1701 nigel 3 else
1702     {
1703 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1704     if (extra != NULL)
1705 nigel 3 {
1706 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1707     true_study_size)
1708 nigel 3 {
1709 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1710     strerror(errno));
1711 nigel 3 }
1712 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1713 nigel 93
1714 nigel 3 }
1715     }
1716 nigel 75 fclose(f);
1717 nigel 3 }
1718 nigel 77
1719     new_free(re);
1720     if (extra != NULL) new_free(extra);
1721     if (tables != NULL) new_free((void *)tables);
1722 nigel 75 continue; /* With next regex */
1723 nigel 3 }
1724 nigel 75 } /* End of non-POSIX compile */
1725 nigel 3
1726     /* Read data lines and test them */
1727    
1728     for (;;)
1729     {
1730 nigel 87 uschar *q;
1731 ph10 147 uschar *bptr;
1732 nigel 57 int *use_offsets = offsets;
1733 nigel 53 int use_size_offsets = size_offsets;
1734 nigel 63 int callout_data = 0;
1735     int callout_data_set = 0;
1736 nigel 3 int count, c;
1737 nigel 29 int copystrings = 0;
1738 ph10 386 int find_match_limit = default_find_match_limit;
1739 nigel 29 int getstrings = 0;
1740     int getlist = 0;
1741 nigel 39 int gmatched = 0;
1742 nigel 35 int start_offset = 0;
1743 nigel 41 int g_notempty = 0;
1744 nigel 77 int use_dfa = 0;
1745 nigel 3
1746     options = 0;
1747    
1748 nigel 91 *copynames = 0;
1749     *getnames = 0;
1750    
1751     copynamesptr = copynames;
1752     getnamesptr = getnames;
1753    
1754 nigel 63 pcre_callout = callout;
1755     first_callout = 1;
1756     callout_extra = 0;
1757     callout_count = 0;
1758     callout_fail_count = 999999;
1759     callout_fail_id = -1;
1760 nigel 73 show_malloc = 0;
1761 nigel 63
1762 nigel 91 if (extra != NULL) extra->flags &=
1763     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1764    
1765     len = 0;
1766     for (;;)
1767 nigel 11 {
1768 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1769 nigel 91 {
1770     if (len > 0) break;
1771     done = 1;
1772     goto CONTINUE;
1773     }
1774     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1775     len = (int)strlen((char *)buffer);
1776     if (buffer[len-1] == '\n') break;
1777 nigel 11 }
1778 nigel 3
1779     while (len > 0 && isspace(buffer[len-1])) len--;
1780     buffer[len] = 0;
1781     if (len == 0) break;
1782    
1783     p = buffer;
1784     while (isspace(*p)) p++;
1785    
1786 ph10 147 bptr = q = dbuffer;
1787 nigel 3 while ((c = *p++) != 0)
1788     {
1789     int i = 0;
1790     int n = 0;
1791 nigel 63
1792 nigel 3 if (c == '\\') switch ((c = *p++))
1793     {
1794     case 'a': c = 7; break;
1795     case 'b': c = '\b'; break;
1796     case 'e': c = 27; break;
1797     case 'f': c = '\f'; break;
1798     case 'n': c = '\n'; break;
1799     case 'r': c = '\r'; break;
1800     case 't': c = '\t'; break;
1801     case 'v': c = '\v'; break;
1802    
1803     case '0': case '1': case '2': case '3':
1804     case '4': case '5': case '6': case '7':
1805     c -= '0';
1806     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1807     c = c * 8 + *p++ - '0';
1808 nigel 91
1809     #if !defined NOUTF8
1810     if (use_utf8 && c > 255)
1811     {
1812     unsigned char buff8[8];
1813     int ii, utn;
1814     utn = ord2utf8(c, buff8);
1815     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1816     c = buff8[ii]; /* Last byte */
1817     }
1818     #endif
1819 nigel 3 break;
1820    
1821     case 'x':
1822 nigel 49
1823     /* Handle \x{..} specially - new Perl thing for utf8 */
1824    
1825 nigel 79 #if !defined NOUTF8
1826 nigel 49 if (*p == '{')
1827     {
1828     unsigned char *pt = p;
1829     c = 0;
1830     while (isxdigit(*(++pt)))
1831     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1832     if (*pt == '}')
1833     {
1834 nigel 67 unsigned char buff8[8];
1835 nigel 49 int ii, utn;
1836 ph10 355 if (use_utf8)
1837 ph10 358 {
1838 ph10 355 utn = ord2utf8(c, buff8);
1839     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1840     c = buff8[ii]; /* Last byte */
1841     }
1842     else
1843     {
1844 ph10 358 if (c > 255)
1845 ph10 355 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1846     "UTF-8 mode is not enabled.\n"
1847     "** Truncation will probably give the wrong result.\n", c);
1848 ph10 358 }
1849 nigel 49 p = pt + 1;
1850     break;
1851     }
1852     /* Not correct form; fall through */
1853     }
1854 nigel 79 #endif
1855 nigel 49
1856     /* Ordinary \x */
1857    
1858 nigel 3 c = 0;
1859     while (i++ < 2 && isxdigit(*p))
1860     {
1861     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1862     p++;
1863     }
1864     break;
1865    
1866 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1867 nigel 3 p--;
1868     continue;
1869    
1870 nigel 75 case '>':
1871     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1872     continue;
1873    
1874 nigel 3 case 'A': /* Option setting */
1875     options |= PCRE_ANCHORED;
1876     continue;
1877    
1878     case 'B':
1879     options |= PCRE_NOTBOL;
1880     continue;
1881    
1882 nigel 29 case 'C':
1883 nigel 63 if (isdigit(*p)) /* Set copy string */
1884     {
1885     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1886     copystrings |= 1 << n;
1887     }
1888     else if (isalnum(*p))
1889     {
1890 nigel 91 uschar *npp = copynamesptr;
1891 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1892 nigel 91 *npp++ = 0;
1893 nigel 67 *npp = 0;
1894 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1895 nigel 63 if (n < 0)
1896 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1897     copynamesptr = npp;
1898 nigel 63 }
1899     else if (*p == '+')
1900     {
1901     callout_extra = 1;
1902     p++;
1903     }
1904     else if (*p == '-')
1905     {
1906     pcre_callout = NULL;
1907     p++;
1908     }
1909     else if (*p == '!')
1910     {
1911     callout_fail_id = 0;
1912     p++;
1913     while(isdigit(*p))
1914     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1915     callout_fail_count = 0;
1916     if (*p == '!')
1917     {
1918     p++;
1919     while(isdigit(*p))
1920     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1921     }
1922     }
1923     else if (*p == '*')
1924     {
1925     int sign = 1;
1926     callout_data = 0;
1927     if (*(++p) == '-') { sign = -1; p++; }
1928     while(isdigit(*p))
1929     callout_data = callout_data * 10 + *p++ - '0';
1930     callout_data *= sign;
1931     callout_data_set = 1;
1932     }
1933 nigel 29 continue;
1934    
1935 nigel 79 #if !defined NODFA
1936 nigel 77 case 'D':
1937 nigel 79 #if !defined NOPOSIX
1938 nigel 77 if (posix || do_posix)
1939     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1940     else
1941 nigel 79 #endif
1942 nigel 77 use_dfa = 1;
1943     continue;
1944    
1945     case 'F':
1946     options |= PCRE_DFA_SHORTEST;
1947     continue;
1948 nigel 79 #endif
1949 nigel 77
1950 nigel 29 case 'G':
1951 nigel 63 if (isdigit(*p))
1952     {
1953     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1954     getstrings |= 1 << n;
1955     }
1956     else if (isalnum(*p))
1957     {
1958 nigel 91 uschar *npp = getnamesptr;
1959 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1960 nigel 91 *npp++ = 0;
1961 nigel 67 *npp = 0;
1962 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1963 nigel 63 if (n < 0)
1964 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1965     getnamesptr = npp;
1966 nigel 63 }
1967 nigel 29 continue;
1968    
1969     case 'L':
1970     getlist = 1;
1971     continue;
1972    
1973 nigel 63 case 'M':
1974     find_match_limit = 1;
1975     continue;
1976    
1977 nigel 37 case 'N':
1978 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
1979     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
1980     else
1981     options |= PCRE_NOTEMPTY;
1982 nigel 37 continue;
1983    
1984 nigel 3 case 'O':
1985     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1986 nigel 53 if (n > size_offsets_max)
1987     {
1988     size_offsets_max = n;
1989 nigel 57 free(offsets);
1990 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1991 nigel 53 if (offsets == NULL)
1992     {
1993     printf("** Failed to get %d bytes of memory for offsets vector\n",
1994 ph10 151 (int)(size_offsets_max * sizeof(int)));
1995 nigel 77 yield = 1;
1996     goto EXIT;
1997 nigel 53 }
1998     }
1999     use_size_offsets = n;
2000 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2001 nigel 3 continue;
2002    
2003 nigel 75 case 'P':
2004 ph10 427 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2005     PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2006 nigel 75 continue;
2007    
2008 nigel 91 case 'Q':
2009     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2010     if (extra == NULL)
2011     {
2012     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2013     extra->flags = 0;
2014     }
2015     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2016     extra->match_limit_recursion = n;
2017     continue;
2018    
2019     case 'q':
2020     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2021     if (extra == NULL)
2022     {
2023     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2024     extra->flags = 0;
2025     }
2026     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2027     extra->match_limit = n;
2028     continue;
2029    
2030 nigel 79 #if !defined NODFA
2031 nigel 77 case 'R':
2032     options |= PCRE_DFA_RESTART;
2033     continue;
2034 nigel 79 #endif
2035 nigel 77
2036 nigel 73 case 'S':
2037     show_malloc = 1;
2038     continue;
2039 ph10 392
2040 ph10 389 case 'Y':
2041     options |= PCRE_NO_START_OPTIMIZE;
2042 ph10 392 continue;
2043 nigel 73
2044 nigel 3 case 'Z':
2045     options |= PCRE_NOTEOL;
2046     continue;
2047 nigel 71
2048     case '?':
2049     options |= PCRE_NO_UTF8_CHECK;
2050     continue;
2051 nigel 91
2052     case '<':
2053     {
2054     int x = check_newline(p, outfile);
2055     if (x == 0) goto NEXT_DATA;
2056     options |= x;
2057     while (*p++ != '>');
2058     }
2059     continue;
2060 nigel 3 }
2061 nigel 9 *q++ = c;
2062 nigel 3 }
2063 nigel 9 *q = 0;
2064     len = q - dbuffer;
2065 ph10 371
2066 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
2067 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2068 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
2069 ph10 371
2070 ph10 363 #if !defined NOPOSIX
2071     if (posix || do_posix)
2072     {
2073     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2074 ph10 371 bptr += buffer_size - len - 1;
2075 ph10 363 }
2076 ph10 371 else
2077     #endif
2078 ph10 363 {
2079     memmove(bptr + buffer_size - len, bptr, len);
2080 ph10 371 bptr += buffer_size - len;
2081     }
2082 nigel 3
2083 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
2084     {
2085     printf("**Match limit not relevant for DFA matching: ignored\n");
2086     find_match_limit = 0;
2087     }
2088    
2089 nigel 3 /* Handle matching via the POSIX interface, which does not
2090 nigel 63 support timing or playing with the match limit or callout data. */
2091 nigel 3
2092 nigel 37 #if !defined NOPOSIX
2093 nigel 3 if (posix || do_posix)
2094     {
2095     int rc;
2096     int eflags = 0;
2097 nigel 63 regmatch_t *pmatch = NULL;
2098     if (use_size_offsets > 0)
2099 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2100 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2101     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2102 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2103 nigel 3
2104 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2105 nigel 3
2106     if (rc != 0)
2107     {
2108 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2109 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2110     }
2111 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2112     != 0)
2113     {
2114     fprintf(outfile, "Matched with REG_NOSUB\n");
2115     }
2116 nigel 3 else
2117     {
2118 nigel 7 size_t i;
2119 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
2120 nigel 3 {
2121     if (pmatch[i].rm_so >= 0)
2122     {
2123 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2124 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2125     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2126 nigel 3 fprintf(outfile, "\n");
2127 nigel 35 if (i == 0 && do_showrest)
2128     {
2129     fprintf(outfile, " 0+ ");
2130 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2131     outfile);
2132 nigel 35 fprintf(outfile, "\n");
2133     }
2134 nigel 3 }
2135     }
2136     }
2137 nigel 53 free(pmatch);
2138 nigel 3 }
2139    
2140 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2141 nigel 3
2142 nigel 37 else
2143     #endif /* !defined NOPOSIX */
2144    
2145 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2146 nigel 3 {
2147 nigel 93 if (timeitm > 0)
2148 nigel 3 {
2149     register int i;
2150     clock_t time_taken;
2151     clock_t start_time = clock();
2152 nigel 77
2153 nigel 79 #if !defined NODFA
2154 nigel 77 if (all_use_dfa || use_dfa)
2155     {
2156     int workspace[1000];
2157 nigel 93 for (i = 0; i < timeitm; i++)
2158 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2159 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2160     sizeof(workspace)/sizeof(int));
2161     }
2162     else
2163 nigel 79 #endif
2164 nigel 77
2165 nigel 93 for (i = 0; i < timeitm; i++)
2166 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2167 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2168 nigel 77
2169 nigel 3 time_taken = clock() - start_time;
2170 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2171     (((double)time_taken * 1000.0) / (double)timeitm) /
2172 nigel 63 (double)CLOCKS_PER_SEC);
2173 nigel 3 }
2174    
2175 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2176 nigel 87 varying limits in order to find the minimum value for the match limit and
2177     for the recursion limit. */
2178 nigel 63
2179     if (find_match_limit)
2180     {
2181     if (extra == NULL)
2182     {
2183 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2184 nigel 63 extra->flags = 0;
2185     }
2186    
2187 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2188 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2189     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2190     PCRE_ERROR_MATCHLIMIT, "match()");
2191 nigel 63
2192 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2193     options|g_notempty, use_offsets, use_size_offsets,
2194     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2195     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2196 nigel 63 }
2197    
2198     /* If callout_data is set, use the interface with additional data */
2199    
2200     else if (callout_data_set)
2201     {
2202     if (extra == NULL)
2203     {
2204 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2205 nigel 63 extra->flags = 0;
2206     }
2207     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2208 nigel 71 extra->callout_data = &callout_data;
2209 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2210     options | g_notempty, use_offsets, use_size_offsets);
2211     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2212     }
2213    
2214     /* The normal case is just to do the match once, with the default
2215     value of match_limit. */
2216    
2217 nigel 79 #if !defined NODFA
2218 nigel 77 else if (all_use_dfa || use_dfa)
2219     {
2220     int workspace[1000];
2221 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2222 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2223     sizeof(workspace)/sizeof(int));
2224     if (count == 0)
2225     {
2226     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2227     count = use_size_offsets/2;
2228     }
2229     }
2230 nigel 79 #endif
2231 nigel 77
2232 nigel 75 else
2233     {
2234     count = pcre_exec(re, extra, (char *)bptr, len,
2235     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2236 nigel 77 if (count == 0)
2237     {
2238     fprintf(outfile, "Matched, but too many substrings\n");
2239     count = use_size_offsets/3;
2240     }
2241 nigel 75 }
2242 nigel 3
2243 nigel 39 /* Matched */
2244    
2245 nigel 3 if (count >= 0)
2246     {
2247 nigel 93 int i, maxcount;
2248    
2249     #if !defined NODFA
2250     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2251     #endif
2252     maxcount = use_size_offsets/3;
2253    
2254     /* This is a check against a lunatic return value. */
2255    
2256     if (count > maxcount)
2257     {
2258     fprintf(outfile,
2259     "** PCRE error: returned count %d is too big for offset size %d\n",
2260     count, use_size_offsets);
2261     count = use_size_offsets/3;
2262     if (do_g || do_G)
2263     {
2264     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2265     do_g = do_G = FALSE; /* Break g/G loop */
2266     }
2267     }
2268    
2269 nigel 29 for (i = 0; i < count * 2; i += 2)
2270 nigel 3 {
2271 nigel 57 if (use_offsets[i] < 0)
2272 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2273     else
2274     {
2275     fprintf(outfile, "%2d: ", i/2);
2276 nigel 63 (void)pchars(bptr + use_offsets[i],
2277     use_offsets[i+1] - use_offsets[i], outfile);
2278 nigel 3 fprintf(outfile, "\n");
2279 nigel 35 if (i == 0)
2280     {
2281     if (do_showrest)
2282     {
2283     fprintf(outfile, " 0+ ");
2284 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2285     outfile);
2286 nigel 35 fprintf(outfile, "\n");
2287     }
2288     }
2289 nigel 3 }
2290     }
2291 nigel 29
2292     for (i = 0; i < 32; i++)
2293     {
2294     if ((copystrings & (1 << i)) != 0)
2295     {
2296 nigel 91 char copybuffer[256];
2297 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2298 nigel 37 i, copybuffer, sizeof(copybuffer));
2299 nigel 29 if (rc < 0)
2300     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2301     else
2302 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2303 nigel 29 }
2304     }
2305    
2306 nigel 91 for (copynamesptr = copynames;
2307     *copynamesptr != 0;
2308     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2309     {
2310     char copybuffer[256];
2311     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2312     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2313     if (rc < 0)
2314     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2315     else
2316     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2317     }
2318    
2319 nigel 29 for (i = 0; i < 32; i++)
2320     {
2321     if ((getstrings & (1 << i)) != 0)
2322     {
2323     const char *substring;
2324 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2325 nigel 29 i, &substring);
2326     if (rc < 0)
2327     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2328     else
2329     {
2330     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2331 nigel 49 pcre_free_substring(substring);
2332 nigel 29 }
2333     }
2334     }
2335    
2336 nigel 91 for (getnamesptr = getnames;
2337     *getnamesptr != 0;
2338     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2339     {
2340     const char *substring;
2341     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2342     count, (char *)getnamesptr, &substring);
2343     if (rc < 0)
2344     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2345     else
2346     {
2347     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2348     pcre_free_substring(substring);
2349     }
2350     }
2351    
2352 nigel 29 if (getlist)
2353     {
2354     const char **stringlist;
2355 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2356 nigel 29 &stringlist);
2357     if (rc < 0)
2358     fprintf(outfile, "get substring list failed %d\n", rc);
2359     else
2360     {
2361     for (i = 0; i < count; i++)
2362     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2363     if (stringlist[i] != NULL)
2364     fprintf(outfile, "string list not terminated by NULL\n");
2365 nigel 49 /* free((void *)stringlist); */
2366     pcre_free_substring_list(stringlist);
2367 nigel 29 }
2368     }
2369 nigel 39 }
2370 nigel 29
2371 nigel 75 /* There was a partial match */
2372    
2373     else if (count == PCRE_ERROR_PARTIAL)
2374     {
2375 nigel 77 fprintf(outfile, "Partial match");
2376 ph10 426 if (use_size_offsets > 1)
2377     {
2378     fprintf(outfile, ": ");
2379     pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2380     outfile);
2381     }
2382 nigel 77 fprintf(outfile, "\n");
2383 nigel 75 break; /* Out of the /g loop */
2384     }
2385    
2386 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2387 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2388     to advance the start offset, and continue. We won't be at the end of the
2389     string - that was checked before setting g_notempty.
2390 nigel 39
2391 ph10 150 Complication arises in the case when the newline option is "any" or
2392 ph10 149 "anycrlf". If the previous match was at the end of a line terminated by
2393     CRLF, an advance of one character just passes the \r, whereas we should
2394     prefer the longer newline sequence, as does the code in pcre_exec().
2395     Fudge the offset value to achieve this.
2396 ph10 144
2397 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2398     character, not one byte. */
2399    
2400 nigel 3 else
2401     {
2402 nigel 41 if (g_notempty != 0)
2403 nigel 35 {
2404 nigel 73 int onechar = 1;
2405 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2406 nigel 57 use_offsets[0] = start_offset;
2407 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2408     {
2409     int d;
2410     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2411 ph10 391 /* Note that these values are always the ASCII ones, even in
2412     EBCDIC environments. CR = 13, NL = 10. */
2413     obits = (d == 13)? PCRE_NEWLINE_CR :
2414     (d == 10)? PCRE_NEWLINE_LF :
2415     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2416 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2417 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2418     }
2419 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2420 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2421 ph10 149 &&
2422 ph10 143 start_offset < len - 1 &&
2423     bptr[start_offset] == '\r' &&
2424     bptr[start_offset+1] == '\n')
2425 ph10 144 onechar++;
2426 ph10 143 else if (use_utf8)
2427 nigel 73 {
2428     while (start_offset + onechar < len)
2429     {
2430     int tb = bptr[start_offset+onechar];
2431     if (tb <= 127) break;
2432     tb &= 0xc0;
2433     if (tb != 0 && tb != 0xc0) onechar++;
2434     }
2435     }
2436     use_offsets[1] = start_offset + onechar;
2437 nigel 35 }
2438 nigel 41 else
2439     {
2440 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2441 nigel 41 {
2442 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
2443 nigel 41 }
2444 nigel 73 else fprintf(outfile, "Error %d\n", count);
2445 nigel 41 break; /* Out of the /g loop */
2446     }
2447 nigel 3 }
2448 nigel 35
2449 nigel 39 /* If not /g or /G we are done */
2450    
2451     if (!do_g && !do_G) break;
2452    
2453 nigel 41 /* If we have matched an empty string, first check to see if we are at
2454 ph10 442 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2455     Perl's /g option does. This turns out to be rather cunning. First we set
2456     PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2457 nigel 47 same point. If this fails (picked up above) we advance to the next
2458 ph10 143 character. */
2459 ph10 142
2460 nigel 41 g_notempty = 0;
2461 ph10 142
2462 nigel 57 if (use_offsets[0] == use_offsets[1])
2463 nigel 41 {
2464 nigel 57 if (use_offsets[0] == len) break;
2465 ph10 442 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2466 nigel 41 }
2467 nigel 39
2468     /* For /g, update the start offset, leaving the rest alone */
2469    
2470 ph10 143 if (do_g) start_offset = use_offsets[1];
2471 nigel 39
2472     /* For /G, update the pointer and length */
2473    
2474     else
2475 nigel 35 {
2476 ph10 143 bptr += use_offsets[1];
2477     len -= use_offsets[1];
2478 nigel 35 }
2479 nigel 39 } /* End of loop for /g and /G */
2480 nigel 91
2481     NEXT_DATA: continue;
2482 nigel 39 } /* End of loop for data lines */
2483 nigel 3
2484 nigel 11 CONTINUE:
2485 nigel 37
2486     #if !defined NOPOSIX
2487 nigel 3 if (posix || do_posix) regfree(&preg);
2488 nigel 37 #endif
2489    
2490 nigel 77 if (re != NULL) new_free(re);
2491     if (extra != NULL) new_free(extra);
2492 nigel 25 if (tables != NULL)
2493     {
2494 nigel 77 new_free((void *)tables);
2495 nigel 25 setlocale(LC_CTYPE, "C");
2496 nigel 93 locale_set = 0;
2497 nigel 25 }
2498 nigel 3 }
2499    
2500 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2501 nigel 77
2502     EXIT:
2503    
2504     if (infile != NULL && infile != stdin) fclose(infile);
2505     if (outfile != NULL && outfile != stdout) fclose(outfile);
2506    
2507     free(buffer);
2508     free(dbuffer);
2509     free(pbuffer);
2510     free(offsets);
2511    
2512     return yield;
2513 nigel 3 }
2514    
2515 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12