/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 498 - (hide annotations) (download)
Wed Mar 3 19:29:38 2010 UTC (4 years, 1 month ago) by ph10
File MIME type: text/plain
File size: 74180 byte(s)
Fix missing data in coptable and poptable vectors; add compile-time checks for 
their lengths.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
43 nigel 3 #include <ctype.h>
44     #include <stdio.h>
45     #include <string.h>
46     #include <stdlib.h>
47     #include <time.h>
48 nigel 25 #include <locale.h>
49 nigel 75 #include <errno.h>
50 nigel 3
51 ph10 287 #ifdef SUPPORT_LIBREADLINE
52 ph10 343 #ifdef HAVE_UNISTD_H
53 ph10 287 #include <unistd.h>
54 ph10 343 #endif
55 ph10 287 #include <readline/readline.h>
56     #include <readline/history.h>
57     #endif
58 nigel 93
59 ph10 287
60 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
61     input and output without "b"; then I was told that "b" was needed in some
62     environments, so it was added for release 5.0 to both the input and output. (It
63     makes no difference on Unix-like systems.) Later I was told that it is wrong
64     for the input on Windows. I've now abstracted the modes into two macros that
65     are set here, to make it easier to fiddle with them, and removed "b" from the
66     input mode under Windows. */
67    
68     #if defined(_WIN32) || defined(WIN32)
69     #include <io.h> /* For _setmode() */
70     #include <fcntl.h> /* For _O_BINARY */
71     #define INPUT_MODE "r"
72     #define OUTPUT_MODE "wb"
73    
74 ph10 411 #ifndef isatty
75     #define isatty _isatty /* This is what Windows calls them, I'm told, */
76     #endif /* though in some environments they seem to */
77     /* be already defined, hence the #ifndefs. */
78     #ifndef fileno
79 ph10 343 #define fileno _fileno
80 ph10 411 #endif
81 ph10 343
82 nigel 93 #else
83     #include <sys/time.h> /* These two includes are needed */
84     #include <sys/resource.h> /* for setrlimit(). */
85     #define INPUT_MODE "rb"
86     #define OUTPUT_MODE "wb"
87 nigel 91 #endif
88    
89 nigel 93
90 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
91     displaying the results of pcre_study() and we also need to know about the
92     internal macros, structures, and other internal data values; pcretest has
93     "inside information" compared to a program that strictly follows the PCRE API.
94 nigel 37
95 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
96     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
97     appropriately for an application, not for building PCRE. */
98 nigel 77
99 ph10 145 #include "pcre.h"
100 nigel 77 #include "pcre_internal.h"
101    
102 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
103     to keep two copies, we include the source file here, changing the names of the
104     external symbols to prevent clashes. */
105 nigel 77
106 ph10 351 #define _pcre_ucp_gentype ucp_gentype
107 nigel 85 #define _pcre_utf8_table1 utf8_table1
108     #define _pcre_utf8_table1_size utf8_table1_size
109     #define _pcre_utf8_table2 utf8_table2
110     #define _pcre_utf8_table3 utf8_table3
111     #define _pcre_utf8_table4 utf8_table4
112     #define _pcre_utt utt
113     #define _pcre_utt_size utt_size
114 ph10 240 #define _pcre_utt_names utt_names
115 nigel 85 #define _pcre_OP_lengths OP_lengths
116    
117     #include "pcre_tables.c"
118    
119     /* We also need the pcre_printint() function for printing out compiled
120     patterns. This function is in a separate file so that it can be included in
121 ph10 498 pcre_compile.c when that module is compiled with debugging enabled. It needs to
122     know which case is being compiled. */
123 nigel 85
124 ph10 498 #define COMPILING_PCRETEST
125     #include "pcre_printint.src"
126    
127     /* The definition of the macro PRINTABLE, which determines whether to print an
128 nigel 93 output character as-is or as a hex value when showing compiled patterns, is
129 ph10 498 contained in the printint.src file. We use it here also, in cases when the
130     locale has not been explicitly changed, so as to get consistent output from
131     systems that differ in their output from isprint() even in the "C" locale. */
132 nigel 93
133     #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
134 nigel 85
135 nigel 37 /* It is possible to compile this test program without including support for
136     testing the POSIX interface, though this is not available via the standard
137     Makefile. */
138    
139     #if !defined NOPOSIX
140 nigel 3 #include "pcreposix.h"
141 nigel 37 #endif
142 nigel 3
143 ph10 107 /* It is also possible, for the benefit of the version currently imported into
144     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
145     interface to the DFA matcher (NODFA), and without the doublecheck of the old
146     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
147     UTF8 support if PCRE is built without it. */
148 nigel 79
149 ph10 107 #ifndef SUPPORT_UTF8
150     #ifndef NOUTF8
151     #define NOUTF8
152     #endif
153     #endif
154 nigel 79
155 ph10 107
156 nigel 85 /* Other parameters */
157    
158 nigel 3 #ifndef CLOCKS_PER_SEC
159     #ifdef CLK_TCK
160     #define CLOCKS_PER_SEC CLK_TCK
161     #else
162     #define CLOCKS_PER_SEC 100
163     #endif
164     #endif
165    
166 nigel 93 /* This is the default loop count for timing. */
167    
168 nigel 75 #define LOOPREPEAT 500000
169 nigel 3
170 nigel 85 /* Static variables */
171    
172 nigel 3 static FILE *outfile;
173     static int log_store = 0;
174 nigel 63 static int callout_count;
175     static int callout_extra;
176     static int callout_fail_count;
177     static int callout_fail_id;
178 ph10 210 static int debug_lengths;
179 nigel 63 static int first_callout;
180 nigel 93 static int locale_set = 0;
181 nigel 73 static int show_malloc;
182 nigel 67 static int use_utf8;
183 nigel 43 static size_t gotten_store;
184 nigel 3
185 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
186    
187     static int buffer_size = 50000;
188     static uschar *buffer = NULL;
189     static uschar *dbuffer = NULL;
190 nigel 75 static uschar *pbuffer = NULL;
191 nigel 3
192 nigel 75
193 nigel 49
194     /*************************************************
195 nigel 91 * Read or extend an input line *
196     *************************************************/
197    
198     /* Input lines are read into buffer, but both patterns and data lines can be
199     continued over multiple input lines. In addition, if the buffer fills up, we
200     want to automatically expand it so as to be able to handle extremely large
201     lines that are needed for certain stress tests. When the input buffer is
202     expanded, the other two buffers must also be expanded likewise, and the
203     contents of pbuffer, which are a copy of the input for callouts, must be
204     preserved (for when expansion happens for a data line). This is not the most
205     optimal way of handling this, but hey, this is just a test program!
206    
207     Arguments:
208     f the file to read
209     start where in buffer to start (this *must* be within buffer)
210 ph10 287 prompt for stdin or readline()
211 nigel 91
212     Returns: pointer to the start of new data
213     could be a copy of start, or could be moved
214     NULL if no data read and EOF reached
215     */
216    
217     static uschar *
218 ph10 287 extend_inputline(FILE *f, uschar *start, const char *prompt)
219 nigel 91 {
220     uschar *here = start;
221    
222     for (;;)
223     {
224     int rlen = buffer_size - (here - buffer);
225 nigel 93
226 nigel 91 if (rlen > 1000)
227     {
228     int dlen;
229 ph10 289
230 ph10 287 /* If libreadline support is required, use readline() to read a line if the
231     input is a terminal. Note that readline() removes the trailing newline, so
232     we must put it back again, to be compatible with fgets(). */
233 ph10 289
234 ph10 287 #ifdef SUPPORT_LIBREADLINE
235     if (isatty(fileno(f)))
236     {
237 ph10 289 size_t len;
238 ph10 287 char *s = readline(prompt);
239     if (s == NULL) return (here == start)? NULL : start;
240     len = strlen(s);
241 ph10 289 if (len > 0) add_history(s);
242 ph10 287 if (len > rlen - 1) len = rlen - 1;
243     memcpy(here, s, len);
244     here[len] = '\n';
245 ph10 289 here[len+1] = 0;
246     free(s);
247 ph10 287 }
248 ph10 289 else
249     #endif
250    
251 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
252 ph10 289
253 ph10 287 {
254 ph10 289 if (f == stdin) printf(prompt);
255 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
256     return (here == start)? NULL : start;
257 ph10 289 }
258    
259 nigel 91 dlen = (int)strlen((char *)here);
260     if (dlen > 0 && here[dlen - 1] == '\n') return start;
261     here += dlen;
262     }
263    
264     else
265     {
266     int new_buffer_size = 2*buffer_size;
267     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
268     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
269     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
270    
271     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
272     {
273     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
274     exit(1);
275     }
276    
277     memcpy(new_buffer, buffer, buffer_size);
278     memcpy(new_pbuffer, pbuffer, buffer_size);
279    
280     buffer_size = new_buffer_size;
281    
282     start = new_buffer + (start - buffer);
283     here = new_buffer + (here - buffer);
284    
285     free(buffer);
286     free(dbuffer);
287     free(pbuffer);
288    
289     buffer = new_buffer;
290     dbuffer = new_dbuffer;
291     pbuffer = new_pbuffer;
292     }
293     }
294    
295     return NULL; /* Control never gets here */
296     }
297    
298    
299    
300    
301    
302    
303    
/*************************************************
*          Read number from string               *
*************************************************/

/* strtoul() is not used because SunOS4 does not have it. Rather than mess
around with conditional compilation, the conversion is done by hand. It is only
used for unpicking arguments, so it is kept simple: leading white space is
skipped, then decimal digits are accumulated with no overflow check.

Arguments:
  str         string to be converted
  endptr      where to put the end pointer (first character not consumed)

Returns:      the converted value, as an int (0 if there are no digits)
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
unsigned char *p = str;
int value = 0;

for (; *p != 0 && isspace(*p); p++) { /* skip leading white space */ }

for (; isdigit(*p); p++)
  value = value * 10 + (int)(*p - '0');

*endptr = p;
return value;
}
328    
329    
330    
331 nigel 49
332     /*************************************************
333     * Convert UTF-8 string to value *
334     *************************************************/
335    
336     /* This function takes one or more bytes that represents a UTF-8 character,
337     and returns the value of the character.
338    
339     Argument:
340 nigel 91 utf8bytes a pointer to the byte vector
341     vptr a pointer to an int to receive the value
342 nigel 49
343 nigel 91 Returns: > 0 => the number of bytes consumed
344     -6 to 0 => malformed UTF-8 character at offset = (-return)
345 nigel 49 */
346    
347 nigel 79 #if !defined NOUTF8
348    
349 nigel 67 static int
350 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
351 nigel 49 {
352 nigel 91 int c = *utf8bytes++;
353 nigel 49 int d = c;
354     int i, j, s;
355    
356     for (i = -1; i < 6; i++) /* i is number of additional bytes */
357     {
358     if ((d & 0x80) == 0) break;
359     d <<= 1;
360     }
361    
362     if (i == -1) { *vptr = c; return 1; } /* ascii character */
363     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
364    
365     /* i now has a value in the range 1-5 */
366    
367 nigel 59 s = 6*i;
368 nigel 85 d = (c & utf8_table3[i]) << s;
369 nigel 49
370     for (j = 0; j < i; j++)
371     {
372 nigel 91 c = *utf8bytes++;
373 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
374 nigel 59 s -= 6;
375 nigel 49 d |= (c & 0x3f) << s;
376     }
377    
378     /* Check that encoding was the correct unique one */
379    
380 nigel 85 for (j = 0; j < utf8_table1_size; j++)
381     if (d <= utf8_table1[j]) break;
382 nigel 49 if (j != i) return -(i+1);
383    
384     /* Valid value */
385    
386     *vptr = d;
387     return i+1;
388     }
389    
390 nigel 79 #endif
391 nigel 49
392    
393 nigel 79
394 nigel 63 /*************************************************
395 nigel 85 * Convert character value to UTF-8 *
396     *************************************************/
397    
398     /* This function takes an integer value in the range 0 - 0x7fffffff
399     and encodes it as a UTF-8 character in 0 to 6 bytes.
400    
401     Arguments:
402     cvalue the character value
403 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
404 nigel 85
405     Returns: number of characters placed in the buffer
406     */
407    
408 nigel 93 #if !defined NOUTF8
409    
410 nigel 85 static int
411 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
412 nigel 85 {
413     register int i, j;
414     for (i = 0; i < utf8_table1_size; i++)
415     if (cvalue <= utf8_table1[i]) break;
416 nigel 91 utf8bytes += i;
417 nigel 85 for (j = i; j > 0; j--)
418     {
419 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
420 nigel 85 cvalue >>= 6;
421     }
422 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
423 nigel 85 return i + 1;
424     }
425    
426 nigel 93 #endif
427 nigel 85
428    
429 nigel 93
430 nigel 85 /*************************************************
431 nigel 63 * Print character string *
432     *************************************************/
433 nigel 49
434 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
435     mode. Yields number of characters printed. If handed a NULL file, just counts
436     chars without printing. */
437 nigel 49
438 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
439 nigel 3 {
440 nigel 85 int c = 0;
441 nigel 63 int yield = 0;
442 nigel 3
443 nigel 63 while (length-- > 0)
444 nigel 3 {
445 nigel 79 #if !defined NOUTF8
446 nigel 67 if (use_utf8)
447 nigel 63 {
448     int rc = utf82ord(p, &c);
449 nigel 3
450 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
451     {
452     length -= rc - 1;
453     p += rc;
454 nigel 93 if (PRINTHEX(c))
455 nigel 63 {
456     if (f != NULL) fprintf(f, "%c", c);
457     yield++;
458     }
459     else
460     {
461 nigel 93 int n = 4;
462     if (f != NULL) fprintf(f, "\\x{%02x}", c);
463     yield += (n <= 0x000000ff)? 2 :
464     (n <= 0x00000fff)? 3 :
465     (n <= 0x0000ffff)? 4 :
466     (n <= 0x000fffff)? 5 : 6;
467 nigel 63 }
468     continue;
469     }
470     }
471 nigel 79 #endif
472 nigel 3
473 nigel 63 /* Not UTF-8, or malformed UTF-8 */
474    
475 nigel 93 c = *p++;
476     if (PRINTHEX(c))
477 nigel 3 {
478 nigel 63 if (f != NULL) fprintf(f, "%c", c);
479     yield++;
480 nigel 3 }
481 nigel 63 else
482 nigel 3 {
483 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
484     yield += 4;
485     }
486     }
487 nigel 3
488 nigel 63 return yield;
489     }
490 nigel 23
491 nigel 3
492 nigel 23
493 nigel 63 /*************************************************
494     * Callout function *
495     *************************************************/
496 nigel 3
497 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
498     the match. Yield zero unless more callouts than the fail count, or the callout
499     data is not zero. */
500 nigel 3
501 nigel 63 static int callout(pcre_callout_block *cb)
502     {
503     FILE *f = (first_callout | callout_extra)? outfile : NULL;
504 nigel 75 int i, pre_start, post_start, subject_length;
505 nigel 3
506 nigel 63 if (callout_extra)
507     {
508     fprintf(f, "Callout %d: last capture = %d\n",
509     cb->callout_number, cb->capture_last);
510 nigel 3
511 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
512     {
513     if (cb->offset_vector[i] < 0)
514     fprintf(f, "%2d: <unset>\n", i/2);
515     else
516     {
517     fprintf(f, "%2d: ", i/2);
518     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
519     cb->offset_vector[i+1] - cb->offset_vector[i], f);
520     fprintf(f, "\n");
521     }
522     }
523     }
524 nigel 3
525 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
526     datails. On subsequent calls in the same match, we use pchars just to find the
527     printed lengths of the substrings. */
528 nigel 3
529 nigel 63 if (f != NULL) fprintf(f, "--->");
530 nigel 3
531 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
532     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
533     cb->current_position - cb->start_match, f);
534 nigel 3
535 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
536    
537 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
538     cb->subject_length - cb->current_position, f);
539 nigel 3
540 nigel 63 if (f != NULL) fprintf(f, "\n");
541 nigel 9
542 nigel 63 /* Always print appropriate indicators, with callout number if not already
543 nigel 75 shown. For automatic callouts, show the pattern offset. */
544 nigel 3
545 nigel 75 if (cb->callout_number == 255)
546     {
547     fprintf(outfile, "%+3d ", cb->pattern_position);
548     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
549     }
550     else
551     {
552     if (callout_extra) fprintf(outfile, " ");
553     else fprintf(outfile, "%3d ", cb->callout_number);
554     }
555 nigel 3
556 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
557     fprintf(outfile, "^");
558 nigel 3
559 nigel 63 if (post_start > 0)
560     {
561     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
562     fprintf(outfile, "^");
563 nigel 3 }
564    
565 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
566     fprintf(outfile, " ");
567    
568     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
569     pbuffer + cb->pattern_position);
570    
571 nigel 63 fprintf(outfile, "\n");
572     first_callout = 0;
573 nigel 3
574 nigel 71 if (cb->callout_data != NULL)
575 nigel 49 {
576 nigel 71 int callout_data = *((int *)(cb->callout_data));
577     if (callout_data != 0)
578     {
579     fprintf(outfile, "Callout data = %d\n", callout_data);
580     return callout_data;
581     }
582 nigel 63 }
583 nigel 49
584 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
585     (++callout_count >= callout_fail_count)? 1 : 0;
586 nigel 3 }
587    
588    
589 nigel 63 /*************************************************
590 nigel 73 * Local malloc functions *
591 nigel 63 *************************************************/
592 nigel 3
593     /* Alternative malloc function, to test functionality and show the size of the
594     compiled re. */
595    
596     static void *new_malloc(size_t size)
597     {
598 nigel 73 void *block = malloc(size);
599 nigel 43 gotten_store = size;
600 nigel 73 if (show_malloc)
601 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
602 nigel 73 return block;
603 nigel 3 }
604    
605 nigel 73 static void new_free(void *block)
606     {
607     if (show_malloc)
608     fprintf(outfile, "free %p\n", block);
609     free(block);
610     }
611 nigel 3
612    
613 nigel 73 /* For recursion malloc/free, to test stacking calls */
614    
615     static void *stack_malloc(size_t size)
616     {
617     void *block = malloc(size);
618     if (show_malloc)
619 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
620 nigel 73 return block;
621     }
622    
623     static void stack_free(void *block)
624     {
625     if (show_malloc)
626     fprintf(outfile, "stack_free %p\n", block);
627     free(block);
628     }
629    
630    
631 nigel 63 /*************************************************
632     * Call pcre_fullinfo() *
633     *************************************************/
634 nigel 43
635     /* Get one piece of information from the pcre_fullinfo() function */
636    
637     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
638     {
639     int rc;
640     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
641     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
642     }
643    
644    
645    
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverse the order of the low-order bytes of a value.

Arguments:
  value     the value to be flipped
  n         2 to swap the two low-order bytes; any other value reverses the
            four low-order bytes

Returns:    the flipped value; bytes above those flipped are discarded
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;

return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
659    
660    
661    
662    
663     /*************************************************
664 nigel 87 * Check match or recursion limit *
665     *************************************************/
666    
667     static int
668     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
669     int start_offset, int options, int *use_offsets, int use_size_offsets,
670     int flag, unsigned long int *limit, int errnumber, const char *msg)
671     {
672     int count;
673     int min = 0;
674     int mid = 64;
675     int max = -1;
676    
677     extra->flags |= flag;
678    
679     for (;;)
680     {
681     *limit = mid;
682    
683     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
684     use_offsets, use_size_offsets);
685    
686     if (count == errnumber)
687     {
688     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
689     min = mid;
690     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
691     }
692    
693     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
694     count == PCRE_ERROR_PARTIAL)
695     {
696     if (mid == min + 1)
697     {
698     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
699     break;
700     }
701     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
702     max = mid;
703     mid = (min + mid)/2;
704     }
705     else break; /* Some other error */
706     }
707    
708     extra->flags &= ~flag;
709     return count;
710     }
711    
712    
713    
714     /*************************************************
715 ph10 227 * Case-independent strncmp() function *
716     *************************************************/
717    
718     /*
719     Arguments:
720     s first string
721     t second string
722     n number of characters to compare
723    
724     Returns: < 0, = 0, or > 0, according to the comparison
725     */
726    
727     static int
728     strncmpic(uschar *s, uschar *t, int n)
729     {
730     while (n--)
731     {
732     int c = tolower(*s++) - tolower(*t++);
733     if (c) return c;
734     }
735     return 0;
736     }
737    
738    
739    
740     /*************************************************
741 nigel 91 * Check newline indicator *
742     *************************************************/
743    
744     /* This is used both at compile and run-time to check for <xxx> escapes, where
745 ph10 149 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
746     no match.
747 nigel 91
748     Arguments:
749     p points after the leading '<'
750     f file for error message
751    
752     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
753     */
754    
755     static int
756     check_newline(uschar *p, FILE *f)
757     {
758 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
759     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
760     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
761     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
762     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
763 ph10 231 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
764     if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
765 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
766     return 0;
767     }
768    
769    
770    
/*************************************************
*                Usage function                  *
*************************************************/

/* Write the command line summary to stdout. Which lines appear depends on
whether readline, DFA, and POSIX support were compiled in. */

static void
usage(void)
{
printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
       "Input and output default to stdin and stdout.\n");

#ifdef SUPPORT_LIBREADLINE
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif

printf("\nOptions:\n"
       " -b show compiled code (bytecode)\n"
       " -C show PCRE compile-time options and exit\n"
       " -d debug: show compiled code and information (-b and -i)\n");

#if !defined NODFA
printf(" -dfa force DFA matching for all subjects\n");
#endif

printf(" -help show usage information\n");
printf(" -i show information about compiled patterns\n");
printf(" -M find MATCH_LIMIT minimum for each subject\n");
printf(" -m output memory used information\n");
printf(" -o <n> set size of offsets vector to <n>\n");

#if !defined NOPOSIX
printf(" -p use POSIX interface\n");
#endif

printf(" -q quiet: do not output PCRE version number at start\n"
       " -S <n> set stack size to <n> megabytes\n"
       " -s output store (memory) used information\n"
       " -t time compilation and execution\n"
       " -t <n> time compilation and execution, repeating <n> times\n"
       " -tm time execution (matching) only\n"
       " -tm <n> time execution (matching) only, repeating <n> times\n");
}
808    
809    
810    
811     /*************************************************
812 nigel 63 * Main Program *
813     *************************************************/
814 nigel 43
815 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
816     consist of a regular expression, in delimiters and optionally followed by
817     options, followed by a set of test data, terminated by an empty line. */
818    
819     int main(int argc, char **argv)
820     {
821     FILE *infile = stdin;
822     int options = 0;
823     int study_options = 0;
824 ph10 386 int default_find_match_limit = FALSE;
825 nigel 3 int op = 1;
826     int timeit = 0;
827 nigel 93 int timeitm = 0;
828 nigel 3 int showinfo = 0;
829 nigel 31 int showstore = 0;
830 nigel 87 int quiet = 0;
831 nigel 53 int size_offsets = 45;
832     int size_offsets_max;
833 nigel 77 int *offsets = NULL;
834 nigel 53 #if !defined NOPOSIX
835 nigel 3 int posix = 0;
836 nigel 53 #endif
837 nigel 3 int debug = 0;
838 nigel 11 int done = 0;
839 nigel 77 int all_use_dfa = 0;
840     int yield = 0;
841 nigel 91 int stack_size;
842 nigel 3
843 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
844     that 1024 is plenty long enough for the few names we'll be testing. */
845 nigel 69
846 nigel 91 uschar copynames[1024];
847     uschar getnames[1024];
848    
849     uschar *copynamesptr;
850     uschar *getnamesptr;
851    
852 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
853 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
854 nigel 69
855 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
856     dbuffer = (unsigned char *)malloc(buffer_size);
857     pbuffer = (unsigned char *)malloc(buffer_size);
858 nigel 69
859 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
860 nigel 3
861 nigel 93 outfile = stdout;
862    
863     /* The following _setmode() stuff is some Windows magic that tells its runtime
864     library to translate CRLF into a single LF character. At least, that's what
865     I've been told: never having used Windows I take this all on trust. Originally
866     it set 0x8000, but then I was advised that _O_BINARY was better. */
867    
868 nigel 75 #if defined(_WIN32) || defined(WIN32)
869 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
870     #endif
871 nigel 75
872 nigel 3 /* Scan options */
873    
874     while (argc > 1 && argv[op][0] == '-')
875     {
876 nigel 63 unsigned char *endptr;
877 nigel 53
878 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
879     showstore = 1;
880 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
881 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
882 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
883     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
884 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
885 nigel 79 #if !defined NODFA
886 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
887 nigel 79 #endif
888 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
889 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
890     *endptr == 0))
891 nigel 53 {
892     op++;
893     argc--;
894     }
895 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
896     {
897     int both = argv[op][2] == 0;
898     int temp;
899     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
900     *endptr == 0))
901     {
902     timeitm = temp;
903     op++;
904     argc--;
905     }
906     else timeitm = LOOPREPEAT;
907     if (both) timeit = timeitm;
908     }
909 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
910     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
911     *endptr == 0))
912     {
913 nigel 93 #if defined(_WIN32) || defined(WIN32)
914 nigel 91 printf("PCRE: -S not supported on this OS\n");
915     exit(1);
916     #else
917     int rc;
918     struct rlimit rlim;
919     getrlimit(RLIMIT_STACK, &rlim);
920     rlim.rlim_cur = stack_size * 1024 * 1024;
921     rc = setrlimit(RLIMIT_STACK, &rlim);
922     if (rc != 0)
923     {
924     printf("PCRE: setrlimit() failed with error %d\n", rc);
925     exit(1);
926     }
927     op++;
928     argc--;
929     #endif
930     }
931 nigel 53 #if !defined NOPOSIX
932 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
933 nigel 53 #endif
934 nigel 63 else if (strcmp(argv[op], "-C") == 0)
935     {
936     int rc;
937 ph10 392 unsigned long int lrc;
938 nigel 63 printf("PCRE version %s\n", pcre_version());
939     printf("Compiled with\n");
940     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
941     printf(" %sUTF-8 support\n", rc? "" : "No ");
942 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
943     printf(" %sUnicode properties support\n", rc? "" : "No ");
944 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
945 ph10 391 /* Note that these values are always the ASCII values, even
946 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
947 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
948     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
949 ph10 150 (rc == -2)? "ANYCRLF" :
950 nigel 93 (rc == -1)? "ANY" : "???");
951 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
952     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
953     "all Unicode newlines");
954 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
955     printf(" Internal link size = %d\n", rc);
956     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
957     printf(" POSIX malloc threshold = %d\n", rc);
958 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
959     printf(" Default match limit = %ld\n", lrc);
960     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
961     printf(" Default recursion depth limit = %ld\n", lrc);
962 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
963     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
964 ph10 121 goto EXIT;
965 nigel 63 }
966 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
967     strcmp(argv[op], "--help") == 0)
968     {
969     usage();
970     goto EXIT;
971     }
972 nigel 3 else
973     {
974 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
975 nigel 93 usage();
976 nigel 77 yield = 1;
977     goto EXIT;
978 nigel 3 }
979     op++;
980     argc--;
981     }
982    
983 nigel 53 /* Get the store for the offsets vector, and remember what it was */
984    
985     size_offsets_max = size_offsets;
986 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
987 nigel 53 if (offsets == NULL)
988     {
989     printf("** Failed to get %d bytes of memory for offsets vector\n",
990 ph10 151 (int)(size_offsets_max * sizeof(int)));
991 nigel 77 yield = 1;
992     goto EXIT;
993 nigel 53 }
994    
995 nigel 3 /* Sort out the input and output files */
996    
997     if (argc > 1)
998     {
999 nigel 93 infile = fopen(argv[op], INPUT_MODE);
1000 nigel 3 if (infile == NULL)
1001     {
1002     printf("** Failed to open %s\n", argv[op]);
1003 nigel 77 yield = 1;
1004     goto EXIT;
1005 nigel 3 }
1006     }
1007    
1008     if (argc > 2)
1009     {
1010 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
1011 nigel 3 if (outfile == NULL)
1012     {
1013     printf("** Failed to open %s\n", argv[op+1]);
1014 nigel 77 yield = 1;
1015     goto EXIT;
1016 nigel 3 }
1017     }
1018    
1019     /* Set alternative malloc function */
1020    
1021     pcre_malloc = new_malloc;
1022 nigel 73 pcre_free = new_free;
1023     pcre_stack_malloc = stack_malloc;
1024     pcre_stack_free = stack_free;
1025 nigel 3
1026 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1027 nigel 3
1028 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1029 nigel 3
1030     /* Main loop */
1031    
1032 nigel 11 while (!done)
1033 nigel 3 {
1034     pcre *re = NULL;
1035     pcre_extra *extra = NULL;
1036 nigel 37
1037     #if !defined NOPOSIX /* There are still compilers that require no indent */
1038 nigel 3 regex_t preg;
1039 nigel 45 int do_posix = 0;
1040 nigel 37 #endif
1041    
1042 nigel 7 const char *error;
1043 nigel 25 unsigned char *p, *pp, *ppp;
1044 nigel 75 unsigned char *to_file = NULL;
1045 nigel 53 const unsigned char *tables = NULL;
1046 nigel 75 unsigned long int true_size, true_study_size = 0;
1047     size_t size, regex_gotten_store;
1048 nigel 3 int do_study = 0;
1049 nigel 25 int do_debug = debug;
1050 nigel 35 int do_G = 0;
1051     int do_g = 0;
1052 nigel 25 int do_showinfo = showinfo;
1053 nigel 35 int do_showrest = 0;
1054 nigel 75 int do_flip = 0;
1055 nigel 93 int erroroffset, len, delimiter, poffset;
1056 nigel 3
1057 nigel 67 use_utf8 = 0;
1058 ph10 211 debug_lengths = 1;
1059 nigel 63
1060 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1061 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1062 nigel 63 fflush(outfile);
1063 nigel 3
1064     p = buffer;
1065     while (isspace(*p)) p++;
1066     if (*p == 0) continue;
1067    
1068 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1069 nigel 3
1070 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1071     {
1072 nigel 91 unsigned long int magic, get_options;
1073 nigel 75 uschar sbuf[8];
1074     FILE *f;
1075    
1076     p++;
1077     pp = p + (int)strlen((char *)p);
1078     while (isspace(pp[-1])) pp--;
1079     *pp = 0;
1080    
1081     f = fopen((char *)p, "rb");
1082     if (f == NULL)
1083     {
1084     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1085     continue;
1086     }
1087    
1088     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1089    
1090     true_size =
1091     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1092     true_study_size =
1093     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1094    
1095     re = (real_pcre *)new_malloc(true_size);
1096     regex_gotten_store = gotten_store;
1097    
1098     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1099    
1100     magic = ((real_pcre *)re)->magic_number;
1101     if (magic != MAGIC_NUMBER)
1102     {
1103     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1104     {
1105     do_flip = 1;
1106     }
1107     else
1108     {
1109     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1110     fclose(f);
1111     continue;
1112     }
1113     }
1114    
1115     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1116     do_flip? " (byte-inverted)" : "", p);
1117    
1118     /* Need to know if UTF-8 for printing data strings */
1119    
1120 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1121     use_utf8 = (get_options & PCRE_UTF8) != 0;
1122 nigel 75
1123     /* Now see if there is any following study data */
1124    
1125     if (true_study_size != 0)
1126     {
1127     pcre_study_data *psd;
1128    
1129     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1130     extra->flags = PCRE_EXTRA_STUDY_DATA;
1131    
1132     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1133     extra->study_data = psd;
1134    
1135     if (fread(psd, 1, true_study_size, f) != true_study_size)
1136     {
1137     FAIL_READ:
1138     fprintf(outfile, "Failed to read data from %s\n", p);
1139     if (extra != NULL) new_free(extra);
1140     if (re != NULL) new_free(re);
1141     fclose(f);
1142     continue;
1143     }
1144     fprintf(outfile, "Study data loaded from %s\n", p);
1145     do_study = 1; /* To get the data output if requested */
1146     }
1147     else fprintf(outfile, "No study data\n");
1148    
1149     fclose(f);
1150     goto SHOW_INFO;
1151     }
1152    
1153     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1154     the pattern; if it isn't complete, read more. */
1155    
1156 nigel 3 delimiter = *p++;
1157    
1158 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1159 nigel 3 {
1160 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1161 nigel 3 goto SKIP_DATA;
1162     }
1163    
1164     pp = p;
1165 nigel 93 poffset = p - buffer;
1166 nigel 3
1167     for(;;)
1168     {
1169 nigel 29 while (*pp != 0)
1170     {
1171     if (*pp == '\\' && pp[1] != 0) pp++;
1172     else if (*pp == delimiter) break;
1173     pp++;
1174     }
1175 nigel 3 if (*pp != 0) break;
1176 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1177 nigel 3 {
1178     fprintf(outfile, "** Unexpected EOF\n");
1179 nigel 11 done = 1;
1180     goto CONTINUE;
1181 nigel 3 }
1182 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1183 nigel 3 }
1184    
1185 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1186     pointer to the correct relative point in the buffer. */
1187    
1188     p = buffer + poffset;
1189    
1190 nigel 29 /* If the first character after the delimiter is backslash, make
1191     the pattern end with backslash. This is purely to provide a way
1192     of testing for the error message when a pattern ends with backslash. */
1193    
1194     if (pp[1] == '\\') *pp++ = '\\';
1195    
1196 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1197     for callouts. */
1198 nigel 3
1199     *pp++ = 0;
1200 nigel 75 strcpy((char *)pbuffer, (char *)p);
1201 nigel 3
1202     /* Look for options after final delimiter */
1203    
1204     options = 0;
1205     study_options = 0;
1206 nigel 31 log_store = showstore; /* default from command line */
1207    
1208 nigel 3 while (*pp != 0)
1209     {
1210     switch (*pp++)
1211     {
1212 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1213 nigel 35 case 'g': do_g = 1; break;
1214 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1215     case 'm': options |= PCRE_MULTILINE; break;
1216     case 's': options |= PCRE_DOTALL; break;
1217     case 'x': options |= PCRE_EXTENDED; break;
1218 nigel 25
1219 nigel 35 case '+': do_showrest = 1; break;
1220 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1221 nigel 93 case 'B': do_debug = 1; break;
1222 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1223 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1224 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1225 nigel 75 case 'F': do_flip = 1; break;
1226 nigel 35 case 'G': do_G = 1; break;
1227 nigel 25 case 'I': do_showinfo = 1; break;
1228 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1229 nigel 31 case 'M': log_store = 1; break;
1230 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1231 nigel 37
1232     #if !defined NOPOSIX
1233 nigel 3 case 'P': do_posix = 1; break;
1234 nigel 37 #endif
1235    
1236 nigel 3 case 'S': do_study = 1; break;
1237 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1238 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1239 ph10 126 case 'Z': debug_lengths = 0; break;
1240 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1241 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1242 nigel 25
1243     case 'L':
1244     ppp = pp;
1245 nigel 93 /* The '\r' test here is so that it works on Windows. */
1246     /* The '0' test is just in case this is an unterminated line. */
1247     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1248 nigel 25 *ppp = 0;
1249     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1250     {
1251     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1252     goto SKIP_DATA;
1253     }
1254 nigel 93 locale_set = 1;
1255 nigel 25 tables = pcre_maketables();
1256     pp = ppp;
1257     break;
1258    
1259 nigel 75 case '>':
1260     to_file = pp;
1261     while (*pp != 0) pp++;
1262     while (isspace(pp[-1])) pp--;
1263     *pp = 0;
1264     break;
1265    
1266 nigel 91 case '<':
1267     {
1268 ph10 336 if (strncmp((char *)pp, "JS>", 3) == 0)
1269     {
1270     options |= PCRE_JAVASCRIPT_COMPAT;
1271 ph10 345 pp += 3;
1272 ph10 336 }
1273     else
1274 ph10 345 {
1275 ph10 336 int x = check_newline(pp, outfile);
1276     if (x == 0) goto SKIP_DATA;
1277     options |= x;
1278     while (*pp++ != '>');
1279 ph10 345 }
1280 nigel 91 }
1281     break;
1282    
1283 nigel 77 case '\r': /* So that it works in Windows */
1284     case '\n':
1285     case ' ':
1286     break;
1287 nigel 75
1288 nigel 3 default:
1289     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1290     goto SKIP_DATA;
1291     }
1292     }
1293    
1294 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1295 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1296     local character tables. */
1297 nigel 3
1298 nigel 37 #if !defined NOPOSIX
1299 nigel 3 if (posix || do_posix)
1300     {
1301     int rc;
1302     int cflags = 0;
1303 nigel 75
1304 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1305     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1306 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1307 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1308     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1309 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1310 nigel 87
1311 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1312    
1313     /* Compilation failed; go back for another re, skipping to blank line
1314     if non-interactive. */
1315    
1316     if (rc != 0)
1317     {
1318 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1319 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1320     goto SKIP_DATA;
1321     }
1322     }
1323    
1324     /* Handle compiling via the native interface */
1325    
1326     else
1327 nigel 37 #endif /* !defined NOPOSIX */
1328    
1329 nigel 3 {
1330 ph10 412 unsigned long int get_options;
1331 ph10 416
1332 nigel 93 if (timeit > 0)
1333 nigel 3 {
1334     register int i;
1335     clock_t time_taken;
1336     clock_t start_time = clock();
1337 nigel 93 for (i = 0; i < timeit; i++)
1338 nigel 3 {
1339 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1340 nigel 3 if (re != NULL) free(re);
1341     }
1342     time_taken = clock() - start_time;
1343 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1344     (((double)time_taken * 1000.0) / (double)timeit) /
1345 nigel 63 (double)CLOCKS_PER_SEC);
1346 nigel 3 }
1347    
1348 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1349 nigel 3
1350     /* Compilation failed; go back for another re, skipping to blank line
1351     if non-interactive. */
1352    
1353     if (re == NULL)
1354     {
1355     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1356     SKIP_DATA:
1357     if (infile != stdin)
1358     {
1359     for (;;)
1360     {
1361 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
1362 nigel 11 {
1363     done = 1;
1364     goto CONTINUE;
1365     }
1366 nigel 3 len = (int)strlen((char *)buffer);
1367     while (len > 0 && isspace(buffer[len-1])) len--;
1368     if (len == 0) break;
1369     }
1370     fprintf(outfile, "\n");
1371     }
1372 nigel 25 goto CONTINUE;
1373 nigel 3 }
1374 ph10 416
1375     /* Compilation succeeded. It is now possible to set the UTF-8 option from
1376     within the regex; check for this so that we know how to process the data
1377 ph10 412 lines. */
1378 ph10 416
1379 ph10 412 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1380     if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1381 nigel 3
1382 ph10 412 /* Print information if required. There are now two info-returning
1383     functions. The old one has a limited interface and returns only limited
1384     data. Check that it agrees with the newer one. */
1385 nigel 3
1386 nigel 63 if (log_store)
1387     fprintf(outfile, "Memory allocation (code space): %d\n",
1388     (int)(gotten_store -
1389     sizeof(real_pcre) -
1390     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1391    
1392 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1393     and remember the store that was got. */
1394    
1395     true_size = ((real_pcre *)re)->size;
1396     regex_gotten_store = gotten_store;
1397    
1398     /* If /S was present, study the regexp to generate additional info to
1399     help with the matching. */
1400    
1401     if (do_study)
1402     {
1403 nigel 93 if (timeit > 0)
1404 nigel 75 {
1405     register int i;
1406     clock_t time_taken;
1407     clock_t start_time = clock();
1408 nigel 93 for (i = 0; i < timeit; i++)
1409 nigel 75 extra = pcre_study(re, study_options, &error);
1410     time_taken = clock() - start_time;
1411     if (extra != NULL) free(extra);
1412 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1413     (((double)time_taken * 1000.0) / (double)timeit) /
1414 nigel 75 (double)CLOCKS_PER_SEC);
1415     }
1416     extra = pcre_study(re, study_options, &error);
1417     if (error != NULL)
1418     fprintf(outfile, "Failed to study: %s\n", error);
1419     else if (extra != NULL)
1420     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1421     }
1422    
1423     /* If the 'F' option was present, we flip the bytes of all the integer
1424     fields in the regex data block and the study block. This is to make it
1425     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1426     compiled on a different architecture. */
1427    
1428     if (do_flip)
1429     {
1430     real_pcre *rre = (real_pcre *)re;
1431 ph10 259 rre->magic_number =
1432 ph10 255 byteflip(rre->magic_number, sizeof(rre->magic_number));
1433 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
1434     rre->options = byteflip(rre->options, sizeof(rre->options));
1435 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1436 ph10 259 rre->top_bracket =
1437 ph10 255 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1438 ph10 259 rre->top_backref =
1439 ph10 255 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1440 ph10 259 rre->first_byte =
1441 ph10 255 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1442 ph10 259 rre->req_byte =
1443 ph10 255 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1444     rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1445 nigel 75 sizeof(rre->name_table_offset));
1446 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1447 nigel 75 sizeof(rre->name_entry_size));
1448 ph10 259 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1449 ph10 255 sizeof(rre->name_count));
1450 nigel 75
1451     if (extra != NULL)
1452     {
1453     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1454     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1455 ph10 455 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1456     rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1457 nigel 75 }
1458     }
1459    
1460     /* Extract information from the compiled data if required */
1461    
1462     SHOW_INFO:
1463    
1464 nigel 93 if (do_debug)
1465     {
1466     fprintf(outfile, "------------------------------------------------------------------\n");
1467 ph10 116 pcre_printint(re, outfile, debug_lengths);
1468 nigel 93 }
1469 ph10 416
1470 ph10 412 /* We already have the options in get_options (see above) */
1471 nigel 93
1472 nigel 25 if (do_showinfo)
1473 nigel 3 {
1474 ph10 412 unsigned long int all_options;
1475 nigel 79 #if !defined NOINFOCHECK
1476 nigel 43 int old_first_char, old_options, old_count;
1477 nigel 79 #endif
1478 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1479 ph10 227 hascrorlf;
1480 nigel 63 int nameentrysize, namecount;
1481     const uschar *nametable;
1482 nigel 3
1483 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1484     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1485     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1486 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1487 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1488 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1489     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1490 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1491 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1492     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1493 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1494 nigel 43
1495 nigel 79 #if !defined NOINFOCHECK
1496 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1497 nigel 3 if (count < 0) fprintf(outfile,
1498 nigel 43 "Error %d from pcre_info()\n", count);
1499 nigel 3 else
1500     {
1501 nigel 43 if (old_count != count) fprintf(outfile,
1502     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1503     old_count);
1504 nigel 37
1505 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1506     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1507     first_char, old_first_char);
1508 nigel 37
1509 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1510     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1511     get_options, old_options);
1512 nigel 43 }
1513 nigel 79 #endif
1514 nigel 43
1515 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1516 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1517 nigel 77 (int)size, (int)regex_gotten_store);
1518 nigel 43
1519     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1520     if (backrefmax > 0)
1521     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1522 nigel 63
1523     if (namecount > 0)
1524     {
1525     fprintf(outfile, "Named capturing subpatterns:\n");
1526     while (namecount-- > 0)
1527     {
1528     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1529     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1530     GET2(nametable, 0));
1531     nametable += nameentrysize;
1532     }
1533     }
1534 ph10 172
1535 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1536 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1537 nigel 63
1538 nigel 75 all_options = ((real_pcre *)re)->options;
1539 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1540 nigel 75
1541 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1542 ph10 231 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1543 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1544     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1545     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1546     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1547 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1548 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1549 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1550     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1551 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1552     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1553     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1554 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1555 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1556 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1557     ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1558 ph10 172
1559 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1560 nigel 43
1561 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1562 nigel 91 {
1563     case PCRE_NEWLINE_CR:
1564     fprintf(outfile, "Forced newline sequence: CR\n");
1565     break;
1566 nigel 43
1567 nigel 91 case PCRE_NEWLINE_LF:
1568     fprintf(outfile, "Forced newline sequence: LF\n");
1569     break;
1570    
1571     case PCRE_NEWLINE_CRLF:
1572     fprintf(outfile, "Forced newline sequence: CRLF\n");
1573     break;
1574    
1575 ph10 149 case PCRE_NEWLINE_ANYCRLF:
1576     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1577     break;
1578    
1579 nigel 93 case PCRE_NEWLINE_ANY:
1580     fprintf(outfile, "Forced newline sequence: ANY\n");
1581     break;
1582    
1583 nigel 91 default:
1584     break;
1585     }
1586    
1587 nigel 43 if (first_char == -1)
1588     {
1589 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1590 nigel 43 }
1591     else if (first_char < 0)
1592     {
1593     fprintf(outfile, "No first char\n");
1594     }
1595     else
1596     {
1597 nigel 63 int ch = first_char & 255;
1598 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1599 nigel 63 "" : " (caseless)";
1600 nigel 93 if (PRINTHEX(ch))
1601 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1602 nigel 3 else
1603 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1604 nigel 43 }
1605 nigel 37
1606 nigel 43 if (need_char < 0)
1607     {
1608     fprintf(outfile, "No need char\n");
1609 nigel 3 }
1610 nigel 43 else
1611     {
1612 nigel 63 int ch = need_char & 255;
1613 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1614 nigel 63 "" : " (caseless)";
1615 nigel 93 if (PRINTHEX(ch))
1616 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1617 nigel 43 else
1618 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1619 nigel 43 }
1620 nigel 75
1621     /* Don't output study size; at present it is in any case a fixed
1622     value, but it varies, depending on the computer architecture, and
1623     so messes up the test suite. (And with the /F option, it might be
1624     flipped.) */
1625    
1626     if (do_study)
1627     {
1628     if (extra == NULL)
1629     fprintf(outfile, "Study returned NULL\n");
1630     else
1631     {
1632     uschar *start_bits = NULL;
1633 ph10 455 int minlength;
1634 ph10 461
1635 ph10 455 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
1636 ph10 461 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1637    
1638 nigel 75 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1639     if (start_bits == NULL)
1640 ph10 455 fprintf(outfile, "No set of starting bytes\n");
1641 nigel 75 else
1642     {
1643     int i;
1644     int c = 24;
1645     fprintf(outfile, "Starting byte set: ");
1646     for (i = 0; i < 256; i++)
1647     {
1648     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1649     {
1650     if (c > 75)
1651     {
1652     fprintf(outfile, "\n ");
1653     c = 2;
1654     }
1655 nigel 93 if (PRINTHEX(i) && i != ' ')
1656 nigel 75 {
1657     fprintf(outfile, "%c ", i);
1658     c += 2;
1659     }
1660     else
1661     {
1662     fprintf(outfile, "\\x%02x ", i);
1663     c += 5;
1664     }
1665     }
1666     }
1667     fprintf(outfile, "\n");
1668     }
1669     }
1670     }
1671 nigel 3 }
1672    
1673 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1674     that is all. The first 8 bytes of the file are the regex length and then
1675     the study length, in big-endian order. */
1676 nigel 3
1677 nigel 75 if (to_file != NULL)
1678 nigel 3 {
1679 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1680     if (f == NULL)
1681 nigel 3 {
1682 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1683 nigel 3 }
1684 nigel 75 else
1685     {
1686     uschar sbuf[8];
1687 ph10 255 sbuf[0] = (uschar)((true_size >> 24) & 255);
1688     sbuf[1] = (uschar)((true_size >> 16) & 255);
1689     sbuf[2] = (uschar)((true_size >> 8) & 255);
1690     sbuf[3] = (uschar)((true_size) & 255);
1691 ph10 259
1692 ph10 255 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1693     sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1694     sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1695     sbuf[7] = (uschar)((true_study_size) & 255);
1696 nigel 3
1697 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1698     fwrite(re, 1, true_size, f) < true_size)
1699     {
1700     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1701     }
1702 nigel 3 else
1703     {
1704 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1705     if (extra != NULL)
1706 nigel 3 {
1707 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1708     true_study_size)
1709 nigel 3 {
1710 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1711     strerror(errno));
1712 nigel 3 }
1713 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1714 nigel 93
1715 nigel 3 }
1716     }
1717 nigel 75 fclose(f);
1718 nigel 3 }
1719 nigel 77
1720     new_free(re);
1721     if (extra != NULL) new_free(extra);
1722     if (tables != NULL) new_free((void *)tables);
1723 nigel 75 continue; /* With next regex */
1724 nigel 3 }
1725 nigel 75 } /* End of non-POSIX compile */
1726 nigel 3
1727     /* Read data lines and test them */
1728    
1729     for (;;)
1730     {
1731 nigel 87 uschar *q;
1732 ph10 147 uschar *bptr;
1733 nigel 57 int *use_offsets = offsets;
1734 nigel 53 int use_size_offsets = size_offsets;
1735 nigel 63 int callout_data = 0;
1736     int callout_data_set = 0;
1737 nigel 3 int count, c;
1738 nigel 29 int copystrings = 0;
1739 ph10 386 int find_match_limit = default_find_match_limit;
1740 nigel 29 int getstrings = 0;
1741     int getlist = 0;
1742 nigel 39 int gmatched = 0;
1743 nigel 35 int start_offset = 0;
1744 nigel 41 int g_notempty = 0;
1745 nigel 77 int use_dfa = 0;
1746 nigel 3
1747     options = 0;
1748    
1749 nigel 91 *copynames = 0;
1750     *getnames = 0;
1751    
1752     copynamesptr = copynames;
1753     getnamesptr = getnames;
1754    
1755 nigel 63 pcre_callout = callout;
1756     first_callout = 1;
1757     callout_extra = 0;
1758     callout_count = 0;
1759     callout_fail_count = 999999;
1760     callout_fail_id = -1;
1761 nigel 73 show_malloc = 0;
1762 nigel 63
1763 nigel 91 if (extra != NULL) extra->flags &=
1764     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1765    
1766     len = 0;
1767     for (;;)
1768 nigel 11 {
1769 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1770 nigel 91 {
1771     if (len > 0) break;
1772     done = 1;
1773     goto CONTINUE;
1774     }
1775     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1776     len = (int)strlen((char *)buffer);
1777     if (buffer[len-1] == '\n') break;
1778 nigel 11 }
1779 nigel 3
1780     while (len > 0 && isspace(buffer[len-1])) len--;
1781     buffer[len] = 0;
1782     if (len == 0) break;
1783    
1784     p = buffer;
1785     while (isspace(*p)) p++;
1786    
1787 ph10 147 bptr = q = dbuffer;
1788 nigel 3 while ((c = *p++) != 0)
1789     {
1790     int i = 0;
1791     int n = 0;
1792 nigel 63
1793 nigel 3 if (c == '\\') switch ((c = *p++))
1794     {
1795     case 'a': c = 7; break;
1796     case 'b': c = '\b'; break;
1797     case 'e': c = 27; break;
1798     case 'f': c = '\f'; break;
1799     case 'n': c = '\n'; break;
1800     case 'r': c = '\r'; break;
1801     case 't': c = '\t'; break;
1802     case 'v': c = '\v'; break;
1803    
1804     case '0': case '1': case '2': case '3':
1805     case '4': case '5': case '6': case '7':
1806     c -= '0';
1807     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1808     c = c * 8 + *p++ - '0';
1809 nigel 91
1810     #if !defined NOUTF8
1811     if (use_utf8 && c > 255)
1812     {
1813     unsigned char buff8[8];
1814     int ii, utn;
1815     utn = ord2utf8(c, buff8);
1816     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1817     c = buff8[ii]; /* Last byte */
1818     }
1819     #endif
1820 nigel 3 break;
1821    
1822     case 'x':
1823 nigel 49
1824     /* Handle \x{..} specially - new Perl thing for utf8 */
1825    
1826 nigel 79 #if !defined NOUTF8
1827 nigel 49 if (*p == '{')
1828     {
1829     unsigned char *pt = p;
1830     c = 0;
1831     while (isxdigit(*(++pt)))
1832     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1833     if (*pt == '}')
1834     {
1835 nigel 67 unsigned char buff8[8];
1836 nigel 49 int ii, utn;
1837 ph10 355 if (use_utf8)
1838 ph10 358 {
1839 ph10 355 utn = ord2utf8(c, buff8);
1840     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1841     c = buff8[ii]; /* Last byte */
1842     }
1843     else
1844     {
1845 ph10 358 if (c > 255)
1846 ph10 355 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1847     "UTF-8 mode is not enabled.\n"
1848     "** Truncation will probably give the wrong result.\n", c);
1849 ph10 358 }
1850 nigel 49 p = pt + 1;
1851     break;
1852     }
1853     /* Not correct form; fall through */
1854     }
1855 nigel 79 #endif
1856 nigel 49
1857     /* Ordinary \x */
1858    
1859 nigel 3 c = 0;
1860     while (i++ < 2 && isxdigit(*p))
1861     {
1862     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1863     p++;
1864     }
1865     break;
1866    
1867 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1868 nigel 3 p--;
1869     continue;
1870    
1871 nigel 75 case '>':
1872     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1873     continue;
1874    
1875 nigel 3 case 'A': /* Option setting */
1876     options |= PCRE_ANCHORED;
1877     continue;
1878    
1879     case 'B':
1880     options |= PCRE_NOTBOL;
1881     continue;
1882    
1883 nigel 29 case 'C':
1884 nigel 63 if (isdigit(*p)) /* Set copy string */
1885     {
1886     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1887     copystrings |= 1 << n;
1888     }
1889     else if (isalnum(*p))
1890     {
1891 nigel 91 uschar *npp = copynamesptr;
1892 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1893 nigel 91 *npp++ = 0;
1894 nigel 67 *npp = 0;
1895 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1896 nigel 63 if (n < 0)
1897 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1898     copynamesptr = npp;
1899 nigel 63 }
1900     else if (*p == '+')
1901     {
1902     callout_extra = 1;
1903     p++;
1904     }
1905     else if (*p == '-')
1906     {
1907     pcre_callout = NULL;
1908     p++;
1909     }
1910     else if (*p == '!')
1911     {
1912     callout_fail_id = 0;
1913     p++;
1914     while(isdigit(*p))
1915     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1916     callout_fail_count = 0;
1917     if (*p == '!')
1918     {
1919     p++;
1920     while(isdigit(*p))
1921     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1922     }
1923     }
1924     else if (*p == '*')
1925     {
1926     int sign = 1;
1927     callout_data = 0;
1928     if (*(++p) == '-') { sign = -1; p++; }
1929     while(isdigit(*p))
1930     callout_data = callout_data * 10 + *p++ - '0';
1931     callout_data *= sign;
1932     callout_data_set = 1;
1933     }
1934 nigel 29 continue;
1935    
1936 nigel 79 #if !defined NODFA
1937 nigel 77 case 'D':
1938 nigel 79 #if !defined NOPOSIX
1939 nigel 77 if (posix || do_posix)
1940     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1941     else
1942 nigel 79 #endif
1943 nigel 77 use_dfa = 1;
1944     continue;
1945    
1946     case 'F':
1947     options |= PCRE_DFA_SHORTEST;
1948     continue;
1949 nigel 79 #endif
1950 nigel 77
1951 nigel 29 case 'G':
1952 nigel 63 if (isdigit(*p))
1953     {
1954     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1955     getstrings |= 1 << n;
1956     }
1957     else if (isalnum(*p))
1958     {
1959 nigel 91 uschar *npp = getnamesptr;
1960 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1961 nigel 91 *npp++ = 0;
1962 nigel 67 *npp = 0;
1963 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1964 nigel 63 if (n < 0)
1965 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1966     getnamesptr = npp;
1967 nigel 63 }
1968 nigel 29 continue;
1969    
1970     case 'L':
1971     getlist = 1;
1972     continue;
1973    
1974 nigel 63 case 'M':
1975     find_match_limit = 1;
1976     continue;
1977    
1978 nigel 37 case 'N':
1979 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
1980     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
1981 ph10 461 else
1982 ph10 442 options |= PCRE_NOTEMPTY;
1983 nigel 37 continue;
1984    
1985 nigel 3 case 'O':
1986     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1987 nigel 53 if (n > size_offsets_max)
1988     {
1989     size_offsets_max = n;
1990 nigel 57 free(offsets);
1991 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1992 nigel 53 if (offsets == NULL)
1993     {
1994     printf("** Failed to get %d bytes of memory for offsets vector\n",
1995 ph10 151 (int)(size_offsets_max * sizeof(int)));
1996 nigel 77 yield = 1;
1997     goto EXIT;
1998 nigel 53 }
1999     }
2000     use_size_offsets = n;
2001 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2002 nigel 3 continue;
2003    
2004 nigel 75 case 'P':
2005 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2006 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2007 nigel 75 continue;
2008    
2009 nigel 91 case 'Q':
2010     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2011     if (extra == NULL)
2012     {
2013     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2014     extra->flags = 0;
2015     }
2016     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2017     extra->match_limit_recursion = n;
2018     continue;
2019    
2020     case 'q':
2021     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2022     if (extra == NULL)
2023     {
2024     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2025     extra->flags = 0;
2026     }
2027     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2028     extra->match_limit = n;
2029     continue;
2030    
2031 nigel 79 #if !defined NODFA
2032 nigel 77 case 'R':
2033     options |= PCRE_DFA_RESTART;
2034     continue;
2035 nigel 79 #endif
2036 nigel 77
2037 nigel 73 case 'S':
2038     show_malloc = 1;
2039     continue;
2040 ph10 392
2041 ph10 389 case 'Y':
2042     options |= PCRE_NO_START_OPTIMIZE;
2043 ph10 392 continue;
2044 nigel 73
2045 nigel 3 case 'Z':
2046     options |= PCRE_NOTEOL;
2047     continue;
2048 nigel 71
2049     case '?':
2050     options |= PCRE_NO_UTF8_CHECK;
2051     continue;
2052 nigel 91
2053     case '<':
2054     {
2055     int x = check_newline(p, outfile);
2056     if (x == 0) goto NEXT_DATA;
2057     options |= x;
2058     while (*p++ != '>');
2059     }
2060     continue;
2061 nigel 3 }
2062 nigel 9 *q++ = c;
2063 nigel 3 }
2064 nigel 9 *q = 0;
2065     len = q - dbuffer;
2066 ph10 371
2067 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
2068 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2069 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
2070 ph10 371
2071 ph10 363 #if !defined NOPOSIX
2072     if (posix || do_posix)
2073     {
2074     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2075 ph10 371 bptr += buffer_size - len - 1;
2076 ph10 363 }
2077 ph10 371 else
2078     #endif
2079 ph10 363 {
2080     memmove(bptr + buffer_size - len, bptr, len);
2081 ph10 371 bptr += buffer_size - len;
2082     }
2083 nigel 3
2084 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
2085     {
2086     printf("**Match limit not relevant for DFA matching: ignored\n");
2087     find_match_limit = 0;
2088     }
2089    
2090 nigel 3 /* Handle matching via the POSIX interface, which does not
2091 nigel 63 support timing or playing with the match limit or callout data. */
2092 nigel 3
2093 nigel 37 #if !defined NOPOSIX
2094 nigel 3 if (posix || do_posix)
2095     {
2096     int rc;
2097     int eflags = 0;
2098 nigel 63 regmatch_t *pmatch = NULL;
2099     if (use_size_offsets > 0)
2100 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2101 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2102     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2103 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2104 nigel 3
2105 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2106 nigel 3
2107     if (rc != 0)
2108     {
2109 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2110 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2111     }
2112 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2113     != 0)
2114     {
2115     fprintf(outfile, "Matched with REG_NOSUB\n");
2116     }
2117 nigel 3 else
2118     {
2119 nigel 7 size_t i;
2120 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
2121 nigel 3 {
2122     if (pmatch[i].rm_so >= 0)
2123     {
2124 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2125 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2126     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2127 nigel 3 fprintf(outfile, "\n");
2128 nigel 35 if (i == 0 && do_showrest)
2129     {
2130     fprintf(outfile, " 0+ ");
2131 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2132     outfile);
2133 nigel 35 fprintf(outfile, "\n");
2134     }
2135 nigel 3 }
2136     }
2137     }
2138 nigel 53 free(pmatch);
2139 nigel 3 }
2140    
2141 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2142 nigel 3
2143 nigel 37 else
2144     #endif /* !defined NOPOSIX */
2145    
2146 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2147 nigel 3 {
2148 nigel 93 if (timeitm > 0)
2149 nigel 3 {
2150     register int i;
2151     clock_t time_taken;
2152     clock_t start_time = clock();
2153 nigel 77
2154 nigel 79 #if !defined NODFA
2155 nigel 77 if (all_use_dfa || use_dfa)
2156     {
2157     int workspace[1000];
2158 nigel 93 for (i = 0; i < timeitm; i++)
2159 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2160 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2161     sizeof(workspace)/sizeof(int));
2162     }
2163     else
2164 nigel 79 #endif
2165 nigel 77
2166 nigel 93 for (i = 0; i < timeitm; i++)
2167 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2168 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2169 nigel 77
2170 nigel 3 time_taken = clock() - start_time;
2171 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2172     (((double)time_taken * 1000.0) / (double)timeitm) /
2173 nigel 63 (double)CLOCKS_PER_SEC);
2174 nigel 3 }
2175    
2176 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2177 nigel 87 varying limits in order to find the minimum value for the match limit and
2178     for the recursion limit. */
2179 nigel 63
2180     if (find_match_limit)
2181     {
2182     if (extra == NULL)
2183     {
2184 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2185 nigel 63 extra->flags = 0;
2186     }
2187    
2188 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2189 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2190     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2191     PCRE_ERROR_MATCHLIMIT, "match()");
2192 nigel 63
2193 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2194     options|g_notempty, use_offsets, use_size_offsets,
2195     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2196     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2197 nigel 63 }
2198    
2199     /* If callout_data is set, use the interface with additional data */
2200    
2201     else if (callout_data_set)
2202     {
2203     if (extra == NULL)
2204     {
2205 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2206 nigel 63 extra->flags = 0;
2207     }
2208     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2209 nigel 71 extra->callout_data = &callout_data;
2210 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2211     options | g_notempty, use_offsets, use_size_offsets);
2212     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2213     }
2214    
2215     /* The normal case is just to do the match once, with the default
2216     value of match_limit. */
2217    
2218 nigel 79 #if !defined NODFA
2219 nigel 77 else if (all_use_dfa || use_dfa)
2220     {
2221     int workspace[1000];
2222 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2223 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2224     sizeof(workspace)/sizeof(int));
2225     if (count == 0)
2226     {
2227     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2228     count = use_size_offsets/2;
2229     }
2230     }
2231 nigel 79 #endif
2232 nigel 77
2233 nigel 75 else
2234     {
2235     count = pcre_exec(re, extra, (char *)bptr, len,
2236     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2237 nigel 77 if (count == 0)
2238     {
2239     fprintf(outfile, "Matched, but too many substrings\n");
2240     count = use_size_offsets/3;
2241     }
2242 nigel 75 }
2243 nigel 3
2244 nigel 39 /* Matched */
2245    
2246 nigel 3 if (count >= 0)
2247     {
2248 nigel 93 int i, maxcount;
2249    
2250     #if !defined NODFA
2251     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2252     #endif
2253     maxcount = use_size_offsets/3;
2254    
2255     /* This is a check against a lunatic return value. */
2256    
2257     if (count > maxcount)
2258     {
2259     fprintf(outfile,
2260     "** PCRE error: returned count %d is too big for offset size %d\n",
2261     count, use_size_offsets);
2262     count = use_size_offsets/3;
2263     if (do_g || do_G)
2264     {
2265     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2266     do_g = do_G = FALSE; /* Break g/G loop */
2267     }
2268     }
2269    
2270 nigel 29 for (i = 0; i < count * 2; i += 2)
2271 nigel 3 {
2272 nigel 57 if (use_offsets[i] < 0)
2273 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2274     else
2275     {
2276     fprintf(outfile, "%2d: ", i/2);
2277 nigel 63 (void)pchars(bptr + use_offsets[i],
2278     use_offsets[i+1] - use_offsets[i], outfile);
2279 nigel 3 fprintf(outfile, "\n");
2280 nigel 35 if (i == 0)
2281     {
2282     if (do_showrest)
2283     {
2284     fprintf(outfile, " 0+ ");
2285 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2286     outfile);
2287 nigel 35 fprintf(outfile, "\n");
2288     }
2289     }
2290 nigel 3 }
2291     }
2292 nigel 29
2293     for (i = 0; i < 32; i++)
2294     {
2295     if ((copystrings & (1 << i)) != 0)
2296     {
2297 nigel 91 char copybuffer[256];
2298 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2299 nigel 37 i, copybuffer, sizeof(copybuffer));
2300 nigel 29 if (rc < 0)
2301     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2302     else
2303 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2304 nigel 29 }
2305     }
2306    
2307 nigel 91 for (copynamesptr = copynames;
2308     *copynamesptr != 0;
2309     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2310     {
2311     char copybuffer[256];
2312     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2313     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2314     if (rc < 0)
2315     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2316     else
2317     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2318     }
2319    
2320 nigel 29 for (i = 0; i < 32; i++)
2321     {
2322     if ((getstrings & (1 << i)) != 0)
2323     {
2324     const char *substring;
2325 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2326 nigel 29 i, &substring);
2327     if (rc < 0)
2328     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2329     else
2330     {
2331     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2332 nigel 49 pcre_free_substring(substring);
2333 nigel 29 }
2334     }
2335     }
2336    
2337 nigel 91 for (getnamesptr = getnames;
2338     *getnamesptr != 0;
2339     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2340     {
2341     const char *substring;
2342     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2343     count, (char *)getnamesptr, &substring);
2344     if (rc < 0)
2345     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2346     else
2347     {
2348     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2349     pcre_free_substring(substring);
2350     }
2351     }
2352    
2353 nigel 29 if (getlist)
2354     {
2355     const char **stringlist;
2356 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2357 nigel 29 &stringlist);
2358     if (rc < 0)
2359     fprintf(outfile, "get substring list failed %d\n", rc);
2360     else
2361     {
2362     for (i = 0; i < count; i++)
2363     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2364     if (stringlist[i] != NULL)
2365     fprintf(outfile, "string list not terminated by NULL\n");
2366 nigel 49 /* free((void *)stringlist); */
2367     pcre_free_substring_list(stringlist);
2368 nigel 29 }
2369     }
2370 nigel 39 }
2371 nigel 29
2372 nigel 75 /* There was a partial match */
2373    
2374     else if (count == PCRE_ERROR_PARTIAL)
2375     {
2376 nigel 77 fprintf(outfile, "Partial match");
2377 ph10 426 if (use_size_offsets > 1)
2378     {
2379     fprintf(outfile, ": ");
2380     pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2381 ph10 461 outfile);
2382     }
2383 nigel 77 fprintf(outfile, "\n");
2384 nigel 75 break; /* Out of the /g loop */
2385     }
2386    
2387 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2388 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2389     to advance the start offset, and continue. We won't be at the end of the
2390     string - that was checked before setting g_notempty.
2391 nigel 39
2392 ph10 150 Complication arises in the case when the newline option is "any" or
2393 ph10 149 "anycrlf". If the previous match was at the end of a line terminated by
2394     CRLF, an advance of one character just passes the \r, whereas we should
2395     prefer the longer newline sequence, as does the code in pcre_exec().
2396     Fudge the offset value to achieve this.
2397 ph10 144
2398 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2399     character, not one byte. */
2400    
2401 nigel 3 else
2402     {
2403 nigel 41 if (g_notempty != 0)
2404 nigel 35 {
2405 nigel 73 int onechar = 1;
2406 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2407 nigel 57 use_offsets[0] = start_offset;
2408 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2409     {
2410     int d;
2411     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2412 ph10 391 /* Note that these values are always the ASCII ones, even in
2413     EBCDIC environments. CR = 13, NL = 10. */
2414     obits = (d == 13)? PCRE_NEWLINE_CR :
2415     (d == 10)? PCRE_NEWLINE_LF :
2416     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2417 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2418 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2419     }
2420 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2421 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2422 ph10 149 &&
2423 ph10 143 start_offset < len - 1 &&
2424     bptr[start_offset] == '\r' &&
2425     bptr[start_offset+1] == '\n')
2426 ph10 144 onechar++;
2427 ph10 143 else if (use_utf8)
2428 nigel 73 {
2429     while (start_offset + onechar < len)
2430     {
2431     int tb = bptr[start_offset+onechar];
2432     if (tb <= 127) break;
2433     tb &= 0xc0;
2434     if (tb != 0 && tb != 0xc0) onechar++;
2435     }
2436     }
2437     use_offsets[1] = start_offset + onechar;
2438 nigel 35 }
2439 nigel 41 else
2440     {
2441 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2442 nigel 41 {
2443 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
2444 nigel 41 }
2445 nigel 73 else fprintf(outfile, "Error %d\n", count);
2446 nigel 41 break; /* Out of the /g loop */
2447     }
2448 nigel 3 }
2449 nigel 35
2450 nigel 39 /* If not /g or /G we are done */
2451    
2452     if (!do_g && !do_G) break;
2453    
2454 nigel 41 /* If we have matched an empty string, first check to see if we are at
2455 ph10 442 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2456     Perl's /g option does. This turns out to be rather cunning. First we set
2457     PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2458 nigel 47 same point. If this fails (picked up above) we advance to the next
2459 ph10 143 character. */
2460 ph10 142
2461 nigel 41 g_notempty = 0;
2462 ph10 142
2463 nigel 57 if (use_offsets[0] == use_offsets[1])
2464 nigel 41 {
2465 nigel 57 if (use_offsets[0] == len) break;
2466 ph10 442 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2467 nigel 41 }
2468 nigel 39
2469     /* For /g, update the start offset, leaving the rest alone */
2470    
2471 ph10 143 if (do_g) start_offset = use_offsets[1];
2472 nigel 39
2473     /* For /G, update the pointer and length */
2474    
2475     else
2476 nigel 35 {
2477 ph10 143 bptr += use_offsets[1];
2478     len -= use_offsets[1];
2479 nigel 35 }
2480 nigel 39 } /* End of loop for /g and /G */
2481 nigel 91
2482     NEXT_DATA: continue;
2483 nigel 39 } /* End of loop for data lines */
2484 nigel 3
2485 nigel 11 CONTINUE:
2486 nigel 37
2487     #if !defined NOPOSIX
2488 nigel 3 if (posix || do_posix) regfree(&preg);
2489 nigel 37 #endif
2490    
2491 nigel 77 if (re != NULL) new_free(re);
2492     if (extra != NULL) new_free(extra);
2493 nigel 25 if (tables != NULL)
2494     {
2495 nigel 77 new_free((void *)tables);
2496 nigel 25 setlocale(LC_CTYPE, "C");
2497 nigel 93 locale_set = 0;
2498 nigel 25 }
2499 nigel 3 }
2500    
2501 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2502 nigel 77
2503     EXIT:
2504    
2505     if (infile != NULL && infile != stdin) fclose(infile);
2506     if (outfile != NULL && outfile != stdout) fclose(outfile);
2507    
2508     free(buffer);
2509     free(dbuffer);
2510     free(pbuffer);
2511     free(offsets);
2512    
2513     return yield;
2514 nigel 3 }
2515    
2516 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12