/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 518 - (hide annotations) (download)
Tue May 18 15:47:01 2010 UTC (4 years, 2 months ago) by ph10
File MIME type: text/plain
File size: 75000 byte(s)
Added PCRE_UCP and related stuff to make \w etc use Unicode properties.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
43 nigel 3 #include <ctype.h>
44     #include <stdio.h>
45     #include <string.h>
46     #include <stdlib.h>
47     #include <time.h>
48 nigel 25 #include <locale.h>
49 nigel 75 #include <errno.h>
50 nigel 3
51 ph10 287 #ifdef SUPPORT_LIBREADLINE
52 ph10 343 #ifdef HAVE_UNISTD_H
53 ph10 287 #include <unistd.h>
54 ph10 343 #endif
55 ph10 287 #include <readline/readline.h>
56     #include <readline/history.h>
57     #endif
58 nigel 93
59 ph10 287
60 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
61     input and output without "b"; then I was told that "b" was needed in some
62     environments, so it was added for release 5.0 to both the input and output. (It
63     makes no difference on Unix-like systems.) Later I was told that it is wrong
64     for the input on Windows. I've now abstracted the modes into two macros that
65     are set here, to make it easier to fiddle with them, and removed "b" from the
66     input mode under Windows. */
67    
68     #if defined(_WIN32) || defined(WIN32)
69     #include <io.h> /* For _setmode() */
70     #include <fcntl.h> /* For _O_BINARY */
71     #define INPUT_MODE "r"
72     #define OUTPUT_MODE "wb"
73    
74 ph10 411 #ifndef isatty
75     #define isatty _isatty /* This is what Windows calls them, I'm told, */
76     #endif /* though in some environments they seem to */
77     /* be already defined, hence the #ifndefs. */
78     #ifndef fileno
79 ph10 343 #define fileno _fileno
80 ph10 411 #endif
81 ph10 343
82 nigel 93 #else
83     #include <sys/time.h> /* These two includes are needed */
84     #include <sys/resource.h> /* for setrlimit(). */
85     #define INPUT_MODE "rb"
86     #define OUTPUT_MODE "wb"
87 nigel 91 #endif
88    
89 nigel 93
90 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
91     displaying the results of pcre_study() and we also need to know about the
92     internal macros, structures, and other internal data values; pcretest has
93     "inside information" compared to a program that strictly follows the PCRE API.
94 nigel 37
95 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
96     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
97     appropriately for an application, not for building PCRE. */
98 nigel 77
99 ph10 145 #include "pcre.h"
100 nigel 77 #include "pcre_internal.h"
101    
102 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
103     to keep two copies, we include the source file here, changing the names of the
104     external symbols to prevent clashes. */
105 nigel 77
106 ph10 351 #define _pcre_ucp_gentype ucp_gentype
107 nigel 85 #define _pcre_utf8_table1 utf8_table1
108     #define _pcre_utf8_table1_size utf8_table1_size
109     #define _pcre_utf8_table2 utf8_table2
110     #define _pcre_utf8_table3 utf8_table3
111     #define _pcre_utf8_table4 utf8_table4
112     #define _pcre_utt utt
113     #define _pcre_utt_size utt_size
114 ph10 240 #define _pcre_utt_names utt_names
115 nigel 85 #define _pcre_OP_lengths OP_lengths
116    
117     #include "pcre_tables.c"
118    
119     /* We also need the pcre_printint() function for printing out compiled
120     patterns. This function is in a separate file so that it can be included in
121 ph10 507 pcre_compile.c when that module is compiled with debugging enabled. It needs to
122 ph10 498 know which case is being compiled. */
123 nigel 85
124 ph10 498 #define COMPILING_PCRETEST
125     #include "pcre_printint.src"
126    
/* The definition of the macro PRINTABLE, which determines whether to print an
output character as-is or as a hex value when showing compiled patterns, is
contained in the printint.src file. We use it here as well, in cases when the
locale has not been explicitly changed, so as to get consistent output from
systems that differ in their output from isprint() even in the "C" locale. */
132 nigel 93
133     #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
134 nigel 85
135 nigel 37 /* It is possible to compile this test program without including support for
136     testing the POSIX interface, though this is not available via the standard
137     Makefile. */
138    
139     #if !defined NOPOSIX
140 nigel 3 #include "pcreposix.h"
141 nigel 37 #endif
142 nigel 3
143 ph10 107 /* It is also possible, for the benefit of the version currently imported into
144     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
145     interface to the DFA matcher (NODFA), and without the doublecheck of the old
146     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
147     UTF8 support if PCRE is built without it. */
148 nigel 79
149 ph10 107 #ifndef SUPPORT_UTF8
150     #ifndef NOUTF8
151     #define NOUTF8
152     #endif
153     #endif
154 nigel 79
155 ph10 107
156 nigel 85 /* Other parameters */
157    
158 nigel 3 #ifndef CLOCKS_PER_SEC
159     #ifdef CLK_TCK
160     #define CLOCKS_PER_SEC CLK_TCK
161     #else
162     #define CLOCKS_PER_SEC 100
163     #endif
164     #endif
165    
166 nigel 93 /* This is the default loop count for timing. */
167    
168 nigel 75 #define LOOPREPEAT 500000
169 nigel 3
170 nigel 85 /* Static variables */
171    
172 nigel 3 static FILE *outfile;
173     static int log_store = 0;
174 nigel 63 static int callout_count;
175     static int callout_extra;
176     static int callout_fail_count;
177     static int callout_fail_id;
178 ph10 210 static int debug_lengths;
179 nigel 63 static int first_callout;
180 nigel 93 static int locale_set = 0;
181 nigel 73 static int show_malloc;
182 nigel 67 static int use_utf8;
183 nigel 43 static size_t gotten_store;
184 nigel 3
185 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
186    
187     static int buffer_size = 50000;
188     static uschar *buffer = NULL;
189     static uschar *dbuffer = NULL;
190 nigel 75 static uschar *pbuffer = NULL;
191 nigel 3
192 nigel 75
193 nigel 49
194     /*************************************************
195 nigel 91 * Read or extend an input line *
196     *************************************************/
197    
198     /* Input lines are read into buffer, but both patterns and data lines can be
199     continued over multiple input lines. In addition, if the buffer fills up, we
200     want to automatically expand it so as to be able to handle extremely large
201     lines that are needed for certain stress tests. When the input buffer is
202     expanded, the other two buffers must also be expanded likewise, and the
203     contents of pbuffer, which are a copy of the input for callouts, must be
204     preserved (for when expansion happens for a data line). This is not the most
205     optimal way of handling this, but hey, this is just a test program!
206    
207     Arguments:
208     f the file to read
209     start where in buffer to start (this *must* be within buffer)
210 ph10 287 prompt for stdin or readline()
211 nigel 91
212     Returns: pointer to the start of new data
213     could be a copy of start, or could be moved
214     NULL if no data read and EOF reached
215     */
216    
217     static uschar *
218 ph10 287 extend_inputline(FILE *f, uschar *start, const char *prompt)
219 nigel 91 {
220     uschar *here = start;
221    
222     for (;;)
223     {
224     int rlen = buffer_size - (here - buffer);
225 nigel 93
226 nigel 91 if (rlen > 1000)
227     {
228     int dlen;
229 ph10 289
230 ph10 287 /* If libreadline support is required, use readline() to read a line if the
231     input is a terminal. Note that readline() removes the trailing newline, so
232     we must put it back again, to be compatible with fgets(). */
233 ph10 289
234 ph10 287 #ifdef SUPPORT_LIBREADLINE
235     if (isatty(fileno(f)))
236     {
237 ph10 289 size_t len;
238 ph10 287 char *s = readline(prompt);
239     if (s == NULL) return (here == start)? NULL : start;
240     len = strlen(s);
241 ph10 289 if (len > 0) add_history(s);
242 ph10 287 if (len > rlen - 1) len = rlen - 1;
243     memcpy(here, s, len);
244     here[len] = '\n';
245 ph10 289 here[len+1] = 0;
246     free(s);
247 ph10 287 }
248 ph10 289 else
249     #endif
250    
251 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
252 ph10 289
253 ph10 287 {
254 ph10 516 if (f == stdin) printf("%s", prompt);
255 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
256     return (here == start)? NULL : start;
257 ph10 289 }
258    
259 nigel 91 dlen = (int)strlen((char *)here);
260     if (dlen > 0 && here[dlen - 1] == '\n') return start;
261     here += dlen;
262     }
263    
264     else
265     {
266     int new_buffer_size = 2*buffer_size;
267     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
268     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
269     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
270    
271     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
272     {
273     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
274     exit(1);
275     }
276    
277     memcpy(new_buffer, buffer, buffer_size);
278     memcpy(new_pbuffer, pbuffer, buffer_size);
279    
280     buffer_size = new_buffer_size;
281    
282     start = new_buffer + (start - buffer);
283     here = new_buffer + (here - buffer);
284    
285     free(buffer);
286     free(dbuffer);
287     free(pbuffer);
288    
289     buffer = new_buffer;
290     dbuffer = new_dbuffer;
291     pbuffer = new_pbuffer;
292     }
293     }
294    
295     return NULL; /* Control never gets here */
296     }
297    
298    
299    
300    
301    
302    
303    
/*************************************************
*         Read number from string                *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, the job is done by hand. It is only used
for unpicking arguments, so it is kept simple: leading white space is skipped
and then a run of decimal digits is accumulated. There is no sign handling and
no overflow check.

Arguments:
  str         string to be converted
  endptr      where to put the end pointer (first character not consumed)

Returns:      the value as an int; zero if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int value = 0;

for (; *str != 0 && isspace(*str); str++) ;   /* Skip leading white space */

for (; isdigit(*str); str++)
  value = value * 10 + (int)(*str - '0');

*endptr = str;
return value;
}
328    
329    
330    
331 nigel 49
332     /*************************************************
333     * Convert UTF-8 string to value *
334     *************************************************/
335    
336     /* This function takes one or more bytes that represents a UTF-8 character,
337     and returns the value of the character.
338    
339     Argument:
340 nigel 91 utf8bytes a pointer to the byte vector
341     vptr a pointer to an int to receive the value
342 nigel 49
343 nigel 91 Returns: > 0 => the number of bytes consumed
344     -6 to 0 => malformed UTF-8 character at offset = (-return)
345 nigel 49 */
346    
347 nigel 79 #if !defined NOUTF8
348    
349 nigel 67 static int
350 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
351 nigel 49 {
352 nigel 91 int c = *utf8bytes++;
353 nigel 49 int d = c;
354     int i, j, s;
355    
356     for (i = -1; i < 6; i++) /* i is number of additional bytes */
357     {
358     if ((d & 0x80) == 0) break;
359     d <<= 1;
360     }
361    
362     if (i == -1) { *vptr = c; return 1; } /* ascii character */
363     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
364    
365     /* i now has a value in the range 1-5 */
366    
367 nigel 59 s = 6*i;
368 nigel 85 d = (c & utf8_table3[i]) << s;
369 nigel 49
370     for (j = 0; j < i; j++)
371     {
372 nigel 91 c = *utf8bytes++;
373 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
374 nigel 59 s -= 6;
375 nigel 49 d |= (c & 0x3f) << s;
376     }
377    
378     /* Check that encoding was the correct unique one */
379    
380 nigel 85 for (j = 0; j < utf8_table1_size; j++)
381     if (d <= utf8_table1[j]) break;
382 nigel 49 if (j != i) return -(i+1);
383    
384     /* Valid value */
385    
386     *vptr = d;
387     return i+1;
388     }
389    
390 nigel 79 #endif
391 nigel 49
392    
393 nigel 79
394 nigel 63 /*************************************************
395 nigel 85 * Convert character value to UTF-8 *
396     *************************************************/
397    
398     /* This function takes an integer value in the range 0 - 0x7fffffff
399     and encodes it as a UTF-8 character in 0 to 6 bytes.
400    
401     Arguments:
402     cvalue the character value
403 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
404 nigel 85
405     Returns: number of characters placed in the buffer
406     */
407    
408 nigel 93 #if !defined NOUTF8
409    
410 nigel 85 static int
411 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
412 nigel 85 {
413     register int i, j;
414     for (i = 0; i < utf8_table1_size; i++)
415     if (cvalue <= utf8_table1[i]) break;
416 nigel 91 utf8bytes += i;
417 nigel 85 for (j = i; j > 0; j--)
418     {
419 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
420 nigel 85 cvalue >>= 6;
421     }
422 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
423 nigel 85 return i + 1;
424     }
425    
426 nigel 93 #endif
427 nigel 85
428    
429 nigel 93
430 nigel 85 /*************************************************
431 nigel 63 * Print character string *
432     *************************************************/
433 nigel 49
434 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
435     mode. Yields number of characters printed. If handed a NULL file, just counts
436     chars without printing. */
437 nigel 49
438 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
439 nigel 3 {
440 nigel 85 int c = 0;
441 nigel 63 int yield = 0;
442 nigel 3
443 nigel 63 while (length-- > 0)
444 nigel 3 {
445 nigel 79 #if !defined NOUTF8
446 nigel 67 if (use_utf8)
447 nigel 63 {
448     int rc = utf82ord(p, &c);
449 nigel 3
450 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
451     {
452     length -= rc - 1;
453     p += rc;
454 nigel 93 if (PRINTHEX(c))
455 nigel 63 {
456     if (f != NULL) fprintf(f, "%c", c);
457     yield++;
458     }
459     else
460     {
461 nigel 93 int n = 4;
462     if (f != NULL) fprintf(f, "\\x{%02x}", c);
463     yield += (n <= 0x000000ff)? 2 :
464     (n <= 0x00000fff)? 3 :
465     (n <= 0x0000ffff)? 4 :
466     (n <= 0x000fffff)? 5 : 6;
467 nigel 63 }
468     continue;
469     }
470     }
471 nigel 79 #endif
472 nigel 3
473 nigel 63 /* Not UTF-8, or malformed UTF-8 */
474    
475 nigel 93 c = *p++;
476     if (PRINTHEX(c))
477 nigel 3 {
478 nigel 63 if (f != NULL) fprintf(f, "%c", c);
479     yield++;
480 nigel 3 }
481 nigel 63 else
482 nigel 3 {
483 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
484     yield += 4;
485     }
486     }
487 nigel 3
488 nigel 63 return yield;
489     }
490 nigel 23
491 nigel 3
492 nigel 23
493 nigel 63 /*************************************************
494     * Callout function *
495     *************************************************/
496 nigel 3
497 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
498     the match. Yield zero unless more callouts than the fail count, or the callout
499     data is not zero. */
500 nigel 3
501 nigel 63 static int callout(pcre_callout_block *cb)
502     {
503     FILE *f = (first_callout | callout_extra)? outfile : NULL;
504 nigel 75 int i, pre_start, post_start, subject_length;
505 nigel 3
506 nigel 63 if (callout_extra)
507     {
508     fprintf(f, "Callout %d: last capture = %d\n",
509     cb->callout_number, cb->capture_last);
510 nigel 3
511 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
512     {
513     if (cb->offset_vector[i] < 0)
514     fprintf(f, "%2d: <unset>\n", i/2);
515     else
516     {
517     fprintf(f, "%2d: ", i/2);
518     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
519     cb->offset_vector[i+1] - cb->offset_vector[i], f);
520     fprintf(f, "\n");
521     }
522     }
523     }
524 nigel 3
525 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
526     datails. On subsequent calls in the same match, we use pchars just to find the
527     printed lengths of the substrings. */
528 nigel 3
529 nigel 63 if (f != NULL) fprintf(f, "--->");
530 nigel 3
531 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
532     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
533     cb->current_position - cb->start_match, f);
534 nigel 3
535 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
536    
537 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
538     cb->subject_length - cb->current_position, f);
539 nigel 3
540 nigel 63 if (f != NULL) fprintf(f, "\n");
541 nigel 9
542 nigel 63 /* Always print appropriate indicators, with callout number if not already
543 nigel 75 shown. For automatic callouts, show the pattern offset. */
544 nigel 3
545 nigel 75 if (cb->callout_number == 255)
546     {
547     fprintf(outfile, "%+3d ", cb->pattern_position);
548     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
549     }
550     else
551     {
552     if (callout_extra) fprintf(outfile, " ");
553     else fprintf(outfile, "%3d ", cb->callout_number);
554     }
555 nigel 3
556 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
557     fprintf(outfile, "^");
558 nigel 3
559 nigel 63 if (post_start > 0)
560     {
561     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
562     fprintf(outfile, "^");
563 nigel 3 }
564    
565 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
566     fprintf(outfile, " ");
567    
568     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
569     pbuffer + cb->pattern_position);
570    
571 nigel 63 fprintf(outfile, "\n");
572     first_callout = 0;
573 nigel 3
574 nigel 71 if (cb->callout_data != NULL)
575 nigel 49 {
576 nigel 71 int callout_data = *((int *)(cb->callout_data));
577     if (callout_data != 0)
578     {
579     fprintf(outfile, "Callout data = %d\n", callout_data);
580     return callout_data;
581     }
582 nigel 63 }
583 nigel 49
584 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
585     (++callout_count >= callout_fail_count)? 1 : 0;
586 nigel 3 }
587    
588    
589 nigel 63 /*************************************************
590 nigel 73 * Local malloc functions *
591 nigel 63 *************************************************/
592 nigel 3
593     /* Alternative malloc function, to test functionality and show the size of the
594     compiled re. */
595    
596     static void *new_malloc(size_t size)
597     {
598 nigel 73 void *block = malloc(size);
599 nigel 43 gotten_store = size;
600 nigel 73 if (show_malloc)
601 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
602 nigel 73 return block;
603 nigel 3 }
604    
605 nigel 73 static void new_free(void *block)
606     {
607     if (show_malloc)
608     fprintf(outfile, "free %p\n", block);
609     free(block);
610     }
611 nigel 3
612    
613 nigel 73 /* For recursion malloc/free, to test stacking calls */
614    
615     static void *stack_malloc(size_t size)
616     {
617     void *block = malloc(size);
618     if (show_malloc)
619 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
620 nigel 73 return block;
621     }
622    
623     static void stack_free(void *block)
624     {
625     if (show_malloc)
626     fprintf(outfile, "stack_free %p\n", block);
627     free(block);
628     }
629    
630    
631 nigel 63 /*************************************************
632     * Call pcre_fullinfo() *
633     *************************************************/
634 nigel 43
635     /* Get one piece of information from the pcre_fullinfo() function */
636    
637     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
638     {
639     int rc;
640     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
641     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
642     }
643    
644    
645    
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverse the byte order of a 2-byte or a 4-byte quantity.

Arguments:
  value      the value to flip
  n          2 to swap the two low-order bytes; any other value reverses
               the four low-order bytes

Returns:     the flipped value; bits above those involved are discarded
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;           /* least significant byte */
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
659    
660    
661    
662    
663     /*************************************************
664 nigel 87 * Check match or recursion limit *
665     *************************************************/
666    
667     static int
668     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
669     int start_offset, int options, int *use_offsets, int use_size_offsets,
670     int flag, unsigned long int *limit, int errnumber, const char *msg)
671     {
672     int count;
673     int min = 0;
674     int mid = 64;
675     int max = -1;
676    
677     extra->flags |= flag;
678    
679     for (;;)
680     {
681     *limit = mid;
682    
683     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
684     use_offsets, use_size_offsets);
685    
686     if (count == errnumber)
687     {
688     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
689     min = mid;
690     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
691     }
692    
693     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
694     count == PCRE_ERROR_PARTIAL)
695     {
696     if (mid == min + 1)
697     {
698     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
699     break;
700     }
701     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
702     max = mid;
703     mid = (min + mid)/2;
704     }
705     else break; /* Some other error */
706     }
707    
708     extra->flags &= ~flag;
709     return count;
710     }
711    
712    
713    
714     /*************************************************
715 ph10 227 * Case-independent strncmp() function *
716     *************************************************/
717    
718     /*
719     Arguments:
720     s first string
721     t second string
722     n number of characters to compare
723    
724     Returns: < 0, = 0, or > 0, according to the comparison
725     */
726    
727     static int
728     strncmpic(uschar *s, uschar *t, int n)
729     {
730     while (n--)
731     {
732     int c = tolower(*s++) - tolower(*t++);
733     if (c) return c;
734     }
735     return 0;
736     }
737    
738    
739    
740     /*************************************************
741 nigel 91 * Check newline indicator *
742     *************************************************/
743    
744 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
745     a message and return 0 if there is no match.
746 nigel 91
747     Arguments:
748     p points after the leading '<'
749     f file for error message
750    
751     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
752     */
753    
754     static int
755     check_newline(uschar *p, FILE *f)
756     {
757 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
758     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
759     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
760     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
761     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
762 ph10 231 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
763     if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
764 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
765     return 0;
766     }
767    
768    
769    
/*************************************************
*             Usage function                     *
*************************************************/

/* Write the command line summary to stdout. Which lines appear depends on
whether SUPPORT_LIBREADLINE, NODFA and NOPOSIX are defined. None of the text
contains a format directive, so it is written with fputs(). */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n", stdout);
fputs("Input and output default to stdin and stdout.\n", stdout);
#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
fputs("\nOptions:\n", stdout);
fputs(" -b show compiled code (bytecode)\n", stdout);
fputs(" -C show PCRE compile-time options and exit\n", stdout);
fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n", stdout);
fputs(" -i show information about compiled patterns\n", stdout);
fputs(" -M find MATCH_LIMIT minimum for each subject\n", stdout);
fputs(" -m output memory used information\n", stdout);
fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
fputs(" -s output store (memory) used information\n", stdout);
fputs(" -t time compilation and execution\n", stdout);
fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
fputs(" -tm time execution (matching) only\n", stdout);
fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
807    
808    
809    
810     /*************************************************
811 nigel 63 * Main Program *
812     *************************************************/
813 nigel 43
814 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
815     consist of a regular expression, in delimiters and optionally followed by
816     options, followed by a set of test data, terminated by an empty line. */
817    
818     int main(int argc, char **argv)
819     {
820     FILE *infile = stdin;
821     int options = 0;
822     int study_options = 0;
823 ph10 386 int default_find_match_limit = FALSE;
824 nigel 3 int op = 1;
825     int timeit = 0;
826 nigel 93 int timeitm = 0;
827 nigel 3 int showinfo = 0;
828 nigel 31 int showstore = 0;
829 nigel 87 int quiet = 0;
830 nigel 53 int size_offsets = 45;
831     int size_offsets_max;
832 nigel 77 int *offsets = NULL;
833 nigel 53 #if !defined NOPOSIX
834 nigel 3 int posix = 0;
835 nigel 53 #endif
836 nigel 3 int debug = 0;
837 nigel 11 int done = 0;
838 nigel 77 int all_use_dfa = 0;
839     int yield = 0;
840 nigel 91 int stack_size;
841 nigel 3
842 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
843     that 1024 is plenty long enough for the few names we'll be testing. */
844 nigel 69
845 nigel 91 uschar copynames[1024];
846     uschar getnames[1024];
847    
848     uschar *copynamesptr;
849     uschar *getnamesptr;
850    
851 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
852 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
853 nigel 69
854 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
855     dbuffer = (unsigned char *)malloc(buffer_size);
856     pbuffer = (unsigned char *)malloc(buffer_size);
857 nigel 69
858 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
859 nigel 3
860 nigel 93 outfile = stdout;
861    
862     /* The following _setmode() stuff is some Windows magic that tells its runtime
863     library to translate CRLF into a single LF character. At least, that's what
864     I've been told: never having used Windows I take this all on trust. Originally
865     it set 0x8000, but then I was advised that _O_BINARY was better. */
866    
867 nigel 75 #if defined(_WIN32) || defined(WIN32)
868 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
869     #endif
870 nigel 75
871 nigel 3 /* Scan options */
872    
873     while (argc > 1 && argv[op][0] == '-')
874     {
875 nigel 63 unsigned char *endptr;
876 nigel 53
877 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
878     showstore = 1;
879 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
880 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
881 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
882     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
883 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
884 nigel 79 #if !defined NODFA
885 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
886 nigel 79 #endif
887 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
888 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
889     *endptr == 0))
890 nigel 53 {
891     op++;
892     argc--;
893     }
894 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
895     {
896     int both = argv[op][2] == 0;
897     int temp;
898     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
899     *endptr == 0))
900     {
901     timeitm = temp;
902     op++;
903     argc--;
904     }
905     else timeitm = LOOPREPEAT;
906     if (both) timeit = timeitm;
907     }
908 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
909     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
910     *endptr == 0))
911     {
912 nigel 93 #if defined(_WIN32) || defined(WIN32)
913 nigel 91 printf("PCRE: -S not supported on this OS\n");
914     exit(1);
915     #else
916     int rc;
917     struct rlimit rlim;
918     getrlimit(RLIMIT_STACK, &rlim);
919     rlim.rlim_cur = stack_size * 1024 * 1024;
920     rc = setrlimit(RLIMIT_STACK, &rlim);
921     if (rc != 0)
922     {
923     printf("PCRE: setrlimit() failed with error %d\n", rc);
924     exit(1);
925     }
926     op++;
927     argc--;
928     #endif
929     }
930 nigel 53 #if !defined NOPOSIX
931 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
932 nigel 53 #endif
933 nigel 63 else if (strcmp(argv[op], "-C") == 0)
934     {
935     int rc;
936 ph10 392 unsigned long int lrc;
937 nigel 63 printf("PCRE version %s\n", pcre_version());
938     printf("Compiled with\n");
939     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
940     printf(" %sUTF-8 support\n", rc? "" : "No ");
941 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
942     printf(" %sUnicode properties support\n", rc? "" : "No ");
943 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
944 ph10 391 /* Note that these values are always the ASCII values, even
945 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
946 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
947     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
948 ph10 150 (rc == -2)? "ANYCRLF" :
949 nigel 93 (rc == -1)? "ANY" : "???");
950 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
951     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
952     "all Unicode newlines");
953 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
954     printf(" Internal link size = %d\n", rc);
955     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
956     printf(" POSIX malloc threshold = %d\n", rc);
957 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
958     printf(" Default match limit = %ld\n", lrc);
959     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
960     printf(" Default recursion depth limit = %ld\n", lrc);
961 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
962     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
963 ph10 121 goto EXIT;
964 nigel 63 }
965 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
966     strcmp(argv[op], "--help") == 0)
967     {
968     usage();
969     goto EXIT;
970     }
971 nigel 3 else
972     {
973 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
974 nigel 93 usage();
975 nigel 77 yield = 1;
976     goto EXIT;
977 nigel 3 }
978     op++;
979     argc--;
980     }
981    
982 nigel 53 /* Get the store for the offsets vector, and remember what it was */
983    
984     size_offsets_max = size_offsets;
985 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
986 nigel 53 if (offsets == NULL)
987     {
988     printf("** Failed to get %d bytes of memory for offsets vector\n",
989 ph10 151 (int)(size_offsets_max * sizeof(int)));
990 nigel 77 yield = 1;
991     goto EXIT;
992 nigel 53 }
993    
994 nigel 3 /* Sort out the input and output files */
995    
996     if (argc > 1)
997     {
998 nigel 93 infile = fopen(argv[op], INPUT_MODE);
999 nigel 3 if (infile == NULL)
1000     {
1001     printf("** Failed to open %s\n", argv[op]);
1002 nigel 77 yield = 1;
1003     goto EXIT;
1004 nigel 3 }
1005     }
1006    
1007     if (argc > 2)
1008     {
1009 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
1010 nigel 3 if (outfile == NULL)
1011     {
1012     printf("** Failed to open %s\n", argv[op+1]);
1013 nigel 77 yield = 1;
1014     goto EXIT;
1015 nigel 3 }
1016     }
1017    
1018     /* Set alternative malloc function */
1019    
1020     pcre_malloc = new_malloc;
1021 nigel 73 pcre_free = new_free;
1022     pcre_stack_malloc = stack_malloc;
1023     pcre_stack_free = stack_free;
1024 nigel 3
1025 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1026 nigel 3
1027 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1028 nigel 3
1029     /* Main loop */
1030    
1031 nigel 11 while (!done)
1032 nigel 3 {
1033     pcre *re = NULL;
1034     pcre_extra *extra = NULL;
1035 nigel 37
1036     #if !defined NOPOSIX /* There are still compilers that require no indent */
1037 nigel 3 regex_t preg;
1038 nigel 45 int do_posix = 0;
1039 nigel 37 #endif
1040    
1041 nigel 7 const char *error;
1042 ph10 512 unsigned char *markptr;
1043 nigel 25 unsigned char *p, *pp, *ppp;
1044 nigel 75 unsigned char *to_file = NULL;
1045 nigel 53 const unsigned char *tables = NULL;
1046 nigel 75 unsigned long int true_size, true_study_size = 0;
1047     size_t size, regex_gotten_store;
1048 ph10 512 int do_mark = 0;
1049 nigel 3 int do_study = 0;
1050 nigel 25 int do_debug = debug;
1051 nigel 35 int do_G = 0;
1052     int do_g = 0;
1053 nigel 25 int do_showinfo = showinfo;
1054 nigel 35 int do_showrest = 0;
1055 nigel 75 int do_flip = 0;
1056 nigel 93 int erroroffset, len, delimiter, poffset;
1057 nigel 3
1058 nigel 67 use_utf8 = 0;
1059 ph10 211 debug_lengths = 1;
1060 nigel 63
1061 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1062 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1063 nigel 63 fflush(outfile);
1064 nigel 3
1065     p = buffer;
1066     while (isspace(*p)) p++;
1067     if (*p == 0) continue;
1068    
1069 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1070 nigel 3
1071 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1072     {
1073 nigel 91 unsigned long int magic, get_options;
1074 nigel 75 uschar sbuf[8];
1075     FILE *f;
1076    
1077     p++;
1078     pp = p + (int)strlen((char *)p);
1079     while (isspace(pp[-1])) pp--;
1080     *pp = 0;
1081    
1082     f = fopen((char *)p, "rb");
1083     if (f == NULL)
1084     {
1085     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1086     continue;
1087     }
1088    
1089     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1090    
1091     true_size =
1092     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1093     true_study_size =
1094     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1095    
1096     re = (real_pcre *)new_malloc(true_size);
1097     regex_gotten_store = gotten_store;
1098    
1099     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1100    
1101     magic = ((real_pcre *)re)->magic_number;
1102     if (magic != MAGIC_NUMBER)
1103     {
1104     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1105     {
1106     do_flip = 1;
1107     }
1108     else
1109     {
1110     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1111     fclose(f);
1112     continue;
1113     }
1114     }
1115    
1116     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1117     do_flip? " (byte-inverted)" : "", p);
1118    
1119     /* Need to know if UTF-8 for printing data strings */
1120    
1121 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1122     use_utf8 = (get_options & PCRE_UTF8) != 0;
1123 nigel 75
1124     /* Now see if there is any following study data */
1125    
1126     if (true_study_size != 0)
1127     {
1128     pcre_study_data *psd;
1129    
1130     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1131     extra->flags = PCRE_EXTRA_STUDY_DATA;
1132    
1133     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1134     extra->study_data = psd;
1135    
1136     if (fread(psd, 1, true_study_size, f) != true_study_size)
1137     {
1138     FAIL_READ:
1139     fprintf(outfile, "Failed to read data from %s\n", p);
1140     if (extra != NULL) new_free(extra);
1141     if (re != NULL) new_free(re);
1142     fclose(f);
1143     continue;
1144     }
1145     fprintf(outfile, "Study data loaded from %s\n", p);
1146     do_study = 1; /* To get the data output if requested */
1147     }
1148     else fprintf(outfile, "No study data\n");
1149    
1150     fclose(f);
1151     goto SHOW_INFO;
1152     }
1153    
1154     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1155     the pattern; if it isn't complete, read more. */
1156    
1157 nigel 3 delimiter = *p++;
1158    
1159 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1160 nigel 3 {
1161 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1162 nigel 3 goto SKIP_DATA;
1163     }
1164    
1165     pp = p;
1166 nigel 93 poffset = p - buffer;
1167 nigel 3
1168     for(;;)
1169     {
1170 nigel 29 while (*pp != 0)
1171     {
1172     if (*pp == '\\' && pp[1] != 0) pp++;
1173     else if (*pp == delimiter) break;
1174     pp++;
1175     }
1176 nigel 3 if (*pp != 0) break;
1177 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1178 nigel 3 {
1179     fprintf(outfile, "** Unexpected EOF\n");
1180 nigel 11 done = 1;
1181     goto CONTINUE;
1182 nigel 3 }
1183 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1184 nigel 3 }
1185    
1186 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1187     pointer to the correct relative point in the buffer. */
1188    
1189     p = buffer + poffset;
1190    
1191 nigel 29 /* If the first character after the delimiter is backslash, make
1192     the pattern end with backslash. This is purely to provide a way
1193     of testing for the error message when a pattern ends with backslash. */
1194    
1195     if (pp[1] == '\\') *pp++ = '\\';
1196    
1197 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1198     for callouts. */
1199 nigel 3
1200     *pp++ = 0;
1201 nigel 75 strcpy((char *)pbuffer, (char *)p);
1202 nigel 3
1203     /* Look for options after final delimiter */
1204    
1205     options = 0;
1206     study_options = 0;
1207 nigel 31 log_store = showstore; /* default from command line */
1208    
1209 nigel 3 while (*pp != 0)
1210     {
1211     switch (*pp++)
1212     {
1213 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1214 nigel 35 case 'g': do_g = 1; break;
1215 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1216     case 'm': options |= PCRE_MULTILINE; break;
1217     case 's': options |= PCRE_DOTALL; break;
1218     case 'x': options |= PCRE_EXTENDED; break;
1219 nigel 25
1220 nigel 35 case '+': do_showrest = 1; break;
1221 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1222 nigel 93 case 'B': do_debug = 1; break;
1223 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1224 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1225 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1226 nigel 75 case 'F': do_flip = 1; break;
1227 nigel 35 case 'G': do_G = 1; break;
1228 nigel 25 case 'I': do_showinfo = 1; break;
1229 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1230 ph10 512 case 'K': do_mark = 1; break;
1231 nigel 31 case 'M': log_store = 1; break;
1232 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1233 nigel 37
1234     #if !defined NOPOSIX
1235 nigel 3 case 'P': do_posix = 1; break;
1236 nigel 37 #endif
1237    
1238 nigel 3 case 'S': do_study = 1; break;
1239 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1240 ph10 518 case 'W': options |= PCRE_UCP; break;
1241 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1242 ph10 126 case 'Z': debug_lengths = 0; break;
1243 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1244 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1245 nigel 25
1246     case 'L':
1247     ppp = pp;
1248 nigel 93 /* The '\r' test here is so that it works on Windows. */
1249     /* The '0' test is just in case this is an unterminated line. */
1250     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1251 nigel 25 *ppp = 0;
1252     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1253     {
1254     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1255     goto SKIP_DATA;
1256     }
1257 nigel 93 locale_set = 1;
1258 nigel 25 tables = pcre_maketables();
1259     pp = ppp;
1260     break;
1261    
1262 nigel 75 case '>':
1263     to_file = pp;
1264     while (*pp != 0) pp++;
1265     while (isspace(pp[-1])) pp--;
1266     *pp = 0;
1267     break;
1268    
1269 nigel 91 case '<':
1270     {
1271 ph10 518 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1272 ph10 336 {
1273     options |= PCRE_JAVASCRIPT_COMPAT;
1274 ph10 345 pp += 3;
1275 ph10 336 }
1276     else
1277 ph10 345 {
1278 ph10 336 int x = check_newline(pp, outfile);
1279     if (x == 0) goto SKIP_DATA;
1280     options |= x;
1281     while (*pp++ != '>');
1282 ph10 345 }
1283 nigel 91 }
1284     break;
1285    
1286 nigel 77 case '\r': /* So that it works in Windows */
1287     case '\n':
1288     case ' ':
1289     break;
1290 nigel 75
1291 nigel 3 default:
1292     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1293     goto SKIP_DATA;
1294     }
1295     }
1296    
1297 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1298 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1299     local character tables. */
1300 nigel 3
1301 nigel 37 #if !defined NOPOSIX
1302 nigel 3 if (posix || do_posix)
1303     {
1304     int rc;
1305     int cflags = 0;
1306 nigel 75
1307 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1308     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1309 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1310 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1311     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1312 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1313 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1314 nigel 87
1315 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1316    
1317     /* Compilation failed; go back for another re, skipping to blank line
1318     if non-interactive. */
1319    
1320     if (rc != 0)
1321     {
1322 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1323 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1324     goto SKIP_DATA;
1325     }
1326     }
1327    
1328     /* Handle compiling via the native interface */
1329    
1330     else
1331 nigel 37 #endif /* !defined NOPOSIX */
1332    
1333 nigel 3 {
1334 ph10 412 unsigned long int get_options;
1335 ph10 416
1336 nigel 93 if (timeit > 0)
1337 nigel 3 {
1338     register int i;
1339     clock_t time_taken;
1340     clock_t start_time = clock();
1341 nigel 93 for (i = 0; i < timeit; i++)
1342 nigel 3 {
1343 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1344 nigel 3 if (re != NULL) free(re);
1345     }
1346     time_taken = clock() - start_time;
1347 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1348     (((double)time_taken * 1000.0) / (double)timeit) /
1349 nigel 63 (double)CLOCKS_PER_SEC);
1350 nigel 3 }
1351    
1352 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1353 nigel 3
1354     /* Compilation failed; go back for another re, skipping to blank line
1355     if non-interactive. */
1356    
1357     if (re == NULL)
1358     {
1359     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1360     SKIP_DATA:
1361     if (infile != stdin)
1362     {
1363     for (;;)
1364     {
1365 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
1366 nigel 11 {
1367     done = 1;
1368     goto CONTINUE;
1369     }
1370 nigel 3 len = (int)strlen((char *)buffer);
1371     while (len > 0 && isspace(buffer[len-1])) len--;
1372     if (len == 0) break;
1373     }
1374     fprintf(outfile, "\n");
1375     }
1376 nigel 25 goto CONTINUE;
1377 nigel 3 }
1378 ph10 416
1379     /* Compilation succeeded. It is now possible to set the UTF-8 option from
1380     within the regex; check for this so that we know how to process the data
1381 ph10 412 lines. */
1382 ph10 416
1383 ph10 412 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1384     if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1385 nigel 3
1386 ph10 412 /* Print information if required. There are now two info-returning
1387     functions. The old one has a limited interface and returns only limited
1388     data. Check that it agrees with the newer one. */
1389 nigel 3
1390 nigel 63 if (log_store)
1391     fprintf(outfile, "Memory allocation (code space): %d\n",
1392     (int)(gotten_store -
1393     sizeof(real_pcre) -
1394     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1395    
1396 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1397     and remember the store that was got. */
1398    
1399     true_size = ((real_pcre *)re)->size;
1400     regex_gotten_store = gotten_store;
1401    
1402     /* If /S was present, study the regexp to generate additional info to
1403     help with the matching. */
1404    
1405     if (do_study)
1406     {
1407 nigel 93 if (timeit > 0)
1408 nigel 75 {
1409     register int i;
1410     clock_t time_taken;
1411     clock_t start_time = clock();
1412 nigel 93 for (i = 0; i < timeit; i++)
1413 nigel 75 extra = pcre_study(re, study_options, &error);
1414     time_taken = clock() - start_time;
1415     if (extra != NULL) free(extra);
1416 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1417     (((double)time_taken * 1000.0) / (double)timeit) /
1418 nigel 75 (double)CLOCKS_PER_SEC);
1419     }
1420     extra = pcre_study(re, study_options, &error);
1421     if (error != NULL)
1422     fprintf(outfile, "Failed to study: %s\n", error);
1423     else if (extra != NULL)
1424     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1425     }
1426 ph10 512
1427 ph10 510 /* If /K was present, we set up for handling MARK data. */
1428 ph10 512
1429 ph10 510 if (do_mark)
1430     {
1431     if (extra == NULL)
1432     {
1433     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1434     extra->flags = 0;
1435     }
1436 ph10 512 extra->mark = &markptr;
1437 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
1438 ph10 512 }
1439 nigel 75
1440     /* If the 'F' option was present, we flip the bytes of all the integer
1441     fields in the regex data block and the study block. This is to make it
1442     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1443     compiled on a different architecture. */
1444    
1445     if (do_flip)
1446     {
1447     real_pcre *rre = (real_pcre *)re;
1448 ph10 259 rre->magic_number =
1449 ph10 255 byteflip(rre->magic_number, sizeof(rre->magic_number));
1450 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
1451     rre->options = byteflip(rre->options, sizeof(rre->options));
1452 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1453 ph10 259 rre->top_bracket =
1454 ph10 255 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1455 ph10 259 rre->top_backref =
1456 ph10 255 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1457 ph10 259 rre->first_byte =
1458 ph10 255 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1459 ph10 259 rre->req_byte =
1460 ph10 255 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1461     rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1462 nigel 75 sizeof(rre->name_table_offset));
1463 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1464 nigel 75 sizeof(rre->name_entry_size));
1465 ph10 259 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1466 ph10 255 sizeof(rre->name_count));
1467 nigel 75
1468     if (extra != NULL)
1469     {
1470     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1471     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1472 ph10 455 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1473     rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1474 nigel 75 }
1475     }
1476    
1477     /* Extract information from the compiled data if required */
1478    
1479     SHOW_INFO:
1480    
1481 nigel 93 if (do_debug)
1482     {
1483     fprintf(outfile, "------------------------------------------------------------------\n");
1484 ph10 116 pcre_printint(re, outfile, debug_lengths);
1485 nigel 93 }
1486 ph10 416
1487 ph10 412 /* We already have the options in get_options (see above) */
1488 nigel 93
1489 nigel 25 if (do_showinfo)
1490 nigel 3 {
1491 ph10 412 unsigned long int all_options;
1492 nigel 79 #if !defined NOINFOCHECK
1493 nigel 43 int old_first_char, old_options, old_count;
1494 nigel 79 #endif
1495 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1496 ph10 227 hascrorlf;
1497 nigel 63 int nameentrysize, namecount;
1498     const uschar *nametable;
1499 nigel 3
1500 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1501     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1502     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1503 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1504 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1505 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1506     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1507 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1508 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1509     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1510 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1511 nigel 43
1512 nigel 79 #if !defined NOINFOCHECK
1513 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1514 nigel 3 if (count < 0) fprintf(outfile,
1515 nigel 43 "Error %d from pcre_info()\n", count);
1516 nigel 3 else
1517     {
1518 nigel 43 if (old_count != count) fprintf(outfile,
1519     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1520     old_count);
1521 nigel 37
1522 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1523     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1524     first_char, old_first_char);
1525 nigel 37
1526 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1527     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1528     get_options, old_options);
1529 nigel 43 }
1530 nigel 79 #endif
1531 nigel 43
1532 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1533 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1534 nigel 77 (int)size, (int)regex_gotten_store);
1535 nigel 43
1536     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1537     if (backrefmax > 0)
1538     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1539 nigel 63
1540     if (namecount > 0)
1541     {
1542     fprintf(outfile, "Named capturing subpatterns:\n");
1543     while (namecount-- > 0)
1544     {
1545     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1546     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1547     GET2(nametable, 0));
1548     nametable += nameentrysize;
1549     }
1550     }
1551 ph10 172
1552 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1553 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1554 nigel 63
1555 nigel 75 all_options = ((real_pcre *)re)->options;
1556 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1557 nigel 75
1558 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1559 ph10 518 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1560 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1561     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1562     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1563     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1564 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1565 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1566 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1567     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1568 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1569     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1570     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1571 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1572 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1573 ph10 518 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1574 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1575     ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1576 ph10 172
1577 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1578 nigel 43
1579 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1580 nigel 91 {
1581     case PCRE_NEWLINE_CR:
1582     fprintf(outfile, "Forced newline sequence: CR\n");
1583     break;
1584 nigel 43
1585 nigel 91 case PCRE_NEWLINE_LF:
1586     fprintf(outfile, "Forced newline sequence: LF\n");
1587     break;
1588    
1589     case PCRE_NEWLINE_CRLF:
1590     fprintf(outfile, "Forced newline sequence: CRLF\n");
1591     break;
1592    
1593 ph10 149 case PCRE_NEWLINE_ANYCRLF:
1594     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1595     break;
1596    
1597 nigel 93 case PCRE_NEWLINE_ANY:
1598     fprintf(outfile, "Forced newline sequence: ANY\n");
1599     break;
1600    
1601 nigel 91 default:
1602     break;
1603     }
1604    
1605 nigel 43 if (first_char == -1)
1606     {
1607 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1608 nigel 43 }
1609     else if (first_char < 0)
1610     {
1611     fprintf(outfile, "No first char\n");
1612     }
1613     else
1614     {
1615 nigel 63 int ch = first_char & 255;
1616 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1617 nigel 63 "" : " (caseless)";
1618 nigel 93 if (PRINTHEX(ch))
1619 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1620 nigel 3 else
1621 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1622 nigel 43 }
1623 nigel 37
1624 nigel 43 if (need_char < 0)
1625     {
1626     fprintf(outfile, "No need char\n");
1627 nigel 3 }
1628 nigel 43 else
1629     {
1630 nigel 63 int ch = need_char & 255;
1631 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1632 nigel 63 "" : " (caseless)";
1633 nigel 93 if (PRINTHEX(ch))
1634 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1635 nigel 43 else
1636 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1637 nigel 43 }
1638 nigel 75
1639     /* Don't output study size; at present it is in any case a fixed
1640     value, but it varies, depending on the computer architecture, and
1641     so messes up the test suite. (And with the /F option, it might be
1642     flipped.) */
1643    
1644     if (do_study)
1645     {
1646     if (extra == NULL)
1647     fprintf(outfile, "Study returned NULL\n");
1648     else
1649     {
1650     uschar *start_bits = NULL;
1651 ph10 455 int minlength;
1652 ph10 461
1653 ph10 455 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
1654 ph10 461 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1655    
1656 nigel 75 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1657     if (start_bits == NULL)
1658 ph10 455 fprintf(outfile, "No set of starting bytes\n");
1659 nigel 75 else
1660     {
1661     int i;
1662     int c = 24;
1663     fprintf(outfile, "Starting byte set: ");
1664     for (i = 0; i < 256; i++)
1665     {
1666     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1667     {
1668     if (c > 75)
1669     {
1670     fprintf(outfile, "\n ");
1671     c = 2;
1672     }
1673 nigel 93 if (PRINTHEX(i) && i != ' ')
1674 nigel 75 {
1675     fprintf(outfile, "%c ", i);
1676     c += 2;
1677     }
1678     else
1679     {
1680     fprintf(outfile, "\\x%02x ", i);
1681     c += 5;
1682     }
1683     }
1684     }
1685     fprintf(outfile, "\n");
1686     }
1687     }
1688     }
1689 nigel 3 }
1690    
1691 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1692     that is all. The first 8 bytes of the file are the regex length and then
1693     the study length, in big-endian order. */
1694 nigel 3
1695 nigel 75 if (to_file != NULL)
1696 nigel 3 {
1697 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1698     if (f == NULL)
1699 nigel 3 {
1700 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1701 nigel 3 }
1702 nigel 75 else
1703     {
1704     uschar sbuf[8];
1705 ph10 255 sbuf[0] = (uschar)((true_size >> 24) & 255);
1706     sbuf[1] = (uschar)((true_size >> 16) & 255);
1707     sbuf[2] = (uschar)((true_size >> 8) & 255);
1708     sbuf[3] = (uschar)((true_size) & 255);
1709 ph10 259
1710 ph10 255 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1711     sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1712     sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1713     sbuf[7] = (uschar)((true_study_size) & 255);
1714 nigel 3
1715 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1716     fwrite(re, 1, true_size, f) < true_size)
1717     {
1718     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1719     }
1720 nigel 3 else
1721     {
1722 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1723     if (extra != NULL)
1724 nigel 3 {
1725 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1726     true_study_size)
1727 nigel 3 {
1728 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1729     strerror(errno));
1730 nigel 3 }
1731 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1732 nigel 93
1733 nigel 3 }
1734     }
1735 nigel 75 fclose(f);
1736 nigel 3 }
1737 nigel 77
1738     new_free(re);
1739     if (extra != NULL) new_free(extra);
1740     if (tables != NULL) new_free((void *)tables);
1741 nigel 75 continue; /* With next regex */
1742 nigel 3 }
1743 nigel 75 } /* End of non-POSIX compile */
1744 nigel 3
1745     /* Read data lines and test them */
1746    
1747     for (;;)
1748     {
1749 nigel 87 uschar *q;
1750 ph10 147 uschar *bptr;
1751 nigel 57 int *use_offsets = offsets;
1752 nigel 53 int use_size_offsets = size_offsets;
1753 nigel 63 int callout_data = 0;
1754     int callout_data_set = 0;
1755 nigel 3 int count, c;
1756 nigel 29 int copystrings = 0;
1757 ph10 386 int find_match_limit = default_find_match_limit;
1758 nigel 29 int getstrings = 0;
1759     int getlist = 0;
1760 nigel 39 int gmatched = 0;
1761 nigel 35 int start_offset = 0;
1762 nigel 41 int g_notempty = 0;
1763 nigel 77 int use_dfa = 0;
1764 nigel 3
1765     options = 0;
1766    
1767 nigel 91 *copynames = 0;
1768     *getnames = 0;
1769    
1770     copynamesptr = copynames;
1771     getnamesptr = getnames;
1772    
1773 nigel 63 pcre_callout = callout;
1774     first_callout = 1;
1775     callout_extra = 0;
1776     callout_count = 0;
1777     callout_fail_count = 999999;
1778     callout_fail_id = -1;
1779 nigel 73 show_malloc = 0;
1780 nigel 63
1781 nigel 91 if (extra != NULL) extra->flags &=
1782     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1783    
1784     len = 0;
1785     for (;;)
1786 nigel 11 {
1787 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1788 nigel 91 {
1789     if (len > 0) break;
1790     done = 1;
1791     goto CONTINUE;
1792     }
1793     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1794     len = (int)strlen((char *)buffer);
1795     if (buffer[len-1] == '\n') break;
1796 nigel 11 }
1797 nigel 3
1798     while (len > 0 && isspace(buffer[len-1])) len--;
1799     buffer[len] = 0;
1800     if (len == 0) break;
1801    
1802     p = buffer;
1803     while (isspace(*p)) p++;
1804    
1805 ph10 147 bptr = q = dbuffer;
1806 nigel 3 while ((c = *p++) != 0)
1807     {
1808     int i = 0;
1809     int n = 0;
1810 nigel 63
1811 nigel 3 if (c == '\\') switch ((c = *p++))
1812     {
1813     case 'a': c = 7; break;
1814     case 'b': c = '\b'; break;
1815     case 'e': c = 27; break;
1816     case 'f': c = '\f'; break;
1817     case 'n': c = '\n'; break;
1818     case 'r': c = '\r'; break;
1819     case 't': c = '\t'; break;
1820     case 'v': c = '\v'; break;
1821    
1822     case '0': case '1': case '2': case '3':
1823     case '4': case '5': case '6': case '7':
1824     c -= '0';
1825     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1826     c = c * 8 + *p++ - '0';
1827 nigel 91
1828     #if !defined NOUTF8
1829     if (use_utf8 && c > 255)
1830     {
1831     unsigned char buff8[8];
1832     int ii, utn;
1833     utn = ord2utf8(c, buff8);
1834     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1835     c = buff8[ii]; /* Last byte */
1836     }
1837     #endif
1838 nigel 3 break;
1839    
1840     case 'x':
1841 nigel 49
1842     /* Handle \x{..} specially - new Perl thing for utf8 */
1843    
1844 nigel 79 #if !defined NOUTF8
1845 nigel 49 if (*p == '{')
1846     {
1847     unsigned char *pt = p;
1848     c = 0;
1849     while (isxdigit(*(++pt)))
1850     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1851     if (*pt == '}')
1852     {
1853 nigel 67 unsigned char buff8[8];
1854 nigel 49 int ii, utn;
1855 ph10 355 if (use_utf8)
1856 ph10 358 {
1857 ph10 355 utn = ord2utf8(c, buff8);
1858     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1859     c = buff8[ii]; /* Last byte */
1860     }
1861     else
1862     {
1863 ph10 358 if (c > 255)
1864 ph10 355 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1865     "UTF-8 mode is not enabled.\n"
1866     "** Truncation will probably give the wrong result.\n", c);
1867 ph10 358 }
1868 nigel 49 p = pt + 1;
1869     break;
1870     }
1871     /* Not correct form; fall through */
1872     }
1873 nigel 79 #endif
1874 nigel 49
1875     /* Ordinary \x */
1876    
1877 nigel 3 c = 0;
1878     while (i++ < 2 && isxdigit(*p))
1879     {
1880     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1881     p++;
1882     }
1883     break;
1884    
1885 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1886 nigel 3 p--;
1887     continue;
1888    
1889 nigel 75 case '>':
1890     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1891     continue;
1892    
1893 nigel 3 case 'A': /* Option setting */
1894     options |= PCRE_ANCHORED;
1895     continue;
1896    
1897     case 'B':
1898     options |= PCRE_NOTBOL;
1899     continue;
1900    
1901 nigel 29 case 'C':
1902 nigel 63 if (isdigit(*p)) /* Set copy string */
1903     {
1904     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1905     copystrings |= 1 << n;
1906     }
1907     else if (isalnum(*p))
1908     {
1909 nigel 91 uschar *npp = copynamesptr;
1910 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1911 nigel 91 *npp++ = 0;
1912 nigel 67 *npp = 0;
1913 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1914 nigel 63 if (n < 0)
1915 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1916     copynamesptr = npp;
1917 nigel 63 }
1918     else if (*p == '+')
1919     {
1920     callout_extra = 1;
1921     p++;
1922     }
1923     else if (*p == '-')
1924     {
1925     pcre_callout = NULL;
1926     p++;
1927     }
1928     else if (*p == '!')
1929     {
1930     callout_fail_id = 0;
1931     p++;
1932     while(isdigit(*p))
1933     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1934     callout_fail_count = 0;
1935     if (*p == '!')
1936     {
1937     p++;
1938     while(isdigit(*p))
1939     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1940     }
1941     }
1942     else if (*p == '*')
1943     {
1944     int sign = 1;
1945     callout_data = 0;
1946     if (*(++p) == '-') { sign = -1; p++; }
1947     while(isdigit(*p))
1948     callout_data = callout_data * 10 + *p++ - '0';
1949     callout_data *= sign;
1950     callout_data_set = 1;
1951     }
1952 nigel 29 continue;
1953    
1954 nigel 79 #if !defined NODFA
1955 nigel 77 case 'D':
1956 nigel 79 #if !defined NOPOSIX
1957 nigel 77 if (posix || do_posix)
1958     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1959     else
1960 nigel 79 #endif
1961 nigel 77 use_dfa = 1;
1962     continue;
1963    
1964     case 'F':
1965     options |= PCRE_DFA_SHORTEST;
1966     continue;
1967 nigel 79 #endif
1968 nigel 77
1969 nigel 29 case 'G':
1970 nigel 63 if (isdigit(*p))
1971     {
1972     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1973     getstrings |= 1 << n;
1974     }
1975     else if (isalnum(*p))
1976     {
1977 nigel 91 uschar *npp = getnamesptr;
1978 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1979 nigel 91 *npp++ = 0;
1980 nigel 67 *npp = 0;
1981 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1982 nigel 63 if (n < 0)
1983 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1984     getnamesptr = npp;
1985 nigel 63 }
1986 nigel 29 continue;
1987    
1988     case 'L':
1989     getlist = 1;
1990     continue;
1991    
1992 nigel 63 case 'M':
1993     find_match_limit = 1;
1994     continue;
1995    
1996 nigel 37 case 'N':
1997 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
1998     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
1999 ph10 461 else
2000 ph10 442 options |= PCRE_NOTEMPTY;
2001 nigel 37 continue;
2002    
2003 nigel 3 case 'O':
2004     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2005 nigel 53 if (n > size_offsets_max)
2006     {
2007     size_offsets_max = n;
2008 nigel 57 free(offsets);
2009 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2010 nigel 53 if (offsets == NULL)
2011     {
2012     printf("** Failed to get %d bytes of memory for offsets vector\n",
2013 ph10 151 (int)(size_offsets_max * sizeof(int)));
2014 nigel 77 yield = 1;
2015     goto EXIT;
2016 nigel 53 }
2017     }
2018     use_size_offsets = n;
2019 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2020 nigel 3 continue;
2021    
2022 nigel 75 case 'P':
2023 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2024 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2025 nigel 75 continue;
2026    
2027 nigel 91 case 'Q':
2028     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2029     if (extra == NULL)
2030     {
2031     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2032     extra->flags = 0;
2033     }
2034     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2035     extra->match_limit_recursion = n;
2036     continue;
2037    
2038     case 'q':
2039     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2040     if (extra == NULL)
2041     {
2042     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2043     extra->flags = 0;
2044     }
2045     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2046     extra->match_limit = n;
2047     continue;
2048    
2049 nigel 79 #if !defined NODFA
2050 nigel 77 case 'R':
2051     options |= PCRE_DFA_RESTART;
2052     continue;
2053 nigel 79 #endif
2054 nigel 77
2055 nigel 73 case 'S':
2056     show_malloc = 1;
2057     continue;
2058 ph10 392
2059 ph10 389 case 'Y':
2060     options |= PCRE_NO_START_OPTIMIZE;
2061 ph10 392 continue;
2062 nigel 73
2063 nigel 3 case 'Z':
2064     options |= PCRE_NOTEOL;
2065     continue;
2066 nigel 71
2067     case '?':
2068     options |= PCRE_NO_UTF8_CHECK;
2069     continue;
2070 nigel 91
2071     case '<':
2072     {
2073     int x = check_newline(p, outfile);
2074     if (x == 0) goto NEXT_DATA;
2075     options |= x;
2076     while (*p++ != '>');
2077     }
2078     continue;
2079 nigel 3 }
2080 nigel 9 *q++ = c;
2081 nigel 3 }
2082 nigel 9 *q = 0;
2083     len = q - dbuffer;
2084 ph10 371
2085 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
2086 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2087 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
2088 ph10 371
2089 ph10 363 #if !defined NOPOSIX
2090     if (posix || do_posix)
2091     {
2092     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2093 ph10 371 bptr += buffer_size - len - 1;
2094 ph10 363 }
2095 ph10 371 else
2096     #endif
2097 ph10 363 {
2098     memmove(bptr + buffer_size - len, bptr, len);
2099 ph10 371 bptr += buffer_size - len;
2100     }
2101 nigel 3
2102 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
2103     {
2104     printf("**Match limit not relevant for DFA matching: ignored\n");
2105     find_match_limit = 0;
2106     }
2107    
2108 nigel 3 /* Handle matching via the POSIX interface, which does not
2109 nigel 63 support timing or playing with the match limit or callout data. */
2110 nigel 3
2111 nigel 37 #if !defined NOPOSIX
2112 nigel 3 if (posix || do_posix)
2113     {
2114     int rc;
2115     int eflags = 0;
2116 nigel 63 regmatch_t *pmatch = NULL;
2117     if (use_size_offsets > 0)
2118 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2119 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2120     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2121 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2122 nigel 3
2123 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2124 nigel 3
2125     if (rc != 0)
2126     {
2127 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2128 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2129     }
2130 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2131     != 0)
2132     {
2133     fprintf(outfile, "Matched with REG_NOSUB\n");
2134     }
2135 nigel 3 else
2136     {
2137 nigel 7 size_t i;
2138 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
2139 nigel 3 {
2140     if (pmatch[i].rm_so >= 0)
2141     {
2142 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2143 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2144     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2145 nigel 3 fprintf(outfile, "\n");
2146 nigel 35 if (i == 0 && do_showrest)
2147     {
2148     fprintf(outfile, " 0+ ");
2149 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2150     outfile);
2151 nigel 35 fprintf(outfile, "\n");
2152     }
2153 nigel 3 }
2154     }
2155     }
2156 nigel 53 free(pmatch);
2157 nigel 3 }
2158    
2159 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2160 nigel 3
2161 nigel 37 else
2162     #endif /* !defined NOPOSIX */
2163    
2164 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2165 nigel 3 {
2166 ph10 512 markptr = NULL;
2167    
2168 nigel 93 if (timeitm > 0)
2169 nigel 3 {
2170     register int i;
2171     clock_t time_taken;
2172     clock_t start_time = clock();
2173 nigel 77
2174 nigel 79 #if !defined NODFA
2175 nigel 77 if (all_use_dfa || use_dfa)
2176     {
2177     int workspace[1000];
2178 nigel 93 for (i = 0; i < timeitm; i++)
2179 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2180 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2181     sizeof(workspace)/sizeof(int));
2182     }
2183     else
2184 nigel 79 #endif
2185 nigel 77
2186 nigel 93 for (i = 0; i < timeitm; i++)
2187 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2188 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2189 nigel 77
2190 nigel 3 time_taken = clock() - start_time;
2191 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2192     (((double)time_taken * 1000.0) / (double)timeitm) /
2193 nigel 63 (double)CLOCKS_PER_SEC);
2194 nigel 3 }
2195    
2196 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2197 nigel 87 varying limits in order to find the minimum value for the match limit and
2198     for the recursion limit. */
2199 nigel 63
2200     if (find_match_limit)
2201     {
2202     if (extra == NULL)
2203     {
2204 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2205 nigel 63 extra->flags = 0;
2206     }
2207    
2208 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2209 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2210     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2211     PCRE_ERROR_MATCHLIMIT, "match()");
2212 nigel 63
2213 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2214     options|g_notempty, use_offsets, use_size_offsets,
2215     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2216     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2217 nigel 63 }
2218    
2219     /* If callout_data is set, use the interface with additional data */
2220    
2221     else if (callout_data_set)
2222     {
2223     if (extra == NULL)
2224     {
2225 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2226 nigel 63 extra->flags = 0;
2227     }
2228     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2229 nigel 71 extra->callout_data = &callout_data;
2230 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2231     options | g_notempty, use_offsets, use_size_offsets);
2232     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2233     }
2234    
2235     /* The normal case is just to do the match once, with the default
2236     value of match_limit. */
2237    
2238 nigel 79 #if !defined NODFA
2239 nigel 77 else if (all_use_dfa || use_dfa)
2240     {
2241     int workspace[1000];
2242 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2243 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2244     sizeof(workspace)/sizeof(int));
2245     if (count == 0)
2246     {
2247     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2248     count = use_size_offsets/2;
2249     }
2250     }
2251 nigel 79 #endif
2252 nigel 77
2253 nigel 75 else
2254     {
2255     count = pcre_exec(re, extra, (char *)bptr, len,
2256     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2257 nigel 77 if (count == 0)
2258     {
2259     fprintf(outfile, "Matched, but too many substrings\n");
2260     count = use_size_offsets/3;
2261     }
2262 nigel 75 }
2263 nigel 3
2264 nigel 39 /* Matched */
2265    
2266 nigel 3 if (count >= 0)
2267     {
2268 nigel 93 int i, maxcount;
2269    
2270     #if !defined NODFA
2271     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2272     #endif
2273     maxcount = use_size_offsets/3;
2274    
2275     /* This is a check against a lunatic return value. */
2276    
2277     if (count > maxcount)
2278     {
2279     fprintf(outfile,
2280     "** PCRE error: returned count %d is too big for offset size %d\n",
2281     count, use_size_offsets);
2282     count = use_size_offsets/3;
2283     if (do_g || do_G)
2284     {
2285     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2286     do_g = do_G = FALSE; /* Break g/G loop */
2287     }
2288     }
2289    
2290 nigel 29 for (i = 0; i < count * 2; i += 2)
2291 nigel 3 {
2292 nigel 57 if (use_offsets[i] < 0)
2293 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2294     else
2295     {
2296     fprintf(outfile, "%2d: ", i/2);
2297 nigel 63 (void)pchars(bptr + use_offsets[i],
2298     use_offsets[i+1] - use_offsets[i], outfile);
2299 nigel 3 fprintf(outfile, "\n");
2300 nigel 35 if (i == 0)
2301     {
2302     if (do_showrest)
2303     {
2304     fprintf(outfile, " 0+ ");
2305 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2306     outfile);
2307 nigel 35 fprintf(outfile, "\n");
2308     }
2309     }
2310 nigel 3 }
2311     }
2312 ph10 512
2313 ph10 510 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2314 nigel 29
2315     for (i = 0; i < 32; i++)
2316     {
2317     if ((copystrings & (1 << i)) != 0)
2318     {
2319 nigel 91 char copybuffer[256];
2320 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2321 nigel 37 i, copybuffer, sizeof(copybuffer));
2322 nigel 29 if (rc < 0)
2323     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2324     else
2325 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2326 nigel 29 }
2327     }
2328    
2329 nigel 91 for (copynamesptr = copynames;
2330     *copynamesptr != 0;
2331     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2332     {
2333     char copybuffer[256];
2334     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2335     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2336     if (rc < 0)
2337     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2338     else
2339     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2340     }
2341    
2342 nigel 29 for (i = 0; i < 32; i++)
2343     {
2344     if ((getstrings & (1 << i)) != 0)
2345     {
2346     const char *substring;
2347 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2348 nigel 29 i, &substring);
2349     if (rc < 0)
2350     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2351     else
2352     {
2353     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2354 nigel 49 pcre_free_substring(substring);
2355 nigel 29 }
2356     }
2357     }
2358    
2359 nigel 91 for (getnamesptr = getnames;
2360     *getnamesptr != 0;
2361     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2362     {
2363     const char *substring;
2364     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2365     count, (char *)getnamesptr, &substring);
2366     if (rc < 0)
2367     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2368     else
2369     {
2370     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2371     pcre_free_substring(substring);
2372     }
2373     }
2374    
2375 nigel 29 if (getlist)
2376     {
2377     const char **stringlist;
2378 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2379 nigel 29 &stringlist);
2380     if (rc < 0)
2381     fprintf(outfile, "get substring list failed %d\n", rc);
2382     else
2383     {
2384     for (i = 0; i < count; i++)
2385     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2386     if (stringlist[i] != NULL)
2387     fprintf(outfile, "string list not terminated by NULL\n");
2388 nigel 49 /* free((void *)stringlist); */
2389     pcre_free_substring_list(stringlist);
2390 nigel 29 }
2391     }
2392 nigel 39 }
2393 nigel 29
2394 nigel 75 /* There was a partial match */
2395    
2396     else if (count == PCRE_ERROR_PARTIAL)
2397     {
2398 ph10 510 if (markptr == NULL) fprintf(outfile, "Partial match");
2399     else fprintf(outfile, "Partial match, mark=%s", markptr);
2400 ph10 426 if (use_size_offsets > 1)
2401     {
2402     fprintf(outfile, ": ");
2403     pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2404 ph10 461 outfile);
2405     }
2406 nigel 77 fprintf(outfile, "\n");
2407 nigel 75 break; /* Out of the /g loop */
2408     }
2409    
2410 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2411 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2412     to advance the start offset, and continue. We won't be at the end of the
2413     string - that was checked before setting g_notempty.
2414 nigel 39
2415 ph10 150 Complication arises in the case when the newline option is "any" or
2416 ph10 149 "anycrlf". If the previous match was at the end of a line terminated by
2417     CRLF, an advance of one character just passes the \r, whereas we should
2418     prefer the longer newline sequence, as does the code in pcre_exec().
2419     Fudge the offset value to achieve this.
2420 ph10 144
2421 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2422     character, not one byte. */
2423    
2424 nigel 3 else
2425     {
2426 nigel 41 if (g_notempty != 0)
2427 nigel 35 {
2428 nigel 73 int onechar = 1;
2429 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2430 nigel 57 use_offsets[0] = start_offset;
2431 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2432     {
2433     int d;
2434     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2435 ph10 391 /* Note that these values are always the ASCII ones, even in
2436     EBCDIC environments. CR = 13, NL = 10. */
2437     obits = (d == 13)? PCRE_NEWLINE_CR :
2438     (d == 10)? PCRE_NEWLINE_LF :
2439     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2440 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2441 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2442     }
2443 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2444 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2445 ph10 149 &&
2446 ph10 143 start_offset < len - 1 &&
2447     bptr[start_offset] == '\r' &&
2448     bptr[start_offset+1] == '\n')
2449 ph10 144 onechar++;
2450 ph10 143 else if (use_utf8)
2451 nigel 73 {
2452     while (start_offset + onechar < len)
2453     {
2454     int tb = bptr[start_offset+onechar];
2455     if (tb <= 127) break;
2456     tb &= 0xc0;
2457     if (tb != 0 && tb != 0xc0) onechar++;
2458     }
2459     }
2460     use_offsets[1] = start_offset + onechar;
2461 nigel 35 }
2462 nigel 41 else
2463     {
2464 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2465 nigel 41 {
2466 ph10 512 if (gmatched == 0)
2467 ph10 510 {
2468     if (markptr == NULL) fprintf(outfile, "No match\n");
2469     else fprintf(outfile, "No match, mark = %s\n", markptr);
2470 ph10 512 }
2471 nigel 41 }
2472 nigel 73 else fprintf(outfile, "Error %d\n", count);
2473 nigel 41 break; /* Out of the /g loop */
2474     }
2475 nigel 3 }
2476 nigel 35
2477 nigel 39 /* If not /g or /G we are done */
2478    
2479     if (!do_g && !do_G) break;
2480    
2481 nigel 41 /* If we have matched an empty string, first check to see if we are at
2482 ph10 442 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2483     Perl's /g option does. This turns out to be rather cunning. First we set
2484     PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2485 nigel 47 same point. If this fails (picked up above) we advance to the next
2486 ph10 143 character. */
2487 ph10 142
2488 nigel 41 g_notempty = 0;
2489 ph10 142
2490 nigel 57 if (use_offsets[0] == use_offsets[1])
2491 nigel 41 {
2492 nigel 57 if (use_offsets[0] == len) break;
2493 ph10 442 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2494 nigel 41 }
2495 nigel 39
2496     /* For /g, update the start offset, leaving the rest alone */
2497    
2498 ph10 143 if (do_g) start_offset = use_offsets[1];
2499 nigel 39
2500     /* For /G, update the pointer and length */
2501    
2502     else
2503 nigel 35 {
2504 ph10 143 bptr += use_offsets[1];
2505     len -= use_offsets[1];
2506 nigel 35 }
2507 nigel 39 } /* End of loop for /g and /G */
2508 nigel 91
2509     NEXT_DATA: continue;
2510 nigel 39 } /* End of loop for data lines */
2511 nigel 3
2512 nigel 11 CONTINUE:
2513 nigel 37
2514     #if !defined NOPOSIX
2515 nigel 3 if (posix || do_posix) regfree(&preg);
2516 nigel 37 #endif
2517    
2518 nigel 77 if (re != NULL) new_free(re);
2519     if (extra != NULL) new_free(extra);
2520 nigel 25 if (tables != NULL)
2521     {
2522 nigel 77 new_free((void *)tables);
2523 nigel 25 setlocale(LC_CTYPE, "C");
2524 nigel 93 locale_set = 0;
2525 nigel 25 }
2526 nigel 3 }
2527    
2528 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2529 nigel 77
2530     EXIT:
2531    
2532     if (infile != NULL && infile != stdin) fclose(infile);
2533     if (outfile != NULL && outfile != stdout) fclose(outfile);
2534    
2535     free(buffer);
2536     free(dbuffer);
2537     free(pbuffer);
2538     free(offsets);
2539    
2540     return yield;
2541 nigel 3 }
2542    
2543 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12