/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 537 - (hide annotations) (download)
Tue Jun 8 15:28:26 2010 UTC (2 years, 11 months ago) by ph10
File MIME type: text/plain
File size: 75141 byte(s)
Add newline in pcretest output if the last data line doesn't have one.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
43 nigel 3 #include <ctype.h>
44     #include <stdio.h>
45     #include <string.h>
46     #include <stdlib.h>
47     #include <time.h>
48 nigel 25 #include <locale.h>
49 nigel 75 #include <errno.h>
50 nigel 3
51 ph10 287 #ifdef SUPPORT_LIBREADLINE
52 ph10 343 #ifdef HAVE_UNISTD_H
53 ph10 287 #include <unistd.h>
54 ph10 343 #endif
55 ph10 287 #include <readline/readline.h>
56     #include <readline/history.h>
57     #endif
58 nigel 93
59 ph10 287
60 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
61     input and output without "b"; then I was told that "b" was needed in some
62     environments, so it was added for release 5.0 to both the input and output. (It
63     makes no difference on Unix-like systems.) Later I was told that it is wrong
64     for the input on Windows. I've now abstracted the modes into two macros that
65     are set here, to make it easier to fiddle with them, and removed "b" from the
66     input mode under Windows. */
67    
68     #if defined(_WIN32) || defined(WIN32)
69     #include <io.h> /* For _setmode() */
70     #include <fcntl.h> /* For _O_BINARY */
71     #define INPUT_MODE "r"
72     #define OUTPUT_MODE "wb"
73    
74 ph10 411 #ifndef isatty
75     #define isatty _isatty /* This is what Windows calls them, I'm told, */
76     #endif /* though in some environments they seem to */
77     /* be already defined, hence the #ifndefs. */
78     #ifndef fileno
79 ph10 343 #define fileno _fileno
80 ph10 411 #endif
81 ph10 343
82 nigel 93 #else
83     #include <sys/time.h> /* These two includes are needed */
84     #include <sys/resource.h> /* for setrlimit(). */
85     #define INPUT_MODE "rb"
86     #define OUTPUT_MODE "wb"
87 nigel 91 #endif
88    
89 nigel 93
90 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
91     displaying the results of pcre_study() and we also need to know about the
92     internal macros, structures, and other internal data values; pcretest has
93     "inside information" compared to a program that strictly follows the PCRE API.
94 nigel 37
95 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
96     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
97     appropriately for an application, not for building PCRE. */
98 nigel 77
99 ph10 145 #include "pcre.h"
100 nigel 77 #include "pcre_internal.h"
101    
102 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
103     to keep two copies, we include the source file here, changing the names of the
104     external symbols to prevent clashes. */
105 nigel 77
106 ph10 351 #define _pcre_ucp_gentype ucp_gentype
107 nigel 85 #define _pcre_utf8_table1 utf8_table1
108     #define _pcre_utf8_table1_size utf8_table1_size
109     #define _pcre_utf8_table2 utf8_table2
110     #define _pcre_utf8_table3 utf8_table3
111     #define _pcre_utf8_table4 utf8_table4
112     #define _pcre_utt utt
113     #define _pcre_utt_size utt_size
114 ph10 240 #define _pcre_utt_names utt_names
115 nigel 85 #define _pcre_OP_lengths OP_lengths
116    
117     #include "pcre_tables.c"
118    
119     /* We also need the pcre_printint() function for printing out compiled
120     patterns. This function is in a separate file so that it can be included in
121 ph10 507 pcre_compile.c when that module is compiled with debugging enabled. It needs to
122 ph10 498 know which case is being compiled. */
123 nigel 85
124 ph10 498 #define COMPILING_PCRETEST
125     #include "pcre_printint.src"
126    
127     /* The definition of the macro PRINTABLE, which determines whether to print an
128 nigel 93 output character as-is or as a hex value when showing compiled patterns, is
129 ph10 498 contained in the pcre_printint.src file. We use it here also, in cases when the
130     locale has not been explicitly changed, so as to get consistent output from
131     systems that differ in their output from isprint() even in the "C" locale. */
132 nigel 93
133     #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
134 nigel 85
135 nigel 37 /* It is possible to compile this test program without including support for
136     testing the POSIX interface, though this is not available via the standard
137     Makefile. */
138    
139     #if !defined NOPOSIX
140 nigel 3 #include "pcreposix.h"
141 nigel 37 #endif
142 nigel 3
143 ph10 107 /* It is also possible, for the benefit of the version currently imported into
144     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
145     interface to the DFA matcher (NODFA), and without the doublecheck of the old
146     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
147     UTF8 support if PCRE is built without it. */
148 nigel 79
149 ph10 107 #ifndef SUPPORT_UTF8
150     #ifndef NOUTF8
151     #define NOUTF8
152     #endif
153     #endif
154 nigel 79
155 ph10 107
156 nigel 85 /* Other parameters */
157    
158 nigel 3 #ifndef CLOCKS_PER_SEC
159     #ifdef CLK_TCK
160     #define CLOCKS_PER_SEC CLK_TCK
161     #else
162     #define CLOCKS_PER_SEC 100
163     #endif
164     #endif
165    
166 nigel 93 /* This is the default loop count for timing. */
167    
168 nigel 75 #define LOOPREPEAT 500000
169 nigel 3
170 nigel 85 /* Static variables */
171    
/* Stream that receives the test output; main() points it at stdout. It is at
file scope so that the malloc/free hooks and callout() can write to it. */
172 nigel 3 static FILE *outfile;
/* NOTE(review): not referenced in the visible part of the file - confirm use. */
173     static int log_store = 0;
/* Number of times callout() has reached the callout numbered callout_fail_id. */
174 nigel 63 static int callout_count;
/* Non-zero makes callout() print the captured substrings on every call. */
175     static int callout_extra;
/* callout() yields 1 once callout_count has reached this value. */
176     static int callout_fail_count;
/* Callout number to which the fail count applies. */
177     static int callout_fail_id;
/* NOTE(review): not referenced in the visible part of the file - confirm use. */
178 ph10 210 static int debug_lengths;
/* Non-zero makes callout() echo the subject; callout() clears it afterwards. */
179 nigel 63 static int first_callout;
/* Selects isprint() rather than PRINTABLE() in the PRINTHEX macro. */
180 nigel 93 static int locale_set = 0;
/* Non-zero makes the malloc/free hooks log each call to outfile. */
181 nigel 73 static int show_malloc;
/* Non-zero makes pchars() decode its input as UTF-8. */
182 nigel 67 static int use_utf8;
/* Size passed to the most recent new_malloc() call. */
183 nigel 43 static size_t gotten_store;
184 nigel 3
185 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
186
/* Current size, in bytes, of each of the three buffers below. */
187     static int buffer_size = 50000;
/* Input line buffer filled by extend_inputline(). */
188     static uschar *buffer = NULL;
/* Companion buffer; it is reallocated with the others but its contents are
not copied when the buffers grow. */
189     static uschar *dbuffer = NULL;
/* Copy of the input that callout() indexes by pattern position. */
190 nigel 75 static uschar *pbuffer = NULL;
191 nigel 3
192 nigel 75
193 nigel 49
194     /*************************************************
195 nigel 91 * Read or extend an input line *
196     *************************************************/
197    
198     /* Input lines are read into buffer, but both patterns and data lines can be
199     continued over multiple input lines. In addition, if the buffer fills up, we
200     want to automatically expand it so as to be able to handle extremely large
201     lines that are needed for certain stress tests. When the input buffer is
202     expanded, the other two buffers must also be expanded likewise, and the
203     contents of pbuffer, which are a copy of the input for callouts, must be
204     preserved (for when expansion happens for a data line). This is not the most
205     optimal way of handling this, but hey, this is just a test program!
206    
207     Arguments:
208     f the file to read
209     start where in buffer to start (this *must* be within buffer)
210 ph10 287 prompt for stdin or readline()
211 nigel 91
212     Returns: pointer to the start of new data
213     could be a copy of start, or could be moved
214     NULL if no data read and EOF reached
215     */
216    
217     static uschar *
218 ph10 287 extend_inputline(FILE *f, uschar *start, const char *prompt)
219 nigel 91 {
220     uschar *here = start;
221    
222     for (;;)
223     {
224 ph10 530 int rlen = (int)(buffer_size - (here - buffer));
225 nigel 93
226 nigel 91 if (rlen > 1000)
227     {
228     int dlen;
229 ph10 289
230 ph10 287 /* If libreadline support is required, use readline() to read a line if the
231     input is a terminal. Note that readline() removes the trailing newline, so
232     we must put it back again, to be compatible with fgets(). */
233 ph10 289
234 ph10 287 #ifdef SUPPORT_LIBREADLINE
235     if (isatty(fileno(f)))
236     {
237 ph10 289 size_t len;
238 ph10 287 char *s = readline(prompt);
239     if (s == NULL) return (here == start)? NULL : start;
240     len = strlen(s);
241 ph10 289 if (len > 0) add_history(s);
242 ph10 287 if (len > rlen - 1) len = rlen - 1;
243     memcpy(here, s, len);
244     here[len] = '\n';
245 ph10 289 here[len+1] = 0;
246     free(s);
247 ph10 287 }
248 ph10 289 else
249     #endif
250    
251 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
252 ph10 289
253 ph10 287 {
254 ph10 516 if (f == stdin) printf("%s", prompt);
255 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
256     return (here == start)? NULL : start;
257 ph10 289 }
258    
259 nigel 91 dlen = (int)strlen((char *)here);
260     if (dlen > 0 && here[dlen - 1] == '\n') return start;
261     here += dlen;
262     }
263    
264     else
265     {
266     int new_buffer_size = 2*buffer_size;
267     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
268     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
269     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
270    
271     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
272     {
273     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
274     exit(1);
275     }
276    
277     memcpy(new_buffer, buffer, buffer_size);
278     memcpy(new_pbuffer, pbuffer, buffer_size);
279    
280     buffer_size = new_buffer_size;
281    
282     start = new_buffer + (start - buffer);
283     here = new_buffer + (here - buffer);
284    
285     free(buffer);
286     free(dbuffer);
287     free(pbuffer);
288    
289     buffer = new_buffer;
290     dbuffer = new_dbuffer;
291     pbuffer = new_pbuffer;
292     }
293     }
294    
295     return NULL; /* Control never gets here */
296     }
297    
298    
299    
300    
301    
302    
303    
/*************************************************
*          Read number from string               *
*************************************************/

/* A hand-written replacement for strtoul(), which SunOS4 lacks. It is only
used for unpicking arguments, so it is kept simple: leading white space is
skipped, then a run of decimal digits is accumulated. There is no sign
handling and no overflow check.

Arguments:
  str        string to be converted
  endptr     where to put the end pointer (first character not consumed)

Returns:     the value as an int; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
unsigned char *cursor = str;
int total = 0;

for (; *cursor != 0 && isspace(*cursor); cursor++)
  ;

for (; isdigit(*cursor); cursor++)
  total = total * 10 + (int)(*cursor - '0');

*endptr = cursor;
return total;
}
328    
329    
330    
331 nigel 49
332     /*************************************************
333     * Convert UTF-8 string to value *
334     *************************************************/
335    
336     /* This function takes one or more bytes that represents a UTF-8 character,
337     and returns the value of the character.
338    
339     Argument:
340 nigel 91 utf8bytes a pointer to the byte vector
341     vptr a pointer to an int to receive the value
342 nigel 49
343 nigel 91 Returns: > 0 => the number of bytes consumed
344     -6 to 0 => malformed UTF-8 character at offset = (-return)
345 nigel 49 */
346    
347 nigel 79 #if !defined NOUTF8
348    
349 nigel 67 static int
350 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
351 nigel 49 {
352 nigel 91 int c = *utf8bytes++;
353 nigel 49 int d = c;
354     int i, j, s;
355    
356     for (i = -1; i < 6; i++) /* i is number of additional bytes */
357     {
358     if ((d & 0x80) == 0) break;
359     d <<= 1;
360     }
361    
362     if (i == -1) { *vptr = c; return 1; } /* ascii character */
363     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
364    
365     /* i now has a value in the range 1-5 */
366    
367 nigel 59 s = 6*i;
368 nigel 85 d = (c & utf8_table3[i]) << s;
369 nigel 49
370     for (j = 0; j < i; j++)
371     {
372 nigel 91 c = *utf8bytes++;
373 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
374 nigel 59 s -= 6;
375 nigel 49 d |= (c & 0x3f) << s;
376     }
377    
378     /* Check that encoding was the correct unique one */
379    
380 nigel 85 for (j = 0; j < utf8_table1_size; j++)
381     if (d <= utf8_table1[j]) break;
382 nigel 49 if (j != i) return -(i+1);
383    
384     /* Valid value */
385    
386     *vptr = d;
387     return i+1;
388     }
389    
390 nigel 79 #endif
391 nigel 49
392    
393 nigel 79
394 nigel 63 /*************************************************
395 nigel 85 * Convert character value to UTF-8 *
396     *************************************************/
397    
398     /* This function takes an integer value in the range 0 - 0x7fffffff
399     and encodes it as a UTF-8 character in 0 to 6 bytes.
400    
401     Arguments:
402     cvalue the character value
403 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
404 nigel 85
405     Returns: number of characters placed in the buffer
406     */
407    
408 nigel 93 #if !defined NOUTF8
409    
410 nigel 85 static int
411 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
412 nigel 85 {
413     register int i, j;
414     for (i = 0; i < utf8_table1_size; i++)
415     if (cvalue <= utf8_table1[i]) break;
416 nigel 91 utf8bytes += i;
417 nigel 85 for (j = i; j > 0; j--)
418     {
419 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
420 nigel 85 cvalue >>= 6;
421     }
422 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
423 nigel 85 return i + 1;
424     }
425    
426 nigel 93 #endif
427 nigel 85
428    
429 nigel 93
430 nigel 85 /*************************************************
431 nigel 63 * Print character string *
432     *************************************************/
433 nigel 49
434 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
435     mode. Yields number of characters printed. If handed a NULL file, just counts
436     chars without printing. */
437 nigel 49
438 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
439 nigel 3 {
440 nigel 85 int c = 0;
441 nigel 63 int yield = 0;
442 nigel 3
443 nigel 63 while (length-- > 0)
444 nigel 3 {
445 nigel 79 #if !defined NOUTF8
446 nigel 67 if (use_utf8)
447 nigel 63 {
448     int rc = utf82ord(p, &c);
449 nigel 3
450 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
451     {
452     length -= rc - 1;
453     p += rc;
454 nigel 93 if (PRINTHEX(c))
455 nigel 63 {
456     if (f != NULL) fprintf(f, "%c", c);
457     yield++;
458     }
459     else
460     {
461 nigel 93 int n = 4;
462     if (f != NULL) fprintf(f, "\\x{%02x}", c);
463     yield += (n <= 0x000000ff)? 2 :
464     (n <= 0x00000fff)? 3 :
465     (n <= 0x0000ffff)? 4 :
466     (n <= 0x000fffff)? 5 : 6;
467 nigel 63 }
468     continue;
469     }
470     }
471 nigel 79 #endif
472 nigel 3
473 nigel 63 /* Not UTF-8, or malformed UTF-8 */
474    
475 nigel 93 c = *p++;
476     if (PRINTHEX(c))
477 nigel 3 {
478 nigel 63 if (f != NULL) fprintf(f, "%c", c);
479     yield++;
480 nigel 3 }
481 nigel 63 else
482 nigel 3 {
483 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
484     yield += 4;
485     }
486     }
487 nigel 3
488 nigel 63 return yield;
489     }
490 nigel 23
491 nigel 3
492 nigel 23
493 nigel 63 /*************************************************
494     * Callout function *
495     *************************************************/
496 nigel 3
497 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
498     the match. Yield zero unless more callouts than the fail count, or the callout
499     data is not zero. */
500 nigel 3
501 nigel 63 static int callout(pcre_callout_block *cb)
502     {
503     FILE *f = (first_callout | callout_extra)? outfile : NULL;
504 nigel 75 int i, pre_start, post_start, subject_length;
505 nigel 3
506 nigel 63 if (callout_extra)
507     {
508     fprintf(f, "Callout %d: last capture = %d\n",
509     cb->callout_number, cb->capture_last);
510 nigel 3
511 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
512     {
513     if (cb->offset_vector[i] < 0)
514     fprintf(f, "%2d: <unset>\n", i/2);
515     else
516     {
517     fprintf(f, "%2d: ", i/2);
518     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
519     cb->offset_vector[i+1] - cb->offset_vector[i], f);
520     fprintf(f, "\n");
521     }
522     }
523     }
524 nigel 3
525 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
526     datails. On subsequent calls in the same match, we use pchars just to find the
527     printed lengths of the substrings. */
528 nigel 3
529 nigel 63 if (f != NULL) fprintf(f, "--->");
530 nigel 3
531 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
532     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
533     cb->current_position - cb->start_match, f);
534 nigel 3
535 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
536    
537 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
538     cb->subject_length - cb->current_position, f);
539 nigel 3
540 nigel 63 if (f != NULL) fprintf(f, "\n");
541 nigel 9
542 nigel 63 /* Always print appropriate indicators, with callout number if not already
543 nigel 75 shown. For automatic callouts, show the pattern offset. */
544 nigel 3
545 nigel 75 if (cb->callout_number == 255)
546     {
547     fprintf(outfile, "%+3d ", cb->pattern_position);
548     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
549     }
550     else
551     {
552     if (callout_extra) fprintf(outfile, " ");
553     else fprintf(outfile, "%3d ", cb->callout_number);
554     }
555 nigel 3
556 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
557     fprintf(outfile, "^");
558 nigel 3
559 nigel 63 if (post_start > 0)
560     {
561     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
562     fprintf(outfile, "^");
563 nigel 3 }
564    
565 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
566     fprintf(outfile, " ");
567    
568     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
569     pbuffer + cb->pattern_position);
570    
571 nigel 63 fprintf(outfile, "\n");
572     first_callout = 0;
573 nigel 3
574 nigel 71 if (cb->callout_data != NULL)
575 nigel 49 {
576 nigel 71 int callout_data = *((int *)(cb->callout_data));
577     if (callout_data != 0)
578     {
579     fprintf(outfile, "Callout data = %d\n", callout_data);
580     return callout_data;
581     }
582 nigel 63 }
583 nigel 49
584 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
585     (++callout_count >= callout_fail_count)? 1 : 0;
586 nigel 3 }
587    
588    
589 nigel 63 /*************************************************
590 nigel 73 * Local malloc functions *
591 nigel 63 *************************************************/
592 nigel 3
593     /* Alternative malloc function, to test functionality and show the size of the
594     compiled re. */
595    
596     static void *new_malloc(size_t size)
597     {
598 nigel 73 void *block = malloc(size);
599 nigel 43 gotten_store = size;
600 nigel 73 if (show_malloc)
601 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
602 nigel 73 return block;
603 nigel 3 }
604    
605 nigel 73 static void new_free(void *block)
606     {
607     if (show_malloc)
608     fprintf(outfile, "free %p\n", block);
609     free(block);
610     }
611 nigel 3
612    
613 nigel 73 /* For recursion malloc/free, to test stacking calls */
614    
615     static void *stack_malloc(size_t size)
616     {
617     void *block = malloc(size);
618     if (show_malloc)
619 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
620 nigel 73 return block;
621     }
622    
623     static void stack_free(void *block)
624     {
625     if (show_malloc)
626     fprintf(outfile, "stack_free %p\n", block);
627     free(block);
628     }
629    
630    
631 nigel 63 /*************************************************
632     * Call pcre_fullinfo() *
633     *************************************************/
634 nigel 43
635     /* Get one piece of information from the pcre_fullinfo() function */
636    
637     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
638     {
639     int rc;
640     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
641     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
642     }
643    
644    
645    
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverse the byte order of a 2-byte or a 4-byte quantity.

Arguments:
  value     the value to flip
  n         2 to swap the two low-order bytes; any other value is treated
              as 4 and reverses the four low-order bytes

Returns:    the flipped value; bits above those flipped are not carried over
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
659    
660    
661    
662    
663     /*************************************************
664 nigel 87 * Check match or recursion limit *
665     *************************************************/
666    
667     static int
668     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
669     int start_offset, int options, int *use_offsets, int use_size_offsets,
670     int flag, unsigned long int *limit, int errnumber, const char *msg)
671     {
672     int count;
673     int min = 0;
674     int mid = 64;
675     int max = -1;
676    
677     extra->flags |= flag;
678    
679     for (;;)
680     {
681     *limit = mid;
682    
683     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
684     use_offsets, use_size_offsets);
685    
686     if (count == errnumber)
687     {
688     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
689     min = mid;
690     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
691     }
692    
693     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
694     count == PCRE_ERROR_PARTIAL)
695     {
696     if (mid == min + 1)
697     {
698     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
699     break;
700     }
701     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
702     max = mid;
703     mid = (min + mid)/2;
704     }
705     else break; /* Some other error */
706     }
707    
708     extra->flags &= ~flag;
709     return count;
710     }
711    
712    
713    
714     /*************************************************
715 ph10 227 * Case-independent strncmp() function *
716     *************************************************/
717    
718     /*
719     Arguments:
720     s first string
721     t second string
722     n number of characters to compare
723    
724     Returns: < 0, = 0, or > 0, according to the comparison
725     */
726    
727     static int
728     strncmpic(uschar *s, uschar *t, int n)
729     {
730     while (n--)
731     {
732     int c = tolower(*s++) - tolower(*t++);
733     if (c) return c;
734     }
735     return 0;
736     }
737    
738    
739    
740     /*************************************************
741 nigel 91 * Check newline indicator *
742     *************************************************/
743    
744 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
745     a message and return 0 if there is no match.
746 nigel 91
747     Arguments:
748     p points after the leading '<'
749     f file for error message
750    
751     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
752     */
753    
754     static int
755     check_newline(uschar *p, FILE *f)
756     {
757 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
758     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
759     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
760     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
761     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
762 ph10 231 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
763     if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
764 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
765     return 0;
766     }
767    
768    
769    
/*************************************************
*                Usage function                  *
*************************************************/

/* Write the command line summary to stdout. The lines for readline, -dfa and
-p depend on how the program was built. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);
#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
fputs("\nOptions:\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s output store (memory) used information\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
807    
808    
809    
810     /*************************************************
811 nigel 63 * Main Program *
812     *************************************************/
813 nigel 43
814 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
815     consist of a regular expression, in delimiters and optionally followed by
816     options, followed by a set of test data, terminated by an empty line. */
817    
818     int main(int argc, char **argv)
819     {
820     FILE *infile = stdin;
821     int options = 0;
822     int study_options = 0;
823 ph10 386 int default_find_match_limit = FALSE;
824 nigel 3 int op = 1;
825     int timeit = 0;
826 nigel 93 int timeitm = 0;
827 nigel 3 int showinfo = 0;
828 nigel 31 int showstore = 0;
829 nigel 87 int quiet = 0;
830 nigel 53 int size_offsets = 45;
831     int size_offsets_max;
832 nigel 77 int *offsets = NULL;
833 nigel 53 #if !defined NOPOSIX
834 nigel 3 int posix = 0;
835 nigel 53 #endif
836 nigel 3 int debug = 0;
837 nigel 11 int done = 0;
838 nigel 77 int all_use_dfa = 0;
839     int yield = 0;
840 nigel 91 int stack_size;
841 nigel 3
842 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
843     that 1024 is plenty long enough for the few names we'll be testing. */
844 nigel 69
845 nigel 91 uschar copynames[1024];
846     uschar getnames[1024];
847    
848     uschar *copynamesptr;
849     uschar *getnamesptr;
850    
851 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
852 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
853 nigel 69
854 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
855     dbuffer = (unsigned char *)malloc(buffer_size);
856     pbuffer = (unsigned char *)malloc(buffer_size);
857 nigel 69
858 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
859 nigel 3
860 nigel 93 outfile = stdout;
861    
862     /* The following _setmode() stuff is some Windows magic that tells its runtime
863     library to translate CRLF into a single LF character. At least, that's what
864     I've been told: never having used Windows I take this all on trust. Originally
865     it set 0x8000, but then I was advised that _O_BINARY was better. */
866    
867 nigel 75 #if defined(_WIN32) || defined(WIN32)
868 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
869     #endif
870 nigel 75
871 nigel 3 /* Scan options */
872    
873     while (argc > 1 && argv[op][0] == '-')
874     {
875 nigel 63 unsigned char *endptr;
876 nigel 53
877 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
878     showstore = 1;
879 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
880 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
881 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
882     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
883 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
884 nigel 79 #if !defined NODFA
885 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
886 nigel 79 #endif
887 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
888 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
889     *endptr == 0))
890 nigel 53 {
891     op++;
892     argc--;
893     }
894 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
895     {
896     int both = argv[op][2] == 0;
897     int temp;
898     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
899     *endptr == 0))
900     {
901     timeitm = temp;
902     op++;
903     argc--;
904     }
905     else timeitm = LOOPREPEAT;
906     if (both) timeit = timeitm;
907     }
908 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
909     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
910     *endptr == 0))
911     {
912 nigel 93 #if defined(_WIN32) || defined(WIN32)
913 nigel 91 printf("PCRE: -S not supported on this OS\n");
914     exit(1);
915     #else
916     int rc;
917     struct rlimit rlim;
918     getrlimit(RLIMIT_STACK, &rlim);
919     rlim.rlim_cur = stack_size * 1024 * 1024;
920     rc = setrlimit(RLIMIT_STACK, &rlim);
921     if (rc != 0)
922     {
923     printf("PCRE: setrlimit() failed with error %d\n", rc);
924     exit(1);
925     }
926     op++;
927     argc--;
928     #endif
929     }
930 nigel 53 #if !defined NOPOSIX
931 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
932 nigel 53 #endif
933 nigel 63 else if (strcmp(argv[op], "-C") == 0)
934     {
935     int rc;
936 ph10 392 unsigned long int lrc;
937 nigel 63 printf("PCRE version %s\n", pcre_version());
938     printf("Compiled with\n");
939     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
940     printf(" %sUTF-8 support\n", rc? "" : "No ");
941 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
942     printf(" %sUnicode properties support\n", rc? "" : "No ");
943 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
944 ph10 391 /* Note that these values are always the ASCII values, even
945 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
946 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
947     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
948 ph10 150 (rc == -2)? "ANYCRLF" :
949 nigel 93 (rc == -1)? "ANY" : "???");
950 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
951     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
952     "all Unicode newlines");
953 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
954     printf(" Internal link size = %d\n", rc);
955     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
956     printf(" POSIX malloc threshold = %d\n", rc);
957 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
958     printf(" Default match limit = %ld\n", lrc);
959     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
960     printf(" Default recursion depth limit = %ld\n", lrc);
961 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
962     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
963 ph10 121 goto EXIT;
964 nigel 63 }
965 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
966     strcmp(argv[op], "--help") == 0)
967     {
968     usage();
969     goto EXIT;
970     }
971 nigel 3 else
972     {
973 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
974 nigel 93 usage();
975 nigel 77 yield = 1;
976     goto EXIT;
977 nigel 3 }
978     op++;
979     argc--;
980     }
981    
982 nigel 53 /* Get the store for the offsets vector, and remember what it was */
983    
984     size_offsets_max = size_offsets;
985 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
986 nigel 53 if (offsets == NULL)
987     {
988     printf("** Failed to get %d bytes of memory for offsets vector\n",
989 ph10 151 (int)(size_offsets_max * sizeof(int)));
990 nigel 77 yield = 1;
991     goto EXIT;
992 nigel 53 }
993    
994 nigel 3 /* Sort out the input and output files */
995    
996     if (argc > 1)
997     {
998 nigel 93 infile = fopen(argv[op], INPUT_MODE);
999 nigel 3 if (infile == NULL)
1000     {
1001     printf("** Failed to open %s\n", argv[op]);
1002 nigel 77 yield = 1;
1003     goto EXIT;
1004 nigel 3 }
1005     }
1006    
1007     if (argc > 2)
1008     {
1009 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
1010 nigel 3 if (outfile == NULL)
1011     {
1012     printf("** Failed to open %s\n", argv[op+1]);
1013 nigel 77 yield = 1;
1014     goto EXIT;
1015 nigel 3 }
1016     }
1017    
1018     /* Set alternative malloc function */
1019    
1020     pcre_malloc = new_malloc;
1021 nigel 73 pcre_free = new_free;
1022     pcre_stack_malloc = stack_malloc;
1023     pcre_stack_free = stack_free;
1024 nigel 3
1025 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1026 nigel 3
1027 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1028 nigel 3
1029     /* Main loop */
1030    
1031 nigel 11 while (!done)
1032 nigel 3 {
1033     pcre *re = NULL;
1034     pcre_extra *extra = NULL;
1035 nigel 37
1036     #if !defined NOPOSIX /* There are still compilers that require no indent */
1037 nigel 3 regex_t preg;
1038 nigel 45 int do_posix = 0;
1039 nigel 37 #endif
1040    
1041 nigel 7 const char *error;
1042 ph10 512 unsigned char *markptr;
1043 nigel 25 unsigned char *p, *pp, *ppp;
1044 nigel 75 unsigned char *to_file = NULL;
1045 nigel 53 const unsigned char *tables = NULL;
1046 nigel 75 unsigned long int true_size, true_study_size = 0;
1047     size_t size, regex_gotten_store;
1048 ph10 512 int do_mark = 0;
1049 nigel 3 int do_study = 0;
1050 nigel 25 int do_debug = debug;
1051 nigel 35 int do_G = 0;
1052     int do_g = 0;
1053 nigel 25 int do_showinfo = showinfo;
1054 nigel 35 int do_showrest = 0;
1055 nigel 75 int do_flip = 0;
1056 nigel 93 int erroroffset, len, delimiter, poffset;
1057 nigel 3
1058 nigel 67 use_utf8 = 0;
1059 ph10 211 debug_lengths = 1;
1060 nigel 63
1061 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1062 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1063 nigel 63 fflush(outfile);
1064 nigel 3
1065     p = buffer;
1066     while (isspace(*p)) p++;
1067     if (*p == 0) continue;
1068    
1069 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1070 nigel 3
1071 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1072     {
1073 nigel 91 unsigned long int magic, get_options;
1074 nigel 75 uschar sbuf[8];
1075     FILE *f;
1076    
1077     p++;
1078     pp = p + (int)strlen((char *)p);
1079     while (isspace(pp[-1])) pp--;
1080     *pp = 0;
1081    
1082     f = fopen((char *)p, "rb");
1083     if (f == NULL)
1084     {
1085     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1086     continue;
1087     }
1088    
1089     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1090    
1091     true_size =
1092     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1093     true_study_size =
1094     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1095    
1096     re = (real_pcre *)new_malloc(true_size);
1097     regex_gotten_store = gotten_store;
1098    
1099     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1100    
1101     magic = ((real_pcre *)re)->magic_number;
1102     if (magic != MAGIC_NUMBER)
1103     {
1104     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1105     {
1106     do_flip = 1;
1107     }
1108     else
1109     {
1110     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1111     fclose(f);
1112     continue;
1113     }
1114     }
1115    
1116     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1117     do_flip? " (byte-inverted)" : "", p);
1118    
1119     /* Need to know if UTF-8 for printing data strings */
1120    
1121 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1122     use_utf8 = (get_options & PCRE_UTF8) != 0;
1123 nigel 75
1124     /* Now see if there is any following study data */
1125    
1126     if (true_study_size != 0)
1127     {
1128     pcre_study_data *psd;
1129    
1130     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1131     extra->flags = PCRE_EXTRA_STUDY_DATA;
1132    
1133     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1134     extra->study_data = psd;
1135    
1136     if (fread(psd, 1, true_study_size, f) != true_study_size)
1137     {
1138     FAIL_READ:
1139     fprintf(outfile, "Failed to read data from %s\n", p);
1140     if (extra != NULL) new_free(extra);
1141     if (re != NULL) new_free(re);
1142     fclose(f);
1143     continue;
1144     }
1145     fprintf(outfile, "Study data loaded from %s\n", p);
1146     do_study = 1; /* To get the data output if requested */
1147     }
1148     else fprintf(outfile, "No study data\n");
1149    
1150     fclose(f);
1151     goto SHOW_INFO;
1152     }
1153    
1154     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1155     the pattern; if it isn't complete, read more. */
1156    
1157 nigel 3 delimiter = *p++;
1158    
1159 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1160 nigel 3 {
1161 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1162 nigel 3 goto SKIP_DATA;
1163     }
1164    
1165     pp = p;
1166 ph10 530 poffset = (int)(p - buffer);
1167 nigel 3
1168     for(;;)
1169     {
1170 nigel 29 while (*pp != 0)
1171     {
1172     if (*pp == '\\' && pp[1] != 0) pp++;
1173     else if (*pp == delimiter) break;
1174     pp++;
1175     }
1176 nigel 3 if (*pp != 0) break;
1177 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1178 nigel 3 {
1179     fprintf(outfile, "** Unexpected EOF\n");
1180 nigel 11 done = 1;
1181     goto CONTINUE;
1182 nigel 3 }
1183 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1184 nigel 3 }
1185    
1186 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1187     pointer to the correct relative point in the buffer. */
1188    
1189     p = buffer + poffset;
1190    
1191 nigel 29 /* If the first character after the delimiter is backslash, make
1192     the pattern end with backslash. This is purely to provide a way
1193     of testing for the error message when a pattern ends with backslash. */
1194    
1195     if (pp[1] == '\\') *pp++ = '\\';
1196    
1197 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1198     for callouts. */
1199 nigel 3
1200     *pp++ = 0;
1201 nigel 75 strcpy((char *)pbuffer, (char *)p);
1202 nigel 3
1203     /* Look for options after final delimiter */
1204    
1205     options = 0;
1206     study_options = 0;
1207 nigel 31 log_store = showstore; /* default from command line */
1208    
1209 nigel 3 while (*pp != 0)
1210     {
1211     switch (*pp++)
1212     {
1213 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1214 nigel 35 case 'g': do_g = 1; break;
1215 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1216     case 'm': options |= PCRE_MULTILINE; break;
1217     case 's': options |= PCRE_DOTALL; break;
1218     case 'x': options |= PCRE_EXTENDED; break;
1219 nigel 25
1220 nigel 35 case '+': do_showrest = 1; break;
1221 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1222 nigel 93 case 'B': do_debug = 1; break;
1223 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1224 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1225 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1226 nigel 75 case 'F': do_flip = 1; break;
1227 nigel 35 case 'G': do_G = 1; break;
1228 nigel 25 case 'I': do_showinfo = 1; break;
1229 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1230 ph10 512 case 'K': do_mark = 1; break;
1231 nigel 31 case 'M': log_store = 1; break;
1232 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1233 nigel 37
1234     #if !defined NOPOSIX
1235 nigel 3 case 'P': do_posix = 1; break;
1236 nigel 37 #endif
1237    
1238 nigel 3 case 'S': do_study = 1; break;
1239 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1240 ph10 535 case 'W': options |= PCRE_UCP; break;
1241 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1242 ph10 126 case 'Z': debug_lengths = 0; break;
1243 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1244 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1245 nigel 25
1246     case 'L':
1247     ppp = pp;
1248 nigel 93 /* The '\r' test here is so that it works on Windows. */
1249     /* The zero (NUL) test is just in case this is an unterminated line. */
1250     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1251 nigel 25 *ppp = 0;
1252     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1253     {
1254     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1255     goto SKIP_DATA;
1256     }
1257 nigel 93 locale_set = 1;
1258 nigel 25 tables = pcre_maketables();
1259     pp = ppp;
1260     break;
1261    
1262 nigel 75 case '>':
1263     to_file = pp;
1264     while (*pp != 0) pp++;
1265     while (isspace(pp[-1])) pp--;
1266     *pp = 0;
1267     break;
1268    
1269 nigel 91 case '<':
1270     {
1271 ph10 518 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1272 ph10 336 {
1273     options |= PCRE_JAVASCRIPT_COMPAT;
1274 ph10 345 pp += 3;
1275 ph10 336 }
1276     else
1277 ph10 345 {
1278 ph10 336 int x = check_newline(pp, outfile);
1279     if (x == 0) goto SKIP_DATA;
1280     options |= x;
1281     while (*pp++ != '>');
1282 ph10 345 }
1283 nigel 91 }
1284     break;
1285    
1286 nigel 77 case '\r': /* So that it works in Windows */
1287     case '\n':
1288     case ' ':
1289     break;
1290 nigel 75
1291 nigel 3 default:
1292     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1293     goto SKIP_DATA;
1294     }
1295     }
1296    
1297 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1298 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1299     local character tables. */
1300 nigel 3
1301 nigel 37 #if !defined NOPOSIX
1302 nigel 3 if (posix || do_posix)
1303     {
1304     int rc;
1305     int cflags = 0;
1306 nigel 75
1307 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1308     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1309 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1310 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1311     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1312 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1313 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1314 nigel 87
1315 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1316    
1317     /* Compilation failed; go back for another re, skipping to blank line
1318     if non-interactive. */
1319    
1320     if (rc != 0)
1321     {
1322 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1323 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1324     goto SKIP_DATA;
1325     }
1326     }
1327    
1328     /* Handle compiling via the native interface */
1329    
1330     else
1331 nigel 37 #endif /* !defined NOPOSIX */
1332    
1333 nigel 3 {
1334 ph10 412 unsigned long int get_options;
1335 ph10 416
1336 nigel 93 if (timeit > 0)
1337 nigel 3 {
1338     register int i;
1339     clock_t time_taken;
1340     clock_t start_time = clock();
1341 nigel 93 for (i = 0; i < timeit; i++)
1342 nigel 3 {
1343 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1344 nigel 3 if (re != NULL) free(re);
1345     }
1346     time_taken = clock() - start_time;
1347 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1348     (((double)time_taken * 1000.0) / (double)timeit) /
1349 nigel 63 (double)CLOCKS_PER_SEC);
1350 nigel 3 }
1351    
1352 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1353 nigel 3
1354     /* Compilation failed; go back for another re, skipping to blank line
1355     if non-interactive. */
1356    
1357     if (re == NULL)
1358     {
1359     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1360     SKIP_DATA:
1361     if (infile != stdin)
1362     {
1363     for (;;)
1364     {
1365 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
1366 nigel 11 {
1367     done = 1;
1368     goto CONTINUE;
1369     }
1370 nigel 3 len = (int)strlen((char *)buffer);
1371     while (len > 0 && isspace(buffer[len-1])) len--;
1372     if (len == 0) break;
1373     }
1374     fprintf(outfile, "\n");
1375     }
1376 nigel 25 goto CONTINUE;
1377 nigel 3 }
1378 ph10 416
1379     /* Compilation succeeded. It is now possible to set the UTF-8 option from
1380     within the regex; check for this so that we know how to process the data
1381 ph10 412 lines. */
1382 ph10 416
1383 ph10 412 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1384     if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1385 nigel 3
1386 ph10 412 /* Print information if required. There are now two info-returning
1387     functions. The old one has a limited interface and returns only limited
1388     data. Check that it agrees with the newer one. */
1389 nigel 3
1390 nigel 63 if (log_store)
1391     fprintf(outfile, "Memory allocation (code space): %d\n",
1392     (int)(gotten_store -
1393     sizeof(real_pcre) -
1394     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1395    
1396 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1397     and remember the store that was got. */
1398    
1399     true_size = ((real_pcre *)re)->size;
1400     regex_gotten_store = gotten_store;
1401    
1402     /* If /S was present, study the regexp to generate additional info to
1403     help with the matching. */
1404    
1405     if (do_study)
1406     {
1407 nigel 93 if (timeit > 0)
1408 nigel 75 {
1409     register int i;
1410     clock_t time_taken;
1411     clock_t start_time = clock();
1412 nigel 93 for (i = 0; i < timeit; i++)
1413 nigel 75 extra = pcre_study(re, study_options, &error);
1414     time_taken = clock() - start_time;
1415     if (extra != NULL) free(extra);
1416 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1417     (((double)time_taken * 1000.0) / (double)timeit) /
1418 nigel 75 (double)CLOCKS_PER_SEC);
1419     }
1420     extra = pcre_study(re, study_options, &error);
1421     if (error != NULL)
1422     fprintf(outfile, "Failed to study: %s\n", error);
1423     else if (extra != NULL)
1424     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1425     }
1426 ph10 512
1427 ph10 510 /* If /K was present, we set up for handling MARK data. */
1428 ph10 512
1429 ph10 510 if (do_mark)
1430     {
1431     if (extra == NULL)
1432     {
1433     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1434     extra->flags = 0;
1435     }
1436 ph10 512 extra->mark = &markptr;
1437 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
1438 ph10 512 }
1439 nigel 75
1440     /* If the 'F' option was present, we flip the bytes of all the integer
1441     fields in the regex data block and the study block. This is to make it
1442     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1443     compiled on a different architecture. */
1444    
1445     if (do_flip)
1446     {
1447     real_pcre *rre = (real_pcre *)re;
1448 ph10 259 rre->magic_number =
1449 ph10 255 byteflip(rre->magic_number, sizeof(rre->magic_number));
1450 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
1451     rre->options = byteflip(rre->options, sizeof(rre->options));
1452 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1453 ph10 259 rre->top_bracket =
1454 ph10 255 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1455 ph10 259 rre->top_backref =
1456 ph10 255 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1457 ph10 259 rre->first_byte =
1458 ph10 255 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1459 ph10 259 rre->req_byte =
1460 ph10 255 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1461     rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1462 nigel 75 sizeof(rre->name_table_offset));
1463 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1464 nigel 75 sizeof(rre->name_entry_size));
1465 ph10 259 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1466 ph10 255 sizeof(rre->name_count));
1467 nigel 75
1468     if (extra != NULL)
1469     {
1470     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1471     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1472 ph10 455 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1473     rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1474 nigel 75 }
1475     }
1476    
1477     /* Extract information from the compiled data if required */
1478    
1479     SHOW_INFO:
1480    
1481 nigel 93 if (do_debug)
1482     {
1483     fprintf(outfile, "------------------------------------------------------------------\n");
1484 ph10 116 pcre_printint(re, outfile, debug_lengths);
1485 nigel 93 }
1486 ph10 416
1487 ph10 412 /* We already have the options in get_options (see above) */
1488 nigel 93
1489 nigel 25 if (do_showinfo)
1490 nigel 3 {
1491 ph10 412 unsigned long int all_options;
1492 nigel 79 #if !defined NOINFOCHECK
1493 nigel 43 int old_first_char, old_options, old_count;
1494 nigel 79 #endif
1495 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1496 ph10 227 hascrorlf;
1497 nigel 63 int nameentrysize, namecount;
1498     const uschar *nametable;
1499 nigel 3
1500 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1501     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1502     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1503 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1504 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1505 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1506     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1507 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1508 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1509     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1510 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1511 nigel 43
1512 nigel 79 #if !defined NOINFOCHECK
1513 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1514 nigel 3 if (count < 0) fprintf(outfile,
1515 nigel 43 "Error %d from pcre_info()\n", count);
1516 nigel 3 else
1517     {
1518 nigel 43 if (old_count != count) fprintf(outfile,
1519     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1520     old_count);
1521 nigel 37
1522 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1523     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1524     first_char, old_first_char);
1525 nigel 37
1526 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1527     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1528     get_options, old_options);
1529 nigel 43 }
1530 nigel 79 #endif
1531 nigel 43
1532 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1533 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1534 nigel 77 (int)size, (int)regex_gotten_store);
1535 nigel 43
1536     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1537     if (backrefmax > 0)
1538     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1539 nigel 63
1540     if (namecount > 0)
1541     {
1542     fprintf(outfile, "Named capturing subpatterns:\n");
1543     while (namecount-- > 0)
1544     {
1545     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1546     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1547     GET2(nametable, 0));
1548     nametable += nameentrysize;
1549     }
1550     }
1551 ph10 172
1552 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1553 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1554 nigel 63
1555 nigel 75 all_options = ((real_pcre *)re)->options;
1556 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1557 nigel 75
1558 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1559 ph10 518 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1560 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1561     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1562     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1563     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1564 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1565 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1566 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1567     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1568 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1569     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1570     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1571 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1572 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1573 ph10 518 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1574 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1575     ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1576 ph10 172
1577 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1578 nigel 43
1579 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1580 nigel 91 {
1581     case PCRE_NEWLINE_CR:
1582     fprintf(outfile, "Forced newline sequence: CR\n");
1583     break;
1584 nigel 43
1585 nigel 91 case PCRE_NEWLINE_LF:
1586     fprintf(outfile, "Forced newline sequence: LF\n");
1587     break;
1588    
1589     case PCRE_NEWLINE_CRLF:
1590     fprintf(outfile, "Forced newline sequence: CRLF\n");
1591     break;
1592    
1593 ph10 149 case PCRE_NEWLINE_ANYCRLF:
1594     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1595     break;
1596    
1597 nigel 93 case PCRE_NEWLINE_ANY:
1598     fprintf(outfile, "Forced newline sequence: ANY\n");
1599     break;
1600    
1601 nigel 91 default:
1602     break;
1603     }
1604    
1605 nigel 43 if (first_char == -1)
1606     {
1607 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1608 nigel 43 }
1609     else if (first_char < 0)
1610     {
1611     fprintf(outfile, "No first char\n");
1612     }
1613     else
1614     {
1615 nigel 63 int ch = first_char & 255;
1616 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1617 nigel 63 "" : " (caseless)";
1618 nigel 93 if (PRINTHEX(ch))
1619 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1620 nigel 3 else
1621 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1622 nigel 43 }
1623 nigel 37
1624 nigel 43 if (need_char < 0)
1625     {
1626     fprintf(outfile, "No need char\n");
1627 nigel 3 }
1628 nigel 43 else
1629     {
1630 nigel 63 int ch = need_char & 255;
1631 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1632 nigel 63 "" : " (caseless)";
1633 nigel 93 if (PRINTHEX(ch))
1634 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1635 nigel 43 else
1636 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1637 nigel 43 }
1638 nigel 75
1639     /* Don't output study size; at present it is in any case a fixed
1640     value, but it varies, depending on the computer architecture, and
1641     so messes up the test suite. (And with the /F option, it might be
1642     flipped.) */
1643    
1644     if (do_study)
1645     {
1646     if (extra == NULL)
1647     fprintf(outfile, "Study returned NULL\n");
1648     else
1649     {
1650     uschar *start_bits = NULL;
1651 ph10 455 int minlength;
1652 ph10 461
1653 ph10 455 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
1654 ph10 461 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1655    
1656 nigel 75 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1657     if (start_bits == NULL)
1658 ph10 455 fprintf(outfile, "No set of starting bytes\n");
1659 nigel 75 else
1660     {
1661     int i;
1662     int c = 24;
1663     fprintf(outfile, "Starting byte set: ");
1664     for (i = 0; i < 256; i++)
1665     {
1666     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1667     {
1668     if (c > 75)
1669     {
1670     fprintf(outfile, "\n ");
1671     c = 2;
1672     }
1673 nigel 93 if (PRINTHEX(i) && i != ' ')
1674 nigel 75 {
1675     fprintf(outfile, "%c ", i);
1676     c += 2;
1677     }
1678     else
1679     {
1680     fprintf(outfile, "\\x%02x ", i);
1681     c += 5;
1682     }
1683     }
1684     }
1685     fprintf(outfile, "\n");
1686     }
1687     }
1688     }
1689 nigel 3 }
1690    
1691 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1692     that is all. The first 8 bytes of the file are the regex length and then
1693     the study length, in big-endian order. */
1694 nigel 3
1695 nigel 75 if (to_file != NULL)
1696 nigel 3 {
1697 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1698     if (f == NULL)
1699 nigel 3 {
1700 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1701 nigel 3 }
1702 nigel 75 else
1703     {
1704     uschar sbuf[8];
1705 ph10 255 sbuf[0] = (uschar)((true_size >> 24) & 255);
1706     sbuf[1] = (uschar)((true_size >> 16) & 255);
1707     sbuf[2] = (uschar)((true_size >> 8) & 255);
1708     sbuf[3] = (uschar)((true_size) & 255);
1709 ph10 259
1710 ph10 255 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1711     sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1712     sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1713     sbuf[7] = (uschar)((true_study_size) & 255);
1714 nigel 3
1715 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1716     fwrite(re, 1, true_size, f) < true_size)
1717     {
1718     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1719     }
1720 nigel 3 else
1721     {
1722 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1723     if (extra != NULL)
1724 nigel 3 {
1725 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1726     true_study_size)
1727 nigel 3 {
1728 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1729     strerror(errno));
1730 nigel 3 }
1731 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1732 nigel 93
1733 nigel 3 }
1734     }
1735 nigel 75 fclose(f);
1736 nigel 3 }
1737 nigel 77
1738     new_free(re);
1739     if (extra != NULL) new_free(extra);
1740     if (tables != NULL) new_free((void *)tables);
1741 nigel 75 continue; /* With next regex */
1742 nigel 3 }
1743 nigel 75 } /* End of non-POSIX compile */
1744 nigel 3
1745     /* Read data lines and test them */
1746    
1747     for (;;)
1748     {
1749 nigel 87 uschar *q;
1750 ph10 147 uschar *bptr;
1751 nigel 57 int *use_offsets = offsets;
1752 nigel 53 int use_size_offsets = size_offsets;
1753 nigel 63 int callout_data = 0;
1754     int callout_data_set = 0;
1755 nigel 3 int count, c;
1756 nigel 29 int copystrings = 0;
1757 ph10 386 int find_match_limit = default_find_match_limit;
1758 nigel 29 int getstrings = 0;
1759     int getlist = 0;
1760 nigel 39 int gmatched = 0;
1761 nigel 35 int start_offset = 0;
1762 nigel 41 int g_notempty = 0;
1763 nigel 77 int use_dfa = 0;
1764 nigel 3
1765     options = 0;
1766    
1767 nigel 91 *copynames = 0;
1768     *getnames = 0;
1769    
1770     copynamesptr = copynames;
1771     getnamesptr = getnames;
1772    
1773 nigel 63 pcre_callout = callout;
1774     first_callout = 1;
1775     callout_extra = 0;
1776     callout_count = 0;
1777     callout_fail_count = 999999;
1778     callout_fail_id = -1;
1779 nigel 73 show_malloc = 0;
1780 nigel 63
1781 nigel 91 if (extra != NULL) extra->flags &=
1782     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1783    
1784     len = 0;
1785     for (;;)
1786 nigel 11 {
1787 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1788 nigel 91 {
1789 ph10 537 if (len > 0) /* Reached EOF without hitting a newline */
1790     {
1791     fprintf(outfile, "\n");
1792     break;
1793     }
1794 nigel 91 done = 1;
1795     goto CONTINUE;
1796     }
1797     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1798     len = (int)strlen((char *)buffer);
1799     if (buffer[len-1] == '\n') break;
1800 nigel 11 }
1801 nigel 3
1802     while (len > 0 && isspace(buffer[len-1])) len--;
1803     buffer[len] = 0;
1804     if (len == 0) break;
1805    
1806     p = buffer;
1807     while (isspace(*p)) p++;
1808    
1809 ph10 147 bptr = q = dbuffer;
1810 nigel 3 while ((c = *p++) != 0)
1811     {
1812     int i = 0;
1813     int n = 0;
1814 nigel 63
1815 nigel 3 if (c == '\\') switch ((c = *p++))
1816     {
1817     case 'a': c = 7; break;
1818     case 'b': c = '\b'; break;
1819     case 'e': c = 27; break;
1820     case 'f': c = '\f'; break;
1821     case 'n': c = '\n'; break;
1822     case 'r': c = '\r'; break;
1823     case 't': c = '\t'; break;
1824     case 'v': c = '\v'; break;
1825    
1826     case '0': case '1': case '2': case '3':
1827     case '4': case '5': case '6': case '7':
1828     c -= '0';
1829     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1830     c = c * 8 + *p++ - '0';
1831 nigel 91
1832     #if !defined NOUTF8
1833     if (use_utf8 && c > 255)
1834     {
1835     unsigned char buff8[8];
1836     int ii, utn;
1837     utn = ord2utf8(c, buff8);
1838     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1839     c = buff8[ii]; /* Last byte */
1840     }
1841     #endif
1842 nigel 3 break;
1843    
1844     case 'x':
1845 nigel 49
1846     /* Handle \x{..} specially - new Perl thing for utf8 */
1847    
1848 nigel 79 #if !defined NOUTF8
1849 nigel 49 if (*p == '{')
1850     {
1851     unsigned char *pt = p;
1852     c = 0;
1853     while (isxdigit(*(++pt)))
1854     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1855     if (*pt == '}')
1856     {
1857 nigel 67 unsigned char buff8[8];
1858 nigel 49 int ii, utn;
1859 ph10 355 if (use_utf8)
1860 ph10 358 {
1861 ph10 355 utn = ord2utf8(c, buff8);
1862     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1863     c = buff8[ii]; /* Last byte */
1864     }
1865     else
1866     {
1867 ph10 358 if (c > 255)
1868 ph10 355 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1869     "UTF-8 mode is not enabled.\n"
1870     "** Truncation will probably give the wrong result.\n", c);
1871 ph10 358 }
1872 nigel 49 p = pt + 1;
1873     break;
1874     }
1875     /* Not correct form; fall through */
1876     }
1877 nigel 79 #endif
1878 nigel 49
1879     /* Ordinary \x */
1880    
1881 nigel 3 c = 0;
1882     while (i++ < 2 && isxdigit(*p))
1883     {
1884     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1885     p++;
1886     }
1887     break;
1888    
1889 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1890 nigel 3 p--;
1891     continue;
1892    
1893 nigel 75 case '>':
1894     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1895     continue;
1896    
1897 nigel 3 case 'A': /* Option setting */
1898     options |= PCRE_ANCHORED;
1899     continue;
1900    
1901     case 'B':
1902     options |= PCRE_NOTBOL;
1903     continue;
1904    
1905 nigel 29 case 'C':
1906 nigel 63 if (isdigit(*p)) /* Set copy string */
1907     {
1908     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1909     copystrings |= 1 << n;
1910     }
1911     else if (isalnum(*p))
1912     {
1913 nigel 91 uschar *npp = copynamesptr;
1914 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1915 nigel 91 *npp++ = 0;
1916 nigel 67 *npp = 0;
1917 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1918 nigel 63 if (n < 0)
1919 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1920     copynamesptr = npp;
1921 nigel 63 }
1922     else if (*p == '+')
1923     {
1924     callout_extra = 1;
1925     p++;
1926     }
1927     else if (*p == '-')
1928     {
1929     pcre_callout = NULL;
1930     p++;
1931     }
1932     else if (*p == '!')
1933     {
1934     callout_fail_id = 0;
1935     p++;
1936     while(isdigit(*p))
1937     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1938     callout_fail_count = 0;
1939     if (*p == '!')
1940     {
1941     p++;
1942     while(isdigit(*p))
1943     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1944     }
1945     }
1946     else if (*p == '*')
1947     {
1948     int sign = 1;
1949     callout_data = 0;
1950     if (*(++p) == '-') { sign = -1; p++; }
1951     while(isdigit(*p))
1952     callout_data = callout_data * 10 + *p++ - '0';
1953     callout_data *= sign;
1954     callout_data_set = 1;
1955     }
1956 nigel 29 continue;
1957    
1958 nigel 79 #if !defined NODFA
1959 nigel 77 case 'D':
1960 nigel 79 #if !defined NOPOSIX
1961 nigel 77 if (posix || do_posix)
1962     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1963     else
1964 nigel 79 #endif
1965 nigel 77 use_dfa = 1;
1966     continue;
1967    
1968     case 'F':
1969     options |= PCRE_DFA_SHORTEST;
1970     continue;
1971 nigel 79 #endif
1972 nigel 77
1973 nigel 29 case 'G':
1974 nigel 63 if (isdigit(*p))
1975     {
1976     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1977     getstrings |= 1 << n;
1978     }
1979     else if (isalnum(*p))
1980     {
1981 nigel 91 uschar *npp = getnamesptr;
1982 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1983 nigel 91 *npp++ = 0;
1984 nigel 67 *npp = 0;
1985 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1986 nigel 63 if (n < 0)
1987 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1988     getnamesptr = npp;
1989 nigel 63 }
1990 nigel 29 continue;
1991    
1992     case 'L':
1993     getlist = 1;
1994     continue;
1995    
1996 nigel 63 case 'M':
1997     find_match_limit = 1;
1998     continue;
1999    
2000 nigel 37 case 'N':
2001 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
2002     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2003 ph10 461 else
2004 ph10 442 options |= PCRE_NOTEMPTY;
2005 nigel 37 continue;
2006    
2007 nigel 3 case 'O':
2008     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2009 nigel 53 if (n > size_offsets_max)
2010     {
2011     size_offsets_max = n;
2012 nigel 57 free(offsets);
2013 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2014 nigel 53 if (offsets == NULL)
2015     {
2016     printf("** Failed to get %d bytes of memory for offsets vector\n",
2017 ph10 151 (int)(size_offsets_max * sizeof(int)));
2018 nigel 77 yield = 1;
2019     goto EXIT;
2020 nigel 53 }
2021     }
2022     use_size_offsets = n;
2023 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2024 nigel 3 continue;
2025    
2026 nigel 75 case 'P':
2027 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2028 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2029 nigel 75 continue;
2030    
2031 nigel 91 case 'Q':
2032     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2033     if (extra == NULL)
2034     {
2035     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2036     extra->flags = 0;
2037     }
2038     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2039     extra->match_limit_recursion = n;
2040     continue;
2041    
2042     case 'q':
2043     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2044     if (extra == NULL)
2045     {
2046     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2047     extra->flags = 0;
2048     }
2049     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2050     extra->match_limit = n;
2051     continue;
2052    
2053 nigel 79 #if !defined NODFA
2054 nigel 77 case 'R':
2055     options |= PCRE_DFA_RESTART;
2056     continue;
2057 nigel 79 #endif
2058 nigel 77
2059 nigel 73 case 'S':
2060     show_malloc = 1;
2061     continue;
2062 ph10 392
2063 ph10 389 case 'Y':
2064     options |= PCRE_NO_START_OPTIMIZE;
2065 ph10 392 continue;
2066 nigel 73
2067 nigel 3 case 'Z':
2068     options |= PCRE_NOTEOL;
2069     continue;
2070 nigel 71
2071     case '?':
2072     options |= PCRE_NO_UTF8_CHECK;
2073     continue;
2074 nigel 91
2075     case '<':
2076     {
2077     int x = check_newline(p, outfile);
2078     if (x == 0) goto NEXT_DATA;
2079     options |= x;
2080     while (*p++ != '>');
2081     }
2082     continue;
2083 nigel 3 }
2084 nigel 9 *q++ = c;
2085 nigel 3 }
2086 nigel 9 *q = 0;
2087 ph10 530 len = (int)(q - dbuffer);
2088 ph10 537
2089 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
2090 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2091 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
2092 ph10 371
2093 ph10 363 #if !defined NOPOSIX
2094     if (posix || do_posix)
2095     {
2096     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2097 ph10 371 bptr += buffer_size - len - 1;
2098 ph10 363 }
2099 ph10 371 else
2100     #endif
2101 ph10 363 {
2102     memmove(bptr + buffer_size - len, bptr, len);
2103 ph10 371 bptr += buffer_size - len;
2104     }
2105 nigel 3
2106 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
2107     {
2108     printf("**Match limit not relevant for DFA matching: ignored\n");
2109     find_match_limit = 0;
2110     }
2111    
2112 nigel 3 /* Handle matching via the POSIX interface, which does not
2113 nigel 63 support timing or playing with the match limit or callout data. */
2114 nigel 3
2115 nigel 37 #if !defined NOPOSIX
2116 nigel 3 if (posix || do_posix)
2117     {
2118     int rc;
2119     int eflags = 0;
2120 nigel 63 regmatch_t *pmatch = NULL;
2121     if (use_size_offsets > 0)
2122 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2123 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2124     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2125 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2126 nigel 3
2127 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2128 nigel 3
2129     if (rc != 0)
2130     {
2131 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2132 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2133     }
2134 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2135     != 0)
2136     {
2137     fprintf(outfile, "Matched with REG_NOSUB\n");
2138     }
2139 nigel 3 else
2140     {
2141 nigel 7 size_t i;
2142 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
2143 nigel 3 {
2144     if (pmatch[i].rm_so >= 0)
2145     {
2146 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2147 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2148     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2149 nigel 3 fprintf(outfile, "\n");
2150 nigel 35 if (i == 0 && do_showrest)
2151     {
2152     fprintf(outfile, " 0+ ");
2153 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2154     outfile);
2155 nigel 35 fprintf(outfile, "\n");
2156     }
2157 nigel 3 }
2158     }
2159     }
2160 nigel 53 free(pmatch);
2161 nigel 3 }
2162    
2163 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2164 nigel 3
2165 nigel 37 else
2166     #endif /* !defined NOPOSIX */
2167    
2168 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2169 nigel 3 {
2170 ph10 512 markptr = NULL;
2171    
2172 nigel 93 if (timeitm > 0)
2173 nigel 3 {
2174     register int i;
2175     clock_t time_taken;
2176     clock_t start_time = clock();
2177 nigel 77
2178 nigel 79 #if !defined NODFA
2179 nigel 77 if (all_use_dfa || use_dfa)
2180     {
2181     int workspace[1000];
2182 nigel 93 for (i = 0; i < timeitm; i++)
2183 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2184 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2185     sizeof(workspace)/sizeof(int));
2186     }
2187     else
2188 nigel 79 #endif
2189 nigel 77
2190 nigel 93 for (i = 0; i < timeitm; i++)
2191 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2192 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2193 nigel 77
2194 nigel 3 time_taken = clock() - start_time;
2195 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2196     (((double)time_taken * 1000.0) / (double)timeitm) /
2197 nigel 63 (double)CLOCKS_PER_SEC);
2198 nigel 3 }
2199    
2200 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2201 nigel 87 varying limits in order to find the minimum value for the match limit and
2202     for the recursion limit. */
2203 nigel 63
2204     if (find_match_limit)
2205     {
2206     if (extra == NULL)
2207     {
2208 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2209 nigel 63 extra->flags = 0;
2210     }
2211    
2212 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2213 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2214     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2215     PCRE_ERROR_MATCHLIMIT, "match()");
2216 nigel 63
2217 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2218     options|g_notempty, use_offsets, use_size_offsets,
2219     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2220     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2221 nigel 63 }
2222    
2223     /* If callout_data is set, use the interface with additional data */
2224    
2225     else if (callout_data_set)
2226     {
2227     if (extra == NULL)
2228     {
2229 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2230 nigel 63 extra->flags = 0;
2231     }
2232     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2233 nigel 71 extra->callout_data = &callout_data;
2234 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2235     options | g_notempty, use_offsets, use_size_offsets);
2236     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2237     }
2238    
2239     /* The normal case is just to do the match once, with the default
2240     value of match_limit. */
2241    
2242 nigel 79 #if !defined NODFA
2243 nigel 77 else if (all_use_dfa || use_dfa)
2244     {
2245     int workspace[1000];
2246 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2247 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2248     sizeof(workspace)/sizeof(int));
2249     if (count == 0)
2250     {
2251     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2252     count = use_size_offsets/2;
2253     }
2254     }
2255 nigel 79 #endif
2256 nigel 77
2257 nigel 75 else
2258     {
2259     count = pcre_exec(re, extra, (char *)bptr, len,
2260     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2261 nigel 77 if (count == 0)
2262     {
2263     fprintf(outfile, "Matched, but too many substrings\n");
2264     count = use_size_offsets/3;
2265     }
2266 nigel 75 }
2267 nigel 3
2268 nigel 39 /* Matched */
2269    
2270 nigel 3 if (count >= 0)
2271     {
2272 nigel 93 int i, maxcount;
2273    
2274     #if !defined NODFA
2275     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2276     #endif
2277     maxcount = use_size_offsets/3;
2278    
2279     /* This is a check against a lunatic return value. */
2280    
2281     if (count > maxcount)
2282     {
2283     fprintf(outfile,
2284     "** PCRE error: returned count %d is too big for offset size %d\n",
2285     count, use_size_offsets);
2286     count = use_size_offsets/3;
2287     if (do_g || do_G)
2288     {
2289     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2290     do_g = do_G = FALSE; /* Break g/G loop */
2291     }
2292     }
2293    
2294 nigel 29 for (i = 0; i < count * 2; i += 2)
2295 nigel 3 {
2296 nigel 57 if (use_offsets[i] < 0)
2297 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2298     else
2299     {
2300     fprintf(outfile, "%2d: ", i/2);
2301 nigel 63 (void)pchars(bptr + use_offsets[i],
2302     use_offsets[i+1] - use_offsets[i], outfile);
2303 nigel 3 fprintf(outfile, "\n");
2304 nigel 35 if (i == 0)
2305     {
2306     if (do_showrest)
2307     {
2308     fprintf(outfile, " 0+ ");
2309 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2310     outfile);
2311 nigel 35 fprintf(outfile, "\n");
2312     }
2313     }
2314 nigel 3 }
2315     }
2316 ph10 512
2317 ph10 510 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2318 nigel 29
2319     for (i = 0; i < 32; i++)
2320     {
2321     if ((copystrings & (1 << i)) != 0)
2322     {
2323 nigel 91 char copybuffer[256];
2324 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2325 nigel 37 i, copybuffer, sizeof(copybuffer));
2326 nigel 29 if (rc < 0)
2327     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2328     else
2329 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2330 nigel 29 }
2331     }
2332    
2333 nigel 91 for (copynamesptr = copynames;
2334     *copynamesptr != 0;
2335     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2336     {
2337     char copybuffer[256];
2338     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2339     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2340     if (rc < 0)
2341     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2342     else
2343     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2344     }
2345    
2346 nigel 29 for (i = 0; i < 32; i++)
2347     {
2348     if ((getstrings & (1 << i)) != 0)
2349     {
2350     const char *substring;
2351 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2352 nigel 29 i, &substring);
2353     if (rc < 0)
2354     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2355     else
2356     {
2357     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2358 nigel 49 pcre_free_substring(substring);
2359 nigel 29 }
2360     }
2361     }
2362    
2363 nigel 91 for (getnamesptr = getnames;
2364     *getnamesptr != 0;
2365     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2366     {
2367     const char *substring;
2368     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2369     count, (char *)getnamesptr, &substring);
2370     if (rc < 0)
2371     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2372     else
2373     {
2374     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2375     pcre_free_substring(substring);
2376     }
2377     }
2378    
2379 nigel 29 if (getlist)
2380     {
2381     const char **stringlist;
2382 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2383 nigel 29 &stringlist);
2384     if (rc < 0)
2385     fprintf(outfile, "get substring list failed %d\n", rc);
2386     else
2387     {
2388     for (i = 0; i < count; i++)
2389     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2390     if (stringlist[i] != NULL)
2391     fprintf(outfile, "string list not terminated by NULL\n");
2392 nigel 49 /* free((void *)stringlist); */
2393     pcre_free_substring_list(stringlist);
2394 nigel 29 }
2395     }
2396 nigel 39 }
2397 nigel 29
2398 nigel 75 /* There was a partial match */
2399    
2400     else if (count == PCRE_ERROR_PARTIAL)
2401     {
2402 ph10 510 if (markptr == NULL) fprintf(outfile, "Partial match");
2403     else fprintf(outfile, "Partial match, mark=%s", markptr);
2404 ph10 426 if (use_size_offsets > 1)
2405     {
2406     fprintf(outfile, ": ");
2407     pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2408 ph10 461 outfile);
2409     }
2410 nigel 77 fprintf(outfile, "\n");
2411 nigel 75 break; /* Out of the /g loop */
2412     }
2413    
2414 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2415 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2416     to advance the start offset, and continue. We won't be at the end of the
2417     string - that was checked before setting g_notempty.
2418 nigel 39
2419 ph10 150 Complication arises in the case when the newline option is "any" or
2420 ph10 149 "anycrlf". If the previous match was at the end of a line terminated by
2421     CRLF, an advance of one character just passes the \r, whereas we should
2422     prefer the longer newline sequence, as does the code in pcre_exec().
2423     Fudge the offset value to achieve this.
2424 ph10 144
2425 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2426     character, not one byte. */
2427    
2428 nigel 3 else
2429     {
2430 nigel 41 if (g_notempty != 0)
2431 nigel 35 {
2432 nigel 73 int onechar = 1;
2433 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2434 nigel 57 use_offsets[0] = start_offset;
2435 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2436     {
2437     int d;
2438     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2439 ph10 391 /* Note that these values are always the ASCII ones, even in
2440     EBCDIC environments. CR = 13, NL = 10. */
2441     obits = (d == 13)? PCRE_NEWLINE_CR :
2442     (d == 10)? PCRE_NEWLINE_LF :
2443     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2444 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2445 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2446     }
2447 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2448 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2449 ph10 149 &&
2450 ph10 143 start_offset < len - 1 &&
2451     bptr[start_offset] == '\r' &&
2452     bptr[start_offset+1] == '\n')
2453 ph10 144 onechar++;
2454 ph10 143 else if (use_utf8)
2455 nigel 73 {
2456     while (start_offset + onechar < len)
2457     {
2458     int tb = bptr[start_offset+onechar];
2459     if (tb <= 127) break;
2460     tb &= 0xc0;
2461     if (tb != 0 && tb != 0xc0) onechar++;
2462     }
2463     }
2464     use_offsets[1] = start_offset + onechar;
2465 nigel 35 }
2466 nigel 41 else
2467     {
2468 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2469 nigel 41 {
2470 ph10 512 if (gmatched == 0)
2471 ph10 510 {
2472     if (markptr == NULL) fprintf(outfile, "No match\n");
2473     else fprintf(outfile, "No match, mark = %s\n", markptr);
2474 ph10 512 }
2475 nigel 41 }
2476 nigel 73 else fprintf(outfile, "Error %d\n", count);
2477 nigel 41 break; /* Out of the /g loop */
2478     }
2479 nigel 3 }
2480 nigel 35
2481 nigel 39 /* If not /g or /G we are done */
2482    
2483     if (!do_g && !do_G) break;
2484    
2485 nigel 41 /* If we have matched an empty string, first check to see if we are at
2486 ph10 442 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2487     Perl's /g option does. This turns out to be rather cunning. First we set
2488     PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2489 nigel 47 same point. If this fails (picked up above) we advance to the next
2490 ph10 143 character. */
2491 ph10 142
2492 nigel 41 g_notempty = 0;
2493 ph10 142
2494 nigel 57 if (use_offsets[0] == use_offsets[1])
2495 nigel 41 {
2496 nigel 57 if (use_offsets[0] == len) break;
2497 ph10 442 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2498 nigel 41 }
2499 nigel 39
2500     /* For /g, update the start offset, leaving the rest alone */
2501    
2502 ph10 143 if (do_g) start_offset = use_offsets[1];
2503 nigel 39
2504     /* For /G, update the pointer and length */
2505    
2506     else
2507 nigel 35 {
2508 ph10 143 bptr += use_offsets[1];
2509     len -= use_offsets[1];
2510 nigel 35 }
2511 nigel 39 } /* End of loop for /g and /G */
2512 nigel 91
2513     NEXT_DATA: continue;
2514 nigel 39 } /* End of loop for data lines */
2515 nigel 3
2516 nigel 11 CONTINUE:
2517 nigel 37
2518     #if !defined NOPOSIX
2519 nigel 3 if (posix || do_posix) regfree(&preg);
2520 nigel 37 #endif
2521    
2522 nigel 77 if (re != NULL) new_free(re);
2523     if (extra != NULL) new_free(extra);
2524 nigel 25 if (tables != NULL)
2525     {
2526 nigel 77 new_free((void *)tables);
2527 nigel 25 setlocale(LC_CTYPE, "C");
2528 nigel 93 locale_set = 0;
2529 nigel 25 }
2530 nigel 3 }
2531    
2532 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2533 nigel 77
2534     EXIT:
2535    
2536     if (infile != NULL && infile != stdin) fclose(infile);
2537     if (outfile != NULL && outfile != stdout) fclose(outfile);
2538    
2539     free(buffer);
2540     free(dbuffer);
2541     free(pbuffer);
2542     free(offsets);
2543    
2544     return yield;
2545 nigel 3 }
2546    
2547 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12