/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 512 - (hide annotations) (download)
Tue Mar 30 11:11:52 2010 UTC (4 years, 8 months ago) by ph10
File MIME type: text/plain
File size: 74877 byte(s)
Fix compile problems when heap is in use

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50 nigel 3
51 ph10 287 #ifdef SUPPORT_LIBREADLINE
52 ph10 343 #ifdef HAVE_UNISTD_H
53 ph10 287 #include <unistd.h>
54 ph10 343 #endif
55 ph10 287 #include <readline/readline.h>
56     #include <readline/history.h>
57     #endif
58 nigel 93
59 ph10 287
60 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
61     input and output without "b"; then I was told that "b" was needed in some
62     environments, so it was added for release 5.0 to both the input and output. (It
63     makes no difference on Unix-like systems.) Later I was told that it is wrong
64     for the input on Windows. I've now abstracted the modes into two macros that
65     are set here, to make it easier to fiddle with them, and removed "b" from the
66     input mode under Windows. */
67    
68     #if defined(_WIN32) || defined(WIN32)
69     #include <io.h> /* For _setmode() */
70     #include <fcntl.h> /* For _O_BINARY */
71     #define INPUT_MODE "r"
72     #define OUTPUT_MODE "wb"
73    
74 ph10 411 #ifndef isatty
75     #define isatty _isatty /* This is what Windows calls them, I'm told, */
76     #endif /* though in some environments they seem to */
77     /* be already defined, hence the #ifndefs. */
78     #ifndef fileno
79 ph10 343 #define fileno _fileno
80 ph10 411 #endif
81 ph10 343
82 nigel 93 #else
83     #include <sys/time.h> /* These two includes are needed */
84     #include <sys/resource.h> /* for setrlimit(). */
85     #define INPUT_MODE "rb"
86     #define OUTPUT_MODE "wb"
87 nigel 91 #endif
88    
89 nigel 93
90 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
91     displaying the results of pcre_study() and we also need to know about the
92     internal macros, structures, and other internal data values; pcretest has
93     "inside information" compared to a program that strictly follows the PCRE API.
94 nigel 37
95 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
96     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
97     appropriately for an application, not for building PCRE. */
98 nigel 77
99 ph10 145 #include "pcre.h"
100 nigel 77 #include "pcre_internal.h"
101    
102 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
103     to keep two copies, we include the source file here, changing the names of the
104     external symbols to prevent clashes. */
105 nigel 77
106 ph10 351 #define _pcre_ucp_gentype ucp_gentype
107 nigel 85 #define _pcre_utf8_table1 utf8_table1
108     #define _pcre_utf8_table1_size utf8_table1_size
109     #define _pcre_utf8_table2 utf8_table2
110     #define _pcre_utf8_table3 utf8_table3
111     #define _pcre_utf8_table4 utf8_table4
112     #define _pcre_utt utt
113     #define _pcre_utt_size utt_size
114 ph10 240 #define _pcre_utt_names utt_names
115 nigel 85 #define _pcre_OP_lengths OP_lengths
116    
117     #include "pcre_tables.c"
118    
119     /* We also need the pcre_printint() function for printing out compiled
120     patterns. This function is in a separate file so that it can be included in
121 ph10 507 pcre_compile.c when that module is compiled with debugging enabled. It needs to
122 ph10 498 know which case is being compiled. */
123 nigel 85
124 ph10 498 #define COMPILING_PCRETEST
125     #include "pcre_printint.src"
126    
127     /* The definition of the macro PRINTABLE, which determines whether to print an
128 nigel 93 output character as-is or as a hex value when showing compiled patterns, is
129 ph10 498 contained in the printint.src file. We use it here also, in cases when the
130     locale has not been explicitly changed, so as to get consistent output from
131     systems that differ in their output from isprint() even in the "C" locale. */
132 nigel 93
133     #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
134 nigel 85
135 nigel 37 /* It is possible to compile this test program without including support for
136     testing the POSIX interface, though this is not available via the standard
137     Makefile. */
138    
139     #if !defined NOPOSIX
140 nigel 3 #include "pcreposix.h"
141 nigel 37 #endif
142 nigel 3
143 ph10 107 /* It is also possible, for the benefit of the version currently imported into
144     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
145     interface to the DFA matcher (NODFA), and without the doublecheck of the old
146     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
147     UTF8 support if PCRE is built without it. */
148 nigel 79
149 ph10 107 #ifndef SUPPORT_UTF8
150     #ifndef NOUTF8
151     #define NOUTF8
152     #endif
153     #endif
154 nigel 79
155 ph10 107
156 nigel 85 /* Other parameters */
157    
158 nigel 3 #ifndef CLOCKS_PER_SEC
159     #ifdef CLK_TCK
160     #define CLOCKS_PER_SEC CLK_TCK
161     #else
162     #define CLOCKS_PER_SEC 100
163     #endif
164     #endif
165    
166 nigel 93 /* This is the default loop count for timing. */
167    
168 nigel 75 #define LOOPREPEAT 500000
169 nigel 3
170 nigel 85 /* Static variables */
171    
172 nigel 3 static FILE *outfile;
173     static int log_store = 0;
174 nigel 63 static int callout_count;
175     static int callout_extra;
176     static int callout_fail_count;
177     static int callout_fail_id;
178 ph10 210 static int debug_lengths;
179 nigel 63 static int first_callout;
180 nigel 93 static int locale_set = 0;
181 nigel 73 static int show_malloc;
182 nigel 67 static int use_utf8;
183 nigel 43 static size_t gotten_store;
184 nigel 3
185 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
186    
187     static int buffer_size = 50000;
188     static uschar *buffer = NULL;
189     static uschar *dbuffer = NULL;
190 nigel 75 static uschar *pbuffer = NULL;
191 nigel 3
192 nigel 75
193 nigel 49
194     /*************************************************
195 nigel 91 * Read or extend an input line *
196     *************************************************/
197    
198     /* Input lines are read into buffer, but both patterns and data lines can be
199     continued over multiple input lines. In addition, if the buffer fills up, we
200     want to automatically expand it so as to be able to handle extremely large
201     lines that are needed for certain stress tests. When the input buffer is
202     expanded, the other two buffers must also be expanded likewise, and the
203     contents of pbuffer, which are a copy of the input for callouts, must be
204     preserved (for when expansion happens for a data line). This is not the most
205     optimal way of handling this, but hey, this is just a test program!
206    
207     Arguments:
208     f the file to read
209     start where in buffer to start (this *must* be within buffer)
210 ph10 287 prompt for stdin or readline()
211 nigel 91
212     Returns: pointer to the start of new data
213     could be a copy of start, or could be moved
214     NULL if no data read and EOF reached
215     */
216    
217     static uschar *
218 ph10 287 extend_inputline(FILE *f, uschar *start, const char *prompt)
219 nigel 91 {
220     uschar *here = start;
221    
222     for (;;)
223     {
224     int rlen = buffer_size - (here - buffer);
225 nigel 93
226 nigel 91 if (rlen > 1000)
227     {
228     int dlen;
229 ph10 289
230 ph10 287 /* If libreadline support is required, use readline() to read a line if the
231     input is a terminal. Note that readline() removes the trailing newline, so
232     we must put it back again, to be compatible with fgets(). */
233 ph10 289
234 ph10 287 #ifdef SUPPORT_LIBREADLINE
235     if (isatty(fileno(f)))
236     {
237 ph10 289 size_t len;
238 ph10 287 char *s = readline(prompt);
239     if (s == NULL) return (here == start)? NULL : start;
240     len = strlen(s);
241 ph10 289 if (len > 0) add_history(s);
242 ph10 287 if (len > rlen - 1) len = rlen - 1;
243     memcpy(here, s, len);
244     here[len] = '\n';
245 ph10 289 here[len+1] = 0;
246     free(s);
247 ph10 287 }
248 ph10 289 else
249     #endif
250    
251 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
252 ph10 289
253 ph10 287 {
254 ph10 289 if (f == stdin) printf(prompt);
255 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
256     return (here == start)? NULL : start;
257 ph10 289 }
258    
259 nigel 91 dlen = (int)strlen((char *)here);
260     if (dlen > 0 && here[dlen - 1] == '\n') return start;
261     here += dlen;
262     }
263    
264     else
265     {
266     int new_buffer_size = 2*buffer_size;
267     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
268     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
269     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
270    
271     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
272     {
273     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
274     exit(1);
275     }
276    
277     memcpy(new_buffer, buffer, buffer_size);
278     memcpy(new_pbuffer, pbuffer, buffer_size);
279    
280     buffer_size = new_buffer_size;
281    
282     start = new_buffer + (start - buffer);
283     here = new_buffer + (here - buffer);
284    
285     free(buffer);
286     free(dbuffer);
287     free(pbuffer);
288    
289     buffer = new_buffer;
290     dbuffer = new_dbuffer;
291     pbuffer = new_pbuffer;
292     }
293     }
294    
295     return NULL; /* Control never gets here */
296     }
297    
298    
299    
300    
301    
302    
303    
304     /*************************************************
305 nigel 63 * Read number from string *
306     *************************************************/
307    
/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple. Leading white space is
skipped, then a run of decimal digits is converted.

Arguments:
  str      string to be converted
  endptr   where to put the end pointer (the first character after the
             digits, or after the white space if there are no digits)

Returns:   the value as an int; 0 if there are no digits. A value that does
             not fit in an int is clamped to INT_MAX (all the digits are
             still consumed) instead of overflowing.
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;

while (isspace(*str)) str++;

for (; isdigit(*str); str++)
  {
  int digit = *str - '0';

  /* result*10 + digit fits in an int only if result <= (INT_MAX-digit)/10 */

  if (result > (INT_MAX - digit)/10) result = INT_MAX;
    else result = result * 10 + digit;
  }

*endptr = str;
return result;
}
328    
329    
330    
331 nigel 49
332     /*************************************************
333     * Convert UTF-8 string to value *
334     *************************************************/
335    
336     /* This function takes one or more bytes that represents a UTF-8 character,
337     and returns the value of the character.
338    
339     Argument:
340 nigel 91 utf8bytes a pointer to the byte vector
341     vptr a pointer to an int to receive the value
342 nigel 49
343 nigel 91 Returns: > 0 => the number of bytes consumed
344     -6 to 0 => malformed UTF-8 character at offset = (-return)
345 nigel 49 */
346    
347 nigel 79 #if !defined NOUTF8
348    
349 nigel 67 static int
350 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
351 nigel 49 {
352 nigel 91 int c = *utf8bytes++;
353 nigel 49 int d = c;
354     int i, j, s;
355    
356     for (i = -1; i < 6; i++) /* i is number of additional bytes */
357     {
358     if ((d & 0x80) == 0) break;
359     d <<= 1;
360     }
361    
362     if (i == -1) { *vptr = c; return 1; } /* ascii character */
363     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
364    
365     /* i now has a value in the range 1-5 */
366    
367 nigel 59 s = 6*i;
368 nigel 85 d = (c & utf8_table3[i]) << s;
369 nigel 49
370     for (j = 0; j < i; j++)
371     {
372 nigel 91 c = *utf8bytes++;
373 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
374 nigel 59 s -= 6;
375 nigel 49 d |= (c & 0x3f) << s;
376     }
377    
378     /* Check that encoding was the correct unique one */
379    
380 nigel 85 for (j = 0; j < utf8_table1_size; j++)
381     if (d <= utf8_table1[j]) break;
382 nigel 49 if (j != i) return -(i+1);
383    
384     /* Valid value */
385    
386     *vptr = d;
387     return i+1;
388     }
389    
390 nigel 79 #endif
391 nigel 49
392    
393 nigel 79
394 nigel 63 /*************************************************
395 nigel 85 * Convert character value to UTF-8 *
396     *************************************************/
397    
398     /* This function takes an integer value in the range 0 - 0x7fffffff
399     and encodes it as a UTF-8 character in 0 to 6 bytes.
400    
401     Arguments:
402     cvalue the character value
403 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
404 nigel 85
405     Returns: number of characters placed in the buffer
406     */
407    
408 nigel 93 #if !defined NOUTF8
409    
410 nigel 85 static int
411 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
412 nigel 85 {
413     register int i, j;
414     for (i = 0; i < utf8_table1_size; i++)
415     if (cvalue <= utf8_table1[i]) break;
416 nigel 91 utf8bytes += i;
417 nigel 85 for (j = i; j > 0; j--)
418     {
419 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
420 nigel 85 cvalue >>= 6;
421     }
422 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
423 nigel 85 return i + 1;
424     }
425    
426 nigel 93 #endif
427 nigel 85
428    
429 nigel 93
430 nigel 85 /*************************************************
431 nigel 63 * Print character string *
432     *************************************************/
433 nigel 49
434 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
435     mode. Yields number of characters printed. If handed a NULL file, just counts
436     chars without printing. */
437 nigel 49
438 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
439 nigel 3 {
440 nigel 85 int c = 0;
441 nigel 63 int yield = 0;
442 nigel 3
443 nigel 63 while (length-- > 0)
444 nigel 3 {
445 nigel 79 #if !defined NOUTF8
446 nigel 67 if (use_utf8)
447 nigel 63 {
448     int rc = utf82ord(p, &c);
449 nigel 3
450 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
451     {
452     length -= rc - 1;
453     p += rc;
454 nigel 93 if (PRINTHEX(c))
455 nigel 63 {
456     if (f != NULL) fprintf(f, "%c", c);
457     yield++;
458     }
459     else
460     {
461 nigel 93 int n = 4;
462     if (f != NULL) fprintf(f, "\\x{%02x}", c);
463     yield += (n <= 0x000000ff)? 2 :
464     (n <= 0x00000fff)? 3 :
465     (n <= 0x0000ffff)? 4 :
466     (n <= 0x000fffff)? 5 : 6;
467 nigel 63 }
468     continue;
469     }
470     }
471 nigel 79 #endif
472 nigel 3
473 nigel 63 /* Not UTF-8, or malformed UTF-8 */
474    
475 nigel 93 c = *p++;
476     if (PRINTHEX(c))
477 nigel 3 {
478 nigel 63 if (f != NULL) fprintf(f, "%c", c);
479     yield++;
480 nigel 3 }
481 nigel 63 else
482 nigel 3 {
483 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
484     yield += 4;
485     }
486     }
487 nigel 3
488 nigel 63 return yield;
489     }
490 nigel 23
491 nigel 3
492 nigel 23
493 nigel 63 /*************************************************
494     * Callout function *
495     *************************************************/
496 nigel 3
497 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
498     the match. Yield zero unless more callouts than the fail count, or the callout
499     data is not zero. */
500 nigel 3
501 nigel 63 static int callout(pcre_callout_block *cb)
502     {
503     FILE *f = (first_callout | callout_extra)? outfile : NULL;
504 nigel 75 int i, pre_start, post_start, subject_length;
505 nigel 3
506 nigel 63 if (callout_extra)
507     {
508     fprintf(f, "Callout %d: last capture = %d\n",
509     cb->callout_number, cb->capture_last);
510 nigel 3
511 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
512     {
513     if (cb->offset_vector[i] < 0)
514     fprintf(f, "%2d: <unset>\n", i/2);
515     else
516     {
517     fprintf(f, "%2d: ", i/2);
518     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
519     cb->offset_vector[i+1] - cb->offset_vector[i], f);
520     fprintf(f, "\n");
521     }
522     }
523     }
524 nigel 3
525 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
526     datails. On subsequent calls in the same match, we use pchars just to find the
527     printed lengths of the substrings. */
528 nigel 3
529 nigel 63 if (f != NULL) fprintf(f, "--->");
530 nigel 3
531 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
532     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
533     cb->current_position - cb->start_match, f);
534 nigel 3
535 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
536    
537 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
538     cb->subject_length - cb->current_position, f);
539 nigel 3
540 nigel 63 if (f != NULL) fprintf(f, "\n");
541 nigel 9
542 nigel 63 /* Always print appropriate indicators, with callout number if not already
543 nigel 75 shown. For automatic callouts, show the pattern offset. */
544 nigel 3
545 nigel 75 if (cb->callout_number == 255)
546     {
547     fprintf(outfile, "%+3d ", cb->pattern_position);
548     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
549     }
550     else
551     {
552     if (callout_extra) fprintf(outfile, " ");
553     else fprintf(outfile, "%3d ", cb->callout_number);
554     }
555 nigel 3
556 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
557     fprintf(outfile, "^");
558 nigel 3
559 nigel 63 if (post_start > 0)
560     {
561     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
562     fprintf(outfile, "^");
563 nigel 3 }
564    
565 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
566     fprintf(outfile, " ");
567    
568     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
569     pbuffer + cb->pattern_position);
570    
571 nigel 63 fprintf(outfile, "\n");
572     first_callout = 0;
573 nigel 3
574 nigel 71 if (cb->callout_data != NULL)
575 nigel 49 {
576 nigel 71 int callout_data = *((int *)(cb->callout_data));
577     if (callout_data != 0)
578     {
579     fprintf(outfile, "Callout data = %d\n", callout_data);
580     return callout_data;
581     }
582 nigel 63 }
583 nigel 49
584 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
585     (++callout_count >= callout_fail_count)? 1 : 0;
586 nigel 3 }
587    
588    
589 nigel 63 /*************************************************
590 nigel 73 * Local malloc functions *
591 nigel 63 *************************************************/
592 nigel 3
593     /* Alternative malloc function, to test functionality and show the size of the
594     compiled re. */
595    
596     static void *new_malloc(size_t size)
597     {
598 nigel 73 void *block = malloc(size);
599 nigel 43 gotten_store = size;
600 nigel 73 if (show_malloc)
601 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
602 nigel 73 return block;
603 nigel 3 }
604    
605 nigel 73 static void new_free(void *block)
606     {
607     if (show_malloc)
608     fprintf(outfile, "free %p\n", block);
609     free(block);
610     }
611 nigel 3
612    
613 nigel 73 /* For recursion malloc/free, to test stacking calls */
614    
615     static void *stack_malloc(size_t size)
616     {
617     void *block = malloc(size);
618     if (show_malloc)
619 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
620 nigel 73 return block;
621     }
622    
623     static void stack_free(void *block)
624     {
625     if (show_malloc)
626     fprintf(outfile, "stack_free %p\n", block);
627     free(block);
628     }
629    
630    
631 nigel 63 /*************************************************
632     * Call pcre_fullinfo() *
633     *************************************************/
634 nigel 43
635     /* Get one piece of information from the pcre_fullinfo() function */
636    
637     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
638     {
639     int rc;
640     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
641     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
642     }
643    
644    
645    
646 nigel 63 /*************************************************
647 nigel 75 * Byte flipping function *
648     *************************************************/
649    
/* Reverse the byte order of a 2-byte or a 4-byte quantity.

Arguments:
  value    the value to be flipped
  n        2 to swap the two low-order bytes; any other value reverses the
             four low-order bytes

Returns:   the flipped value; bits above those handled are zero
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;

return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
659    
660    
661    
662    
663     /*************************************************
664 nigel 87 * Check match or recursion limit *
665     *************************************************/
666    
667     static int
668     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
669     int start_offset, int options, int *use_offsets, int use_size_offsets,
670     int flag, unsigned long int *limit, int errnumber, const char *msg)
671     {
672     int count;
673     int min = 0;
674     int mid = 64;
675     int max = -1;
676    
677     extra->flags |= flag;
678    
679     for (;;)
680     {
681     *limit = mid;
682    
683     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
684     use_offsets, use_size_offsets);
685    
686     if (count == errnumber)
687     {
688     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
689     min = mid;
690     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
691     }
692    
693     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
694     count == PCRE_ERROR_PARTIAL)
695     {
696     if (mid == min + 1)
697     {
698     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
699     break;
700     }
701     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
702     max = mid;
703     mid = (min + mid)/2;
704     }
705     else break; /* Some other error */
706     }
707    
708     extra->flags &= ~flag;
709     return count;
710     }
711    
712    
713    
714     /*************************************************
715 ph10 227 * Case-independent strncmp() function *
716     *************************************************/
717    
718     /*
719     Arguments:
720     s first string
721     t second string
722     n number of characters to compare
723    
724     Returns: < 0, = 0, or > 0, according to the comparison
725     */
726    
727     static int
728     strncmpic(uschar *s, uschar *t, int n)
729     {
730     while (n--)
731     {
732     int c = tolower(*s++) - tolower(*t++);
733     if (c) return c;
734     }
735     return 0;
736     }
737    
738    
739    
740     /*************************************************
741 nigel 91 * Check newline indicator *
742     *************************************************/
743    
744     /* This is used both at compile and run-time to check for <xxx> escapes, where
745 ph10 149 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
746     no match.
747 nigel 91
748     Arguments:
749     p points after the leading '<'
750     f file for error message
751    
752     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
753     */
754    
755     static int
756     check_newline(uschar *p, FILE *f)
757     {
758 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
759     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
760     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
761     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
762     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
763 ph10 231 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
764     if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
765 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
766     return 0;
767     }
768    
769    
770    
771     /*************************************************
772 nigel 93 * Usage function *
773     *************************************************/
774    
/* Write the command-line help text to stdout. Which lines appear depends on
SUPPORT_LIBREADLINE, NODFA and NOPOSIX. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n", stdout);
fputs("Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);
fputs(" -b show compiled code (bytecode)\n", stdout);
fputs(" -C show PCRE compile-time options and exit\n", stdout);
fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n", stdout);
fputs(" -i show information about compiled patterns\n", stdout);
fputs(" -M find MATCH_LIMIT minimum for each subject\n", stdout);
fputs(" -m output memory used information\n", stdout);
fputs(" -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
fputs(" -s output store (memory) used information\n", stdout);
fputs(" -t time compilation and execution\n", stdout);
fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
fputs(" -tm time execution (matching) only\n", stdout);
fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
808    
809    
810    
811     /*************************************************
812 nigel 63 * Main Program *
813     *************************************************/
814 nigel 43
815 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
816     consist of a regular expression, in delimiters and optionally followed by
817     options, followed by a set of test data, terminated by an empty line. */
818    
819     int main(int argc, char **argv)
820     {
821     FILE *infile = stdin;
822     int options = 0;
823     int study_options = 0;
824 ph10 386 int default_find_match_limit = FALSE;
825 nigel 3 int op = 1;
826     int timeit = 0;
827 nigel 93 int timeitm = 0;
828 nigel 3 int showinfo = 0;
829 nigel 31 int showstore = 0;
830 nigel 87 int quiet = 0;
831 nigel 53 int size_offsets = 45;
832     int size_offsets_max;
833 nigel 77 int *offsets = NULL;
834 nigel 53 #if !defined NOPOSIX
835 nigel 3 int posix = 0;
836 nigel 53 #endif
837 nigel 3 int debug = 0;
838 nigel 11 int done = 0;
839 nigel 77 int all_use_dfa = 0;
840     int yield = 0;
841 nigel 91 int stack_size;
842 nigel 3
843 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
844     that 1024 is plenty long enough for the few names we'll be testing. */
845 nigel 69
846 nigel 91 uschar copynames[1024];
847     uschar getnames[1024];
848    
849     uschar *copynamesptr;
850     uschar *getnamesptr;
851    
852 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
853 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
854 nigel 69
855 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
856     dbuffer = (unsigned char *)malloc(buffer_size);
857     pbuffer = (unsigned char *)malloc(buffer_size);
858 nigel 69
859 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
860 nigel 3
861 nigel 93 outfile = stdout;
862    
863     /* The following _setmode() stuff is some Windows magic that tells its runtime
864     library to translate CRLF into a single LF character. At least, that's what
865     I've been told: never having used Windows I take this all on trust. Originally
866     it set 0x8000, but then I was advised that _O_BINARY was better. */
867    
868 nigel 75 #if defined(_WIN32) || defined(WIN32)
869 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
870     #endif
871 nigel 75
872 nigel 3 /* Scan options */
873    
874     while (argc > 1 && argv[op][0] == '-')
875     {
876 nigel 63 unsigned char *endptr;
877 nigel 53
878 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
879     showstore = 1;
880 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
881 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
882 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
883     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
884 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
885 nigel 79 #if !defined NODFA
886 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
887 nigel 79 #endif
888 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
889 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
890     *endptr == 0))
891 nigel 53 {
892     op++;
893     argc--;
894     }
895 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
896     {
897     int both = argv[op][2] == 0;
898     int temp;
899     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
900     *endptr == 0))
901     {
902     timeitm = temp;
903     op++;
904     argc--;
905     }
906     else timeitm = LOOPREPEAT;
907     if (both) timeit = timeitm;
908     }
909 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
910     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
911     *endptr == 0))
912     {
913 nigel 93 #if defined(_WIN32) || defined(WIN32)
914 nigel 91 printf("PCRE: -S not supported on this OS\n");
915     exit(1);
916     #else
917     int rc;
918     struct rlimit rlim;
919     getrlimit(RLIMIT_STACK, &rlim);
920     rlim.rlim_cur = stack_size * 1024 * 1024;
921     rc = setrlimit(RLIMIT_STACK, &rlim);
922     if (rc != 0)
923     {
924     printf("PCRE: setrlimit() failed with error %d\n", rc);
925     exit(1);
926     }
927     op++;
928     argc--;
929     #endif
930     }
931 nigel 53 #if !defined NOPOSIX
932 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
933 nigel 53 #endif
934 nigel 63 else if (strcmp(argv[op], "-C") == 0)
935     {
936     int rc;
937 ph10 392 unsigned long int lrc;
938 nigel 63 printf("PCRE version %s\n", pcre_version());
939     printf("Compiled with\n");
940     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
941     printf(" %sUTF-8 support\n", rc? "" : "No ");
942 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
943     printf(" %sUnicode properties support\n", rc? "" : "No ");
944 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
945 ph10 391 /* Note that these values are always the ASCII values, even
946 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
947 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
948     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
949 ph10 150 (rc == -2)? "ANYCRLF" :
950 nigel 93 (rc == -1)? "ANY" : "???");
951 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
952     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
953     "all Unicode newlines");
954 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
955     printf(" Internal link size = %d\n", rc);
956     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
957     printf(" POSIX malloc threshold = %d\n", rc);
958 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
959     printf(" Default match limit = %ld\n", lrc);
960     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
961     printf(" Default recursion depth limit = %ld\n", lrc);
962 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
963     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
964 ph10 121 goto EXIT;
965 nigel 63 }
966 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
967     strcmp(argv[op], "--help") == 0)
968     {
969     usage();
970     goto EXIT;
971     }
972 nigel 3 else
973     {
974 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
975 nigel 93 usage();
976 nigel 77 yield = 1;
977     goto EXIT;
978 nigel 3 }
979     op++;
980     argc--;
981     }
982    
983 nigel 53 /* Get the store for the offsets vector, and remember what it was */
984    
985     size_offsets_max = size_offsets;
986 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
987 nigel 53 if (offsets == NULL)
988     {
989     printf("** Failed to get %d bytes of memory for offsets vector\n",
990 ph10 151 (int)(size_offsets_max * sizeof(int)));
991 nigel 77 yield = 1;
992     goto EXIT;
993 nigel 53 }
994    
995 nigel 3 /* Sort out the input and output files */
996    
997     if (argc > 1)
998     {
999 nigel 93 infile = fopen(argv[op], INPUT_MODE);
1000 nigel 3 if (infile == NULL)
1001     {
1002     printf("** Failed to open %s\n", argv[op]);
1003 nigel 77 yield = 1;
1004     goto EXIT;
1005 nigel 3 }
1006     }
1007    
1008     if (argc > 2)
1009     {
1010 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
1011 nigel 3 if (outfile == NULL)
1012     {
1013     printf("** Failed to open %s\n", argv[op+1]);
1014 nigel 77 yield = 1;
1015     goto EXIT;
1016 nigel 3 }
1017     }
1018    
1019     /* Set alternative malloc function */
1020    
1021     pcre_malloc = new_malloc;
1022 nigel 73 pcre_free = new_free;
1023     pcre_stack_malloc = stack_malloc;
1024     pcre_stack_free = stack_free;
1025 nigel 3
1026 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1027 nigel 3
1028 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1029 nigel 3
1030     /* Main loop */
1031    
1032 nigel 11 while (!done)
1033 nigel 3 {
1034     pcre *re = NULL;
1035     pcre_extra *extra = NULL;
1036 nigel 37
1037     #if !defined NOPOSIX /* There are still compilers that require no indent */
1038 nigel 3 regex_t preg;
1039 nigel 45 int do_posix = 0;
1040 nigel 37 #endif
1041    
1042 nigel 7 const char *error;
1043 ph10 512 unsigned char *markptr;
1044 nigel 25 unsigned char *p, *pp, *ppp;
1045 nigel 75 unsigned char *to_file = NULL;
1046 nigel 53 const unsigned char *tables = NULL;
1047 nigel 75 unsigned long int true_size, true_study_size = 0;
1048     size_t size, regex_gotten_store;
1049 ph10 512 int do_mark = 0;
1050 nigel 3 int do_study = 0;
1051 nigel 25 int do_debug = debug;
1052 nigel 35 int do_G = 0;
1053     int do_g = 0;
1054 nigel 25 int do_showinfo = showinfo;
1055 nigel 35 int do_showrest = 0;
1056 nigel 75 int do_flip = 0;
1057 nigel 93 int erroroffset, len, delimiter, poffset;
1058 nigel 3
1059 nigel 67 use_utf8 = 0;
1060 ph10 211 debug_lengths = 1;
1061 nigel 63
1062 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1063 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1064 nigel 63 fflush(outfile);
1065 nigel 3
1066     p = buffer;
1067     while (isspace(*p)) p++;
1068     if (*p == 0) continue;
1069    
1070 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1071 nigel 3
1072 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1073     {
1074 nigel 91 unsigned long int magic, get_options;
1075 nigel 75 uschar sbuf[8];
1076     FILE *f;
1077    
1078     p++;
1079     pp = p + (int)strlen((char *)p);
1080     while (isspace(pp[-1])) pp--;
1081     *pp = 0;
1082    
1083     f = fopen((char *)p, "rb");
1084     if (f == NULL)
1085     {
1086     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1087     continue;
1088     }
1089    
1090     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1091    
1092     true_size =
1093     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1094     true_study_size =
1095     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1096    
1097     re = (real_pcre *)new_malloc(true_size);
1098     regex_gotten_store = gotten_store;
1099    
1100     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1101    
1102     magic = ((real_pcre *)re)->magic_number;
1103     if (magic != MAGIC_NUMBER)
1104     {
1105     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1106     {
1107     do_flip = 1;
1108     }
1109     else
1110     {
1111     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1112     fclose(f);
1113     continue;
1114     }
1115     }
1116    
1117     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1118     do_flip? " (byte-inverted)" : "", p);
1119    
1120     /* Need to know if UTF-8 for printing data strings */
1121    
1122 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1123     use_utf8 = (get_options & PCRE_UTF8) != 0;
1124 nigel 75
1125     /* Now see if there is any following study data */
1126    
1127     if (true_study_size != 0)
1128     {
1129     pcre_study_data *psd;
1130    
1131     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1132     extra->flags = PCRE_EXTRA_STUDY_DATA;
1133    
1134     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1135     extra->study_data = psd;
1136    
1137     if (fread(psd, 1, true_study_size, f) != true_study_size)
1138     {
1139     FAIL_READ:
1140     fprintf(outfile, "Failed to read data from %s\n", p);
1141     if (extra != NULL) new_free(extra);
1142     if (re != NULL) new_free(re);
1143     fclose(f);
1144     continue;
1145     }
1146     fprintf(outfile, "Study data loaded from %s\n", p);
1147     do_study = 1; /* To get the data output if requested */
1148     }
1149     else fprintf(outfile, "No study data\n");
1150    
1151     fclose(f);
1152     goto SHOW_INFO;
1153     }
1154    
1155     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1156     the pattern; if it isn't complete, read more. */
1157    
1158 nigel 3 delimiter = *p++;
1159    
1160 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1161 nigel 3 {
1162 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1163 nigel 3 goto SKIP_DATA;
1164     }
1165    
1166     pp = p;
1167 nigel 93 poffset = p - buffer;
1168 nigel 3
1169     for(;;)
1170     {
1171 nigel 29 while (*pp != 0)
1172     {
1173     if (*pp == '\\' && pp[1] != 0) pp++;
1174     else if (*pp == delimiter) break;
1175     pp++;
1176     }
1177 nigel 3 if (*pp != 0) break;
1178 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1179 nigel 3 {
1180     fprintf(outfile, "** Unexpected EOF\n");
1181 nigel 11 done = 1;
1182     goto CONTINUE;
1183 nigel 3 }
1184 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1185 nigel 3 }
1186    
1187 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1188     pointer to the correct relative point in the buffer. */
1189    
1190     p = buffer + poffset;
1191    
1192 nigel 29 /* If the first character after the delimiter is backslash, make
1193     the pattern end with backslash. This is purely to provide a way
1194     of testing for the error message when a pattern ends with backslash. */
1195    
1196     if (pp[1] == '\\') *pp++ = '\\';
1197    
1198 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1199     for callouts. */
1200 nigel 3
1201     *pp++ = 0;
1202 nigel 75 strcpy((char *)pbuffer, (char *)p);
1203 nigel 3
1204     /* Look for options after final delimiter */
1205    
1206     options = 0;
1207     study_options = 0;
1208 nigel 31 log_store = showstore; /* default from command line */
1209    
1210 nigel 3 while (*pp != 0)
1211     {
1212     switch (*pp++)
1213     {
1214 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1215 nigel 35 case 'g': do_g = 1; break;
1216 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1217     case 'm': options |= PCRE_MULTILINE; break;
1218     case 's': options |= PCRE_DOTALL; break;
1219     case 'x': options |= PCRE_EXTENDED; break;
1220 nigel 25
1221 nigel 35 case '+': do_showrest = 1; break;
1222 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1223 nigel 93 case 'B': do_debug = 1; break;
1224 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1225 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1226 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1227 nigel 75 case 'F': do_flip = 1; break;
1228 nigel 35 case 'G': do_G = 1; break;
1229 nigel 25 case 'I': do_showinfo = 1; break;
1230 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1231 ph10 512 case 'K': do_mark = 1; break;
1232 nigel 31 case 'M': log_store = 1; break;
1233 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1234 nigel 37
1235     #if !defined NOPOSIX
1236 nigel 3 case 'P': do_posix = 1; break;
1237 nigel 37 #endif
1238    
1239 nigel 3 case 'S': do_study = 1; break;
1240 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1241 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1242 ph10 126 case 'Z': debug_lengths = 0; break;
1243 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1244 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1245 nigel 25
1246     case 'L':
1247     ppp = pp;
1248 nigel 93 /* The '\r' test here is so that it works on Windows. */
1249     /* The '0' test is just in case this is an unterminated line. */
1250     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1251 nigel 25 *ppp = 0;
1252     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1253     {
1254     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1255     goto SKIP_DATA;
1256     }
1257 nigel 93 locale_set = 1;
1258 nigel 25 tables = pcre_maketables();
1259     pp = ppp;
1260     break;
1261    
1262 nigel 75 case '>':
1263     to_file = pp;
1264     while (*pp != 0) pp++;
1265     while (isspace(pp[-1])) pp--;
1266     *pp = 0;
1267     break;
1268    
1269 nigel 91 case '<':
1270     {
1271 ph10 336 if (strncmp((char *)pp, "JS>", 3) == 0)
1272     {
1273     options |= PCRE_JAVASCRIPT_COMPAT;
1274 ph10 345 pp += 3;
1275 ph10 336 }
1276     else
1277 ph10 345 {
1278 ph10 336 int x = check_newline(pp, outfile);
1279     if (x == 0) goto SKIP_DATA;
1280     options |= x;
1281     while (*pp++ != '>');
1282 ph10 345 }
1283 nigel 91 }
1284     break;
1285    
1286 nigel 77 case '\r': /* So that it works in Windows */
1287     case '\n':
1288     case ' ':
1289     break;
1290 nigel 75
1291 nigel 3 default:
1292     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1293     goto SKIP_DATA;
1294     }
1295     }
1296    
1297 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1298 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1299     local character tables. */
1300 nigel 3
1301 nigel 37 #if !defined NOPOSIX
1302 nigel 3 if (posix || do_posix)
1303     {
1304     int rc;
1305     int cflags = 0;
1306 nigel 75
1307 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1308     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1309 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1310 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1311     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1312 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1313 nigel 87
1314 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1315    
1316     /* Compilation failed; go back for another re, skipping to blank line
1317     if non-interactive. */
1318    
1319     if (rc != 0)
1320     {
1321 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1322 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1323     goto SKIP_DATA;
1324     }
1325     }
1326    
1327     /* Handle compiling via the native interface */
1328    
1329     else
1330 nigel 37 #endif /* !defined NOPOSIX */
1331    
1332 nigel 3 {
1333 ph10 412 unsigned long int get_options;
1334 ph10 416
1335 nigel 93 if (timeit > 0)
1336 nigel 3 {
1337     register int i;
1338     clock_t time_taken;
1339     clock_t start_time = clock();
1340 nigel 93 for (i = 0; i < timeit; i++)
1341 nigel 3 {
1342 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1343 nigel 3 if (re != NULL) free(re);
1344     }
1345     time_taken = clock() - start_time;
1346 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1347     (((double)time_taken * 1000.0) / (double)timeit) /
1348 nigel 63 (double)CLOCKS_PER_SEC);
1349 nigel 3 }
1350    
1351 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1352 nigel 3
1353     /* Compilation failed; go back for another re, skipping to blank line
1354     if non-interactive. */
1355    
1356     if (re == NULL)
1357     {
1358     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1359     SKIP_DATA:
1360     if (infile != stdin)
1361     {
1362     for (;;)
1363     {
1364 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
1365 nigel 11 {
1366     done = 1;
1367     goto CONTINUE;
1368     }
1369 nigel 3 len = (int)strlen((char *)buffer);
1370     while (len > 0 && isspace(buffer[len-1])) len--;
1371     if (len == 0) break;
1372     }
1373     fprintf(outfile, "\n");
1374     }
1375 nigel 25 goto CONTINUE;
1376 nigel 3 }
1377 ph10 416
1378     /* Compilation succeeded. It is now possible to set the UTF-8 option from
1379     within the regex; check for this so that we know how to process the data
1380 ph10 412 lines. */
1381 ph10 416
1382 ph10 412 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1383     if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1384 nigel 3
1385 ph10 412 /* Print information if required. There are now two info-returning
1386     functions. The old one has a limited interface and returns only limited
1387     data. Check that it agrees with the newer one. */
1388 nigel 3
1389 nigel 63 if (log_store)
1390     fprintf(outfile, "Memory allocation (code space): %d\n",
1391     (int)(gotten_store -
1392     sizeof(real_pcre) -
1393     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1394    
1395 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1396     and remember the store that was got. */
1397    
1398     true_size = ((real_pcre *)re)->size;
1399     regex_gotten_store = gotten_store;
1400    
1401     /* If /S was present, study the regexp to generate additional info to
1402     help with the matching. */
1403    
1404     if (do_study)
1405     {
1406 nigel 93 if (timeit > 0)
1407 nigel 75 {
1408     register int i;
1409     clock_t time_taken;
1410     clock_t start_time = clock();
1411 nigel 93 for (i = 0; i < timeit; i++)
1412 nigel 75 extra = pcre_study(re, study_options, &error);
1413     time_taken = clock() - start_time;
1414     if (extra != NULL) free(extra);
1415 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1416     (((double)time_taken * 1000.0) / (double)timeit) /
1417 nigel 75 (double)CLOCKS_PER_SEC);
1418     }
1419     extra = pcre_study(re, study_options, &error);
1420     if (error != NULL)
1421     fprintf(outfile, "Failed to study: %s\n", error);
1422     else if (extra != NULL)
1423     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1424     }
1425 ph10 512
1426 ph10 510 /* If /K was present, we set up for handling MARK data. */
1427 ph10 512
1428 ph10 510 if (do_mark)
1429     {
1430     if (extra == NULL)
1431     {
1432     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1433     extra->flags = 0;
1434     }
1435 ph10 512 extra->mark = &markptr;
1436 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
1437 ph10 512 }
1438 nigel 75
1439     /* If the 'F' option was present, we flip the bytes of all the integer
1440     fields in the regex data block and the study block. This is to make it
1441     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1442     compiled on a different architecture. */
1443    
1444     if (do_flip)
1445     {
1446     real_pcre *rre = (real_pcre *)re;
1447 ph10 259 rre->magic_number =
1448 ph10 255 byteflip(rre->magic_number, sizeof(rre->magic_number));
1449 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
1450     rre->options = byteflip(rre->options, sizeof(rre->options));
1451 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1452 ph10 259 rre->top_bracket =
1453 ph10 255 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1454 ph10 259 rre->top_backref =
1455 ph10 255 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1456 ph10 259 rre->first_byte =
1457 ph10 255 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1458 ph10 259 rre->req_byte =
1459 ph10 255 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1460     rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1461 nigel 75 sizeof(rre->name_table_offset));
1462 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1463 nigel 75 sizeof(rre->name_entry_size));
1464 ph10 259 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1465 ph10 255 sizeof(rre->name_count));
1466 nigel 75
1467     if (extra != NULL)
1468     {
1469     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1470     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1471 ph10 455 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1472     rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1473 nigel 75 }
1474     }
1475    
1476     /* Extract information from the compiled data if required */
1477    
1478     SHOW_INFO:
1479    
1480 nigel 93 if (do_debug)
1481     {
1482     fprintf(outfile, "------------------------------------------------------------------\n");
1483 ph10 116 pcre_printint(re, outfile, debug_lengths);
1484 nigel 93 }
1485 ph10 416
1486 ph10 412 /* We already have the options in get_options (see above) */
1487 nigel 93
1488 nigel 25 if (do_showinfo)
1489 nigel 3 {
1490 ph10 412 unsigned long int all_options;
1491 nigel 79 #if !defined NOINFOCHECK
1492 nigel 43 int old_first_char, old_options, old_count;
1493 nigel 79 #endif
1494 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1495 ph10 227 hascrorlf;
1496 nigel 63 int nameentrysize, namecount;
1497     const uschar *nametable;
1498 nigel 3
1499 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1500     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1501     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1502 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1503 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1504 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1505     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1506 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1507 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1508     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1509 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1510 nigel 43
1511 nigel 79 #if !defined NOINFOCHECK
1512 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1513 nigel 3 if (count < 0) fprintf(outfile,
1514 nigel 43 "Error %d from pcre_info()\n", count);
1515 nigel 3 else
1516     {
1517 nigel 43 if (old_count != count) fprintf(outfile,
1518     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1519     old_count);
1520 nigel 37
1521 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1522     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1523     first_char, old_first_char);
1524 nigel 37
1525 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1526     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1527     get_options, old_options);
1528 nigel 43 }
1529 nigel 79 #endif
1530 nigel 43
1531 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1532 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1533 nigel 77 (int)size, (int)regex_gotten_store);
1534 nigel 43
1535     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1536     if (backrefmax > 0)
1537     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1538 nigel 63
1539     if (namecount > 0)
1540     {
1541     fprintf(outfile, "Named capturing subpatterns:\n");
1542     while (namecount-- > 0)
1543     {
1544     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1545     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1546     GET2(nametable, 0));
1547     nametable += nameentrysize;
1548     }
1549     }
1550 ph10 172
1551 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1552 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1553 nigel 63
1554 nigel 75 all_options = ((real_pcre *)re)->options;
1555 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1556 nigel 75
1557 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1558 ph10 231 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1559 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1560     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1561     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1562     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1563 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1564 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1565 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1566     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1567 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1568     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1569     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1570 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1571 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1572 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1573     ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1574 ph10 172
1575 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1576 nigel 43
1577 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1578 nigel 91 {
1579     case PCRE_NEWLINE_CR:
1580     fprintf(outfile, "Forced newline sequence: CR\n");
1581     break;
1582 nigel 43
1583 nigel 91 case PCRE_NEWLINE_LF:
1584     fprintf(outfile, "Forced newline sequence: LF\n");
1585     break;
1586    
1587     case PCRE_NEWLINE_CRLF:
1588     fprintf(outfile, "Forced newline sequence: CRLF\n");
1589     break;
1590    
1591 ph10 149 case PCRE_NEWLINE_ANYCRLF:
1592     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1593     break;
1594    
1595 nigel 93 case PCRE_NEWLINE_ANY:
1596     fprintf(outfile, "Forced newline sequence: ANY\n");
1597     break;
1598    
1599 nigel 91 default:
1600     break;
1601     }
1602    
1603 nigel 43 if (first_char == -1)
1604     {
1605 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1606 nigel 43 }
1607     else if (first_char < 0)
1608     {
1609     fprintf(outfile, "No first char\n");
1610     }
1611     else
1612     {
1613 nigel 63 int ch = first_char & 255;
1614 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1615 nigel 63 "" : " (caseless)";
1616 nigel 93 if (PRINTHEX(ch))
1617 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1618 nigel 3 else
1619 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1620 nigel 43 }
1621 nigel 37
1622 nigel 43 if (need_char < 0)
1623     {
1624     fprintf(outfile, "No need char\n");
1625 nigel 3 }
1626 nigel 43 else
1627     {
1628 nigel 63 int ch = need_char & 255;
1629 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1630 nigel 63 "" : " (caseless)";
1631 nigel 93 if (PRINTHEX(ch))
1632 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1633 nigel 43 else
1634 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1635 nigel 43 }
1636 nigel 75
1637     /* Don't output study size; at present it is in any case a fixed
1638     value, but it varies, depending on the computer architecture, and
1639     so messes up the test suite. (And with the /F option, it might be
1640     flipped.) */
1641    
1642     if (do_study)
1643     {
1644     if (extra == NULL)
1645     fprintf(outfile, "Study returned NULL\n");
1646     else
1647     {
1648     uschar *start_bits = NULL;
1649 ph10 455 int minlength;
1650 ph10 461
1651 ph10 455 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
1652 ph10 461 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1653    
1654 nigel 75 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1655     if (start_bits == NULL)
1656 ph10 455 fprintf(outfile, "No set of starting bytes\n");
1657 nigel 75 else
1658     {
1659     int i;
1660     int c = 24;
1661     fprintf(outfile, "Starting byte set: ");
1662     for (i = 0; i < 256; i++)
1663     {
1664     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1665     {
1666     if (c > 75)
1667     {
1668     fprintf(outfile, "\n ");
1669     c = 2;
1670     }
1671 nigel 93 if (PRINTHEX(i) && i != ' ')
1672 nigel 75 {
1673     fprintf(outfile, "%c ", i);
1674     c += 2;
1675     }
1676     else
1677     {
1678     fprintf(outfile, "\\x%02x ", i);
1679     c += 5;
1680     }
1681     }
1682     }
1683     fprintf(outfile, "\n");
1684     }
1685     }
1686     }
1687 nigel 3 }
1688    
1689 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1690     that is all. The first 8 bytes of the file are the regex length and then
1691     the study length, in big-endian order. */
1692 nigel 3
1693 nigel 75 if (to_file != NULL)
1694 nigel 3 {
1695 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1696     if (f == NULL)
1697 nigel 3 {
1698 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1699 nigel 3 }
1700 nigel 75 else
1701     {
1702     uschar sbuf[8];
1703 ph10 255 sbuf[0] = (uschar)((true_size >> 24) & 255);
1704     sbuf[1] = (uschar)((true_size >> 16) & 255);
1705     sbuf[2] = (uschar)((true_size >> 8) & 255);
1706     sbuf[3] = (uschar)((true_size) & 255);
1707 ph10 259
1708 ph10 255 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1709     sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1710     sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1711     sbuf[7] = (uschar)((true_study_size) & 255);
1712 nigel 3
1713 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1714     fwrite(re, 1, true_size, f) < true_size)
1715     {
1716     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1717     }
1718 nigel 3 else
1719     {
1720 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1721     if (extra != NULL)
1722 nigel 3 {
1723 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1724     true_study_size)
1725 nigel 3 {
1726 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1727     strerror(errno));
1728 nigel 3 }
1729 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1730 nigel 93
1731 nigel 3 }
1732     }
1733 nigel 75 fclose(f);
1734 nigel 3 }
1735 nigel 77
1736     new_free(re);
1737     if (extra != NULL) new_free(extra);
1738     if (tables != NULL) new_free((void *)tables);
1739 nigel 75 continue; /* With next regex */
1740 nigel 3 }
1741 nigel 75 } /* End of non-POSIX compile */
1742 nigel 3
1743     /* Read data lines and test them */
1744    
1745     for (;;)
1746     {
1747 nigel 87 uschar *q;
1748 ph10 147 uschar *bptr;
1749 nigel 57 int *use_offsets = offsets;
1750 nigel 53 int use_size_offsets = size_offsets;
1751 nigel 63 int callout_data = 0;
1752     int callout_data_set = 0;
1753 nigel 3 int count, c;
1754 nigel 29 int copystrings = 0;
1755 ph10 386 int find_match_limit = default_find_match_limit;
1756 nigel 29 int getstrings = 0;
1757     int getlist = 0;
1758 nigel 39 int gmatched = 0;
1759 nigel 35 int start_offset = 0;
1760 nigel 41 int g_notempty = 0;
1761 nigel 77 int use_dfa = 0;
1762 nigel 3
1763     options = 0;
1764    
1765 nigel 91 *copynames = 0;
1766     *getnames = 0;
1767    
1768     copynamesptr = copynames;
1769     getnamesptr = getnames;
1770    
1771 nigel 63 pcre_callout = callout;
1772     first_callout = 1;
1773     callout_extra = 0;
1774     callout_count = 0;
1775     callout_fail_count = 999999;
1776     callout_fail_id = -1;
1777 nigel 73 show_malloc = 0;
1778 nigel 63
1779 nigel 91 if (extra != NULL) extra->flags &=
1780     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1781    
1782     len = 0;
1783     for (;;)
1784 nigel 11 {
1785 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1786 nigel 91 {
1787     if (len > 0) break;
1788     done = 1;
1789     goto CONTINUE;
1790     }
1791     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1792     len = (int)strlen((char *)buffer);
1793     if (buffer[len-1] == '\n') break;
1794 nigel 11 }
1795 nigel 3
1796     while (len > 0 && isspace(buffer[len-1])) len--;
1797     buffer[len] = 0;
1798     if (len == 0) break;
1799    
1800     p = buffer;
1801     while (isspace(*p)) p++;
1802    
1803 ph10 147 bptr = q = dbuffer;
1804 nigel 3 while ((c = *p++) != 0)
1805     {
1806     int i = 0;
1807     int n = 0;
1808 nigel 63
1809 nigel 3 if (c == '\\') switch ((c = *p++))
1810     {
1811     case 'a': c = 7; break;
1812     case 'b': c = '\b'; break;
1813     case 'e': c = 27; break;
1814     case 'f': c = '\f'; break;
1815     case 'n': c = '\n'; break;
1816     case 'r': c = '\r'; break;
1817     case 't': c = '\t'; break;
1818     case 'v': c = '\v'; break;
1819    
1820     case '0': case '1': case '2': case '3':
1821     case '4': case '5': case '6': case '7':
1822     c -= '0';
1823     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1824     c = c * 8 + *p++ - '0';
1825 nigel 91
1826     #if !defined NOUTF8
1827     if (use_utf8 && c > 255)
1828     {
1829     unsigned char buff8[8];
1830     int ii, utn;
1831     utn = ord2utf8(c, buff8);
1832     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1833     c = buff8[ii]; /* Last byte */
1834     }
1835     #endif
1836 nigel 3 break;
1837    
1838     case 'x':
1839 nigel 49
1840     /* Handle \x{..} specially - new Perl thing for utf8 */
1841    
1842 nigel 79 #if !defined NOUTF8
1843 nigel 49 if (*p == '{')
1844     {
1845     unsigned char *pt = p;
1846     c = 0;
1847     while (isxdigit(*(++pt)))
1848     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1849     if (*pt == '}')
1850     {
1851 nigel 67 unsigned char buff8[8];
1852 nigel 49 int ii, utn;
1853 ph10 355 if (use_utf8)
1854 ph10 358 {
1855 ph10 355 utn = ord2utf8(c, buff8);
1856     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1857     c = buff8[ii]; /* Last byte */
1858     }
1859     else
1860     {
1861 ph10 358 if (c > 255)
1862 ph10 355 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1863     "UTF-8 mode is not enabled.\n"
1864     "** Truncation will probably give the wrong result.\n", c);
1865 ph10 358 }
1866 nigel 49 p = pt + 1;
1867     break;
1868     }
1869     /* Not correct form; fall through */
1870     }
1871 nigel 79 #endif
1872 nigel 49
1873     /* Ordinary \x */
1874    
1875 nigel 3 c = 0;
1876     while (i++ < 2 && isxdigit(*p))
1877     {
1878     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1879     p++;
1880     }
1881     break;
1882    
1883 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1884 nigel 3 p--;
1885     continue;
1886    
1887 nigel 75 case '>':
1888     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1889     continue;
1890    
1891 nigel 3 case 'A': /* Option setting */
1892     options |= PCRE_ANCHORED;
1893     continue;
1894    
1895     case 'B':
1896     options |= PCRE_NOTBOL;
1897     continue;
1898    
1899 nigel 29 case 'C':
1900 nigel 63 if (isdigit(*p)) /* Set copy string */
1901     {
1902     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1903     copystrings |= 1 << n;
1904     }
1905     else if (isalnum(*p))
1906     {
1907 nigel 91 uschar *npp = copynamesptr;
1908 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1909 nigel 91 *npp++ = 0;
1910 nigel 67 *npp = 0;
1911 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1912 nigel 63 if (n < 0)
1913 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1914     copynamesptr = npp;
1915 nigel 63 }
1916     else if (*p == '+')
1917     {
1918     callout_extra = 1;
1919     p++;
1920     }
1921     else if (*p == '-')
1922     {
1923     pcre_callout = NULL;
1924     p++;
1925     }
1926     else if (*p == '!')
1927     {
1928     callout_fail_id = 0;
1929     p++;
1930     while(isdigit(*p))
1931     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1932     callout_fail_count = 0;
1933     if (*p == '!')
1934     {
1935     p++;
1936     while(isdigit(*p))
1937     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1938     }
1939     }
1940     else if (*p == '*')
1941     {
1942     int sign = 1;
1943     callout_data = 0;
1944     if (*(++p) == '-') { sign = -1; p++; }
1945     while(isdigit(*p))
1946     callout_data = callout_data * 10 + *p++ - '0';
1947     callout_data *= sign;
1948     callout_data_set = 1;
1949     }
1950 nigel 29 continue;
1951    
1952 nigel 79 #if !defined NODFA
1953 nigel 77 case 'D':
1954 nigel 79 #if !defined NOPOSIX
1955 nigel 77 if (posix || do_posix)
1956     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1957     else
1958 nigel 79 #endif
1959 nigel 77 use_dfa = 1;
1960     continue;
1961    
1962     case 'F':
1963     options |= PCRE_DFA_SHORTEST;
1964     continue;
1965 nigel 79 #endif
1966 nigel 77
1967 nigel 29 case 'G':
1968 nigel 63 if (isdigit(*p))
1969     {
1970     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1971     getstrings |= 1 << n;
1972     }
1973     else if (isalnum(*p))
1974     {
1975 nigel 91 uschar *npp = getnamesptr;
1976 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1977 nigel 91 *npp++ = 0;
1978 nigel 67 *npp = 0;
1979 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1980 nigel 63 if (n < 0)
1981 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1982     getnamesptr = npp;
1983 nigel 63 }
1984 nigel 29 continue;
1985    
1986     case 'L':
1987     getlist = 1;
1988     continue;
1989    
1990 nigel 63 case 'M':
1991     find_match_limit = 1;
1992     continue;
1993    
1994 nigel 37 case 'N':
1995 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
1996     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
1997 ph10 461 else
1998 ph10 442 options |= PCRE_NOTEMPTY;
1999 nigel 37 continue;
2000    
2001 nigel 3 case 'O':
2002     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2003 nigel 53 if (n > size_offsets_max)
2004     {
2005     size_offsets_max = n;
2006 nigel 57 free(offsets);
2007 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2008 nigel 53 if (offsets == NULL)
2009     {
2010     printf("** Failed to get %d bytes of memory for offsets vector\n",
2011 ph10 151 (int)(size_offsets_max * sizeof(int)));
2012 nigel 77 yield = 1;
2013     goto EXIT;
2014 nigel 53 }
2015     }
2016     use_size_offsets = n;
2017 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2018 nigel 3 continue;
2019    
2020 nigel 75 case 'P':
2021 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2022 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2023 nigel 75 continue;
2024    
2025 nigel 91 case 'Q':
2026     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2027     if (extra == NULL)
2028     {
2029     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2030     extra->flags = 0;
2031     }
2032     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2033     extra->match_limit_recursion = n;
2034     continue;
2035    
2036     case 'q':
2037     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2038     if (extra == NULL)
2039     {
2040     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2041     extra->flags = 0;
2042     }
2043     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2044     extra->match_limit = n;
2045     continue;
2046    
2047 nigel 79 #if !defined NODFA
2048 nigel 77 case 'R':
2049     options |= PCRE_DFA_RESTART;
2050     continue;
2051 nigel 79 #endif
2052 nigel 77
2053 nigel 73 case 'S':
2054     show_malloc = 1;
2055     continue;
2056 ph10 392
2057 ph10 389 case 'Y':
2058     options |= PCRE_NO_START_OPTIMIZE;
2059 ph10 392 continue;
2060 nigel 73
2061 nigel 3 case 'Z':
2062     options |= PCRE_NOTEOL;
2063     continue;
2064 nigel 71
2065     case '?':
2066     options |= PCRE_NO_UTF8_CHECK;
2067     continue;
2068 nigel 91
2069     case '<':
2070     {
2071     int x = check_newline(p, outfile);
2072     if (x == 0) goto NEXT_DATA;
2073     options |= x;
2074     while (*p++ != '>');
2075     }
2076     continue;
2077 nigel 3 }
2078 nigel 9 *q++ = c;
2079 nigel 3 }
2080 nigel 9 *q = 0;
2081     len = q - dbuffer;
2082 ph10 371
2083 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
2084 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2085 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
2086 ph10 371
2087 ph10 363 #if !defined NOPOSIX
2088     if (posix || do_posix)
2089     {
2090     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2091 ph10 371 bptr += buffer_size - len - 1;
2092 ph10 363 }
2093 ph10 371 else
2094     #endif
2095 ph10 363 {
2096     memmove(bptr + buffer_size - len, bptr, len);
2097 ph10 371 bptr += buffer_size - len;
2098     }
2099 nigel 3
2100 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
2101     {
2102     printf("**Match limit not relevant for DFA matching: ignored\n");
2103     find_match_limit = 0;
2104     }
2105    
2106 nigel 3 /* Handle matching via the POSIX interface, which does not
2107 nigel 63 support timing or playing with the match limit or callout data. */
2108 nigel 3
2109 nigel 37 #if !defined NOPOSIX
2110 nigel 3 if (posix || do_posix)
2111     {
2112     int rc;
2113     int eflags = 0;
2114 nigel 63 regmatch_t *pmatch = NULL;
2115     if (use_size_offsets > 0)
2116 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2117 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2118     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2119 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2120 nigel 3
2121 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2122 nigel 3
2123     if (rc != 0)
2124     {
2125 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2126 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2127     }
2128 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2129     != 0)
2130     {
2131     fprintf(outfile, "Matched with REG_NOSUB\n");
2132     }
2133 nigel 3 else
2134     {
2135 nigel 7 size_t i;
2136 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
2137 nigel 3 {
2138     if (pmatch[i].rm_so >= 0)
2139     {
2140 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2141 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2142     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2143 nigel 3 fprintf(outfile, "\n");
2144 nigel 35 if (i == 0 && do_showrest)
2145     {
2146     fprintf(outfile, " 0+ ");
2147 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2148     outfile);
2149 nigel 35 fprintf(outfile, "\n");
2150     }
2151 nigel 3 }
2152     }
2153     }
2154 nigel 53 free(pmatch);
2155 nigel 3 }
2156    
2157 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2158 nigel 3
2159 nigel 37 else
2160     #endif /* !defined NOPOSIX */
2161    
2162 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2163 nigel 3 {
2164 ph10 512 markptr = NULL;
2165    
2166 nigel 93 if (timeitm > 0)
2167 nigel 3 {
2168     register int i;
2169     clock_t time_taken;
2170     clock_t start_time = clock();
2171 nigel 77
2172 nigel 79 #if !defined NODFA
2173 nigel 77 if (all_use_dfa || use_dfa)
2174     {
2175     int workspace[1000];
2176 nigel 93 for (i = 0; i < timeitm; i++)
2177 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2178 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2179     sizeof(workspace)/sizeof(int));
2180     }
2181     else
2182 nigel 79 #endif
2183 nigel 77
2184 nigel 93 for (i = 0; i < timeitm; i++)
2185 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2186 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2187 nigel 77
2188 nigel 3 time_taken = clock() - start_time;
2189 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2190     (((double)time_taken * 1000.0) / (double)timeitm) /
2191 nigel 63 (double)CLOCKS_PER_SEC);
2192 nigel 3 }
2193    
2194 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2195 nigel 87 varying limits in order to find the minimum value for the match limit and
2196     for the recursion limit. */
2197 nigel 63
2198     if (find_match_limit)
2199     {
2200     if (extra == NULL)
2201     {
2202 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2203 nigel 63 extra->flags = 0;
2204     }
2205    
2206 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2207 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2208     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2209     PCRE_ERROR_MATCHLIMIT, "match()");
2210 nigel 63
2211 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2212     options|g_notempty, use_offsets, use_size_offsets,
2213     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2214     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2215 nigel 63 }
2216    
2217     /* If callout_data is set, use the interface with additional data */
2218    
2219     else if (callout_data_set)
2220     {
2221     if (extra == NULL)
2222     {
2223 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2224 nigel 63 extra->flags = 0;
2225     }
2226     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2227 nigel 71 extra->callout_data = &callout_data;
2228 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2229     options | g_notempty, use_offsets, use_size_offsets);
2230     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2231     }
2232    
2233     /* The normal case is just to do the match once, with the default
2234     value of match_limit. */
2235    
2236 nigel 79 #if !defined NODFA
2237 nigel 77 else if (all_use_dfa || use_dfa)
2238     {
2239     int workspace[1000];
2240 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2241 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2242     sizeof(workspace)/sizeof(int));
2243     if (count == 0)
2244     {
2245     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2246     count = use_size_offsets/2;
2247     }
2248     }
2249 nigel 79 #endif
2250 nigel 77
2251 nigel 75 else
2252     {
2253     count = pcre_exec(re, extra, (char *)bptr, len,
2254     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2255 nigel 77 if (count == 0)
2256     {
2257     fprintf(outfile, "Matched, but too many substrings\n");
2258     count = use_size_offsets/3;
2259     }
2260 nigel 75 }
2261 nigel 3
2262 nigel 39 /* Matched */
2263    
2264 nigel 3 if (count >= 0)
2265     {
2266 nigel 93 int i, maxcount;
2267    
2268     #if !defined NODFA
2269     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2270     #endif
2271     maxcount = use_size_offsets/3;
2272    
2273     /* This is a check against a lunatic return value. */
2274    
2275     if (count > maxcount)
2276     {
2277     fprintf(outfile,
2278     "** PCRE error: returned count %d is too big for offset size %d\n",
2279     count, use_size_offsets);
2280     count = use_size_offsets/3;
2281     if (do_g || do_G)
2282     {
2283     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2284     do_g = do_G = FALSE; /* Break g/G loop */
2285     }
2286     }
2287    
2288 nigel 29 for (i = 0; i < count * 2; i += 2)
2289 nigel 3 {
2290 nigel 57 if (use_offsets[i] < 0)
2291 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2292     else
2293     {
2294     fprintf(outfile, "%2d: ", i/2);
2295 nigel 63 (void)pchars(bptr + use_offsets[i],
2296     use_offsets[i+1] - use_offsets[i], outfile);
2297 nigel 3 fprintf(outfile, "\n");
2298 nigel 35 if (i == 0)
2299     {
2300     if (do_showrest)
2301     {
2302     fprintf(outfile, " 0+ ");
2303 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2304     outfile);
2305 nigel 35 fprintf(outfile, "\n");
2306     }
2307     }
2308 nigel 3 }
2309     }
2310 ph10 512
2311 ph10 510 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2312 nigel 29
2313     for (i = 0; i < 32; i++)
2314     {
2315     if ((copystrings & (1 << i)) != 0)
2316     {
2317 nigel 91 char copybuffer[256];
2318 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2319 nigel 37 i, copybuffer, sizeof(copybuffer));
2320 nigel 29 if (rc < 0)
2321     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2322     else
2323 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2324 nigel 29 }
2325     }
2326    
2327 nigel 91 for (copynamesptr = copynames;
2328     *copynamesptr != 0;
2329     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2330     {
2331     char copybuffer[256];
2332     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2333     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2334     if (rc < 0)
2335     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2336     else
2337     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2338     }
2339    
2340 nigel 29 for (i = 0; i < 32; i++)
2341     {
2342     if ((getstrings & (1 << i)) != 0)
2343     {
2344     const char *substring;
2345 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2346 nigel 29 i, &substring);
2347     if (rc < 0)
2348     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2349     else
2350     {
2351     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2352 nigel 49 pcre_free_substring(substring);
2353 nigel 29 }
2354     }
2355     }
2356    
2357 nigel 91 for (getnamesptr = getnames;
2358     *getnamesptr != 0;
2359     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2360     {
2361     const char *substring;
2362     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2363     count, (char *)getnamesptr, &substring);
2364     if (rc < 0)
2365     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2366     else
2367     {
2368     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2369     pcre_free_substring(substring);
2370     }
2371     }
2372    
2373 nigel 29 if (getlist)
2374     {
2375     const char **stringlist;
2376 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2377 nigel 29 &stringlist);
2378     if (rc < 0)
2379     fprintf(outfile, "get substring list failed %d\n", rc);
2380     else
2381     {
2382     for (i = 0; i < count; i++)
2383     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2384     if (stringlist[i] != NULL)
2385     fprintf(outfile, "string list not terminated by NULL\n");
2386 nigel 49 /* free((void *)stringlist); */
2387     pcre_free_substring_list(stringlist);
2388 nigel 29 }
2389     }
2390 nigel 39 }
2391 nigel 29
2392 nigel 75 /* There was a partial match */
2393    
2394     else if (count == PCRE_ERROR_PARTIAL)
2395     {
2396 ph10 510 if (markptr == NULL) fprintf(outfile, "Partial match");
2397     else fprintf(outfile, "Partial match, mark=%s", markptr);
2398 ph10 426 if (use_size_offsets > 1)
2399     {
2400     fprintf(outfile, ": ");
2401     pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2402 ph10 461 outfile);
2403     }
2404 nigel 77 fprintf(outfile, "\n");
2405 nigel 75 break; /* Out of the /g loop */
2406     }
2407    
2408 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2409 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2410     to advance the start offset, and continue. We won't be at the end of the
2411     string - that was checked before setting g_notempty.
2412 nigel 39
2413 ph10 150 Complication arises in the case when the newline option is "any" or
2414 ph10 149 "anycrlf". If the previous match was at the end of a line terminated by
2415     CRLF, an advance of one character just passes the \r, whereas we should
2416     prefer the longer newline sequence, as does the code in pcre_exec().
2417     Fudge the offset value to achieve this.
2418 ph10 144
2419 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2420     character, not one byte. */
2421    
2422 nigel 3 else
2423     {
2424 nigel 41 if (g_notempty != 0)
2425 nigel 35 {
2426 nigel 73 int onechar = 1;
2427 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2428 nigel 57 use_offsets[0] = start_offset;
2429 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2430     {
2431     int d;
2432     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2433 ph10 391 /* Note that these values are always the ASCII ones, even in
2434     EBCDIC environments. CR = 13, NL = 10. */
2435     obits = (d == 13)? PCRE_NEWLINE_CR :
2436     (d == 10)? PCRE_NEWLINE_LF :
2437     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2438 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2439 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2440     }
2441 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2442 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2443 ph10 149 &&
2444 ph10 143 start_offset < len - 1 &&
2445     bptr[start_offset] == '\r' &&
2446     bptr[start_offset+1] == '\n')
2447 ph10 144 onechar++;
2448 ph10 143 else if (use_utf8)
2449 nigel 73 {
2450     while (start_offset + onechar < len)
2451     {
2452     int tb = bptr[start_offset+onechar];
2453     if (tb <= 127) break;
2454     tb &= 0xc0;
2455     if (tb != 0 && tb != 0xc0) onechar++;
2456     }
2457     }
2458     use_offsets[1] = start_offset + onechar;
2459 nigel 35 }
2460 nigel 41 else
2461     {
2462 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2463 nigel 41 {
2464 ph10 512 if (gmatched == 0)
2465 ph10 510 {
2466     if (markptr == NULL) fprintf(outfile, "No match\n");
2467     else fprintf(outfile, "No match, mark = %s\n", markptr);
2468 ph10 512 }
2469 nigel 41 }
2470 nigel 73 else fprintf(outfile, "Error %d\n", count);
2471 nigel 41 break; /* Out of the /g loop */
2472     }
2473 nigel 3 }
2474 nigel 35
2475 nigel 39 /* If not /g or /G we are done */
2476    
2477     if (!do_g && !do_G) break;
2478    
2479 nigel 41 /* If we have matched an empty string, first check to see if we are at
2480 ph10 442 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2481     Perl's /g option does. This turns out to be rather cunning. First we set
2482     PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2483 nigel 47 same point. If this fails (picked up above) we advance to the next
2484 ph10 143 character. */
2485 ph10 142
2486 nigel 41 g_notempty = 0;
2487 ph10 142
2488 nigel 57 if (use_offsets[0] == use_offsets[1])
2489 nigel 41 {
2490 nigel 57 if (use_offsets[0] == len) break;
2491 ph10 442 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2492 nigel 41 }
2493 nigel 39
2494     /* For /g, update the start offset, leaving the rest alone */
2495    
2496 ph10 143 if (do_g) start_offset = use_offsets[1];
2497 nigel 39
2498     /* For /G, update the pointer and length */
2499    
2500     else
2501 nigel 35 {
2502 ph10 143 bptr += use_offsets[1];
2503     len -= use_offsets[1];
2504 nigel 35 }
2505 nigel 39 } /* End of loop for /g and /G */
2506 nigel 91
2507     NEXT_DATA: continue;
2508 nigel 39 } /* End of loop for data lines */
2509 nigel 3
2510 nigel 11 CONTINUE:
2511 nigel 37
2512     #if !defined NOPOSIX
2513 nigel 3 if (posix || do_posix) regfree(&preg);
2514 nigel 37 #endif
2515    
2516 nigel 77 if (re != NULL) new_free(re);
2517     if (extra != NULL) new_free(extra);
2518 nigel 25 if (tables != NULL)
2519     {
2520 nigel 77 new_free((void *)tables);
2521 nigel 25 setlocale(LC_CTYPE, "C");
2522 nigel 93 locale_set = 0;
2523 nigel 25 }
2524 nigel 3 }
2525    
2526 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2527 nigel 77
2528     EXIT:
2529    
2530     if (infile != NULL && infile != stdin) fclose(infile);
2531     if (outfile != NULL && outfile != stdout) fclose(outfile);
2532    
2533     free(buffer);
2534     free(dbuffer);
2535     free(pbuffer);
2536     free(offsets);
2537    
2538     return yield;
2539 nigel 3 }
2540    
2541 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12