/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 412 - (hide annotations) (download)
Sat Apr 11 10:34:37 2009 UTC (5 years, 5 months ago) by ph10
File MIME type: text/plain
File size: 73594 byte(s)
Add support for (*UTF8).

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50 nigel 3
51 ph10 287 #ifdef SUPPORT_LIBREADLINE
52 ph10 343 #ifdef HAVE_UNISTD_H
53 ph10 287 #include <unistd.h>
54 ph10 343 #endif
55 ph10 287 #include <readline/readline.h>
56     #include <readline/history.h>
57     #endif
58 nigel 93
59 ph10 287
60 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
61     input and output without "b"; then I was told that "b" was needed in some
62     environments, so it was added for release 5.0 to both the input and output. (It
63     makes no difference on Unix-like systems.) Later I was told that it is wrong
64     for the input on Windows. I've now abstracted the modes into two macros that
65     are set here, to make it easier to fiddle with them, and removed "b" from the
66     input mode under Windows. */
67    
68     #if defined(_WIN32) || defined(WIN32)
69     #include <io.h> /* For _setmode() */
70     #include <fcntl.h> /* For _O_BINARY */
71     #define INPUT_MODE "r"
72     #define OUTPUT_MODE "wb"
73    
74 ph10 411 #ifndef isatty
75     #define isatty _isatty /* This is what Windows calls them, I'm told, */
76     #endif /* though in some environments they seem to */
77     /* be already defined, hence the #ifndefs. */
78     #ifndef fileno
79 ph10 343 #define fileno _fileno
80 ph10 411 #endif
81 ph10 343
82 nigel 93 #else
83     #include <sys/time.h> /* These two includes are needed */
84     #include <sys/resource.h> /* for setrlimit(). */
85     #define INPUT_MODE "rb"
86     #define OUTPUT_MODE "wb"
87 nigel 91 #endif
88    
89 nigel 93
90 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
91     displaying the results of pcre_study() and we also need to know about the
92     internal macros, structures, and other internal data values; pcretest has
93     "inside information" compared to a program that strictly follows the PCRE API.
94 nigel 37
95 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
96     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
97     appropriately for an application, not for building PCRE. */
98 nigel 77
99 ph10 145 #include "pcre.h"
100 nigel 77 #include "pcre_internal.h"
101    
102 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
103     to keep two copies, we include the source file here, changing the names of the
104     external symbols to prevent clashes. */
105 nigel 77
106 ph10 351 #define _pcre_ucp_gentype ucp_gentype
107 nigel 85 #define _pcre_utf8_table1 utf8_table1
108     #define _pcre_utf8_table1_size utf8_table1_size
109     #define _pcre_utf8_table2 utf8_table2
110     #define _pcre_utf8_table3 utf8_table3
111     #define _pcre_utf8_table4 utf8_table4
112     #define _pcre_utt utt
113     #define _pcre_utt_size utt_size
114 ph10 240 #define _pcre_utt_names utt_names
115 nigel 85 #define _pcre_OP_lengths OP_lengths
116    
117     #include "pcre_tables.c"
118    
119     /* We also need the pcre_printint() function for printing out compiled
120     patterns. This function is in a separate file so that it can be included in
121 nigel 93 pcre_compile.c when that module is compiled with debugging enabled.
122 nigel 85
123 nigel 93 The definition of the macro PRINTABLE, which determines whether to print an
124     output character as-is or as a hex value when showing compiled patterns, is
125     contained in this file. We use it here also, in cases when the locale has not
126     been explicitly changed, so as to get consistent output from systems that
127     differ in their output from isprint() even in the "C" locale. */
128    
129 nigel 85 #include "pcre_printint.src"
130    
131 nigel 93 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
132 nigel 85
133 nigel 93
134 nigel 37 /* It is possible to compile this test program without including support for
135     testing the POSIX interface, though this is not available via the standard
136     Makefile. */
137    
138     #if !defined NOPOSIX
139 nigel 3 #include "pcreposix.h"
140 nigel 37 #endif
141 nigel 3
142 ph10 107 /* It is also possible, for the benefit of the version currently imported into
143     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
144     interface to the DFA matcher (NODFA), and without the doublecheck of the old
145     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
146     UTF8 support if PCRE is built without it. */
147 nigel 79
148 ph10 107 #ifndef SUPPORT_UTF8
149     #ifndef NOUTF8
150     #define NOUTF8
151     #endif
152     #endif
153 nigel 79
154 ph10 107
155 nigel 85 /* Other parameters */
156    
157 nigel 3 #ifndef CLOCKS_PER_SEC
158     #ifdef CLK_TCK
159     #define CLOCKS_PER_SEC CLK_TCK
160     #else
161     #define CLOCKS_PER_SEC 100
162     #endif
163     #endif
164    
165 nigel 93 /* This is the default loop count for timing. */
166    
167 nigel 75 #define LOOPREPEAT 500000
168 nigel 3
169 nigel 85 /* Static variables */
170    
171 nigel 3 static FILE *outfile; /* stream that callout(), new_info() and the malloc/free wrappers write to */
172     static int log_store = 0; /* NOTE(review): not used by any function visible in this part of the file */
173 nigel 63 static int callout_count; /* incremented by callout() for each callout whose number is callout_fail_id */
174     static int callout_extra; /* non-zero => callout() also prints the captured substrings */
175     static int callout_fail_count; /* callout() yields 1 once callout_count reaches this value */
176     static int callout_fail_id; /* the callout number that callout() counts towards failing */
177 ph10 210 static int debug_lengths; /* NOTE(review): not used by any function visible in this part of the file */
178 nigel 63 static int first_callout; /* non-zero => callout() re-prints the subject; callout() resets it to 0 */
179 nigel 93 static int locale_set = 0; /* selects isprint() rather than PRINTABLE() in PRINTHEX */
180 nigel 73 static int show_malloc; /* non-zero => the malloc/free wrappers log each call to outfile */
181 nigel 67 static int use_utf8; /* non-zero => pchars() decodes its input as UTF-8 */
182 nigel 43 static size_t gotten_store; /* size passed to the most recent new_malloc() call */
183 nigel 3
184 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
185    
186     static int buffer_size = 50000;
187     static uschar *buffer = NULL;
188     static uschar *dbuffer = NULL;
189 nigel 75 static uschar *pbuffer = NULL;
190 nigel 3
191 nigel 75
192 nigel 49
193     /*************************************************
194 nigel 91 * Read or extend an input line *
195     *************************************************/
196    
197     /* Input lines are read into buffer, but both patterns and data lines can be
198     continued over multiple input lines. In addition, if the buffer fills up, we
199     want to automatically expand it so as to be able to handle extremely large
200     lines that are needed for certain stress tests. When the input buffer is
201     expanded, the other two buffers must also be expanded likewise, and the
202     contents of pbuffer, which are a copy of the input for callouts, must be
203     preserved (for when expansion happens for a data line). This is not the most
204     optimal way of handling this, but hey, this is just a test program!
205    
206     Arguments:
207     f the file to read
208     start where in buffer to start (this *must* be within buffer)
209 ph10 287 prompt for stdin or readline()
210 nigel 91
211     Returns: pointer to the start of new data
212     could be a copy of start, or could be moved
213     NULL if no data read and EOF reached
214     */
215    
216     static uschar *
217 ph10 287 extend_inputline(FILE *f, uschar *start, const char *prompt)
218 nigel 91 {
219     uschar *here = start;
220    
221     for (;;)
222     {
223     int rlen = buffer_size - (here - buffer);
224 nigel 93
225 nigel 91 if (rlen > 1000)
226     {
227     int dlen;
228 ph10 289
229 ph10 287 /* If libreadline support is required, use readline() to read a line if the
230     input is a terminal. Note that readline() removes the trailing newline, so
231     we must put it back again, to be compatible with fgets(). */
232 ph10 289
233 ph10 287 #ifdef SUPPORT_LIBREADLINE
234     if (isatty(fileno(f)))
235     {
236 ph10 289 size_t len;
237 ph10 287 char *s = readline(prompt);
238     if (s == NULL) return (here == start)? NULL : start;
239     len = strlen(s);
240 ph10 289 if (len > 0) add_history(s);
241 ph10 287 if (len > rlen - 1) len = rlen - 1;
242     memcpy(here, s, len);
243     here[len] = '\n';
244 ph10 289 here[len+1] = 0;
245     free(s);
246 ph10 287 }
247 ph10 289 else
248     #endif
249    
250 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
251 ph10 289
252 ph10 287 {
253 ph10 289 if (f == stdin) printf(prompt);
254 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
255     return (here == start)? NULL : start;
256 ph10 289 }
257    
258 nigel 91 dlen = (int)strlen((char *)here);
259     if (dlen > 0 && here[dlen - 1] == '\n') return start;
260     here += dlen;
261     }
262    
263     else
264     {
265     int new_buffer_size = 2*buffer_size;
266     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
267     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
268     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
269    
270     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
271     {
272     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
273     exit(1);
274     }
275    
276     memcpy(new_buffer, buffer, buffer_size);
277     memcpy(new_pbuffer, pbuffer, buffer_size);
278    
279     buffer_size = new_buffer_size;
280    
281     start = new_buffer + (start - buffer);
282     here = new_buffer + (here - buffer);
283    
284     free(buffer);
285     free(dbuffer);
286     free(pbuffer);
287    
288     buffer = new_buffer;
289     dbuffer = new_dbuffer;
290     pbuffer = new_pbuffer;
291     }
292     }
293    
294     return NULL; /* Control never gets here */
295     }
296    
297    
298    
299    
300    
301    
302    
/*************************************************
*          Read number from string               *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then decimal digits are accumulated. If there
are no digits the result is 0. A value too large for an int is clamped to
INT_MAX instead of overflowing (signed overflow is undefined behaviour).

Arguments:
  str          string to be converted
  endptr       where to put the end pointer (first character not consumed)

Returns:       the value, as a non-negative int
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  while (isdigit(*str))
  {
    int digit = (int)(*str++ - '0');

    /* result * 10 + digit fits exactly when result <= (INT_MAX - digit)/10 */

    if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
  }

  *endptr = str;
  return result;
}
327    
328    
329    
330 nigel 49
331     /*************************************************
332     * Convert UTF-8 string to value *
333     *************************************************/
334    
335     /* This function takes one or more bytes that represents a UTF-8 character,
336     and returns the value of the character.
337    
338     Argument:
339 nigel 91 utf8bytes a pointer to the byte vector
340     vptr a pointer to an int to receive the value
341 nigel 49
342 nigel 91 Returns: > 0 => the number of bytes consumed
343     -6 to 0 => malformed UTF-8 character at offset = (-return)
344 nigel 49 */
345    
346 nigel 79 #if !defined NOUTF8
347    
348 nigel 67 static int
349 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
350 nigel 49 {
351 nigel 91 int c = *utf8bytes++;
352 nigel 49 int d = c;
353     int i, j, s;
354    
355     for (i = -1; i < 6; i++) /* i is number of additional bytes */
356     {
357     if ((d & 0x80) == 0) break;
358     d <<= 1;
359     }
360    
361     if (i == -1) { *vptr = c; return 1; } /* ascii character */
362     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
363    
364     /* i now has a value in the range 1-5 */
365    
366 nigel 59 s = 6*i;
367 nigel 85 d = (c & utf8_table3[i]) << s;
368 nigel 49
369     for (j = 0; j < i; j++)
370     {
371 nigel 91 c = *utf8bytes++;
372 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
373 nigel 59 s -= 6;
374 nigel 49 d |= (c & 0x3f) << s;
375     }
376    
377     /* Check that encoding was the correct unique one */
378    
379 nigel 85 for (j = 0; j < utf8_table1_size; j++)
380     if (d <= utf8_table1[j]) break;
381 nigel 49 if (j != i) return -(i+1);
382    
383     /* Valid value */
384    
385     *vptr = d;
386     return i+1;
387     }
388    
389 nigel 79 #endif
390 nigel 49
391    
392 nigel 79
393 nigel 63 /*************************************************
394 nigel 85 * Convert character value to UTF-8 *
395     *************************************************/
396    
397     /* This function takes an integer value in the range 0 - 0x7fffffff
398     and encodes it as a UTF-8 character in 0 to 6 bytes.
399    
400     Arguments:
401     cvalue the character value
402 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
403 nigel 85
404     Returns: number of characters placed in the buffer
405     */
406    
407 nigel 93 #if !defined NOUTF8
408    
409 nigel 85 static int
410 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
411 nigel 85 {
412     register int i, j;
413     for (i = 0; i < utf8_table1_size; i++)
414     if (cvalue <= utf8_table1[i]) break;
415 nigel 91 utf8bytes += i;
416 nigel 85 for (j = i; j > 0; j--)
417     {
418 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
419 nigel 85 cvalue >>= 6;
420     }
421 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
422 nigel 85 return i + 1;
423     }
424    
425 nigel 93 #endif
426 nigel 85
427    
428 nigel 93
429 nigel 85 /*************************************************
430 nigel 63 * Print character string *
431     *************************************************/
432 nigel 49
433 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
434     mode. Yields number of characters printed. If handed a NULL file, just counts
435     chars without printing. */
436 nigel 49
437 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
438 nigel 3 {
439 nigel 85 int c = 0;
440 nigel 63 int yield = 0;
441 nigel 3
442 nigel 63 while (length-- > 0)
443 nigel 3 {
444 nigel 79 #if !defined NOUTF8
445 nigel 67 if (use_utf8)
446 nigel 63 {
447     int rc = utf82ord(p, &c);
448 nigel 3
449 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
450     {
451     length -= rc - 1;
452     p += rc;
453 nigel 93 if (PRINTHEX(c))
454 nigel 63 {
455     if (f != NULL) fprintf(f, "%c", c);
456     yield++;
457     }
458     else
459     {
460 nigel 93 int n = 4;
461     if (f != NULL) fprintf(f, "\\x{%02x}", c);
462     yield += (n <= 0x000000ff)? 2 :
463     (n <= 0x00000fff)? 3 :
464     (n <= 0x0000ffff)? 4 :
465     (n <= 0x000fffff)? 5 : 6;
466 nigel 63 }
467     continue;
468     }
469     }
470 nigel 79 #endif
471 nigel 3
472 nigel 63 /* Not UTF-8, or malformed UTF-8 */
473    
474 nigel 93 c = *p++;
475     if (PRINTHEX(c))
476 nigel 3 {
477 nigel 63 if (f != NULL) fprintf(f, "%c", c);
478     yield++;
479 nigel 3 }
480 nigel 63 else
481 nigel 3 {
482 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
483     yield += 4;
484     }
485     }
486 nigel 3
487 nigel 63 return yield;
488     }
489 nigel 23
490 nigel 3
491 nigel 23
492 nigel 63 /*************************************************
493     * Callout function *
494     *************************************************/
495 nigel 3
496 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
497     the match. Yield zero unless more callouts than the fail count, or the callout
498     data is not zero. */
499 nigel 3
500 nigel 63 static int callout(pcre_callout_block *cb)
501     {
502     FILE *f = (first_callout | callout_extra)? outfile : NULL;
503 nigel 75 int i, pre_start, post_start, subject_length;
504 nigel 3
505 nigel 63 if (callout_extra)
506     {
507     fprintf(f, "Callout %d: last capture = %d\n",
508     cb->callout_number, cb->capture_last);
509 nigel 3
510 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
511     {
512     if (cb->offset_vector[i] < 0)
513     fprintf(f, "%2d: <unset>\n", i/2);
514     else
515     {
516     fprintf(f, "%2d: ", i/2);
517     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
518     cb->offset_vector[i+1] - cb->offset_vector[i], f);
519     fprintf(f, "\n");
520     }
521     }
522     }
523 nigel 3
524 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
525     datails. On subsequent calls in the same match, we use pchars just to find the
526     printed lengths of the substrings. */
527 nigel 3
528 nigel 63 if (f != NULL) fprintf(f, "--->");
529 nigel 3
530 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
531     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
532     cb->current_position - cb->start_match, f);
533 nigel 3
534 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
535    
536 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
537     cb->subject_length - cb->current_position, f);
538 nigel 3
539 nigel 63 if (f != NULL) fprintf(f, "\n");
540 nigel 9
541 nigel 63 /* Always print appropriate indicators, with callout number if not already
542 nigel 75 shown. For automatic callouts, show the pattern offset. */
543 nigel 3
544 nigel 75 if (cb->callout_number == 255)
545     {
546     fprintf(outfile, "%+3d ", cb->pattern_position);
547     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
548     }
549     else
550     {
551     if (callout_extra) fprintf(outfile, " ");
552     else fprintf(outfile, "%3d ", cb->callout_number);
553     }
554 nigel 3
555 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
556     fprintf(outfile, "^");
557 nigel 3
558 nigel 63 if (post_start > 0)
559     {
560     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
561     fprintf(outfile, "^");
562 nigel 3 }
563    
564 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
565     fprintf(outfile, " ");
566    
567     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
568     pbuffer + cb->pattern_position);
569    
570 nigel 63 fprintf(outfile, "\n");
571     first_callout = 0;
572 nigel 3
573 nigel 71 if (cb->callout_data != NULL)
574 nigel 49 {
575 nigel 71 int callout_data = *((int *)(cb->callout_data));
576     if (callout_data != 0)
577     {
578     fprintf(outfile, "Callout data = %d\n", callout_data);
579     return callout_data;
580     }
581 nigel 63 }
582 nigel 49
583 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
584     (++callout_count >= callout_fail_count)? 1 : 0;
585 nigel 3 }
586    
587    
588 nigel 63 /*************************************************
589 nigel 73 * Local malloc functions *
590 nigel 63 *************************************************/
591 nigel 3
592     /* Alternative malloc function, to test functionality and show the size of the
593     compiled re. */
594    
595     static void *new_malloc(size_t size)
596     {
597 nigel 73 void *block = malloc(size);
598 nigel 43 gotten_store = size;
599 nigel 73 if (show_malloc)
600 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
601 nigel 73 return block;
602 nigel 3 }
603    
604 nigel 73 static void new_free(void *block)
605     {
606     if (show_malloc)
607     fprintf(outfile, "free %p\n", block);
608     free(block);
609     }
610 nigel 3
611    
612 nigel 73 /* For recursion malloc/free, to test stacking calls */
613    
614     static void *stack_malloc(size_t size)
615     {
616     void *block = malloc(size);
617     if (show_malloc)
618 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
619 nigel 73 return block;
620     }
621    
622     static void stack_free(void *block)
623     {
624     if (show_malloc)
625     fprintf(outfile, "stack_free %p\n", block);
626     free(block);
627     }
628    
629    
630 nigel 63 /*************************************************
631     * Call pcre_fullinfo() *
632     *************************************************/
633 nigel 43
634     /* Get one piece of information from the pcre_fullinfo() function */
635    
636     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
637     {
638     int rc;
639     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
640     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
641     }
642    
643    
644    
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverse the byte order of a 2-byte or a 4-byte quantity.

Arguments:
  value        the value to flip; only the low-order 2 or 4 bytes are used
  n            2 to swap the two low-order bytes; any other value reverses
                 the four low-order bytes

Returns:       the flipped value
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int b0 = value & 0xff;            /* lowest byte */
  unsigned long int b1 = (value >> 8) & 0xff;
  unsigned long int b2, b3;

  if (n == 2) return (b0 << 8) | b1;

  b2 = (value >> 16) & 0xff;
  b3 = (value >> 24) & 0xff;                      /* highest of the four */

  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
658    
659    
660    
661    
662     /*************************************************
663 nigel 87 * Check match or recursion limit *
664     *************************************************/
665    
666     static int
667     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
668     int start_offset, int options, int *use_offsets, int use_size_offsets,
669     int flag, unsigned long int *limit, int errnumber, const char *msg)
670     {
671     int count;
672     int min = 0;
673     int mid = 64;
674     int max = -1;
675    
676     extra->flags |= flag;
677    
678     for (;;)
679     {
680     *limit = mid;
681    
682     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
683     use_offsets, use_size_offsets);
684    
685     if (count == errnumber)
686     {
687     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
688     min = mid;
689     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
690     }
691    
692     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
693     count == PCRE_ERROR_PARTIAL)
694     {
695     if (mid == min + 1)
696     {
697     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
698     break;
699     }
700     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
701     max = mid;
702     mid = (min + mid)/2;
703     }
704     else break; /* Some other error */
705     }
706    
707     extra->flags &= ~flag;
708     return count;
709     }
710    
711    
712    
713     /*************************************************
714 ph10 227 * Case-independent strncmp() function *
715     *************************************************/
716    
717     /*
718     Arguments:
719     s first string
720     t second string
721     n number of characters to compare
722    
723     Returns: < 0, = 0, or > 0, according to the comparison
724     */
725    
726     static int
727     strncmpic(uschar *s, uschar *t, int n)
728     {
729     while (n--)
730     {
731     int c = tolower(*s++) - tolower(*t++);
732     if (c) return c;
733     }
734     return 0;
735     }
736    
737    
738    
739     /*************************************************
740 nigel 91 * Check newline indicator *
741     *************************************************/
742    
743     /* This is used both at compile and run-time to check for <xxx> escapes, where
744 ph10 149 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
745     no match.
746 nigel 91
747     Arguments:
748     p points after the leading '<'
749     f file for error message
750    
751     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
752     */
753    
754     static int
755     check_newline(uschar *p, FILE *f)
756     {
757 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
758     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
759     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
760     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
761     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
762 ph10 231 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
763     if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
764 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
765     return 0;
766     }
767    
768    
769    
/*************************************************
*                Usage function                  *
*************************************************/

/* Write the command-line help text to stdout. Which lines appear depends on
how the program was built: SUPPORT_LIBREADLINE selects the readline message,
and the -dfa and -p lines are left out when NODFA or NOPOSIX is defined. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);
#ifdef SUPPORT_LIBREADLINE
  fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
  fputs("\nOptions:\n"
        " -b show compiled code (bytecode)\n"
        " -C show PCRE compile-time options and exit\n"
        " -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -M find MATCH_LIMIT minimum for each subject\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif
  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s output store (memory) used information\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
807    
808    
809    
810     /*************************************************
811 nigel 63 * Main Program *
812     *************************************************/
813 nigel 43
814 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
815     consist of a regular expression, in delimiters and optionally followed by
816     options, followed by a set of test data, terminated by an empty line. */
817    
818     int main(int argc, char **argv)
819     {
820     FILE *infile = stdin;
821     int options = 0;
822     int study_options = 0;
823 ph10 386 int default_find_match_limit = FALSE;
824 nigel 3 int op = 1;
825     int timeit = 0;
826 nigel 93 int timeitm = 0;
827 nigel 3 int showinfo = 0;
828 nigel 31 int showstore = 0;
829 nigel 87 int quiet = 0;
830 nigel 53 int size_offsets = 45;
831     int size_offsets_max;
832 nigel 77 int *offsets = NULL;
833 nigel 53 #if !defined NOPOSIX
834 nigel 3 int posix = 0;
835 nigel 53 #endif
836 nigel 3 int debug = 0;
837 nigel 11 int done = 0;
838 nigel 77 int all_use_dfa = 0;
839     int yield = 0;
840 nigel 91 int stack_size;
841 nigel 3
842 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
843     that 1024 is plenty long enough for the few names we'll be testing. */
844 nigel 69
845 nigel 91 uschar copynames[1024];
846     uschar getnames[1024];
847    
848     uschar *copynamesptr;
849     uschar *getnamesptr;
850    
851 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
852 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
853 nigel 69
854 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
855     dbuffer = (unsigned char *)malloc(buffer_size);
856     pbuffer = (unsigned char *)malloc(buffer_size);
857 nigel 69
858 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
859 nigel 3
860 nigel 93 outfile = stdout;
861    
862     /* The following _setmode() stuff is some Windows magic that tells its runtime
863     library to translate CRLF into a single LF character. At least, that's what
864     I've been told: never having used Windows I take this all on trust. Originally
865     it set 0x8000, but then I was advised that _O_BINARY was better. */
866    
867 nigel 75 #if defined(_WIN32) || defined(WIN32)
868 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
869     #endif
870 nigel 75
871 nigel 3 /* Scan options */
872    
873     while (argc > 1 && argv[op][0] == '-')
874     {
875 nigel 63 unsigned char *endptr;
876 nigel 53
877 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
878     showstore = 1;
879 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
880 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
881 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
882     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
883 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
884 nigel 79 #if !defined NODFA
885 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
886 nigel 79 #endif
887 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
888 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
889     *endptr == 0))
890 nigel 53 {
891     op++;
892     argc--;
893     }
894 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
895     {
896     int both = argv[op][2] == 0;
897     int temp;
898     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
899     *endptr == 0))
900     {
901     timeitm = temp;
902     op++;
903     argc--;
904     }
905     else timeitm = LOOPREPEAT;
906     if (both) timeit = timeitm;
907     }
908 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
909     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
910     *endptr == 0))
911     {
912 nigel 93 #if defined(_WIN32) || defined(WIN32)
913 nigel 91 printf("PCRE: -S not supported on this OS\n");
914     exit(1);
915     #else
916     int rc;
917     struct rlimit rlim;
918     getrlimit(RLIMIT_STACK, &rlim);
919     rlim.rlim_cur = stack_size * 1024 * 1024;
920     rc = setrlimit(RLIMIT_STACK, &rlim);
921     if (rc != 0)
922     {
923     printf("PCRE: setrlimit() failed with error %d\n", rc);
924     exit(1);
925     }
926     op++;
927     argc--;
928     #endif
929     }
930 nigel 53 #if !defined NOPOSIX
931 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
932 nigel 53 #endif
933 nigel 63 else if (strcmp(argv[op], "-C") == 0)
934     {
935     int rc;
936 ph10 392 unsigned long int lrc;
937 nigel 63 printf("PCRE version %s\n", pcre_version());
938     printf("Compiled with\n");
939     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
940     printf(" %sUTF-8 support\n", rc? "" : "No ");
941 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
942     printf(" %sUnicode properties support\n", rc? "" : "No ");
943 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
944 ph10 391 /* Note that these values are always the ASCII values, even
945 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
946 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
947     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
948 ph10 150 (rc == -2)? "ANYCRLF" :
949 nigel 93 (rc == -1)? "ANY" : "???");
950 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
951     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
952     "all Unicode newlines");
953 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
954     printf(" Internal link size = %d\n", rc);
955     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
956     printf(" POSIX malloc threshold = %d\n", rc);
957 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
958     printf(" Default match limit = %ld\n", lrc);
959     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
960     printf(" Default recursion depth limit = %ld\n", lrc);
961 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
962     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
963 ph10 121 goto EXIT;
964 nigel 63 }
965 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
966     strcmp(argv[op], "--help") == 0)
967     {
968     usage();
969     goto EXIT;
970     }
971 nigel 3 else
972     {
973 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
974 nigel 93 usage();
975 nigel 77 yield = 1;
976     goto EXIT;
977 nigel 3 }
978     op++;
979     argc--;
980     }
981    
982 nigel 53 /* Get the store for the offsets vector, and remember what it was */
983    
984     size_offsets_max = size_offsets;
985 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
986 nigel 53 if (offsets == NULL)
987     {
988     printf("** Failed to get %d bytes of memory for offsets vector\n",
989 ph10 151 (int)(size_offsets_max * sizeof(int)));
990 nigel 77 yield = 1;
991     goto EXIT;
992 nigel 53 }
993    
994 nigel 3 /* Sort out the input and output files */
995    
996     if (argc > 1)
997     {
998 nigel 93 infile = fopen(argv[op], INPUT_MODE);
999 nigel 3 if (infile == NULL)
1000     {
1001     printf("** Failed to open %s\n", argv[op]);
1002 nigel 77 yield = 1;
1003     goto EXIT;
1004 nigel 3 }
1005     }
1006    
1007     if (argc > 2)
1008     {
1009 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
1010 nigel 3 if (outfile == NULL)
1011     {
1012     printf("** Failed to open %s\n", argv[op+1]);
1013 nigel 77 yield = 1;
1014     goto EXIT;
1015 nigel 3 }
1016     }
1017    
1018     /* Set alternative malloc function */
1019    
1020     pcre_malloc = new_malloc;
1021 nigel 73 pcre_free = new_free;
1022     pcre_stack_malloc = stack_malloc;
1023     pcre_stack_free = stack_free;
1024 nigel 3
1025 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1026 nigel 3
1027 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1028 nigel 3
1029     /* Main loop */
1030    
1031 nigel 11 while (!done)
1032 nigel 3 {
1033     pcre *re = NULL;
1034     pcre_extra *extra = NULL;
1035 nigel 37
1036     #if !defined NOPOSIX /* There are still compilers that require no indent */
1037 nigel 3 regex_t preg;
1038 nigel 45 int do_posix = 0;
1039 nigel 37 #endif
1040    
1041 nigel 7 const char *error;
1042 nigel 25 unsigned char *p, *pp, *ppp;
1043 nigel 75 unsigned char *to_file = NULL;
1044 nigel 53 const unsigned char *tables = NULL;
1045 nigel 75 unsigned long int true_size, true_study_size = 0;
1046     size_t size, regex_gotten_store;
1047 nigel 3 int do_study = 0;
1048 nigel 25 int do_debug = debug;
1049 nigel 35 int do_G = 0;
1050     int do_g = 0;
1051 nigel 25 int do_showinfo = showinfo;
1052 nigel 35 int do_showrest = 0;
1053 nigel 75 int do_flip = 0;
1054 nigel 93 int erroroffset, len, delimiter, poffset;
1055 nigel 3
1056 nigel 67 use_utf8 = 0;
1057 ph10 211 debug_lengths = 1;
1058 nigel 63
1059 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1060 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1061 nigel 63 fflush(outfile);
1062 nigel 3
1063     p = buffer;
1064     while (isspace(*p)) p++;
1065     if (*p == 0) continue;
1066    
1067 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1068 nigel 3
1069 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1070     {
1071 nigel 91 unsigned long int magic, get_options;
1072 nigel 75 uschar sbuf[8];
1073     FILE *f;
1074    
1075     p++;
1076     pp = p + (int)strlen((char *)p);
1077     while (isspace(pp[-1])) pp--;
1078     *pp = 0;
1079    
1080     f = fopen((char *)p, "rb");
1081     if (f == NULL)
1082     {
1083     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1084     continue;
1085     }
1086    
1087     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1088    
1089     true_size =
1090     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1091     true_study_size =
1092     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1093    
1094     re = (real_pcre *)new_malloc(true_size);
1095     regex_gotten_store = gotten_store;
1096    
1097     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1098    
1099     magic = ((real_pcre *)re)->magic_number;
1100     if (magic != MAGIC_NUMBER)
1101     {
1102     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1103     {
1104     do_flip = 1;
1105     }
1106     else
1107     {
1108     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1109     fclose(f);
1110     continue;
1111     }
1112     }
1113    
1114     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1115     do_flip? " (byte-inverted)" : "", p);
1116    
1117     /* Need to know if UTF-8 for printing data strings */
1118    
1119 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1120     use_utf8 = (get_options & PCRE_UTF8) != 0;
1121 nigel 75
1122     /* Now see if there is any following study data */
1123    
1124     if (true_study_size != 0)
1125     {
1126     pcre_study_data *psd;
1127    
1128     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1129     extra->flags = PCRE_EXTRA_STUDY_DATA;
1130    
1131     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1132     extra->study_data = psd;
1133    
1134     if (fread(psd, 1, true_study_size, f) != true_study_size)
1135     {
1136     FAIL_READ:
1137     fprintf(outfile, "Failed to read data from %s\n", p);
1138     if (extra != NULL) new_free(extra);
1139     if (re != NULL) new_free(re);
1140     fclose(f);
1141     continue;
1142     }
1143     fprintf(outfile, "Study data loaded from %s\n", p);
1144     do_study = 1; /* To get the data output if requested */
1145     }
1146     else fprintf(outfile, "No study data\n");
1147    
1148     fclose(f);
1149     goto SHOW_INFO;
1150     }
1151    
1152     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1153     the pattern; if it isn't complete, read more. */
1154    
1155 nigel 3 delimiter = *p++;
1156    
1157 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1158 nigel 3 {
1159 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1160 nigel 3 goto SKIP_DATA;
1161     }
1162    
1163     pp = p;
1164 nigel 93 poffset = p - buffer;
1165 nigel 3
1166     for(;;)
1167     {
1168 nigel 29 while (*pp != 0)
1169     {
1170     if (*pp == '\\' && pp[1] != 0) pp++;
1171     else if (*pp == delimiter) break;
1172     pp++;
1173     }
1174 nigel 3 if (*pp != 0) break;
1175 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1176 nigel 3 {
1177     fprintf(outfile, "** Unexpected EOF\n");
1178 nigel 11 done = 1;
1179     goto CONTINUE;
1180 nigel 3 }
1181 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1182 nigel 3 }
1183    
1184 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1185     pointer to the correct relative point in the buffer. */
1186    
1187     p = buffer + poffset;
1188    
1189 nigel 29 /* If the first character after the delimiter is backslash, make
1190     the pattern end with backslash. This is purely to provide a way
1191     of testing for the error message when a pattern ends with backslash. */
1192    
1193     if (pp[1] == '\\') *pp++ = '\\';
1194    
1195 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1196     for callouts. */
1197 nigel 3
1198     *pp++ = 0;
1199 nigel 75 strcpy((char *)pbuffer, (char *)p);
1200 nigel 3
1201     /* Look for options after final delimiter */
1202    
1203     options = 0;
1204     study_options = 0;
1205 nigel 31 log_store = showstore; /* default from command line */
1206    
1207 nigel 3 while (*pp != 0)
1208     {
1209     switch (*pp++)
1210     {
1211 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1212 nigel 35 case 'g': do_g = 1; break;
1213 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1214     case 'm': options |= PCRE_MULTILINE; break;
1215     case 's': options |= PCRE_DOTALL; break;
1216     case 'x': options |= PCRE_EXTENDED; break;
1217 nigel 25
1218 nigel 35 case '+': do_showrest = 1; break;
1219 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1220 nigel 93 case 'B': do_debug = 1; break;
1221 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1222 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1223 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1224 nigel 75 case 'F': do_flip = 1; break;
1225 nigel 35 case 'G': do_G = 1; break;
1226 nigel 25 case 'I': do_showinfo = 1; break;
1227 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1228 nigel 31 case 'M': log_store = 1; break;
1229 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1230 nigel 37
1231     #if !defined NOPOSIX
1232 nigel 3 case 'P': do_posix = 1; break;
1233 nigel 37 #endif
1234    
1235 nigel 3 case 'S': do_study = 1; break;
1236 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1237 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1238 ph10 126 case 'Z': debug_lengths = 0; break;
1239 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1240 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1241 nigel 25
1242     case 'L':
1243     ppp = pp;
1244 nigel 93 /* The '\r' test here is so that it works on Windows. */
1245     /* The '0' test is just in case this is an unterminated line. */
1246     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1247 nigel 25 *ppp = 0;
1248     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1249     {
1250     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1251     goto SKIP_DATA;
1252     }
1253 nigel 93 locale_set = 1;
1254 nigel 25 tables = pcre_maketables();
1255     pp = ppp;
1256     break;
1257    
1258 nigel 75 case '>':
1259     to_file = pp;
1260     while (*pp != 0) pp++;
1261     while (isspace(pp[-1])) pp--;
1262     *pp = 0;
1263     break;
1264    
1265 nigel 91 case '<':
1266     {
1267 ph10 336 if (strncmp((char *)pp, "JS>", 3) == 0)
1268     {
1269     options |= PCRE_JAVASCRIPT_COMPAT;
1270 ph10 345 pp += 3;
1271 ph10 336 }
1272     else
1273 ph10 345 {
1274 ph10 336 int x = check_newline(pp, outfile);
1275     if (x == 0) goto SKIP_DATA;
1276     options |= x;
1277     while (*pp++ != '>');
1278 ph10 345 }
1279 nigel 91 }
1280     break;
1281    
1282 nigel 77 case '\r': /* So that it works in Windows */
1283     case '\n':
1284     case ' ':
1285     break;
1286 nigel 75
1287 nigel 3 default:
1288     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1289     goto SKIP_DATA;
1290     }
1291     }
1292    
1293 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1294 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1295     local character tables. */
1296 nigel 3
1297 nigel 37 #if !defined NOPOSIX
1298 nigel 3 if (posix || do_posix)
1299     {
1300     int rc;
1301     int cflags = 0;
1302 nigel 75
1303 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1304     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1305 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1306 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1307     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1308    
1309 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1310    
1311     /* Compilation failed; go back for another re, skipping to blank line
1312     if non-interactive. */
1313    
1314     if (rc != 0)
1315     {
1316 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1317 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1318     goto SKIP_DATA;
1319     }
1320     }
1321    
1322     /* Handle compiling via the native interface */
1323    
1324     else
1325 nigel 37 #endif /* !defined NOPOSIX */
1326    
1327 nigel 3 {
1328 ph10 412 unsigned long int get_options;
1329    
1330 nigel 93 if (timeit > 0)
1331 nigel 3 {
1332     register int i;
1333     clock_t time_taken;
1334     clock_t start_time = clock();
1335 nigel 93 for (i = 0; i < timeit; i++)
1336 nigel 3 {
1337 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1338 nigel 3 if (re != NULL) free(re);
1339     }
1340     time_taken = clock() - start_time;
1341 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1342     (((double)time_taken * 1000.0) / (double)timeit) /
1343 nigel 63 (double)CLOCKS_PER_SEC);
1344 nigel 3 }
1345    
1346 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1347 nigel 3
1348     /* Compilation failed; go back for another re, skipping to blank line
1349     if non-interactive. */
1350    
1351     if (re == NULL)
1352     {
1353     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1354     SKIP_DATA:
1355     if (infile != stdin)
1356     {
1357     for (;;)
1358     {
1359 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
1360 nigel 11 {
1361     done = 1;
1362     goto CONTINUE;
1363     }
1364 nigel 3 len = (int)strlen((char *)buffer);
1365     while (len > 0 && isspace(buffer[len-1])) len--;
1366     if (len == 0) break;
1367     }
1368     fprintf(outfile, "\n");
1369     }
1370 nigel 25 goto CONTINUE;
1371 nigel 3 }
1372 ph10 412
1373     /* Compilation succeeded. It is now possible to set the UTF-8 option from
1374     within the regex; check for this so that we know how to process the data
1375     lines. */
1376    
1377     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1378     if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1379 nigel 3
1380 ph10 412 /* Print information if required. There are now two info-returning
1381     functions. The old one has a limited interface and returns only limited
1382     data. Check that it agrees with the newer one. */
1383 nigel 3
1384 nigel 63 if (log_store)
1385     fprintf(outfile, "Memory allocation (code space): %d\n",
1386     (int)(gotten_store -
1387     sizeof(real_pcre) -
1388     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1389    
1390 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1391     and remember the store that was got. */
1392    
1393     true_size = ((real_pcre *)re)->size;
1394     regex_gotten_store = gotten_store;
1395    
1396     /* If /S was present, study the regexp to generate additional info to
1397     help with the matching. */
1398    
1399     if (do_study)
1400     {
1401 nigel 93 if (timeit > 0)
1402 nigel 75 {
1403     register int i;
1404     clock_t time_taken;
1405     clock_t start_time = clock();
1406 nigel 93 for (i = 0; i < timeit; i++)
1407 nigel 75 extra = pcre_study(re, study_options, &error);
1408     time_taken = clock() - start_time;
1409     if (extra != NULL) free(extra);
1410 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1411     (((double)time_taken * 1000.0) / (double)timeit) /
1412 nigel 75 (double)CLOCKS_PER_SEC);
1413     }
1414     extra = pcre_study(re, study_options, &error);
1415     if (error != NULL)
1416     fprintf(outfile, "Failed to study: %s\n", error);
1417     else if (extra != NULL)
1418     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1419     }
1420    
1421     /* If the 'F' option was present, we flip the bytes of all the integer
1422     fields in the regex data block and the study block. This is to make it
1423     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1424     compiled on a different architecture. */
1425    
1426     if (do_flip)
1427     {
1428     real_pcre *rre = (real_pcre *)re;
1429 ph10 259 rre->magic_number =
1430 ph10 255 byteflip(rre->magic_number, sizeof(rre->magic_number));
1431 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
1432     rre->options = byteflip(rre->options, sizeof(rre->options));
1433 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1434 ph10 259 rre->top_bracket =
1435 ph10 255 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1436 ph10 259 rre->top_backref =
1437 ph10 255 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1438 ph10 259 rre->first_byte =
1439 ph10 255 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1440 ph10 259 rre->req_byte =
1441 ph10 255 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1442     rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1443 nigel 75 sizeof(rre->name_table_offset));
1444 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1445 nigel 75 sizeof(rre->name_entry_size));
1446 ph10 259 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1447 ph10 255 sizeof(rre->name_count));
1448 nigel 75
1449     if (extra != NULL)
1450     {
1451     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1452     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1453     rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1454     }
1455     }
1456    
1457     /* Extract information from the compiled data if required */
1458    
1459     SHOW_INFO:
1460    
1461 nigel 93 if (do_debug)
1462     {
1463     fprintf(outfile, "------------------------------------------------------------------\n");
1464 ph10 116 pcre_printint(re, outfile, debug_lengths);
1465 nigel 93 }
1466 ph10 412
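1467     /* For a pattern compiled above, the options are already in get_options. NOTE(review): the "goto SHOW_INFO" path for reloaded patterns appears to skip that assignment - confirm. */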
1468 nigel 93
1469 nigel 25 if (do_showinfo)
1470 nigel 3 {
1471 ph10 412 unsigned long int all_options;
1472 nigel 79 #if !defined NOINFOCHECK
1473 nigel 43 int old_first_char, old_options, old_count;
1474 nigel 79 #endif
1475 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1476 ph10 227 hascrorlf;
1477 nigel 63 int nameentrysize, namecount;
1478     const uschar *nametable;
1479 nigel 3
1480 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1481     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1482     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1483 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1484 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1485 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1486     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1487 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1488 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1489     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1490 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1491 nigel 43
1492 nigel 79 #if !defined NOINFOCHECK
1493 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1494 nigel 3 if (count < 0) fprintf(outfile,
1495 nigel 43 "Error %d from pcre_info()\n", count);
1496 nigel 3 else
1497     {
1498 nigel 43 if (old_count != count) fprintf(outfile,
1499     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1500     old_count);
1501 nigel 37
1502 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1503     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1504     first_char, old_first_char);
1505 nigel 37
1506 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1507     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1508     get_options, old_options);
1509 nigel 43 }
1510 nigel 79 #endif
1511 nigel 43
1512 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1513 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1514 nigel 77 (int)size, (int)regex_gotten_store);
1515 nigel 43
1516     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1517     if (backrefmax > 0)
1518     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1519 nigel 63
1520     if (namecount > 0)
1521     {
1522     fprintf(outfile, "Named capturing subpatterns:\n");
1523     while (namecount-- > 0)
1524     {
1525     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1526     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1527     GET2(nametable, 0));
1528     nametable += nameentrysize;
1529     }
1530     }
1531 ph10 172
1532 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1533 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1534 nigel 63
1535 nigel 75 all_options = ((real_pcre *)re)->options;
1536 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1537 nigel 75
1538 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1539 ph10 231 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1540 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1541     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1542     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1543     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1544 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1545 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1546 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1547     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1548 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1549     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1550     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1551 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1552 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1553 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1554     ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1555 ph10 172
1556 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1557 nigel 43
1558 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1559 nigel 91 {
1560     case PCRE_NEWLINE_CR:
1561     fprintf(outfile, "Forced newline sequence: CR\n");
1562     break;
1563 nigel 43
1564 nigel 91 case PCRE_NEWLINE_LF:
1565     fprintf(outfile, "Forced newline sequence: LF\n");
1566     break;
1567    
1568     case PCRE_NEWLINE_CRLF:
1569     fprintf(outfile, "Forced newline sequence: CRLF\n");
1570     break;
1571    
1572 ph10 149 case PCRE_NEWLINE_ANYCRLF:
1573     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1574     break;
1575    
1576 nigel 93 case PCRE_NEWLINE_ANY:
1577     fprintf(outfile, "Forced newline sequence: ANY\n");
1578     break;
1579    
1580 nigel 91 default:
1581     break;
1582     }
1583    
1584 nigel 43 if (first_char == -1)
1585     {
1586 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1587 nigel 43 }
1588     else if (first_char < 0)
1589     {
1590     fprintf(outfile, "No first char\n");
1591     }
1592     else
1593     {
1594 nigel 63 int ch = first_char & 255;
1595 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1596 nigel 63 "" : " (caseless)";
1597 nigel 93 if (PRINTHEX(ch))
1598 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1599 nigel 3 else
1600 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1601 nigel 43 }
1602 nigel 37
1603 nigel 43 if (need_char < 0)
1604     {
1605     fprintf(outfile, "No need char\n");
1606 nigel 3 }
1607 nigel 43 else
1608     {
1609 nigel 63 int ch = need_char & 255;
1610 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1611 nigel 63 "" : " (caseless)";
1612 nigel 93 if (PRINTHEX(ch))
1613 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1614 nigel 43 else
1615 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1616 nigel 43 }
1617 nigel 75
1618     /* Don't output study size; at present it is in any case a fixed
1619     value, but it varies, depending on the computer architecture, and
1620     so messes up the test suite. (And with the /F option, it might be
1621     flipped.) */
1622    
1623     if (do_study)
1624     {
1625     if (extra == NULL)
1626     fprintf(outfile, "Study returned NULL\n");
1627     else
1628     {
1629     uschar *start_bits = NULL;
1630     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1631    
1632     if (start_bits == NULL)
1633     fprintf(outfile, "No starting byte set\n");
1634     else
1635     {
1636     int i;
1637     int c = 24;
1638     fprintf(outfile, "Starting byte set: ");
1639     for (i = 0; i < 256; i++)
1640     {
1641     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1642     {
1643     if (c > 75)
1644     {
1645     fprintf(outfile, "\n ");
1646     c = 2;
1647     }
1648 nigel 93 if (PRINTHEX(i) && i != ' ')
1649 nigel 75 {
1650     fprintf(outfile, "%c ", i);
1651     c += 2;
1652     }
1653     else
1654     {
1655     fprintf(outfile, "\\x%02x ", i);
1656     c += 5;
1657     }
1658     }
1659     }
1660     fprintf(outfile, "\n");
1661     }
1662     }
1663     }
1664 nigel 3 }
1665    
1666 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1667     that is all. The first 8 bytes of the file are the regex length and then
1668     the study length, in big-endian order. */
1669 nigel 3
1670 nigel 75 if (to_file != NULL)
1671 nigel 3 {
1672 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1673     if (f == NULL)
1674 nigel 3 {
1675 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1676 nigel 3 }
1677 nigel 75 else
1678     {
1679     uschar sbuf[8];
1680 ph10 255 sbuf[0] = (uschar)((true_size >> 24) & 255);
1681     sbuf[1] = (uschar)((true_size >> 16) & 255);
1682     sbuf[2] = (uschar)((true_size >> 8) & 255);
1683     sbuf[3] = (uschar)((true_size) & 255);
1684 ph10 259
1685 ph10 255 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1686     sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1687     sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1688     sbuf[7] = (uschar)((true_study_size) & 255);
1689 nigel 3
1690 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1691     fwrite(re, 1, true_size, f) < true_size)
1692     {
1693     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1694     }
1695 nigel 3 else
1696     {
1697 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1698     if (extra != NULL)
1699 nigel 3 {
1700 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1701     true_study_size)
1702 nigel 3 {
1703 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1704     strerror(errno));
1705 nigel 3 }
1706 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1707 nigel 93
1708 nigel 3 }
1709     }
1710 nigel 75 fclose(f);
1711 nigel 3 }
1712 nigel 77
1713     new_free(re);
1714     if (extra != NULL) new_free(extra);
1715     if (tables != NULL) new_free((void *)tables);
1716 nigel 75 continue; /* With next regex */
1717 nigel 3 }
1718 nigel 75 } /* End of non-POSIX compile */
1719 nigel 3
1720     /* Read data lines and test them */
1721    
1722     for (;;)
1723     {
1724 nigel 87 uschar *q;
1725 ph10 147 uschar *bptr;
1726 nigel 57 int *use_offsets = offsets;
1727 nigel 53 int use_size_offsets = size_offsets;
1728 nigel 63 int callout_data = 0;
1729     int callout_data_set = 0;
1730 nigel 3 int count, c;
1731 nigel 29 int copystrings = 0;
1732 ph10 386 int find_match_limit = default_find_match_limit;
1733 nigel 29 int getstrings = 0;
1734     int getlist = 0;
1735 nigel 39 int gmatched = 0;
1736 nigel 35 int start_offset = 0;
1737 nigel 41 int g_notempty = 0;
1738 nigel 77 int use_dfa = 0;
1739 nigel 3
1740     options = 0;
1741    
1742 nigel 91 *copynames = 0;
1743     *getnames = 0;
1744    
1745     copynamesptr = copynames;
1746     getnamesptr = getnames;
1747    
1748 nigel 63 pcre_callout = callout;
1749     first_callout = 1;
1750     callout_extra = 0;
1751     callout_count = 0;
1752     callout_fail_count = 999999;
1753     callout_fail_id = -1;
1754 nigel 73 show_malloc = 0;
1755 nigel 63
1756 nigel 91 if (extra != NULL) extra->flags &=
1757     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1758    
1759     len = 0;
1760     for (;;)
1761 nigel 11 {
1762 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1763 nigel 91 {
1764     if (len > 0) break;
1765     done = 1;
1766     goto CONTINUE;
1767     }
1768     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1769     len = (int)strlen((char *)buffer);
1770     if (buffer[len-1] == '\n') break;
1771 nigel 11 }
1772 nigel 3
1773     while (len > 0 && isspace(buffer[len-1])) len--;
1774     buffer[len] = 0;
1775     if (len == 0) break;
1776    
1777     p = buffer;
1778     while (isspace(*p)) p++;
1779    
1780 ph10 147 bptr = q = dbuffer;
1781 nigel 3 while ((c = *p++) != 0)
1782     {
1783     int i = 0;
1784     int n = 0;
1785 nigel 63
1786 nigel 3 if (c == '\\') switch ((c = *p++))
1787     {
1788     case 'a': c = 7; break;
1789     case 'b': c = '\b'; break;
1790     case 'e': c = 27; break;
1791     case 'f': c = '\f'; break;
1792     case 'n': c = '\n'; break;
1793     case 'r': c = '\r'; break;
1794     case 't': c = '\t'; break;
1795     case 'v': c = '\v'; break;
1796    
1797     case '0': case '1': case '2': case '3':
1798     case '4': case '5': case '6': case '7':
1799     c -= '0';
1800     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1801     c = c * 8 + *p++ - '0';
1802 nigel 91
1803     #if !defined NOUTF8
1804     if (use_utf8 && c > 255)
1805     {
1806     unsigned char buff8[8];
1807     int ii, utn;
1808     utn = ord2utf8(c, buff8);
1809     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1810     c = buff8[ii]; /* Last byte */
1811     }
1812     #endif
1813 nigel 3 break;
1814    
1815     case 'x':
1816 nigel 49
1817     /* Handle \x{..} specially - new Perl thing for utf8 */
1818    
1819 nigel 79 #if !defined NOUTF8
1820 nigel 49 if (*p == '{')
1821     {
1822     unsigned char *pt = p;
1823     c = 0;
1824     while (isxdigit(*(++pt)))
1825     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1826     if (*pt == '}')
1827     {
1828 nigel 67 unsigned char buff8[8];
1829 nigel 49 int ii, utn;
1830 ph10 355 if (use_utf8)
1831 ph10 358 {
1832 ph10 355 utn = ord2utf8(c, buff8);
1833     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1834     c = buff8[ii]; /* Last byte */
1835     }
1836     else
1837     {
1838 ph10 358 if (c > 255)
1839 ph10 355 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1840     "UTF-8 mode is not enabled.\n"
1841     "** Truncation will probably give the wrong result.\n", c);
1842 ph10 358 }
1843 nigel 49 p = pt + 1;
1844     break;
1845     }
1846     /* Not correct form; fall through */
1847     }
1848 nigel 79 #endif
1849 nigel 49
1850     /* Ordinary \x */
1851    
1852 nigel 3 c = 0;
1853     while (i++ < 2 && isxdigit(*p))
1854     {
1855     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1856     p++;
1857     }
1858     break;
1859    
1860 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1861 nigel 3 p--;
1862     continue;
1863    
1864 nigel 75 case '>':
1865     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1866     continue;
1867    
1868 nigel 3 case 'A': /* Option setting */
1869     options |= PCRE_ANCHORED;
1870     continue;
1871    
1872     case 'B':
1873     options |= PCRE_NOTBOL;
1874     continue;
1875    
1876 nigel 29 case 'C':
1877 nigel 63 if (isdigit(*p)) /* Set copy string */
1878     {
1879     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1880     copystrings |= 1 << n;
1881     }
1882     else if (isalnum(*p))
1883     {
1884 nigel 91 uschar *npp = copynamesptr;
1885 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1886 nigel 91 *npp++ = 0;
1887 nigel 67 *npp = 0;
1888 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1889 nigel 63 if (n < 0)
1890 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1891     copynamesptr = npp;
1892 nigel 63 }
1893     else if (*p == '+')
1894     {
1895     callout_extra = 1;
1896     p++;
1897     }
1898     else if (*p == '-')
1899     {
1900     pcre_callout = NULL;
1901     p++;
1902     }
1903     else if (*p == '!')
1904     {
1905     callout_fail_id = 0;
1906     p++;
1907     while(isdigit(*p))
1908     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1909     callout_fail_count = 0;
1910     if (*p == '!')
1911     {
1912     p++;
1913     while(isdigit(*p))
1914     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1915     }
1916     }
1917     else if (*p == '*')
1918     {
1919     int sign = 1;
1920     callout_data = 0;
1921     if (*(++p) == '-') { sign = -1; p++; }
1922     while(isdigit(*p))
1923     callout_data = callout_data * 10 + *p++ - '0';
1924     callout_data *= sign;
1925     callout_data_set = 1;
1926     }
1927 nigel 29 continue;
1928    
1929 nigel 79 #if !defined NODFA
1930 nigel 77 case 'D':
1931 nigel 79 #if !defined NOPOSIX
1932 nigel 77 if (posix || do_posix)
1933     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1934     else
1935 nigel 79 #endif
1936 nigel 77 use_dfa = 1;
1937     continue;
1938    
1939     case 'F':
1940     options |= PCRE_DFA_SHORTEST;
1941     continue;
1942 nigel 79 #endif
1943 nigel 77
1944 nigel 29 case 'G':
1945 nigel 63 if (isdigit(*p))
1946     {
1947     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1948     getstrings |= 1 << n;
1949     }
1950     else if (isalnum(*p))
1951     {
1952 nigel 91 uschar *npp = getnamesptr;
1953 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1954 nigel 91 *npp++ = 0;
1955 nigel 67 *npp = 0;
1956 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1957 nigel 63 if (n < 0)
1958 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1959     getnamesptr = npp;
1960 nigel 63 }
1961 nigel 29 continue;
1962    
1963     case 'L':
1964     getlist = 1;
1965     continue;
1966    
1967 nigel 63 case 'M':
1968     find_match_limit = 1;
1969     continue;
1970    
1971 nigel 37 case 'N':
1972     options |= PCRE_NOTEMPTY;
1973     continue;
1974    
1975 nigel 3 case 'O':
1976     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1977 nigel 53 if (n > size_offsets_max)
1978     {
1979     size_offsets_max = n;
1980 nigel 57 free(offsets);
1981 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1982 nigel 53 if (offsets == NULL)
1983     {
1984     printf("** Failed to get %d bytes of memory for offsets vector\n",
1985 ph10 151 (int)(size_offsets_max * sizeof(int)));
1986 nigel 77 yield = 1;
1987     goto EXIT;
1988 nigel 53 }
1989     }
1990     use_size_offsets = n;
1991 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1992 nigel 3 continue;
1993    
1994 nigel 75 case 'P':
1995     options |= PCRE_PARTIAL;
1996     continue;
1997    
1998 nigel 91 case 'Q':
1999     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2000     if (extra == NULL)
2001     {
2002     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2003     extra->flags = 0;
2004     }
2005     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2006     extra->match_limit_recursion = n;
2007     continue;
2008    
2009     case 'q':
2010     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2011     if (extra == NULL)
2012     {
2013     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2014     extra->flags = 0;
2015     }
2016     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2017     extra->match_limit = n;
2018     continue;
2019    
2020 nigel 79 #if !defined NODFA
2021 nigel 77 case 'R':
2022     options |= PCRE_DFA_RESTART;
2023     continue;
2024 nigel 79 #endif
2025 nigel 77
2026 nigel 73 case 'S':
2027     show_malloc = 1;
2028     continue;
2029 ph10 392
2030 ph10 389 case 'Y':
2031     options |= PCRE_NO_START_OPTIMIZE;
2032 ph10 392 continue;
2033 nigel 73
2034 nigel 3 case 'Z':
2035     options |= PCRE_NOTEOL;
2036     continue;
2037 nigel 71
2038     case '?':
2039     options |= PCRE_NO_UTF8_CHECK;
2040     continue;
2041 nigel 91
2042     case '<':
2043     {
2044     int x = check_newline(p, outfile);
2045     if (x == 0) goto NEXT_DATA;
2046     options |= x;
2047     while (*p++ != '>');
2048     }
2049     continue;
2050 nigel 3 }
2051 nigel 9 *q++ = c;
2052 nigel 3 }
2053 nigel 9 *q = 0;
2054     len = q - dbuffer;
2055 ph10 371
2056 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
2057 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2058 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
2059 ph10 371
2060 ph10 363 #if !defined NOPOSIX
2061     if (posix || do_posix)
2062     {
2063     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2064 ph10 371 bptr += buffer_size - len - 1;
2065 ph10 363 }
2066 ph10 371 else
2067     #endif
2068 ph10 363 {
2069     memmove(bptr + buffer_size - len, bptr, len);
2070 ph10 371 bptr += buffer_size - len;
2071     }
2072 nigel 3
2073 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
2074     {
2075     printf("**Match limit not relevant for DFA matching: ignored\n");
2076     find_match_limit = 0;
2077     }
2078    
2079 nigel 3 /* Handle matching via the POSIX interface, which does not
2080 nigel 63 support timing or playing with the match limit or callout data. */
2081 nigel 3
2082 nigel 37 #if !defined NOPOSIX
2083 nigel 3 if (posix || do_posix)
2084     {
2085     int rc;
2086     int eflags = 0;
2087 nigel 63 regmatch_t *pmatch = NULL;
2088     if (use_size_offsets > 0)
2089 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2090 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2091     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2092 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2093 nigel 3
2094 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2095 nigel 3
2096     if (rc != 0)
2097     {
2098 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2099 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2100     }
2101 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2102     != 0)
2103     {
2104     fprintf(outfile, "Matched with REG_NOSUB\n");
2105     }
2106 nigel 3 else
2107     {
2108 nigel 7 size_t i;
2109 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
2110 nigel 3 {
2111     if (pmatch[i].rm_so >= 0)
2112     {
2113 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2114 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2115     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2116 nigel 3 fprintf(outfile, "\n");
2117 nigel 35 if (i == 0 && do_showrest)
2118     {
2119     fprintf(outfile, " 0+ ");
2120 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2121     outfile);
2122 nigel 35 fprintf(outfile, "\n");
2123     }
2124 nigel 3 }
2125     }
2126     }
2127 nigel 53 free(pmatch);
2128 nigel 3 }
2129    
2130 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2131 nigel 3
2132 nigel 37 else
2133     #endif /* !defined NOPOSIX */
2134    
2135 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2136 nigel 3 {
2137 nigel 93 if (timeitm > 0)
2138 nigel 3 {
2139     register int i;
2140     clock_t time_taken;
2141     clock_t start_time = clock();
2142 nigel 77
2143 nigel 79 #if !defined NODFA
2144 nigel 77 if (all_use_dfa || use_dfa)
2145     {
2146     int workspace[1000];
2147 nigel 93 for (i = 0; i < timeitm; i++)
2148 nigel 77 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2149     options | g_notempty, use_offsets, use_size_offsets, workspace,
2150     sizeof(workspace)/sizeof(int));
2151     }
2152     else
2153 nigel 79 #endif
2154 nigel 77
2155 nigel 93 for (i = 0; i < timeitm; i++)
2156 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2157 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2158 nigel 77
2159 nigel 3 time_taken = clock() - start_time;
2160 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2161     (((double)time_taken * 1000.0) / (double)timeitm) /
2162 nigel 63 (double)CLOCKS_PER_SEC);
2163 nigel 3 }
2164    
2165 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2166 nigel 87 varying limits in order to find the minimum value for the match limit and
2167     for the recursion limit. */
2168 nigel 63
2169     if (find_match_limit)
2170     {
2171     if (extra == NULL)
2172     {
2173 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2174 nigel 63 extra->flags = 0;
2175     }
2176    
2177 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2178 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2179     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2180     PCRE_ERROR_MATCHLIMIT, "match()");
2181 nigel 63
2182 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2183     options|g_notempty, use_offsets, use_size_offsets,
2184     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2185     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2186 nigel 63 }
2187    
2188     /* If callout_data is set, use the interface with additional data */
2189    
2190     else if (callout_data_set)
2191     {
2192     if (extra == NULL)
2193     {
2194 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2195 nigel 63 extra->flags = 0;
2196     }
2197     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2198 nigel 71 extra->callout_data = &callout_data;
2199 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2200     options | g_notempty, use_offsets, use_size_offsets);
2201     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2202     }
2203    
2204     /* The normal case is just to do the match once, with the default
2205     value of match_limit. */
2206    
2207 nigel 79 #if !defined NODFA
2208 nigel 77 else if (all_use_dfa || use_dfa)
2209     {
2210     int workspace[1000];
2211     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2212     options | g_notempty, use_offsets, use_size_offsets, workspace,
2213     sizeof(workspace)/sizeof(int));
2214     if (count == 0)
2215     {
2216     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2217     count = use_size_offsets/2;
2218     }
2219     }
2220 nigel 79 #endif
2221 nigel 77
2222 nigel 75 else
2223     {
2224     count = pcre_exec(re, extra, (char *)bptr, len,
2225     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2226 nigel 77 if (count == 0)
2227     {
2228     fprintf(outfile, "Matched, but too many substrings\n");
2229     count = use_size_offsets/3;
2230     }
2231 nigel 75 }
2232 nigel 3
2233 nigel 39 /* Matched */
2234    
2235 nigel 3 if (count >= 0)
2236     {
2237 nigel 93 int i, maxcount;
2238    
2239     #if !defined NODFA
2240     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2241     #endif
2242     maxcount = use_size_offsets/3;
2243    
2244     /* This is a check against a lunatic return value. */
2245    
2246     if (count > maxcount)
2247     {
2248     fprintf(outfile,
2249     "** PCRE error: returned count %d is too big for offset size %d\n",
2250     count, use_size_offsets);
2251     count = use_size_offsets/3;
2252     if (do_g || do_G)
2253     {
2254     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2255     do_g = do_G = FALSE; /* Break g/G loop */
2256     }
2257     }
2258    
2259 nigel 29 for (i = 0; i < count * 2; i += 2)
2260 nigel 3 {
2261 nigel 57 if (use_offsets[i] < 0)
2262 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2263     else
2264     {
2265     fprintf(outfile, "%2d: ", i/2);
2266 nigel 63 (void)pchars(bptr + use_offsets[i],
2267     use_offsets[i+1] - use_offsets[i], outfile);
2268 nigel 3 fprintf(outfile, "\n");
2269 nigel 35 if (i == 0)
2270     {
2271     if (do_showrest)
2272     {
2273     fprintf(outfile, " 0+ ");
2274 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2275     outfile);
2276 nigel 35 fprintf(outfile, "\n");
2277     }
2278     }
2279 nigel 3 }
2280     }
2281 nigel 29
2282     for (i = 0; i < 32; i++)
2283     {
2284     if ((copystrings & (1 << i)) != 0)
2285     {
2286 nigel 91 char copybuffer[256];
2287 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2288 nigel 37 i, copybuffer, sizeof(copybuffer));
2289 nigel 29 if (rc < 0)
2290     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2291     else
2292 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2293 nigel 29 }
2294     }
2295    
2296 nigel 91 for (copynamesptr = copynames;
2297     *copynamesptr != 0;
2298     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2299     {
2300     char copybuffer[256];
2301     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2302     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2303     if (rc < 0)
2304     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2305     else
2306     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2307     }
2308    
2309 nigel 29 for (i = 0; i < 32; i++)
2310     {
2311     if ((getstrings & (1 << i)) != 0)
2312     {
2313     const char *substring;
2314 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2315 nigel 29 i, &substring);
2316     if (rc < 0)
2317     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2318     else
2319     {
2320     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2321 nigel 49 pcre_free_substring(substring);
2322 nigel 29 }
2323     }
2324     }
2325    
2326 nigel 91 for (getnamesptr = getnames;
2327     *getnamesptr != 0;
2328     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2329     {
2330     const char *substring;
2331     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2332     count, (char *)getnamesptr, &substring);
2333     if (rc < 0)
2334     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2335     else
2336     {
2337     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2338     pcre_free_substring(substring);
2339     }
2340     }
2341    
2342 nigel 29 if (getlist)
2343     {
2344     const char **stringlist;
2345 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2346 nigel 29 &stringlist);
2347     if (rc < 0)
2348     fprintf(outfile, "get substring list failed %d\n", rc);
2349     else
2350     {
2351     for (i = 0; i < count; i++)
2352     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2353     if (stringlist[i] != NULL)
2354     fprintf(outfile, "string list not terminated by NULL\n");
2355 nigel 49 /* free((void *)stringlist); */
2356     pcre_free_substring_list(stringlist);
2357 nigel 29 }
2358     }
2359 nigel 39 }
2360 nigel 29
2361 nigel 75 /* There was a partial match */
2362    
2363     else if (count == PCRE_ERROR_PARTIAL)
2364     {
2365 nigel 77 fprintf(outfile, "Partial match");
2366 nigel 79 #if !defined NODFA
2367 nigel 77 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2368     fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2369     bptr + use_offsets[0]);
2370 nigel 79 #endif
2371 nigel 77 fprintf(outfile, "\n");
2372 nigel 75 break; /* Out of the /g loop */
2373     }
2374    
2375 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2376 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2377     to advance the start offset, and continue. We won't be at the end of the
2378     string - that was checked before setting g_notempty.
2379 nigel 39
2380 ph10 150 Complication arises in the case when the newline option is "any" or
2381 ph10 149 "anycrlf". If the previous match was at the end of a line terminated by
2382     CRLF, an advance of one character just passes the \r, whereas we should
2383     prefer the longer newline sequence, as does the code in pcre_exec().
2384     Fudge the offset value to achieve this.
2385 ph10 144
2386 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2387     character, not one byte. */
2388    
2389 nigel 3 else
2390     {
2391 nigel 41 if (g_notempty != 0)
2392 nigel 35 {
2393 nigel 73 int onechar = 1;
2394 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2395 nigel 57 use_offsets[0] = start_offset;
2396 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2397     {
2398     int d;
2399     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2400 ph10 391 /* Note that these values are always the ASCII ones, even in
2401     EBCDIC environments. CR = 13, NL = 10. */
2402     obits = (d == 13)? PCRE_NEWLINE_CR :
2403     (d == 10)? PCRE_NEWLINE_LF :
2404     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2405 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2406 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2407     }
2408 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2409 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2410 ph10 149 &&
2411 ph10 143 start_offset < len - 1 &&
2412     bptr[start_offset] == '\r' &&
2413     bptr[start_offset+1] == '\n')
2414 ph10 144 onechar++;
2415 ph10 143 else if (use_utf8)
2416 nigel 73 {
2417     while (start_offset + onechar < len)
2418     {
2419     int tb = bptr[start_offset+onechar];
2420     if (tb <= 127) break;
2421     tb &= 0xc0;
2422     if (tb != 0 && tb != 0xc0) onechar++;
2423     }
2424     }
2425     use_offsets[1] = start_offset + onechar;
2426 nigel 35 }
2427 nigel 41 else
2428     {
2429 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2430 nigel 41 {
2431 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
2432 nigel 41 }
2433 nigel 73 else fprintf(outfile, "Error %d\n", count);
2434 nigel 41 break; /* Out of the /g loop */
2435     }
2436 nigel 3 }
2437 nigel 35
2438 nigel 39 /* If not /g or /G we are done */
2439    
2440     if (!do_g && !do_G) break;
2441    
2442 nigel 41 /* If we have matched an empty string, first check to see if we are at
2443     the end of the subject. If so, the /g loop is over. Otherwise, mimic
2444     what Perl's /g options does. This turns out to be rather cunning. First
2445 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2446     same point. If this fails (picked up above) we advance to the next
2447 ph10 143 character. */
2448 ph10 142
2449 nigel 41 g_notempty = 0;
2450 ph10 142
2451 nigel 57 if (use_offsets[0] == use_offsets[1])
2452 nigel 41 {
2453 nigel 57 if (use_offsets[0] == len) break;
2454 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2455 nigel 41 }
2456 nigel 39
2457     /* For /g, update the start offset, leaving the rest alone */
2458    
2459 ph10 143 if (do_g) start_offset = use_offsets[1];
2460 nigel 39
2461     /* For /G, update the pointer and length */
2462    
2463     else
2464 nigel 35 {
2465 ph10 143 bptr += use_offsets[1];
2466     len -= use_offsets[1];
2467 nigel 35 }
2468 nigel 39 } /* End of loop for /g and /G */
2469 nigel 91
2470     NEXT_DATA: continue;
2471 nigel 39 } /* End of loop for data lines */
2472 nigel 3
2473 nigel 11 CONTINUE:
2474 nigel 37
2475     #if !defined NOPOSIX
2476 nigel 3 if (posix || do_posix) regfree(&preg);
2477 nigel 37 #endif
2478    
2479 nigel 77 if (re != NULL) new_free(re);
2480     if (extra != NULL) new_free(extra);
2481 nigel 25 if (tables != NULL)
2482     {
2483 nigel 77 new_free((void *)tables);
2484 nigel 25 setlocale(LC_CTYPE, "C");
2485 nigel 93 locale_set = 0;
2486 nigel 25 }
2487 nigel 3 }
2488    
2489 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2490 nigel 77
2491     EXIT:
2492    
2493     if (infile != NULL && infile != stdin) fclose(infile);
2494     if (outfile != NULL && outfile != stdout) fclose(outfile);
2495    
2496     free(buffer);
2497     free(dbuffer);
2498     free(pbuffer);
2499     free(offsets);
2500    
2501     return yield;
2502 nigel 3 }
2503    
2504 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12