/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 287 - (hide annotations) (download)
Tue Dec 18 20:11:28 2007 UTC (6 years, 8 months ago) by ph10
File MIME type: text/plain
File size: 71282 byte(s)
Added --enable-pcretest-libreadline.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
43 nigel 3 #include <ctype.h>
44     #include <stdio.h>
45     #include <string.h>
46     #include <stdlib.h>
47     #include <time.h>
48 nigel 25 #include <locale.h>
49 nigel 75 #include <errno.h>
50 nigel 3
51 ph10 287 #ifdef SUPPORT_LIBREADLINE
52     #include <unistd.h>
53     #include <readline/readline.h>
54     #include <readline/history.h>
55     #endif
56 nigel 93
57 ph10 287
58 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
59     input and output without "b"; then I was told that "b" was needed in some
60     environments, so it was added for release 5.0 to both the input and output. (It
61     makes no difference on Unix-like systems.) Later I was told that it is wrong
62     for the input on Windows. I've now abstracted the modes into two macros that
63     are set here, to make it easier to fiddle with them, and removed "b" from the
64     input mode under Windows. */
65    
66     #if defined(_WIN32) || defined(WIN32)
67     #include <io.h> /* For _setmode() */
68     #include <fcntl.h> /* For _O_BINARY */
69     #define INPUT_MODE "r"
70     #define OUTPUT_MODE "wb"
71    
72     #else
73     #include <sys/time.h> /* These two includes are needed */
74     #include <sys/resource.h> /* for setrlimit(). */
75     #define INPUT_MODE "rb"
76     #define OUTPUT_MODE "wb"
77 nigel 91 #endif
78    
79 nigel 93
80 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
81     displaying the results of pcre_study() and we also need to know about the
82     internal macros, structures, and other internal data values; pcretest has
83     "inside information" compared to a program that strictly follows the PCRE API.
84 nigel 37
85 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
86     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
87     appropriately for an application, not for building PCRE. */
88 nigel 77
89 ph10 145 #include "pcre.h"
90 nigel 77 #include "pcre_internal.h"
91    
92 nigel 85 /* We need access to the data tables that PCRE uses. So as not to have to keep
93     two copies, we include the source file here, changing the names of the external
94     symbols to prevent clashes. */
95 nigel 77
96 nigel 85 #define _pcre_utf8_table1 utf8_table1
97     #define _pcre_utf8_table1_size utf8_table1_size
98     #define _pcre_utf8_table2 utf8_table2
99     #define _pcre_utf8_table3 utf8_table3
100     #define _pcre_utf8_table4 utf8_table4
101     #define _pcre_utt utt
102     #define _pcre_utt_size utt_size
103 ph10 240 #define _pcre_utt_names utt_names
104 nigel 85 #define _pcre_OP_lengths OP_lengths
105    
106     #include "pcre_tables.c"
107    
108     /* We also need the pcre_printint() function for printing out compiled
109     patterns. This function is in a separate file so that it can be included in
110 nigel 93 pcre_compile.c when that module is compiled with debugging enabled.
111 nigel 85
112 nigel 93 The definition of the macro PRINTABLE, which determines whether to print an
113     output character as-is or as a hex value when showing compiled patterns, is
114     contained in this file. We use it here also, in cases when the locale has not
115     been explicitly changed, so as to get consistent output from systems that
116     differ in their output from isprint() even in the "C" locale. */
117    
118 nigel 85 #include "pcre_printint.src"
119    
120 nigel 93 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
121 nigel 85
122 nigel 93
123 nigel 37 /* It is possible to compile this test program without including support for
124     testing the POSIX interface, though this is not available via the standard
125     Makefile. */
126    
127     #if !defined NOPOSIX
128 nigel 3 #include "pcreposix.h"
129 nigel 37 #endif
130 nigel 3
131 ph10 107 /* It is also possible, for the benefit of the version currently imported into
132     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
133     interface to the DFA matcher (NODFA), and without the doublecheck of the old
134     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
135     UTF8 support if PCRE is built without it. */
136 nigel 79
137 ph10 107 #ifndef SUPPORT_UTF8
138     #ifndef NOUTF8
139     #define NOUTF8
140     #endif
141     #endif
142 nigel 79
143 ph10 107
144 nigel 85 /* Other parameters */
145    
146 nigel 3 #ifndef CLOCKS_PER_SEC
147     #ifdef CLK_TCK
148     #define CLOCKS_PER_SEC CLK_TCK
149     #else
150     #define CLOCKS_PER_SEC 100
151     #endif
152     #endif
153    
154 nigel 93 /* This is the default loop count for timing. */
155    
156 nigel 75 #define LOOPREPEAT 500000
157 nigel 3
158 nigel 85 /* Static variables */
159    
160 nigel 3 static FILE *outfile; /* Stream that the helper functions below write their output to */
161     static int log_store = 0; /* NOTE(review): not referenced in the visible part of the file */
162 nigel 63 static int callout_count; /* Incremented by callout() each time callout_fail_id is hit */
163     static int callout_extra; /* Non-zero: callout() prints capture details on every call */
164     static int callout_fail_count; /* callout() returns 1 once callout_count reaches this value */
165     static int callout_fail_id; /* Callout number that counts towards a forced failure */
166 ph10 210 static int debug_lengths; /* NOTE(review): not referenced in the visible part of the file */
167 nigel 63 static int first_callout; /* Non-zero until callout() has run once; callout() clears it */
168 nigel 93 static int locale_set = 0; /* Non-zero: PRINTHEX uses isprint() instead of PRINTABLE() */
169 nigel 73 static int show_malloc; /* Non-zero: the malloc/free wrappers trace each call to outfile */
170 nigel 67 static int use_utf8; /* Non-zero: pchars() decodes its input as UTF-8 */
171 nigel 43 static size_t gotten_store; /* Size passed to the most recent new_malloc() call */
172 nigel 3
173 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
174    
175     static int buffer_size = 50000;
176     static uschar *buffer = NULL;
177     static uschar *dbuffer = NULL;
178 nigel 75 static uschar *pbuffer = NULL;
179 nigel 3
180 nigel 75
181 nigel 49
182     /*************************************************
183 nigel 91 * Read or extend an input line *
184     *************************************************/
185    
186     /* Input lines are read into buffer, but both patterns and data lines can be
187     continued over multiple input lines. In addition, if the buffer fills up, we
188     want to automatically expand it so as to be able to handle extremely large
189     lines that are needed for certain stress tests. When the input buffer is
190     expanded, the other two buffers must also be expanded likewise, and the
191     contents of pbuffer, which are a copy of the input for callouts, must be
192     preserved (for when expansion happens for a data line). This is not the most
193     optimal way of handling this, but hey, this is just a test program!
194    
195     Arguments:
196     f the file to read
197     start where in buffer to start (this *must* be within buffer)
198 ph10 287 prompt for stdin or readline()
199 nigel 91
200     Returns: pointer to the start of new data
201     could be a copy of start, or could be moved
202     NULL if no data read and EOF reached
203     */
204    
205     static uschar *
206 ph10 287 extend_inputline(FILE *f, uschar *start, const char *prompt)
207 nigel 91 {
208     uschar *here = start;
209    
210     for (;;)
211     {
212     int rlen = buffer_size - (here - buffer);
213 nigel 93
214 nigel 91 if (rlen > 1000)
215     {
216     int dlen;
217 ph10 287
218     /* If libreadline support is required, use readline() to read a line if the
219     input is a terminal. Note that readline() removes the trailing newline, so
220     we must put it back again, to be compatible with fgets(). */
221    
222     #ifdef SUPPORT_LIBREADLINE
223     if (isatty(fileno(f)))
224     {
225     size_t len;
226     char *s = readline(prompt);
227     if (s == NULL) return (here == start)? NULL : start;
228     len = strlen(s);
229     if (len > 0) add_history(s);
230     if (len > rlen - 1) len = rlen - 1;
231     memcpy(here, s, len);
232     here[len] = '\n';
233     here[len+1] = 0;
234     free(s);
235     }
236     else
237     #endif
238    
239     /* Read the next line by normal means, prompting if the file is stdin. */
240    
241     {
242     if (f == stdin) printf(prompt);
243     if (fgets((char *)here, rlen, f) == NULL)
244     return (here == start)? NULL : start;
245     }
246    
247 nigel 91 dlen = (int)strlen((char *)here);
248     if (dlen > 0 && here[dlen - 1] == '\n') return start;
249     here += dlen;
250     }
251    
252     else
253     {
254     int new_buffer_size = 2*buffer_size;
255     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
256     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
257     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
258    
259     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
260     {
261     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
262     exit(1);
263     }
264    
265     memcpy(new_buffer, buffer, buffer_size);
266     memcpy(new_pbuffer, pbuffer, buffer_size);
267    
268     buffer_size = new_buffer_size;
269    
270     start = new_buffer + (start - buffer);
271     here = new_buffer + (here - buffer);
272    
273     free(buffer);
274     free(dbuffer);
275     free(pbuffer);
276    
277     buffer = new_buffer;
278     dbuffer = new_dbuffer;
279     pbuffer = new_pbuffer;
280     }
281     }
282    
283     return NULL; /* Control never gets here */
284     }
285    
286    
287    
288    
289    
290    
291    
292     /*************************************************
293 nigel 63 * Read number from string *
294     *************************************************/
295    
296     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
297     around with conditional compilation, just do the job by hand. It is only used
298 nigel 93 for unpicking arguments, so just keep it simple.
299 nigel 63
300     Arguments:
301     str string to be converted
302     endptr where to put the end pointer
303    
304     Returns: the value, as an int
305     */
306    
/* Skip leading white space, then accumulate a run of decimal digits. The
position of the first character that is not part of the number is handed back
through endptr; with no digits present the result is zero. */

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int total = 0;
unsigned char *cursor = str;

for (; *cursor != 0 && isspace(*cursor); cursor++) {}

for (; isdigit(*cursor); cursor++)
  total = total * 10 + (int)(*cursor - '0');

*endptr = cursor;
return total;
}
316    
317    
318    
319 nigel 49
320     /*************************************************
321     * Convert UTF-8 string to value *
322     *************************************************/
323    
324     /* This function takes one or more bytes that represents a UTF-8 character,
325     and returns the value of the character.
326    
327     Argument:
328 nigel 91 utf8bytes a pointer to the byte vector
329     vptr a pointer to an int to receive the value
330 nigel 49
331 nigel 91 Returns: > 0 => the number of bytes consumed
332     -6 to 0 => malformed UTF-8 character at offset = (-return)
333 nigel 49 */
334    
335 nigel 79 #if !defined NOUTF8
336    
337 nigel 67 static int
338 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
339 nigel 49 {
340 nigel 91 int c = *utf8bytes++;
341 nigel 49 int d = c;
342     int i, j, s;
343    
344     for (i = -1; i < 6; i++) /* i is number of additional bytes */
345     {
346     if ((d & 0x80) == 0) break;
347     d <<= 1;
348     }
349    
350     if (i == -1) { *vptr = c; return 1; } /* ascii character */
351     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
352    
353     /* i now has a value in the range 1-5 */
354    
355 nigel 59 s = 6*i;
356 nigel 85 d = (c & utf8_table3[i]) << s;
357 nigel 49
358     for (j = 0; j < i; j++)
359     {
360 nigel 91 c = *utf8bytes++;
361 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
362 nigel 59 s -= 6;
363 nigel 49 d |= (c & 0x3f) << s;
364     }
365    
366     /* Check that encoding was the correct unique one */
367    
368 nigel 85 for (j = 0; j < utf8_table1_size; j++)
369     if (d <= utf8_table1[j]) break;
370 nigel 49 if (j != i) return -(i+1);
371    
372     /* Valid value */
373    
374     *vptr = d;
375     return i+1;
376     }
377    
378 nigel 79 #endif
379 nigel 49
380    
381 nigel 79
382 nigel 63 /*************************************************
383 nigel 85 * Convert character value to UTF-8 *
384     *************************************************/
385    
386     /* This function takes an integer value in the range 0 - 0x7fffffff
387     and encodes it as a UTF-8 character in 1 to 6 bytes.
388    
389     Arguments:
390     cvalue the character value
391 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
392 nigel 85
393     Returns: number of characters placed in the buffer
394     */
395    
396 nigel 93 #if !defined NOUTF8
397    
398 nigel 85 static int
399 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
400 nigel 85 {
401     register int i, j;
402     for (i = 0; i < utf8_table1_size; i++)
403     if (cvalue <= utf8_table1[i]) break;
404 nigel 91 utf8bytes += i;
405 nigel 85 for (j = i; j > 0; j--)
406     {
407 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
408 nigel 85 cvalue >>= 6;
409     }
410 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
411 nigel 85 return i + 1;
412     }
413    
414 nigel 93 #endif
415 nigel 85
416    
417 nigel 93
418 nigel 85 /*************************************************
419 nigel 63 * Print character string *
420     *************************************************/
421 nigel 49
422 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
423     mode. Yields number of characters printed. If handed a NULL file, just counts
424     chars without printing. */
425 nigel 49
426 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
427 nigel 3 {
428 nigel 85 int c = 0;
429 nigel 63 int yield = 0;
430 nigel 3
431 nigel 63 while (length-- > 0)
432 nigel 3 {
433 nigel 79 #if !defined NOUTF8
434 nigel 67 if (use_utf8)
435 nigel 63 {
436     int rc = utf82ord(p, &c);
437 nigel 3
438 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
439     {
440     length -= rc - 1;
441     p += rc;
442 nigel 93 if (PRINTHEX(c))
443 nigel 63 {
444     if (f != NULL) fprintf(f, "%c", c);
445     yield++;
446     }
447     else
448     {
449 nigel 93 int n = 4;
450     if (f != NULL) fprintf(f, "\\x{%02x}", c);
451     yield += (n <= 0x000000ff)? 2 :
452     (n <= 0x00000fff)? 3 :
453     (n <= 0x0000ffff)? 4 :
454     (n <= 0x000fffff)? 5 : 6;
455 nigel 63 }
456     continue;
457     }
458     }
459 nigel 79 #endif
460 nigel 3
461 nigel 63 /* Not UTF-8, or malformed UTF-8 */
462    
463 nigel 93 c = *p++;
464     if (PRINTHEX(c))
465 nigel 3 {
466 nigel 63 if (f != NULL) fprintf(f, "%c", c);
467     yield++;
468 nigel 3 }
469 nigel 63 else
470 nigel 3 {
471 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
472     yield += 4;
473     }
474     }
475 nigel 3
476 nigel 63 return yield;
477     }
478 nigel 23
479 nigel 3
480 nigel 23
481 nigel 63 /*************************************************
482     * Callout function *
483     *************************************************/
484 nigel 3
485 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
486     the match. Yield zero unless more callouts than the fail count, or the callout
487     data is not zero. */
488 nigel 3
489 nigel 63 static int callout(pcre_callout_block *cb)
490     {
491     FILE *f = (first_callout | callout_extra)? outfile : NULL;
492 nigel 75 int i, pre_start, post_start, subject_length;
493 nigel 3
494 nigel 63 if (callout_extra)
495     {
496     fprintf(f, "Callout %d: last capture = %d\n",
497     cb->callout_number, cb->capture_last);
498 nigel 3
499 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
500     {
501     if (cb->offset_vector[i] < 0)
502     fprintf(f, "%2d: <unset>\n", i/2);
503     else
504     {
505     fprintf(f, "%2d: ", i/2);
506     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
507     cb->offset_vector[i+1] - cb->offset_vector[i], f);
508     fprintf(f, "\n");
509     }
510     }
511     }
512 nigel 3
513 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
514     datails. On subsequent calls in the same match, we use pchars just to find the
515     printed lengths of the substrings. */
516 nigel 3
517 nigel 63 if (f != NULL) fprintf(f, "--->");
518 nigel 3
519 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
520     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
521     cb->current_position - cb->start_match, f);
522 nigel 3
523 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
524    
525 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
526     cb->subject_length - cb->current_position, f);
527 nigel 3
528 nigel 63 if (f != NULL) fprintf(f, "\n");
529 nigel 9
530 nigel 63 /* Always print appropriate indicators, with callout number if not already
531 nigel 75 shown. For automatic callouts, show the pattern offset. */
532 nigel 3
533 nigel 75 if (cb->callout_number == 255)
534     {
535     fprintf(outfile, "%+3d ", cb->pattern_position);
536     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
537     }
538     else
539     {
540     if (callout_extra) fprintf(outfile, " ");
541     else fprintf(outfile, "%3d ", cb->callout_number);
542     }
543 nigel 3
544 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
545     fprintf(outfile, "^");
546 nigel 3
547 nigel 63 if (post_start > 0)
548     {
549     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
550     fprintf(outfile, "^");
551 nigel 3 }
552    
553 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
554     fprintf(outfile, " ");
555    
556     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
557     pbuffer + cb->pattern_position);
558    
559 nigel 63 fprintf(outfile, "\n");
560     first_callout = 0;
561 nigel 3
562 nigel 71 if (cb->callout_data != NULL)
563 nigel 49 {
564 nigel 71 int callout_data = *((int *)(cb->callout_data));
565     if (callout_data != 0)
566     {
567     fprintf(outfile, "Callout data = %d\n", callout_data);
568     return callout_data;
569     }
570 nigel 63 }
571 nigel 49
572 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
573     (++callout_count >= callout_fail_count)? 1 : 0;
574 nigel 3 }
575    
576    
577 nigel 63 /*************************************************
578 nigel 73 * Local malloc functions *
579 nigel 63 *************************************************/
580 nigel 3
581     /* Alternative malloc function, to test functionality and show the size of the
582     compiled re. */
583    
584     static void *new_malloc(size_t size)
585     {
586 nigel 73 void *block = malloc(size);
587 nigel 43 gotten_store = size;
588 nigel 73 if (show_malloc)
589 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
590 nigel 73 return block;
591 nigel 3 }
592    
593 nigel 73 static void new_free(void *block)
594     {
595     if (show_malloc)
596     fprintf(outfile, "free %p\n", block);
597     free(block);
598     }
599 nigel 3
600    
601 nigel 73 /* For recursion malloc/free, to test stacking calls */
602    
603     static void *stack_malloc(size_t size)
604     {
605     void *block = malloc(size);
606     if (show_malloc)
607 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
608 nigel 73 return block;
609     }
610    
611     static void stack_free(void *block)
612     {
613     if (show_malloc)
614     fprintf(outfile, "stack_free %p\n", block);
615     free(block);
616     }
617    
618    
619 nigel 63 /*************************************************
620     * Call pcre_fullinfo() *
621     *************************************************/
622 nigel 43
623     /* Get one piece of information from the pcre_fullinfo() function */
624    
625     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
626     {
627     int rc;
628     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
629     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
630     }
631    
632    
633    
634 nigel 63 /*************************************************
635 nigel 75 * Byte flipping function *
636     *************************************************/
637    
/* Reverse the byte order of a 2-byte value (n == 2) or of a 4-byte value (any
other n). Only the low 16 or 32 bits of "value" take part; higher bits are
dropped. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
647    
648    
649    
650    
651     /*************************************************
652 nigel 87 * Check match or recursion limit *
653     *************************************************/
654    
655     static int
656     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
657     int start_offset, int options, int *use_offsets, int use_size_offsets,
658     int flag, unsigned long int *limit, int errnumber, const char *msg)
659     {
660     int count;
661     int min = 0;
662     int mid = 64;
663     int max = -1;
664    
665     extra->flags |= flag;
666    
667     for (;;)
668     {
669     *limit = mid;
670    
671     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
672     use_offsets, use_size_offsets);
673    
674     if (count == errnumber)
675     {
676     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
677     min = mid;
678     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
679     }
680    
681     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
682     count == PCRE_ERROR_PARTIAL)
683     {
684     if (mid == min + 1)
685     {
686     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
687     break;
688     }
689     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
690     max = mid;
691     mid = (min + mid)/2;
692     }
693     else break; /* Some other error */
694     }
695    
696     extra->flags &= ~flag;
697     return count;
698     }
699    
700    
701    
702     /*************************************************
703 ph10 227 * Case-independent strncmp() function *
704     *************************************************/
705    
706     /*
707     Arguments:
708     s first string
709     t second string
710     n number of characters to compare
711    
712     Returns: < 0, = 0, or > 0, according to the comparison
713     */
714    
715     static int
716     strncmpic(uschar *s, uschar *t, int n)
717     {
718     while (n--)
719     {
720     int c = tolower(*s++) - tolower(*t++);
721     if (c) return c;
722     }
723     return 0;
724     }
725    
726    
727    
728     /*************************************************
729 nigel 91 * Check newline indicator *
730     *************************************************/
731    
732     /* This is used both at compile and run-time to check for <xxx> escapes, where
733 ph10 149 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
734     no match.
735 nigel 91
736     Arguments:
737     p points after the leading '<'
738     f file for error message
739    
740     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
741     */
742    
743     static int
744     check_newline(uschar *p, FILE *f)
745     {
746 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
747     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
748     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
749     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
750     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
751 ph10 231 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
752     if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
753 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
754     return 0;
755     }
756    
757    
758    
759     /*************************************************
760 nigel 93 * Usage function *
761     *************************************************/
762    
/* Write the command line summary to stdout. Which lines appear depends on
SUPPORT_LIBREADLINE, NODFA and NOPOSIX. */

static void
usage(void)
{
printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
       "Input and output default to stdin and stdout.\n");

#ifdef SUPPORT_LIBREADLINE
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif

printf("\nOptions:\n"
       " -b show compiled code (bytecode)\n"
       " -C show PCRE compile-time options and exit\n"
       " -d debug: show compiled code and information (-b and -i)\n");

#if !defined NODFA
printf(" -dfa force DFA matching for all subjects\n");
#endif

printf(" -help show usage information\n");
printf(" -i show information about compiled patterns\n");
printf(" -m output memory used information\n");
printf(" -o <n> set size of offsets vector to <n>\n");

#if !defined NOPOSIX
printf(" -p use POSIX interface\n");
#endif

printf(" -q quiet: do not output PCRE version number at start\n"
       " -S <n> set stack size to <n> megabytes\n");
printf(" -s output store (memory) used information\n");
printf(" -t time compilation and execution\n");
printf(" -t <n> time compilation and execution, repeating <n> times\n"
       " -tm time execution (matching) only\n"
       " -tm <n> time execution (matching) only, repeating <n> times\n");
}
795    
796    
797    
798     /*************************************************
799 nigel 63 * Main Program *
800     *************************************************/
801 nigel 43
802 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
803     consist of a regular expression, in delimiters and optionally followed by
804     options, followed by a set of test data, terminated by an empty line. */
805    
806     int main(int argc, char **argv)
807     {
808     FILE *infile = stdin;
809     int options = 0;
810     int study_options = 0;
811     int op = 1;
812     int timeit = 0;
813 nigel 93 int timeitm = 0;
814 nigel 3 int showinfo = 0;
815 nigel 31 int showstore = 0;
816 nigel 87 int quiet = 0;
817 nigel 53 int size_offsets = 45;
818     int size_offsets_max;
819 nigel 77 int *offsets = NULL;
820 nigel 53 #if !defined NOPOSIX
821 nigel 3 int posix = 0;
822 nigel 53 #endif
823 nigel 3 int debug = 0;
824 nigel 11 int done = 0;
825 nigel 77 int all_use_dfa = 0;
826     int yield = 0;
827 nigel 91 int stack_size;
828 nigel 3
829 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
830     that 1024 is plenty long enough for the few names we'll be testing. */
831 nigel 69
832 nigel 91 uschar copynames[1024];
833     uschar getnames[1024];
834    
835     uschar *copynamesptr;
836     uschar *getnamesptr;
837    
838 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
839 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
840 nigel 69
841 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
842     dbuffer = (unsigned char *)malloc(buffer_size);
843     pbuffer = (unsigned char *)malloc(buffer_size);
844 nigel 69
845 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
846 nigel 3
847 nigel 93 outfile = stdout;
848    
849     /* The following _setmode() stuff is some Windows magic that tells its runtime
850     library to translate CRLF into a single LF character. At least, that's what
851     I've been told: never having used Windows I take this all on trust. Originally
852     it set 0x8000, but then I was advised that _O_BINARY was better. */
853    
854 nigel 75 #if defined(_WIN32) || defined(WIN32)
855 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
856     #endif
857 nigel 75
858 nigel 3 /* Scan options */
859    
860     while (argc > 1 && argv[op][0] == '-')
861     {
862 nigel 63 unsigned char *endptr;
863 nigel 53
864 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
865     showstore = 1;
866 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
867 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
868 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
869     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
870 nigel 79 #if !defined NODFA
871 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
872 nigel 79 #endif
873 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
874 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
875     *endptr == 0))
876 nigel 53 {
877     op++;
878     argc--;
879     }
880 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
881     {
882     int both = argv[op][2] == 0;
883     int temp;
884     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
885     *endptr == 0))
886     {
887     timeitm = temp;
888     op++;
889     argc--;
890     }
891     else timeitm = LOOPREPEAT;
892     if (both) timeit = timeitm;
893     }
894 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
895     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
896     *endptr == 0))
897     {
898 nigel 93 #if defined(_WIN32) || defined(WIN32)
899 nigel 91 printf("PCRE: -S not supported on this OS\n");
900     exit(1);
901     #else
902     int rc;
903     struct rlimit rlim;
904     getrlimit(RLIMIT_STACK, &rlim);
905     rlim.rlim_cur = stack_size * 1024 * 1024;
906     rc = setrlimit(RLIMIT_STACK, &rlim);
907     if (rc != 0)
908     {
909     printf("PCRE: setrlimit() failed with error %d\n", rc);
910     exit(1);
911     }
912     op++;
913     argc--;
914     #endif
915     }
916 nigel 53 #if !defined NOPOSIX
917 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
918 nigel 53 #endif
919 nigel 63 else if (strcmp(argv[op], "-C") == 0)
920     {
921     int rc;
922     printf("PCRE version %s\n", pcre_version());
923     printf("Compiled with\n");
924     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
925     printf(" %sUTF-8 support\n", rc? "" : "No ");
926 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
927     printf(" %sUnicode properties support\n", rc? "" : "No ");
928 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
929 nigel 91 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
930 nigel 93 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
931 ph10 150 (rc == -2)? "ANYCRLF" :
932 nigel 93 (rc == -1)? "ANY" : "???");
933 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
934     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
935     "all Unicode newlines");
936 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
937     printf(" Internal link size = %d\n", rc);
938     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
939     printf(" POSIX malloc threshold = %d\n", rc);
940     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
941     printf(" Default match limit = %d\n", rc);
942 nigel 87 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
943     printf(" Default recursion depth limit = %d\n", rc);
944 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
945     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
946 ph10 121 goto EXIT;
947 nigel 63 }
948 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
949     strcmp(argv[op], "--help") == 0)
950     {
951     usage();
952     goto EXIT;
953     }
954 nigel 3 else
955     {
956 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
957 nigel 93 usage();
958 nigel 77 yield = 1;
959     goto EXIT;
960 nigel 3 }
961     op++;
962     argc--;
963     }
964    
965 nigel 53 /* Get the store for the offsets vector, and remember what it was */
966    
967     size_offsets_max = size_offsets;
968 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
969 nigel 53 if (offsets == NULL)
970     {
971     printf("** Failed to get %d bytes of memory for offsets vector\n",
972 ph10 151 (int)(size_offsets_max * sizeof(int)));
973 nigel 77 yield = 1;
974     goto EXIT;
975 nigel 53 }
976    
977 nigel 3 /* Sort out the input and output files */
978    
979     if (argc > 1)
980     {
981 nigel 93 infile = fopen(argv[op], INPUT_MODE);
982 nigel 3 if (infile == NULL)
983     {
984     printf("** Failed to open %s\n", argv[op]);
985 nigel 77 yield = 1;
986     goto EXIT;
987 nigel 3 }
988     }
989    
990     if (argc > 2)
991     {
992 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
993 nigel 3 if (outfile == NULL)
994     {
995     printf("** Failed to open %s\n", argv[op+1]);
996 nigel 77 yield = 1;
997     goto EXIT;
998 nigel 3 }
999     }
1000    
1001     /* Set alternative malloc function */
1002    
1003     pcre_malloc = new_malloc;
1004 nigel 73 pcre_free = new_free;
1005     pcre_stack_malloc = stack_malloc;
1006     pcre_stack_free = stack_free;
1007 nigel 3
1008 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1009 nigel 3
1010 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1011 nigel 3
1012     /* Main loop */
1013    
1014 nigel 11 while (!done)
1015 nigel 3 {
1016     pcre *re = NULL;
1017     pcre_extra *extra = NULL;
1018 nigel 37
1019     #if !defined NOPOSIX /* There are still compilers that require no indent */
1020 nigel 3 regex_t preg;
1021 nigel 45 int do_posix = 0;
1022 nigel 37 #endif
1023    
1024 nigel 7 const char *error;
1025 nigel 25 unsigned char *p, *pp, *ppp;
1026 nigel 75 unsigned char *to_file = NULL;
1027 nigel 53 const unsigned char *tables = NULL;
1028 nigel 75 unsigned long int true_size, true_study_size = 0;
1029     size_t size, regex_gotten_store;
1030 nigel 3 int do_study = 0;
1031 nigel 25 int do_debug = debug;
1032 nigel 35 int do_G = 0;
1033     int do_g = 0;
1034 nigel 25 int do_showinfo = showinfo;
1035 nigel 35 int do_showrest = 0;
1036 nigel 75 int do_flip = 0;
1037 nigel 93 int erroroffset, len, delimiter, poffset;
1038 nigel 3
1039 nigel 67 use_utf8 = 0;
1040 ph10 211 debug_lengths = 1;
1041 nigel 63
1042 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1043 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1044 nigel 63 fflush(outfile);
1045 nigel 3
1046     p = buffer;
1047     while (isspace(*p)) p++;
1048     if (*p == 0) continue;
1049    
1050 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1051 nigel 3
1052 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1053     {
1054 nigel 91 unsigned long int magic, get_options;
1055 nigel 75 uschar sbuf[8];
1056     FILE *f;
1057    
1058     p++;
1059     pp = p + (int)strlen((char *)p);
1060     while (isspace(pp[-1])) pp--;
1061     *pp = 0;
1062    
1063     f = fopen((char *)p, "rb");
1064     if (f == NULL)
1065     {
1066     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1067     continue;
1068     }
1069    
1070     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1071    
1072     true_size =
1073     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1074     true_study_size =
1075     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1076    
1077     re = (real_pcre *)new_malloc(true_size);
1078     regex_gotten_store = gotten_store;
1079    
1080     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1081    
1082     magic = ((real_pcre *)re)->magic_number;
1083     if (magic != MAGIC_NUMBER)
1084     {
1085     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1086     {
1087     do_flip = 1;
1088     }
1089     else
1090     {
1091     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1092     fclose(f);
1093     continue;
1094     }
1095     }
1096    
1097     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1098     do_flip? " (byte-inverted)" : "", p);
1099    
1100     /* Need to know if UTF-8 for printing data strings */
1101    
1102 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1103     use_utf8 = (get_options & PCRE_UTF8) != 0;
1104 nigel 75
1105     /* Now see if there is any following study data */
1106    
1107     if (true_study_size != 0)
1108     {
1109     pcre_study_data *psd;
1110    
1111     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1112     extra->flags = PCRE_EXTRA_STUDY_DATA;
1113    
1114     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1115     extra->study_data = psd;
1116    
1117     if (fread(psd, 1, true_study_size, f) != true_study_size)
1118     {
1119     FAIL_READ:
1120     fprintf(outfile, "Failed to read data from %s\n", p);
1121     if (extra != NULL) new_free(extra);
1122     if (re != NULL) new_free(re);
1123     fclose(f);
1124     continue;
1125     }
1126     fprintf(outfile, "Study data loaded from %s\n", p);
1127     do_study = 1; /* To get the data output if requested */
1128     }
1129     else fprintf(outfile, "No study data\n");
1130    
1131     fclose(f);
1132     goto SHOW_INFO;
1133     }
1134    
1135     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1136     the pattern; if it isn't complete, read more. */
1137    
1138 nigel 3 delimiter = *p++;
1139    
1140 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1141 nigel 3 {
1142 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1143 nigel 3 goto SKIP_DATA;
1144     }
1145    
1146     pp = p;
1147 nigel 93 poffset = p - buffer;
1148 nigel 3
1149     for(;;)
1150     {
1151 nigel 29 while (*pp != 0)
1152     {
1153     if (*pp == '\\' && pp[1] != 0) pp++;
1154     else if (*pp == delimiter) break;
1155     pp++;
1156     }
1157 nigel 3 if (*pp != 0) break;
1158 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1159 nigel 3 {
1160     fprintf(outfile, "** Unexpected EOF\n");
1161 nigel 11 done = 1;
1162     goto CONTINUE;
1163 nigel 3 }
1164 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1165 nigel 3 }
1166    
1167 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1168     pointer to the correct relative point in the buffer. */
1169    
1170     p = buffer + poffset;
1171    
1172 nigel 29 /* If the first character after the delimiter is backslash, make
1173     the pattern end with backslash. This is purely to provide a way
1174     of testing for the error message when a pattern ends with backslash. */
1175    
1176     if (pp[1] == '\\') *pp++ = '\\';
1177    
1178 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1179     for callouts. */
1180 nigel 3
1181     *pp++ = 0;
1182 nigel 75 strcpy((char *)pbuffer, (char *)p);
1183 nigel 3
1184     /* Look for options after final delimiter */
1185    
1186     options = 0;
1187     study_options = 0;
1188 nigel 31 log_store = showstore; /* default from command line */
1189    
1190 nigel 3 while (*pp != 0)
1191     {
1192     switch (*pp++)
1193     {
1194 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1195 nigel 35 case 'g': do_g = 1; break;
1196 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1197     case 'm': options |= PCRE_MULTILINE; break;
1198     case 's': options |= PCRE_DOTALL; break;
1199     case 'x': options |= PCRE_EXTENDED; break;
1200 nigel 25
1201 nigel 35 case '+': do_showrest = 1; break;
1202 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1203 nigel 93 case 'B': do_debug = 1; break;
1204 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1205 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1206 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1207 nigel 75 case 'F': do_flip = 1; break;
1208 nigel 35 case 'G': do_G = 1; break;
1209 nigel 25 case 'I': do_showinfo = 1; break;
1210 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1211 nigel 31 case 'M': log_store = 1; break;
1212 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1213 nigel 37
1214     #if !defined NOPOSIX
1215 nigel 3 case 'P': do_posix = 1; break;
1216 nigel 37 #endif
1217    
1218 nigel 3 case 'S': do_study = 1; break;
1219 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1220 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1221 ph10 126 case 'Z': debug_lengths = 0; break;
1222 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1223 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1224 nigel 25
1225     case 'L':
1226     ppp = pp;
1227 nigel 93 /* The '\r' test here is so that it works on Windows. */
1228     /* The '0' test is just in case this is an unterminated line. */
1229     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1230 nigel 25 *ppp = 0;
1231     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1232     {
1233     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1234     goto SKIP_DATA;
1235     }
1236 nigel 93 locale_set = 1;
1237 nigel 25 tables = pcre_maketables();
1238     pp = ppp;
1239     break;
1240    
1241 nigel 75 case '>':
1242     to_file = pp;
1243     while (*pp != 0) pp++;
1244     while (isspace(pp[-1])) pp--;
1245     *pp = 0;
1246     break;
1247    
1248 nigel 91 case '<':
1249     {
1250     int x = check_newline(pp, outfile);
1251     if (x == 0) goto SKIP_DATA;
1252     options |= x;
1253     while (*pp++ != '>');
1254     }
1255     break;
1256    
1257 nigel 77 case '\r': /* So that it works in Windows */
1258     case '\n':
1259     case ' ':
1260     break;
1261 nigel 75
1262 nigel 3 default:
1263     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1264     goto SKIP_DATA;
1265     }
1266     }
1267    
1268 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1269 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1270     local character tables. */
1271 nigel 3
1272 nigel 37 #if !defined NOPOSIX
1273 nigel 3 if (posix || do_posix)
1274     {
1275     int rc;
1276     int cflags = 0;
1277 nigel 75
1278 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1279     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1280 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1281 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1282     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1283    
1284 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1285    
1286     /* Compilation failed; go back for another re, skipping to blank line
1287     if non-interactive. */
1288    
1289     if (rc != 0)
1290     {
1291 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1292 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1293     goto SKIP_DATA;
1294     }
1295     }
1296    
1297     /* Handle compiling via the native interface */
1298    
1299     else
1300 nigel 37 #endif /* !defined NOPOSIX */
1301    
1302 nigel 3 {
1303 nigel 93 if (timeit > 0)
1304 nigel 3 {
1305     register int i;
1306     clock_t time_taken;
1307     clock_t start_time = clock();
1308 nigel 93 for (i = 0; i < timeit; i++)
1309 nigel 3 {
1310 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1311 nigel 3 if (re != NULL) free(re);
1312     }
1313     time_taken = clock() - start_time;
1314 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1315     (((double)time_taken * 1000.0) / (double)timeit) /
1316 nigel 63 (double)CLOCKS_PER_SEC);
1317 nigel 3 }
1318    
1319 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1320 nigel 3
1321     /* Compilation failed; go back for another re, skipping to blank line
1322     if non-interactive. */
1323    
1324     if (re == NULL)
1325     {
1326     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1327     SKIP_DATA:
1328     if (infile != stdin)
1329     {
1330     for (;;)
1331     {
1332 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
1333 nigel 11 {
1334     done = 1;
1335     goto CONTINUE;
1336     }
1337 nigel 3 len = (int)strlen((char *)buffer);
1338     while (len > 0 && isspace(buffer[len-1])) len--;
1339     if (len == 0) break;
1340     }
1341     fprintf(outfile, "\n");
1342     }
1343 nigel 25 goto CONTINUE;
1344 nigel 3 }
1345    
1346 nigel 43 /* Compilation succeeded; print data if required. There are now two
1347     info-returning functions. The old one has a limited interface and
1348     returns only limited data. Check that it agrees with the newer one. */
1349 nigel 3
1350 nigel 63 if (log_store)
1351     fprintf(outfile, "Memory allocation (code space): %d\n",
1352     (int)(gotten_store -
1353     sizeof(real_pcre) -
1354     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1355    
1356 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1357     and remember the store that was got. */
1358    
1359     true_size = ((real_pcre *)re)->size;
1360     regex_gotten_store = gotten_store;
1361    
1362     /* If /S was present, study the regexp to generate additional info to
1363     help with the matching. */
1364    
1365     if (do_study)
1366     {
1367 nigel 93 if (timeit > 0)
1368 nigel 75 {
1369     register int i;
1370     clock_t time_taken;
1371     clock_t start_time = clock();
1372 nigel 93 for (i = 0; i < timeit; i++)
1373 nigel 75 extra = pcre_study(re, study_options, &error);
1374     time_taken = clock() - start_time;
1375     if (extra != NULL) free(extra);
1376 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1377     (((double)time_taken * 1000.0) / (double)timeit) /
1378 nigel 75 (double)CLOCKS_PER_SEC);
1379     }
1380     extra = pcre_study(re, study_options, &error);
1381     if (error != NULL)
1382     fprintf(outfile, "Failed to study: %s\n", error);
1383     else if (extra != NULL)
1384     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1385     }
1386    
1387     /* If the 'F' option was present, we flip the bytes of all the integer
1388     fields in the regex data block and the study block. This is to make it
1389     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1390     compiled on a different architecture. */
1391    
1392     if (do_flip)
1393     {
1394     real_pcre *rre = (real_pcre *)re;
1395 ph10 259 rre->magic_number =
1396 ph10 255 byteflip(rre->magic_number, sizeof(rre->magic_number));
1397 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
1398     rre->options = byteflip(rre->options, sizeof(rre->options));
1399 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1400 ph10 259 rre->top_bracket =
1401 ph10 255 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1402 ph10 259 rre->top_backref =
1403 ph10 255 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1404 ph10 259 rre->first_byte =
1405 ph10 255 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1406 ph10 259 rre->req_byte =
1407 ph10 255 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1408     rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1409 nigel 75 sizeof(rre->name_table_offset));
1410 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1411 nigel 75 sizeof(rre->name_entry_size));
1412 ph10 259 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1413 ph10 255 sizeof(rre->name_count));
1414 nigel 75
1415     if (extra != NULL)
1416     {
1417     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1418     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1419     rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1420     }
1421     }
1422    
1423     /* Extract information from the compiled data if required */
1424    
1425     SHOW_INFO:
1426    
1427 nigel 93 if (do_debug)
1428     {
1429     fprintf(outfile, "------------------------------------------------------------------\n");
1430 ph10 116 pcre_printint(re, outfile, debug_lengths);
1431 nigel 93 }
1432    
1433 nigel 25 if (do_showinfo)
1434 nigel 3 {
1435 nigel 75 unsigned long int get_options, all_options;
1436 nigel 79 #if !defined NOINFOCHECK
1437 nigel 43 int old_first_char, old_options, old_count;
1438 nigel 79 #endif
1439 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1440 ph10 227 hascrorlf;
1441 nigel 63 int nameentrysize, namecount;
1442     const uschar *nametable;
1443 nigel 3
1444 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1445 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1446     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1447     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1448 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1449 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1450 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1451     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1452 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1453 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1454     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1455 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1456 nigel 43
1457 nigel 79 #if !defined NOINFOCHECK
1458 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1459 nigel 3 if (count < 0) fprintf(outfile,
1460 nigel 43 "Error %d from pcre_info()\n", count);
1461 nigel 3 else
1462     {
1463 nigel 43 if (old_count != count) fprintf(outfile,
1464     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1465     old_count);
1466 nigel 37
1467 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1468     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1469     first_char, old_first_char);
1470 nigel 37
1471 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1472     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1473     get_options, old_options);
1474 nigel 43 }
1475 nigel 79 #endif
1476 nigel 43
1477 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1478 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1479 nigel 77 (int)size, (int)regex_gotten_store);
1480 nigel 43
1481     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1482     if (backrefmax > 0)
1483     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1484 nigel 63
1485     if (namecount > 0)
1486     {
1487     fprintf(outfile, "Named capturing subpatterns:\n");
1488     while (namecount-- > 0)
1489     {
1490     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1491     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1492     GET2(nametable, 0));
1493     nametable += nameentrysize;
1494     }
1495     }
1496 ph10 172
1497 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1498 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1499 nigel 63
1500 nigel 75 all_options = ((real_pcre *)re)->options;
1501 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1502 nigel 75
1503 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1504 ph10 231 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1505 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1506     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1507     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1508     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1509 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1510 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1511 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1512     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1513 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1514     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1515     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1516 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1517 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1518 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1519     ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1520 ph10 172
1521 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1522 nigel 43
1523 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1524 nigel 91 {
1525     case PCRE_NEWLINE_CR:
1526     fprintf(outfile, "Forced newline sequence: CR\n");
1527     break;
1528 nigel 43
1529 nigel 91 case PCRE_NEWLINE_LF:
1530     fprintf(outfile, "Forced newline sequence: LF\n");
1531     break;
1532    
1533     case PCRE_NEWLINE_CRLF:
1534     fprintf(outfile, "Forced newline sequence: CRLF\n");
1535     break;
1536    
1537 ph10 149 case PCRE_NEWLINE_ANYCRLF:
1538     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1539     break;
1540    
1541 nigel 93 case PCRE_NEWLINE_ANY:
1542     fprintf(outfile, "Forced newline sequence: ANY\n");
1543     break;
1544    
1545 nigel 91 default:
1546     break;
1547     }
1548    
1549 nigel 43 if (first_char == -1)
1550     {
1551 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1552 nigel 43 }
1553     else if (first_char < 0)
1554     {
1555     fprintf(outfile, "No first char\n");
1556     }
1557     else
1558     {
1559 nigel 63 int ch = first_char & 255;
1560 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1561 nigel 63 "" : " (caseless)";
1562 nigel 93 if (PRINTHEX(ch))
1563 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1564 nigel 3 else
1565 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1566 nigel 43 }
1567 nigel 37
1568 nigel 43 if (need_char < 0)
1569     {
1570     fprintf(outfile, "No need char\n");
1571 nigel 3 }
1572 nigel 43 else
1573     {
1574 nigel 63 int ch = need_char & 255;
1575 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1576 nigel 63 "" : " (caseless)";
1577 nigel 93 if (PRINTHEX(ch))
1578 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1579 nigel 43 else
1580 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1581 nigel 43 }
1582 nigel 75
1583     /* Don't output study size; at present it is in any case a fixed
1584     value, but it varies, depending on the computer architecture, and
1585     so messes up the test suite. (And with the /F option, it might be
1586     flipped.) */
1587    
1588     if (do_study)
1589     {
1590     if (extra == NULL)
1591     fprintf(outfile, "Study returned NULL\n");
1592     else
1593     {
1594     uschar *start_bits = NULL;
1595     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1596    
1597     if (start_bits == NULL)
1598     fprintf(outfile, "No starting byte set\n");
1599     else
1600     {
1601     int i;
1602     int c = 24;
1603     fprintf(outfile, "Starting byte set: ");
1604     for (i = 0; i < 256; i++)
1605     {
1606     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1607     {
1608     if (c > 75)
1609     {
1610     fprintf(outfile, "\n ");
1611     c = 2;
1612     }
1613 nigel 93 if (PRINTHEX(i) && i != ' ')
1614 nigel 75 {
1615     fprintf(outfile, "%c ", i);
1616     c += 2;
1617     }
1618     else
1619     {
1620     fprintf(outfile, "\\x%02x ", i);
1621     c += 5;
1622     }
1623     }
1624     }
1625     fprintf(outfile, "\n");
1626     }
1627     }
1628     }
1629 nigel 3 }
1630    
1631 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1632     that is all. The first 8 bytes of the file are the regex length and then
1633     the study length, in big-endian order. */
1634 nigel 3
1635 nigel 75 if (to_file != NULL)
1636 nigel 3 {
1637 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1638     if (f == NULL)
1639 nigel 3 {
1640 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1641 nigel 3 }
1642 nigel 75 else
1643     {
1644     uschar sbuf[8];
1645 ph10 255 sbuf[0] = (uschar)((true_size >> 24) & 255);
1646     sbuf[1] = (uschar)((true_size >> 16) & 255);
1647     sbuf[2] = (uschar)((true_size >> 8) & 255);
1648     sbuf[3] = (uschar)((true_size) & 255);
1649 ph10 259
1650 ph10 255 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1651     sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1652     sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1653     sbuf[7] = (uschar)((true_study_size) & 255);
1654 nigel 3
1655 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1656     fwrite(re, 1, true_size, f) < true_size)
1657     {
1658     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1659     }
1660 nigel 3 else
1661     {
1662 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1663     if (extra != NULL)
1664 nigel 3 {
1665 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1666     true_study_size)
1667 nigel 3 {
1668 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1669     strerror(errno));
1670 nigel 3 }
1671 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1672 nigel 93
1673 nigel 3 }
1674     }
1675 nigel 75 fclose(f);
1676 nigel 3 }
1677 nigel 77
1678     new_free(re);
1679     if (extra != NULL) new_free(extra);
1680     if (tables != NULL) new_free((void *)tables);
1681 nigel 75 continue; /* With next regex */
1682 nigel 3 }
1683 nigel 75 } /* End of non-POSIX compile */
1684 nigel 3
1685     /* Read data lines and test them */
1686    
1687     for (;;)
1688     {
1689 nigel 87 uschar *q;
1690 ph10 147 uschar *bptr;
1691 nigel 57 int *use_offsets = offsets;
1692 nigel 53 int use_size_offsets = size_offsets;
1693 nigel 63 int callout_data = 0;
1694     int callout_data_set = 0;
1695 nigel 3 int count, c;
1696 nigel 29 int copystrings = 0;
1697 nigel 63 int find_match_limit = 0;
1698 nigel 29 int getstrings = 0;
1699     int getlist = 0;
1700 nigel 39 int gmatched = 0;
1701 nigel 35 int start_offset = 0;
1702 nigel 41 int g_notempty = 0;
1703 nigel 77 int use_dfa = 0;
1704 nigel 3
1705     options = 0;
1706    
1707 nigel 91 *copynames = 0;
1708     *getnames = 0;
1709    
1710     copynamesptr = copynames;
1711     getnamesptr = getnames;
1712    
1713 nigel 63 pcre_callout = callout;
1714     first_callout = 1;
1715     callout_extra = 0;
1716     callout_count = 0;
1717     callout_fail_count = 999999;
1718     callout_fail_id = -1;
1719 nigel 73 show_malloc = 0;
1720 nigel 63
1721 nigel 91 if (extra != NULL) extra->flags &=
1722     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1723    
1724     len = 0;
1725     for (;;)
1726 nigel 11 {
1727 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1728 nigel 91 {
1729     if (len > 0) break;
1730     done = 1;
1731     goto CONTINUE;
1732     }
1733     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1734     len = (int)strlen((char *)buffer);
1735     if (buffer[len-1] == '\n') break;
1736 nigel 11 }
1737 nigel 3
1738     while (len > 0 && isspace(buffer[len-1])) len--;
1739     buffer[len] = 0;
1740     if (len == 0) break;
1741    
1742     p = buffer;
1743     while (isspace(*p)) p++;
1744    
1745 ph10 147 bptr = q = dbuffer;
1746 nigel 3 while ((c = *p++) != 0)
1747     {
1748     int i = 0;
1749     int n = 0;
1750 nigel 63
1751 nigel 3 if (c == '\\') switch ((c = *p++))
1752     {
1753     case 'a': c = 7; break;
1754     case 'b': c = '\b'; break;
1755     case 'e': c = 27; break;
1756     case 'f': c = '\f'; break;
1757     case 'n': c = '\n'; break;
1758     case 'r': c = '\r'; break;
1759     case 't': c = '\t'; break;
1760     case 'v': c = '\v'; break;
1761    
1762     case '0': case '1': case '2': case '3':
1763     case '4': case '5': case '6': case '7':
1764     c -= '0';
1765     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1766     c = c * 8 + *p++ - '0';
1767 nigel 91
1768     #if !defined NOUTF8
1769     if (use_utf8 && c > 255)
1770     {
1771     unsigned char buff8[8];
1772     int ii, utn;
1773     utn = ord2utf8(c, buff8);
1774     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1775     c = buff8[ii]; /* Last byte */
1776     }
1777     #endif
1778 nigel 3 break;
1779    
1780     case 'x':
1781 nigel 49
1782     /* Handle \x{..} specially - new Perl thing for utf8 */
1783    
1784 nigel 79 #if !defined NOUTF8
1785 nigel 49 if (*p == '{')
1786     {
1787     unsigned char *pt = p;
1788     c = 0;
1789     while (isxdigit(*(++pt)))
1790     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1791     if (*pt == '}')
1792     {
1793 nigel 67 unsigned char buff8[8];
1794 nigel 49 int ii, utn;
1795 nigel 85 utn = ord2utf8(c, buff8);
1796 nigel 67 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1797     c = buff8[ii]; /* Last byte */
1798 nigel 49 p = pt + 1;
1799     break;
1800     }
1801     /* Not correct form; fall through */
1802     }
1803 nigel 79 #endif
1804 nigel 49
1805     /* Ordinary \x */
1806    
1807 nigel 3 c = 0;
1808     while (i++ < 2 && isxdigit(*p))
1809     {
1810     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1811     p++;
1812     }
1813     break;
1814    
1815 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1816 nigel 3 p--;
1817     continue;
1818    
1819 nigel 75 case '>':
1820     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1821     continue;
1822    
1823 nigel 3 case 'A': /* Option setting */
1824     options |= PCRE_ANCHORED;
1825     continue;
1826    
1827     case 'B':
1828     options |= PCRE_NOTBOL;
1829     continue;
1830    
1831 nigel 29 case 'C':
1832 nigel 63 if (isdigit(*p)) /* Set copy string */
1833     {
1834     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1835     copystrings |= 1 << n;
1836     }
1837     else if (isalnum(*p))
1838     {
1839 nigel 91 uschar *npp = copynamesptr;
1840 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1841 nigel 91 *npp++ = 0;
1842 nigel 67 *npp = 0;
1843 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1844 nigel 63 if (n < 0)
1845 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1846     copynamesptr = npp;
1847 nigel 63 }
1848     else if (*p == '+')
1849     {
1850     callout_extra = 1;
1851     p++;
1852     }
1853     else if (*p == '-')
1854     {
1855     pcre_callout = NULL;
1856     p++;
1857     }
1858     else if (*p == '!')
1859     {
1860     callout_fail_id = 0;
1861     p++;
1862     while(isdigit(*p))
1863     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1864     callout_fail_count = 0;
1865     if (*p == '!')
1866     {
1867     p++;
1868     while(isdigit(*p))
1869     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1870     }
1871     }
1872     else if (*p == '*')
1873     {
1874     int sign = 1;
1875     callout_data = 0;
1876     if (*(++p) == '-') { sign = -1; p++; }
1877     while(isdigit(*p))
1878     callout_data = callout_data * 10 + *p++ - '0';
1879     callout_data *= sign;
1880     callout_data_set = 1;
1881     }
1882 nigel 29 continue;
1883    
1884 nigel 79 #if !defined NODFA
1885 nigel 77 case 'D':
1886 nigel 79 #if !defined NOPOSIX
1887 nigel 77 if (posix || do_posix)
1888     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1889     else
1890 nigel 79 #endif
1891 nigel 77 use_dfa = 1;
1892     continue;
1893    
1894     case 'F':
1895     options |= PCRE_DFA_SHORTEST;
1896     continue;
1897 nigel 79 #endif
1898 nigel 77
1899 nigel 29 case 'G':
1900 nigel 63 if (isdigit(*p))
1901     {
1902     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1903     getstrings |= 1 << n;
1904     }
1905     else if (isalnum(*p))
1906     {
1907 nigel 91 uschar *npp = getnamesptr;
1908 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1909 nigel 91 *npp++ = 0;
1910 nigel 67 *npp = 0;
1911 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1912 nigel 63 if (n < 0)
1913 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1914     getnamesptr = npp;
1915 nigel 63 }
1916 nigel 29 continue;
1917    
1918     case 'L':
1919     getlist = 1;
1920     continue;
1921    
1922 nigel 63 case 'M':
1923     find_match_limit = 1;
1924     continue;
1925    
1926 nigel 37 case 'N':
1927     options |= PCRE_NOTEMPTY;
1928     continue;
1929    
1930 nigel 3 case 'O':
1931     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1932 nigel 53 if (n > size_offsets_max)
1933     {
1934     size_offsets_max = n;
1935 nigel 57 free(offsets);
1936 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1937 nigel 53 if (offsets == NULL)
1938     {
1939     printf("** Failed to get %d bytes of memory for offsets vector\n",
1940 ph10 151 (int)(size_offsets_max * sizeof(int)));
1941 nigel 77 yield = 1;
1942     goto EXIT;
1943 nigel 53 }
1944     }
1945     use_size_offsets = n;
1946 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1947 nigel 3 continue;
1948    
1949 nigel 75 case 'P':
1950     options |= PCRE_PARTIAL;
1951     continue;
1952    
1953 nigel 91 case 'Q':
1954     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1955     if (extra == NULL)
1956     {
1957     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1958     extra->flags = 0;
1959     }
1960     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1961     extra->match_limit_recursion = n;
1962     continue;
1963    
1964     case 'q':
1965     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1966     if (extra == NULL)
1967     {
1968     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1969     extra->flags = 0;
1970     }
1971     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1972     extra->match_limit = n;
1973     continue;
1974    
1975 nigel 79 #if !defined NODFA
1976 nigel 77 case 'R':
1977     options |= PCRE_DFA_RESTART;
1978     continue;
1979 nigel 79 #endif
1980 nigel 77
1981 nigel 73 case 'S':
1982     show_malloc = 1;
1983     continue;
1984    
1985 nigel 3 case 'Z':
1986     options |= PCRE_NOTEOL;
1987     continue;
1988 nigel 71
1989     case '?':
1990     options |= PCRE_NO_UTF8_CHECK;
1991     continue;
1992 nigel 91
1993     case '<':
1994     {
1995     int x = check_newline(p, outfile);
1996     if (x == 0) goto NEXT_DATA;
1997     options |= x;
1998     while (*p++ != '>');
1999     }
2000     continue;
2001 nigel 3 }
2002 nigel 9 *q++ = c;
2003 nigel 3 }
2004 nigel 9 *q = 0;
2005     len = q - dbuffer;
2006 nigel 3
2007 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
2008     {
2009     printf("**Match limit not relevant for DFA matching: ignored\n");
2010     find_match_limit = 0;
2011     }
2012    
2013 nigel 3 /* Handle matching via the POSIX interface, which does not
2014 nigel 63 support timing or playing with the match limit or callout data. */
2015 nigel 3
2016 nigel 37 #if !defined NOPOSIX
2017 nigel 3 if (posix || do_posix)
2018     {
2019     int rc;
2020     int eflags = 0;
2021 nigel 63 regmatch_t *pmatch = NULL;
2022     if (use_size_offsets > 0)
2023 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2024 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2025     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2026    
2027 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2028 nigel 3
2029     if (rc != 0)
2030     {
2031 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2032 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2033     }
2034 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2035     != 0)
2036     {
2037     fprintf(outfile, "Matched with REG_NOSUB\n");
2038     }
2039 nigel 3 else
2040     {
2041 nigel 7 size_t i;
2042 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
2043 nigel 3 {
2044     if (pmatch[i].rm_so >= 0)
2045     {
2046 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2047 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2048     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2049 nigel 3 fprintf(outfile, "\n");
2050 nigel 35 if (i == 0 && do_showrest)
2051     {
2052     fprintf(outfile, " 0+ ");
2053 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2054     outfile);
2055 nigel 35 fprintf(outfile, "\n");
2056     }
2057 nigel 3 }
2058     }
2059     }
2060 nigel 53 free(pmatch);
2061 nigel 3 }
2062    
2063 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2064 nigel 3
2065 nigel 37 else
2066     #endif /* !defined NOPOSIX */
2067    
2068 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2069 nigel 3 {
2070 nigel 93 if (timeitm > 0)
2071 nigel 3 {
2072     register int i;
2073     clock_t time_taken;
2074     clock_t start_time = clock();
2075 nigel 77
2076 nigel 79 #if !defined NODFA
2077 nigel 77 if (all_use_dfa || use_dfa)
2078     {
2079     int workspace[1000];
2080 nigel 93 for (i = 0; i < timeitm; i++)
2081 nigel 77 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2082     options | g_notempty, use_offsets, use_size_offsets, workspace,
2083     sizeof(workspace)/sizeof(int));
2084     }
2085     else
2086 nigel 79 #endif
2087 nigel 77
2088 nigel 93 for (i = 0; i < timeitm; i++)
2089 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2090 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2091 nigel 77
2092 nigel 3 time_taken = clock() - start_time;
2093 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2094     (((double)time_taken * 1000.0) / (double)timeitm) /
2095 nigel 63 (double)CLOCKS_PER_SEC);
2096 nigel 3 }
2097    
2098 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2099 nigel 87 varying limits in order to find the minimum value for the match limit and
2100     for the recursion limit. */
2101 nigel 63
2102     if (find_match_limit)
2103     {
2104     if (extra == NULL)
2105     {
2106 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2107 nigel 63 extra->flags = 0;
2108     }
2109    
2110 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2111 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2112     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2113     PCRE_ERROR_MATCHLIMIT, "match()");
2114 nigel 63
2115 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2116     options|g_notempty, use_offsets, use_size_offsets,
2117     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2118     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2119 nigel 63 }
2120    
2121     /* If callout_data is set, use the interface with additional data */
2122    
2123     else if (callout_data_set)
2124     {
2125     if (extra == NULL)
2126     {
2127 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2128 nigel 63 extra->flags = 0;
2129     }
2130     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2131 nigel 71 extra->callout_data = &callout_data;
2132 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2133     options | g_notempty, use_offsets, use_size_offsets);
2134     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2135     }
2136    
2137     /* The normal case is just to do the match once, with the default
2138     value of match_limit. */
2139    
2140 nigel 79 #if !defined NODFA
2141 nigel 77 else if (all_use_dfa || use_dfa)
2142     {
2143     int workspace[1000];
2144     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2145     options | g_notempty, use_offsets, use_size_offsets, workspace,
2146     sizeof(workspace)/sizeof(int));
2147     if (count == 0)
2148     {
2149     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2150     count = use_size_offsets/2;
2151     }
2152     }
2153 nigel 79 #endif
2154 nigel 77
2155 nigel 75 else
2156     {
2157     count = pcre_exec(re, extra, (char *)bptr, len,
2158     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2159 nigel 77 if (count == 0)
2160     {
2161     fprintf(outfile, "Matched, but too many substrings\n");
2162     count = use_size_offsets/3;
2163     }
2164 nigel 75 }
2165 nigel 3
2166 nigel 39 /* Matched */
2167    
2168 nigel 3 if (count >= 0)
2169     {
2170 nigel 93 int i, maxcount;
2171    
2172     #if !defined NODFA
2173     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2174     #endif
2175     maxcount = use_size_offsets/3;
2176    
2177     /* This is a check against a lunatic return value. */
2178    
2179     if (count > maxcount)
2180     {
2181     fprintf(outfile,
2182     "** PCRE error: returned count %d is too big for offset size %d\n",
2183     count, use_size_offsets);
2184     count = use_size_offsets/3;
2185     if (do_g || do_G)
2186     {
2187     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2188     do_g = do_G = FALSE; /* Break g/G loop */
2189     }
2190     }
2191    
2192 nigel 29 for (i = 0; i < count * 2; i += 2)
2193 nigel 3 {
2194 nigel 57 if (use_offsets[i] < 0)
2195 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2196     else
2197     {
2198     fprintf(outfile, "%2d: ", i/2);
2199 nigel 63 (void)pchars(bptr + use_offsets[i],
2200     use_offsets[i+1] - use_offsets[i], outfile);
2201 nigel 3 fprintf(outfile, "\n");
2202 nigel 35 if (i == 0)
2203     {
2204     if (do_showrest)
2205     {
2206     fprintf(outfile, " 0+ ");
2207 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2208     outfile);
2209 nigel 35 fprintf(outfile, "\n");
2210     }
2211     }
2212 nigel 3 }
2213     }
2214 nigel 29
2215     for (i = 0; i < 32; i++)
2216     {
2217     if ((copystrings & (1 << i)) != 0)
2218     {
2219 nigel 91 char copybuffer[256];
2220 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2221 nigel 37 i, copybuffer, sizeof(copybuffer));
2222 nigel 29 if (rc < 0)
2223     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2224     else
2225 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2226 nigel 29 }
2227     }
2228    
2229 nigel 91 for (copynamesptr = copynames;
2230     *copynamesptr != 0;
2231     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2232     {
2233     char copybuffer[256];
2234     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2235     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2236     if (rc < 0)
2237     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2238     else
2239     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2240     }
2241    
2242 nigel 29 for (i = 0; i < 32; i++)
2243     {
2244     if ((getstrings & (1 << i)) != 0)
2245     {
2246     const char *substring;
2247 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2248 nigel 29 i, &substring);
2249     if (rc < 0)
2250     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2251     else
2252     {
2253     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2254 nigel 49 pcre_free_substring(substring);
2255 nigel 29 }
2256     }
2257     }
2258    
2259 nigel 91 for (getnamesptr = getnames;
2260     *getnamesptr != 0;
2261     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2262     {
2263     const char *substring;
2264     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2265     count, (char *)getnamesptr, &substring);
2266     if (rc < 0)
2267     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2268     else
2269     {
2270     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2271     pcre_free_substring(substring);
2272     }
2273     }
2274    
2275 nigel 29 if (getlist)
2276     {
2277     const char **stringlist;
2278 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2279 nigel 29 &stringlist);
2280     if (rc < 0)
2281     fprintf(outfile, "get substring list failed %d\n", rc);
2282     else
2283     {
2284     for (i = 0; i < count; i++)
2285     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2286     if (stringlist[i] != NULL)
2287     fprintf(outfile, "string list not terminated by NULL\n");
2288 nigel 49 /* free((void *)stringlist); */
2289     pcre_free_substring_list(stringlist);
2290 nigel 29 }
2291     }
2292 nigel 39 }
2293 nigel 29
2294 nigel 75 /* There was a partial match */
2295    
2296     else if (count == PCRE_ERROR_PARTIAL)
2297     {
2298 nigel 77 fprintf(outfile, "Partial match");
2299 nigel 79 #if !defined NODFA
2300 nigel 77 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2301     fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2302     bptr + use_offsets[0]);
2303 nigel 79 #endif
2304 nigel 77 fprintf(outfile, "\n");
2305 nigel 75 break; /* Out of the /g loop */
2306     }
2307    
2308 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2309 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2310     to advance the start offset, and continue. We won't be at the end of the
2311     string - that was checked before setting g_notempty.
2312 nigel 39
2313 ph10 150 Complication arises in the case when the newline option is "any" or
2314 ph10 149 "anycrlf". If the previous match was at the end of a line terminated by
2315     CRLF, an advance of one character just passes the \r, whereas we should
2316     prefer the longer newline sequence, as does the code in pcre_exec().
2317     Fudge the offset value to achieve this.
2318 ph10 144
2319 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2320     character, not one byte. */
2321    
2322 nigel 3 else
2323     {
2324 nigel 41 if (g_notempty != 0)
2325 nigel 35 {
2326 nigel 73 int onechar = 1;
2327 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2328 nigel 57 use_offsets[0] = start_offset;
2329 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2330     {
2331     int d;
2332     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2333     obits = (d == '\r')? PCRE_NEWLINE_CR :
2334     (d == '\n')? PCRE_NEWLINE_LF :
2335     (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2336 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2337 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2338     }
2339 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2340 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2341 ph10 149 &&
2342 ph10 143 start_offset < len - 1 &&
2343     bptr[start_offset] == '\r' &&
2344     bptr[start_offset+1] == '\n')
2345 ph10 144 onechar++;
2346 ph10 143 else if (use_utf8)
2347 nigel 73 {
2348     while (start_offset + onechar < len)
2349     {
2350     int tb = bptr[start_offset+onechar];
2351     if (tb <= 127) break;
2352     tb &= 0xc0;
2353     if (tb != 0 && tb != 0xc0) onechar++;
2354     }
2355     }
2356     use_offsets[1] = start_offset + onechar;
2357 nigel 35 }
2358 nigel 41 else
2359     {
2360 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2361 nigel 41 {
2362 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
2363 nigel 41 }
2364 nigel 73 else fprintf(outfile, "Error %d\n", count);
2365 nigel 41 break; /* Out of the /g loop */
2366     }
2367 nigel 3 }
2368 nigel 35
2369 nigel 39 /* If not /g or /G we are done */
2370    
2371     if (!do_g && !do_G) break;
2372    
2373 nigel 41 /* If we have matched an empty string, first check to see if we are at
2374     the end of the subject. If so, the /g loop is over. Otherwise, mimic
2375     what Perl's /g option does. This turns out to be rather cunning. First
2376 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2377     same point. If this fails (picked up above) we advance to the next
2378 ph10 143 character. */
2379 ph10 142
2380 nigel 41 g_notempty = 0;
2381 ph10 142
2382 nigel 57 if (use_offsets[0] == use_offsets[1])
2383 nigel 41 {
2384 nigel 57 if (use_offsets[0] == len) break;
2385 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2386 nigel 41 }
2387 nigel 39
2388     /* For /g, update the start offset, leaving the rest alone */
2389    
2390 ph10 143 if (do_g) start_offset = use_offsets[1];
2391 nigel 39
2392     /* For /G, update the pointer and length */
2393    
2394     else
2395 nigel 35 {
2396 ph10 143 bptr += use_offsets[1];
2397     len -= use_offsets[1];
2398 nigel 35 }
2399 nigel 39 } /* End of loop for /g and /G */
2400 nigel 91
2401     NEXT_DATA: continue;
2402 nigel 39 } /* End of loop for data lines */
2403 nigel 3
2404 nigel 11 CONTINUE:
2405 nigel 37
2406     #if !defined NOPOSIX
2407 nigel 3 if (posix || do_posix) regfree(&preg);
2408 nigel 37 #endif
2409    
2410 nigel 77 if (re != NULL) new_free(re);
2411     if (extra != NULL) new_free(extra);
2412 nigel 25 if (tables != NULL)
2413     {
2414 nigel 77 new_free((void *)tables);
2415 nigel 25 setlocale(LC_CTYPE, "C");
2416 nigel 93 locale_set = 0;
2417 nigel 25 }
2418 nigel 3 }
2419    
2420 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2421 nigel 77
2422     EXIT:
2423    
2424     if (infile != NULL && infile != stdin) fclose(infile);
2425     if (outfile != NULL && outfile != stdout) fclose(outfile);
2426    
2427     free(buffer);
2428     free(dbuffer);
2429     free(pbuffer);
2430     free(offsets);
2431    
2432     return yield;
2433 nigel 3 }
2434    
2435 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12