/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 236 - (hide annotations) (download)
Tue Sep 11 12:57:06 2007 UTC (7 years, 3 months ago) by ph10
File MIME type: text/plain
File size: 69873 byte(s)
<config.h> => "config.h" and also some cases of <pcre.h>.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
43 nigel 3 #include <ctype.h>
44     #include <stdio.h>
45     #include <string.h>
46     #include <stdlib.h>
47     #include <time.h>
48 nigel 25 #include <locale.h>
49 nigel 75 #include <errno.h>
50 nigel 3
51 nigel 93
52     /* A number of things vary for Windows builds. Originally, pcretest opened its
53     input and output without "b"; then I was told that "b" was needed in some
54     environments, so it was added for release 5.0 to both the input and output. (It
55     makes no difference on Unix-like systems.) Later I was told that it is wrong
56     for the input on Windows. I've now abstracted the modes into two macros that
57     are set here, to make it easier to fiddle with them, and removed "b" from the
58     input mode under Windows. */
59    
60     #if defined(_WIN32) || defined(WIN32)
61     #include <io.h> /* For _setmode() */
62     #include <fcntl.h> /* For _O_BINARY */
63     #define INPUT_MODE "r"
64     #define OUTPUT_MODE "wb"
65    
66     #else
67     #include <sys/time.h> /* These two includes are needed */
68     #include <sys/resource.h> /* for setrlimit(). */
69     #define INPUT_MODE "rb"
70     #define OUTPUT_MODE "wb"
71 nigel 91 #endif
72    
73 nigel 93
74 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
75     displaying the results of pcre_study() and we also need to know about the
76     internal macros, structures, and other internal data values; pcretest has
77     "inside information" compared to a program that strictly follows the PCRE API.
78 nigel 37
79 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
80     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81     appropriately for an application, not for building PCRE. */
82 nigel 77
83 ph10 145 #include "pcre.h"
84 nigel 77 #include "pcre_internal.h"
85    
86 nigel 85 /* We need access to the data tables that PCRE uses. So as not to have to keep
87     two copies, we include the source file here, changing the names of the external
88     symbols to prevent clashes. */
89 nigel 77
90 nigel 85 #define _pcre_utf8_table1 utf8_table1
91     #define _pcre_utf8_table1_size utf8_table1_size
92     #define _pcre_utf8_table2 utf8_table2
93     #define _pcre_utf8_table3 utf8_table3
94     #define _pcre_utf8_table4 utf8_table4
95     #define _pcre_utt utt
96     #define _pcre_utt_size utt_size
97     #define _pcre_OP_lengths OP_lengths
98    
99     #include "pcre_tables.c"
100    
101     /* We also need the pcre_printint() function for printing out compiled
102     patterns. This function is in a separate file so that it can be included in
103 nigel 93 pcre_compile.c when that module is compiled with debugging enabled.
104 nigel 85
105 nigel 93 The definition of the macro PRINTABLE, which determines whether to print an
106     output character as-is or as a hex value when showing compiled patterns, is
107     contained in this file. We use it here also, in cases when the locale has not
108     been explicitly changed, so as to get consistent output from systems that
109     differ in their output from isprint() even in the "C" locale. */
110    
111 nigel 85 #include "pcre_printint.src"
112    
113 nigel 93 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
114 nigel 85
115 nigel 93
116 nigel 37 /* It is possible to compile this test program without including support for
117     testing the POSIX interface, though this is not available via the standard
118     Makefile. */
119    
120     #if !defined NOPOSIX
121 nigel 3 #include "pcreposix.h"
122 nigel 37 #endif
123 nigel 3
124 ph10 107 /* It is also possible, for the benefit of the version currently imported into
125     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
126     interface to the DFA matcher (NODFA), and without the doublecheck of the old
127     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
128     UTF8 support if PCRE is built without it. */
129 nigel 79
130 ph10 107 #ifndef SUPPORT_UTF8
131     #ifndef NOUTF8
132     #define NOUTF8
133     #endif
134     #endif
135 nigel 79
136 ph10 107
137 nigel 85 /* Other parameters */
138    
139 nigel 3 #ifndef CLOCKS_PER_SEC
140     #ifdef CLK_TCK
141     #define CLOCKS_PER_SEC CLK_TCK
142     #else
143     #define CLOCKS_PER_SEC 100
144     #endif
145     #endif
146    
147 nigel 93 /* This is the default loop count for timing. */
148    
149 nigel 75 #define LOOPREPEAT 500000
150 nigel 3
151 nigel 85 /* Static variables */
152    
153 nigel 3 static FILE *outfile; /* Where test output is written; main() starts it at stdout */
154     static int log_store = 0;
155 nigel 63 static int callout_count; /* Incremented by callout() when the callout number equals callout_fail_id */
156     static int callout_extra; /* Non-zero: callout() also lists the captured substrings */
157     static int callout_fail_count; /* callout() returns 1 once callout_count reaches this value */
158     static int callout_fail_id; /* Callout number that is counted towards a forced failure */
159 ph10 210 static int debug_lengths;
160 nigel 63 static int first_callout; /* Non-zero: callout() echoes the subject; callout() clears it */
161 nigel 93 static int locale_set = 0; /* Non-zero: PRINTHEX uses isprint() instead of PRINTABLE() */
162 nigel 73 static int show_malloc; /* Non-zero: the malloc/free wrappers log each call to outfile */
163 nigel 67 static int use_utf8; /* Non-zero: pchars() decodes its input as UTF-8 */
164 nigel 43 static size_t gotten_store; /* Size passed to the most recent new_malloc() call */
165 nigel 3
166 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
167    
168     static int buffer_size = 50000; /* Current size in bytes of each of the three buffers below */
169     static uschar *buffer = NULL; /* Input lines are read into this buffer */
170     static uschar *dbuffer = NULL; /* Re-allocated, but not copied, when extend_inputline() grows the buffers */
171 nigel 75 static uschar *pbuffer = NULL; /* Copy of the input for callouts; preserved when the buffers grow */
172 nigel 3
173 nigel 75
174 nigel 49
175     /*************************************************
176 nigel 91 * Read or extend an input line *
177     *************************************************/
178    
179     /* Input lines are read into buffer, but both patterns and data lines can be
180     continued over multiple input lines. In addition, if the buffer fills up, we
181     want to automatically expand it so as to be able to handle extremely large
182     lines that are needed for certain stress tests. When the input buffer is
183     expanded, the other two buffers must also be expanded likewise, and the
184     contents of pbuffer, which are a copy of the input for callouts, must be
185     preserved (for when expansion happens for a data line). This is not the most
186     optimal way of handling this, but hey, this is just a test program!
187    
188     Arguments:
189     f the file to read
190     start where in buffer to start (this *must* be within buffer)
191    
192     Returns: pointer to the start of new data
193     could be a copy of start, or could be moved
194     NULL if no data read and EOF reached
195     */
196    
197     static uschar *
198     extend_inputline(FILE *f, uschar *start)
199     {
200     uschar *here = start;
201    
202     for (;;)
203     {
204     int rlen = buffer_size - (here - buffer);
205 nigel 93
206 nigel 91 if (rlen > 1000)
207     {
208     int dlen;
209     if (fgets((char *)here, rlen, f) == NULL)
210     return (here == start)? NULL : start;
211     dlen = (int)strlen((char *)here);
212     if (dlen > 0 && here[dlen - 1] == '\n') return start;
213     here += dlen;
214     }
215    
216     else
217     {
218     int new_buffer_size = 2*buffer_size;
219     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
220     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
221     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
222    
223     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
224     {
225     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
226     exit(1);
227     }
228    
229     memcpy(new_buffer, buffer, buffer_size);
230     memcpy(new_pbuffer, pbuffer, buffer_size);
231    
232     buffer_size = new_buffer_size;
233    
234     start = new_buffer + (start - buffer);
235     here = new_buffer + (here - buffer);
236    
237     free(buffer);
238     free(dbuffer);
239     free(pbuffer);
240    
241     buffer = new_buffer;
242     dbuffer = new_dbuffer;
243     pbuffer = new_pbuffer;
244     }
245     }
246    
247     return NULL; /* Control never gets here */
248     }
249    
250    
251    
252    
253    
254    
255    
256     /*************************************************
257 nigel 63 * Read number from string *
258     *************************************************/
259    
260     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
261     around with conditional compilation, just do the job by hand. It is only used
262 nigel 93 for unpicking arguments, so just keep it simple.
263 nigel 63
264     Arguments:
265     str string to be converted
266     endptr where to put the end pointer
267    
268     Returns: the value, as an int
269     */
270    
static int
get_value(unsigned char *str, unsigned char **endptr)
{
/* Skips leading white space, then converts a run of decimal digits. *endptr
is left pointing at the first character that is not part of the number. A
value too large for an int saturates at the largest int instead of
overflowing, because signed overflow is undefined behaviour. */

/* Largest int, computed here because this file does not include <limits.h>
directly. */

const int int_max = (int)(~0u >> 1);
int result = 0;

while (*str != 0 && isspace(*str)) str++;

while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* result*10 + digit fits exactly when result <= (int_max - digit)/10.
  Once saturated, result stays at int_max for any further digits. */

  if (result > (int_max - digit) / 10) result = int_max;
    else result = result * 10 + digit;
  }

*endptr = str;
return(result);
}
280    
281    
282    
283 nigel 49
284     /*************************************************
285     * Convert UTF-8 string to value *
286     *************************************************/
287    
288     /* This function takes one or more bytes that represents a UTF-8 character,
289     and returns the value of the character.
290    
291     Argument:
292 nigel 91 utf8bytes a pointer to the byte vector
293     vptr a pointer to an int to receive the value
294 nigel 49
295 nigel 91 Returns: > 0 => the number of bytes consumed
296     -6 to 0 => malformed UTF-8 character at offset = (-return)
297 nigel 49 */
298    
299 nigel 79 #if !defined NOUTF8
300    
301 nigel 67 static int
302 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
303 nigel 49 {
304 nigel 91 int c = *utf8bytes++;
305 nigel 49 int d = c;
306     int i, j, s;
307    
308     for (i = -1; i < 6; i++) /* i is number of additional bytes */
309     {
310     if ((d & 0x80) == 0) break;
311     d <<= 1;
312     }
313    
314     if (i == -1) { *vptr = c; return 1; } /* ascii character */
315     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
316    
317     /* i now has a value in the range 1-5 */
318    
319 nigel 59 s = 6*i;
320 nigel 85 d = (c & utf8_table3[i]) << s;
321 nigel 49
322     for (j = 0; j < i; j++)
323     {
324 nigel 91 c = *utf8bytes++;
325 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
326 nigel 59 s -= 6;
327 nigel 49 d |= (c & 0x3f) << s;
328     }
329    
330     /* Check that encoding was the correct unique one */
331    
332 nigel 85 for (j = 0; j < utf8_table1_size; j++)
333     if (d <= utf8_table1[j]) break;
334 nigel 49 if (j != i) return -(i+1);
335    
336     /* Valid value */
337    
338     *vptr = d;
339     return i+1;
340     }
341    
342 nigel 79 #endif
343 nigel 49
344    
345 nigel 79
346 nigel 63 /*************************************************
347 nigel 85 * Convert character value to UTF-8 *
348     *************************************************/
349    
350     /* This function takes an integer value in the range 0 - 0x7fffffff
351     and encodes it as a UTF-8 character in 1 to 6 bytes.
352    
353     Arguments:
354     cvalue the character value
355 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
356 nigel 85
357     Returns: number of bytes placed in the buffer
358     */
359    
360 nigel 93 #if !defined NOUTF8
361    
362 nigel 85 static int
363 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
364 nigel 85 {
365     register int i, j;
366     for (i = 0; i < utf8_table1_size; i++)
367     if (cvalue <= utf8_table1[i]) break;
368 nigel 91 utf8bytes += i;
369 nigel 85 for (j = i; j > 0; j--)
370     {
371 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
372 nigel 85 cvalue >>= 6;
373     }
374 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
375 nigel 85 return i + 1;
376     }
377    
378 nigel 93 #endif
379 nigel 85
380    
381 nigel 93
382 nigel 85 /*************************************************
383 nigel 63 * Print character string *
384     *************************************************/
385 nigel 49
386 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
387     mode. Yields number of characters printed. If handed a NULL file, just counts
388     chars without printing. */
389 nigel 49
390 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
391 nigel 3 {
392 nigel 85 int c = 0;
393 nigel 63 int yield = 0;
394 nigel 3
395 nigel 63 while (length-- > 0)
396 nigel 3 {
397 nigel 79 #if !defined NOUTF8
398 nigel 67 if (use_utf8)
399 nigel 63 {
400     int rc = utf82ord(p, &c);
401 nigel 3
402 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
403     {
404     length -= rc - 1;
405     p += rc;
406 nigel 93 if (PRINTHEX(c))
407 nigel 63 {
408     if (f != NULL) fprintf(f, "%c", c);
409     yield++;
410     }
411     else
412     {
413 nigel 93 int n = 4;
414     if (f != NULL) fprintf(f, "\\x{%02x}", c);
415     yield += (n <= 0x000000ff)? 2 :
416     (n <= 0x00000fff)? 3 :
417     (n <= 0x0000ffff)? 4 :
418     (n <= 0x000fffff)? 5 : 6;
419 nigel 63 }
420     continue;
421     }
422     }
423 nigel 79 #endif
424 nigel 3
425 nigel 63 /* Not UTF-8, or malformed UTF-8 */
426    
427 nigel 93 c = *p++;
428     if (PRINTHEX(c))
429 nigel 3 {
430 nigel 63 if (f != NULL) fprintf(f, "%c", c);
431     yield++;
432 nigel 3 }
433 nigel 63 else
434 nigel 3 {
435 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
436     yield += 4;
437     }
438     }
439 nigel 3
440 nigel 63 return yield;
441     }
442 nigel 23
443 nigel 3
444 nigel 23
445 nigel 63 /*************************************************
446     * Callout function *
447     *************************************************/
448 nigel 3
449 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
450     the match. Yield zero unless more callouts than the fail count, or the callout
451     data is not zero. */
452 nigel 3
453 nigel 63 static int callout(pcre_callout_block *cb)
454     {
455     FILE *f = (first_callout | callout_extra)? outfile : NULL;
456 nigel 75 int i, pre_start, post_start, subject_length;
457 nigel 3
458 nigel 63 if (callout_extra)
459     {
460     fprintf(f, "Callout %d: last capture = %d\n",
461     cb->callout_number, cb->capture_last);
462 nigel 3
463 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
464     {
465     if (cb->offset_vector[i] < 0)
466     fprintf(f, "%2d: <unset>\n", i/2);
467     else
468     {
469     fprintf(f, "%2d: ", i/2);
470     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
471     cb->offset_vector[i+1] - cb->offset_vector[i], f);
472     fprintf(f, "\n");
473     }
474     }
475     }
476 nigel 3
477 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
478     datails. On subsequent calls in the same match, we use pchars just to find the
479     printed lengths of the substrings. */
480 nigel 3
481 nigel 63 if (f != NULL) fprintf(f, "--->");
482 nigel 3
483 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
484     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
485     cb->current_position - cb->start_match, f);
486 nigel 3
487 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
488    
489 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
490     cb->subject_length - cb->current_position, f);
491 nigel 3
492 nigel 63 if (f != NULL) fprintf(f, "\n");
493 nigel 9
494 nigel 63 /* Always print appropriate indicators, with callout number if not already
495 nigel 75 shown. For automatic callouts, show the pattern offset. */
496 nigel 3
497 nigel 75 if (cb->callout_number == 255)
498     {
499     fprintf(outfile, "%+3d ", cb->pattern_position);
500     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
501     }
502     else
503     {
504     if (callout_extra) fprintf(outfile, " ");
505     else fprintf(outfile, "%3d ", cb->callout_number);
506     }
507 nigel 3
508 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
509     fprintf(outfile, "^");
510 nigel 3
511 nigel 63 if (post_start > 0)
512     {
513     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
514     fprintf(outfile, "^");
515 nigel 3 }
516    
517 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
518     fprintf(outfile, " ");
519    
520     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
521     pbuffer + cb->pattern_position);
522    
523 nigel 63 fprintf(outfile, "\n");
524     first_callout = 0;
525 nigel 3
526 nigel 71 if (cb->callout_data != NULL)
527 nigel 49 {
528 nigel 71 int callout_data = *((int *)(cb->callout_data));
529     if (callout_data != 0)
530     {
531     fprintf(outfile, "Callout data = %d\n", callout_data);
532     return callout_data;
533     }
534 nigel 63 }
535 nigel 49
536 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
537     (++callout_count >= callout_fail_count)? 1 : 0;
538 nigel 3 }
539    
540    
541 nigel 63 /*************************************************
542 nigel 73 * Local malloc functions *
543 nigel 63 *************************************************/
544 nigel 3
545     /* Alternative malloc function, to test functionality and show the size of the
546     compiled re. */
547    
548     static void *new_malloc(size_t size)
549     {
550 nigel 73 void *block = malloc(size);
551 nigel 43 gotten_store = size;
552 nigel 73 if (show_malloc)
553 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
554 nigel 73 return block;
555 nigel 3 }
556    
557 nigel 73 static void new_free(void *block)
558     {
559     if (show_malloc)
560     fprintf(outfile, "free %p\n", block);
561     free(block);
562     }
563 nigel 3
564    
565 nigel 73 /* For recursion malloc/free, to test stacking calls */
566    
567     static void *stack_malloc(size_t size)
568     {
569     void *block = malloc(size);
570     if (show_malloc)
571 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
572 nigel 73 return block;
573     }
574    
575     static void stack_free(void *block)
576     {
577     if (show_malloc)
578     fprintf(outfile, "stack_free %p\n", block);
579     free(block);
580     }
581    
582    
583 nigel 63 /*************************************************
584     * Call pcre_fullinfo() *
585     *************************************************/
586 nigel 43
587     /* Get one piece of information from the pcre_fullinfo() function */
588    
589     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
590     {
591     int rc;
592     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
593     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
594     }
595    
596    
597    
598 nigel 63 /*************************************************
599 nigel 75 * Byte flipping function *
600     *************************************************/
601    
static unsigned long int
byteflip(unsigned long int value, int n)
{
/* Reverses the byte order of the low two bytes (n == 2) or the low four bytes
(any other n) of value. Bits above those bytes do not appear in the result. */

unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;

return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
611    
612    
613    
614    
615     /*************************************************
616 nigel 87 * Check match or recursion limit *
617     *************************************************/
618    
619     static int
620     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
621     int start_offset, int options, int *use_offsets, int use_size_offsets,
622     int flag, unsigned long int *limit, int errnumber, const char *msg)
623     {
624     int count;
625     int min = 0;
626     int mid = 64;
627     int max = -1;
628    
629     extra->flags |= flag;
630    
631     for (;;)
632     {
633     *limit = mid;
634    
635     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
636     use_offsets, use_size_offsets);
637    
638     if (count == errnumber)
639     {
640     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
641     min = mid;
642     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
643     }
644    
645     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
646     count == PCRE_ERROR_PARTIAL)
647     {
648     if (mid == min + 1)
649     {
650     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
651     break;
652     }
653     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
654     max = mid;
655     mid = (min + mid)/2;
656     }
657     else break; /* Some other error */
658     }
659    
660     extra->flags &= ~flag;
661     return count;
662     }
663    
664    
665    
666     /*************************************************
667 ph10 227 * Case-independent strncmp() function *
668     *************************************************/
669    
670     /*
671     Arguments:
672     s first string
673     t second string
674     n number of characters to compare
675    
676     Returns: < 0, = 0, or > 0, according to the comparison
677     */
678    
679     static int
680     strncmpic(uschar *s, uschar *t, int n)
681     {
682     while (n--)
683     {
684     int c = tolower(*s++) - tolower(*t++);
685     if (c) return c;
686     }
687     return 0;
688     }
689    
690    
691    
692     /*************************************************
693 nigel 91 * Check newline indicator *
694     *************************************************/
695    
696     /* This is used both at compile and run-time to check for <xxx> escapes, where
697 ph10 149 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
698     no match.
699 nigel 91
700     Arguments:
701     p points after the leading '<'
702     f file for error message
703    
704     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
705     */
706    
707     static int
708     check_newline(uschar *p, FILE *f)
709     {
710 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
711     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
712     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
713     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
714     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
715 ph10 231 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
716     if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
717 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
718     return 0;
719     }
720    
721    
722    
723     /*************************************************
724 nigel 93 * Usage function *
725     *************************************************/
726    
static void
usage(void)
{
/* The help text is kept as a table, one entry per output line. Options that
are compiled out (NODFA, NOPOSIX) are left out of the table. */

static const char *const help[] = {
  "Usage: pcretest [options] [<input> [<output>]]\n",
  " -b show compiled code (bytecode)\n",
  " -C show PCRE compile-time options and exit\n",
  " -d debug: show compiled code and information (-b and -i)\n",
#if !defined NODFA
  " -dfa force DFA matching for all subjects\n",
#endif
  " -help show usage information\n",
  " -i show information about compiled patterns\n",
  " -m output memory used information\n",
  " -o <n> set size of offsets vector to <n>\n",
#if !defined NOPOSIX
  " -p use POSIX interface\n",
#endif
  " -q quiet: do not output PCRE version number at start\n",
  " -S <n> set stack size to <n> megabytes\n",
  " -s output store (memory) used information\n",
  " -t time compilation and execution\n",
  " -t <n> time compilation and execution, repeating <n> times\n",
  " -tm time execution (matching) only\n",
  " -tm <n> time execution (matching) only, repeating <n> times\n"
};
int k;

for (k = 0; k < (int)(sizeof(help)/sizeof(help[0])); k++)
  printf("%s", help[k]);
}
752    
753    
754    
755     /*************************************************
756 nigel 63 * Main Program *
757     *************************************************/
758 nigel 43
759 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
760     consist of a regular expression, in delimiters and optionally followed by
761     options, followed by a set of test data, terminated by an empty line. */
762    
763     int main(int argc, char **argv)
764     {
765     FILE *infile = stdin;
766     int options = 0;
767     int study_options = 0;
768     int op = 1;
769     int timeit = 0;
770 nigel 93 int timeitm = 0;
771 nigel 3 int showinfo = 0;
772 nigel 31 int showstore = 0;
773 nigel 87 int quiet = 0;
774 nigel 53 int size_offsets = 45;
775     int size_offsets_max;
776 nigel 77 int *offsets = NULL;
777 nigel 53 #if !defined NOPOSIX
778 nigel 3 int posix = 0;
779 nigel 53 #endif
780 nigel 3 int debug = 0;
781 nigel 11 int done = 0;
782 nigel 77 int all_use_dfa = 0;
783     int yield = 0;
784 nigel 91 int stack_size;
785 nigel 3
786 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
787     that 1024 is plenty long enough for the few names we'll be testing. */
788 nigel 69
789 nigel 91 uschar copynames[1024];
790     uschar getnames[1024];
791    
792     uschar *copynamesptr;
793     uschar *getnamesptr;
794    
795 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
796 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
797 nigel 69
798 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
799     dbuffer = (unsigned char *)malloc(buffer_size);
800     pbuffer = (unsigned char *)malloc(buffer_size);
801 nigel 69
802 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
803 nigel 3
804 nigel 93 outfile = stdout;
805    
806     /* The following _setmode() stuff is some Windows magic that tells its runtime
807     library to translate CRLF into a single LF character. At least, that's what
808     I've been told: never having used Windows I take this all on trust. Originally
809     it set 0x8000, but then I was advised that _O_BINARY was better. */
810    
811 nigel 75 #if defined(_WIN32) || defined(WIN32)
812 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
813     #endif
814 nigel 75
815 nigel 3 /* Scan options */
816    
817     while (argc > 1 && argv[op][0] == '-')
818     {
819 nigel 63 unsigned char *endptr;
820 nigel 53
821 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
822     showstore = 1;
823 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
824 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
825 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
826     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
827 nigel 79 #if !defined NODFA
828 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
829 nigel 79 #endif
830 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
831 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
832     *endptr == 0))
833 nigel 53 {
834     op++;
835     argc--;
836     }
837 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
838     {
839     int both = argv[op][2] == 0;
840     int temp;
841     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
842     *endptr == 0))
843     {
844     timeitm = temp;
845     op++;
846     argc--;
847     }
848     else timeitm = LOOPREPEAT;
849     if (both) timeit = timeitm;
850     }
851 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
852     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
853     *endptr == 0))
854     {
855 nigel 93 #if defined(_WIN32) || defined(WIN32)
856 nigel 91 printf("PCRE: -S not supported on this OS\n");
857     exit(1);
858     #else
859     int rc;
860     struct rlimit rlim;
861     getrlimit(RLIMIT_STACK, &rlim);
862     rlim.rlim_cur = stack_size * 1024 * 1024;
863     rc = setrlimit(RLIMIT_STACK, &rlim);
864     if (rc != 0)
865     {
866     printf("PCRE: setrlimit() failed with error %d\n", rc);
867     exit(1);
868     }
869     op++;
870     argc--;
871     #endif
872     }
873 nigel 53 #if !defined NOPOSIX
874 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
875 nigel 53 #endif
876 nigel 63 else if (strcmp(argv[op], "-C") == 0)
877     {
878     int rc;
879     printf("PCRE version %s\n", pcre_version());
880     printf("Compiled with\n");
881     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
882     printf(" %sUTF-8 support\n", rc? "" : "No ");
883 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
884     printf(" %sUnicode properties support\n", rc? "" : "No ");
885 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
886 nigel 91 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
887 nigel 93 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
888 ph10 150 (rc == -2)? "ANYCRLF" :
889 nigel 93 (rc == -1)? "ANY" : "???");
890 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
891     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
892     "all Unicode newlines");
893 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
894     printf(" Internal link size = %d\n", rc);
895     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
896     printf(" POSIX malloc threshold = %d\n", rc);
897     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
898     printf(" Default match limit = %d\n", rc);
899 nigel 87 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
900     printf(" Default recursion depth limit = %d\n", rc);
901 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
902     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
903 ph10 121 goto EXIT;
904 nigel 63 }
905 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
906     strcmp(argv[op], "--help") == 0)
907     {
908     usage();
909     goto EXIT;
910     }
911 nigel 3 else
912     {
913 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
914 nigel 93 usage();
915 nigel 77 yield = 1;
916     goto EXIT;
917 nigel 3 }
918     op++;
919     argc--;
920     }
921    
922 nigel 53 /* Get the store for the offsets vector, and remember what it was */
923    
924     size_offsets_max = size_offsets;
925 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
926 nigel 53 if (offsets == NULL)
927     {
928     printf("** Failed to get %d bytes of memory for offsets vector\n",
929 ph10 151 (int)(size_offsets_max * sizeof(int)));
930 nigel 77 yield = 1;
931     goto EXIT;
932 nigel 53 }
933    
934 nigel 3 /* Sort out the input and output files */
935    
936     if (argc > 1)
937     {
938 nigel 93 infile = fopen(argv[op], INPUT_MODE);
939 nigel 3 if (infile == NULL)
940     {
941     printf("** Failed to open %s\n", argv[op]);
942 nigel 77 yield = 1;
943     goto EXIT;
944 nigel 3 }
945     }
946    
947     if (argc > 2)
948     {
949 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
950 nigel 3 if (outfile == NULL)
951     {
952     printf("** Failed to open %s\n", argv[op+1]);
953 nigel 77 yield = 1;
954     goto EXIT;
955 nigel 3 }
956     }
957    
958     /* Set alternative malloc function */
959    
960     pcre_malloc = new_malloc;
961 nigel 73 pcre_free = new_free;
962     pcre_stack_malloc = stack_malloc;
963     pcre_stack_free = stack_free;
964 nigel 3
965 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
966 nigel 3
967 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
968 nigel 3
969     /* Main loop */
970    
971 nigel 11 while (!done)
972 nigel 3 {
973     pcre *re = NULL;
974     pcre_extra *extra = NULL;
975 nigel 37
976     #if !defined NOPOSIX /* There are still compilers that require no indent */
977 nigel 3 regex_t preg;
978 nigel 45 int do_posix = 0;
979 nigel 37 #endif
980    
981 nigel 7 const char *error;
982 nigel 25 unsigned char *p, *pp, *ppp;
983 nigel 75 unsigned char *to_file = NULL;
984 nigel 53 const unsigned char *tables = NULL;
985 nigel 75 unsigned long int true_size, true_study_size = 0;
986     size_t size, regex_gotten_store;
987 nigel 3 int do_study = 0;
988 nigel 25 int do_debug = debug;
989 nigel 35 int do_G = 0;
990     int do_g = 0;
991 nigel 25 int do_showinfo = showinfo;
992 nigel 35 int do_showrest = 0;
993 nigel 75 int do_flip = 0;
994 nigel 93 int erroroffset, len, delimiter, poffset;
995 nigel 3
996 nigel 67 use_utf8 = 0;
997 ph10 211 debug_lengths = 1;
998 nigel 63
999 nigel 3 if (infile == stdin) printf(" re> ");
1000 nigel 91 if (extend_inputline(infile, buffer) == NULL) break;
1001 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1002 nigel 63 fflush(outfile);
1003 nigel 3
1004     p = buffer;
1005     while (isspace(*p)) p++;
1006     if (*p == 0) continue;
1007    
1008 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1009 nigel 3
1010 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1011     {
1012 nigel 91 unsigned long int magic, get_options;
1013 nigel 75 uschar sbuf[8];
1014     FILE *f;
1015    
1016     p++;
1017     pp = p + (int)strlen((char *)p);
1018     while (isspace(pp[-1])) pp--;
1019     *pp = 0;
1020    
1021     f = fopen((char *)p, "rb");
1022     if (f == NULL)
1023     {
1024     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1025     continue;
1026     }
1027    
1028     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1029    
1030     true_size =
1031     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1032     true_study_size =
1033     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1034    
1035     re = (real_pcre *)new_malloc(true_size);
1036     regex_gotten_store = gotten_store;
1037    
1038     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1039    
1040     magic = ((real_pcre *)re)->magic_number;
1041     if (magic != MAGIC_NUMBER)
1042     {
1043     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1044     {
1045     do_flip = 1;
1046     }
1047     else
1048     {
1049     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1050     fclose(f);
1051     continue;
1052     }
1053     }
1054    
1055     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1056     do_flip? " (byte-inverted)" : "", p);
1057    
1058     /* Need to know if UTF-8 for printing data strings */
1059    
1060 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1061     use_utf8 = (get_options & PCRE_UTF8) != 0;
1062 nigel 75
1063     /* Now see if there is any following study data */
1064    
1065     if (true_study_size != 0)
1066     {
1067     pcre_study_data *psd;
1068    
1069     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1070     extra->flags = PCRE_EXTRA_STUDY_DATA;
1071    
1072     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1073     extra->study_data = psd;
1074    
1075     if (fread(psd, 1, true_study_size, f) != true_study_size)
1076     {
1077     FAIL_READ:
1078     fprintf(outfile, "Failed to read data from %s\n", p);
1079     if (extra != NULL) new_free(extra);
1080     if (re != NULL) new_free(re);
1081     fclose(f);
1082     continue;
1083     }
1084     fprintf(outfile, "Study data loaded from %s\n", p);
1085     do_study = 1; /* To get the data output if requested */
1086     }
1087     else fprintf(outfile, "No study data\n");
1088    
1089     fclose(f);
1090     goto SHOW_INFO;
1091     }
1092    
1093     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1094     the pattern; if it isn't complete, read more. */
1095    
1096 nigel 3 delimiter = *p++;
1097    
1098 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1099 nigel 3 {
1100 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1101 nigel 3 goto SKIP_DATA;
1102     }
1103    
1104     pp = p;
1105 nigel 93 poffset = p - buffer;
1106 nigel 3
1107     for(;;)
1108     {
1109 nigel 29 while (*pp != 0)
1110     {
1111     if (*pp == '\\' && pp[1] != 0) pp++;
1112     else if (*pp == delimiter) break;
1113     pp++;
1114     }
1115 nigel 3 if (*pp != 0) break;
1116     if (infile == stdin) printf(" > ");
1117 nigel 91 if ((pp = extend_inputline(infile, pp)) == NULL)
1118 nigel 3 {
1119     fprintf(outfile, "** Unexpected EOF\n");
1120 nigel 11 done = 1;
1121     goto CONTINUE;
1122 nigel 3 }
1123 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1124 nigel 3 }
1125    
1126 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1127     pointer to the correct relative point in the buffer. */
1128    
1129     p = buffer + poffset;
1130    
1131 nigel 29 /* If the first character after the delimiter is backslash, make
1132     the pattern end with backslash. This is purely to provide a way
1133     of testing for the error message when a pattern ends with backslash. */
1134    
1135     if (pp[1] == '\\') *pp++ = '\\';
1136    
1137 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1138     for callouts. */
1139 nigel 3
1140     *pp++ = 0;
1141 nigel 75 strcpy((char *)pbuffer, (char *)p);
1142 nigel 3
1143     /* Look for options after final delimiter */
1144    
1145     options = 0;
1146     study_options = 0;
1147 nigel 31 log_store = showstore; /* default from command line */
1148    
1149 nigel 3 while (*pp != 0)
1150     {
1151     switch (*pp++)
1152     {
1153 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1154 nigel 35 case 'g': do_g = 1; break;
1155 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1156     case 'm': options |= PCRE_MULTILINE; break;
1157     case 's': options |= PCRE_DOTALL; break;
1158     case 'x': options |= PCRE_EXTENDED; break;
1159 nigel 25
1160 nigel 35 case '+': do_showrest = 1; break;
1161 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1162 nigel 93 case 'B': do_debug = 1; break;
1163 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1164 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1165 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1166 nigel 75 case 'F': do_flip = 1; break;
1167 nigel 35 case 'G': do_G = 1; break;
1168 nigel 25 case 'I': do_showinfo = 1; break;
1169 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1170 nigel 31 case 'M': log_store = 1; break;
1171 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1172 nigel 37
1173     #if !defined NOPOSIX
1174 nigel 3 case 'P': do_posix = 1; break;
1175 nigel 37 #endif
1176    
1177 nigel 3 case 'S': do_study = 1; break;
1178 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1179 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1180 ph10 126 case 'Z': debug_lengths = 0; break;
1181 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1182 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1183 nigel 25
1184     case 'L':
1185     ppp = pp;
1186 nigel 93 /* The '\r' test here is so that it works on Windows. */
1187     /* The '0' test is just in case this is an unterminated line. */
1188     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1189 nigel 25 *ppp = 0;
1190     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1191     {
1192     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1193     goto SKIP_DATA;
1194     }
1195 nigel 93 locale_set = 1;
1196 nigel 25 tables = pcre_maketables();
1197     pp = ppp;
1198     break;
1199    
1200 nigel 75 case '>':
1201     to_file = pp;
1202     while (*pp != 0) pp++;
1203     while (isspace(pp[-1])) pp--;
1204     *pp = 0;
1205     break;
1206    
1207 nigel 91 case '<':
1208     {
1209     int x = check_newline(pp, outfile);
1210     if (x == 0) goto SKIP_DATA;
1211     options |= x;
1212     while (*pp++ != '>');
1213     }
1214     break;
1215    
1216 nigel 77 case '\r': /* So that it works in Windows */
1217     case '\n':
1218     case ' ':
1219     break;
1220 nigel 75
1221 nigel 3 default:
1222     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1223     goto SKIP_DATA;
1224     }
1225     }
1226    
1227 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1228 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1229     local character tables. */
1230 nigel 3
1231 nigel 37 #if !defined NOPOSIX
1232 nigel 3 if (posix || do_posix)
1233     {
1234     int rc;
1235     int cflags = 0;
1236 nigel 75
1237 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1238     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1239 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1240 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1241     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1242    
1243 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1244    
1245     /* Compilation failed; go back for another re, skipping to blank line
1246     if non-interactive. */
1247    
1248     if (rc != 0)
1249     {
1250 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1251 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1252     goto SKIP_DATA;
1253     }
1254     }
1255    
1256     /* Handle compiling via the native interface */
1257    
1258     else
1259 nigel 37 #endif /* !defined NOPOSIX */
1260    
1261 nigel 3 {
1262 nigel 93 if (timeit > 0)
1263 nigel 3 {
1264     register int i;
1265     clock_t time_taken;
1266     clock_t start_time = clock();
1267 nigel 93 for (i = 0; i < timeit; i++)
1268 nigel 3 {
1269 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1270 nigel 3 if (re != NULL) free(re);
1271     }
1272     time_taken = clock() - start_time;
1273 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1274     (((double)time_taken * 1000.0) / (double)timeit) /
1275 nigel 63 (double)CLOCKS_PER_SEC);
1276 nigel 3 }
1277    
1278 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1279 nigel 3
1280     /* Compilation failed; go back for another re, skipping to blank line
1281     if non-interactive. */
1282    
1283     if (re == NULL)
1284     {
1285     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1286     SKIP_DATA:
1287     if (infile != stdin)
1288     {
1289     for (;;)
1290     {
1291 nigel 91 if (extend_inputline(infile, buffer) == NULL)
1292 nigel 11 {
1293     done = 1;
1294     goto CONTINUE;
1295     }
1296 nigel 3 len = (int)strlen((char *)buffer);
1297     while (len > 0 && isspace(buffer[len-1])) len--;
1298     if (len == 0) break;
1299     }
1300     fprintf(outfile, "\n");
1301     }
1302 nigel 25 goto CONTINUE;
1303 nigel 3 }
1304    
1305 nigel 43 /* Compilation succeeded; print data if required. There are now two
1306     info-returning functions. The old one has a limited interface and
1307     returns only limited data. Check that it agrees with the newer one. */
1308 nigel 3
1309 nigel 63 if (log_store)
1310     fprintf(outfile, "Memory allocation (code space): %d\n",
1311     (int)(gotten_store -
1312     sizeof(real_pcre) -
1313     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1314    
1315 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1316     and remember the store that was got. */
1317    
1318     true_size = ((real_pcre *)re)->size;
1319     regex_gotten_store = gotten_store;
1320    
1321     /* If /S was present, study the regexp to generate additional info to
1322     help with the matching. */
1323    
1324     if (do_study)
1325     {
1326 nigel 93 if (timeit > 0)
1327 nigel 75 {
1328     register int i;
1329     clock_t time_taken;
1330     clock_t start_time = clock();
1331 nigel 93 for (i = 0; i < timeit; i++)
1332 nigel 75 extra = pcre_study(re, study_options, &error);
1333     time_taken = clock() - start_time;
1334     if (extra != NULL) free(extra);
1335 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1336     (((double)time_taken * 1000.0) / (double)timeit) /
1337 nigel 75 (double)CLOCKS_PER_SEC);
1338     }
1339     extra = pcre_study(re, study_options, &error);
1340     if (error != NULL)
1341     fprintf(outfile, "Failed to study: %s\n", error);
1342     else if (extra != NULL)
1343     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1344     }
1345    
1346     /* If the 'F' option was present, we flip the bytes of all the integer
1347     fields in the regex data block and the study block. This is to make it
1348     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1349     compiled on a different architecture. */
1350    
1351     if (do_flip)
1352     {
1353     real_pcre *rre = (real_pcre *)re;
1354     rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1355     rre->size = byteflip(rre->size, sizeof(rre->size));
1356     rre->options = byteflip(rre->options, sizeof(rre->options));
1357 ph10 231 rre->flags = byteflip(rre->flags, sizeof(rre->flags));
1358 nigel 75 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1359     rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1360     rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1361     rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1362     rre->name_table_offset = byteflip(rre->name_table_offset,
1363     sizeof(rre->name_table_offset));
1364     rre->name_entry_size = byteflip(rre->name_entry_size,
1365     sizeof(rre->name_entry_size));
1366     rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1367    
1368     if (extra != NULL)
1369     {
1370     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1371     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1372     rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1373     }
1374     }
1375    
1376     /* Extract information from the compiled data if required */
1377    
1378     SHOW_INFO:
1379    
1380 nigel 93 if (do_debug)
1381     {
1382     fprintf(outfile, "------------------------------------------------------------------\n");
1383 ph10 116 pcre_printint(re, outfile, debug_lengths);
1384 nigel 93 }
1385    
1386 nigel 25 if (do_showinfo)
1387 nigel 3 {
1388 nigel 75 unsigned long int get_options, all_options;
1389 nigel 79 #if !defined NOINFOCHECK
1390 nigel 43 int old_first_char, old_options, old_count;
1391 nigel 79 #endif
1392 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1393 ph10 227 hascrorlf;
1394 nigel 63 int nameentrysize, namecount;
1395     const uschar *nametable;
1396 nigel 3
1397 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1398 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1399     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1400     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1401 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1402 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1403 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1404     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1405 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1406 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1407     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1408 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1409 nigel 43
1410 nigel 79 #if !defined NOINFOCHECK
1411 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1412 nigel 3 if (count < 0) fprintf(outfile,
1413 nigel 43 "Error %d from pcre_info()\n", count);
1414 nigel 3 else
1415     {
1416 nigel 43 if (old_count != count) fprintf(outfile,
1417     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1418     old_count);
1419 nigel 37
1420 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1421     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1422     first_char, old_first_char);
1423 nigel 37
1424 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1425     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1426     get_options, old_options);
1427 nigel 43 }
1428 nigel 79 #endif
1429 nigel 43
1430 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1431 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1432 nigel 77 (int)size, (int)regex_gotten_store);
1433 nigel 43
1434     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1435     if (backrefmax > 0)
1436     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1437 nigel 63
1438     if (namecount > 0)
1439     {
1440     fprintf(outfile, "Named capturing subpatterns:\n");
1441     while (namecount-- > 0)
1442     {
1443     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1444     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1445     GET2(nametable, 0));
1446     nametable += nameentrysize;
1447     }
1448     }
1449 ph10 172
1450 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1451 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1452 nigel 63
1453 nigel 75 all_options = ((real_pcre *)re)->options;
1454 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1455 nigel 75
1456 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1457 ph10 231 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1458 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1459     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1460     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1461     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1462 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1463 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1464 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1465     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1466 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1467     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1468     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1469 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1470 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1471 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1472     ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1473 ph10 172
1474 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1475 nigel 43
1476 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1477 nigel 91 {
1478     case PCRE_NEWLINE_CR:
1479     fprintf(outfile, "Forced newline sequence: CR\n");
1480     break;
1481 nigel 43
1482 nigel 91 case PCRE_NEWLINE_LF:
1483     fprintf(outfile, "Forced newline sequence: LF\n");
1484     break;
1485    
1486     case PCRE_NEWLINE_CRLF:
1487     fprintf(outfile, "Forced newline sequence: CRLF\n");
1488     break;
1489    
1490 ph10 149 case PCRE_NEWLINE_ANYCRLF:
1491     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1492     break;
1493    
1494 nigel 93 case PCRE_NEWLINE_ANY:
1495     fprintf(outfile, "Forced newline sequence: ANY\n");
1496     break;
1497    
1498 nigel 91 default:
1499     break;
1500     }
1501    
1502 nigel 43 if (first_char == -1)
1503     {
1504 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1505 nigel 43 }
1506     else if (first_char < 0)
1507     {
1508     fprintf(outfile, "No first char\n");
1509     }
1510     else
1511     {
1512 nigel 63 int ch = first_char & 255;
1513 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1514 nigel 63 "" : " (caseless)";
1515 nigel 93 if (PRINTHEX(ch))
1516 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1517 nigel 3 else
1518 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1519 nigel 43 }
1520 nigel 37
1521 nigel 43 if (need_char < 0)
1522     {
1523     fprintf(outfile, "No need char\n");
1524 nigel 3 }
1525 nigel 43 else
1526     {
1527 nigel 63 int ch = need_char & 255;
1528 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1529 nigel 63 "" : " (caseless)";
1530 nigel 93 if (PRINTHEX(ch))
1531 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1532 nigel 43 else
1533 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1534 nigel 43 }
1535 nigel 75
1536     /* Don't output study size; at present it is in any case a fixed
1537     value, but it varies, depending on the computer architecture, and
1538     so messes up the test suite. (And with the /F option, it might be
1539     flipped.) */
1540    
1541     if (do_study)
1542     {
1543     if (extra == NULL)
1544     fprintf(outfile, "Study returned NULL\n");
1545     else
1546     {
1547     uschar *start_bits = NULL;
1548     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1549    
1550     if (start_bits == NULL)
1551     fprintf(outfile, "No starting byte set\n");
1552     else
1553     {
1554     int i;
1555     int c = 24;
1556     fprintf(outfile, "Starting byte set: ");
1557     for (i = 0; i < 256; i++)
1558     {
1559     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1560     {
1561     if (c > 75)
1562     {
1563     fprintf(outfile, "\n ");
1564     c = 2;
1565     }
1566 nigel 93 if (PRINTHEX(i) && i != ' ')
1567 nigel 75 {
1568     fprintf(outfile, "%c ", i);
1569     c += 2;
1570     }
1571     else
1572     {
1573     fprintf(outfile, "\\x%02x ", i);
1574     c += 5;
1575     }
1576     }
1577     }
1578     fprintf(outfile, "\n");
1579     }
1580     }
1581     }
1582 nigel 3 }
1583    
1584 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1585     that is all. The first 8 bytes of the file are the regex length and then
1586     the study length, in big-endian order. */
1587 nigel 3
1588 nigel 75 if (to_file != NULL)
1589 nigel 3 {
1590 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1591     if (f == NULL)
1592 nigel 3 {
1593 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1594 nigel 3 }
1595 nigel 75 else
1596     {
1597     uschar sbuf[8];
1598     sbuf[0] = (true_size >> 24) & 255;
1599     sbuf[1] = (true_size >> 16) & 255;
1600     sbuf[2] = (true_size >> 8) & 255;
1601     sbuf[3] = (true_size) & 255;
1602 nigel 3
1603 nigel 75 sbuf[4] = (true_study_size >> 24) & 255;
1604     sbuf[5] = (true_study_size >> 16) & 255;
1605     sbuf[6] = (true_study_size >> 8) & 255;
1606     sbuf[7] = (true_study_size) & 255;
1607 nigel 3
1608 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1609     fwrite(re, 1, true_size, f) < true_size)
1610     {
1611     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1612     }
1613 nigel 3 else
1614     {
1615 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1616     if (extra != NULL)
1617 nigel 3 {
1618 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1619     true_study_size)
1620 nigel 3 {
1621 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1622     strerror(errno));
1623 nigel 3 }
1624 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1625 nigel 93
1626 nigel 3 }
1627     }
1628 nigel 75 fclose(f);
1629 nigel 3 }
1630 nigel 77
1631     new_free(re);
1632     if (extra != NULL) new_free(extra);
1633     if (tables != NULL) new_free((void *)tables);
1634 nigel 75 continue; /* With next regex */
1635 nigel 3 }
1636 nigel 75 } /* End of non-POSIX compile */
1637 nigel 3
1638     /* Read data lines and test them */
1639    
1640     for (;;)
1641     {
1642 nigel 87 uschar *q;
1643 ph10 147 uschar *bptr;
1644 nigel 57 int *use_offsets = offsets;
1645 nigel 53 int use_size_offsets = size_offsets;
1646 nigel 63 int callout_data = 0;
1647     int callout_data_set = 0;
1648 nigel 3 int count, c;
1649 nigel 29 int copystrings = 0;
1650 nigel 63 int find_match_limit = 0;
1651 nigel 29 int getstrings = 0;
1652     int getlist = 0;
1653 nigel 39 int gmatched = 0;
1654 nigel 35 int start_offset = 0;
1655 nigel 41 int g_notempty = 0;
1656 nigel 77 int use_dfa = 0;
1657 nigel 3
1658     options = 0;
1659    
1660 nigel 91 *copynames = 0;
1661     *getnames = 0;
1662    
1663     copynamesptr = copynames;
1664     getnamesptr = getnames;
1665    
1666 nigel 63 pcre_callout = callout;
1667     first_callout = 1;
1668     callout_extra = 0;
1669     callout_count = 0;
1670     callout_fail_count = 999999;
1671     callout_fail_id = -1;
1672 nigel 73 show_malloc = 0;
1673 nigel 63
1674 nigel 91 if (extra != NULL) extra->flags &=
1675     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1676    
1677     len = 0;
1678     for (;;)
1679 nigel 11 {
1680 nigel 91 if (infile == stdin) printf("data> ");
1681     if (extend_inputline(infile, buffer + len) == NULL)
1682     {
1683     if (len > 0) break;
1684     done = 1;
1685     goto CONTINUE;
1686     }
1687     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1688     len = (int)strlen((char *)buffer);
1689     if (buffer[len-1] == '\n') break;
1690 nigel 11 }
1691 nigel 3
1692     while (len > 0 && isspace(buffer[len-1])) len--;
1693     buffer[len] = 0;
1694     if (len == 0) break;
1695    
1696     p = buffer;
1697     while (isspace(*p)) p++;
1698    
1699 ph10 147 bptr = q = dbuffer;
1700 nigel 3 while ((c = *p++) != 0)
1701     {
1702     int i = 0;
1703     int n = 0;
1704 nigel 63
1705 nigel 3 if (c == '\\') switch ((c = *p++))
1706     {
1707     case 'a': c = 7; break;
1708     case 'b': c = '\b'; break;
1709     case 'e': c = 27; break;
1710     case 'f': c = '\f'; break;
1711     case 'n': c = '\n'; break;
1712     case 'r': c = '\r'; break;
1713     case 't': c = '\t'; break;
1714     case 'v': c = '\v'; break;
1715    
1716     case '0': case '1': case '2': case '3':
1717     case '4': case '5': case '6': case '7':
1718     c -= '0';
1719     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1720     c = c * 8 + *p++ - '0';
1721 nigel 91
1722     #if !defined NOUTF8
1723     if (use_utf8 && c > 255)
1724     {
1725     unsigned char buff8[8];
1726     int ii, utn;
1727     utn = ord2utf8(c, buff8);
1728     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1729     c = buff8[ii]; /* Last byte */
1730     }
1731     #endif
1732 nigel 3 break;
1733    
1734     case 'x':
1735 nigel 49
1736     /* Handle \x{..} specially - new Perl thing for utf8 */
1737    
1738 nigel 79 #if !defined NOUTF8
1739 nigel 49 if (*p == '{')
1740     {
1741     unsigned char *pt = p;
1742     c = 0;
1743     while (isxdigit(*(++pt)))
1744     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1745     if (*pt == '}')
1746     {
1747 nigel 67 unsigned char buff8[8];
1748 nigel 49 int ii, utn;
1749 nigel 85 utn = ord2utf8(c, buff8);
1750 nigel 67 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1751     c = buff8[ii]; /* Last byte */
1752 nigel 49 p = pt + 1;
1753     break;
1754     }
1755     /* Not correct form; fall through */
1756     }
1757 nigel 79 #endif
1758 nigel 49
1759     /* Ordinary \x */
1760    
1761 nigel 3 c = 0;
1762     while (i++ < 2 && isxdigit(*p))
1763     {
1764     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1765     p++;
1766     }
1767     break;
1768    
1769 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1770 nigel 3 p--;
1771     continue;
1772    
1773 nigel 75 case '>':
1774     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1775     continue;
1776    
1777 nigel 3 case 'A': /* Option setting */
1778     options |= PCRE_ANCHORED;
1779     continue;
1780    
1781     case 'B':
1782     options |= PCRE_NOTBOL;
1783     continue;
1784    
1785 nigel 29 case 'C':
1786 nigel 63 if (isdigit(*p)) /* Set copy string */
1787     {
1788     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1789     copystrings |= 1 << n;
1790     }
1791     else if (isalnum(*p))
1792     {
1793 nigel 91 uschar *npp = copynamesptr;
1794 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1795 nigel 91 *npp++ = 0;
1796 nigel 67 *npp = 0;
1797 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1798 nigel 63 if (n < 0)
1799 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1800     copynamesptr = npp;
1801 nigel 63 }
1802     else if (*p == '+')
1803     {
1804     callout_extra = 1;
1805     p++;
1806     }
1807     else if (*p == '-')
1808     {
1809     pcre_callout = NULL;
1810     p++;
1811     }
1812     else if (*p == '!')
1813     {
1814     callout_fail_id = 0;
1815     p++;
1816     while(isdigit(*p))
1817     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1818     callout_fail_count = 0;
1819     if (*p == '!')
1820     {
1821     p++;
1822     while(isdigit(*p))
1823     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1824     }
1825     }
1826     else if (*p == '*')
1827     {
1828     int sign = 1;
1829     callout_data = 0;
1830     if (*(++p) == '-') { sign = -1; p++; }
1831     while(isdigit(*p))
1832     callout_data = callout_data * 10 + *p++ - '0';
1833     callout_data *= sign;
1834     callout_data_set = 1;
1835     }
1836 nigel 29 continue;
1837    
1838 nigel 79 #if !defined NODFA
1839 nigel 77 case 'D':
1840 nigel 79 #if !defined NOPOSIX
1841 nigel 77 if (posix || do_posix)
1842     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1843     else
1844 nigel 79 #endif
1845 nigel 77 use_dfa = 1;
1846     continue;
1847    
1848     case 'F':
1849     options |= PCRE_DFA_SHORTEST;
1850     continue;
1851 nigel 79 #endif
1852 nigel 77
1853 nigel 29 case 'G':
1854 nigel 63 if (isdigit(*p))
1855     {
1856     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1857     getstrings |= 1 << n;
1858     }
1859     else if (isalnum(*p))
1860     {
1861 nigel 91 uschar *npp = getnamesptr;
1862 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1863 nigel 91 *npp++ = 0;
1864 nigel 67 *npp = 0;
1865 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1866 nigel 63 if (n < 0)
1867 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1868     getnamesptr = npp;
1869 nigel 63 }
1870 nigel 29 continue;
1871    
1872     case 'L':
1873     getlist = 1;
1874     continue;
1875    
1876 nigel 63 case 'M':
1877     find_match_limit = 1;
1878     continue;
1879    
1880 nigel 37 case 'N':
1881     options |= PCRE_NOTEMPTY;
1882     continue;
1883    
1884 nigel 3 case 'O':
1885     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1886 nigel 53 if (n > size_offsets_max)
1887     {
1888     size_offsets_max = n;
1889 nigel 57 free(offsets);
1890 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1891 nigel 53 if (offsets == NULL)
1892     {
1893     printf("** Failed to get %d bytes of memory for offsets vector\n",
1894 ph10 151 (int)(size_offsets_max * sizeof(int)));
1895 nigel 77 yield = 1;
1896     goto EXIT;
1897 nigel 53 }
1898     }
1899     use_size_offsets = n;
1900 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1901 nigel 3 continue;
1902    
1903 nigel 75 case 'P':
1904     options |= PCRE_PARTIAL;
1905     continue;
1906    
1907 nigel 91 case 'Q':
1908     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1909     if (extra == NULL)
1910     {
1911     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1912     extra->flags = 0;
1913     }
1914     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1915     extra->match_limit_recursion = n;
1916     continue;
1917    
1918     case 'q':
1919     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1920     if (extra == NULL)
1921     {
1922     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1923     extra->flags = 0;
1924     }
1925     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1926     extra->match_limit = n;
1927     continue;
1928    
1929 nigel 79 #if !defined NODFA
1930 nigel 77 case 'R':
1931     options |= PCRE_DFA_RESTART;
1932     continue;
1933 nigel 79 #endif
1934 nigel 77
1935 nigel 73 case 'S':
1936     show_malloc = 1;
1937     continue;
1938    
1939 nigel 3 case 'Z':
1940     options |= PCRE_NOTEOL;
1941     continue;
1942 nigel 71
1943     case '?':
1944     options |= PCRE_NO_UTF8_CHECK;
1945     continue;
1946 nigel 91
1947     case '<':
1948     {
1949     int x = check_newline(p, outfile);
1950     if (x == 0) goto NEXT_DATA;
1951     options |= x;
1952     while (*p++ != '>');
1953     }
1954     continue;
1955 nigel 3 }
1956 nigel 9 *q++ = c;
1957 nigel 3 }
1958 nigel 9 *q = 0;
1959     len = q - dbuffer;
1960 nigel 3
1961 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
1962     {
1963     printf("**Match limit not relevant for DFA matching: ignored\n");
1964     find_match_limit = 0;
1965     }
1966    
1967 nigel 3 /* Handle matching via the POSIX interface, which does not
1968 nigel 63 support timing or playing with the match limit or callout data. */
1969 nigel 3
1970 nigel 37 #if !defined NOPOSIX
1971 nigel 3 if (posix || do_posix)
1972     {
1973     int rc;
1974     int eflags = 0;
1975 nigel 63 regmatch_t *pmatch = NULL;
1976     if (use_size_offsets > 0)
1977 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1978 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1979     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1980    
1981 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1982 nigel 3
1983     if (rc != 0)
1984     {
1985 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1986 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1987     }
1988 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1989     != 0)
1990     {
1991     fprintf(outfile, "Matched with REG_NOSUB\n");
1992     }
1993 nigel 3 else
1994     {
1995 nigel 7 size_t i;
1996 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
1997 nigel 3 {
1998     if (pmatch[i].rm_so >= 0)
1999     {
2000 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2001 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2002     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2003 nigel 3 fprintf(outfile, "\n");
2004 nigel 35 if (i == 0 && do_showrest)
2005     {
2006     fprintf(outfile, " 0+ ");
2007 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2008     outfile);
2009 nigel 35 fprintf(outfile, "\n");
2010     }
2011 nigel 3 }
2012     }
2013     }
2014 nigel 53 free(pmatch);
2015 nigel 3 }
2016    
2017 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2018 nigel 3
2019 nigel 37 else
2020     #endif /* !defined NOPOSIX */
2021    
2022 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2023 nigel 3 {
2024 nigel 93 if (timeitm > 0)
2025 nigel 3 {
2026     register int i;
2027     clock_t time_taken;
2028     clock_t start_time = clock();
2029 nigel 77
2030 nigel 79 #if !defined NODFA
2031 nigel 77 if (all_use_dfa || use_dfa)
2032     {
2033     int workspace[1000];
2034 nigel 93 for (i = 0; i < timeitm; i++)
2035 nigel 77 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2036     options | g_notempty, use_offsets, use_size_offsets, workspace,
2037     sizeof(workspace)/sizeof(int));
2038     }
2039     else
2040 nigel 79 #endif
2041 nigel 77
2042 nigel 93 for (i = 0; i < timeitm; i++)
2043 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2044 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2045 nigel 77
2046 nigel 3 time_taken = clock() - start_time;
2047 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2048     (((double)time_taken * 1000.0) / (double)timeitm) /
2049 nigel 63 (double)CLOCKS_PER_SEC);
2050 nigel 3 }
2051    
2052 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2053 nigel 87 varying limits in order to find the minimum value for the match limit and
2054     for the recursion limit. */
2055 nigel 63
2056     if (find_match_limit)
2057     {
2058     if (extra == NULL)
2059     {
2060 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2061 nigel 63 extra->flags = 0;
2062     }
2063    
2064 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2065 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2066     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2067     PCRE_ERROR_MATCHLIMIT, "match()");
2068 nigel 63
2069 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2070     options|g_notempty, use_offsets, use_size_offsets,
2071     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2072     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2073 nigel 63 }
2074    
2075     /* If callout_data is set, use the interface with additional data */
2076    
2077     else if (callout_data_set)
2078     {
2079     if (extra == NULL)
2080     {
2081 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2082 nigel 63 extra->flags = 0;
2083     }
2084     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2085 nigel 71 extra->callout_data = &callout_data;
2086 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2087     options | g_notempty, use_offsets, use_size_offsets);
2088     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2089     }
2090    
2091     /* The normal case is just to do the match once, with the default
2092     value of match_limit. */
2093    
2094 nigel 79 #if !defined NODFA
2095 nigel 77 else if (all_use_dfa || use_dfa)
2096     {
2097     int workspace[1000];
2098     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2099     options | g_notempty, use_offsets, use_size_offsets, workspace,
2100     sizeof(workspace)/sizeof(int));
2101     if (count == 0)
2102     {
2103     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2104     count = use_size_offsets/2;
2105     }
2106     }
2107 nigel 79 #endif
2108 nigel 77
2109 nigel 75 else
2110     {
2111     count = pcre_exec(re, extra, (char *)bptr, len,
2112     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2113 nigel 77 if (count == 0)
2114     {
2115     fprintf(outfile, "Matched, but too many substrings\n");
2116     count = use_size_offsets/3;
2117     }
2118 nigel 75 }
2119 nigel 3
2120 nigel 39 /* Matched */
2121    
2122 nigel 3 if (count >= 0)
2123     {
2124 nigel 93 int i, maxcount;
2125    
2126     #if !defined NODFA
2127     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2128     #endif
2129     maxcount = use_size_offsets/3;
2130    
2131     /* This is a check against a lunatic return value. */
2132    
2133     if (count > maxcount)
2134     {
2135     fprintf(outfile,
2136     "** PCRE error: returned count %d is too big for offset size %d\n",
2137     count, use_size_offsets);
2138     count = use_size_offsets/3;
2139     if (do_g || do_G)
2140     {
2141     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2142     do_g = do_G = FALSE; /* Break g/G loop */
2143     }
2144     }
2145    
2146 nigel 29 for (i = 0; i < count * 2; i += 2)
2147 nigel 3 {
2148 nigel 57 if (use_offsets[i] < 0)
2149 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2150     else
2151     {
2152     fprintf(outfile, "%2d: ", i/2);
2153 nigel 63 (void)pchars(bptr + use_offsets[i],
2154     use_offsets[i+1] - use_offsets[i], outfile);
2155 nigel 3 fprintf(outfile, "\n");
2156 nigel 35 if (i == 0)
2157     {
2158     if (do_showrest)
2159     {
2160     fprintf(outfile, " 0+ ");
2161 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2162     outfile);
2163 nigel 35 fprintf(outfile, "\n");
2164     }
2165     }
2166 nigel 3 }
2167     }
2168 nigel 29
2169     for (i = 0; i < 32; i++)
2170     {
2171     if ((copystrings & (1 << i)) != 0)
2172     {
2173 nigel 91 char copybuffer[256];
2174 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2175 nigel 37 i, copybuffer, sizeof(copybuffer));
2176 nigel 29 if (rc < 0)
2177     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2178     else
2179 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2180 nigel 29 }
2181     }
2182    
2183 nigel 91 for (copynamesptr = copynames;
2184     *copynamesptr != 0;
2185     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2186     {
2187     char copybuffer[256];
2188     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2189     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2190     if (rc < 0)
2191     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2192     else
2193     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2194     }
2195    
2196 nigel 29 for (i = 0; i < 32; i++)
2197     {
2198     if ((getstrings & (1 << i)) != 0)
2199     {
2200     const char *substring;
2201 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2202 nigel 29 i, &substring);
2203     if (rc < 0)
2204     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2205     else
2206     {
2207     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2208 nigel 49 pcre_free_substring(substring);
2209 nigel 29 }
2210     }
2211     }
2212    
2213 nigel 91 for (getnamesptr = getnames;
2214     *getnamesptr != 0;
2215     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2216     {
2217     const char *substring;
2218     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2219     count, (char *)getnamesptr, &substring);
2220     if (rc < 0)
2221     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2222     else
2223     {
2224     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2225     pcre_free_substring(substring);
2226     }
2227     }
2228    
2229 nigel 29 if (getlist)
2230     {
2231     const char **stringlist;
2232 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2233 nigel 29 &stringlist);
2234     if (rc < 0)
2235     fprintf(outfile, "get substring list failed %d\n", rc);
2236     else
2237     {
2238     for (i = 0; i < count; i++)
2239     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2240     if (stringlist[i] != NULL)
2241     fprintf(outfile, "string list not terminated by NULL\n");
2242 nigel 49 /* free((void *)stringlist); */
2243     pcre_free_substring_list(stringlist);
2244 nigel 29 }
2245     }
2246 nigel 39 }
2247 nigel 29
2248 nigel 75 /* There was a partial match */
2249    
2250     else if (count == PCRE_ERROR_PARTIAL)
2251     {
2252 nigel 77 fprintf(outfile, "Partial match");
2253 nigel 79 #if !defined NODFA
2254 nigel 77 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2255     fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2256     bptr + use_offsets[0]);
2257 nigel 79 #endif
2258 nigel 77 fprintf(outfile, "\n");
2259 nigel 75 break; /* Out of the /g loop */
2260     }
2261    
2262 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2263 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2264     to advance the start offset, and continue. We won't be at the end of the
2265     string - that was checked before setting g_notempty.
2266 nigel 39
2267 ph10 150 Complication arises in the case when the newline option is "any" or
2268 ph10 149 "anycrlf". If the previous match was at the end of a line terminated by
2269     CRLF, an advance of one character just passes the \r, whereas we should
2270     prefer the longer newline sequence, as does the code in pcre_exec().
2271     Fudge the offset value to achieve this.
2272 ph10 144
2273 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2274     character, not one byte. */
2275    
2276 nigel 3 else
2277     {
2278 nigel 41 if (g_notempty != 0)
2279 nigel 35 {
2280 nigel 73 int onechar = 1;
2281 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2282 nigel 57 use_offsets[0] = start_offset;
2283 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2284     {
2285     int d;
2286     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2287     obits = (d == '\r')? PCRE_NEWLINE_CR :
2288     (d == '\n')? PCRE_NEWLINE_LF :
2289     (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2290 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2291 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2292     }
2293 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2294 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2295 ph10 149 &&
2296 ph10 143 start_offset < len - 1 &&
2297     bptr[start_offset] == '\r' &&
2298     bptr[start_offset+1] == '\n')
2299 ph10 144 onechar++;
2300 ph10 143 else if (use_utf8)
2301 nigel 73 {
2302     while (start_offset + onechar < len)
2303     {
2304     int tb = bptr[start_offset+onechar];
2305     if (tb <= 127) break;
2306     tb &= 0xc0;
2307     if (tb != 0 && tb != 0xc0) onechar++;
2308     }
2309     }
2310     use_offsets[1] = start_offset + onechar;
2311 nigel 35 }
2312 nigel 41 else
2313     {
2314 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2315 nigel 41 {
2316 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
2317 nigel 41 }
2318 nigel 73 else fprintf(outfile, "Error %d\n", count);
2319 nigel 41 break; /* Out of the /g loop */
2320     }
2321 nigel 3 }
2322 nigel 35
2323 nigel 39 /* If not /g or /G we are done */
2324    
2325     if (!do_g && !do_G) break;
2326    
2327 nigel 41 /* If we have matched an empty string, first check to see if we are at
2328     the end of the subject. If so, the /g loop is over. Otherwise, mimic
2329     what Perl's /g options does. This turns out to be rather cunning. First
2330 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2331     same point. If this fails (picked up above) we advance to the next
2332 ph10 143 character. */
2333 ph10 142
2334 nigel 41 g_notempty = 0;
2335 ph10 142
2336 nigel 57 if (use_offsets[0] == use_offsets[1])
2337 nigel 41 {
2338 nigel 57 if (use_offsets[0] == len) break;
2339 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2340 nigel 41 }
2341 nigel 39
2342     /* For /g, update the start offset, leaving the rest alone */
2343    
2344 ph10 143 if (do_g) start_offset = use_offsets[1];
2345 nigel 39
2346     /* For /G, update the pointer and length */
2347    
2348     else
2349 nigel 35 {
2350 ph10 143 bptr += use_offsets[1];
2351     len -= use_offsets[1];
2352 nigel 35 }
2353 nigel 39 } /* End of loop for /g and /G */
2354 nigel 91
2355     NEXT_DATA: continue;
2356 nigel 39 } /* End of loop for data lines */
2357 nigel 3
2358 nigel 11 CONTINUE:
2359 nigel 37
2360     #if !defined NOPOSIX
2361 nigel 3 if (posix || do_posix) regfree(&preg);
2362 nigel 37 #endif
2363    
2364 nigel 77 if (re != NULL) new_free(re);
2365     if (extra != NULL) new_free(extra);
2366 nigel 25 if (tables != NULL)
2367     {
2368 nigel 77 new_free((void *)tables);
2369 nigel 25 setlocale(LC_CTYPE, "C");
2370 nigel 93 locale_set = 0;
2371 nigel 25 }
2372 nigel 3 }
2373    
2374 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2375 nigel 77
2376     EXIT:
2377    
2378     if (infile != NULL && infile != stdin) fclose(infile);
2379     if (outfile != NULL && outfile != stdout) fclose(outfile);
2380    
2381     free(buffer);
2382     free(dbuffer);
2383     free(pbuffer);
2384     free(offsets);
2385    
2386     return yield;
2387 nigel 3 }
2388    
2389 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12