/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 227 - (hide annotations) (download)
Tue Aug 21 15:00:15 2007 UTC (6 years, 7 months ago) by ph10
File MIME type: text/plain
File size: 69339 byte(s)
Add (*CR) etc.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 199 #include <config.h>
41 ph10 200 #endif
42 ph10 199
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include <time.h>
#include <locale.h>
#include <errno.h>
50 nigel 3
51 nigel 93
52     /* A number of things vary for Windows builds. Originally, pcretest opened its
53     input and output without "b"; then I was told that "b" was needed in some
54     environments, so it was added for release 5.0 to both the input and output. (It
55     makes no difference on Unix-like systems.) Later I was told that it is wrong
56     for the input on Windows. I've now abstracted the modes into two macros that
57     are set here, to make it easier to fiddle with them, and removed "b" from the
58     input mode under Windows. */
59    
60     #if defined(_WIN32) || defined(WIN32)
61     #include <io.h> /* For _setmode() */
62     #include <fcntl.h> /* For _O_BINARY */
63     #define INPUT_MODE "r"
64     #define OUTPUT_MODE "wb"
65    
66     #else
67     #include <sys/time.h> /* These two includes are needed */
68     #include <sys/resource.h> /* for setrlimit(). */
69     #define INPUT_MODE "rb"
70     #define OUTPUT_MODE "wb"
71 nigel 91 #endif
72    
73 nigel 93
74 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
75     displaying the results of pcre_study() and we also need to know about the
76     internal macros, structures, and other internal data values; pcretest has
77     "inside information" compared to a program that strictly follows the PCRE API.
78 nigel 37
79 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
80     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81     appropriately for an application, not for building PCRE. */
82 nigel 77
83 ph10 145 #include "pcre.h"
84 nigel 77 #include "pcre_internal.h"
85    
86 nigel 85 /* We need access to the data tables that PCRE uses. So as not to have to keep
87     two copies, we include the source file here, changing the names of the external
88     symbols to prevent clashes. */
89 nigel 77
90 nigel 85 #define _pcre_utf8_table1 utf8_table1
91     #define _pcre_utf8_table1_size utf8_table1_size
92     #define _pcre_utf8_table2 utf8_table2
93     #define _pcre_utf8_table3 utf8_table3
94     #define _pcre_utf8_table4 utf8_table4
95     #define _pcre_utt utt
96     #define _pcre_utt_size utt_size
97     #define _pcre_OP_lengths OP_lengths
98    
99     #include "pcre_tables.c"
100    
101     /* We also need the pcre_printint() function for printing out compiled
102     patterns. This function is in a separate file so that it can be included in
103 nigel 93 pcre_compile.c when that module is compiled with debugging enabled.
104 nigel 85
105 nigel 93 The definition of the macro PRINTABLE, which determines whether to print an
106     output character as-is or as a hex value when showing compiled patterns, is
107     contained in this file. We use it here also, in cases when the locale has not
108     been explicitly changed, so as to get consistent output from systems that
109     differ in their output from isprint() even in the "C" locale. */
110    
111 nigel 85 #include "pcre_printint.src"
112    
113 nigel 93 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
114 nigel 85
115 nigel 93
116 nigel 37 /* It is possible to compile this test program without including support for
117     testing the POSIX interface, though this is not available via the standard
118     Makefile. */
119    
120     #if !defined NOPOSIX
121 nigel 3 #include "pcreposix.h"
122 nigel 37 #endif
123 nigel 3
124 ph10 107 /* It is also possible, for the benefit of the version currently imported into
125     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
126     interface to the DFA matcher (NODFA), and without the doublecheck of the old
127     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
128     UTF8 support if PCRE is built without it. */
129 nigel 79
130 ph10 107 #ifndef SUPPORT_UTF8
131     #ifndef NOUTF8
132     #define NOUTF8
133     #endif
134     #endif
135 nigel 79
136 ph10 107
137 nigel 85 /* Other parameters */
138    
139 nigel 3 #ifndef CLOCKS_PER_SEC
140     #ifdef CLK_TCK
141     #define CLOCKS_PER_SEC CLK_TCK
142     #else
143     #define CLOCKS_PER_SEC 100
144     #endif
145     #endif
146    
147 nigel 93 /* This is the default loop count for timing. */
148    
149 nigel 75 #define LOOPREPEAT 500000
150 nigel 3
151 nigel 85 /* Static variables */
152    
153 nigel 3 static FILE *outfile; /* output stream; file-scope so the malloc wrappers and callout() can write to it */
154     static int log_store = 0; /* NOTE(review): not referenced in the part of the file visible here */
155 nigel 63 static int callout_count; /* incremented by callout() each time the callout number equals callout_fail_id */
156     static int callout_extra; /* non-zero: callout() also prints the captured substrings */
157     static int callout_fail_count; /* callout() returns 1 once callout_count reaches this value */
158     static int callout_fail_id; /* callout number that is counted towards callout_fail_count */
159 ph10 210 static int debug_lengths; /* NOTE(review): not referenced in the part of the file visible here */
160 nigel 63 static int first_callout; /* non-zero until callout() has run once; makes it re-print the subject */
161 nigel 93 static int locale_set = 0; /* non-zero: PRINTHEX() uses isprint() instead of PRINTABLE() */
162 nigel 73 static int show_malloc; /* non-zero: the malloc/free wrappers log every call to outfile */
163 nigel 67 static int use_utf8; /* non-zero: pchars() decodes its input as UTF-8 */
164 nigel 43 static size_t gotten_store; /* size passed to the most recent new_malloc() call */
165 nigel 3
166 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
167    
168     static int buffer_size = 50000; /* current size of each buffer; doubled by extend_inputline() */
169     static uschar *buffer = NULL; /* input lines are read into this buffer */
170     static uschar *dbuffer = NULL; /* reallocated alongside buffer; contents are not preserved on growth */
171 nigel 75 static uschar *pbuffer = NULL; /* copy of the input for callouts; contents are preserved on growth */
172 nigel 3
173 nigel 75
174 nigel 49
175     /*************************************************
176 nigel 91 * Read or extend an input line *
177     *************************************************/
178    
179     /* Input lines are read into buffer, but both patterns and data lines can be
180     continued over multiple input lines. In addition, if the buffer fills up, we
181     want to automatically expand it so as to be able to handle extremely large
182     lines that are needed for certain stress tests. When the input buffer is
183     expanded, the other two buffers must also be expanded likewise, and the
184     contents of pbuffer, which are a copy of the input for callouts, must be
185     preserved (for when expansion happens for a data line). This is not the most
186     optimal way of handling this, but hey, this is just a test program!
187    
188     Arguments:
189     f the file to read
190     start where in buffer to start (this *must* be within buffer)
191    
192     Returns: pointer to the start of new data
193     could be a copy of start, or could be moved
194     NULL if no data read and EOF reached
195     */
196    
197     static uschar *
198     extend_inputline(FILE *f, uschar *start)
199     {
200     uschar *here = start;
201    
202     for (;;)
203     {
204     int rlen = buffer_size - (here - buffer);
205 nigel 93
206 nigel 91 if (rlen > 1000)
207     {
208     int dlen;
209     if (fgets((char *)here, rlen, f) == NULL)
210     return (here == start)? NULL : start;
211     dlen = (int)strlen((char *)here);
212     if (dlen > 0 && here[dlen - 1] == '\n') return start;
213     here += dlen;
214     }
215    
216     else
217     {
218     int new_buffer_size = 2*buffer_size;
219     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
220     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
221     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
222    
223     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
224     {
225     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
226     exit(1);
227     }
228    
229     memcpy(new_buffer, buffer, buffer_size);
230     memcpy(new_pbuffer, pbuffer, buffer_size);
231    
232     buffer_size = new_buffer_size;
233    
234     start = new_buffer + (start - buffer);
235     here = new_buffer + (here - buffer);
236    
237     free(buffer);
238     free(dbuffer);
239     free(pbuffer);
240    
241     buffer = new_buffer;
242     dbuffer = new_dbuffer;
243     pbuffer = new_pbuffer;
244     }
245     }
246    
247     return NULL; /* Control never gets here */
248     }
249    
250    
251    
252    
253    
254    
255    
256     /*************************************************
257 nigel 63 * Read number from string *
258     *************************************************/
259    
260     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
261     around with conditional compilation, just do the job by hand. It is only used
262 nigel 93 for unpicking arguments, so just keep it simple.
263 nigel 63
264     Arguments:
265     str string to be converted
266     endptr where to put the end pointer
267    
268     Returns: the value, as an int
269     */
270    
/* Convert a decimal number, optionally preceded by white space, to an int.

Arguments:
  str         string to be converted
  endptr      receives a pointer to the first character after the digits

Returns:      the value; if the digit string is too large for an int the
              result saturates at INT_MAX instead of overflowing (signed
              overflow is undefined behaviour). All the digits are still
              consumed, so *endptr is the same as for an in-range number.
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (*str != 0 && isspace(*str)) str++;
while (isdigit(*str))
  {
  int digit = *str++ - '0';
  /* result*10 + digit fits in an int iff result <= (INT_MAX - digit)/10 */
  if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
  }
*endptr = str;
return result;
}
280    
281    
282    
283 nigel 49
284     /*************************************************
285     * Convert UTF-8 string to value *
286     *************************************************/
287    
288     /* This function takes one or more bytes that represents a UTF-8 character,
289     and returns the value of the character.
290    
291     Argument:
292 nigel 91 utf8bytes a pointer to the byte vector
293     vptr a pointer to an int to receive the value
294 nigel 49
295 nigel 91 Returns: > 0 => the number of bytes consumed
296     -6 to 0 => malformed UTF-8 character at offset = (-return)
297 nigel 49 */
298    
299 nigel 79 #if !defined NOUTF8
300    
301 nigel 67 static int
302 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
303 nigel 49 {
304 nigel 91 int c = *utf8bytes++;
305 nigel 49 int d = c;
306     int i, j, s;
307    
308     for (i = -1; i < 6; i++) /* i is number of additional bytes */
309     {
310     if ((d & 0x80) == 0) break;
311     d <<= 1;
312     }
313    
314     if (i == -1) { *vptr = c; return 1; } /* ascii character */
315     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
316    
317     /* i now has a value in the range 1-5 */
318    
319 nigel 59 s = 6*i;
320 nigel 85 d = (c & utf8_table3[i]) << s;
321 nigel 49
322     for (j = 0; j < i; j++)
323     {
324 nigel 91 c = *utf8bytes++;
325 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
326 nigel 59 s -= 6;
327 nigel 49 d |= (c & 0x3f) << s;
328     }
329    
330     /* Check that encoding was the correct unique one */
331    
332 nigel 85 for (j = 0; j < utf8_table1_size; j++)
333     if (d <= utf8_table1[j]) break;
334 nigel 49 if (j != i) return -(i+1);
335    
336     /* Valid value */
337    
338     *vptr = d;
339     return i+1;
340     }
341    
342 nigel 79 #endif
343 nigel 49
344    
345 nigel 79
346 nigel 63 /*************************************************
347 nigel 85 * Convert character value to UTF-8 *
348     *************************************************/
349    
350     /* This function takes an integer value in the range 0 - 0x7fffffff
351     and encodes it as a UTF-8 character in 1 to 6 bytes.
352    
353     Arguments:
354     cvalue the character value
355 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
356 nigel 85
357     Returns: number of bytes placed in the buffer
358     */
359    
360 nigel 93 #if !defined NOUTF8
361    
362 nigel 85 static int
363 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
364 nigel 85 {
365     register int i, j;
366     for (i = 0; i < utf8_table1_size; i++)
367     if (cvalue <= utf8_table1[i]) break;
368 nigel 91 utf8bytes += i;
369 nigel 85 for (j = i; j > 0; j--)
370     {
371 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
372 nigel 85 cvalue >>= 6;
373     }
374 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
375 nigel 85 return i + 1;
376     }
377    
378 nigel 93 #endif
379 nigel 85
380    
381 nigel 93
382 nigel 85 /*************************************************
383 nigel 63 * Print character string *
384     *************************************************/
385 nigel 49
386 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
387     mode. Yields number of characters printed. If handed a NULL file, just counts
388     chars without printing. */
389 nigel 49
390 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
391 nigel 3 {
392 nigel 85 int c = 0;
393 nigel 63 int yield = 0;
394 nigel 3
395 nigel 63 while (length-- > 0)
396 nigel 3 {
397 nigel 79 #if !defined NOUTF8
398 nigel 67 if (use_utf8)
399 nigel 63 {
400     int rc = utf82ord(p, &c);
401 nigel 3
402 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
403     {
404     length -= rc - 1;
405     p += rc;
406 nigel 93 if (PRINTHEX(c))
407 nigel 63 {
408     if (f != NULL) fprintf(f, "%c", c);
409     yield++;
410     }
411     else
412     {
413 nigel 93 int n = 4;
414     if (f != NULL) fprintf(f, "\\x{%02x}", c);
415     yield += (n <= 0x000000ff)? 2 :
416     (n <= 0x00000fff)? 3 :
417     (n <= 0x0000ffff)? 4 :
418     (n <= 0x000fffff)? 5 : 6;
419 nigel 63 }
420     continue;
421     }
422     }
423 nigel 79 #endif
424 nigel 3
425 nigel 63 /* Not UTF-8, or malformed UTF-8 */
426    
427 nigel 93 c = *p++;
428     if (PRINTHEX(c))
429 nigel 3 {
430 nigel 63 if (f != NULL) fprintf(f, "%c", c);
431     yield++;
432 nigel 3 }
433 nigel 63 else
434 nigel 3 {
435 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
436     yield += 4;
437     }
438     }
439 nigel 3
440 nigel 63 return yield;
441     }
442 nigel 23
443 nigel 3
444 nigel 23
445 nigel 63 /*************************************************
446     * Callout function *
447     *************************************************/
448 nigel 3
449 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
450     the match. Yield zero unless more callouts than the fail count, or the callout
451     data is not zero. */
452 nigel 3
453 nigel 63 static int callout(pcre_callout_block *cb)
454     {
455     FILE *f = (first_callout | callout_extra)? outfile : NULL;
456 nigel 75 int i, pre_start, post_start, subject_length;
457 nigel 3
458 nigel 63 if (callout_extra)
459     {
460     fprintf(f, "Callout %d: last capture = %d\n",
461     cb->callout_number, cb->capture_last);
462 nigel 3
463 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
464     {
465     if (cb->offset_vector[i] < 0)
466     fprintf(f, "%2d: <unset>\n", i/2);
467     else
468     {
469     fprintf(f, "%2d: ", i/2);
470     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
471     cb->offset_vector[i+1] - cb->offset_vector[i], f);
472     fprintf(f, "\n");
473     }
474     }
475     }
476 nigel 3
477 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
478     datails. On subsequent calls in the same match, we use pchars just to find the
479     printed lengths of the substrings. */
480 nigel 3
481 nigel 63 if (f != NULL) fprintf(f, "--->");
482 nigel 3
483 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
484     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
485     cb->current_position - cb->start_match, f);
486 nigel 3
487 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
488    
489 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
490     cb->subject_length - cb->current_position, f);
491 nigel 3
492 nigel 63 if (f != NULL) fprintf(f, "\n");
493 nigel 9
494 nigel 63 /* Always print appropriate indicators, with callout number if not already
495 nigel 75 shown. For automatic callouts, show the pattern offset. */
496 nigel 3
497 nigel 75 if (cb->callout_number == 255)
498     {
499     fprintf(outfile, "%+3d ", cb->pattern_position);
500     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
501     }
502     else
503     {
504     if (callout_extra) fprintf(outfile, " ");
505     else fprintf(outfile, "%3d ", cb->callout_number);
506     }
507 nigel 3
508 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
509     fprintf(outfile, "^");
510 nigel 3
511 nigel 63 if (post_start > 0)
512     {
513     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
514     fprintf(outfile, "^");
515 nigel 3 }
516    
517 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
518     fprintf(outfile, " ");
519    
520     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
521     pbuffer + cb->pattern_position);
522    
523 nigel 63 fprintf(outfile, "\n");
524     first_callout = 0;
525 nigel 3
526 nigel 71 if (cb->callout_data != NULL)
527 nigel 49 {
528 nigel 71 int callout_data = *((int *)(cb->callout_data));
529     if (callout_data != 0)
530     {
531     fprintf(outfile, "Callout data = %d\n", callout_data);
532     return callout_data;
533     }
534 nigel 63 }
535 nigel 49
536 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
537     (++callout_count >= callout_fail_count)? 1 : 0;
538 nigel 3 }
539    
540    
541 nigel 63 /*************************************************
542 nigel 73 * Local malloc functions *
543 nigel 63 *************************************************/
544 nigel 3
545     /* Alternative malloc function, to test functionality and show the size of the
546     compiled re. */
547    
548     static void *new_malloc(size_t size)
549     {
550 nigel 73 void *block = malloc(size);
551 nigel 43 gotten_store = size;
552 nigel 73 if (show_malloc)
553 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
554 nigel 73 return block;
555 nigel 3 }
556    
557 nigel 73 static void new_free(void *block)
558     {
559     if (show_malloc)
560     fprintf(outfile, "free %p\n", block);
561     free(block);
562     }
563 nigel 3
564    
565 nigel 73 /* For recursion malloc/free, to test stacking calls */
566    
567     static void *stack_malloc(size_t size)
568     {
569     void *block = malloc(size);
570     if (show_malloc)
571 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
572 nigel 73 return block;
573     }
574    
575     static void stack_free(void *block)
576     {
577     if (show_malloc)
578     fprintf(outfile, "stack_free %p\n", block);
579     free(block);
580     }
581    
582    
583 nigel 63 /*************************************************
584     * Call pcre_fullinfo() *
585     *************************************************/
586 nigel 43
587     /* Get one piece of information from the pcre_fullinfo() function */
588    
589     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
590     {
591     int rc;
592     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
593     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
594     }
595    
596    
597    
598 nigel 63 /*************************************************
599 nigel 75 * Byte flipping function *
600     *************************************************/
601    
/* Reverse the byte order of a 2-byte (n == 2) or 4-byte (any other n) value.
Only the low-order 2 or 4 bytes of "value" take part; higher bits are
dropped. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;           /* least significant byte */
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
611    
612    
613    
614    
615     /*************************************************
616 nigel 87 * Check match or recursion limit *
617     *************************************************/
618    
619     static int
620     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
621     int start_offset, int options, int *use_offsets, int use_size_offsets,
622     int flag, unsigned long int *limit, int errnumber, const char *msg)
623     {
624     int count;
625     int min = 0;
626     int mid = 64;
627     int max = -1;
628    
629     extra->flags |= flag;
630    
631     for (;;)
632     {
633     *limit = mid;
634    
635     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
636     use_offsets, use_size_offsets);
637    
638     if (count == errnumber)
639     {
640     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
641     min = mid;
642     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
643     }
644    
645     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
646     count == PCRE_ERROR_PARTIAL)
647     {
648     if (mid == min + 1)
649     {
650     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
651     break;
652     }
653     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
654     max = mid;
655     mid = (min + mid)/2;
656     }
657     else break; /* Some other error */
658     }
659    
660     extra->flags &= ~flag;
661     return count;
662     }
663    
664    
665    
666     /*************************************************
667 ph10 227 * Case-independent strncmp() function *
668     *************************************************/
669    
670     /*
671     Arguments:
672     s first string
673     t second string
674     n number of characters to compare
675    
676     Returns: < 0, = 0, or > 0, according to the comparison
677     */
678    
679     static int
680     strncmpic(uschar *s, uschar *t, int n)
681     {
682     while (n--)
683     {
684     int c = tolower(*s++) - tolower(*t++);
685     if (c) return c;
686     }
687     return 0;
688     }
689    
690    
691    
692     /*************************************************
693 nigel 91 * Check newline indicator *
694     *************************************************/
695    
696     /* This is used both at compile and run-time to check for <xxx> escapes, where
697 ph10 149 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
698     no match.
699 nigel 91
700     Arguments:
701     p points after the leading '<'
702     f file for error message
703    
704     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
705     */
706    
707     static int
708     check_newline(uschar *p, FILE *f)
709     {
710 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
711     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
712     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
713     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
714     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
715 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
716     return 0;
717     }
718    
719    
720    
721     /*************************************************
722 nigel 93 * Usage function *
723     *************************************************/
724    
/* Write the command line summary to stdout. The -dfa and -p lines are left
out when the corresponding support is compiled out (NODFA, NOPOSIX). */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input> [<output>]]\n", stdout);
fputs(" -b show compiled code (bytecode)\n", stdout);
fputs(" -C show PCRE compile-time options and exit\n", stdout);
fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n", stdout);
fputs(" -i show information about compiled patterns\n", stdout);
fputs(" -m output memory used information\n", stdout);
fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
fputs(" -s output store (memory) used information\n", stdout);
fputs(" -t time compilation and execution\n", stdout);
fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
fputs(" -tm time execution (matching) only\n", stdout);
fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
750    
751    
752    
753     /*************************************************
754 nigel 63 * Main Program *
755     *************************************************/
756 nigel 43
757 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
758     consist of a regular expression, in delimiters and optionally followed by
759     options, followed by a set of test data, terminated by an empty line. */
760    
761     int main(int argc, char **argv)
762     {
763     FILE *infile = stdin;
764     int options = 0;
765     int study_options = 0;
766     int op = 1;
767     int timeit = 0;
768 nigel 93 int timeitm = 0;
769 nigel 3 int showinfo = 0;
770 nigel 31 int showstore = 0;
771 nigel 87 int quiet = 0;
772 nigel 53 int size_offsets = 45;
773     int size_offsets_max;
774 nigel 77 int *offsets = NULL;
775 nigel 53 #if !defined NOPOSIX
776 nigel 3 int posix = 0;
777 nigel 53 #endif
778 nigel 3 int debug = 0;
779 nigel 11 int done = 0;
780 nigel 77 int all_use_dfa = 0;
781     int yield = 0;
782 nigel 91 int stack_size;
783 nigel 3
784 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
785     that 1024 is plenty long enough for the few names we'll be testing. */
786 nigel 69
787 nigel 91 uschar copynames[1024];
788     uschar getnames[1024];
789    
790     uschar *copynamesptr;
791     uschar *getnamesptr;
792    
793 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
794 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
795 nigel 69
796 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
797     dbuffer = (unsigned char *)malloc(buffer_size);
798     pbuffer = (unsigned char *)malloc(buffer_size);
799 nigel 69
800 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
801 nigel 3
802 nigel 93 outfile = stdout;
803    
804     /* The following _setmode() stuff is some Windows magic that tells its runtime
805     library to translate CRLF into a single LF character. At least, that's what
806     I've been told: never having used Windows I take this all on trust. Originally
807     it set 0x8000, but then I was advised that _O_BINARY was better. */
808    
809 nigel 75 #if defined(_WIN32) || defined(WIN32)
810 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
811     #endif
812 nigel 75
813 nigel 3 /* Scan options */
814    
815     while (argc > 1 && argv[op][0] == '-')
816     {
817 nigel 63 unsigned char *endptr;
818 nigel 53
819 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
820     showstore = 1;
821 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
822 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
823 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
824     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
825 nigel 79 #if !defined NODFA
826 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
827 nigel 79 #endif
828 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
829 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
830     *endptr == 0))
831 nigel 53 {
832     op++;
833     argc--;
834     }
835 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
836     {
837     int both = argv[op][2] == 0;
838     int temp;
839     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
840     *endptr == 0))
841     {
842     timeitm = temp;
843     op++;
844     argc--;
845     }
846     else timeitm = LOOPREPEAT;
847     if (both) timeit = timeitm;
848     }
849 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
850     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
851     *endptr == 0))
852     {
853 nigel 93 #if defined(_WIN32) || defined(WIN32)
854 nigel 91 printf("PCRE: -S not supported on this OS\n");
855     exit(1);
856     #else
857     int rc;
858     struct rlimit rlim;
859     getrlimit(RLIMIT_STACK, &rlim);
860     rlim.rlim_cur = stack_size * 1024 * 1024;
861     rc = setrlimit(RLIMIT_STACK, &rlim);
862     if (rc != 0)
863     {
864     printf("PCRE: setrlimit() failed with error %d\n", rc);
865     exit(1);
866     }
867     op++;
868     argc--;
869     #endif
870     }
871 nigel 53 #if !defined NOPOSIX
872 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
873 nigel 53 #endif
874 nigel 63 else if (strcmp(argv[op], "-C") == 0)
875     {
876     int rc;
877     printf("PCRE version %s\n", pcre_version());
878     printf("Compiled with\n");
879     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
880     printf(" %sUTF-8 support\n", rc? "" : "No ");
881 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
882     printf(" %sUnicode properties support\n", rc? "" : "No ");
883 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
884 nigel 91 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
885 nigel 93 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
886 ph10 150 (rc == -2)? "ANYCRLF" :
887 nigel 93 (rc == -1)? "ANY" : "???");
888 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
889     printf(" Internal link size = %d\n", rc);
890     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
891     printf(" POSIX malloc threshold = %d\n", rc);
892     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
893     printf(" Default match limit = %d\n", rc);
894 nigel 87 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
895     printf(" Default recursion depth limit = %d\n", rc);
896 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
897     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
898 ph10 121 goto EXIT;
899 nigel 63 }
900 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
901     strcmp(argv[op], "--help") == 0)
902     {
903     usage();
904     goto EXIT;
905     }
906 nigel 3 else
907     {
908 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
909 nigel 93 usage();
910 nigel 77 yield = 1;
911     goto EXIT;
912 nigel 3 }
913     op++;
914     argc--;
915     }
916    
917 nigel 53 /* Get the store for the offsets vector, and remember what it was */
918    
919     size_offsets_max = size_offsets;
920 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
921 nigel 53 if (offsets == NULL)
922     {
923     printf("** Failed to get %d bytes of memory for offsets vector\n",
924 ph10 151 (int)(size_offsets_max * sizeof(int)));
925 nigel 77 yield = 1;
926     goto EXIT;
927 nigel 53 }
928    
929 nigel 3 /* Sort out the input and output files */
930    
931     if (argc > 1)
932     {
933 nigel 93 infile = fopen(argv[op], INPUT_MODE);
934 nigel 3 if (infile == NULL)
935     {
936     printf("** Failed to open %s\n", argv[op]);
937 nigel 77 yield = 1;
938     goto EXIT;
939 nigel 3 }
940     }
941    
942     if (argc > 2)
943     {
944 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
945 nigel 3 if (outfile == NULL)
946     {
947     printf("** Failed to open %s\n", argv[op+1]);
948 nigel 77 yield = 1;
949     goto EXIT;
950 nigel 3 }
951     }
952    
953     /* Set alternative malloc function */
954    
955     pcre_malloc = new_malloc;
956 nigel 73 pcre_free = new_free;
957     pcre_stack_malloc = stack_malloc;
958     pcre_stack_free = stack_free;
959 nigel 3
960 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
961 nigel 3
962 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
963 nigel 3
964     /* Main loop */
965    
966 nigel 11 while (!done)
967 nigel 3 {
968     pcre *re = NULL;
969     pcre_extra *extra = NULL;
970 nigel 37
971     #if !defined NOPOSIX /* There are still compilers that require no indent */
972 nigel 3 regex_t preg;
973 nigel 45 int do_posix = 0;
974 nigel 37 #endif
975    
976 nigel 7 const char *error;
977 nigel 25 unsigned char *p, *pp, *ppp;
978 nigel 75 unsigned char *to_file = NULL;
979 nigel 53 const unsigned char *tables = NULL;
980 nigel 75 unsigned long int true_size, true_study_size = 0;
981     size_t size, regex_gotten_store;
982 nigel 3 int do_study = 0;
983 nigel 25 int do_debug = debug;
984 nigel 35 int do_G = 0;
985     int do_g = 0;
986 nigel 25 int do_showinfo = showinfo;
987 nigel 35 int do_showrest = 0;
988 nigel 75 int do_flip = 0;
989 nigel 93 int erroroffset, len, delimiter, poffset;
990 nigel 3
991 nigel 67 use_utf8 = 0;
992 ph10 211 debug_lengths = 1;
993 nigel 63
994 nigel 3 if (infile == stdin) printf(" re> ");
995 nigel 91 if (extend_inputline(infile, buffer) == NULL) break;
996 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
997 nigel 63 fflush(outfile);
998 nigel 3
999     p = buffer;
1000     while (isspace(*p)) p++;
1001     if (*p == 0) continue;
1002    
1003 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1004 nigel 3
1005 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1006     {
1007 nigel 91 unsigned long int magic, get_options;
1008 nigel 75 uschar sbuf[8];
1009     FILE *f;
1010    
1011     p++;
1012     pp = p + (int)strlen((char *)p);
1013     while (isspace(pp[-1])) pp--;
1014     *pp = 0;
1015    
1016     f = fopen((char *)p, "rb");
1017     if (f == NULL)
1018     {
1019     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1020     continue;
1021     }
1022    
1023     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1024    
1025     true_size =
1026     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1027     true_study_size =
1028     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1029    
1030     re = (real_pcre *)new_malloc(true_size);
1031     regex_gotten_store = gotten_store;
1032    
1033     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1034    
1035     magic = ((real_pcre *)re)->magic_number;
1036     if (magic != MAGIC_NUMBER)
1037     {
1038     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1039     {
1040     do_flip = 1;
1041     }
1042     else
1043     {
1044     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1045     fclose(f);
1046     continue;
1047     }
1048     }
1049    
1050     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1051     do_flip? " (byte-inverted)" : "", p);
1052    
1053     /* Need to know if UTF-8 for printing data strings */
1054    
1055 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1056     use_utf8 = (get_options & PCRE_UTF8) != 0;
1057 nigel 75
1058     /* Now see if there is any following study data */
1059    
1060     if (true_study_size != 0)
1061     {
1062     pcre_study_data *psd;
1063    
1064     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1065     extra->flags = PCRE_EXTRA_STUDY_DATA;
1066    
1067     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1068     extra->study_data = psd;
1069    
1070     if (fread(psd, 1, true_study_size, f) != true_study_size)
1071     {
1072     FAIL_READ:
1073     fprintf(outfile, "Failed to read data from %s\n", p);
1074     if (extra != NULL) new_free(extra);
1075     if (re != NULL) new_free(re);
1076     fclose(f);
1077     continue;
1078     }
1079     fprintf(outfile, "Study data loaded from %s\n", p);
1080     do_study = 1; /* To get the data output if requested */
1081     }
1082     else fprintf(outfile, "No study data\n");
1083    
1084     fclose(f);
1085     goto SHOW_INFO;
1086     }
1087    
1088     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1089     the pattern; if it isn't complete, read more. */
1090    
1091 nigel 3 delimiter = *p++;
1092    
1093 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1094 nigel 3 {
1095 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1096 nigel 3 goto SKIP_DATA;
1097     }
1098    
1099     pp = p;
1100 nigel 93 poffset = p - buffer;
1101 nigel 3
1102     for(;;)
1103     {
1104 nigel 29 while (*pp != 0)
1105     {
1106     if (*pp == '\\' && pp[1] != 0) pp++;
1107     else if (*pp == delimiter) break;
1108     pp++;
1109     }
1110 nigel 3 if (*pp != 0) break;
1111     if (infile == stdin) printf(" > ");
1112 nigel 91 if ((pp = extend_inputline(infile, pp)) == NULL)
1113 nigel 3 {
1114     fprintf(outfile, "** Unexpected EOF\n");
1115 nigel 11 done = 1;
1116     goto CONTINUE;
1117 nigel 3 }
1118 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1119 nigel 3 }
1120    
1121 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1122     pointer to the correct relative point in the buffer. */
1123    
1124     p = buffer + poffset;
1125    
1126 nigel 29 /* If the first character after the delimiter is backslash, make
1127     the pattern end with backslash. This is purely to provide a way
1128     of testing for the error message when a pattern ends with backslash. */
1129    
1130     if (pp[1] == '\\') *pp++ = '\\';
1131    
1132 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1133     for callouts. */
1134 nigel 3
1135     *pp++ = 0;
1136 nigel 75 strcpy((char *)pbuffer, (char *)p);
1137 nigel 3
1138     /* Look for options after final delimiter */
1139    
1140     options = 0;
1141     study_options = 0;
1142 nigel 31 log_store = showstore; /* default from command line */
1143    
1144 nigel 3 while (*pp != 0)
1145     {
1146     switch (*pp++)
1147     {
1148 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1149 nigel 35 case 'g': do_g = 1; break;
1150 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1151     case 'm': options |= PCRE_MULTILINE; break;
1152     case 's': options |= PCRE_DOTALL; break;
1153     case 'x': options |= PCRE_EXTENDED; break;
1154 nigel 25
1155 nigel 35 case '+': do_showrest = 1; break;
1156 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1157 nigel 93 case 'B': do_debug = 1; break;
1158 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1159 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1160 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1161 nigel 75 case 'F': do_flip = 1; break;
1162 nigel 35 case 'G': do_G = 1; break;
1163 nigel 25 case 'I': do_showinfo = 1; break;
1164 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1165 nigel 31 case 'M': log_store = 1; break;
1166 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1167 nigel 37
1168     #if !defined NOPOSIX
1169 nigel 3 case 'P': do_posix = 1; break;
1170 nigel 37 #endif
1171    
1172 nigel 3 case 'S': do_study = 1; break;
1173 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1174 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1175 ph10 126 case 'Z': debug_lengths = 0; break;
1176 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1177 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1178 nigel 25
1179     case 'L':
1180     ppp = pp;
1181 nigel 93 /* The '\r' test here is so that it works on Windows. */
1182     /* The '0' test is just in case this is an unterminated line. */
1183     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1184 nigel 25 *ppp = 0;
1185     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1186     {
1187     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1188     goto SKIP_DATA;
1189     }
1190 nigel 93 locale_set = 1;
1191 nigel 25 tables = pcre_maketables();
1192     pp = ppp;
1193     break;
1194    
1195 nigel 75 case '>':
1196     to_file = pp;
1197     while (*pp != 0) pp++;
1198     while (isspace(pp[-1])) pp--;
1199     *pp = 0;
1200     break;
1201    
1202 nigel 91 case '<':
1203     {
1204     int x = check_newline(pp, outfile);
1205     if (x == 0) goto SKIP_DATA;
1206     options |= x;
1207     while (*pp++ != '>');
1208     }
1209     break;
1210    
1211 nigel 77 case '\r': /* So that it works in Windows */
1212     case '\n':
1213     case ' ':
1214     break;
1215 nigel 75
1216 nigel 3 default:
1217     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1218     goto SKIP_DATA;
1219     }
1220     }
1221    
1222 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1223 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1224     local character tables. */
1225 nigel 3
1226 nigel 37 #if !defined NOPOSIX
1227 nigel 3 if (posix || do_posix)
1228     {
1229     int rc;
1230     int cflags = 0;
1231 nigel 75
1232 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1233     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1234 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1235 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1236     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1237    
1238 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1239    
1240     /* Compilation failed; go back for another re, skipping to blank line
1241     if non-interactive. */
1242    
1243     if (rc != 0)
1244     {
1245 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1246 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1247     goto SKIP_DATA;
1248     }
1249     }
1250    
1251     /* Handle compiling via the native interface */
1252    
1253     else
1254 nigel 37 #endif /* !defined NOPOSIX */
1255    
1256 nigel 3 {
1257 nigel 93 if (timeit > 0)
1258 nigel 3 {
1259     register int i;
1260     clock_t time_taken;
1261     clock_t start_time = clock();
1262 nigel 93 for (i = 0; i < timeit; i++)
1263 nigel 3 {
1264 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1265 nigel 3 if (re != NULL) free(re);
1266     }
1267     time_taken = clock() - start_time;
1268 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1269     (((double)time_taken * 1000.0) / (double)timeit) /
1270 nigel 63 (double)CLOCKS_PER_SEC);
1271 nigel 3 }
1272    
1273 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1274 nigel 3
1275     /* Compilation failed; go back for another re, skipping to blank line
1276     if non-interactive. */
1277    
1278     if (re == NULL)
1279     {
1280     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1281     SKIP_DATA:
1282     if (infile != stdin)
1283     {
1284     for (;;)
1285     {
1286 nigel 91 if (extend_inputline(infile, buffer) == NULL)
1287 nigel 11 {
1288     done = 1;
1289     goto CONTINUE;
1290     }
1291 nigel 3 len = (int)strlen((char *)buffer);
1292     while (len > 0 && isspace(buffer[len-1])) len--;
1293     if (len == 0) break;
1294     }
1295     fprintf(outfile, "\n");
1296     }
1297 nigel 25 goto CONTINUE;
1298 nigel 3 }
1299    
1300 nigel 43 /* Compilation succeeded; print data if required. There are now two
1301     info-returning functions. The old one has a limited interface and
1302     returns only limited data. Check that it agrees with the newer one. */
1303 nigel 3
1304 nigel 63 if (log_store)
1305     fprintf(outfile, "Memory allocation (code space): %d\n",
1306     (int)(gotten_store -
1307     sizeof(real_pcre) -
1308     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1309    
1310 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1311     and remember the store that was got. */
1312    
1313     true_size = ((real_pcre *)re)->size;
1314     regex_gotten_store = gotten_store;
1315    
1316     /* If /S was present, study the regexp to generate additional info to
1317     help with the matching. */
1318    
1319     if (do_study)
1320     {
1321 nigel 93 if (timeit > 0)
1322 nigel 75 {
1323     register int i;
1324     clock_t time_taken;
1325     clock_t start_time = clock();
1326 nigel 93 for (i = 0; i < timeit; i++)
1327 nigel 75 extra = pcre_study(re, study_options, &error);
1328     time_taken = clock() - start_time;
1329     if (extra != NULL) free(extra);
1330 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1331     (((double)time_taken * 1000.0) / (double)timeit) /
1332 nigel 75 (double)CLOCKS_PER_SEC);
1333     }
1334     extra = pcre_study(re, study_options, &error);
1335     if (error != NULL)
1336     fprintf(outfile, "Failed to study: %s\n", error);
1337     else if (extra != NULL)
1338     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1339     }
1340    
1341     /* If the 'F' option was present, we flip the bytes of all the integer
1342     fields in the regex data block and the study block. This is to make it
1343     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1344     compiled on a different architecture. */
1345    
1346     if (do_flip)
1347     {
1348     real_pcre *rre = (real_pcre *)re;
1349     rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1350     rre->size = byteflip(rre->size, sizeof(rre->size));
1351     rre->options = byteflip(rre->options, sizeof(rre->options));
1352     rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1353     rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1354     rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1355     rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1356     rre->name_table_offset = byteflip(rre->name_table_offset,
1357     sizeof(rre->name_table_offset));
1358     rre->name_entry_size = byteflip(rre->name_entry_size,
1359     sizeof(rre->name_entry_size));
1360     rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1361    
1362     if (extra != NULL)
1363     {
1364     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1365     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1366     rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1367     }
1368     }
1369    
1370     /* Extract information from the compiled data if required */
1371    
1372     SHOW_INFO:
1373    
1374 nigel 93 if (do_debug)
1375     {
1376     fprintf(outfile, "------------------------------------------------------------------\n");
1377 ph10 116 pcre_printint(re, outfile, debug_lengths);
1378 nigel 93 }
1379    
1380 nigel 25 if (do_showinfo)
1381 nigel 3 {
1382 nigel 75 unsigned long int get_options, all_options;
1383 nigel 79 #if !defined NOINFOCHECK
1384 nigel 43 int old_first_char, old_options, old_count;
1385 nigel 79 #endif
1386 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1387 ph10 227 hascrorlf;
1388 nigel 63 int nameentrysize, namecount;
1389     const uschar *nametable;
1390 nigel 3
1391 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1392 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1393     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1394     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1395 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1396 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1397 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1398     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1399 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1400 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1401     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1402 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1403 nigel 43
1404 nigel 79 #if !defined NOINFOCHECK
1405 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1406 nigel 3 if (count < 0) fprintf(outfile,
1407 nigel 43 "Error %d from pcre_info()\n", count);
1408 nigel 3 else
1409     {
1410 nigel 43 if (old_count != count) fprintf(outfile,
1411     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1412     old_count);
1413 nigel 37
1414 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1415     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1416     first_char, old_first_char);
1417 nigel 37
1418 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1419     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1420     get_options, old_options);
1421 nigel 43 }
1422 nigel 79 #endif
1423 nigel 43
1424 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1425 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1426 nigel 77 (int)size, (int)regex_gotten_store);
1427 nigel 43
1428     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1429     if (backrefmax > 0)
1430     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1431 nigel 63
1432     if (namecount > 0)
1433     {
1434     fprintf(outfile, "Named capturing subpatterns:\n");
1435     while (namecount-- > 0)
1436     {
1437     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1438     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1439     GET2(nametable, 0));
1440     nametable += nameentrysize;
1441     }
1442     }
1443 ph10 172
1444 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1445 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1446 nigel 63
1447 nigel 75 all_options = ((real_pcre *)re)->options;
1448 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1449 nigel 75
1450 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1451 nigel 91 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1452 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1453     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1454     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1455     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1456 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1457 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1458     ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1459     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1460     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1461 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1462 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1463 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1464     ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1465 ph10 172
1466 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1467 nigel 43
1468 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1469 nigel 91 {
1470     case PCRE_NEWLINE_CR:
1471     fprintf(outfile, "Forced newline sequence: CR\n");
1472     break;
1473 nigel 43
1474 nigel 91 case PCRE_NEWLINE_LF:
1475     fprintf(outfile, "Forced newline sequence: LF\n");
1476     break;
1477    
1478     case PCRE_NEWLINE_CRLF:
1479     fprintf(outfile, "Forced newline sequence: CRLF\n");
1480     break;
1481    
1482 ph10 149 case PCRE_NEWLINE_ANYCRLF:
1483     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1484     break;
1485    
1486 nigel 93 case PCRE_NEWLINE_ANY:
1487     fprintf(outfile, "Forced newline sequence: ANY\n");
1488     break;
1489    
1490 nigel 91 default:
1491     break;
1492     }
1493    
1494 nigel 43 if (first_char == -1)
1495     {
1496 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1497 nigel 43 }
1498     else if (first_char < 0)
1499     {
1500     fprintf(outfile, "No first char\n");
1501     }
1502     else
1503     {
1504 nigel 63 int ch = first_char & 255;
1505 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1506 nigel 63 "" : " (caseless)";
1507 nigel 93 if (PRINTHEX(ch))
1508 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1509 nigel 3 else
1510 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1511 nigel 43 }
1512 nigel 37
1513 nigel 43 if (need_char < 0)
1514     {
1515     fprintf(outfile, "No need char\n");
1516 nigel 3 }
1517 nigel 43 else
1518     {
1519 nigel 63 int ch = need_char & 255;
1520 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1521 nigel 63 "" : " (caseless)";
1522 nigel 93 if (PRINTHEX(ch))
1523 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1524 nigel 43 else
1525 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1526 nigel 43 }
1527 nigel 75
1528     /* Don't output study size; at present it is in any case a fixed
1529     value, but it varies, depending on the computer architecture, and
1530     so messes up the test suite. (And with the /F option, it might be
1531     flipped.) */
1532    
1533     if (do_study)
1534     {
1535     if (extra == NULL)
1536     fprintf(outfile, "Study returned NULL\n");
1537     else
1538     {
1539     uschar *start_bits = NULL;
1540     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1541    
1542     if (start_bits == NULL)
1543     fprintf(outfile, "No starting byte set\n");
1544     else
1545     {
1546     int i;
1547     int c = 24;
1548     fprintf(outfile, "Starting byte set: ");
1549     for (i = 0; i < 256; i++)
1550     {
1551     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1552     {
1553     if (c > 75)
1554     {
1555     fprintf(outfile, "\n ");
1556     c = 2;
1557     }
1558 nigel 93 if (PRINTHEX(i) && i != ' ')
1559 nigel 75 {
1560     fprintf(outfile, "%c ", i);
1561     c += 2;
1562     }
1563     else
1564     {
1565     fprintf(outfile, "\\x%02x ", i);
1566     c += 5;
1567     }
1568     }
1569     }
1570     fprintf(outfile, "\n");
1571     }
1572     }
1573     }
1574 nigel 3 }
1575    
1576 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1577     that is all. The first 8 bytes of the file are the regex length and then
1578     the study length, in big-endian order. */
1579 nigel 3
1580 nigel 75 if (to_file != NULL)
1581 nigel 3 {
1582 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1583     if (f == NULL)
1584 nigel 3 {
1585 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1586 nigel 3 }
1587 nigel 75 else
1588     {
1589     uschar sbuf[8];
1590     sbuf[0] = (true_size >> 24) & 255;
1591     sbuf[1] = (true_size >> 16) & 255;
1592     sbuf[2] = (true_size >> 8) & 255;
1593     sbuf[3] = (true_size) & 255;
1594 nigel 3
1595 nigel 75 sbuf[4] = (true_study_size >> 24) & 255;
1596     sbuf[5] = (true_study_size >> 16) & 255;
1597     sbuf[6] = (true_study_size >> 8) & 255;
1598     sbuf[7] = (true_study_size) & 255;
1599 nigel 3
1600 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1601     fwrite(re, 1, true_size, f) < true_size)
1602     {
1603     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1604     }
1605 nigel 3 else
1606     {
1607 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1608     if (extra != NULL)
1609 nigel 3 {
1610 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1611     true_study_size)
1612 nigel 3 {
1613 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1614     strerror(errno));
1615 nigel 3 }
1616 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1617 nigel 93
1618 nigel 3 }
1619     }
1620 nigel 75 fclose(f);
1621 nigel 3 }
1622 nigel 77
1623     new_free(re);
1624     if (extra != NULL) new_free(extra);
1625     if (tables != NULL) new_free((void *)tables);
1626 nigel 75 continue; /* With next regex */
1627 nigel 3 }
1628 nigel 75 } /* End of non-POSIX compile */
1629 nigel 3
1630     /* Read data lines and test them */
1631    
1632     for (;;)
1633     {
1634 nigel 87 uschar *q;
1635 ph10 147 uschar *bptr;
1636 nigel 57 int *use_offsets = offsets;
1637 nigel 53 int use_size_offsets = size_offsets;
1638 nigel 63 int callout_data = 0;
1639     int callout_data_set = 0;
1640 nigel 3 int count, c;
1641 nigel 29 int copystrings = 0;
1642 nigel 63 int find_match_limit = 0;
1643 nigel 29 int getstrings = 0;
1644     int getlist = 0;
1645 nigel 39 int gmatched = 0;
1646 nigel 35 int start_offset = 0;
1647 nigel 41 int g_notempty = 0;
1648 nigel 77 int use_dfa = 0;
1649 nigel 3
1650     options = 0;
1651    
1652 nigel 91 *copynames = 0;
1653     *getnames = 0;
1654    
1655     copynamesptr = copynames;
1656     getnamesptr = getnames;
1657    
1658 nigel 63 pcre_callout = callout;
1659     first_callout = 1;
1660     callout_extra = 0;
1661     callout_count = 0;
1662     callout_fail_count = 999999;
1663     callout_fail_id = -1;
1664 nigel 73 show_malloc = 0;
1665 nigel 63
1666 nigel 91 if (extra != NULL) extra->flags &=
1667     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1668    
1669     len = 0;
1670     for (;;)
1671 nigel 11 {
1672 nigel 91 if (infile == stdin) printf("data> ");
1673     if (extend_inputline(infile, buffer + len) == NULL)
1674     {
1675     if (len > 0) break;
1676     done = 1;
1677     goto CONTINUE;
1678     }
1679     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1680     len = (int)strlen((char *)buffer);
1681     if (buffer[len-1] == '\n') break;
1682 nigel 11 }
1683 nigel 3
1684     while (len > 0 && isspace(buffer[len-1])) len--;
1685     buffer[len] = 0;
1686     if (len == 0) break;
1687    
1688     p = buffer;
1689     while (isspace(*p)) p++;
1690    
1691 ph10 147 bptr = q = dbuffer;
1692 nigel 3 while ((c = *p++) != 0)
1693     {
1694     int i = 0;
1695     int n = 0;
1696 nigel 63
1697 nigel 3 if (c == '\\') switch ((c = *p++))
1698     {
1699     case 'a': c = 7; break;
1700     case 'b': c = '\b'; break;
1701     case 'e': c = 27; break;
1702     case 'f': c = '\f'; break;
1703     case 'n': c = '\n'; break;
1704     case 'r': c = '\r'; break;
1705     case 't': c = '\t'; break;
1706     case 'v': c = '\v'; break;
1707    
1708     case '0': case '1': case '2': case '3':
1709     case '4': case '5': case '6': case '7':
1710     c -= '0';
1711     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1712     c = c * 8 + *p++ - '0';
1713 nigel 91
1714     #if !defined NOUTF8
1715     if (use_utf8 && c > 255)
1716     {
1717     unsigned char buff8[8];
1718     int ii, utn;
1719     utn = ord2utf8(c, buff8);
1720     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1721     c = buff8[ii]; /* Last byte */
1722     }
1723     #endif
1724 nigel 3 break;
1725    
1726     case 'x':
1727 nigel 49
1728     /* Handle \x{..} specially - new Perl thing for utf8 */
1729    
1730 nigel 79 #if !defined NOUTF8
1731 nigel 49 if (*p == '{')
1732     {
1733     unsigned char *pt = p;
1734     c = 0;
1735     while (isxdigit(*(++pt)))
1736     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1737     if (*pt == '}')
1738     {
1739 nigel 67 unsigned char buff8[8];
1740 nigel 49 int ii, utn;
1741 nigel 85 utn = ord2utf8(c, buff8);
1742 nigel 67 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1743     c = buff8[ii]; /* Last byte */
1744 nigel 49 p = pt + 1;
1745     break;
1746     }
1747     /* Not correct form; fall through */
1748     }
1749 nigel 79 #endif
1750 nigel 49
1751     /* Ordinary \x */
1752    
1753 nigel 3 c = 0;
1754     while (i++ < 2 && isxdigit(*p))
1755     {
1756     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1757     p++;
1758     }
1759     break;
1760    
1761 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1762 nigel 3 p--;
1763     continue;
1764    
1765 nigel 75 case '>':
1766     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1767     continue;
1768    
1769 nigel 3 case 'A': /* Option setting */
1770     options |= PCRE_ANCHORED;
1771     continue;
1772    
1773     case 'B':
1774     options |= PCRE_NOTBOL;
1775     continue;
1776    
1777 nigel 29 case 'C':
1778 nigel 63 if (isdigit(*p)) /* Set copy string */
1779     {
1780     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1781     copystrings |= 1 << n;
1782     }
1783     else if (isalnum(*p))
1784     {
1785 nigel 91 uschar *npp = copynamesptr;
1786 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1787 nigel 91 *npp++ = 0;
1788 nigel 67 *npp = 0;
1789 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1790 nigel 63 if (n < 0)
1791 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1792     copynamesptr = npp;
1793 nigel 63 }
1794     else if (*p == '+')
1795     {
1796     callout_extra = 1;
1797     p++;
1798     }
1799     else if (*p == '-')
1800     {
1801     pcre_callout = NULL;
1802     p++;
1803     }
1804     else if (*p == '!')
1805     {
1806     callout_fail_id = 0;
1807     p++;
1808     while(isdigit(*p))
1809     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1810     callout_fail_count = 0;
1811     if (*p == '!')
1812     {
1813     p++;
1814     while(isdigit(*p))
1815     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1816     }
1817     }
1818     else if (*p == '*')
1819     {
1820     int sign = 1;
1821     callout_data = 0;
1822     if (*(++p) == '-') { sign = -1; p++; }
1823     while(isdigit(*p))
1824     callout_data = callout_data * 10 + *p++ - '0';
1825     callout_data *= sign;
1826     callout_data_set = 1;
1827     }
1828 nigel 29 continue;
1829    
1830 nigel 79 #if !defined NODFA
1831 nigel 77 case 'D':
1832 nigel 79 #if !defined NOPOSIX
1833 nigel 77 if (posix || do_posix)
1834     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1835     else
1836 nigel 79 #endif
1837 nigel 77 use_dfa = 1;
1838     continue;
1839    
1840     case 'F':
1841     options |= PCRE_DFA_SHORTEST;
1842     continue;
1843 nigel 79 #endif
1844 nigel 77
1845 nigel 29 case 'G':
1846 nigel 63 if (isdigit(*p))
1847     {
1848     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1849     getstrings |= 1 << n;
1850     }
1851     else if (isalnum(*p))
1852     {
1853 nigel 91 uschar *npp = getnamesptr;
1854 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1855 nigel 91 *npp++ = 0;
1856 nigel 67 *npp = 0;
1857 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1858 nigel 63 if (n < 0)
1859 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1860     getnamesptr = npp;
1861 nigel 63 }
1862 nigel 29 continue;
1863    
1864     case 'L':
1865     getlist = 1;
1866     continue;
1867    
1868 nigel 63 case 'M':
1869     find_match_limit = 1;
1870     continue;
1871    
1872 nigel 37 case 'N':
1873     options |= PCRE_NOTEMPTY;
1874     continue;
1875    
1876 nigel 3 case 'O':
1877     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1878 nigel 53 if (n > size_offsets_max)
1879     {
1880     size_offsets_max = n;
1881 nigel 57 free(offsets);
1882 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1883 nigel 53 if (offsets == NULL)
1884     {
1885     printf("** Failed to get %d bytes of memory for offsets vector\n",
1886 ph10 151 (int)(size_offsets_max * sizeof(int)));
1887 nigel 77 yield = 1;
1888     goto EXIT;
1889 nigel 53 }
1890     }
1891     use_size_offsets = n;
1892 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1893 nigel 3 continue;
1894    
1895 nigel 75 case 'P':
1896     options |= PCRE_PARTIAL;
1897     continue;
1898    
1899 nigel 91 case 'Q':
1900     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1901     if (extra == NULL)
1902     {
1903     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1904     extra->flags = 0;
1905     }
1906     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1907     extra->match_limit_recursion = n;
1908     continue;
1909    
1910     case 'q':
1911     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1912     if (extra == NULL)
1913     {
1914     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1915     extra->flags = 0;
1916     }
1917     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1918     extra->match_limit = n;
1919     continue;
1920    
1921 nigel 79 #if !defined NODFA
1922 nigel 77 case 'R':
1923     options |= PCRE_DFA_RESTART;
1924     continue;
1925 nigel 79 #endif
1926 nigel 77
1927 nigel 73 case 'S':
1928     show_malloc = 1;
1929     continue;
1930    
1931 nigel 3 case 'Z':
1932     options |= PCRE_NOTEOL;
1933     continue;
1934 nigel 71
1935     case '?':
1936     options |= PCRE_NO_UTF8_CHECK;
1937     continue;
1938 nigel 91
1939     case '<':
1940     {
1941     int x = check_newline(p, outfile);
1942     if (x == 0) goto NEXT_DATA;
1943     options |= x;
1944     while (*p++ != '>');
1945     }
1946     continue;
1947 nigel 3 }
1948 nigel 9 *q++ = c;
1949 nigel 3 }
1950 nigel 9 *q = 0;
1951     len = q - dbuffer;
1952 nigel 3
1953 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
1954     {
1955     printf("**Match limit not relevant for DFA matching: ignored\n");
1956     find_match_limit = 0;
1957     }
1958    
1959 nigel 3 /* Handle matching via the POSIX interface, which does not
1960 nigel 63 support timing or playing with the match limit or callout data. */
1961 nigel 3
1962 nigel 37 #if !defined NOPOSIX
1963 nigel 3 if (posix || do_posix)
1964     {
1965     int rc;
1966     int eflags = 0;
1967 nigel 63 regmatch_t *pmatch = NULL;
1968     if (use_size_offsets > 0)
1969 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1970 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1971     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1972    
1973 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1974 nigel 3
1975     if (rc != 0)
1976     {
1977 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1978 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1979     }
1980 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1981     != 0)
1982     {
1983     fprintf(outfile, "Matched with REG_NOSUB\n");
1984     }
1985 nigel 3 else
1986     {
1987 nigel 7 size_t i;
1988 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
1989 nigel 3 {
1990     if (pmatch[i].rm_so >= 0)
1991     {
1992 nigel 23 fprintf(outfile, "%2d: ", (int)i);
1993 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
1994     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1995 nigel 3 fprintf(outfile, "\n");
1996 nigel 35 if (i == 0 && do_showrest)
1997     {
1998     fprintf(outfile, " 0+ ");
1999 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2000     outfile);
2001 nigel 35 fprintf(outfile, "\n");
2002     }
2003 nigel 3 }
2004     }
2005     }
2006 nigel 53 free(pmatch);
2007 nigel 3 }
2008    
2009 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2010 nigel 3
2011 nigel 37 else
2012     #endif /* !defined NOPOSIX */
2013    
2014 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2015 nigel 3 {
2016 nigel 93 if (timeitm > 0)
2017 nigel 3 {
2018     register int i;
2019     clock_t time_taken;
2020     clock_t start_time = clock();
2021 nigel 77
2022 nigel 79 #if !defined NODFA
2023 nigel 77 if (all_use_dfa || use_dfa)
2024     {
2025     int workspace[1000];
2026 nigel 93 for (i = 0; i < timeitm; i++)
2027 nigel 77 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2028     options | g_notempty, use_offsets, use_size_offsets, workspace,
2029     sizeof(workspace)/sizeof(int));
2030     }
2031     else
2032 nigel 79 #endif
2033 nigel 77
2034 nigel 93 for (i = 0; i < timeitm; i++)
2035 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2036 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2037 nigel 77
2038 nigel 3 time_taken = clock() - start_time;
2039 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2040     (((double)time_taken * 1000.0) / (double)timeitm) /
2041 nigel 63 (double)CLOCKS_PER_SEC);
2042 nigel 3 }
2043    
2044 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2045 nigel 87 varying limits in order to find the minimum value for the match limit and
2046     for the recursion limit. */
2047 nigel 63
2048     if (find_match_limit)
2049     {
2050     if (extra == NULL)
2051     {
2052 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2053 nigel 63 extra->flags = 0;
2054     }
2055    
2056 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2057 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2058     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2059     PCRE_ERROR_MATCHLIMIT, "match()");
2060 nigel 63
2061 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2062     options|g_notempty, use_offsets, use_size_offsets,
2063     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2064     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2065 nigel 63 }
2066    
2067     /* If callout_data is set, use the interface with additional data */
2068    
2069     else if (callout_data_set)
2070     {
2071     if (extra == NULL)
2072     {
2073 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2074 nigel 63 extra->flags = 0;
2075     }
2076     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2077 nigel 71 extra->callout_data = &callout_data;
2078 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2079     options | g_notempty, use_offsets, use_size_offsets);
2080     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2081     }
2082    
2083     /* The normal case is just to do the match once, with the default
2084     value of match_limit. */
2085    
2086 nigel 79 #if !defined NODFA
2087 nigel 77 else if (all_use_dfa || use_dfa)
2088     {
2089     int workspace[1000];
2090     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2091     options | g_notempty, use_offsets, use_size_offsets, workspace,
2092     sizeof(workspace)/sizeof(int));
2093     if (count == 0)
2094     {
2095     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2096     count = use_size_offsets/2;
2097     }
2098     }
2099 nigel 79 #endif
2100 nigel 77
2101 nigel 75 else
2102     {
2103     count = pcre_exec(re, extra, (char *)bptr, len,
2104     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2105 nigel 77 if (count == 0)
2106     {
2107     fprintf(outfile, "Matched, but too many substrings\n");
2108     count = use_size_offsets/3;
2109     }
2110 nigel 75 }
2111 nigel 3
2112 nigel 39 /* Matched */
2113    
2114 nigel 3 if (count >= 0)
2115     {
2116 nigel 93 int i, maxcount;
2117    
2118     #if !defined NODFA
2119     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2120     #endif
2121     maxcount = use_size_offsets/3;
2122    
2123     /* This is a check against a lunatic return value. */
2124    
2125     if (count > maxcount)
2126     {
2127     fprintf(outfile,
2128     "** PCRE error: returned count %d is too big for offset size %d\n",
2129     count, use_size_offsets);
2130     count = use_size_offsets/3;
2131     if (do_g || do_G)
2132     {
2133     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2134     do_g = do_G = FALSE; /* Break g/G loop */
2135     }
2136     }
2137    
2138 nigel 29 for (i = 0; i < count * 2; i += 2)
2139 nigel 3 {
2140 nigel 57 if (use_offsets[i] < 0)
2141 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2142     else
2143     {
2144     fprintf(outfile, "%2d: ", i/2);
2145 nigel 63 (void)pchars(bptr + use_offsets[i],
2146     use_offsets[i+1] - use_offsets[i], outfile);
2147 nigel 3 fprintf(outfile, "\n");
2148 nigel 35 if (i == 0)
2149     {
2150     if (do_showrest)
2151     {
2152     fprintf(outfile, " 0+ ");
2153 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2154     outfile);
2155 nigel 35 fprintf(outfile, "\n");
2156     }
2157     }
2158 nigel 3 }
2159     }
2160 nigel 29
2161     for (i = 0; i < 32; i++)
2162     {
2163     if ((copystrings & (1 << i)) != 0)
2164     {
2165 nigel 91 char copybuffer[256];
2166 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2167 nigel 37 i, copybuffer, sizeof(copybuffer));
2168 nigel 29 if (rc < 0)
2169     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2170     else
2171 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2172 nigel 29 }
2173     }
2174    
2175 nigel 91 for (copynamesptr = copynames;
2176     *copynamesptr != 0;
2177     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2178     {
2179     char copybuffer[256];
2180     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2181     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2182     if (rc < 0)
2183     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2184     else
2185     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2186     }
2187    
2188 nigel 29 for (i = 0; i < 32; i++)
2189     {
2190     if ((getstrings & (1 << i)) != 0)
2191     {
2192     const char *substring;
2193 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2194 nigel 29 i, &substring);
2195     if (rc < 0)
2196     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2197     else
2198     {
2199     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2200 nigel 49 pcre_free_substring(substring);
2201 nigel 29 }
2202     }
2203     }
2204    
2205 nigel 91 for (getnamesptr = getnames;
2206     *getnamesptr != 0;
2207     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2208     {
2209     const char *substring;
2210     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2211     count, (char *)getnamesptr, &substring);
2212     if (rc < 0)
2213     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2214     else
2215     {
2216     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2217     pcre_free_substring(substring);
2218     }
2219     }
2220    
2221 nigel 29 if (getlist)
2222     {
2223     const char **stringlist;
2224 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2225 nigel 29 &stringlist);
2226     if (rc < 0)
2227     fprintf(outfile, "get substring list failed %d\n", rc);
2228     else
2229     {
2230     for (i = 0; i < count; i++)
2231     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2232     if (stringlist[i] != NULL)
2233     fprintf(outfile, "string list not terminated by NULL\n");
2234 nigel 49 /* free((void *)stringlist); */
2235     pcre_free_substring_list(stringlist);
2236 nigel 29 }
2237     }
2238 nigel 39 }
2239 nigel 29
2240 nigel 75 /* There was a partial match */
2241    
2242     else if (count == PCRE_ERROR_PARTIAL)
2243     {
2244 nigel 77 fprintf(outfile, "Partial match");
2245 nigel 79 #if !defined NODFA
2246 nigel 77 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2247     fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2248     bptr + use_offsets[0]);
2249 nigel 79 #endif
2250 nigel 77 fprintf(outfile, "\n");
2251 nigel 75 break; /* Out of the /g loop */
2252     }
2253    
2254 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2255 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2256     to advance the start offset, and continue. We won't be at the end of the
2257     string - that was checked before setting g_notempty.
2258 nigel 39
2259 ph10 150 Complication arises in the case when the newline option is "any" or
2260 ph10 149 "anycrlf". If the previous match was at the end of a line terminated by
2261     CRLF, an advance of one character just passes the \r, whereas we should
2262     prefer the longer newline sequence, as does the code in pcre_exec().
2263     Fudge the offset value to achieve this.
2264 ph10 144
2265 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2266     character, not one byte. */
2267    
2268 nigel 3 else
2269     {
2270 nigel 41 if (g_notempty != 0)
2271 nigel 35 {
2272 nigel 73 int onechar = 1;
2273 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2274 nigel 57 use_offsets[0] = start_offset;
2275 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2276     {
2277     int d;
2278     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2279     obits = (d == '\r')? PCRE_NEWLINE_CR :
2280     (d == '\n')? PCRE_NEWLINE_LF :
2281     (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2282 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2283 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2284     }
2285 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2286 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2287 ph10 149 &&
2288 ph10 143 start_offset < len - 1 &&
2289     bptr[start_offset] == '\r' &&
2290     bptr[start_offset+1] == '\n')
2291 ph10 144 onechar++;
2292 ph10 143 else if (use_utf8)
2293 nigel 73 {
2294     while (start_offset + onechar < len)
2295     {
2296     int tb = bptr[start_offset+onechar];
2297     if (tb <= 127) break;
2298     tb &= 0xc0;
2299     if (tb != 0 && tb != 0xc0) onechar++;
2300     }
2301     }
2302     use_offsets[1] = start_offset + onechar;
2303 nigel 35 }
2304 nigel 41 else
2305     {
2306 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2307 nigel 41 {
2308 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
2309 nigel 41 }
2310 nigel 73 else fprintf(outfile, "Error %d\n", count);
2311 nigel 41 break; /* Out of the /g loop */
2312     }
2313 nigel 3 }
2314 nigel 35
2315 nigel 39 /* If not /g or /G we are done */
2316    
2317     if (!do_g && !do_G) break;
2318    
2319 nigel 41 /* If we have matched an empty string, first check to see if we are at
2320     the end of the subject. If so, the /g loop is over. Otherwise, mimic
2321     what Perl's /g option does. This turns out to be rather cunning. First
2322 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2323     same point. If this fails (picked up above) we advance to the next
2324 ph10 143 character. */
2325 ph10 142
2326 nigel 41 g_notempty = 0;
2327 ph10 142
2328 nigel 57 if (use_offsets[0] == use_offsets[1])
2329 nigel 41 {
2330 nigel 57 if (use_offsets[0] == len) break;
2331 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2332 nigel 41 }
2333 nigel 39
2334     /* For /g, update the start offset, leaving the rest alone */
2335    
2336 ph10 143 if (do_g) start_offset = use_offsets[1];
2337 nigel 39
2338     /* For /G, update the pointer and length */
2339    
2340     else
2341 nigel 35 {
2342 ph10 143 bptr += use_offsets[1];
2343     len -= use_offsets[1];
2344 nigel 35 }
2345 nigel 39 } /* End of loop for /g and /G */
2346 nigel 91
2347     NEXT_DATA: continue;
2348 nigel 39 } /* End of loop for data lines */
2349 nigel 3
2350 nigel 11 CONTINUE:
2351 nigel 37
2352     #if !defined NOPOSIX
2353 nigel 3 if (posix || do_posix) regfree(&preg);
2354 nigel 37 #endif
2355    
2356 nigel 77 if (re != NULL) new_free(re);
2357     if (extra != NULL) new_free(extra);
2358 nigel 25 if (tables != NULL)
2359     {
2360 nigel 77 new_free((void *)tables);
2361 nigel 25 setlocale(LC_CTYPE, "C");
2362 nigel 93 locale_set = 0;
2363 nigel 25 }
2364 nigel 3 }
2365    
2366 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2367 nigel 77
2368     EXIT:
2369    
2370     if (infile != NULL && infile != stdin) fclose(infile);
2371     if (outfile != NULL && outfile != stdout) fclose(outfile);
2372    
2373     free(buffer);
2374     free(dbuffer);
2375     free(pbuffer);
2376     free(offsets);
2377    
2378     return yield;
2379 nigel 3 }
2380    
2381 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12