/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 255 - (hide annotations) (download)
Wed Sep 19 08:50:04 2007 UTC (7 years, 1 month ago) by ph10
File MIME type: text/plain
File size: 70172 byte(s)
Add casts to pcretest.c to avoid compiler warnings.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50 nigel 3
51 nigel 93
52     /* A number of things vary for Windows builds. Originally, pcretest opened its
53     input and output without "b"; then I was told that "b" was needed in some
54     environments, so it was added for release 5.0 to both the input and output. (It
55     makes no difference on Unix-like systems.) Later I was told that it is wrong
56     for the input on Windows. I've now abstracted the modes into two macros that
57     are set here, to make it easier to fiddle with them, and removed "b" from the
58     input mode under Windows. */
59    
60     #if defined(_WIN32) || defined(WIN32)
61     #include <io.h> /* For _setmode() */
62     #include <fcntl.h> /* For _O_BINARY */
63     #define INPUT_MODE "r"
64     #define OUTPUT_MODE "wb"
65    
66     #else
67     #include <sys/time.h> /* These two includes are needed */
68     #include <sys/resource.h> /* for setrlimit(). */
69     #define INPUT_MODE "rb"
70     #define OUTPUT_MODE "wb"
71 nigel 91 #endif
72    
73 nigel 93
74 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
75     displaying the results of pcre_study() and we also need to know about the
76     internal macros, structures, and other internal data values; pcretest has
77     "inside information" compared to a program that strictly follows the PCRE API.
78 nigel 37
79 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
80     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81     appropriately for an application, not for building PCRE. */
82 nigel 77
83 ph10 145 #include "pcre.h"
84 nigel 77 #include "pcre_internal.h"
85    
86 nigel 85 /* We need access to the data tables that PCRE uses. So as not to have to keep
87     two copies, we include the source file here, changing the names of the external
88     symbols to prevent clashes. */
89 nigel 77
90 nigel 85 #define _pcre_utf8_table1 utf8_table1
91     #define _pcre_utf8_table1_size utf8_table1_size
92     #define _pcre_utf8_table2 utf8_table2
93     #define _pcre_utf8_table3 utf8_table3
94     #define _pcre_utf8_table4 utf8_table4
95     #define _pcre_utt utt
96     #define _pcre_utt_size utt_size
97 ph10 240 #define _pcre_utt_names utt_names
98 nigel 85 #define _pcre_OP_lengths OP_lengths
99    
100     #include "pcre_tables.c"
101    
102     /* We also need the pcre_printint() function for printing out compiled
103     patterns. This function is in a separate file so that it can be included in
104 nigel 93 pcre_compile.c when that module is compiled with debugging enabled.
105 nigel 85
106 nigel 93 The definition of the macro PRINTABLE, which determines whether to print an
107     output character as-is or as a hex value when showing compiled patterns, is
108     contained in this file. We use it here also, in cases when the locale has not
109     been explicitly changed, so as to get consistent output from systems that
110     differ in their output from isprint() even in the "C" locale. */
111    
112 nigel 85 #include "pcre_printint.src"
113    
114 nigel 93 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
115 nigel 85
116 nigel 93
117 nigel 37 /* It is possible to compile this test program without including support for
118     testing the POSIX interface, though this is not available via the standard
119     Makefile. */
120    
121     #if !defined NOPOSIX
122 nigel 3 #include "pcreposix.h"
123 nigel 37 #endif
124 nigel 3
125 ph10 107 /* It is also possible, for the benefit of the version currently imported into
126     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
127     interface to the DFA matcher (NODFA), and without the doublecheck of the old
128     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
129     UTF8 support if PCRE is built without it. */
130 nigel 79
131 ph10 107 #ifndef SUPPORT_UTF8
132     #ifndef NOUTF8
133     #define NOUTF8
134     #endif
135     #endif
136 nigel 79
137 ph10 107
138 nigel 85 /* Other parameters */
139    
140 nigel 3 #ifndef CLOCKS_PER_SEC
141     #ifdef CLK_TCK
142     #define CLOCKS_PER_SEC CLK_TCK
143     #else
144     #define CLOCKS_PER_SEC 100
145     #endif
146     #endif
147    
148 nigel 93 /* This is the default loop count for timing. */
149    
150 nigel 75 #define LOOPREPEAT 500000
151 nigel 3
152 nigel 85 /* Static variables */
153    
154 nigel 3 static FILE *outfile; /* Stream that receives the test output; main() starts it at stdout */
155     static int log_store = 0; /* NOTE(review): not referenced in the visible part of the file - purpose to be confirmed */
156 nigel 63 static int callout_count; /* Incremented by callout() each time the callout number equals callout_fail_id */
157     static int callout_extra; /* Non-zero makes callout() print the capture details on every call */
158     static int callout_fail_count; /* callout() returns 1 once callout_count reaches this value */
159     static int callout_fail_id; /* Callout number that is counted towards callout_fail_count */
160 ph10 210 static int debug_lengths; /* NOTE(review): not referenced in the visible part of the file - purpose to be confirmed */
161 nigel 63 static int first_callout; /* Non-zero until callout() has run once; it is cleared at the end of callout() */
162 nigel 93 static int locale_set = 0; /* Non-zero makes PRINTHEX() use isprint() instead of PRINTABLE() */
163 nigel 73 static int show_malloc; /* Non-zero makes the local malloc/free wrappers trace each call to outfile */
164 nigel 67 static int use_utf8; /* Non-zero makes pchars() decode its input as UTF-8 */
165 nigel 43 static size_t gotten_store; /* Size passed to the most recent new_malloc() call */
166 nigel 3
167 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
168    
169     static int buffer_size = 50000; /* Current size in bytes of each of the three buffers below */
170     static uschar *buffer = NULL; /* Input line buffer filled by extend_inputline() */
171     static uschar *dbuffer = NULL; /* Reallocated alongside buffer; contents are not preserved on growth */
172 nigel 75 static uschar *pbuffer = NULL; /* Copy of the pattern used by callout(); contents are preserved on growth */
173 nigel 3
174 nigel 75
175 nigel 49
176     /*************************************************
177 nigel 91 * Read or extend an input line *
178     *************************************************/
179    
180     /* Input lines are read into buffer, but both patterns and data lines can be
181     continued over multiple input lines. In addition, if the buffer fills up, we
182     want to automatically expand it so as to be able to handle extremely large
183     lines that are needed for certain stress tests. When the input buffer is
184     expanded, the other two buffers must also be expanded likewise, and the
185     contents of pbuffer, which are a copy of the input for callouts, must be
186     preserved (for when expansion happens for a data line). This is not the most
187     optimal way of handling this, but hey, this is just a test program!
188    
189     Arguments:
190     f the file to read
191     start where in buffer to start (this *must* be within buffer)
192    
193     Returns: pointer to the start of new data
194     could be a copy of start, or could be moved
195     NULL if no data read and EOF reached
196     */
197    
198     static uschar *
199     extend_inputline(FILE *f, uschar *start)
200     {
201     uschar *here = start;
202    
203     for (;;)
204     {
205     int rlen = buffer_size - (here - buffer);
206 nigel 93
207 nigel 91 if (rlen > 1000)
208     {
209     int dlen;
210     if (fgets((char *)here, rlen, f) == NULL)
211     return (here == start)? NULL : start;
212     dlen = (int)strlen((char *)here);
213     if (dlen > 0 && here[dlen - 1] == '\n') return start;
214     here += dlen;
215     }
216    
217     else
218     {
219     int new_buffer_size = 2*buffer_size;
220     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
221     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
222     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
223    
224     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
225     {
226     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
227     exit(1);
228     }
229    
230     memcpy(new_buffer, buffer, buffer_size);
231     memcpy(new_pbuffer, pbuffer, buffer_size);
232    
233     buffer_size = new_buffer_size;
234    
235     start = new_buffer + (start - buffer);
236     here = new_buffer + (here - buffer);
237    
238     free(buffer);
239     free(dbuffer);
240     free(pbuffer);
241    
242     buffer = new_buffer;
243     dbuffer = new_dbuffer;
244     pbuffer = new_pbuffer;
245     }
246     }
247    
248     return NULL; /* Control never gets here */
249     }
250    
251    
252    
253    
254    
255    
256    
257     /*************************************************
258 nigel 63 * Read number from string *
259     *************************************************/
260    
261     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
262     around with conditional compilation, just do the job by hand. It is only used
263 nigel 93 for unpicking arguments, so just keep it simple.
264 nigel 63
265     Arguments:
266     str string to be converted
267     endptr where to put the end pointer
268    
269     Returns: the converted value, as an int
270     */
271    
/* Convert a decimal number at the front of a string to an int.

Leading white space is skipped, then decimal digits are accumulated. A digit
string whose value does not fit in an int yields INT_MAX instead of overflowing
(signed overflow is undefined behaviour); all of its digits are still consumed.

Arguments:
  str         string to be converted
  endptr      receives a pointer to the first character after the digits
              (or after the white space, if there are no digits)

Returns:      the converted value, 0 if there are no digits, INT_MAX on overflow
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  while (isdigit(*str)) {
    int digit = (int)(*str++ - '0');

    /* result*10 + digit fits exactly when result <= (INT_MAX - digit)/10. */
    if (result > (INT_MAX - digit) / 10)
      result = INT_MAX;
    else
      result = result * 10 + digit;
  }

  *endptr = str;
  return result;
}
281    
282    
283    
284 nigel 49
285     /*************************************************
286     * Convert UTF-8 string to value *
287     *************************************************/
288    
289     /* This function takes one or more bytes that represents a UTF-8 character,
290     and returns the value of the character.
291    
292     Argument:
293 nigel 91 utf8bytes a pointer to the byte vector
294     vptr a pointer to an int to receive the value
295 nigel 49
296 nigel 91 Returns: > 0 => the number of bytes consumed
297     -6 to 0 => malformed UTF-8 character at offset = (-return)
298 nigel 49 */
299    
300 nigel 79 #if !defined NOUTF8
301    
302 nigel 67 static int
303 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
304 nigel 49 {
305 nigel 91 int c = *utf8bytes++;
306 nigel 49 int d = c;
307     int i, j, s;
308    
309     for (i = -1; i < 6; i++) /* i is number of additional bytes */
310     {
311     if ((d & 0x80) == 0) break;
312     d <<= 1;
313     }
314    
315     if (i == -1) { *vptr = c; return 1; } /* ascii character */
316     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
317    
318     /* i now has a value in the range 1-5 */
319    
320 nigel 59 s = 6*i;
321 nigel 85 d = (c & utf8_table3[i]) << s;
322 nigel 49
323     for (j = 0; j < i; j++)
324     {
325 nigel 91 c = *utf8bytes++;
326 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
327 nigel 59 s -= 6;
328 nigel 49 d |= (c & 0x3f) << s;
329     }
330    
331     /* Check that encoding was the correct unique one */
332    
333 nigel 85 for (j = 0; j < utf8_table1_size; j++)
334     if (d <= utf8_table1[j]) break;
335 nigel 49 if (j != i) return -(i+1);
336    
337     /* Valid value */
338    
339     *vptr = d;
340     return i+1;
341     }
342    
343 nigel 79 #endif
344 nigel 49
345    
346 nigel 79
347 nigel 63 /*************************************************
348 nigel 85 * Convert character value to UTF-8 *
349     *************************************************/
350    
351     /* This function takes an integer value in the range 0 - 0x7fffffff
352     and encodes it as a UTF-8 character in 1 to 6 bytes.
353    
354     Arguments:
355     cvalue the character value
356 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
357 nigel 85
358     Returns: number of bytes placed in the buffer
359     */
360    
361 nigel 93 #if !defined NOUTF8
362    
363 nigel 85 static int
364 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
365 nigel 85 {
366     register int i, j;
367     for (i = 0; i < utf8_table1_size; i++)
368     if (cvalue <= utf8_table1[i]) break;
369 nigel 91 utf8bytes += i;
370 nigel 85 for (j = i; j > 0; j--)
371     {
372 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
373 nigel 85 cvalue >>= 6;
374     }
375 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
376 nigel 85 return i + 1;
377     }
378    
379 nigel 93 #endif
380 nigel 85
381    
382 nigel 93
383 nigel 85 /*************************************************
384 nigel 63 * Print character string *
385     *************************************************/
386 nigel 49
387 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
388     mode. Yields number of characters printed. If handed a NULL file, just counts
389     chars without printing. */
390 nigel 49
391 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
392 nigel 3 {
393 nigel 85 int c = 0;
394 nigel 63 int yield = 0;
395 nigel 3
396 nigel 63 while (length-- > 0)
397 nigel 3 {
398 nigel 79 #if !defined NOUTF8
399 nigel 67 if (use_utf8)
400 nigel 63 {
401     int rc = utf82ord(p, &c);
402 nigel 3
403 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
404     {
405     length -= rc - 1;
406     p += rc;
407 nigel 93 if (PRINTHEX(c))
408 nigel 63 {
409     if (f != NULL) fprintf(f, "%c", c);
410     yield++;
411     }
412     else
413     {
414 nigel 93 int n = 4;
415     if (f != NULL) fprintf(f, "\\x{%02x}", c);
416     yield += (n <= 0x000000ff)? 2 :
417     (n <= 0x00000fff)? 3 :
418     (n <= 0x0000ffff)? 4 :
419     (n <= 0x000fffff)? 5 : 6;
420 nigel 63 }
421     continue;
422     }
423     }
424 nigel 79 #endif
425 nigel 3
426 nigel 63 /* Not UTF-8, or malformed UTF-8 */
427    
428 nigel 93 c = *p++;
429     if (PRINTHEX(c))
430 nigel 3 {
431 nigel 63 if (f != NULL) fprintf(f, "%c", c);
432     yield++;
433 nigel 3 }
434 nigel 63 else
435 nigel 3 {
436 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
437     yield += 4;
438     }
439     }
440 nigel 3
441 nigel 63 return yield;
442     }
443 nigel 23
444 nigel 3
445 nigel 23
446 nigel 63 /*************************************************
447     * Callout function *
448     *************************************************/
449 nigel 3
450 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
451     the match. Yield zero unless more callouts than the fail count, or the callout
452     data is not zero. */
453 nigel 3
454 nigel 63 static int callout(pcre_callout_block *cb)
455     {
456     FILE *f = (first_callout | callout_extra)? outfile : NULL;
457 nigel 75 int i, pre_start, post_start, subject_length;
458 nigel 3
459 nigel 63 if (callout_extra)
460     {
461     fprintf(f, "Callout %d: last capture = %d\n",
462     cb->callout_number, cb->capture_last);
463 nigel 3
464 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
465     {
466     if (cb->offset_vector[i] < 0)
467     fprintf(f, "%2d: <unset>\n", i/2);
468     else
469     {
470     fprintf(f, "%2d: ", i/2);
471     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
472     cb->offset_vector[i+1] - cb->offset_vector[i], f);
473     fprintf(f, "\n");
474     }
475     }
476     }
477 nigel 3
478 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
479     datails. On subsequent calls in the same match, we use pchars just to find the
480     printed lengths of the substrings. */
481 nigel 3
482 nigel 63 if (f != NULL) fprintf(f, "--->");
483 nigel 3
484 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
485     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
486     cb->current_position - cb->start_match, f);
487 nigel 3
488 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
489    
490 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
491     cb->subject_length - cb->current_position, f);
492 nigel 3
493 nigel 63 if (f != NULL) fprintf(f, "\n");
494 nigel 9
495 nigel 63 /* Always print appropriate indicators, with callout number if not already
496 nigel 75 shown. For automatic callouts, show the pattern offset. */
497 nigel 3
498 nigel 75 if (cb->callout_number == 255)
499     {
500     fprintf(outfile, "%+3d ", cb->pattern_position);
501     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
502     }
503     else
504     {
505     if (callout_extra) fprintf(outfile, " ");
506     else fprintf(outfile, "%3d ", cb->callout_number);
507     }
508 nigel 3
509 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
510     fprintf(outfile, "^");
511 nigel 3
512 nigel 63 if (post_start > 0)
513     {
514     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
515     fprintf(outfile, "^");
516 nigel 3 }
517    
518 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
519     fprintf(outfile, " ");
520    
521     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
522     pbuffer + cb->pattern_position);
523    
524 nigel 63 fprintf(outfile, "\n");
525     first_callout = 0;
526 nigel 3
527 nigel 71 if (cb->callout_data != NULL)
528 nigel 49 {
529 nigel 71 int callout_data = *((int *)(cb->callout_data));
530     if (callout_data != 0)
531     {
532     fprintf(outfile, "Callout data = %d\n", callout_data);
533     return callout_data;
534     }
535 nigel 63 }
536 nigel 49
537 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
538     (++callout_count >= callout_fail_count)? 1 : 0;
539 nigel 3 }
540    
541    
542 nigel 63 /*************************************************
543 nigel 73 * Local malloc functions *
544 nigel 63 *************************************************/
545 nigel 3
546     /* Alternative malloc function, to test functionality and show the size of the
547     compiled re. */
548    
549     static void *new_malloc(size_t size)
550     {
551 nigel 73 void *block = malloc(size);
552 nigel 43 gotten_store = size;
553 nigel 73 if (show_malloc)
554 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
555 nigel 73 return block;
556 nigel 3 }
557    
558 nigel 73 static void new_free(void *block)
559     {
560     if (show_malloc)
561     fprintf(outfile, "free %p\n", block);
562     free(block);
563     }
564 nigel 3
565    
566 nigel 73 /* For recursion malloc/free, to test stacking calls */
567    
568     static void *stack_malloc(size_t size)
569     {
570     void *block = malloc(size);
571     if (show_malloc)
572 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
573 nigel 73 return block;
574     }
575    
576     static void stack_free(void *block)
577     {
578     if (show_malloc)
579     fprintf(outfile, "stack_free %p\n", block);
580     free(block);
581     }
582    
583    
584 nigel 63 /*************************************************
585     * Call pcre_fullinfo() *
586     *************************************************/
587 nigel 43
588     /* Get one piece of information from the pcre_fullinfo() function */
589    
590     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
591     {
592     int rc;
593     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
594     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
595     }
596    
597    
598    
599 nigel 63 /*************************************************
600 nigel 75 * Byte flipping function *
601     *************************************************/
602    
/* Reverse the byte order of a value. When n is 2 the two low-order bytes are
swapped; for any other n the four low-order bytes are reversed. Bits above the
bytes involved do not contribute to the result. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int b0 = value & 0xff;
  unsigned long int b1 = (value >> 8) & 0xff;
  unsigned long int b2, b3;

  if (n == 2) return (b0 << 8) | b1;

  b2 = (value >> 16) & 0xff;
  b3 = (value >> 24) & 0xff;
  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
612    
613    
614    
615    
616     /*************************************************
617 nigel 87 * Check match or recursion limit *
618     *************************************************/
619    
620     static int
621     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
622     int start_offset, int options, int *use_offsets, int use_size_offsets,
623     int flag, unsigned long int *limit, int errnumber, const char *msg)
624     {
625     int count;
626     int min = 0;
627     int mid = 64;
628     int max = -1;
629    
630     extra->flags |= flag;
631    
632     for (;;)
633     {
634     *limit = mid;
635    
636     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
637     use_offsets, use_size_offsets);
638    
639     if (count == errnumber)
640     {
641     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
642     min = mid;
643     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
644     }
645    
646     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
647     count == PCRE_ERROR_PARTIAL)
648     {
649     if (mid == min + 1)
650     {
651     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
652     break;
653     }
654     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
655     max = mid;
656     mid = (min + mid)/2;
657     }
658     else break; /* Some other error */
659     }
660    
661     extra->flags &= ~flag;
662     return count;
663     }
664    
665    
666    
667     /*************************************************
668 ph10 227 * Case-independent strncmp() function *
669     *************************************************/
670    
671     /*
672     Arguments:
673     s first string
674     t second string
675     n number of characters to compare
676    
677     Returns: < 0, = 0, or > 0, according to the comparison
678     */
679    
680     static int
681     strncmpic(uschar *s, uschar *t, int n)
682     {
683     while (n--)
684     {
685     int c = tolower(*s++) - tolower(*t++);
686     if (c) return c;
687     }
688     return 0;
689     }
690    
691    
692    
693     /*************************************************
694 nigel 91 * Check newline indicator *
695     *************************************************/
696    
697     /* This is used both at compile and run-time to check for <xxx> escapes, where
698 ph10 149 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
699     no match.
700 nigel 91
701     Arguments:
702     p points after the leading '<'
703     f file for error message
704    
705     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
706     */
707    
708     static int
709     check_newline(uschar *p, FILE *f)
710     {
711 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
712     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
713     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
714     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
715     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
716 ph10 231 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
717     if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
718 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
719     return 0;
720     }
721    
722    
723    
724     /*************************************************
725 nigel 93 * Usage function *
726     *************************************************/
727    
/* Write the command-line summary to stdout. The -dfa and -p lines are left
out when the program is built with NODFA or NOPOSIX respectively. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input> [<output>]]\n", stdout);
  fputs(" -b show compiled code (bytecode)\n", stdout);
  fputs(" -C show PCRE compile-time options and exit\n", stdout);
  fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
  fputs(" -help show usage information\n", stdout);
  fputs(" -i show information about compiled patterns\n", stdout);
  fputs(" -m output memory used information\n", stdout);
  fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif
  fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
  fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
  fputs(" -s output store (memory) used information\n", stdout);
  fputs(" -t time compilation and execution\n", stdout);
  fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
  fputs(" -tm time execution (matching) only\n", stdout);
  fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
753    
754    
755    
756     /*************************************************
757 nigel 63 * Main Program *
758     *************************************************/
759 nigel 43
760 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
761     consist of a regular expression, in delimiters and optionally followed by
762     options, followed by a set of test data, terminated by an empty line. */
763    
764     int main(int argc, char **argv)
765     {
766     FILE *infile = stdin;
767     int options = 0;
768     int study_options = 0;
769     int op = 1;
770     int timeit = 0;
771 nigel 93 int timeitm = 0;
772 nigel 3 int showinfo = 0;
773 nigel 31 int showstore = 0;
774 nigel 87 int quiet = 0;
775 nigel 53 int size_offsets = 45;
776     int size_offsets_max;
777 nigel 77 int *offsets = NULL;
778 nigel 53 #if !defined NOPOSIX
779 nigel 3 int posix = 0;
780 nigel 53 #endif
781 nigel 3 int debug = 0;
782 nigel 11 int done = 0;
783 nigel 77 int all_use_dfa = 0;
784     int yield = 0;
785 nigel 91 int stack_size;
786 nigel 3
787 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
788     that 1024 is plenty long enough for the few names we'll be testing. */
789 nigel 69
790 nigel 91 uschar copynames[1024];
791     uschar getnames[1024];
792    
793     uschar *copynamesptr;
794     uschar *getnamesptr;
795    
796 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
797 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
798 nigel 69
799 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
800     dbuffer = (unsigned char *)malloc(buffer_size);
801     pbuffer = (unsigned char *)malloc(buffer_size);
802 nigel 69
803 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
804 nigel 3
805 nigel 93 outfile = stdout;
806    
807     /* The following _setmode() stuff is some Windows magic that tells its runtime
808     library to translate CRLF into a single LF character. At least, that's what
809     I've been told: never having used Windows I take this all on trust. Originally
810     it set 0x8000, but then I was advised that _O_BINARY was better. */
811    
812 nigel 75 #if defined(_WIN32) || defined(WIN32)
813 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
814     #endif
815 nigel 75
816 nigel 3 /* Scan options */
817    
818     while (argc > 1 && argv[op][0] == '-')
819     {
820 nigel 63 unsigned char *endptr;
821 nigel 53
822 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
823     showstore = 1;
824 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
825 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
826 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
827     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
828 nigel 79 #if !defined NODFA
829 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
830 nigel 79 #endif
831 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
832 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
833     *endptr == 0))
834 nigel 53 {
835     op++;
836     argc--;
837     }
838 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
839     {
840     int both = argv[op][2] == 0;
841     int temp;
842     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
843     *endptr == 0))
844     {
845     timeitm = temp;
846     op++;
847     argc--;
848     }
849     else timeitm = LOOPREPEAT;
850     if (both) timeit = timeitm;
851     }
852 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
853     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
854     *endptr == 0))
855     {
856 nigel 93 #if defined(_WIN32) || defined(WIN32)
857 nigel 91 printf("PCRE: -S not supported on this OS\n");
858     exit(1);
859     #else
860     int rc;
861     struct rlimit rlim;
862     getrlimit(RLIMIT_STACK, &rlim);
863     rlim.rlim_cur = stack_size * 1024 * 1024;
864     rc = setrlimit(RLIMIT_STACK, &rlim);
865     if (rc != 0)
866     {
867     printf("PCRE: setrlimit() failed with error %d\n", rc);
868     exit(1);
869     }
870     op++;
871     argc--;
872     #endif
873     }
874 nigel 53 #if !defined NOPOSIX
875 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
876 nigel 53 #endif
877 nigel 63 else if (strcmp(argv[op], "-C") == 0)
878     {
879     int rc;
880     printf("PCRE version %s\n", pcre_version());
881     printf("Compiled with\n");
882     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
883     printf(" %sUTF-8 support\n", rc? "" : "No ");
884 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
885     printf(" %sUnicode properties support\n", rc? "" : "No ");
886 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
887 nigel 91 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
888 nigel 93 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
889 ph10 150 (rc == -2)? "ANYCRLF" :
890 nigel 93 (rc == -1)? "ANY" : "???");
891 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
892     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
893     "all Unicode newlines");
894 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
895     printf(" Internal link size = %d\n", rc);
896     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
897     printf(" POSIX malloc threshold = %d\n", rc);
898     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
899     printf(" Default match limit = %d\n", rc);
900 nigel 87 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
901     printf(" Default recursion depth limit = %d\n", rc);
902 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
903     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
904 ph10 121 goto EXIT;
905 nigel 63 }
906 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
907     strcmp(argv[op], "--help") == 0)
908     {
909     usage();
910     goto EXIT;
911     }
912 nigel 3 else
913     {
914 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
915 nigel 93 usage();
916 nigel 77 yield = 1;
917     goto EXIT;
918 nigel 3 }
919     op++;
920     argc--;
921     }
922    
923 nigel 53 /* Get the store for the offsets vector, and remember what it was */
924    
925     size_offsets_max = size_offsets;
926 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
927 nigel 53 if (offsets == NULL)
928     {
929     printf("** Failed to get %d bytes of memory for offsets vector\n",
930 ph10 151 (int)(size_offsets_max * sizeof(int)));
931 nigel 77 yield = 1;
932     goto EXIT;
933 nigel 53 }
934    
935 nigel 3 /* Sort out the input and output files */
936    
937     if (argc > 1)
938     {
939 nigel 93 infile = fopen(argv[op], INPUT_MODE);
940 nigel 3 if (infile == NULL)
941     {
942     printf("** Failed to open %s\n", argv[op]);
943 nigel 77 yield = 1;
944     goto EXIT;
945 nigel 3 }
946     }
947    
948     if (argc > 2)
949     {
950 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
951 nigel 3 if (outfile == NULL)
952     {
953     printf("** Failed to open %s\n", argv[op+1]);
954 nigel 77 yield = 1;
955     goto EXIT;
956 nigel 3 }
957     }
958    
959     /* Set alternative malloc function */
960    
961     pcre_malloc = new_malloc;
962 nigel 73 pcre_free = new_free;
963     pcre_stack_malloc = stack_malloc;
964     pcre_stack_free = stack_free;
965 nigel 3
966 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
967 nigel 3
968 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
969 nigel 3
970     /* Main loop */
971    
972 nigel 11 while (!done)
973 nigel 3 {
974     pcre *re = NULL;
975     pcre_extra *extra = NULL;
976 nigel 37
977     #if !defined NOPOSIX /* There are still compilers that require no indent */
978 nigel 3 regex_t preg;
979 nigel 45 int do_posix = 0;
980 nigel 37 #endif
981    
982 nigel 7 const char *error;
983 nigel 25 unsigned char *p, *pp, *ppp;
984 nigel 75 unsigned char *to_file = NULL;
985 nigel 53 const unsigned char *tables = NULL;
986 nigel 75 unsigned long int true_size, true_study_size = 0;
987     size_t size, regex_gotten_store;
988 nigel 3 int do_study = 0;
989 nigel 25 int do_debug = debug;
990 nigel 35 int do_G = 0;
991     int do_g = 0;
992 nigel 25 int do_showinfo = showinfo;
993 nigel 35 int do_showrest = 0;
994 nigel 75 int do_flip = 0;
995 nigel 93 int erroroffset, len, delimiter, poffset;
996 nigel 3
997 nigel 67 use_utf8 = 0;
998 ph10 211 debug_lengths = 1;
999 nigel 63
1000 nigel 3 if (infile == stdin) printf(" re> ");
1001 nigel 91 if (extend_inputline(infile, buffer) == NULL) break;
1002 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1003 nigel 63 fflush(outfile);
1004 nigel 3
1005     p = buffer;
1006     while (isspace(*p)) p++;
1007     if (*p == 0) continue;
1008    
1009 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1010 nigel 3
1011 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1012     {
1013 nigel 91 unsigned long int magic, get_options;
1014 nigel 75 uschar sbuf[8];
1015     FILE *f;
1016    
1017     p++;
1018     pp = p + (int)strlen((char *)p);
1019     while (isspace(pp[-1])) pp--;
1020     *pp = 0;
1021    
1022     f = fopen((char *)p, "rb");
1023     if (f == NULL)
1024     {
1025     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1026     continue;
1027     }
1028    
1029     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1030    
1031     true_size =
1032     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1033     true_study_size =
1034     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1035    
1036     re = (real_pcre *)new_malloc(true_size);
1037     regex_gotten_store = gotten_store;
1038    
1039     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1040    
1041     magic = ((real_pcre *)re)->magic_number;
1042     if (magic != MAGIC_NUMBER)
1043     {
1044     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1045     {
1046     do_flip = 1;
1047     }
1048     else
1049     {
1050     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1051     fclose(f);
1052     continue;
1053     }
1054     }
1055    
1056     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1057     do_flip? " (byte-inverted)" : "", p);
1058    
1059     /* Need to know if UTF-8 for printing data strings */
1060    
1061 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1062     use_utf8 = (get_options & PCRE_UTF8) != 0;
1063 nigel 75
1064     /* Now see if there is any following study data */
1065    
1066     if (true_study_size != 0)
1067     {
1068     pcre_study_data *psd;
1069    
1070     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1071     extra->flags = PCRE_EXTRA_STUDY_DATA;
1072    
1073     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1074     extra->study_data = psd;
1075    
1076     if (fread(psd, 1, true_study_size, f) != true_study_size)
1077     {
1078     FAIL_READ:
1079     fprintf(outfile, "Failed to read data from %s\n", p);
1080     if (extra != NULL) new_free(extra);
1081     if (re != NULL) new_free(re);
1082     fclose(f);
1083     continue;
1084     }
1085     fprintf(outfile, "Study data loaded from %s\n", p);
1086     do_study = 1; /* To get the data output if requested */
1087     }
1088     else fprintf(outfile, "No study data\n");
1089    
1090     fclose(f);
1091     goto SHOW_INFO;
1092     }
1093    
1094     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1095     the pattern; if it isn't complete, read more. */
1096    
1097 nigel 3 delimiter = *p++;
1098    
1099 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1100 nigel 3 {
1101 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1102 nigel 3 goto SKIP_DATA;
1103     }
1104    
1105     pp = p;
1106 nigel 93 poffset = p - buffer;
1107 nigel 3
1108     for(;;)
1109     {
1110 nigel 29 while (*pp != 0)
1111     {
1112     if (*pp == '\\' && pp[1] != 0) pp++;
1113     else if (*pp == delimiter) break;
1114     pp++;
1115     }
1116 nigel 3 if (*pp != 0) break;
1117     if (infile == stdin) printf(" > ");
1118 nigel 91 if ((pp = extend_inputline(infile, pp)) == NULL)
1119 nigel 3 {
1120     fprintf(outfile, "** Unexpected EOF\n");
1121 nigel 11 done = 1;
1122     goto CONTINUE;
1123 nigel 3 }
1124 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1125 nigel 3 }
1126    
1127 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1128     pointer to the correct relative point in the buffer. */
1129    
1130     p = buffer + poffset;
1131    
1132 nigel 29 /* If the first character after the delimiter is backslash, make
1133     the pattern end with backslash. This is purely to provide a way
1134     of testing for the error message when a pattern ends with backslash. */
1135    
1136     if (pp[1] == '\\') *pp++ = '\\';
1137    
1138 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1139     for callouts. */
1140 nigel 3
1141     *pp++ = 0;
1142 nigel 75 strcpy((char *)pbuffer, (char *)p);
1143 nigel 3
1144     /* Look for options after final delimiter */
1145    
1146     options = 0;
1147     study_options = 0;
1148 nigel 31 log_store = showstore; /* default from command line */
1149    
1150 nigel 3 while (*pp != 0)
1151     {
1152     switch (*pp++)
1153     {
1154 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1155 nigel 35 case 'g': do_g = 1; break;
1156 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1157     case 'm': options |= PCRE_MULTILINE; break;
1158     case 's': options |= PCRE_DOTALL; break;
1159     case 'x': options |= PCRE_EXTENDED; break;
1160 nigel 25
1161 nigel 35 case '+': do_showrest = 1; break;
1162 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1163 nigel 93 case 'B': do_debug = 1; break;
1164 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1165 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1166 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1167 nigel 75 case 'F': do_flip = 1; break;
1168 nigel 35 case 'G': do_G = 1; break;
1169 nigel 25 case 'I': do_showinfo = 1; break;
1170 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1171 nigel 31 case 'M': log_store = 1; break;
1172 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1173 nigel 37
1174     #if !defined NOPOSIX
1175 nigel 3 case 'P': do_posix = 1; break;
1176 nigel 37 #endif
1177    
1178 nigel 3 case 'S': do_study = 1; break;
1179 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1180 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1181 ph10 126 case 'Z': debug_lengths = 0; break;
1182 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1183 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1184 nigel 25
1185     case 'L':
1186     ppp = pp;
1187 nigel 93 /* The '\r' test here is so that it works on Windows. */
1188     /* The '0' test is just in case this is an unterminated line. */
1189     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1190 nigel 25 *ppp = 0;
1191     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1192     {
1193     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1194     goto SKIP_DATA;
1195     }
1196 nigel 93 locale_set = 1;
1197 nigel 25 tables = pcre_maketables();
1198     pp = ppp;
1199     break;
1200    
1201 nigel 75 case '>':
1202     to_file = pp;
1203     while (*pp != 0) pp++;
1204     while (isspace(pp[-1])) pp--;
1205     *pp = 0;
1206     break;
1207    
1208 nigel 91 case '<':
1209     {
1210     int x = check_newline(pp, outfile);
1211     if (x == 0) goto SKIP_DATA;
1212     options |= x;
1213     while (*pp++ != '>');
1214     }
1215     break;
1216    
1217 nigel 77 case '\r': /* So that it works in Windows */
1218     case '\n':
1219     case ' ':
1220     break;
1221 nigel 75
1222 nigel 3 default:
1223     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1224     goto SKIP_DATA;
1225     }
1226     }
1227    
1228 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1229 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1230     local character tables. */
1231 nigel 3
1232 nigel 37 #if !defined NOPOSIX
1233 nigel 3 if (posix || do_posix)
1234     {
1235     int rc;
1236     int cflags = 0;
1237 nigel 75
1238 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1239     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1240 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1241 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1242     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1243    
1244 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1245    
1246     /* Compilation failed; go back for another re, skipping to blank line
1247     if non-interactive. */
1248    
1249     if (rc != 0)
1250     {
1251 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1252 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1253     goto SKIP_DATA;
1254     }
1255     }
1256    
1257     /* Handle compiling via the native interface */
1258    
1259     else
1260 nigel 37 #endif /* !defined NOPOSIX */
1261    
1262 nigel 3 {
1263 nigel 93 if (timeit > 0)
1264 nigel 3 {
1265     register int i;
1266     clock_t time_taken;
1267     clock_t start_time = clock();
1268 nigel 93 for (i = 0; i < timeit; i++)
1269 nigel 3 {
1270 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1271 nigel 3 if (re != NULL) free(re);
1272     }
1273     time_taken = clock() - start_time;
1274 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1275     (((double)time_taken * 1000.0) / (double)timeit) /
1276 nigel 63 (double)CLOCKS_PER_SEC);
1277 nigel 3 }
1278    
1279 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1280 nigel 3
1281     /* Compilation failed; go back for another re, skipping to blank line
1282     if non-interactive. */
1283    
1284     if (re == NULL)
1285     {
1286     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1287     SKIP_DATA:
1288     if (infile != stdin)
1289     {
1290     for (;;)
1291     {
1292 nigel 91 if (extend_inputline(infile, buffer) == NULL)
1293 nigel 11 {
1294     done = 1;
1295     goto CONTINUE;
1296     }
1297 nigel 3 len = (int)strlen((char *)buffer);
1298     while (len > 0 && isspace(buffer[len-1])) len--;
1299     if (len == 0) break;
1300     }
1301     fprintf(outfile, "\n");
1302     }
1303 nigel 25 goto CONTINUE;
1304 nigel 3 }
1305    
1306 nigel 43 /* Compilation succeeded; print data if required. There are now two
1307     info-returning functions. The old one has a limited interface and
1308     returns only limited data. Check that it agrees with the newer one. */
1309 nigel 3
1310 nigel 63 if (log_store)
1311     fprintf(outfile, "Memory allocation (code space): %d\n",
1312     (int)(gotten_store -
1313     sizeof(real_pcre) -
1314     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1315    
1316 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1317     and remember the store that was got. */
1318    
1319     true_size = ((real_pcre *)re)->size;
1320     regex_gotten_store = gotten_store;
1321    
1322     /* If /S was present, study the regexp to generate additional info to
1323     help with the matching. */
1324    
1325     if (do_study)
1326     {
1327 nigel 93 if (timeit > 0)
1328 nigel 75 {
1329     register int i;
1330     clock_t time_taken;
1331     clock_t start_time = clock();
1332 nigel 93 for (i = 0; i < timeit; i++)
1333 nigel 75 extra = pcre_study(re, study_options, &error);
1334     time_taken = clock() - start_time;
1335     if (extra != NULL) free(extra);
1336 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1337     (((double)time_taken * 1000.0) / (double)timeit) /
1338 nigel 75 (double)CLOCKS_PER_SEC);
1339     }
1340     extra = pcre_study(re, study_options, &error);
1341     if (error != NULL)
1342     fprintf(outfile, "Failed to study: %s\n", error);
1343     else if (extra != NULL)
1344     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1345     }
1346    
1347     /* If the 'F' option was present, we flip the bytes of all the integer
1348     fields in the regex data block and the study block. This is to make it
1349     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1350     compiled on a different architecture. */
1351    
1352     if (do_flip)
1353     {
1354     real_pcre *rre = (real_pcre *)re;
1355 ph10 255 rre->magic_number =
1356     byteflip(rre->magic_number, sizeof(rre->magic_number));
1357 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
1358     rre->options = byteflip(rre->options, sizeof(rre->options));
1359 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1360     rre->top_bracket =
1361     (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1362     rre->top_backref =
1363     (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1364     rre->first_byte =
1365     (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1366     rre->req_byte =
1367     (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1368     rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1369 nigel 75 sizeof(rre->name_table_offset));
1370 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1371 nigel 75 sizeof(rre->name_entry_size));
1372 ph10 255 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1373     sizeof(rre->name_count));
1374 nigel 75
1375     if (extra != NULL)
1376     {
1377     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1378     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1379     rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1380     }
1381     }
1382    
1383     /* Extract information from the compiled data if required */
1384    
1385     SHOW_INFO:
1386    
1387 nigel 93 if (do_debug)
1388     {
1389     fprintf(outfile, "------------------------------------------------------------------\n");
1390 ph10 116 pcre_printint(re, outfile, debug_lengths);
1391 nigel 93 }
1392    
1393 nigel 25 if (do_showinfo)
1394 nigel 3 {
1395 nigel 75 unsigned long int get_options, all_options;
1396 nigel 79 #if !defined NOINFOCHECK
1397 nigel 43 int old_first_char, old_options, old_count;
1398 nigel 79 #endif
1399 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1400 ph10 227 hascrorlf;
1401 nigel 63 int nameentrysize, namecount;
1402     const uschar *nametable;
1403 nigel 3
1404 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1405 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1406     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1407     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1408 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1409 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1410 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1411     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1412 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1413 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1414     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1415 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1416 nigel 43
1417 nigel 79 #if !defined NOINFOCHECK
1418 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1419 nigel 3 if (count < 0) fprintf(outfile,
1420 nigel 43 "Error %d from pcre_info()\n", count);
1421 nigel 3 else
1422     {
1423 nigel 43 if (old_count != count) fprintf(outfile,
1424     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1425     old_count);
1426 nigel 37
1427 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1428     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1429     first_char, old_first_char);
1430 nigel 37
1431 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1432     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1433     get_options, old_options);
1434 nigel 43 }
1435 nigel 79 #endif
1436 nigel 43
1437 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1438 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1439 nigel 77 (int)size, (int)regex_gotten_store);
1440 nigel 43
1441     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1442     if (backrefmax > 0)
1443     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1444 nigel 63
1445     if (namecount > 0)
1446     {
1447     fprintf(outfile, "Named capturing subpatterns:\n");
1448     while (namecount-- > 0)
1449     {
1450     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1451     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1452     GET2(nametable, 0));
1453     nametable += nameentrysize;
1454     }
1455     }
1456 ph10 172
1457 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1458 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1459 nigel 63
1460 nigel 75 all_options = ((real_pcre *)re)->options;
1461 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1462 nigel 75
1463 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1464 ph10 231 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1465 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1466     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1467     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1468     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1469 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1470 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1471 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1472     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1473 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1474     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1475     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1476 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1477 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1478 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1479     ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1480 ph10 172
1481 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1482 nigel 43
1483 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1484 nigel 91 {
1485     case PCRE_NEWLINE_CR:
1486     fprintf(outfile, "Forced newline sequence: CR\n");
1487     break;
1488 nigel 43
1489 nigel 91 case PCRE_NEWLINE_LF:
1490     fprintf(outfile, "Forced newline sequence: LF\n");
1491     break;
1492    
1493     case PCRE_NEWLINE_CRLF:
1494     fprintf(outfile, "Forced newline sequence: CRLF\n");
1495     break;
1496    
1497 ph10 149 case PCRE_NEWLINE_ANYCRLF:
1498     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1499     break;
1500    
1501 nigel 93 case PCRE_NEWLINE_ANY:
1502     fprintf(outfile, "Forced newline sequence: ANY\n");
1503     break;
1504    
1505 nigel 91 default:
1506     break;
1507     }
1508    
1509 nigel 43 if (first_char == -1)
1510     {
1511 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1512 nigel 43 }
1513     else if (first_char < 0)
1514     {
1515     fprintf(outfile, "No first char\n");
1516     }
1517     else
1518     {
1519 nigel 63 int ch = first_char & 255;
1520 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1521 nigel 63 "" : " (caseless)";
1522 nigel 93 if (PRINTHEX(ch))
1523 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1524 nigel 3 else
1525 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1526 nigel 43 }
1527 nigel 37
1528 nigel 43 if (need_char < 0)
1529     {
1530     fprintf(outfile, "No need char\n");
1531 nigel 3 }
1532 nigel 43 else
1533     {
1534 nigel 63 int ch = need_char & 255;
1535 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1536 nigel 63 "" : " (caseless)";
1537 nigel 93 if (PRINTHEX(ch))
1538 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1539 nigel 43 else
1540 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1541 nigel 43 }
1542 nigel 75
1543     /* Don't output study size; at present it is in any case a fixed
1544     value, but it varies, depending on the computer architecture, and
1545     so messes up the test suite. (And with the /F option, it might be
1546     flipped.) */
1547    
1548     if (do_study)
1549     {
1550     if (extra == NULL)
1551     fprintf(outfile, "Study returned NULL\n");
1552     else
1553     {
1554     uschar *start_bits = NULL;
1555     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1556    
1557     if (start_bits == NULL)
1558     fprintf(outfile, "No starting byte set\n");
1559     else
1560     {
1561     int i;
1562     int c = 24;
1563     fprintf(outfile, "Starting byte set: ");
1564     for (i = 0; i < 256; i++)
1565     {
1566     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1567     {
1568     if (c > 75)
1569     {
1570     fprintf(outfile, "\n ");
1571     c = 2;
1572     }
1573 nigel 93 if (PRINTHEX(i) && i != ' ')
1574 nigel 75 {
1575     fprintf(outfile, "%c ", i);
1576     c += 2;
1577     }
1578     else
1579     {
1580     fprintf(outfile, "\\x%02x ", i);
1581     c += 5;
1582     }
1583     }
1584     }
1585     fprintf(outfile, "\n");
1586     }
1587     }
1588     }
1589 nigel 3 }
1590    
1591 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1592     that is all. The first 8 bytes of the file are the regex length and then
1593     the study length, in big-endian order. */
1594 nigel 3
1595 nigel 75 if (to_file != NULL)
1596 nigel 3 {
1597 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1598     if (f == NULL)
1599 nigel 3 {
1600 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1601 nigel 3 }
1602 nigel 75 else
1603     {
1604     uschar sbuf[8];
1605 ph10 255 sbuf[0] = (uschar)((true_size >> 24) & 255);
1606     sbuf[1] = (uschar)((true_size >> 16) & 255);
1607     sbuf[2] = (uschar)((true_size >> 8) & 255);
1608     sbuf[3] = (uschar)((true_size) & 255);
1609    
1610     sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1611     sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1612     sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1613     sbuf[7] = (uschar)((true_study_size) & 255);
1614 nigel 3
1615 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1616     fwrite(re, 1, true_size, f) < true_size)
1617     {
1618     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1619     }
1620 nigel 3 else
1621     {
1622 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1623     if (extra != NULL)
1624 nigel 3 {
1625 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1626     true_study_size)
1627 nigel 3 {
1628 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1629     strerror(errno));
1630 nigel 3 }
1631 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1632 nigel 93
1633 nigel 3 }
1634     }
1635 nigel 75 fclose(f);
1636 nigel 3 }
1637 nigel 77
1638     new_free(re);
1639     if (extra != NULL) new_free(extra);
1640     if (tables != NULL) new_free((void *)tables);
1641 nigel 75 continue; /* With next regex */
1642 nigel 3 }
1643 nigel 75 } /* End of non-POSIX compile */
1644 nigel 3
1645     /* Read data lines and test them */
1646    
1647     for (;;)
1648     {
1649 nigel 87 uschar *q;
1650 ph10 147 uschar *bptr;
1651 nigel 57 int *use_offsets = offsets;
1652 nigel 53 int use_size_offsets = size_offsets;
1653 nigel 63 int callout_data = 0;
1654     int callout_data_set = 0;
1655 nigel 3 int count, c;
1656 nigel 29 int copystrings = 0;
1657 nigel 63 int find_match_limit = 0;
1658 nigel 29 int getstrings = 0;
1659     int getlist = 0;
1660 nigel 39 int gmatched = 0;
1661 nigel 35 int start_offset = 0;
1662 nigel 41 int g_notempty = 0;
1663 nigel 77 int use_dfa = 0;
1664 nigel 3
1665     options = 0;
1666    
1667 nigel 91 *copynames = 0;
1668     *getnames = 0;
1669    
1670     copynamesptr = copynames;
1671     getnamesptr = getnames;
1672    
1673 nigel 63 pcre_callout = callout;
1674     first_callout = 1;
1675     callout_extra = 0;
1676     callout_count = 0;
1677     callout_fail_count = 999999;
1678     callout_fail_id = -1;
1679 nigel 73 show_malloc = 0;
1680 nigel 63
1681 nigel 91 if (extra != NULL) extra->flags &=
1682     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1683    
1684     len = 0;
1685     for (;;)
1686 nigel 11 {
1687 nigel 91 if (infile == stdin) printf("data> ");
1688     if (extend_inputline(infile, buffer + len) == NULL)
1689     {
1690     if (len > 0) break;
1691     done = 1;
1692     goto CONTINUE;
1693     }
1694     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1695     len = (int)strlen((char *)buffer);
1696     if (buffer[len-1] == '\n') break;
1697 nigel 11 }
1698 nigel 3
1699     while (len > 0 && isspace(buffer[len-1])) len--;
1700     buffer[len] = 0;
1701     if (len == 0) break;
1702    
1703     p = buffer;
1704     while (isspace(*p)) p++;
1705    
1706 ph10 147 bptr = q = dbuffer;
1707 nigel 3 while ((c = *p++) != 0)
1708     {
1709     int i = 0;
1710     int n = 0;
1711 nigel 63
1712 nigel 3 if (c == '\\') switch ((c = *p++))
1713     {
1714     case 'a': c = 7; break;
1715     case 'b': c = '\b'; break;
1716     case 'e': c = 27; break;
1717     case 'f': c = '\f'; break;
1718     case 'n': c = '\n'; break;
1719     case 'r': c = '\r'; break;
1720     case 't': c = '\t'; break;
1721     case 'v': c = '\v'; break;
1722    
1723     case '0': case '1': case '2': case '3':
1724     case '4': case '5': case '6': case '7':
1725     c -= '0';
1726     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1727     c = c * 8 + *p++ - '0';
1728 nigel 91
1729     #if !defined NOUTF8
1730     if (use_utf8 && c > 255)
1731     {
1732     unsigned char buff8[8];
1733     int ii, utn;
1734     utn = ord2utf8(c, buff8);
1735     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1736     c = buff8[ii]; /* Last byte */
1737     }
1738     #endif
1739 nigel 3 break;
1740    
1741     case 'x':
1742 nigel 49
1743     /* Handle \x{..} specially - new Perl thing for utf8 */
1744    
1745 nigel 79 #if !defined NOUTF8
1746 nigel 49 if (*p == '{')
1747     {
1748     unsigned char *pt = p;
1749     c = 0;
1750     while (isxdigit(*(++pt)))
1751     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1752     if (*pt == '}')
1753     {
1754 nigel 67 unsigned char buff8[8];
1755 nigel 49 int ii, utn;
1756 nigel 85 utn = ord2utf8(c, buff8);
1757 nigel 67 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1758     c = buff8[ii]; /* Last byte */
1759 nigel 49 p = pt + 1;
1760     break;
1761     }
1762     /* Not correct form; fall through */
1763     }
1764 nigel 79 #endif
1765 nigel 49
1766     /* Ordinary \x */
1767    
1768 nigel 3 c = 0;
1769     while (i++ < 2 && isxdigit(*p))
1770     {
1771     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1772     p++;
1773     }
1774     break;
1775    
1776 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1777 nigel 3 p--;
1778     continue;
1779    
1780 nigel 75 case '>':
1781     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1782     continue;
1783    
1784 nigel 3 case 'A': /* Option setting */
1785     options |= PCRE_ANCHORED;
1786     continue;
1787    
1788     case 'B':
1789     options |= PCRE_NOTBOL;
1790     continue;
1791    
1792 nigel 29 case 'C':
1793 nigel 63 if (isdigit(*p)) /* Set copy string */
1794     {
1795     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1796     copystrings |= 1 << n;
1797     }
1798     else if (isalnum(*p))
1799     {
1800 nigel 91 uschar *npp = copynamesptr;
1801 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1802 nigel 91 *npp++ = 0;
1803 nigel 67 *npp = 0;
1804 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1805 nigel 63 if (n < 0)
1806 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1807     copynamesptr = npp;
1808 nigel 63 }
1809     else if (*p == '+')
1810     {
1811     callout_extra = 1;
1812     p++;
1813     }
1814     else if (*p == '-')
1815     {
1816     pcre_callout = NULL;
1817     p++;
1818     }
1819     else if (*p == '!')
1820     {
1821     callout_fail_id = 0;
1822     p++;
1823     while(isdigit(*p))
1824     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1825     callout_fail_count = 0;
1826     if (*p == '!')
1827     {
1828     p++;
1829     while(isdigit(*p))
1830     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1831     }
1832     }
1833     else if (*p == '*')
1834     {
1835     int sign = 1;
1836     callout_data = 0;
1837     if (*(++p) == '-') { sign = -1; p++; }
1838     while(isdigit(*p))
1839     callout_data = callout_data * 10 + *p++ - '0';
1840     callout_data *= sign;
1841     callout_data_set = 1;
1842     }
1843 nigel 29 continue;
1844    
1845 nigel 79 #if !defined NODFA
1846 nigel 77 case 'D':
1847 nigel 79 #if !defined NOPOSIX
1848 nigel 77 if (posix || do_posix)
1849     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1850     else
1851 nigel 79 #endif
1852 nigel 77 use_dfa = 1;
1853     continue;
1854    
1855     case 'F':
1856     options |= PCRE_DFA_SHORTEST;
1857     continue;
1858 nigel 79 #endif
1859 nigel 77
1860 nigel 29 case 'G':
1861 nigel 63 if (isdigit(*p))
1862     {
1863     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1864     getstrings |= 1 << n;
1865     }
1866     else if (isalnum(*p))
1867     {
1868 nigel 91 uschar *npp = getnamesptr;
1869 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1870 nigel 91 *npp++ = 0;
1871 nigel 67 *npp = 0;
1872 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1873 nigel 63 if (n < 0)
1874 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1875     getnamesptr = npp;
1876 nigel 63 }
1877 nigel 29 continue;
1878    
1879     case 'L':
1880     getlist = 1;
1881     continue;
1882    
1883 nigel 63 case 'M':
1884     find_match_limit = 1;
1885     continue;
1886    
1887 nigel 37 case 'N':
1888     options |= PCRE_NOTEMPTY;
1889     continue;
1890    
1891 nigel 3 case 'O':
1892     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1893 nigel 53 if (n > size_offsets_max)
1894     {
1895     size_offsets_max = n;
1896 nigel 57 free(offsets);
1897 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1898 nigel 53 if (offsets == NULL)
1899     {
1900     printf("** Failed to get %d bytes of memory for offsets vector\n",
1901 ph10 151 (int)(size_offsets_max * sizeof(int)));
1902 nigel 77 yield = 1;
1903     goto EXIT;
1904 nigel 53 }
1905     }
1906     use_size_offsets = n;
1907 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1908 nigel 3 continue;
1909    
1910 nigel 75 case 'P':
1911     options |= PCRE_PARTIAL;
1912     continue;
1913    
1914 nigel 91 case 'Q':
1915     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1916     if (extra == NULL)
1917     {
1918     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1919     extra->flags = 0;
1920     }
1921     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1922     extra->match_limit_recursion = n;
1923     continue;
1924    
1925     case 'q':
1926     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1927     if (extra == NULL)
1928     {
1929     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1930     extra->flags = 0;
1931     }
1932     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1933     extra->match_limit = n;
1934     continue;
1935    
1936 nigel 79 #if !defined NODFA
1937 nigel 77 case 'R':
1938     options |= PCRE_DFA_RESTART;
1939     continue;
1940 nigel 79 #endif
1941 nigel 77
1942 nigel 73 case 'S':
1943     show_malloc = 1;
1944     continue;
1945    
1946 nigel 3 case 'Z':
1947     options |= PCRE_NOTEOL;
1948     continue;
1949 nigel 71
1950     case '?':
1951     options |= PCRE_NO_UTF8_CHECK;
1952     continue;
1953 nigel 91
1954     case '<':
1955     {
1956     int x = check_newline(p, outfile);
1957     if (x == 0) goto NEXT_DATA;
1958     options |= x;
1959     while (*p++ != '>');
1960     }
1961     continue;
1962 nigel 3 }
1963 nigel 9 *q++ = c;
1964 nigel 3 }
1965 nigel 9 *q = 0;
1966     len = q - dbuffer;
1967 nigel 3
1968 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
1969     {
1970     printf("**Match limit not relevant for DFA matching: ignored\n");
1971     find_match_limit = 0;
1972     }
1973    
1974 nigel 3 /* Handle matching via the POSIX interface, which does not
1975 nigel 63 support timing or playing with the match limit or callout data. */
1976 nigel 3
1977 nigel 37 #if !defined NOPOSIX
1978 nigel 3 if (posix || do_posix)
1979     {
1980     int rc;
1981     int eflags = 0;
1982 nigel 63 regmatch_t *pmatch = NULL;
1983     if (use_size_offsets > 0)
1984 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1985 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1986     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1987    
1988 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1989 nigel 3
1990     if (rc != 0)
1991     {
1992 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1993 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1994     }
1995 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1996     != 0)
1997     {
1998     fprintf(outfile, "Matched with REG_NOSUB\n");
1999     }
2000 nigel 3 else
2001     {
2002 nigel 7 size_t i;
2003 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
2004 nigel 3 {
2005     if (pmatch[i].rm_so >= 0)
2006     {
2007 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2008 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2009     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2010 nigel 3 fprintf(outfile, "\n");
2011 nigel 35 if (i == 0 && do_showrest)
2012     {
2013     fprintf(outfile, " 0+ ");
2014 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2015     outfile);
2016 nigel 35 fprintf(outfile, "\n");
2017     }
2018 nigel 3 }
2019     }
2020     }
2021 nigel 53 free(pmatch);
2022 nigel 3 }
2023    
2024 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2025 nigel 3
2026 nigel 37 else
2027     #endif /* !defined NOPOSIX */
2028    
2029 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2030 nigel 3 {
2031 nigel 93 if (timeitm > 0)
2032 nigel 3 {
2033     register int i;
2034     clock_t time_taken;
2035     clock_t start_time = clock();
2036 nigel 77
2037 nigel 79 #if !defined NODFA
2038 nigel 77 if (all_use_dfa || use_dfa)
2039     {
2040     int workspace[1000];
2041 nigel 93 for (i = 0; i < timeitm; i++)
2042 nigel 77 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2043     options | g_notempty, use_offsets, use_size_offsets, workspace,
2044     sizeof(workspace)/sizeof(int));
2045     }
2046     else
2047 nigel 79 #endif
2048 nigel 77
2049 nigel 93 for (i = 0; i < timeitm; i++)
2050 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2051 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2052 nigel 77
2053 nigel 3 time_taken = clock() - start_time;
2054 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2055     (((double)time_taken * 1000.0) / (double)timeitm) /
2056 nigel 63 (double)CLOCKS_PER_SEC);
2057 nigel 3 }
2058    
2059 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2060 nigel 87 varying limits in order to find the minimum value for the match limit and
2061     for the recursion limit. */
2062 nigel 63
2063     if (find_match_limit)
2064     {
2065     if (extra == NULL)
2066     {
2067 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2068 nigel 63 extra->flags = 0;
2069     }
2070    
2071 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2072 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2073     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2074     PCRE_ERROR_MATCHLIMIT, "match()");
2075 nigel 63
2076 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2077     options|g_notempty, use_offsets, use_size_offsets,
2078     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2079     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2080 nigel 63 }
2081    
2082     /* If callout_data is set, use the interface with additional data */
2083    
2084     else if (callout_data_set)
2085     {
2086     if (extra == NULL)
2087     {
2088 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2089 nigel 63 extra->flags = 0;
2090     }
2091     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2092 nigel 71 extra->callout_data = &callout_data;
2093 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2094     options | g_notempty, use_offsets, use_size_offsets);
2095     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2096     }
2097    
2098     /* The normal case is just to do the match once, with the default
2099     value of match_limit. */
2100    
2101 nigel 79 #if !defined NODFA
2102 nigel 77 else if (all_use_dfa || use_dfa)
2103     {
2104     int workspace[1000];
2105     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2106     options | g_notempty, use_offsets, use_size_offsets, workspace,
2107     sizeof(workspace)/sizeof(int));
2108     if (count == 0)
2109     {
2110     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2111     count = use_size_offsets/2;
2112     }
2113     }
2114 nigel 79 #endif
2115 nigel 77
2116 nigel 75 else
2117     {
2118     count = pcre_exec(re, extra, (char *)bptr, len,
2119     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2120 nigel 77 if (count == 0)
2121     {
2122     fprintf(outfile, "Matched, but too many substrings\n");
2123     count = use_size_offsets/3;
2124     }
2125 nigel 75 }
2126 nigel 3
2127 nigel 39 /* Matched */
2128    
2129 nigel 3 if (count >= 0)
2130     {
2131 nigel 93 int i, maxcount;
2132    
2133     #if !defined NODFA
2134     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2135     #endif
2136     maxcount = use_size_offsets/3;
2137    
2138     /* This is a check against a lunatic return value. */
2139    
2140     if (count > maxcount)
2141     {
2142     fprintf(outfile,
2143     "** PCRE error: returned count %d is too big for offset size %d\n",
2144     count, use_size_offsets);
2145     count = use_size_offsets/3;
2146     if (do_g || do_G)
2147     {
2148     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2149     do_g = do_G = FALSE; /* Break g/G loop */
2150     }
2151     }
2152    
2153 nigel 29 for (i = 0; i < count * 2; i += 2)
2154 nigel 3 {
2155 nigel 57 if (use_offsets[i] < 0)
2156 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2157     else
2158     {
2159     fprintf(outfile, "%2d: ", i/2);
2160 nigel 63 (void)pchars(bptr + use_offsets[i],
2161     use_offsets[i+1] - use_offsets[i], outfile);
2162 nigel 3 fprintf(outfile, "\n");
2163 nigel 35 if (i == 0)
2164     {
2165     if (do_showrest)
2166     {
2167     fprintf(outfile, " 0+ ");
2168 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2169     outfile);
2170 nigel 35 fprintf(outfile, "\n");
2171     }
2172     }
2173 nigel 3 }
2174     }
2175 nigel 29
2176     for (i = 0; i < 32; i++)
2177     {
2178     if ((copystrings & (1 << i)) != 0)
2179     {
2180 nigel 91 char copybuffer[256];
2181 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2182 nigel 37 i, copybuffer, sizeof(copybuffer));
2183 nigel 29 if (rc < 0)
2184     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2185     else
2186 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2187 nigel 29 }
2188     }
2189    
2190 nigel 91 for (copynamesptr = copynames;
2191     *copynamesptr != 0;
2192     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2193     {
2194     char copybuffer[256];
2195     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2196     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2197     if (rc < 0)
2198     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2199     else
2200     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2201     }
2202    
2203 nigel 29 for (i = 0; i < 32; i++)
2204     {
2205     if ((getstrings & (1 << i)) != 0)
2206     {
2207     const char *substring;
2208 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2209 nigel 29 i, &substring);
2210     if (rc < 0)
2211     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2212     else
2213     {
2214     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2215 nigel 49 pcre_free_substring(substring);
2216 nigel 29 }
2217     }
2218     }
2219    
2220 nigel 91 for (getnamesptr = getnames;
2221     *getnamesptr != 0;
2222     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2223     {
2224     const char *substring;
2225     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2226     count, (char *)getnamesptr, &substring);
2227     if (rc < 0)
2228     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2229     else
2230     {
2231     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2232     pcre_free_substring(substring);
2233     }
2234     }
2235    
2236 nigel 29 if (getlist)
2237     {
2238     const char **stringlist;
2239 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2240 nigel 29 &stringlist);
2241     if (rc < 0)
2242     fprintf(outfile, "get substring list failed %d\n", rc);
2243     else
2244     {
2245     for (i = 0; i < count; i++)
2246     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2247     if (stringlist[i] != NULL)
2248     fprintf(outfile, "string list not terminated by NULL\n");
2249 nigel 49 /* free((void *)stringlist); */
2250     pcre_free_substring_list(stringlist);
2251 nigel 29 }
2252     }
2253 nigel 39 }
2254 nigel 29
2255 nigel 75 /* There was a partial match */
2256    
2257     else if (count == PCRE_ERROR_PARTIAL)
2258     {
2259 nigel 77 fprintf(outfile, "Partial match");
2260 nigel 79 #if !defined NODFA
2261 nigel 77 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2262     fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2263     bptr + use_offsets[0]);
2264 nigel 79 #endif
2265 nigel 77 fprintf(outfile, "\n");
2266 nigel 75 break; /* Out of the /g loop */
2267     }
2268    
2269 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2270 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2271     to advance the start offset, and continue. We won't be at the end of the
2272     string - that was checked before setting g_notempty.
2273 nigel 39
2274 ph10 150 Complication arises in the case when the newline option is "any" or
2275 ph10 149 "anycrlf". If the previous match was at the end of a line terminated by
2276     CRLF, an advance of one character just passes the \r, whereas we should
2277     prefer the longer newline sequence, as does the code in pcre_exec().
2278     Fudge the offset value to achieve this.
2279 ph10 144
2280 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2281     character, not one byte. */
2282    
2283 nigel 3 else
2284     {
2285 nigel 41 if (g_notempty != 0)
2286 nigel 35 {
2287 nigel 73 int onechar = 1;
2288 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2289 nigel 57 use_offsets[0] = start_offset;
2290 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2291     {
2292     int d;
2293     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2294     obits = (d == '\r')? PCRE_NEWLINE_CR :
2295     (d == '\n')? PCRE_NEWLINE_LF :
2296     (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2297 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2298 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2299     }
2300 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2301 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2302 ph10 149 &&
2303 ph10 143 start_offset < len - 1 &&
2304     bptr[start_offset] == '\r' &&
2305     bptr[start_offset+1] == '\n')
2306 ph10 144 onechar++;
2307 ph10 143 else if (use_utf8)
2308 nigel 73 {
2309     while (start_offset + onechar < len)
2310     {
2311     int tb = bptr[start_offset+onechar];
2312     if (tb <= 127) break;
2313     tb &= 0xc0;
2314     if (tb != 0 && tb != 0xc0) onechar++;
2315     }
2316     }
2317     use_offsets[1] = start_offset + onechar;
2318 nigel 35 }
2319 nigel 41 else
2320     {
2321 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2322 nigel 41 {
2323 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
2324 nigel 41 }
2325 nigel 73 else fprintf(outfile, "Error %d\n", count);
2326 nigel 41 break; /* Out of the /g loop */
2327     }
2328 nigel 3 }
2329 nigel 35
2330 nigel 39 /* If not /g or /G we are done */
2331    
2332     if (!do_g && !do_G) break;
2333    
2334 nigel 41 /* If we have matched an empty string, first check to see if we are at
2335     the end of the subject. If so, the /g loop is over. Otherwise, mimic
2336     what Perl's /g option does. This turns out to be rather cunning. First
2337 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2338     same point. If this fails (picked up above) we advance to the next
2339 ph10 143 character. */
2340 ph10 142
2341 nigel 41 g_notempty = 0;
2342 ph10 142
2343 nigel 57 if (use_offsets[0] == use_offsets[1])
2344 nigel 41 {
2345 nigel 57 if (use_offsets[0] == len) break;
2346 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2347 nigel 41 }
2348 nigel 39
2349     /* For /g, update the start offset, leaving the rest alone */
2350    
2351 ph10 143 if (do_g) start_offset = use_offsets[1];
2352 nigel 39
2353     /* For /G, update the pointer and length */
2354    
2355     else
2356 nigel 35 {
2357 ph10 143 bptr += use_offsets[1];
2358     len -= use_offsets[1];
2359 nigel 35 }
2360 nigel 39 } /* End of loop for /g and /G */
2361 nigel 91
2362     NEXT_DATA: continue;
2363 nigel 39 } /* End of loop for data lines */
2364 nigel 3
2365 nigel 11 CONTINUE:
2366 nigel 37
2367     #if !defined NOPOSIX
2368 nigel 3 if (posix || do_posix) regfree(&preg);
2369 nigel 37 #endif
2370    
2371 nigel 77 if (re != NULL) new_free(re);
2372     if (extra != NULL) new_free(extra);
2373 nigel 25 if (tables != NULL)
2374     {
2375 nigel 77 new_free((void *)tables);
2376 nigel 25 setlocale(LC_CTYPE, "C");
2377 nigel 93 locale_set = 0;
2378 nigel 25 }
2379 nigel 3 }
2380    
2381 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2382 nigel 77
2383     EXIT:
2384    
2385     if (infile != NULL && infile != stdin) fclose(infile);
2386     if (outfile != NULL && outfile != stdout) fclose(outfile);
2387    
2388     free(buffer);
2389     free(dbuffer);
2390     free(pbuffer);
2391     free(offsets);
2392    
2393     return yield;
2394 nigel 3 }
2395    
2396 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12