/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 240 - (hide annotations) (download)
Tue Sep 11 15:47:20 2007 UTC (6 years, 11 months ago) by ph10
File MIME type: text/plain
File size: 69914 byte(s)
Refactoring to reduce the number of relocations in a shared library.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50 nigel 3
51 nigel 93
52     /* A number of things vary for Windows builds. Originally, pcretest opened its
53     input and output without "b"; then I was told that "b" was needed in some
54     environments, so it was added for release 5.0 to both the input and output. (It
55     makes no difference on Unix-like systems.) Later I was told that it is wrong
56     for the input on Windows. I've now abstracted the modes into two macros that
57     are set here, to make it easier to fiddle with them, and removed "b" from the
58     input mode under Windows. */
59    
60     #if defined(_WIN32) || defined(WIN32)
61     #include <io.h> /* For _setmode() */
62     #include <fcntl.h> /* For _O_BINARY */
63     #define INPUT_MODE "r"
64     #define OUTPUT_MODE "wb"
65    
66     #else
67     #include <sys/time.h> /* These two includes are needed */
68     #include <sys/resource.h> /* for setrlimit(). */
69     #define INPUT_MODE "rb"
70     #define OUTPUT_MODE "wb"
71 nigel 91 #endif
72    
73 nigel 93
74 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
75     displaying the results of pcre_study() and we also need to know about the
76     internal macros, structures, and other internal data values; pcretest has
77     "inside information" compared to a program that strictly follows the PCRE API.
78 nigel 37
79 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
80     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81     appropriately for an application, not for building PCRE. */
82 nigel 77
83 ph10 145 #include "pcre.h"
84 nigel 77 #include "pcre_internal.h"
85    
86 nigel 85 /* We need access to the data tables that PCRE uses. So as not to have to keep
87     two copies, we include the source file here, changing the names of the external
88     symbols to prevent clashes. */
89 nigel 77
90 nigel 85 #define _pcre_utf8_table1 utf8_table1
91     #define _pcre_utf8_table1_size utf8_table1_size
92     #define _pcre_utf8_table2 utf8_table2
93     #define _pcre_utf8_table3 utf8_table3
94     #define _pcre_utf8_table4 utf8_table4
95     #define _pcre_utt utt
96     #define _pcre_utt_size utt_size
97 ph10 240 #define _pcre_utt_names utt_names
98 nigel 85 #define _pcre_OP_lengths OP_lengths
99    
100     #include "pcre_tables.c"
101    
102     /* We also need the pcre_printint() function for printing out compiled
103     patterns. This function is in a separate file so that it can be included in
104 nigel 93 pcre_compile.c when that module is compiled with debugging enabled.
105 nigel 85
106 nigel 93 The definition of the macro PRINTABLE, which determines whether to print an
107     output character as-is or as a hex value when showing compiled patterns, is
108     contained in this file. We use it here also, in cases when the locale has not
109     been explicitly changed, so as to get consistent output from systems that
110     differ in their output from isprint() even in the "C" locale. */
111    
112 nigel 85 #include "pcre_printint.src"
113    
114 nigel 93 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
115 nigel 85
116 nigel 93
117 nigel 37 /* It is possible to compile this test program without including support for
118     testing the POSIX interface, though this is not available via the standard
119     Makefile. */
120    
121     #if !defined NOPOSIX
122 nigel 3 #include "pcreposix.h"
123 nigel 37 #endif
124 nigel 3
125 ph10 107 /* It is also possible, for the benefit of the version currently imported into
126     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
127     interface to the DFA matcher (NODFA), and without the doublecheck of the old
128     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
129     UTF8 support if PCRE is built without it. */
130 nigel 79
131 ph10 107 #ifndef SUPPORT_UTF8
132     #ifndef NOUTF8
133     #define NOUTF8
134     #endif
135     #endif
136 nigel 79
137 ph10 107
138 nigel 85 /* Other parameters */
139    
140 nigel 3 #ifndef CLOCKS_PER_SEC
141     #ifdef CLK_TCK
142     #define CLOCKS_PER_SEC CLK_TCK
143     #else
144     #define CLOCKS_PER_SEC 100
145     #endif
146     #endif
147    
148 nigel 93 /* This is the default loop count for timing. */
149    
150 nigel 75 #define LOOPREPEAT 500000
151 nigel 3
152 nigel 85 /* Static variables */
153    
154 nigel 3 static FILE *outfile;
155     static int log_store = 0;
156 nigel 63 static int callout_count;
157     static int callout_extra;
158     static int callout_fail_count;
159     static int callout_fail_id;
160 ph10 210 static int debug_lengths;
161 nigel 63 static int first_callout;
162 nigel 93 static int locale_set = 0;
163 nigel 73 static int show_malloc;
164 nigel 67 static int use_utf8;
165 nigel 43 static size_t gotten_store;
166 nigel 3
167 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
168    
169     static int buffer_size = 50000;
170     static uschar *buffer = NULL;
171     static uschar *dbuffer = NULL;
172 nigel 75 static uschar *pbuffer = NULL;
173 nigel 3
174 nigel 75
175 nigel 49
176     /*************************************************
177 nigel 91 * Read or extend an input line *
178     *************************************************/
179    
180     /* Input lines are read into buffer, but both patterns and data lines can be
181     continued over multiple input lines. In addition, if the buffer fills up, we
182     want to automatically expand it so as to be able to handle extremely large
183     lines that are needed for certain stress tests. When the input buffer is
184     expanded, the other two buffers must also be expanded likewise, and the
185     contents of pbuffer, which are a copy of the input for callouts, must be
186     preserved (for when expansion happens for a data line). This is not the most
187     optimal way of handling this, but hey, this is just a test program!
188    
189     Arguments:
190     f the file to read
191     start where in buffer to start (this *must* be within buffer)
192    
193     Returns: pointer to the start of new data
194     could be a copy of start, or could be moved
195     NULL if no data read and EOF reached
196     */
197    
198     static uschar *
199     extend_inputline(FILE *f, uschar *start)
200     {
201     uschar *here = start;
202    
203     for (;;)
204     {
205     int rlen = buffer_size - (here - buffer);
206 nigel 93
207 nigel 91 if (rlen > 1000)
208     {
209     int dlen;
210     if (fgets((char *)here, rlen, f) == NULL)
211     return (here == start)? NULL : start;
212     dlen = (int)strlen((char *)here);
213     if (dlen > 0 && here[dlen - 1] == '\n') return start;
214     here += dlen;
215     }
216    
217     else
218     {
219     int new_buffer_size = 2*buffer_size;
220     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
221     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
222     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
223    
224     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
225     {
226     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
227     exit(1);
228     }
229    
230     memcpy(new_buffer, buffer, buffer_size);
231     memcpy(new_pbuffer, pbuffer, buffer_size);
232    
233     buffer_size = new_buffer_size;
234    
235     start = new_buffer + (start - buffer);
236     here = new_buffer + (here - buffer);
237    
238     free(buffer);
239     free(dbuffer);
240     free(pbuffer);
241    
242     buffer = new_buffer;
243     dbuffer = new_dbuffer;
244     pbuffer = new_pbuffer;
245     }
246     }
247    
248     return NULL; /* Control never gets here */
249     }
250    
251    
252    
253    
254    
255    
256    
257     /*************************************************
258 nigel 63 * Read number from string *
259     *************************************************/
260    
261     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
262     around with conditional compilation, just do the job by hand. It is only used
263 nigel 93 for unpicking arguments, so just keep it simple.
264 nigel 63
265     Arguments:
266     str string to be converted
267     endptr where to put the end pointer
268    
269     Returns: the converted value, as an int
270     */
271    
/* Convert a run of decimal digits, optionally preceded by white space, to an
int. No sign is accepted. If the digits describe a value larger than INT_MAX,
the result saturates at INT_MAX instead of overflowing (signed overflow is
undefined behaviour).

Arguments:
  str       the string to convert
  endptr    receives a pointer to the first character that was not consumed

Returns:    the converted value; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  while (isdigit(*str))
    {
    int digit = (int)(*str++ - '0');

    /* result*10 + digit fits in an int only if result <= (INT_MAX-digit)/10.
    Once saturated, the value stays at INT_MAX for any further digits. */

    if (result > (INT_MAX - digit) / 10)
      result = INT_MAX;
    else
      result = result * 10 + digit;
    }

  *endptr = str;
  return result;
}
281    
282    
283    
284 nigel 49
285     /*************************************************
286     * Convert UTF-8 string to value *
287     *************************************************/
288    
289     /* This function takes one or more bytes that represents a UTF-8 character,
290     and returns the value of the character.
291    
292     Argument:
293 nigel 91 utf8bytes a pointer to the byte vector
294     vptr a pointer to an int to receive the value
295 nigel 49
296 nigel 91 Returns: > 0 => the number of bytes consumed
297     -6 to 0 => malformed UTF-8 character at offset = (-return)
298 nigel 49 */
299    
300 nigel 79 #if !defined NOUTF8
301    
302 nigel 67 static int
303 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
304 nigel 49 {
305 nigel 91 int c = *utf8bytes++;
306 nigel 49 int d = c;
307     int i, j, s;
308    
309     for (i = -1; i < 6; i++) /* i is number of additional bytes */
310     {
311     if ((d & 0x80) == 0) break;
312     d <<= 1;
313     }
314    
315     if (i == -1) { *vptr = c; return 1; } /* ascii character */
316     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
317    
318     /* i now has a value in the range 1-5 */
319    
320 nigel 59 s = 6*i;
321 nigel 85 d = (c & utf8_table3[i]) << s;
322 nigel 49
323     for (j = 0; j < i; j++)
324     {
325 nigel 91 c = *utf8bytes++;
326 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
327 nigel 59 s -= 6;
328 nigel 49 d |= (c & 0x3f) << s;
329     }
330    
331     /* Check that encoding was the correct unique one */
332    
333 nigel 85 for (j = 0; j < utf8_table1_size; j++)
334     if (d <= utf8_table1[j]) break;
335 nigel 49 if (j != i) return -(i+1);
336    
337     /* Valid value */
338    
339     *vptr = d;
340     return i+1;
341     }
342    
343 nigel 79 #endif
344 nigel 49
345    
346 nigel 79
347 nigel 63 /*************************************************
348 nigel 85 * Convert character value to UTF-8 *
349     *************************************************/
350    
351     /* This function takes an integer value in the range 0 - 0x7fffffff
352     and encodes it as a UTF-8 character in 1 to 6 bytes.
353    
354     Arguments:
355     cvalue the character value
356 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
357 nigel 85
358     Returns: number of bytes placed in the buffer
359     */
360    
361 nigel 93 #if !defined NOUTF8
362    
363 nigel 85 static int
364 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
365 nigel 85 {
366     register int i, j;
367     for (i = 0; i < utf8_table1_size; i++)
368     if (cvalue <= utf8_table1[i]) break;
369 nigel 91 utf8bytes += i;
370 nigel 85 for (j = i; j > 0; j--)
371     {
372 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
373 nigel 85 cvalue >>= 6;
374     }
375 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
376 nigel 85 return i + 1;
377     }
378    
379 nigel 93 #endif
380 nigel 85
381    
382 nigel 93
383 nigel 85 /*************************************************
384 nigel 63 * Print character string *
385     *************************************************/
386 nigel 49
387 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
388     mode. Yields number of characters printed. If handed a NULL file, just counts
389     chars without printing. */
390 nigel 49
391 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
392 nigel 3 {
393 nigel 85 int c = 0;
394 nigel 63 int yield = 0;
395 nigel 3
396 nigel 63 while (length-- > 0)
397 nigel 3 {
398 nigel 79 #if !defined NOUTF8
399 nigel 67 if (use_utf8)
400 nigel 63 {
401     int rc = utf82ord(p, &c);
402 nigel 3
403 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
404     {
405     length -= rc - 1;
406     p += rc;
407 nigel 93 if (PRINTHEX(c))
408 nigel 63 {
409     if (f != NULL) fprintf(f, "%c", c);
410     yield++;
411     }
412     else
413     {
414 nigel 93 int n = 4;
415     if (f != NULL) fprintf(f, "\\x{%02x}", c);
416     yield += (n <= 0x000000ff)? 2 :
417     (n <= 0x00000fff)? 3 :
418     (n <= 0x0000ffff)? 4 :
419     (n <= 0x000fffff)? 5 : 6;
420 nigel 63 }
421     continue;
422     }
423     }
424 nigel 79 #endif
425 nigel 3
426 nigel 63 /* Not UTF-8, or malformed UTF-8 */
427    
428 nigel 93 c = *p++;
429     if (PRINTHEX(c))
430 nigel 3 {
431 nigel 63 if (f != NULL) fprintf(f, "%c", c);
432     yield++;
433 nigel 3 }
434 nigel 63 else
435 nigel 3 {
436 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
437     yield += 4;
438     }
439     }
440 nigel 3
441 nigel 63 return yield;
442     }
443 nigel 23
444 nigel 3
445 nigel 23
446 nigel 63 /*************************************************
447     * Callout function *
448     *************************************************/
449 nigel 3
450 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
451     the match. Yield zero unless more callouts than the fail count, or the callout
452     data is not zero. */
453 nigel 3
454 nigel 63 static int callout(pcre_callout_block *cb)
455     {
456     FILE *f = (first_callout | callout_extra)? outfile : NULL;
457 nigel 75 int i, pre_start, post_start, subject_length;
458 nigel 3
459 nigel 63 if (callout_extra)
460     {
461     fprintf(f, "Callout %d: last capture = %d\n",
462     cb->callout_number, cb->capture_last);
463 nigel 3
464 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
465     {
466     if (cb->offset_vector[i] < 0)
467     fprintf(f, "%2d: <unset>\n", i/2);
468     else
469     {
470     fprintf(f, "%2d: ", i/2);
471     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
472     cb->offset_vector[i+1] - cb->offset_vector[i], f);
473     fprintf(f, "\n");
474     }
475     }
476     }
477 nigel 3
478 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
479     datails. On subsequent calls in the same match, we use pchars just to find the
480     printed lengths of the substrings. */
481 nigel 3
482 nigel 63 if (f != NULL) fprintf(f, "--->");
483 nigel 3
484 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
485     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
486     cb->current_position - cb->start_match, f);
487 nigel 3
488 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
489    
490 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
491     cb->subject_length - cb->current_position, f);
492 nigel 3
493 nigel 63 if (f != NULL) fprintf(f, "\n");
494 nigel 9
495 nigel 63 /* Always print appropriate indicators, with callout number if not already
496 nigel 75 shown. For automatic callouts, show the pattern offset. */
497 nigel 3
498 nigel 75 if (cb->callout_number == 255)
499     {
500     fprintf(outfile, "%+3d ", cb->pattern_position);
501     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
502     }
503     else
504     {
505     if (callout_extra) fprintf(outfile, " ");
506     else fprintf(outfile, "%3d ", cb->callout_number);
507     }
508 nigel 3
509 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
510     fprintf(outfile, "^");
511 nigel 3
512 nigel 63 if (post_start > 0)
513     {
514     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
515     fprintf(outfile, "^");
516 nigel 3 }
517    
518 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
519     fprintf(outfile, " ");
520    
521     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
522     pbuffer + cb->pattern_position);
523    
524 nigel 63 fprintf(outfile, "\n");
525     first_callout = 0;
526 nigel 3
527 nigel 71 if (cb->callout_data != NULL)
528 nigel 49 {
529 nigel 71 int callout_data = *((int *)(cb->callout_data));
530     if (callout_data != 0)
531     {
532     fprintf(outfile, "Callout data = %d\n", callout_data);
533     return callout_data;
534     }
535 nigel 63 }
536 nigel 49
537 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
538     (++callout_count >= callout_fail_count)? 1 : 0;
539 nigel 3 }
540    
541    
542 nigel 63 /*************************************************
543 nigel 73 * Local malloc functions *
544 nigel 63 *************************************************/
545 nigel 3
546     /* Alternative malloc function, to test functionality and show the size of the
547     compiled re. */
548    
549     static void *new_malloc(size_t size)
550     {
551 nigel 73 void *block = malloc(size);
552 nigel 43 gotten_store = size;
553 nigel 73 if (show_malloc)
554 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
555 nigel 73 return block;
556 nigel 3 }
557    
558 nigel 73 static void new_free(void *block)
559     {
560     if (show_malloc)
561     fprintf(outfile, "free %p\n", block);
562     free(block);
563     }
564 nigel 3
565    
566 nigel 73 /* For recursion malloc/free, to test stacking calls */
567    
568     static void *stack_malloc(size_t size)
569     {
570     void *block = malloc(size);
571     if (show_malloc)
572 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
573 nigel 73 return block;
574     }
575    
576     static void stack_free(void *block)
577     {
578     if (show_malloc)
579     fprintf(outfile, "stack_free %p\n", block);
580     free(block);
581     }
582    
583    
584 nigel 63 /*************************************************
585     * Call pcre_fullinfo() *
586     *************************************************/
587 nigel 43
588     /* Get one piece of information from the pcre_fullinfo() function */
589    
590     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
591     {
592     int rc;
593     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
594     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
595     }
596    
597    
598    
599 nigel 63 /*************************************************
600 nigel 75 * Byte flipping function *
601     *************************************************/
602    
/* Reverse the byte order of a 2-byte or a 4-byte quantity.

Arguments:
  value     the value to flip
  n         2 to swap the two low-order bytes; any other value reverses the
              four low-order bytes

Returns:    the flipped value; bits above those taken part in the swap are
            not carried into the result
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int b0 = value & 0xff;
  unsigned long int b1 = (value >> 8) & 0xff;
  unsigned long int b2, b3;

  if (n == 2) return (b0 << 8) | b1;

  b2 = (value >> 16) & 0xff;
  b3 = (value >> 24) & 0xff;

  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
612    
613    
614    
615    
616     /*************************************************
617 nigel 87 * Check match or recursion limit *
618     *************************************************/
619    
620     static int
621     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
622     int start_offset, int options, int *use_offsets, int use_size_offsets,
623     int flag, unsigned long int *limit, int errnumber, const char *msg)
624     {
625     int count;
626     int min = 0;
627     int mid = 64;
628     int max = -1;
629    
630     extra->flags |= flag;
631    
632     for (;;)
633     {
634     *limit = mid;
635    
636     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
637     use_offsets, use_size_offsets);
638    
639     if (count == errnumber)
640     {
641     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
642     min = mid;
643     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
644     }
645    
646     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
647     count == PCRE_ERROR_PARTIAL)
648     {
649     if (mid == min + 1)
650     {
651     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
652     break;
653     }
654     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
655     max = mid;
656     mid = (min + mid)/2;
657     }
658     else break; /* Some other error */
659     }
660    
661     extra->flags &= ~flag;
662     return count;
663     }
664    
665    
666    
667     /*************************************************
668 ph10 227 * Case-independent strncmp() function *
669     *************************************************/
670    
671     /*
672     Arguments:
673     s first string
674     t second string
675     n number of characters to compare
676    
677     Returns: < 0, = 0, or > 0, according to the comparison
678     */
679    
680     static int
681     strncmpic(uschar *s, uschar *t, int n)
682     {
683     while (n--)
684     {
685     int c = tolower(*s++) - tolower(*t++);
686     if (c) return c;
687     }
688     return 0;
689     }
690    
691    
692    
693     /*************************************************
694 nigel 91 * Check newline indicator *
695     *************************************************/
696    
697     /* This is used both at compile and run-time to check for <xxx> escapes, where
698 ph10 149 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
699     no match.
700 nigel 91
701     Arguments:
702     p points after the leading '<'
703     f file for error message
704    
705     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
706     */
707    
708     static int
709     check_newline(uschar *p, FILE *f)
710     {
711 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
712     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
713     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
714     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
715     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
716 ph10 231 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
717     if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
718 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
719     return 0;
720     }
721    
722    
723    
724     /*************************************************
725 nigel 93 * Usage function *
726     *************************************************/
727    
/* Write the command-line summary to the standard output. The -dfa and -p
lines are shown only when the corresponding support is compiled in. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input> [<output>]]\n"
        " -b show compiled code (bytecode)\n"
        " -C show PCRE compile-time options and exit\n"
        " -d debug: show compiled code and information (-b and -i)\n",
        stdout);
#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n",
        stdout);
#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif
  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s output store (memory) used information\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n",
        stdout);
}
753    
754    
755    
756     /*************************************************
757 nigel 63 * Main Program *
758     *************************************************/
759 nigel 43
760 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
761     consist of a regular expression, in delimiters and optionally followed by
762     options, followed by a set of test data, terminated by an empty line. */
763    
764     int main(int argc, char **argv)
765     {
766     FILE *infile = stdin;
767     int options = 0;
768     int study_options = 0;
769     int op = 1;
770     int timeit = 0;
771 nigel 93 int timeitm = 0;
772 nigel 3 int showinfo = 0;
773 nigel 31 int showstore = 0;
774 nigel 87 int quiet = 0;
775 nigel 53 int size_offsets = 45;
776     int size_offsets_max;
777 nigel 77 int *offsets = NULL;
778 nigel 53 #if !defined NOPOSIX
779 nigel 3 int posix = 0;
780 nigel 53 #endif
781 nigel 3 int debug = 0;
782 nigel 11 int done = 0;
783 nigel 77 int all_use_dfa = 0;
784     int yield = 0;
785 nigel 91 int stack_size;
786 nigel 3
787 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
788     that 1024 is plenty long enough for the few names we'll be testing. */
789 nigel 69
790 nigel 91 uschar copynames[1024];
791     uschar getnames[1024];
792    
793     uschar *copynamesptr;
794     uschar *getnamesptr;
795    
796 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
797 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
798 nigel 69
799 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
800     dbuffer = (unsigned char *)malloc(buffer_size);
801     pbuffer = (unsigned char *)malloc(buffer_size);
802 nigel 69
803 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
804 nigel 3
805 nigel 93 outfile = stdout;
806    
807     /* The following _setmode() stuff is some Windows magic that tells its runtime
808     library to translate CRLF into a single LF character. At least, that's what
809     I've been told: never having used Windows I take this all on trust. Originally
810     it set 0x8000, but then I was advised that _O_BINARY was better. */
811    
812 nigel 75 #if defined(_WIN32) || defined(WIN32)
813 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
814     #endif
815 nigel 75
816 nigel 3 /* Scan options */
817    
818     while (argc > 1 && argv[op][0] == '-')
819     {
820 nigel 63 unsigned char *endptr;
821 nigel 53
822 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
823     showstore = 1;
824 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
825 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
826 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
827     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
828 nigel 79 #if !defined NODFA
829 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
830 nigel 79 #endif
831 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
832 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
833     *endptr == 0))
834 nigel 53 {
835     op++;
836     argc--;
837     }
838 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
839     {
840     int both = argv[op][2] == 0;
841     int temp;
842     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
843     *endptr == 0))
844     {
845     timeitm = temp;
846     op++;
847     argc--;
848     }
849     else timeitm = LOOPREPEAT;
850     if (both) timeit = timeitm;
851     }
852 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
853     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
854     *endptr == 0))
855     {
856 nigel 93 #if defined(_WIN32) || defined(WIN32)
857 nigel 91 printf("PCRE: -S not supported on this OS\n");
858     exit(1);
859     #else
860     int rc;
861     struct rlimit rlim;
862     getrlimit(RLIMIT_STACK, &rlim);
863     rlim.rlim_cur = stack_size * 1024 * 1024;
864     rc = setrlimit(RLIMIT_STACK, &rlim);
865     if (rc != 0)
866     {
867     printf("PCRE: setrlimit() failed with error %d\n", rc);
868     exit(1);
869     }
870     op++;
871     argc--;
872     #endif
873     }
874 nigel 53 #if !defined NOPOSIX
875 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
876 nigel 53 #endif
877 nigel 63 else if (strcmp(argv[op], "-C") == 0)
878     {
879     int rc;
880     printf("PCRE version %s\n", pcre_version());
881     printf("Compiled with\n");
882     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
883     printf(" %sUTF-8 support\n", rc? "" : "No ");
884 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
885     printf(" %sUnicode properties support\n", rc? "" : "No ");
886 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
887 nigel 91 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
888 nigel 93 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
889 ph10 150 (rc == -2)? "ANYCRLF" :
890 nigel 93 (rc == -1)? "ANY" : "???");
891 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
892     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
893     "all Unicode newlines");
894 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
895     printf(" Internal link size = %d\n", rc);
896     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
897     printf(" POSIX malloc threshold = %d\n", rc);
898     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
899     printf(" Default match limit = %d\n", rc);
900 nigel 87 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
901     printf(" Default recursion depth limit = %d\n", rc);
902 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
903     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
904 ph10 121 goto EXIT;
905 nigel 63 }
906 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
907     strcmp(argv[op], "--help") == 0)
908     {
909     usage();
910     goto EXIT;
911     }
912 nigel 3 else
913     {
914 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
915 nigel 93 usage();
916 nigel 77 yield = 1;
917     goto EXIT;
918 nigel 3 }
919     op++;
920     argc--;
921     }
922    
923 nigel 53 /* Get the store for the offsets vector, and remember what it was */
924    
925     size_offsets_max = size_offsets;
926 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
927 nigel 53 if (offsets == NULL)
928     {
929     printf("** Failed to get %d bytes of memory for offsets vector\n",
930 ph10 151 (int)(size_offsets_max * sizeof(int)));
931 nigel 77 yield = 1;
932     goto EXIT;
933 nigel 53 }
934    
935 nigel 3 /* Sort out the input and output files */
936    
937     if (argc > 1)
938     {
939 nigel 93 infile = fopen(argv[op], INPUT_MODE);
940 nigel 3 if (infile == NULL)
941     {
942     printf("** Failed to open %s\n", argv[op]);
943 nigel 77 yield = 1;
944     goto EXIT;
945 nigel 3 }
946     }
947    
948     if (argc > 2)
949     {
950 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
951 nigel 3 if (outfile == NULL)
952     {
953     printf("** Failed to open %s\n", argv[op+1]);
954 nigel 77 yield = 1;
955     goto EXIT;
956 nigel 3 }
957     }
958    
959     /* Set alternative malloc function */
960    
961     pcre_malloc = new_malloc;
962 nigel 73 pcre_free = new_free;
963     pcre_stack_malloc = stack_malloc;
964     pcre_stack_free = stack_free;
965 nigel 3
966 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
967 nigel 3
968 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
969 nigel 3
970     /* Main loop */
971    
972 nigel 11 while (!done)
973 nigel 3 {
974     pcre *re = NULL;
975     pcre_extra *extra = NULL;
976 nigel 37
977     #if !defined NOPOSIX /* There are still compilers that require no indent */
978 nigel 3 regex_t preg;
979 nigel 45 int do_posix = 0;
980 nigel 37 #endif
981    
982 nigel 7 const char *error;
983 nigel 25 unsigned char *p, *pp, *ppp;
984 nigel 75 unsigned char *to_file = NULL;
985 nigel 53 const unsigned char *tables = NULL;
986 nigel 75 unsigned long int true_size, true_study_size = 0;
987     size_t size, regex_gotten_store;
988 nigel 3 int do_study = 0;
989 nigel 25 int do_debug = debug;
990 nigel 35 int do_G = 0;
991     int do_g = 0;
992 nigel 25 int do_showinfo = showinfo;
993 nigel 35 int do_showrest = 0;
994 nigel 75 int do_flip = 0;
995 nigel 93 int erroroffset, len, delimiter, poffset;
996 nigel 3
997 nigel 67 use_utf8 = 0;
998 ph10 211 debug_lengths = 1;
999 nigel 63
1000 nigel 3 if (infile == stdin) printf(" re> ");
1001 nigel 91 if (extend_inputline(infile, buffer) == NULL) break;
1002 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1003 nigel 63 fflush(outfile);
1004 nigel 3
1005     p = buffer;
1006     while (isspace(*p)) p++;
1007     if (*p == 0) continue;
1008    
1009 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1010 nigel 3
1011 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1012     {
1013 nigel 91 unsigned long int magic, get_options;
1014 nigel 75 uschar sbuf[8];
1015     FILE *f;
1016    
1017     p++;
1018     pp = p + (int)strlen((char *)p);
1019     while (isspace(pp[-1])) pp--;
1020     *pp = 0;
1021    
1022     f = fopen((char *)p, "rb");
1023     if (f == NULL)
1024     {
1025     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1026     continue;
1027     }
1028    
1029     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1030    
1031     true_size =
1032     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1033     true_study_size =
1034     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1035    
1036     re = (real_pcre *)new_malloc(true_size);
1037     regex_gotten_store = gotten_store;
1038    
1039     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1040    
1041     magic = ((real_pcre *)re)->magic_number;
1042     if (magic != MAGIC_NUMBER)
1043     {
1044     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1045     {
1046     do_flip = 1;
1047     }
1048     else
1049     {
1050     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1051     fclose(f);
1052     continue;
1053     }
1054     }
1055    
1056     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1057     do_flip? " (byte-inverted)" : "", p);
1058    
1059     /* Need to know if UTF-8 for printing data strings */
1060    
1061 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1062     use_utf8 = (get_options & PCRE_UTF8) != 0;
1063 nigel 75
1064     /* Now see if there is any following study data */
1065    
1066     if (true_study_size != 0)
1067     {
1068     pcre_study_data *psd;
1069    
1070     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1071     extra->flags = PCRE_EXTRA_STUDY_DATA;
1072    
1073     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1074     extra->study_data = psd;
1075    
1076     if (fread(psd, 1, true_study_size, f) != true_study_size)
1077     {
1078     FAIL_READ:
1079     fprintf(outfile, "Failed to read data from %s\n", p);
1080     if (extra != NULL) new_free(extra);
1081     if (re != NULL) new_free(re);
1082     fclose(f);
1083     continue;
1084     }
1085     fprintf(outfile, "Study data loaded from %s\n", p);
1086     do_study = 1; /* To get the data output if requested */
1087     }
1088     else fprintf(outfile, "No study data\n");
1089    
1090     fclose(f);
1091     goto SHOW_INFO;
1092     }
1093    
1094     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1095     the pattern; if it isn't complete, read more. */
1096    
1097 nigel 3 delimiter = *p++;
1098    
1099 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1100 nigel 3 {
1101 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1102 nigel 3 goto SKIP_DATA;
1103     }
1104    
1105     pp = p;
1106 nigel 93 poffset = p - buffer;
1107 nigel 3
1108     for(;;)
1109     {
1110 nigel 29 while (*pp != 0)
1111     {
1112     if (*pp == '\\' && pp[1] != 0) pp++;
1113     else if (*pp == delimiter) break;
1114     pp++;
1115     }
1116 nigel 3 if (*pp != 0) break;
1117     if (infile == stdin) printf(" > ");
1118 nigel 91 if ((pp = extend_inputline(infile, pp)) == NULL)
1119 nigel 3 {
1120     fprintf(outfile, "** Unexpected EOF\n");
1121 nigel 11 done = 1;
1122     goto CONTINUE;
1123 nigel 3 }
1124 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1125 nigel 3 }
1126    
1127 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1128     pointer to the correct relative point in the buffer. */
1129    
1130     p = buffer + poffset;
1131    
1132 nigel 29 /* If the first character after the delimiter is backslash, make
1133     the pattern end with backslash. This is purely to provide a way
1134     of testing for the error message when a pattern ends with backslash. */
1135    
1136     if (pp[1] == '\\') *pp++ = '\\';
1137    
1138 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1139     for callouts. */
1140 nigel 3
1141     *pp++ = 0;
1142 nigel 75 strcpy((char *)pbuffer, (char *)p);
1143 nigel 3
1144     /* Look for options after final delimiter */
1145    
1146     options = 0;
1147     study_options = 0;
1148 nigel 31 log_store = showstore; /* default from command line */
1149    
1150 nigel 3 while (*pp != 0)
1151     {
1152     switch (*pp++)
1153     {
1154 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1155 nigel 35 case 'g': do_g = 1; break;
1156 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1157     case 'm': options |= PCRE_MULTILINE; break;
1158     case 's': options |= PCRE_DOTALL; break;
1159     case 'x': options |= PCRE_EXTENDED; break;
1160 nigel 25
1161 nigel 35 case '+': do_showrest = 1; break;
1162 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1163 nigel 93 case 'B': do_debug = 1; break;
1164 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1165 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1166 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1167 nigel 75 case 'F': do_flip = 1; break;
1168 nigel 35 case 'G': do_G = 1; break;
1169 nigel 25 case 'I': do_showinfo = 1; break;
1170 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1171 nigel 31 case 'M': log_store = 1; break;
1172 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1173 nigel 37
1174     #if !defined NOPOSIX
1175 nigel 3 case 'P': do_posix = 1; break;
1176 nigel 37 #endif
1177    
1178 nigel 3 case 'S': do_study = 1; break;
1179 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1180 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1181 ph10 126 case 'Z': debug_lengths = 0; break;
1182 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1183 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1184 nigel 25
1185     case 'L':
1186     ppp = pp;
1187 nigel 93 /* The '\r' test here is so that it works on Windows. */
1188     /* The '0' test is just in case this is an unterminated line. */
1189     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1190 nigel 25 *ppp = 0;
1191     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1192     {
1193     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1194     goto SKIP_DATA;
1195     }
1196 nigel 93 locale_set = 1;
1197 nigel 25 tables = pcre_maketables();
1198     pp = ppp;
1199     break;
1200    
1201 nigel 75 case '>':
1202     to_file = pp;
1203     while (*pp != 0) pp++;
1204     while (isspace(pp[-1])) pp--;
1205     *pp = 0;
1206     break;
1207    
1208 nigel 91 case '<':
1209     {
1210     int x = check_newline(pp, outfile);
1211     if (x == 0) goto SKIP_DATA;
1212     options |= x;
1213     while (*pp++ != '>');
1214     }
1215     break;
1216    
1217 nigel 77 case '\r': /* So that it works in Windows */
1218     case '\n':
1219     case ' ':
1220     break;
1221 nigel 75
1222 nigel 3 default:
1223     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1224     goto SKIP_DATA;
1225     }
1226     }
1227    
1228 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1229 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1230     local character tables. */
1231 nigel 3
1232 nigel 37 #if !defined NOPOSIX
1233 nigel 3 if (posix || do_posix)
1234     {
1235     int rc;
1236     int cflags = 0;
1237 nigel 75
1238 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1239     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1240 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1241 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1242     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1243    
1244 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1245    
1246     /* Compilation failed; go back for another re, skipping to blank line
1247     if non-interactive. */
1248    
1249     if (rc != 0)
1250     {
1251 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1252 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1253     goto SKIP_DATA;
1254     }
1255     }
1256    
1257     /* Handle compiling via the native interface */
1258    
1259     else
1260 nigel 37 #endif /* !defined NOPOSIX */
1261    
1262 nigel 3 {
1263 nigel 93 if (timeit > 0)
1264 nigel 3 {
1265     register int i;
1266     clock_t time_taken;
1267     clock_t start_time = clock();
1268 nigel 93 for (i = 0; i < timeit; i++)
1269 nigel 3 {
1270 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1271 nigel 3 if (re != NULL) free(re);
1272     }
1273     time_taken = clock() - start_time;
1274 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1275     (((double)time_taken * 1000.0) / (double)timeit) /
1276 nigel 63 (double)CLOCKS_PER_SEC);
1277 nigel 3 }
1278    
1279 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1280 nigel 3
1281     /* Compilation failed; go back for another re, skipping to blank line
1282     if non-interactive. */
1283    
1284     if (re == NULL)
1285     {
1286     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1287     SKIP_DATA:
1288     if (infile != stdin)
1289     {
1290     for (;;)
1291     {
1292 nigel 91 if (extend_inputline(infile, buffer) == NULL)
1293 nigel 11 {
1294     done = 1;
1295     goto CONTINUE;
1296     }
1297 nigel 3 len = (int)strlen((char *)buffer);
1298     while (len > 0 && isspace(buffer[len-1])) len--;
1299     if (len == 0) break;
1300     }
1301     fprintf(outfile, "\n");
1302     }
1303 nigel 25 goto CONTINUE;
1304 nigel 3 }
1305    
1306 nigel 43 /* Compilation succeeded; print data if required. There are now two
1307     info-returning functions. The old one has a limited interface and
1308     returns only limited data. Check that it agrees with the newer one. */
1309 nigel 3
1310 nigel 63 if (log_store)
1311     fprintf(outfile, "Memory allocation (code space): %d\n",
1312     (int)(gotten_store -
1313     sizeof(real_pcre) -
1314     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1315    
1316 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1317     and remember the store that was got. */
1318    
1319     true_size = ((real_pcre *)re)->size;
1320     regex_gotten_store = gotten_store;
1321    
1322     /* If /S was present, study the regexp to generate additional info to
1323     help with the matching. */
1324    
1325     if (do_study)
1326     {
1327 nigel 93 if (timeit > 0)
1328 nigel 75 {
1329     register int i;
1330     clock_t time_taken;
1331     clock_t start_time = clock();
1332 nigel 93 for (i = 0; i < timeit; i++)
1333 nigel 75 extra = pcre_study(re, study_options, &error);
1334     time_taken = clock() - start_time;
1335     if (extra != NULL) free(extra);
1336 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1337     (((double)time_taken * 1000.0) / (double)timeit) /
1338 nigel 75 (double)CLOCKS_PER_SEC);
1339     }
1340     extra = pcre_study(re, study_options, &error);
1341     if (error != NULL)
1342     fprintf(outfile, "Failed to study: %s\n", error);
1343     else if (extra != NULL)
1344     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1345     }
1346    
1347     /* If the 'F' option was present, we flip the bytes of all the integer
1348     fields in the regex data block and the study block. This is to make it
1349     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1350     compiled on a different architecture. */
1351    
1352     if (do_flip)
1353     {
1354     real_pcre *rre = (real_pcre *)re;
1355     rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1356     rre->size = byteflip(rre->size, sizeof(rre->size));
1357     rre->options = byteflip(rre->options, sizeof(rre->options));
1358 ph10 231 rre->flags = byteflip(rre->flags, sizeof(rre->flags));
1359 nigel 75 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1360     rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1361     rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1362     rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1363     rre->name_table_offset = byteflip(rre->name_table_offset,
1364     sizeof(rre->name_table_offset));
1365     rre->name_entry_size = byteflip(rre->name_entry_size,
1366     sizeof(rre->name_entry_size));
1367     rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1368    
1369     if (extra != NULL)
1370     {
1371     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1372     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1373     rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1374     }
1375     }
1376    
1377     /* Extract information from the compiled data if required */
1378    
1379     SHOW_INFO:
1380    
1381 nigel 93 if (do_debug)
1382     {
1383     fprintf(outfile, "------------------------------------------------------------------\n");
1384 ph10 116 pcre_printint(re, outfile, debug_lengths);
1385 nigel 93 }
1386    
1387 nigel 25 if (do_showinfo)
1388 nigel 3 {
1389 nigel 75 unsigned long int get_options, all_options;
1390 nigel 79 #if !defined NOINFOCHECK
1391 nigel 43 int old_first_char, old_options, old_count;
1392 nigel 79 #endif
1393 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1394 ph10 227 hascrorlf;
1395 nigel 63 int nameentrysize, namecount;
1396     const uschar *nametable;
1397 nigel 3
1398 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1399 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1400     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1401     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1402 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1403 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1404 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1405     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1406 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1407 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1408     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1409 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1410 nigel 43
1411 nigel 79 #if !defined NOINFOCHECK
1412 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1413 nigel 3 if (count < 0) fprintf(outfile,
1414 nigel 43 "Error %d from pcre_info()\n", count);
1415 nigel 3 else
1416     {
1417 nigel 43 if (old_count != count) fprintf(outfile,
1418     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1419     old_count);
1420 nigel 37
1421 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1422     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1423     first_char, old_first_char);
1424 nigel 37
1425 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1426     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1427     get_options, old_options);
1428 nigel 43 }
1429 nigel 79 #endif
1430 nigel 43
1431 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1432 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1433 nigel 77 (int)size, (int)regex_gotten_store);
1434 nigel 43
1435     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1436     if (backrefmax > 0)
1437     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1438 nigel 63
1439     if (namecount > 0)
1440     {
1441     fprintf(outfile, "Named capturing subpatterns:\n");
1442     while (namecount-- > 0)
1443     {
1444     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1445     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1446     GET2(nametable, 0));
1447     nametable += nameentrysize;
1448     }
1449     }
1450 ph10 172
1451 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1452 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1453 nigel 63
1454 nigel 75 all_options = ((real_pcre *)re)->options;
1455 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1456 nigel 75
1457 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1458 ph10 231 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1459 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1460     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1461     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1462     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1463 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1464 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1465 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1466     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1467 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1468     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1469     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1470 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1471 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1472 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1473     ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1474 ph10 172
1475 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1476 nigel 43
1477 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1478 nigel 91 {
1479     case PCRE_NEWLINE_CR:
1480     fprintf(outfile, "Forced newline sequence: CR\n");
1481     break;
1482 nigel 43
1483 nigel 91 case PCRE_NEWLINE_LF:
1484     fprintf(outfile, "Forced newline sequence: LF\n");
1485     break;
1486    
1487     case PCRE_NEWLINE_CRLF:
1488     fprintf(outfile, "Forced newline sequence: CRLF\n");
1489     break;
1490    
1491 ph10 149 case PCRE_NEWLINE_ANYCRLF:
1492     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1493     break;
1494    
1495 nigel 93 case PCRE_NEWLINE_ANY:
1496     fprintf(outfile, "Forced newline sequence: ANY\n");
1497     break;
1498    
1499 nigel 91 default:
1500     break;
1501     }
1502    
1503 nigel 43 if (first_char == -1)
1504     {
1505 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1506 nigel 43 }
1507     else if (first_char < 0)
1508     {
1509     fprintf(outfile, "No first char\n");
1510     }
1511     else
1512     {
1513 nigel 63 int ch = first_char & 255;
1514 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1515 nigel 63 "" : " (caseless)";
1516 nigel 93 if (PRINTHEX(ch))
1517 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1518 nigel 3 else
1519 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1520 nigel 43 }
1521 nigel 37
1522 nigel 43 if (need_char < 0)
1523     {
1524     fprintf(outfile, "No need char\n");
1525 nigel 3 }
1526 nigel 43 else
1527     {
1528 nigel 63 int ch = need_char & 255;
1529 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1530 nigel 63 "" : " (caseless)";
1531 nigel 93 if (PRINTHEX(ch))
1532 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1533 nigel 43 else
1534 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1535 nigel 43 }
1536 nigel 75
1537     /* Don't output study size; at present it is in any case a fixed
1538     value, but it varies, depending on the computer architecture, and
1539     so messes up the test suite. (And with the /F option, it might be
1540     flipped.) */
1541    
1542     if (do_study)
1543     {
1544     if (extra == NULL)
1545     fprintf(outfile, "Study returned NULL\n");
1546     else
1547     {
1548     uschar *start_bits = NULL;
1549     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1550    
1551     if (start_bits == NULL)
1552     fprintf(outfile, "No starting byte set\n");
1553     else
1554     {
1555     int i;
1556     int c = 24;
1557     fprintf(outfile, "Starting byte set: ");
1558     for (i = 0; i < 256; i++)
1559     {
1560     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1561     {
1562     if (c > 75)
1563     {
1564     fprintf(outfile, "\n ");
1565     c = 2;
1566     }
1567 nigel 93 if (PRINTHEX(i) && i != ' ')
1568 nigel 75 {
1569     fprintf(outfile, "%c ", i);
1570     c += 2;
1571     }
1572     else
1573     {
1574     fprintf(outfile, "\\x%02x ", i);
1575     c += 5;
1576     }
1577     }
1578     }
1579     fprintf(outfile, "\n");
1580     }
1581     }
1582     }
1583 nigel 3 }
1584    
1585 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1586     that is all. The first 8 bytes of the file are the regex length and then
1587     the study length, in big-endian order. */
1588 nigel 3
1589 nigel 75 if (to_file != NULL)
1590 nigel 3 {
1591 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1592     if (f == NULL)
1593 nigel 3 {
1594 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1595 nigel 3 }
1596 nigel 75 else
1597     {
1598     uschar sbuf[8];
1599     sbuf[0] = (true_size >> 24) & 255;
1600     sbuf[1] = (true_size >> 16) & 255;
1601     sbuf[2] = (true_size >> 8) & 255;
1602     sbuf[3] = (true_size) & 255;
1603 nigel 3
1604 nigel 75 sbuf[4] = (true_study_size >> 24) & 255;
1605     sbuf[5] = (true_study_size >> 16) & 255;
1606     sbuf[6] = (true_study_size >> 8) & 255;
1607     sbuf[7] = (true_study_size) & 255;
1608 nigel 3
1609 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1610     fwrite(re, 1, true_size, f) < true_size)
1611     {
1612     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1613     }
1614 nigel 3 else
1615     {
1616 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1617     if (extra != NULL)
1618 nigel 3 {
1619 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1620     true_study_size)
1621 nigel 3 {
1622 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1623     strerror(errno));
1624 nigel 3 }
1625 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1626 nigel 93
1627 nigel 3 }
1628     }
1629 nigel 75 fclose(f);
1630 nigel 3 }
1631 nigel 77
1632     new_free(re);
1633     if (extra != NULL) new_free(extra);
1634     if (tables != NULL) new_free((void *)tables);
1635 nigel 75 continue; /* With next regex */
1636 nigel 3 }
1637 nigel 75 } /* End of non-POSIX compile */
1638 nigel 3
1639     /* Read data lines and test them */
1640    
1641     for (;;)
1642     {
1643 nigel 87 uschar *q;
1644 ph10 147 uschar *bptr;
1645 nigel 57 int *use_offsets = offsets;
1646 nigel 53 int use_size_offsets = size_offsets;
1647 nigel 63 int callout_data = 0;
1648     int callout_data_set = 0;
1649 nigel 3 int count, c;
1650 nigel 29 int copystrings = 0;
1651 nigel 63 int find_match_limit = 0;
1652 nigel 29 int getstrings = 0;
1653     int getlist = 0;
1654 nigel 39 int gmatched = 0;
1655 nigel 35 int start_offset = 0;
1656 nigel 41 int g_notempty = 0;
1657 nigel 77 int use_dfa = 0;
1658 nigel 3
1659     options = 0;
1660    
1661 nigel 91 *copynames = 0;
1662     *getnames = 0;
1663    
1664     copynamesptr = copynames;
1665     getnamesptr = getnames;
1666    
1667 nigel 63 pcre_callout = callout;
1668     first_callout = 1;
1669     callout_extra = 0;
1670     callout_count = 0;
1671     callout_fail_count = 999999;
1672     callout_fail_id = -1;
1673 nigel 73 show_malloc = 0;
1674 nigel 63
1675 nigel 91 if (extra != NULL) extra->flags &=
1676     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1677    
1678     len = 0;
1679     for (;;)
1680 nigel 11 {
1681 nigel 91 if (infile == stdin) printf("data> ");
1682     if (extend_inputline(infile, buffer + len) == NULL)
1683     {
1684     if (len > 0) break;
1685     done = 1;
1686     goto CONTINUE;
1687     }
1688     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1689     len = (int)strlen((char *)buffer);
1690     if (buffer[len-1] == '\n') break;
1691 nigel 11 }
1692 nigel 3
1693     while (len > 0 && isspace(buffer[len-1])) len--;
1694     buffer[len] = 0;
1695     if (len == 0) break;
1696    
1697     p = buffer;
1698     while (isspace(*p)) p++;
1699    
1700 ph10 147 bptr = q = dbuffer;
1701 nigel 3 while ((c = *p++) != 0)
1702     {
1703     int i = 0;
1704     int n = 0;
1705 nigel 63
1706 nigel 3 if (c == '\\') switch ((c = *p++))
1707     {
1708     case 'a': c = 7; break;
1709     case 'b': c = '\b'; break;
1710     case 'e': c = 27; break;
1711     case 'f': c = '\f'; break;
1712     case 'n': c = '\n'; break;
1713     case 'r': c = '\r'; break;
1714     case 't': c = '\t'; break;
1715     case 'v': c = '\v'; break;
1716    
1717     case '0': case '1': case '2': case '3':
1718     case '4': case '5': case '6': case '7':
1719     c -= '0';
1720     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1721     c = c * 8 + *p++ - '0';
1722 nigel 91
1723     #if !defined NOUTF8
1724     if (use_utf8 && c > 255)
1725     {
1726     unsigned char buff8[8];
1727     int ii, utn;
1728     utn = ord2utf8(c, buff8);
1729     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1730     c = buff8[ii]; /* Last byte */
1731     }
1732     #endif
1733 nigel 3 break;
1734    
1735     case 'x':
1736 nigel 49
1737     /* Handle \x{..} specially - new Perl thing for utf8 */
1738    
1739 nigel 79 #if !defined NOUTF8
1740 nigel 49 if (*p == '{')
1741     {
1742     unsigned char *pt = p;
1743     c = 0;
1744     while (isxdigit(*(++pt)))
1745     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1746     if (*pt == '}')
1747     {
1748 nigel 67 unsigned char buff8[8];
1749 nigel 49 int ii, utn;
1750 nigel 85 utn = ord2utf8(c, buff8);
1751 nigel 67 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1752     c = buff8[ii]; /* Last byte */
1753 nigel 49 p = pt + 1;
1754     break;
1755     }
1756     /* Not correct form; fall through */
1757     }
1758 nigel 79 #endif
1759 nigel 49
1760     /* Ordinary \x */
1761    
1762 nigel 3 c = 0;
1763     while (i++ < 2 && isxdigit(*p))
1764     {
1765     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1766     p++;
1767     }
1768     break;
1769    
1770 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1771 nigel 3 p--;
1772     continue;
1773    
1774 nigel 75 case '>':
1775     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1776     continue;
1777    
1778 nigel 3 case 'A': /* Option setting */
1779     options |= PCRE_ANCHORED;
1780     continue;
1781    
1782     case 'B':
1783     options |= PCRE_NOTBOL;
1784     continue;
1785    
1786 nigel 29 case 'C':
1787 nigel 63 if (isdigit(*p)) /* Set copy string */
1788     {
1789     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1790     copystrings |= 1 << n;
1791     }
1792     else if (isalnum(*p))
1793     {
1794 nigel 91 uschar *npp = copynamesptr;
1795 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1796 nigel 91 *npp++ = 0;
1797 nigel 67 *npp = 0;
1798 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1799 nigel 63 if (n < 0)
1800 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1801     copynamesptr = npp;
1802 nigel 63 }
1803     else if (*p == '+')
1804     {
1805     callout_extra = 1;
1806     p++;
1807     }
1808     else if (*p == '-')
1809     {
1810     pcre_callout = NULL;
1811     p++;
1812     }
1813     else if (*p == '!')
1814     {
1815     callout_fail_id = 0;
1816     p++;
1817     while(isdigit(*p))
1818     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1819     callout_fail_count = 0;
1820     if (*p == '!')
1821     {
1822     p++;
1823     while(isdigit(*p))
1824     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1825     }
1826     }
1827     else if (*p == '*')
1828     {
1829     int sign = 1;
1830     callout_data = 0;
1831     if (*(++p) == '-') { sign = -1; p++; }
1832     while(isdigit(*p))
1833     callout_data = callout_data * 10 + *p++ - '0';
1834     callout_data *= sign;
1835     callout_data_set = 1;
1836     }
1837 nigel 29 continue;
1838    
1839 nigel 79 #if !defined NODFA
1840 nigel 77 case 'D':
1841 nigel 79 #if !defined NOPOSIX
1842 nigel 77 if (posix || do_posix)
1843     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1844     else
1845 nigel 79 #endif
1846 nigel 77 use_dfa = 1;
1847     continue;
1848    
1849     case 'F':
1850     options |= PCRE_DFA_SHORTEST;
1851     continue;
1852 nigel 79 #endif
1853 nigel 77
1854 nigel 29 case 'G':
1855 nigel 63 if (isdigit(*p))
1856     {
1857     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1858     getstrings |= 1 << n;
1859     }
1860     else if (isalnum(*p))
1861     {
1862 nigel 91 uschar *npp = getnamesptr;
1863 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1864 nigel 91 *npp++ = 0;
1865 nigel 67 *npp = 0;
1866 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1867 nigel 63 if (n < 0)
1868 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1869     getnamesptr = npp;
1870 nigel 63 }
1871 nigel 29 continue;
1872    
1873     case 'L':
1874     getlist = 1;
1875     continue;
1876    
1877 nigel 63 case 'M':
1878     find_match_limit = 1;
1879     continue;
1880    
1881 nigel 37 case 'N':
1882     options |= PCRE_NOTEMPTY;
1883     continue;
1884    
1885 nigel 3 case 'O':
1886     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1887 nigel 53 if (n > size_offsets_max)
1888     {
1889     size_offsets_max = n;
1890 nigel 57 free(offsets);
1891 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1892 nigel 53 if (offsets == NULL)
1893     {
1894     printf("** Failed to get %d bytes of memory for offsets vector\n",
1895 ph10 151 (int)(size_offsets_max * sizeof(int)));
1896 nigel 77 yield = 1;
1897     goto EXIT;
1898 nigel 53 }
1899     }
1900     use_size_offsets = n;
1901 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1902 nigel 3 continue;
1903    
1904 nigel 75 case 'P':
1905     options |= PCRE_PARTIAL;
1906     continue;
1907    
1908 nigel 91 case 'Q':
1909     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1910     if (extra == NULL)
1911     {
1912     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1913     extra->flags = 0;
1914     }
1915     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1916     extra->match_limit_recursion = n;
1917     continue;
1918    
1919     case 'q':
1920     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1921     if (extra == NULL)
1922     {
1923     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1924     extra->flags = 0;
1925     }
1926     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1927     extra->match_limit = n;
1928     continue;
1929    
1930 nigel 79 #if !defined NODFA
1931 nigel 77 case 'R':
1932     options |= PCRE_DFA_RESTART;
1933     continue;
1934 nigel 79 #endif
1935 nigel 77
1936 nigel 73 case 'S':
1937     show_malloc = 1;
1938     continue;
1939    
1940 nigel 3 case 'Z':
1941     options |= PCRE_NOTEOL;
1942     continue;
1943 nigel 71
1944     case '?':
1945     options |= PCRE_NO_UTF8_CHECK;
1946     continue;
1947 nigel 91
1948     case '<':
1949     {
1950     int x = check_newline(p, outfile);
1951     if (x == 0) goto NEXT_DATA;
1952     options |= x;
1953     while (*p++ != '>');
1954     }
1955     continue;
1956 nigel 3 }
1957 nigel 9 *q++ = c;
1958 nigel 3 }
1959 nigel 9 *q = 0;
1960     len = q - dbuffer;
1961 nigel 3
1962 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
1963     {
1964     printf("**Match limit not relevant for DFA matching: ignored\n");
1965     find_match_limit = 0;
1966     }
1967    
1968 nigel 3 /* Handle matching via the POSIX interface, which does not
1969 nigel 63 support timing or playing with the match limit or callout data. */
1970 nigel 3
1971 nigel 37 #if !defined NOPOSIX
1972 nigel 3 if (posix || do_posix)
1973     {
1974     int rc;
1975     int eflags = 0;
1976 nigel 63 regmatch_t *pmatch = NULL;
1977     if (use_size_offsets > 0)
1978 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1979 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1980     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1981    
1982 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1983 nigel 3
1984     if (rc != 0)
1985     {
1986 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1987 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1988     }
1989 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1990     != 0)
1991     {
1992     fprintf(outfile, "Matched with REG_NOSUB\n");
1993     }
1994 nigel 3 else
1995     {
1996 nigel 7 size_t i;
1997 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
1998 nigel 3 {
1999     if (pmatch[i].rm_so >= 0)
2000     {
2001 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2002 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2003     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2004 nigel 3 fprintf(outfile, "\n");
2005 nigel 35 if (i == 0 && do_showrest)
2006     {
2007     fprintf(outfile, " 0+ ");
2008 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2009     outfile);
2010 nigel 35 fprintf(outfile, "\n");
2011     }
2012 nigel 3 }
2013     }
2014     }
2015 nigel 53 free(pmatch);
2016 nigel 3 }
2017    
2018 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2019 nigel 3
2020 nigel 37 else
2021     #endif /* !defined NOPOSIX */
2022    
2023 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2024 nigel 3 {
2025 nigel 93 if (timeitm > 0)
2026 nigel 3 {
2027     register int i;
2028     clock_t time_taken;
2029     clock_t start_time = clock();
2030 nigel 77
2031 nigel 79 #if !defined NODFA
2032 nigel 77 if (all_use_dfa || use_dfa)
2033     {
2034     int workspace[1000];
2035 nigel 93 for (i = 0; i < timeitm; i++)
2036 nigel 77 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2037     options | g_notempty, use_offsets, use_size_offsets, workspace,
2038     sizeof(workspace)/sizeof(int));
2039     }
2040     else
2041 nigel 79 #endif
2042 nigel 77
2043 nigel 93 for (i = 0; i < timeitm; i++)
2044 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2045 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2046 nigel 77
2047 nigel 3 time_taken = clock() - start_time;
2048 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2049     (((double)time_taken * 1000.0) / (double)timeitm) /
2050 nigel 63 (double)CLOCKS_PER_SEC);
2051 nigel 3 }
2052    
2053 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2054 nigel 87 varying limits in order to find the minimum value for the match limit and
2055     for the recursion limit. */
2056 nigel 63
2057     if (find_match_limit)
2058     {
2059     if (extra == NULL)
2060     {
2061 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2062 nigel 63 extra->flags = 0;
2063     }
2064    
2065 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2066 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2067     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2068     PCRE_ERROR_MATCHLIMIT, "match()");
2069 nigel 63
2070 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2071     options|g_notempty, use_offsets, use_size_offsets,
2072     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2073     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2074 nigel 63 }
2075    
2076     /* If callout_data is set, use the interface with additional data */
2077    
2078     else if (callout_data_set)
2079     {
2080     if (extra == NULL)
2081     {
2082 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2083 nigel 63 extra->flags = 0;
2084     }
2085     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2086 nigel 71 extra->callout_data = &callout_data;
2087 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2088     options | g_notempty, use_offsets, use_size_offsets);
2089     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2090     }
2091    
2092     /* The normal case is just to do the match once, with the default
2093     value of match_limit. */
2094    
2095 nigel 79 #if !defined NODFA
2096 nigel 77 else if (all_use_dfa || use_dfa)
2097     {
2098     int workspace[1000];
2099     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2100     options | g_notempty, use_offsets, use_size_offsets, workspace,
2101     sizeof(workspace)/sizeof(int));
2102     if (count == 0)
2103     {
2104     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2105     count = use_size_offsets/2;
2106     }
2107     }
2108 nigel 79 #endif
2109 nigel 77
2110 nigel 75 else
2111     {
2112     count = pcre_exec(re, extra, (char *)bptr, len,
2113     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2114 nigel 77 if (count == 0)
2115     {
2116     fprintf(outfile, "Matched, but too many substrings\n");
2117     count = use_size_offsets/3;
2118     }
2119 nigel 75 }
2120 nigel 3
2121 nigel 39 /* Matched */
2122    
2123 nigel 3 if (count >= 0)
2124     {
2125 nigel 93 int i, maxcount;
2126    
2127     #if !defined NODFA
2128     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2129     #endif
2130     maxcount = use_size_offsets/3;
2131    
2132     /* This is a check against a lunatic return value. */
2133    
2134     if (count > maxcount)
2135     {
2136     fprintf(outfile,
2137     "** PCRE error: returned count %d is too big for offset size %d\n",
2138     count, use_size_offsets);
2139     count = use_size_offsets/3;
2140     if (do_g || do_G)
2141     {
2142     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2143     do_g = do_G = FALSE; /* Break g/G loop */
2144     }
2145     }
2146    
2147 nigel 29 for (i = 0; i < count * 2; i += 2)
2148 nigel 3 {
2149 nigel 57 if (use_offsets[i] < 0)
2150 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2151     else
2152     {
2153     fprintf(outfile, "%2d: ", i/2);
2154 nigel 63 (void)pchars(bptr + use_offsets[i],
2155     use_offsets[i+1] - use_offsets[i], outfile);
2156 nigel 3 fprintf(outfile, "\n");
2157 nigel 35 if (i == 0)
2158     {
2159     if (do_showrest)
2160     {
2161     fprintf(outfile, " 0+ ");
2162 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2163     outfile);
2164 nigel 35 fprintf(outfile, "\n");
2165     }
2166     }
2167 nigel 3 }
2168     }
2169 nigel 29
2170     for (i = 0; i < 32; i++)
2171     {
2172     if ((copystrings & (1 << i)) != 0)
2173     {
2174 nigel 91 char copybuffer[256];
2175 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2176 nigel 37 i, copybuffer, sizeof(copybuffer));
2177 nigel 29 if (rc < 0)
2178     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2179     else
2180 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2181 nigel 29 }
2182     }
2183    
2184 nigel 91 for (copynamesptr = copynames;
2185     *copynamesptr != 0;
2186     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2187     {
2188     char copybuffer[256];
2189     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2190     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2191     if (rc < 0)
2192     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2193     else
2194     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2195     }
2196    
2197 nigel 29 for (i = 0; i < 32; i++)
2198     {
2199     if ((getstrings & (1 << i)) != 0)
2200     {
2201     const char *substring;
2202 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2203 nigel 29 i, &substring);
2204     if (rc < 0)
2205     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2206     else
2207     {
2208     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2209 nigel 49 pcre_free_substring(substring);
2210 nigel 29 }
2211     }
2212     }
2213    
2214 nigel 91 for (getnamesptr = getnames;
2215     *getnamesptr != 0;
2216     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2217     {
2218     const char *substring;
2219     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2220     count, (char *)getnamesptr, &substring);
2221     if (rc < 0)
2222     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2223     else
2224     {
2225     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2226     pcre_free_substring(substring);
2227     }
2228     }
2229    
2230 nigel 29 if (getlist)
2231     {
2232     const char **stringlist;
2233 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2234 nigel 29 &stringlist);
2235     if (rc < 0)
2236     fprintf(outfile, "get substring list failed %d\n", rc);
2237     else
2238     {
2239     for (i = 0; i < count; i++)
2240     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2241     if (stringlist[i] != NULL)
2242     fprintf(outfile, "string list not terminated by NULL\n");
2243 nigel 49 /* free((void *)stringlist); */
2244     pcre_free_substring_list(stringlist);
2245 nigel 29 }
2246     }
2247 nigel 39 }
2248 nigel 29
2249 nigel 75 /* There was a partial match */
2250    
2251     else if (count == PCRE_ERROR_PARTIAL)
2252     {
2253 nigel 77 fprintf(outfile, "Partial match");
2254 nigel 79 #if !defined NODFA
2255 nigel 77 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2256     fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2257     bptr + use_offsets[0]);
2258 nigel 79 #endif
2259 nigel 77 fprintf(outfile, "\n");
2260 nigel 75 break; /* Out of the /g loop */
2261     }
2262    
2263 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2264 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2265     to advance the start offset, and continue. We won't be at the end of the
2266     string - that was checked before setting g_notempty.
2267 nigel 39
2268 ph10 150 Complication arises in the case when the newline option is "any" or
2269 ph10 149 "anycrlf". If the previous match was at the end of a line terminated by
2270     CRLF, an advance of one character just passes the \r, whereas we should
2271     prefer the longer newline sequence, as does the code in pcre_exec().
2272     Fudge the offset value to achieve this.
2273 ph10 144
2274 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2275     character, not one byte. */
2276    
2277 nigel 3 else
2278     {
2279 nigel 41 if (g_notempty != 0)
2280 nigel 35 {
2281 nigel 73 int onechar = 1;
2282 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2283 nigel 57 use_offsets[0] = start_offset;
2284 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2285     {
2286     int d;
2287     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2288     obits = (d == '\r')? PCRE_NEWLINE_CR :
2289     (d == '\n')? PCRE_NEWLINE_LF :
2290     (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2291 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2292 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2293     }
2294 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2295 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2296 ph10 149 &&
2297 ph10 143 start_offset < len - 1 &&
2298     bptr[start_offset] == '\r' &&
2299     bptr[start_offset+1] == '\n')
2300 ph10 144 onechar++;
2301 ph10 143 else if (use_utf8)
2302 nigel 73 {
2303     while (start_offset + onechar < len)
2304     {
2305     int tb = bptr[start_offset+onechar];
2306     if (tb <= 127) break;
2307     tb &= 0xc0;
2308     if (tb != 0 && tb != 0xc0) onechar++;
2309     }
2310     }
2311     use_offsets[1] = start_offset + onechar;
2312 nigel 35 }
2313 nigel 41 else
2314     {
2315 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2316 nigel 41 {
2317 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
2318 nigel 41 }
2319 nigel 73 else fprintf(outfile, "Error %d\n", count);
2320 nigel 41 break; /* Out of the /g loop */
2321     }
2322 nigel 3 }
2323 nigel 35
2324 nigel 39 /* If not /g or /G we are done */
2325    
2326     if (!do_g && !do_G) break;
2327    
2328 nigel 41 /* If we have matched an empty string, first check to see if we are at
2329     the end of the subject. If so, the /g loop is over. Otherwise, mimic
2330     what Perl's /g option does. This turns out to be rather cunning. First
2331 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2332     same point. If this fails (picked up above) we advance to the next
2333 ph10 143 character. */
2334 ph10 142
2335 nigel 41 g_notempty = 0;
2336 ph10 142
2337 nigel 57 if (use_offsets[0] == use_offsets[1])
2338 nigel 41 {
2339 nigel 57 if (use_offsets[0] == len) break;
2340 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2341 nigel 41 }
2342 nigel 39
2343     /* For /g, update the start offset, leaving the rest alone */
2344    
2345 ph10 143 if (do_g) start_offset = use_offsets[1];
2346 nigel 39
2347     /* For /G, update the pointer and length */
2348    
2349     else
2350 nigel 35 {
2351 ph10 143 bptr += use_offsets[1];
2352     len -= use_offsets[1];
2353 nigel 35 }
2354 nigel 39 } /* End of loop for /g and /G */
2355 nigel 91
2356     NEXT_DATA: continue;
2357 nigel 39 } /* End of loop for data lines */
2358 nigel 3
2359 nigel 11 CONTINUE:
2360 nigel 37
2361     #if !defined NOPOSIX
2362 nigel 3 if (posix || do_posix) regfree(&preg);
2363 nigel 37 #endif
2364    
2365 nigel 77 if (re != NULL) new_free(re);
2366     if (extra != NULL) new_free(extra);
2367 nigel 25 if (tables != NULL)
2368     {
2369 nigel 77 new_free((void *)tables);
2370 nigel 25 setlocale(LC_CTYPE, "C");
2371 nigel 93 locale_set = 0;
2372 nigel 25 }
2373 nigel 3 }
2374    
2375 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2376 nigel 77
2377     EXIT:
2378    
2379     if (infile != NULL && infile != stdin) fclose(infile);
2380     if (outfile != NULL && outfile != stdout) fclose(outfile);
2381    
2382     free(buffer);
2383     free(dbuffer);
2384     free(pbuffer);
2385     free(offsets);
2386    
2387     return yield;
2388 nigel 3 }
2389    
2390 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12