/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 150 - (hide annotations) (download)
Tue Apr 17 08:22:40 2007 UTC (7 years, 3 months ago) by ph10
File MIME type: text/plain
File size: 68571 byte(s)
Update HTML documentation.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
46 nigel 3
47 nigel 93
48     /* A number of things vary for Windows builds. Originally, pcretest opened its
49     input and output without "b"; then I was told that "b" was needed in some
50     environments, so it was added for release 5.0 to both the input and output. (It
51     makes no difference on Unix-like systems.) Later I was told that it is wrong
52     for the input on Windows. I've now abstracted the modes into two macros that
53     are set here, to make it easier to fiddle with them, and removed "b" from the
54     input mode under Windows. */
55    
56     #if defined(_WIN32) || defined(WIN32)
57     #include <io.h> /* For _setmode() */
58     #include <fcntl.h> /* For _O_BINARY */
59     #define INPUT_MODE "r"
60     #define OUTPUT_MODE "wb"
61    
62     #else
63     #include <sys/time.h> /* These two includes are needed */
64     #include <sys/resource.h> /* for setrlimit(). */
65     #define INPUT_MODE "rb"
66     #define OUTPUT_MODE "wb"
67 nigel 91 #endif
68    
69 nigel 93
70 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
71     displaying the results of pcre_study() and we also need to know about the
72     internal macros, structures, and other internal data values; pcretest has
73     "inside information" compared to a program that strictly follows the PCRE API.
74 nigel 37
75 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
76     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
77     appropriately for an application, not for building PCRE. */
78 nigel 77
79 ph10 145 #include "pcre.h"
80 nigel 77 #include "pcre_internal.h"
81    
82 nigel 85 /* We need access to the data tables that PCRE uses. So as not to have to keep
83     two copies, we include the source file here, changing the names of the external
84     symbols to prevent clashes. */
85 nigel 77
86 nigel 85 #define _pcre_utf8_table1 utf8_table1
87     #define _pcre_utf8_table1_size utf8_table1_size
88     #define _pcre_utf8_table2 utf8_table2
89     #define _pcre_utf8_table3 utf8_table3
90     #define _pcre_utf8_table4 utf8_table4
91     #define _pcre_utt utt
92     #define _pcre_utt_size utt_size
93     #define _pcre_OP_lengths OP_lengths
94    
95     #include "pcre_tables.c"
96    
97     /* We also need the pcre_printint() function for printing out compiled
98     patterns. This function is in a separate file so that it can be included in
99 nigel 93 pcre_compile.c when that module is compiled with debugging enabled.
100 nigel 85
101 nigel 93 The definition of the macro PRINTABLE, which determines whether to print an
102     output character as-is or as a hex value when showing compiled patterns, is
103     contained in this file. We use it here also, in cases when the locale has not
104     been explicitly changed, so as to get consistent output from systems that
105     differ in their output from isprint() even in the "C" locale. */
106    
107 nigel 85 #include "pcre_printint.src"
108    
109 nigel 93 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
110 nigel 85
111 nigel 93
112 nigel 37 /* It is possible to compile this test program without including support for
113     testing the POSIX interface, though this is not available via the standard
114     Makefile. */
115    
116     #if !defined NOPOSIX
117 nigel 3 #include "pcreposix.h"
118 nigel 37 #endif
119 nigel 3
120 ph10 107 /* It is also possible, for the benefit of the version currently imported into
121     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
122     interface to the DFA matcher (NODFA), and without the doublecheck of the old
123     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
124     UTF8 support if PCRE is built without it. */
125 nigel 79
126 ph10 107 #ifndef SUPPORT_UTF8
127     #ifndef NOUTF8
128     #define NOUTF8
129     #endif
130     #endif
131 nigel 79
132 ph10 107
133 nigel 85 /* Other parameters */
134    
135 nigel 3 #ifndef CLOCKS_PER_SEC
136     #ifdef CLK_TCK
137     #define CLOCKS_PER_SEC CLK_TCK
138     #else
139     #define CLOCKS_PER_SEC 100
140     #endif
141     #endif
142    
143 nigel 93 /* This is the default loop count for timing. */
144    
145 nigel 75 #define LOOPREPEAT 500000
146 nigel 3
147 nigel 85 /* Static variables */
148    
149 nigel 3 static FILE *outfile; /* Stream for results; file scope so helpers such as new_malloc() can write to it */
150     static int log_store = 0; /* Not referenced in the visible part of this file */
151 nigel 63 static int callout_count; /* Incremented by callout() for callouts whose number equals callout_fail_id */
152     static int callout_extra; /* Non-zero => callout() also prints the capture details */
153     static int callout_fail_count; /* callout() returns 1 once callout_count reaches this value */
154     static int callout_fail_id; /* Callout number that counts towards a forced failure */
155     static int first_callout; /* Non-zero => callout() prints the subject; callout() resets it to 0 */
156 nigel 93 static int locale_set = 0; /* Non-zero => PRINTHEX uses isprint() instead of PRINTABLE */
157 nigel 73 static int show_malloc; /* Non-zero => the malloc/free wrappers trace each call to outfile */
158 nigel 67 static int use_utf8; /* Non-zero => pchars() decodes the string as UTF-8 */
159 nigel 43 static size_t gotten_store; /* Size passed to the most recent new_malloc() call */
160 nigel 3
161 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
162    
163     static int buffer_size = 50000; /* Current size of each of the three buffers; doubled by extend_inputline() */
164     static uschar *buffer = NULL; /* Input lines are read into this buffer */
165     static uschar *dbuffer = NULL; /* Resized together with buffer; contents are not preserved on growth */
166 nigel 75 static uschar *pbuffer = NULL; /* Copy of the input kept for callouts; contents preserved on growth */
167 nigel 3
168 nigel 75
169 nigel 49
170     /*************************************************
171 nigel 91 * Read or extend an input line *
172     *************************************************/
173    
174     /* Input lines are read into buffer, but both patterns and data lines can be
175     continued over multiple input lines. In addition, if the buffer fills up, we
176     want to automatically expand it so as to be able to handle extremely large
177     lines that are needed for certain stress tests. When the input buffer is
178     expanded, the other two buffers must also be expanded likewise, and the
179     contents of pbuffer, which are a copy of the input for callouts, must be
180     preserved (for when expansion happens for a data line). This is not the most
181     optimal way of handling this, but hey, this is just a test program!
182    
183     Arguments:
184     f the file to read
185     start where in buffer to start (this *must* be within buffer)
186    
187     Returns: pointer to the start of new data
188     could be a copy of start, or could be moved
189     NULL if no data read and EOF reached
190     */
191    
192     static uschar *
193     extend_inputline(FILE *f, uschar *start)
194     {
195     uschar *here = start;
196    
197     for (;;)
198     {
199     int rlen = buffer_size - (here - buffer);
200 nigel 93
201 nigel 91 if (rlen > 1000)
202     {
203     int dlen;
204     if (fgets((char *)here, rlen, f) == NULL)
205     return (here == start)? NULL : start;
206     dlen = (int)strlen((char *)here);
207     if (dlen > 0 && here[dlen - 1] == '\n') return start;
208     here += dlen;
209     }
210    
211     else
212     {
213     int new_buffer_size = 2*buffer_size;
214     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
215     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
216     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
217    
218     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
219     {
220     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
221     exit(1);
222     }
223    
224     memcpy(new_buffer, buffer, buffer_size);
225     memcpy(new_pbuffer, pbuffer, buffer_size);
226    
227     buffer_size = new_buffer_size;
228    
229     start = new_buffer + (start - buffer);
230     here = new_buffer + (here - buffer);
231    
232     free(buffer);
233     free(dbuffer);
234     free(pbuffer);
235    
236     buffer = new_buffer;
237     dbuffer = new_dbuffer;
238     pbuffer = new_pbuffer;
239     }
240     }
241    
242     return NULL; /* Control never gets here */
243     }
244    
245    
246    
247    
248    
249    
250    
251     /*************************************************
252 nigel 63 * Read number from string *
253     *************************************************/
254    
255     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
256     around with conditional compilation, just do the job by hand. It is only used
257 nigel 93 for unpicking arguments, so just keep it simple.
258 nigel 63
259     Arguments:
260     str string to be converted
261     endptr where to put the end pointer
262    
263     Returns: the value, as an int
264     */
265    
/* Convert a decimal number at the start of a string to an int. Leading white
space is skipped, then digits are consumed until the first non-digit. The
result saturates at INT_MAX instead of overflowing, because signed overflow is
undefined behaviour and the digits come straight from the command line.

Arguments:
  str        string to be converted
  endptr     receives a pointer to the first character that was not consumed

Returns:     the converted value (0 if there are no digits), at most INT_MAX
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;

while (*str != 0 && isspace(*str)) str++;

while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');
  if (result > (INT_MAX - digit)/10)
    result = INT_MAX;                /* Would overflow: stick at the maximum */
  else
    result = result * 10 + digit;
  }

*endptr = str;
return result;
}
275    
276    
277    
278 nigel 49
279     /*************************************************
280     * Convert UTF-8 string to value *
281     *************************************************/
282    
283     /* This function takes one or more bytes that represents a UTF-8 character,
284     and returns the value of the character.
285    
286     Argument:
287 nigel 91 utf8bytes a pointer to the byte vector
288     vptr a pointer to an int to receive the value
289 nigel 49
290 nigel 91 Returns: > 0 => the number of bytes consumed
291     -6 to 0 => malformed UTF-8 character at offset = (-return)
292 nigel 49 */
293    
294 nigel 79 #if !defined NOUTF8
295    
296 nigel 67 static int
297 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
298 nigel 49 {
299 nigel 91 int c = *utf8bytes++;
300 nigel 49 int d = c;
301     int i, j, s;
302    
303     for (i = -1; i < 6; i++) /* i is number of additional bytes */
304     {
305     if ((d & 0x80) == 0) break;
306     d <<= 1;
307     }
308    
309     if (i == -1) { *vptr = c; return 1; } /* ascii character */
310     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
311    
312     /* i now has a value in the range 1-5 */
313    
314 nigel 59 s = 6*i;
315 nigel 85 d = (c & utf8_table3[i]) << s;
316 nigel 49
317     for (j = 0; j < i; j++)
318     {
319 nigel 91 c = *utf8bytes++;
320 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
321 nigel 59 s -= 6;
322 nigel 49 d |= (c & 0x3f) << s;
323     }
324    
325     /* Check that encoding was the correct unique one */
326    
327 nigel 85 for (j = 0; j < utf8_table1_size; j++)
328     if (d <= utf8_table1[j]) break;
329 nigel 49 if (j != i) return -(i+1);
330    
331     /* Valid value */
332    
333     *vptr = d;
334     return i+1;
335     }
336    
337 nigel 79 #endif
338 nigel 49
339    
340 nigel 79
341 nigel 63 /*************************************************
342 nigel 85 * Convert character value to UTF-8 *
343     *************************************************/
344    
345     /* This function takes an integer value in the range 0 - 0x7fffffff
346     and encodes it as a UTF-8 character in 1 to 6 bytes.
347    
348     Arguments:
349     cvalue the character value
350 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
351 nigel 85
352     Returns: number of bytes placed in the buffer
353     */
354    
355 nigel 93 #if !defined NOUTF8
356    
357 nigel 85 static int
358 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
359 nigel 85 {
360     register int i, j;
361     for (i = 0; i < utf8_table1_size; i++)
362     if (cvalue <= utf8_table1[i]) break;
363 nigel 91 utf8bytes += i;
364 nigel 85 for (j = i; j > 0; j--)
365     {
366 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
367 nigel 85 cvalue >>= 6;
368     }
369 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
370 nigel 85 return i + 1;
371     }
372    
373 nigel 93 #endif
374 nigel 85
375    
376 nigel 93
377 nigel 85 /*************************************************
378 nigel 63 * Print character string *
379     *************************************************/
380 nigel 49
381 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
382     mode. Yields number of characters printed. If handed a NULL file, just counts
383     chars without printing. */
384 nigel 49
385 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
386 nigel 3 {
387 nigel 85 int c = 0;
388 nigel 63 int yield = 0;
389 nigel 3
390 nigel 63 while (length-- > 0)
391 nigel 3 {
392 nigel 79 #if !defined NOUTF8
393 nigel 67 if (use_utf8)
394 nigel 63 {
395     int rc = utf82ord(p, &c);
396 nigel 3
397 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
398     {
399     length -= rc - 1;
400     p += rc;
401 nigel 93 if (PRINTHEX(c))
402 nigel 63 {
403     if (f != NULL) fprintf(f, "%c", c);
404     yield++;
405     }
406     else
407     {
408 nigel 93 int n = 4;
409     if (f != NULL) fprintf(f, "\\x{%02x}", c);
410     yield += (n <= 0x000000ff)? 2 :
411     (n <= 0x00000fff)? 3 :
412     (n <= 0x0000ffff)? 4 :
413     (n <= 0x000fffff)? 5 : 6;
414 nigel 63 }
415     continue;
416     }
417     }
418 nigel 79 #endif
419 nigel 3
420 nigel 63 /* Not UTF-8, or malformed UTF-8 */
421    
422 nigel 93 c = *p++;
423     if (PRINTHEX(c))
424 nigel 3 {
425 nigel 63 if (f != NULL) fprintf(f, "%c", c);
426     yield++;
427 nigel 3 }
428 nigel 63 else
429 nigel 3 {
430 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
431     yield += 4;
432     }
433     }
434 nigel 3
435 nigel 63 return yield;
436     }
437 nigel 23
438 nigel 3
439 nigel 23
440 nigel 63 /*************************************************
441     * Callout function *
442     *************************************************/
443 nigel 3
444 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
445     the match. Yield zero unless more callouts than the fail count, or the callout
446     data is not zero. */
447 nigel 3
448 nigel 63 static int callout(pcre_callout_block *cb)
449     {
450     FILE *f = (first_callout | callout_extra)? outfile : NULL;
451 nigel 75 int i, pre_start, post_start, subject_length;
452 nigel 3
453 nigel 63 if (callout_extra)
454     {
455     fprintf(f, "Callout %d: last capture = %d\n",
456     cb->callout_number, cb->capture_last);
457 nigel 3
458 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
459     {
460     if (cb->offset_vector[i] < 0)
461     fprintf(f, "%2d: <unset>\n", i/2);
462     else
463     {
464     fprintf(f, "%2d: ", i/2);
465     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
466     cb->offset_vector[i+1] - cb->offset_vector[i], f);
467     fprintf(f, "\n");
468     }
469     }
470     }
471 nigel 3
472 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
473     datails. On subsequent calls in the same match, we use pchars just to find the
474     printed lengths of the substrings. */
475 nigel 3
476 nigel 63 if (f != NULL) fprintf(f, "--->");
477 nigel 3
478 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
479     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
480     cb->current_position - cb->start_match, f);
481 nigel 3
482 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
483    
484 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
485     cb->subject_length - cb->current_position, f);
486 nigel 3
487 nigel 63 if (f != NULL) fprintf(f, "\n");
488 nigel 9
489 nigel 63 /* Always print appropriate indicators, with callout number if not already
490 nigel 75 shown. For automatic callouts, show the pattern offset. */
491 nigel 3
492 nigel 75 if (cb->callout_number == 255)
493     {
494     fprintf(outfile, "%+3d ", cb->pattern_position);
495     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
496     }
497     else
498     {
499     if (callout_extra) fprintf(outfile, " ");
500     else fprintf(outfile, "%3d ", cb->callout_number);
501     }
502 nigel 3
503 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
504     fprintf(outfile, "^");
505 nigel 3
506 nigel 63 if (post_start > 0)
507     {
508     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
509     fprintf(outfile, "^");
510 nigel 3 }
511    
512 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
513     fprintf(outfile, " ");
514    
515     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
516     pbuffer + cb->pattern_position);
517    
518 nigel 63 fprintf(outfile, "\n");
519     first_callout = 0;
520 nigel 3
521 nigel 71 if (cb->callout_data != NULL)
522 nigel 49 {
523 nigel 71 int callout_data = *((int *)(cb->callout_data));
524     if (callout_data != 0)
525     {
526     fprintf(outfile, "Callout data = %d\n", callout_data);
527     return callout_data;
528     }
529 nigel 63 }
530 nigel 49
531 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
532     (++callout_count >= callout_fail_count)? 1 : 0;
533 nigel 3 }
534    
535    
536 nigel 63 /*************************************************
537 nigel 73 * Local malloc functions *
538 nigel 63 *************************************************/
539 nigel 3
540     /* Alternative malloc function, to test functionality and show the size of the
541     compiled re. */
542    
543     static void *new_malloc(size_t size)
544     {
545 nigel 73 void *block = malloc(size);
546 nigel 43 gotten_store = size;
547 nigel 73 if (show_malloc)
548 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
549 nigel 73 return block;
550 nigel 3 }
551    
552 nigel 73 static void new_free(void *block)
553     {
554     if (show_malloc)
555     fprintf(outfile, "free %p\n", block);
556     free(block);
557     }
558 nigel 3
559    
560 nigel 73 /* For recursion malloc/free, to test stacking calls */
561    
562     static void *stack_malloc(size_t size)
563     {
564     void *block = malloc(size);
565     if (show_malloc)
566 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
567 nigel 73 return block;
568     }
569    
570     static void stack_free(void *block)
571     {
572     if (show_malloc)
573     fprintf(outfile, "stack_free %p\n", block);
574     free(block);
575     }
576    
577    
578 nigel 63 /*************************************************
579     * Call pcre_fullinfo() *
580     *************************************************/
581 nigel 43
582     /* Get one piece of information from the pcre_fullinfo() function */
583    
584     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
585     {
586     int rc;
587     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
588     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
589     }
590    
591    
592    
593 nigel 63 /*************************************************
594 nigel 75 * Byte flipping function *
595     *************************************************/
596    
/* Reverse the byte order of the low-order bytes of a value.

Arguments:
  value      the value to flip
  n          2 to swap the two low-order bytes; any other value reverses the
               four low-order bytes

Returns:     the flipped value; bits above the flipped bytes are zero
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int byte0 = value & 0xff;
unsigned long int byte1 = (value >> 8) & 0xff;
unsigned long int byte2, byte3;

if (n == 2) return (byte0 << 8) | byte1;

byte2 = (value >> 16) & 0xff;
byte3 = (value >> 24) & 0xff;
return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
606    
607    
608    
609    
610     /*************************************************
611 nigel 87 * Check match or recursion limit *
612     *************************************************/
613    
614     static int
615     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
616     int start_offset, int options, int *use_offsets, int use_size_offsets,
617     int flag, unsigned long int *limit, int errnumber, const char *msg)
618     {
619     int count;
620     int min = 0;
621     int mid = 64;
622     int max = -1;
623    
624     extra->flags |= flag;
625    
626     for (;;)
627     {
628     *limit = mid;
629    
630     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
631     use_offsets, use_size_offsets);
632    
633     if (count == errnumber)
634     {
635     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
636     min = mid;
637     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
638     }
639    
640     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
641     count == PCRE_ERROR_PARTIAL)
642     {
643     if (mid == min + 1)
644     {
645     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
646     break;
647     }
648     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
649     max = mid;
650     mid = (min + mid)/2;
651     }
652     else break; /* Some other error */
653     }
654    
655     extra->flags &= ~flag;
656     return count;
657     }
658    
659    
660    
661     /*************************************************
662 nigel 91 * Check newline indicator *
663     *************************************************/
664    
665     /* This is used both at compile and run-time to check for <xxx> escapes, where
666 ph10 149 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
667     no match.
668 nigel 91
669     Arguments:
670     p points after the leading '<'
671     f file for error message
672    
673     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
674     */
675    
676     static int
677     check_newline(uschar *p, FILE *f)
678     {
679     if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
680     if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
681     if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
682 ph10 149 if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
683 nigel 93 if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
684 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
685     return 0;
686     }
687    
688    
689    
690     /*************************************************
691 nigel 93 * Usage function *
692     *************************************************/
693    
/* Write the command-line summary to stdout. The -dfa and -p lines appear only
when the corresponding support is compiled in. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input> [<output>]]\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s output store (memory) used information\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
719    
720    
721    
722     /*************************************************
723 nigel 63 * Main Program *
724     *************************************************/
725 nigel 43
726 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
727     consist of a regular expression, in delimiters and optionally followed by
728     options, followed by a set of test data, terminated by an empty line. */
729    
730     int main(int argc, char **argv)
731     {
732     FILE *infile = stdin;
733     int options = 0;
734     int study_options = 0;
735     int op = 1;
736     int timeit = 0;
737 nigel 93 int timeitm = 0;
738 nigel 3 int showinfo = 0;
739 nigel 31 int showstore = 0;
740 nigel 87 int quiet = 0;
741 nigel 53 int size_offsets = 45;
742     int size_offsets_max;
743 nigel 77 int *offsets = NULL;
744 nigel 53 #if !defined NOPOSIX
745 nigel 3 int posix = 0;
746 nigel 53 #endif
747 nigel 3 int debug = 0;
748 nigel 11 int done = 0;
749 nigel 77 int all_use_dfa = 0;
750     int yield = 0;
751 nigel 91 int stack_size;
752 nigel 3
753 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
754     that 1024 is plenty long enough for the few names we'll be testing. */
755 nigel 69
756 nigel 91 uschar copynames[1024];
757     uschar getnames[1024];
758    
759     uschar *copynamesptr;
760     uschar *getnamesptr;
761    
762 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
763 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
764 nigel 69
765 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
766     dbuffer = (unsigned char *)malloc(buffer_size);
767     pbuffer = (unsigned char *)malloc(buffer_size);
768 nigel 69
769 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
770 nigel 3
771 nigel 93 outfile = stdout;
772    
773     /* The following _setmode() stuff is some Windows magic that tells its runtime
774     library to translate CRLF into a single LF character. At least, that's what
775     I've been told: never having used Windows I take this all on trust. Originally
776     it set 0x8000, but then I was advised that _O_BINARY was better. */
777    
778 nigel 75 #if defined(_WIN32) || defined(WIN32)
779 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
780     #endif
781 nigel 75
782 nigel 3 /* Scan options */
783    
784     while (argc > 1 && argv[op][0] == '-')
785     {
786 nigel 63 unsigned char *endptr;
787 nigel 53
788 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
789     showstore = 1;
790 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
791 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
792 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
793     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
794 nigel 79 #if !defined NODFA
795 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
796 nigel 79 #endif
797 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
798 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
799     *endptr == 0))
800 nigel 53 {
801     op++;
802     argc--;
803     }
804 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
805     {
806     int both = argv[op][2] == 0;
807     int temp;
808     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
809     *endptr == 0))
810     {
811     timeitm = temp;
812     op++;
813     argc--;
814     }
815     else timeitm = LOOPREPEAT;
816     if (both) timeit = timeitm;
817     }
818 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
819     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
820     *endptr == 0))
821     {
822 nigel 93 #if defined(_WIN32) || defined(WIN32)
823 nigel 91 printf("PCRE: -S not supported on this OS\n");
824     exit(1);
825     #else
826     int rc;
827     struct rlimit rlim;
828     getrlimit(RLIMIT_STACK, &rlim);
829     rlim.rlim_cur = stack_size * 1024 * 1024;
830     rc = setrlimit(RLIMIT_STACK, &rlim);
831     if (rc != 0)
832     {
833     printf("PCRE: setrlimit() failed with error %d\n", rc);
834     exit(1);
835     }
836     op++;
837     argc--;
838     #endif
839     }
840 nigel 53 #if !defined NOPOSIX
841 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
842 nigel 53 #endif
843 nigel 63 else if (strcmp(argv[op], "-C") == 0)
844     {
845     int rc;
846     printf("PCRE version %s\n", pcre_version());
847     printf("Compiled with\n");
848     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
849     printf(" %sUTF-8 support\n", rc? "" : "No ");
850 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
851     printf(" %sUnicode properties support\n", rc? "" : "No ");
852 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
853 nigel 91 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
854 nigel 93 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
855 ph10 150 (rc == -2)? "ANYCRLF" :
856 nigel 93 (rc == -1)? "ANY" : "???");
857 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
858     printf(" Internal link size = %d\n", rc);
859     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
860     printf(" POSIX malloc threshold = %d\n", rc);
861     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
862     printf(" Default match limit = %d\n", rc);
863 nigel 87 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
864     printf(" Default recursion depth limit = %d\n", rc);
865 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
866     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
867 ph10 121 goto EXIT;
868 nigel 63 }
869 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
870     strcmp(argv[op], "--help") == 0)
871     {
872     usage();
873     goto EXIT;
874     }
875 nigel 3 else
876     {
877 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
878 nigel 93 usage();
879 nigel 77 yield = 1;
880     goto EXIT;
881 nigel 3 }
882     op++;
883     argc--;
884     }
885    
886 nigel 53 /* Get the store for the offsets vector, and remember what it was */
887    
888     size_offsets_max = size_offsets;
889 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
890 nigel 53 if (offsets == NULL)
891     {
892     printf("** Failed to get %d bytes of memory for offsets vector\n",
893     size_offsets_max * sizeof(int));
894 nigel 77 yield = 1;
895     goto EXIT;
896 nigel 53 }
897    
898 nigel 3 /* Sort out the input and output files */
899    
900     if (argc > 1)
901     {
902 nigel 93 infile = fopen(argv[op], INPUT_MODE);
903 nigel 3 if (infile == NULL)
904     {
905     printf("** Failed to open %s\n", argv[op]);
906 nigel 77 yield = 1;
907     goto EXIT;
908 nigel 3 }
909     }
910    
911     if (argc > 2)
912     {
913 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
914 nigel 3 if (outfile == NULL)
915     {
916     printf("** Failed to open %s\n", argv[op+1]);
917 nigel 77 yield = 1;
918     goto EXIT;
919 nigel 3 }
920     }
921    
922     /* Set alternative malloc function */
923    
924     pcre_malloc = new_malloc;
925 nigel 73 pcre_free = new_free;
926     pcre_stack_malloc = stack_malloc;
927     pcre_stack_free = stack_free;
928 nigel 3
929 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
930 nigel 3
931 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
932 nigel 3
933     /* Main loop */
934    
935 nigel 11 while (!done)
936 nigel 3 {
937     pcre *re = NULL;
938     pcre_extra *extra = NULL;
939 nigel 37
940     #if !defined NOPOSIX /* There are still compilers that require no indent */
941 nigel 3 regex_t preg;
942 nigel 45 int do_posix = 0;
943 nigel 37 #endif
944    
945 nigel 7 const char *error;
946 nigel 25 unsigned char *p, *pp, *ppp;
947 nigel 75 unsigned char *to_file = NULL;
948 nigel 53 const unsigned char *tables = NULL;
949 nigel 75 unsigned long int true_size, true_study_size = 0;
950     size_t size, regex_gotten_store;
951 nigel 3 int do_study = 0;
952 nigel 25 int do_debug = debug;
953 ph10 123 int debug_lengths = 1;
954 nigel 35 int do_G = 0;
955     int do_g = 0;
956 nigel 25 int do_showinfo = showinfo;
957 nigel 35 int do_showrest = 0;
958 nigel 75 int do_flip = 0;
959 nigel 93 int erroroffset, len, delimiter, poffset;
960 nigel 3
961 nigel 67 use_utf8 = 0;
962 nigel 63
963 nigel 3 if (infile == stdin) printf(" re> ");
964 nigel 91 if (extend_inputline(infile, buffer) == NULL) break;
965 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
966 nigel 63 fflush(outfile);
967 nigel 3
968     p = buffer;
969     while (isspace(*p)) p++;
970     if (*p == 0) continue;
971    
972 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
973 nigel 3
974 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
975     {
976 nigel 91 unsigned long int magic, get_options;
977 nigel 75 uschar sbuf[8];
978     FILE *f;
979    
980     p++;
981     pp = p + (int)strlen((char *)p);
982     while (isspace(pp[-1])) pp--;
983     *pp = 0;
984    
985     f = fopen((char *)p, "rb");
986     if (f == NULL)
987     {
988     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
989     continue;
990     }
991    
992     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
993    
994     true_size =
995     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
996     true_study_size =
997     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
998    
999     re = (real_pcre *)new_malloc(true_size);
1000     regex_gotten_store = gotten_store;
1001    
1002     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1003    
1004     magic = ((real_pcre *)re)->magic_number;
1005     if (magic != MAGIC_NUMBER)
1006     {
1007     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1008     {
1009     do_flip = 1;
1010     }
1011     else
1012     {
1013     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1014     fclose(f);
1015     continue;
1016     }
1017     }
1018    
1019     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1020     do_flip? " (byte-inverted)" : "", p);
1021    
1022     /* Need to know if UTF-8 for printing data strings */
1023    
1024 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1025     use_utf8 = (get_options & PCRE_UTF8) != 0;
1026 nigel 75
1027     /* Now see if there is any following study data */
1028    
1029     if (true_study_size != 0)
1030     {
1031     pcre_study_data *psd;
1032    
1033     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1034     extra->flags = PCRE_EXTRA_STUDY_DATA;
1035    
1036     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1037     extra->study_data = psd;
1038    
1039     if (fread(psd, 1, true_study_size, f) != true_study_size)
1040     {
1041     FAIL_READ:
1042     fprintf(outfile, "Failed to read data from %s\n", p);
1043     if (extra != NULL) new_free(extra);
1044     if (re != NULL) new_free(re);
1045     fclose(f);
1046     continue;
1047     }
1048     fprintf(outfile, "Study data loaded from %s\n", p);
1049     do_study = 1; /* To get the data output if requested */
1050     }
1051     else fprintf(outfile, "No study data\n");
1052    
1053     fclose(f);
1054     goto SHOW_INFO;
1055     }
1056    
1057     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1058     the pattern; if it isn't complete, read more. */
1059    
1060 nigel 3 delimiter = *p++;
1061    
1062 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1063 nigel 3 {
1064 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1065 nigel 3 goto SKIP_DATA;
1066     }
1067    
1068     pp = p;
1069 nigel 93 poffset = p - buffer;
1070 nigel 3
1071     for(;;)
1072     {
1073 nigel 29 while (*pp != 0)
1074     {
1075     if (*pp == '\\' && pp[1] != 0) pp++;
1076     else if (*pp == delimiter) break;
1077     pp++;
1078     }
1079 nigel 3 if (*pp != 0) break;
1080     if (infile == stdin) printf(" > ");
1081 nigel 91 if ((pp = extend_inputline(infile, pp)) == NULL)
1082 nigel 3 {
1083     fprintf(outfile, "** Unexpected EOF\n");
1084 nigel 11 done = 1;
1085     goto CONTINUE;
1086 nigel 3 }
1087 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1088 nigel 3 }
1089    
1090 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1091     pointer to the correct relative point in the buffer. */
1092    
1093     p = buffer + poffset;
1094    
1095 nigel 29 /* If the first character after the delimiter is backslash, make
1096     the pattern end with backslash. This is purely to provide a way
1097     of testing for the error message when a pattern ends with backslash. */
1098    
1099     if (pp[1] == '\\') *pp++ = '\\';
1100    
1101 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1102     for callouts. */
1103 nigel 3
1104     *pp++ = 0;
1105 nigel 75 strcpy((char *)pbuffer, (char *)p);
1106 nigel 3
1107     /* Look for options after final delimiter */
1108    
1109     options = 0;
1110     study_options = 0;
1111 nigel 31 log_store = showstore; /* default from command line */
1112    
1113 nigel 3 while (*pp != 0)
1114     {
1115     switch (*pp++)
1116     {
1117 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1118 nigel 35 case 'g': do_g = 1; break;
1119 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1120     case 'm': options |= PCRE_MULTILINE; break;
1121     case 's': options |= PCRE_DOTALL; break;
1122     case 'x': options |= PCRE_EXTENDED; break;
1123 nigel 25
1124 nigel 35 case '+': do_showrest = 1; break;
1125 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1126 nigel 93 case 'B': do_debug = 1; break;
1127 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1128 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1129 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1130 nigel 75 case 'F': do_flip = 1; break;
1131 nigel 35 case 'G': do_G = 1; break;
1132 nigel 25 case 'I': do_showinfo = 1; break;
1133 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1134 nigel 31 case 'M': log_store = 1; break;
1135 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1136 nigel 37
1137     #if !defined NOPOSIX
1138 nigel 3 case 'P': do_posix = 1; break;
1139 nigel 37 #endif
1140    
1141 nigel 3 case 'S': do_study = 1; break;
1142 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1143 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1144 ph10 126 case 'Z': debug_lengths = 0; break;
1145 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1146 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1147 nigel 25
1148     case 'L':
1149     ppp = pp;
1150 nigel 93 /* The '\r' test here is so that it works on Windows. */
1151     /* The 0 (NUL) test is just in case this is an unterminated line. */
1152     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1153 nigel 25 *ppp = 0;
1154     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1155     {
1156     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1157     goto SKIP_DATA;
1158     }
1159 nigel 93 locale_set = 1;
1160 nigel 25 tables = pcre_maketables();
1161     pp = ppp;
1162     break;
1163    
1164 nigel 75 case '>':
1165     to_file = pp;
1166     while (*pp != 0) pp++;
1167     while (isspace(pp[-1])) pp--;
1168     *pp = 0;
1169     break;
1170    
1171 nigel 91 case '<':
1172     {
1173     int x = check_newline(pp, outfile);
1174     if (x == 0) goto SKIP_DATA;
1175     options |= x;
1176     while (*pp++ != '>');
1177     }
1178     break;
1179    
1180 nigel 77 case '\r': /* So that it works in Windows */
1181     case '\n':
1182     case ' ':
1183     break;
1184 nigel 75
1185 nigel 3 default:
1186     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1187     goto SKIP_DATA;
1188     }
1189     }
1190    
1191 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1192 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1193     local character tables. */
1194 nigel 3
1195 nigel 37 #if !defined NOPOSIX
1196 nigel 3 if (posix || do_posix)
1197     {
1198     int rc;
1199     int cflags = 0;
1200 nigel 75
1201 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1202     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1203 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1204 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1205     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1206    
1207 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1208    
1209     /* Compilation failed; go back for another re, skipping to blank line
1210     if non-interactive. */
1211    
1212     if (rc != 0)
1213     {
1214 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1215 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1216     goto SKIP_DATA;
1217     }
1218     }
1219    
1220     /* Handle compiling via the native interface */
1221    
1222     else
1223 nigel 37 #endif /* !defined NOPOSIX */
1224    
1225 nigel 3 {
1226 nigel 93 if (timeit > 0)
1227 nigel 3 {
1228     register int i;
1229     clock_t time_taken;
1230     clock_t start_time = clock();
1231 nigel 93 for (i = 0; i < timeit; i++)
1232 nigel 3 {
1233 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1234 nigel 3 if (re != NULL) free(re);
1235     }
1236     time_taken = clock() - start_time;
1237 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1238     (((double)time_taken * 1000.0) / (double)timeit) /
1239 nigel 63 (double)CLOCKS_PER_SEC);
1240 nigel 3 }
1241    
1242 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1243 nigel 3
1244     /* Compilation failed; go back for another re, skipping to blank line
1245     if non-interactive. */
1246    
1247     if (re == NULL)
1248     {
1249     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1250     SKIP_DATA:
1251     if (infile != stdin)
1252     {
1253     for (;;)
1254     {
1255 nigel 91 if (extend_inputline(infile, buffer) == NULL)
1256 nigel 11 {
1257     done = 1;
1258     goto CONTINUE;
1259     }
1260 nigel 3 len = (int)strlen((char *)buffer);
1261     while (len > 0 && isspace(buffer[len-1])) len--;
1262     if (len == 0) break;
1263     }
1264     fprintf(outfile, "\n");
1265     }
1266 nigel 25 goto CONTINUE;
1267 nigel 3 }
1268    
1269 nigel 43 /* Compilation succeeded; print data if required. There are now two
1270     info-returning functions. The old one has a limited interface and
1271     returns only limited data. Check that it agrees with the newer one. */
1272 nigel 3
1273 nigel 63 if (log_store)
1274     fprintf(outfile, "Memory allocation (code space): %d\n",
1275     (int)(gotten_store -
1276     sizeof(real_pcre) -
1277     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1278    
1279 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1280     and remember the store that was got. */
1281    
1282     true_size = ((real_pcre *)re)->size;
1283     regex_gotten_store = gotten_store;
1284    
1285     /* If /S was present, study the regexp to generate additional info to
1286     help with the matching. */
1287    
1288     if (do_study)
1289     {
1290 nigel 93 if (timeit > 0)
1291 nigel 75 {
1292     register int i;
1293     clock_t time_taken;
1294     clock_t start_time = clock();
1295 nigel 93 for (i = 0; i < timeit; i++)
1296 nigel 75 extra = pcre_study(re, study_options, &error);
1297     time_taken = clock() - start_time;
1298     if (extra != NULL) free(extra);
1299 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1300     (((double)time_taken * 1000.0) / (double)timeit) /
1301 nigel 75 (double)CLOCKS_PER_SEC);
1302     }
1303     extra = pcre_study(re, study_options, &error);
1304     if (error != NULL)
1305     fprintf(outfile, "Failed to study: %s\n", error);
1306     else if (extra != NULL)
1307     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1308     }
1309    
1310     /* If the 'F' option was present, we flip the bytes of all the integer
1311     fields in the regex data block and the study block. This is to make it
1312     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1313     compiled on a different architecture. */
1314    
1315     if (do_flip)
1316     {
1317     real_pcre *rre = (real_pcre *)re;
1318     rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1319     rre->size = byteflip(rre->size, sizeof(rre->size));
1320     rre->options = byteflip(rre->options, sizeof(rre->options));
1321     rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1322     rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1323     rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1324     rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1325     rre->name_table_offset = byteflip(rre->name_table_offset,
1326     sizeof(rre->name_table_offset));
1327     rre->name_entry_size = byteflip(rre->name_entry_size,
1328     sizeof(rre->name_entry_size));
1329     rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1330    
1331     if (extra != NULL)
1332     {
1333     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1334     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1335     rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1336     }
1337     }
1338    
1339     /* Extract information from the compiled data if required */
1340    
1341     SHOW_INFO:
1342    
1343 nigel 93 if (do_debug)
1344     {
1345     fprintf(outfile, "------------------------------------------------------------------\n");
1346 ph10 116 pcre_printint(re, outfile, debug_lengths);
1347 nigel 93 }
1348    
1349 nigel 25 if (do_showinfo)
1350 nigel 3 {
1351 nigel 75 unsigned long int get_options, all_options;
1352 nigel 79 #if !defined NOINFOCHECK
1353 nigel 43 int old_first_char, old_options, old_count;
1354 nigel 79 #endif
1355 nigel 43 int count, backrefmax, first_char, need_char;
1356 nigel 63 int nameentrysize, namecount;
1357     const uschar *nametable;
1358 nigel 3
1359 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1360 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1361     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1362     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1363 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1364 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1365 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1366     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1367 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1368 nigel 43
1369 nigel 79 #if !defined NOINFOCHECK
1370 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1371 nigel 3 if (count < 0) fprintf(outfile,
1372 nigel 43 "Error %d from pcre_info()\n", count);
1373 nigel 3 else
1374     {
1375 nigel 43 if (old_count != count) fprintf(outfile,
1376     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1377     old_count);
1378 nigel 37
1379 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1380     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1381     first_char, old_first_char);
1382 nigel 37
1383 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1384     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1385     get_options, old_options);
1386 nigel 43 }
1387 nigel 79 #endif
1388 nigel 43
1389 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1390 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1391 nigel 77 (int)size, (int)regex_gotten_store);
1392 nigel 43
1393     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1394     if (backrefmax > 0)
1395     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1396 nigel 63
1397     if (namecount > 0)
1398     {
1399     fprintf(outfile, "Named capturing subpatterns:\n");
1400     while (namecount-- > 0)
1401     {
1402     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1403     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1404     GET2(nametable, 0));
1405     nametable += nameentrysize;
1406     }
1407     }
1408    
1409 nigel 75 /* The NOPARTIAL bit is a private bit in the options, so we have
1410     to fish it out via our back door */
1411    
1412     all_options = ((real_pcre *)re)->options;
1413     if (do_flip)
1414     {
1415     all_options = byteflip(all_options, sizeof(all_options));
1416 nigel 91 }
1417 nigel 75
1418     if ((all_options & PCRE_NOPARTIAL) != 0)
1419     fprintf(outfile, "Partial matching not supported\n");
1420    
1421 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1422 nigel 91 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1423 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1424     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1425     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1426     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1427 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1428 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1429     ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1430     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1431     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1432 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1433 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1434 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1435     ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1436 nigel 43
1437 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1438 nigel 91 {
1439     case PCRE_NEWLINE_CR:
1440     fprintf(outfile, "Forced newline sequence: CR\n");
1441     break;
1442 nigel 43
1443 nigel 91 case PCRE_NEWLINE_LF:
1444     fprintf(outfile, "Forced newline sequence: LF\n");
1445     break;
1446    
1447     case PCRE_NEWLINE_CRLF:
1448     fprintf(outfile, "Forced newline sequence: CRLF\n");
1449     break;
1450    
1451 ph10 149 case PCRE_NEWLINE_ANYCRLF:
1452     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1453     break;
1454    
1455 nigel 93 case PCRE_NEWLINE_ANY:
1456     fprintf(outfile, "Forced newline sequence: ANY\n");
1457     break;
1458    
1459 nigel 91 default:
1460     break;
1461     }
1462    
1463 nigel 43 if (first_char == -1)
1464     {
1465 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1466 nigel 43 }
1467     else if (first_char < 0)
1468     {
1469     fprintf(outfile, "No first char\n");
1470     }
1471     else
1472     {
1473 nigel 63 int ch = first_char & 255;
1474 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1475 nigel 63 "" : " (caseless)";
1476 nigel 93 if (PRINTHEX(ch))
1477 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1478 nigel 3 else
1479 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1480 nigel 43 }
1481 nigel 37
1482 nigel 43 if (need_char < 0)
1483     {
1484     fprintf(outfile, "No need char\n");
1485 nigel 3 }
1486 nigel 43 else
1487     {
1488 nigel 63 int ch = need_char & 255;
1489 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1490 nigel 63 "" : " (caseless)";
1491 nigel 93 if (PRINTHEX(ch))
1492 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1493 nigel 43 else
1494 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1495 nigel 43 }
1496 nigel 75
1497     /* Don't output study size; at present it is in any case a fixed
1498     value, but it varies, depending on the computer architecture, and
1499     so messes up the test suite. (And with the /F option, it might be
1500     flipped.) */
1501    
1502     if (do_study)
1503     {
1504     if (extra == NULL)
1505     fprintf(outfile, "Study returned NULL\n");
1506     else
1507     {
1508     uschar *start_bits = NULL;
1509     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1510    
1511     if (start_bits == NULL)
1512     fprintf(outfile, "No starting byte set\n");
1513     else
1514     {
1515     int i;
1516     int c = 24;
1517     fprintf(outfile, "Starting byte set: ");
1518     for (i = 0; i < 256; i++)
1519     {
1520     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1521     {
1522     if (c > 75)
1523     {
1524     fprintf(outfile, "\n ");
1525     c = 2;
1526     }
1527 nigel 93 if (PRINTHEX(i) && i != ' ')
1528 nigel 75 {
1529     fprintf(outfile, "%c ", i);
1530     c += 2;
1531     }
1532     else
1533     {
1534     fprintf(outfile, "\\x%02x ", i);
1535     c += 5;
1536     }
1537     }
1538     }
1539     fprintf(outfile, "\n");
1540     }
1541     }
1542     }
1543 nigel 3 }
1544    
1545 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1546     that is all. The first 8 bytes of the file are the regex length and then
1547     the study length, in big-endian order. */
1548 nigel 3
1549 nigel 75 if (to_file != NULL)
1550 nigel 3 {
1551 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1552     if (f == NULL)
1553 nigel 3 {
1554 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1555 nigel 3 }
1556 nigel 75 else
1557     {
1558     uschar sbuf[8];
1559     sbuf[0] = (true_size >> 24) & 255;
1560     sbuf[1] = (true_size >> 16) & 255;
1561     sbuf[2] = (true_size >> 8) & 255;
1562     sbuf[3] = (true_size) & 255;
1563 nigel 3
1564 nigel 75 sbuf[4] = (true_study_size >> 24) & 255;
1565     sbuf[5] = (true_study_size >> 16) & 255;
1566     sbuf[6] = (true_study_size >> 8) & 255;
1567     sbuf[7] = (true_study_size) & 255;
1568 nigel 3
1569 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1570     fwrite(re, 1, true_size, f) < true_size)
1571     {
1572     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1573     }
1574 nigel 3 else
1575     {
1576 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1577     if (extra != NULL)
1578 nigel 3 {
1579 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1580     true_study_size)
1581 nigel 3 {
1582 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1583     strerror(errno));
1584 nigel 3 }
1585 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1586 nigel 93
1587 nigel 3 }
1588     }
1589 nigel 75 fclose(f);
1590 nigel 3 }
1591 nigel 77
1592     new_free(re);
1593     if (extra != NULL) new_free(extra);
1594     if (tables != NULL) new_free((void *)tables);
1595 nigel 75 continue; /* With next regex */
1596 nigel 3 }
1597 nigel 75 } /* End of non-POSIX compile */
1598 nigel 3
1599     /* Read data lines and test them */
1600    
1601     for (;;)
1602     {
1603 nigel 87 uschar *q;
1604 ph10 147 uschar *bptr;
1605 nigel 57 int *use_offsets = offsets;
1606 nigel 53 int use_size_offsets = size_offsets;
1607 nigel 63 int callout_data = 0;
1608     int callout_data_set = 0;
1609 nigel 3 int count, c;
1610 nigel 29 int copystrings = 0;
1611 nigel 63 int find_match_limit = 0;
1612 nigel 29 int getstrings = 0;
1613     int getlist = 0;
1614 nigel 39 int gmatched = 0;
1615 nigel 35 int start_offset = 0;
1616 nigel 41 int g_notempty = 0;
1617 nigel 77 int use_dfa = 0;
1618 nigel 3
1619     options = 0;
1620    
1621 nigel 91 *copynames = 0;
1622     *getnames = 0;
1623    
1624     copynamesptr = copynames;
1625     getnamesptr = getnames;
1626    
1627 nigel 63 pcre_callout = callout;
1628     first_callout = 1;
1629     callout_extra = 0;
1630     callout_count = 0;
1631     callout_fail_count = 999999;
1632     callout_fail_id = -1;
1633 nigel 73 show_malloc = 0;
1634 nigel 63
1635 nigel 91 if (extra != NULL) extra->flags &=
1636     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1637    
1638     len = 0;
1639     for (;;)
1640 nigel 11 {
1641 nigel 91 if (infile == stdin) printf("data> ");
1642     if (extend_inputline(infile, buffer + len) == NULL)
1643     {
1644     if (len > 0) break;
1645     done = 1;
1646     goto CONTINUE;
1647     }
1648     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1649     len = (int)strlen((char *)buffer);
1650     if (buffer[len-1] == '\n') break;
1651 nigel 11 }
1652 nigel 3
1653     while (len > 0 && isspace(buffer[len-1])) len--;
1654     buffer[len] = 0;
1655     if (len == 0) break;
1656    
1657     p = buffer;
1658     while (isspace(*p)) p++;
1659    
1660 ph10 147 bptr = q = dbuffer;
1661 nigel 3 while ((c = *p++) != 0)
1662     {
1663     int i = 0;
1664     int n = 0;
1665 nigel 63
1666 nigel 3 if (c == '\\') switch ((c = *p++))
1667     {
1668     case 'a': c = 7; break;
1669     case 'b': c = '\b'; break;
1670     case 'e': c = 27; break;
1671     case 'f': c = '\f'; break;
1672     case 'n': c = '\n'; break;
1673     case 'r': c = '\r'; break;
1674     case 't': c = '\t'; break;
1675     case 'v': c = '\v'; break;
1676    
1677     case '0': case '1': case '2': case '3':
1678     case '4': case '5': case '6': case '7':
1679     c -= '0';
1680     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1681     c = c * 8 + *p++ - '0';
1682 nigel 91
1683     #if !defined NOUTF8
1684     if (use_utf8 && c > 255)
1685     {
1686     unsigned char buff8[8];
1687     int ii, utn;
1688     utn = ord2utf8(c, buff8);
1689     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1690     c = buff8[ii]; /* Last byte */
1691     }
1692     #endif
1693 nigel 3 break;
1694    
1695     case 'x':
1696 nigel 49
1697     /* Handle \x{..} specially - new Perl thing for utf8 */
1698    
1699 nigel 79 #if !defined NOUTF8
1700 nigel 49 if (*p == '{')
1701     {
1702     unsigned char *pt = p;
1703     c = 0;
1704     while (isxdigit(*(++pt)))
1705     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1706     if (*pt == '}')
1707     {
1708 nigel 67 unsigned char buff8[8];
1709 nigel 49 int ii, utn;
1710 nigel 85 utn = ord2utf8(c, buff8);
1711 nigel 67 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1712     c = buff8[ii]; /* Last byte */
1713 nigel 49 p = pt + 1;
1714     break;
1715     }
1716     /* Not correct form; fall through */
1717     }
1718 nigel 79 #endif
1719 nigel 49
1720     /* Ordinary \x */
1721    
1722 nigel 3 c = 0;
1723     while (i++ < 2 && isxdigit(*p))
1724     {
1725     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1726     p++;
1727     }
1728     break;
1729    
1730 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1731 nigel 3 p--;
1732     continue;
1733    
1734 nigel 75 case '>':
1735     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1736     continue;
1737    
1738 nigel 3 case 'A': /* Option setting */
1739     options |= PCRE_ANCHORED;
1740     continue;
1741    
1742     case 'B':
1743     options |= PCRE_NOTBOL;
1744     continue;
1745    
1746 nigel 29 case 'C':
1747 nigel 63 if (isdigit(*p)) /* Set copy string */
1748     {
1749     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1750     copystrings |= 1 << n;
1751     }
1752     else if (isalnum(*p))
1753     {
1754 nigel 91 uschar *npp = copynamesptr;
1755 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1756 nigel 91 *npp++ = 0;
1757 nigel 67 *npp = 0;
1758 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1759 nigel 63 if (n < 0)
1760 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1761     copynamesptr = npp;
1762 nigel 63 }
1763     else if (*p == '+')
1764     {
1765     callout_extra = 1;
1766     p++;
1767     }
1768     else if (*p == '-')
1769     {
1770     pcre_callout = NULL;
1771     p++;
1772     }
1773     else if (*p == '!')
1774     {
1775     callout_fail_id = 0;
1776     p++;
1777     while(isdigit(*p))
1778     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1779     callout_fail_count = 0;
1780     if (*p == '!')
1781     {
1782     p++;
1783     while(isdigit(*p))
1784     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1785     }
1786     }
1787     else if (*p == '*')
1788     {
1789     int sign = 1;
1790     callout_data = 0;
1791     if (*(++p) == '-') { sign = -1; p++; }
1792     while(isdigit(*p))
1793     callout_data = callout_data * 10 + *p++ - '0';
1794     callout_data *= sign;
1795     callout_data_set = 1;
1796     }
1797 nigel 29 continue;
1798    
1799 nigel 79 #if !defined NODFA
1800 nigel 77 case 'D':
1801 nigel 79 #if !defined NOPOSIX
1802 nigel 77 if (posix || do_posix)
1803     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1804     else
1805 nigel 79 #endif
1806 nigel 77 use_dfa = 1;
1807     continue;
1808    
1809     case 'F':
1810     options |= PCRE_DFA_SHORTEST;
1811     continue;
1812 nigel 79 #endif
1813 nigel 77
1814 nigel 29 case 'G':
1815 nigel 63 if (isdigit(*p))
1816     {
1817     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1818     getstrings |= 1 << n;
1819     }
1820     else if (isalnum(*p))
1821     {
1822 nigel 91 uschar *npp = getnamesptr;
1823 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1824 nigel 91 *npp++ = 0;
1825 nigel 67 *npp = 0;
1826 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1827 nigel 63 if (n < 0)
1828 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1829     getnamesptr = npp;
1830 nigel 63 }
1831 nigel 29 continue;
1832    
1833     case 'L':
1834     getlist = 1;
1835     continue;
1836    
1837 nigel 63 case 'M':
1838     find_match_limit = 1;
1839     continue;
1840    
1841 nigel 37 case 'N':
1842     options |= PCRE_NOTEMPTY;
1843     continue;
1844    
1845 nigel 3 case 'O':
1846     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1847 nigel 53 if (n > size_offsets_max)
1848     {
1849     size_offsets_max = n;
1850 nigel 57 free(offsets);
1851 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1852 nigel 53 if (offsets == NULL)
1853     {
1854     printf("** Failed to get %d bytes of memory for offsets vector\n",
1855     size_offsets_max * sizeof(int));
1856 nigel 77 yield = 1;
1857     goto EXIT;
1858 nigel 53 }
1859     }
1860     use_size_offsets = n;
1861 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1862 nigel 3 continue;
1863    
1864 nigel 75 case 'P':
1865     options |= PCRE_PARTIAL;
1866     continue;
1867    
1868 nigel 91 case 'Q':
1869     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1870     if (extra == NULL)
1871     {
1872     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1873     extra->flags = 0;
1874     }
1875     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1876     extra->match_limit_recursion = n;
1877     continue;
1878    
1879     case 'q':
1880     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1881     if (extra == NULL)
1882     {
1883     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1884     extra->flags = 0;
1885     }
1886     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1887     extra->match_limit = n;
1888     continue;
1889    
1890 nigel 79 #if !defined NODFA
1891 nigel 77 case 'R':
1892     options |= PCRE_DFA_RESTART;
1893     continue;
1894 nigel 79 #endif
1895 nigel 77
1896 nigel 73 case 'S':
1897     show_malloc = 1;
1898     continue;
1899    
1900 nigel 3 case 'Z':
1901     options |= PCRE_NOTEOL;
1902     continue;
1903 nigel 71
1904     case '?':
1905     options |= PCRE_NO_UTF8_CHECK;
1906     continue;
1907 nigel 91
1908     case '<':
1909     {
1910     int x = check_newline(p, outfile);
1911     if (x == 0) goto NEXT_DATA;
1912     options |= x;
1913     while (*p++ != '>');
1914     }
1915     continue;
1916 nigel 3 }
1917 nigel 9 *q++ = c;
1918 nigel 3 }
1919 nigel 9 *q = 0;
1920     len = q - dbuffer;
1921 nigel 3
1922 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
1923     {
1924     printf("**Match limit not relevant for DFA matching: ignored\n");
1925     find_match_limit = 0;
1926     }
1927    
1928 nigel 3 /* Handle matching via the POSIX interface, which does not
1929 nigel 63 support timing or playing with the match limit or callout data. */
1930 nigel 3
1931 nigel 37 #if !defined NOPOSIX
1932 nigel 3 if (posix || do_posix)
1933     {
1934     int rc;
1935     int eflags = 0;
1936 nigel 63 regmatch_t *pmatch = NULL;
1937     if (use_size_offsets > 0)
1938 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1939 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1940     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1941    
1942 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1943 nigel 3
1944     if (rc != 0)
1945     {
1946 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1947 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1948     }
1949 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1950     != 0)
1951     {
1952     fprintf(outfile, "Matched with REG_NOSUB\n");
1953     }
1954 nigel 3 else
1955     {
1956 nigel 7 size_t i;
1957 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
1958 nigel 3 {
1959     if (pmatch[i].rm_so >= 0)
1960     {
1961 nigel 23 fprintf(outfile, "%2d: ", (int)i);
1962 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
1963     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1964 nigel 3 fprintf(outfile, "\n");
1965 nigel 35 if (i == 0 && do_showrest)
1966     {
1967     fprintf(outfile, " 0+ ");
1968 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1969     outfile);
1970 nigel 35 fprintf(outfile, "\n");
1971     }
1972 nigel 3 }
1973     }
1974     }
1975 nigel 53 free(pmatch);
1976 nigel 3 }
1977    
1978 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
1979 nigel 3
1980 nigel 37 else
1981     #endif /* !defined NOPOSIX */
1982    
1983 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
1984 nigel 3 {
1985 nigel 93 if (timeitm > 0)
1986 nigel 3 {
1987     register int i;
1988     clock_t time_taken;
1989     clock_t start_time = clock();
1990 nigel 77
1991 nigel 79 #if !defined NODFA
1992 nigel 77 if (all_use_dfa || use_dfa)
1993     {
1994     int workspace[1000];
1995 nigel 93 for (i = 0; i < timeitm; i++)
1996 nigel 77 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1997     options | g_notempty, use_offsets, use_size_offsets, workspace,
1998     sizeof(workspace)/sizeof(int));
1999     }
2000     else
2001 nigel 79 #endif
2002 nigel 77
2003 nigel 93 for (i = 0; i < timeitm; i++)
2004 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2005 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2006 nigel 77
2007 nigel 3 time_taken = clock() - start_time;
2008 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2009     (((double)time_taken * 1000.0) / (double)timeitm) /
2010 nigel 63 (double)CLOCKS_PER_SEC);
2011 nigel 3 }
2012    
2013 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2014 nigel 87 varying limits in order to find the minimum value for the match limit and
2015     for the recursion limit. */
2016 nigel 63
2017     if (find_match_limit)
2018     {
2019     if (extra == NULL)
2020     {
2021 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2022 nigel 63 extra->flags = 0;
2023     }
2024    
2025 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2026 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2027     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2028     PCRE_ERROR_MATCHLIMIT, "match()");
2029 nigel 63
2030 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2031     options|g_notempty, use_offsets, use_size_offsets,
2032     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2033     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2034 nigel 63 }
2035    
2036     /* If callout_data is set, use the interface with additional data */
2037    
2038     else if (callout_data_set)
2039     {
2040     if (extra == NULL)
2041     {
2042 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2043 nigel 63 extra->flags = 0;
2044     }
2045     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2046 nigel 71 extra->callout_data = &callout_data;
2047 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2048     options | g_notempty, use_offsets, use_size_offsets);
2049     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2050     }
2051    
2052     /* The normal case is just to do the match once, with the default
2053     value of match_limit. */
2054    
2055 nigel 79 #if !defined NODFA
2056 nigel 77 else if (all_use_dfa || use_dfa)
2057     {
2058     int workspace[1000];
2059     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2060     options | g_notempty, use_offsets, use_size_offsets, workspace,
2061     sizeof(workspace)/sizeof(int));
2062     if (count == 0)
2063     {
2064     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2065     count = use_size_offsets/2;
2066     }
2067     }
2068 nigel 79 #endif
2069 nigel 77
2070 nigel 75 else
2071     {
2072     count = pcre_exec(re, extra, (char *)bptr, len,
2073     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2074 nigel 77 if (count == 0)
2075     {
2076     fprintf(outfile, "Matched, but too many substrings\n");
2077     count = use_size_offsets/3;
2078     }
2079 nigel 75 }
2080 nigel 3
2081 nigel 39 /* Matched */
2082    
2083 nigel 3 if (count >= 0)
2084     {
2085 nigel 93 int i, maxcount;
2086    
2087     #if !defined NODFA
2088     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2089     #endif
2090     maxcount = use_size_offsets/3;
2091    
2092     /* This is a check against a lunatic return value. */
2093    
2094     if (count > maxcount)
2095     {
2096     fprintf(outfile,
2097     "** PCRE error: returned count %d is too big for offset size %d\n",
2098     count, use_size_offsets);
2099     count = use_size_offsets/3;
2100     if (do_g || do_G)
2101     {
2102     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2103     do_g = do_G = FALSE; /* Break g/G loop */
2104     }
2105     }
2106    
2107 nigel 29 for (i = 0; i < count * 2; i += 2)
2108 nigel 3 {
2109 nigel 57 if (use_offsets[i] < 0)
2110 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2111     else
2112     {
2113     fprintf(outfile, "%2d: ", i/2);
2114 nigel 63 (void)pchars(bptr + use_offsets[i],
2115     use_offsets[i+1] - use_offsets[i], outfile);
2116 nigel 3 fprintf(outfile, "\n");
2117 nigel 35 if (i == 0)
2118     {
2119     if (do_showrest)
2120     {
2121     fprintf(outfile, " 0+ ");
2122 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2123     outfile);
2124 nigel 35 fprintf(outfile, "\n");
2125     }
2126     }
2127 nigel 3 }
2128     }
2129 nigel 29
2130     for (i = 0; i < 32; i++)
2131     {
2132     if ((copystrings & (1 << i)) != 0)
2133     {
2134 nigel 91 char copybuffer[256];
2135 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2136 nigel 37 i, copybuffer, sizeof(copybuffer));
2137 nigel 29 if (rc < 0)
2138     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2139     else
2140 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2141 nigel 29 }
2142     }
2143    
2144 nigel 91 for (copynamesptr = copynames;
2145     *copynamesptr != 0;
2146     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2147     {
2148     char copybuffer[256];
2149     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2150     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2151     if (rc < 0)
2152     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2153     else
2154     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2155     }
2156    
2157 nigel 29 for (i = 0; i < 32; i++)
2158     {
2159     if ((getstrings & (1 << i)) != 0)
2160     {
2161     const char *substring;
2162 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2163 nigel 29 i, &substring);
2164     if (rc < 0)
2165     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2166     else
2167     {
2168     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2169 nigel 49 pcre_free_substring(substring);
2170 nigel 29 }
2171     }
2172     }
2173    
2174 nigel 91 for (getnamesptr = getnames;
2175     *getnamesptr != 0;
2176     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2177     {
2178     const char *substring;
2179     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2180     count, (char *)getnamesptr, &substring);
2181     if (rc < 0)
2182     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2183     else
2184     {
2185     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2186     pcre_free_substring(substring);
2187     }
2188     }
2189    
2190 nigel 29 if (getlist)
2191     {
2192     const char **stringlist;
2193 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2194 nigel 29 &stringlist);
2195     if (rc < 0)
2196     fprintf(outfile, "get substring list failed %d\n", rc);
2197     else
2198     {
2199     for (i = 0; i < count; i++)
2200     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2201     if (stringlist[i] != NULL)
2202     fprintf(outfile, "string list not terminated by NULL\n");
2203 nigel 49 /* free((void *)stringlist); */
2204     pcre_free_substring_list(stringlist);
2205 nigel 29 }
2206     }
2207 nigel 39 }
2208 nigel 29
2209 nigel 75 /* There was a partial match */
2210    
2211     else if (count == PCRE_ERROR_PARTIAL)
2212     {
2213 nigel 77 fprintf(outfile, "Partial match");
2214 nigel 79 #if !defined NODFA
2215 nigel 77 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2216     fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2217     bptr + use_offsets[0]);
2218 nigel 79 #endif
2219 nigel 77 fprintf(outfile, "\n");
2220 nigel 75 break; /* Out of the /g loop */
2221     }
2222    
2223 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2224 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2225     to advance the start offset, and continue. We won't be at the end of the
2226     string - that was checked before setting g_notempty.
2227 nigel 39
2228 ph10 150 Complication arises in the case when the newline option is "any" or
2229 ph10 149 "anycrlf". If the previous match was at the end of a line terminated by
2230     CRLF, an advance of one character just passes the \r, whereas we should
2231     prefer the longer newline sequence, as does the code in pcre_exec().
2232     Fudge the offset value to achieve this.
2233 ph10 144
2234 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2235     character, not one byte. */
2236    
2237 nigel 3 else
2238     {
2239 nigel 41 if (g_notempty != 0)
2240 nigel 35 {
2241 nigel 73 int onechar = 1;
2242 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2243 nigel 57 use_offsets[0] = start_offset;
2244 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2245     {
2246     int d;
2247     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2248     obits = (d == '\r')? PCRE_NEWLINE_CR :
2249     (d == '\n')? PCRE_NEWLINE_LF :
2250     (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2251 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2252 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2253     }
2254 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2255 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2256 ph10 149 &&
2257 ph10 143 start_offset < len - 1 &&
2258     bptr[start_offset] == '\r' &&
2259     bptr[start_offset+1] == '\n')
2260 ph10 144 onechar++;
2261 ph10 143 else if (use_utf8)
2262 nigel 73 {
2263     while (start_offset + onechar < len)
2264     {
2265     int tb = bptr[start_offset+onechar];
2266     if (tb <= 127) break;
2267     tb &= 0xc0;
2268     if (tb != 0 && tb != 0xc0) onechar++;
2269     }
2270     }
2271     use_offsets[1] = start_offset + onechar;
2272 nigel 35 }
2273 nigel 41 else
2274     {
2275 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2276 nigel 41 {
2277 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
2278 nigel 41 }
2279 nigel 73 else fprintf(outfile, "Error %d\n", count);
2280 nigel 41 break; /* Out of the /g loop */
2281     }
2282 nigel 3 }
2283 nigel 35
2284 nigel 39 /* If not /g or /G we are done */
2285    
2286     if (!do_g && !do_G) break;
2287    
2288 nigel 41 /* If we have matched an empty string, first check to see if we are at
2289     the end of the subject. If so, the /g loop is over. Otherwise, mimic
2290     what Perl's /g option does. This turns out to be rather cunning. First
2291 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2292     same point. If this fails (picked up above) we advance to the next
2293 ph10 143 character. */
2294 ph10 142
2295 nigel 41 g_notempty = 0;
2296 ph10 142
2297 nigel 57 if (use_offsets[0] == use_offsets[1])
2298 nigel 41 {
2299 nigel 57 if (use_offsets[0] == len) break;
2300 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2301 nigel 41 }
2302 nigel 39
2303     /* For /g, update the start offset, leaving the rest alone */
2304    
2305 ph10 143 if (do_g) start_offset = use_offsets[1];
2306 nigel 39
2307     /* For /G, update the pointer and length */
2308    
2309     else
2310 nigel 35 {
2311 ph10 143 bptr += use_offsets[1];
2312     len -= use_offsets[1];
2313 nigel 35 }
2314 nigel 39 } /* End of loop for /g and /G */
2315 nigel 91
2316     NEXT_DATA: continue;
2317 nigel 39 } /* End of loop for data lines */
2318 nigel 3
2319 nigel 11 CONTINUE:
2320 nigel 37
2321     #if !defined NOPOSIX
2322 nigel 3 if (posix || do_posix) regfree(&preg);
2323 nigel 37 #endif
2324    
2325 nigel 77 if (re != NULL) new_free(re);
2326     if (extra != NULL) new_free(extra);
2327 nigel 25 if (tables != NULL)
2328     {
2329 nigel 77 new_free((void *)tables);
2330 nigel 25 setlocale(LC_CTYPE, "C");
2331 nigel 93 locale_set = 0;
2332 nigel 25 }
2333 nigel 3 }
2334    
2335 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2336 nigel 77
2337     EXIT:
2338    
2339     if (infile != NULL && infile != stdin) fclose(infile);
2340     if (outfile != NULL && outfile != stdout) fclose(outfile);
2341    
2342     free(buffer);
2343     free(dbuffer);
2344     free(pbuffer);
2345     free(offsets);
2346    
2347     return yield;
2348 nigel 3 }
2349    
2350 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12