/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 93 - (hide annotations) (download)
Sat Feb 24 21:41:42 2007 UTC (7 years, 7 months ago) by nigel
File MIME type: text/plain
File size: 66834 byte(s)
Load pcre-7.0 into code/trunk.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
46 nigel 3
47 nigel 93
48     /* A number of things vary for Windows builds. Originally, pcretest opened its
49     input and output without "b"; then I was told that "b" was needed in some
50     environments, so it was added for release 5.0 to both the input and output. (It
51     makes no difference on Unix-like systems.) Later I was told that it is wrong
52     for the input on Windows. I've now abstracted the modes into two macros that
53     are set here, to make it easier to fiddle with them, and removed "b" from the
54     input mode under Windows. */
55    
56     #if defined(_WIN32) || defined(WIN32)
57     #include <io.h> /* For _setmode() */
58     #include <fcntl.h> /* For _O_BINARY */
59     #define INPUT_MODE "r"
60     #define OUTPUT_MODE "wb"
61    
62     #else
63     #include <sys/time.h> /* These two includes are needed */
64     #include <sys/resource.h> /* for setrlimit(). */
65     #define INPUT_MODE "rb"
66     #define OUTPUT_MODE "wb"
67 nigel 91 #endif
68    
69 nigel 93
70 nigel 63 #define PCRE_SPY /* For Win32 build, import data, not export */
71 nigel 37
72 nigel 85 /* We include pcre_internal.h because we need the internal info for displaying
73     the results of pcre_study() and we also need to know about the internal
74     macros, structures, and other internal data values; pcretest has "inside
75     information" compared to a program that strictly follows the PCRE API. */
76 nigel 77
77     #include "pcre_internal.h"
78    
79 nigel 85 /* We need access to the data tables that PCRE uses. So as not to have to keep
80     two copies, we include the source file here, changing the names of the external
81     symbols to prevent clashes. */
82 nigel 77
83 nigel 85 #define _pcre_utf8_table1 utf8_table1
84     #define _pcre_utf8_table1_size utf8_table1_size
85     #define _pcre_utf8_table2 utf8_table2
86     #define _pcre_utf8_table3 utf8_table3
87     #define _pcre_utf8_table4 utf8_table4
88     #define _pcre_utt utt
89     #define _pcre_utt_size utt_size
90     #define _pcre_OP_lengths OP_lengths
91    
92     #include "pcre_tables.c"
93    
94     /* We also need the pcre_printint() function for printing out compiled
95     patterns. This function is in a separate file so that it can be included in
96 nigel 93 pcre_compile.c when that module is compiled with debugging enabled.
97 nigel 85
98 nigel 93 The definition of the macro PRINTABLE, which determines whether to print an
99     output character as-is or as a hex value when showing compiled patterns, is
100     contained in this file. We use it here also, in cases when the locale has not
101     been explicitly changed, so as to get consistent output from systems that
102     differ in their output from isprint() even in the "C" locale. */
103    
104 nigel 85 #include "pcre_printint.src"
105    
106 nigel 93 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
107 nigel 85
108 nigel 93
109 nigel 37 /* It is possible to compile this test program without including support for
110     testing the POSIX interface, though this is not available via the standard
111     Makefile. */
112    
113     #if !defined NOPOSIX
114 nigel 3 #include "pcreposix.h"
115 nigel 37 #endif
116 nigel 3
117 nigel 79 /* It is also possible, for the benefit of the version imported into Exim, to
118     build pcretest without support for UTF8 (define NOUTF8), without the interface
119     to the DFA matcher (NODFA), and without the doublecheck of the old "info"
120     function (define NOINFOCHECK). */
121    
122    
123 nigel 85 /* Other parameters */
124    
125 nigel 3 #ifndef CLOCKS_PER_SEC
126     #ifdef CLK_TCK
127     #define CLOCKS_PER_SEC CLK_TCK
128     #else
129     #define CLOCKS_PER_SEC 100
130     #endif
131     #endif
132    
133 nigel 93 /* This is the default loop count for timing. */
134    
135 nigel 75 #define LOOPREPEAT 500000
136 nigel 3
137 nigel 85 /* Static variables */
138    
139 nigel 3 static FILE *outfile;
140     static int log_store = 0;
141 nigel 63 static int callout_count;
142     static int callout_extra;
143     static int callout_fail_count;
144     static int callout_fail_id;
145     static int first_callout;
146 nigel 93 static int locale_set = 0;
147 nigel 73 static int show_malloc;
148 nigel 67 static int use_utf8;
149 nigel 43 static size_t gotten_store;
150 nigel 3
151 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
152    
153     static int buffer_size = 50000;
154     static uschar *buffer = NULL;
155     static uschar *dbuffer = NULL;
156 nigel 75 static uschar *pbuffer = NULL;
157 nigel 3
158 nigel 75
159 nigel 49
160     /*************************************************
161 nigel 91 * Read or extend an input line *
162     *************************************************/
163    
164     /* Input lines are read into buffer, but both patterns and data lines can be
165     continued over multiple input lines. In addition, if the buffer fills up, we
166     want to automatically expand it so as to be able to handle extremely large
167     lines that are needed for certain stress tests. When the input buffer is
168     expanded, the other two buffers must also be expanded likewise, and the
169     contents of pbuffer, which are a copy of the input for callouts, must be
170     preserved (for when expansion happens for a data line). This is not the most
171     optimal way of handling this, but hey, this is just a test program!
172    
173     Arguments:
174     f the file to read
175     start where in buffer to start (this *must* be within buffer)
176    
177     Returns: pointer to the start of new data
178     could be a copy of start, or could be moved
179     NULL if no data read and EOF reached
180     */
181    
182     static uschar *
183     extend_inputline(FILE *f, uschar *start)
184     {
185     uschar *here = start;
186    
187     for (;;)
188     {
189     int rlen = buffer_size - (here - buffer);
190 nigel 93
191 nigel 91 if (rlen > 1000)
192     {
193     int dlen;
194     if (fgets((char *)here, rlen, f) == NULL)
195     return (here == start)? NULL : start;
196     dlen = (int)strlen((char *)here);
197     if (dlen > 0 && here[dlen - 1] == '\n') return start;
198     here += dlen;
199     }
200    
201     else
202     {
203     int new_buffer_size = 2*buffer_size;
204     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
205     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
206     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
207    
208     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
209     {
210     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
211     exit(1);
212     }
213    
214     memcpy(new_buffer, buffer, buffer_size);
215     memcpy(new_pbuffer, pbuffer, buffer_size);
216    
217     buffer_size = new_buffer_size;
218    
219     start = new_buffer + (start - buffer);
220     here = new_buffer + (here - buffer);
221    
222     free(buffer);
223     free(dbuffer);
224     free(pbuffer);
225    
226     buffer = new_buffer;
227     dbuffer = new_dbuffer;
228     pbuffer = new_pbuffer;
229     }
230     }
231    
232     return NULL; /* Control never gets here */
233     }
234    
235    
236    
237    
238    
239    
240    
241     /*************************************************
242 nigel 63 * Read number from string *
243     *************************************************/
244    
245     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
246     around with conditional compilation, just do the job by hand. It is only used
247 nigel 93 for unpicking arguments, so just keep it simple.
248 nigel 63
249     Arguments:
250     str string to be converted
251     endptr where to put the end pointer
252    
253     Returns: the converted value, as an int
254     */
255    
/* Convert a run of decimal digits, optionally preceded by white space, into a
non-negative int. No sign is accepted. If the digits describe a number that
does not fit in an int, the result saturates at INT_MAX instead of overflowing
(signed overflow is undefined behaviour); all the digits are still consumed.

Arguments:
  str      string to be converted
  endptr   receives a pointer to the first character after the digits

Returns:   the converted value; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (*str != 0 && isspace(*str)) str++;
while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* result*10 + digit fits exactly when result <= (INT_MAX - digit)/10. Once
  saturated, the test keeps failing, so the value stays at INT_MAX. */

  if (result > (INT_MAX - digit)/10) result = INT_MAX;
    else result = result * 10 + digit;
  }
*endptr = str;
return(result);
}
265    
266    
267    
268 nigel 49
269     /*************************************************
270     * Convert UTF-8 string to value *
271     *************************************************/
272    
273     /* This function takes one or more bytes that represents a UTF-8 character,
274     and returns the value of the character.
275    
276     Argument:
277 nigel 91 utf8bytes a pointer to the byte vector
278     vptr a pointer to an int to receive the value
279 nigel 49
280 nigel 91 Returns: > 0 => the number of bytes consumed
281     -6 to 0 => malformed UTF-8 character at offset = (-return)
282 nigel 49 */
283    
284 nigel 79 #if !defined NOUTF8
285    
286 nigel 67 static int
287 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
288 nigel 49 {
289 nigel 91 int c = *utf8bytes++;
290 nigel 49 int d = c;
291     int i, j, s;
292    
293     for (i = -1; i < 6; i++) /* i is number of additional bytes */
294     {
295     if ((d & 0x80) == 0) break;
296     d <<= 1;
297     }
298    
299     if (i == -1) { *vptr = c; return 1; } /* ascii character */
300     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
301    
302     /* i now has a value in the range 1-5 */
303    
304 nigel 59 s = 6*i;
305 nigel 85 d = (c & utf8_table3[i]) << s;
306 nigel 49
307     for (j = 0; j < i; j++)
308     {
309 nigel 91 c = *utf8bytes++;
310 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
311 nigel 59 s -= 6;
312 nigel 49 d |= (c & 0x3f) << s;
313     }
314    
315     /* Check that encoding was the correct unique one */
316    
317 nigel 85 for (j = 0; j < utf8_table1_size; j++)
318     if (d <= utf8_table1[j]) break;
319 nigel 49 if (j != i) return -(i+1);
320    
321     /* Valid value */
322    
323     *vptr = d;
324     return i+1;
325     }
326    
327 nigel 79 #endif
328 nigel 49
329    
330 nigel 79
331 nigel 63 /*************************************************
332 nigel 85 * Convert character value to UTF-8 *
333     *************************************************/
334    
335     /* This function takes an integer value in the range 0 - 0x7fffffff
336     and encodes it as a UTF-8 character in 1 to 6 bytes.
337    
338     Arguments:
339     cvalue the character value
340 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
341 nigel 85
342     Returns: number of bytes placed in the buffer
343     */
344    
345 nigel 93 #if !defined NOUTF8
346    
347 nigel 85 static int
348 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
349 nigel 85 {
350     register int i, j;
351     for (i = 0; i < utf8_table1_size; i++)
352     if (cvalue <= utf8_table1[i]) break;
353 nigel 91 utf8bytes += i;
354 nigel 85 for (j = i; j > 0; j--)
355     {
356 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
357 nigel 85 cvalue >>= 6;
358     }
359 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
360 nigel 85 return i + 1;
361     }
362    
363 nigel 93 #endif
364 nigel 85
365    
366 nigel 93
367 nigel 85 /*************************************************
368 nigel 63 * Print character string *
369     *************************************************/
370 nigel 49
371 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
372     mode. Yields number of characters printed. If handed a NULL file, just counts
373     chars without printing. */
374 nigel 49
375 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
376 nigel 3 {
377 nigel 85 int c = 0;
378 nigel 63 int yield = 0;
379 nigel 3
380 nigel 63 while (length-- > 0)
381 nigel 3 {
382 nigel 79 #if !defined NOUTF8
383 nigel 67 if (use_utf8)
384 nigel 63 {
385     int rc = utf82ord(p, &c);
386 nigel 3
387 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
388     {
389     length -= rc - 1;
390     p += rc;
391 nigel 93 if (PRINTHEX(c))
392 nigel 63 {
393     if (f != NULL) fprintf(f, "%c", c);
394     yield++;
395     }
396     else
397     {
398 nigel 93 int n = 4;
399     if (f != NULL) fprintf(f, "\\x{%02x}", c);
400     yield += (n <= 0x000000ff)? 2 :
401     (n <= 0x00000fff)? 3 :
402     (n <= 0x0000ffff)? 4 :
403     (n <= 0x000fffff)? 5 : 6;
404 nigel 63 }
405     continue;
406     }
407     }
408 nigel 79 #endif
409 nigel 3
410 nigel 63 /* Not UTF-8, or malformed UTF-8 */
411    
412 nigel 93 c = *p++;
413     if (PRINTHEX(c))
414 nigel 3 {
415 nigel 63 if (f != NULL) fprintf(f, "%c", c);
416     yield++;
417 nigel 3 }
418 nigel 63 else
419 nigel 3 {
420 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
421     yield += 4;
422     }
423     }
424 nigel 3
425 nigel 63 return yield;
426     }
427 nigel 23
428 nigel 3
429 nigel 23
430 nigel 63 /*************************************************
431     * Callout function *
432     *************************************************/
433 nigel 3
434 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
435     the match. Yield zero unless more callouts than the fail count, or the callout
436     data is not zero. */
437 nigel 3
438 nigel 63 static int callout(pcre_callout_block *cb)
439     {
440     FILE *f = (first_callout | callout_extra)? outfile : NULL;
441 nigel 75 int i, pre_start, post_start, subject_length;
442 nigel 3
443 nigel 63 if (callout_extra)
444     {
445     fprintf(f, "Callout %d: last capture = %d\n",
446     cb->callout_number, cb->capture_last);
447 nigel 3
448 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
449     {
450     if (cb->offset_vector[i] < 0)
451     fprintf(f, "%2d: <unset>\n", i/2);
452     else
453     {
454     fprintf(f, "%2d: ", i/2);
455     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
456     cb->offset_vector[i+1] - cb->offset_vector[i], f);
457     fprintf(f, "\n");
458     }
459     }
460     }
461 nigel 3
462 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
463     datails. On subsequent calls in the same match, we use pchars just to find the
464     printed lengths of the substrings. */
465 nigel 3
466 nigel 63 if (f != NULL) fprintf(f, "--->");
467 nigel 3
468 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
469     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
470     cb->current_position - cb->start_match, f);
471 nigel 3
472 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
473    
474 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
475     cb->subject_length - cb->current_position, f);
476 nigel 3
477 nigel 63 if (f != NULL) fprintf(f, "\n");
478 nigel 9
479 nigel 63 /* Always print appropriate indicators, with callout number if not already
480 nigel 75 shown. For automatic callouts, show the pattern offset. */
481 nigel 3
482 nigel 75 if (cb->callout_number == 255)
483     {
484     fprintf(outfile, "%+3d ", cb->pattern_position);
485     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
486     }
487     else
488     {
489     if (callout_extra) fprintf(outfile, " ");
490     else fprintf(outfile, "%3d ", cb->callout_number);
491     }
492 nigel 3
493 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
494     fprintf(outfile, "^");
495 nigel 3
496 nigel 63 if (post_start > 0)
497     {
498     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
499     fprintf(outfile, "^");
500 nigel 3 }
501    
502 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
503     fprintf(outfile, " ");
504    
505     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
506     pbuffer + cb->pattern_position);
507    
508 nigel 63 fprintf(outfile, "\n");
509     first_callout = 0;
510 nigel 3
511 nigel 71 if (cb->callout_data != NULL)
512 nigel 49 {
513 nigel 71 int callout_data = *((int *)(cb->callout_data));
514     if (callout_data != 0)
515     {
516     fprintf(outfile, "Callout data = %d\n", callout_data);
517     return callout_data;
518     }
519 nigel 63 }
520 nigel 49
521 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
522     (++callout_count >= callout_fail_count)? 1 : 0;
523 nigel 3 }
524    
525    
526 nigel 63 /*************************************************
527 nigel 73 * Local malloc functions *
528 nigel 63 *************************************************/
529 nigel 3
530     /* Alternative malloc function, to test functionality and show the size of the
531     compiled re. */
532    
533     static void *new_malloc(size_t size)
534     {
535 nigel 73 void *block = malloc(size);
536 nigel 43 gotten_store = size;
537 nigel 73 if (show_malloc)
538 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
539 nigel 73 return block;
540 nigel 3 }
541    
542 nigel 73 static void new_free(void *block)
543     {
544     if (show_malloc)
545     fprintf(outfile, "free %p\n", block);
546     free(block);
547     }
548 nigel 3
549    
550 nigel 73 /* For recursion malloc/free, to test stacking calls */
551    
552     static void *stack_malloc(size_t size)
553     {
554     void *block = malloc(size);
555     if (show_malloc)
556 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
557 nigel 73 return block;
558     }
559    
560     static void stack_free(void *block)
561     {
562     if (show_malloc)
563     fprintf(outfile, "stack_free %p\n", block);
564     free(block);
565     }
566    
567    
568 nigel 63 /*************************************************
569     * Call pcre_fullinfo() *
570     *************************************************/
571 nigel 43
572     /* Get one piece of information from the pcre_fullinfo() function */
573    
574     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
575     {
576     int rc;
577     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
578     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
579     }
580    
581    
582    
583 nigel 63 /*************************************************
584 nigel 75 * Byte flipping function *
585     *************************************************/
586    
/* Reverse the byte order of a 2-byte or a 4-byte quantity.

Arguments:
  value   the value to flip
  n       2 to swap the two low-order bytes; any other value is treated as 4
            and reverses the four low-order bytes

Returns:  the flipped value; bits above the bytes involved are not carried
            into the result
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int byte0 = value & 0xff;
unsigned long int byte1 = (value >> 8) & 0xff;
unsigned long int byte2, byte3;

if (n == 2) return (byte0 << 8) | byte1;

byte2 = (value >> 16) & 0xff;
byte3 = (value >> 24) & 0xff;
return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
596    
597    
598    
599    
600     /*************************************************
601 nigel 87 * Check match or recursion limit *
602     *************************************************/
603    
604     static int
605     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
606     int start_offset, int options, int *use_offsets, int use_size_offsets,
607     int flag, unsigned long int *limit, int errnumber, const char *msg)
608     {
609     int count;
610     int min = 0;
611     int mid = 64;
612     int max = -1;
613    
614     extra->flags |= flag;
615    
616     for (;;)
617     {
618     *limit = mid;
619    
620     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
621     use_offsets, use_size_offsets);
622    
623     if (count == errnumber)
624     {
625     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
626     min = mid;
627     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
628     }
629    
630     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
631     count == PCRE_ERROR_PARTIAL)
632     {
633     if (mid == min + 1)
634     {
635     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
636     break;
637     }
638     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
639     max = mid;
640     mid = (min + mid)/2;
641     }
642     else break; /* Some other error */
643     }
644    
645     extra->flags &= ~flag;
646     return count;
647     }
648    
649    
650    
651     /*************************************************
652 nigel 91 * Check newline indicator *
653     *************************************************/
654    
655     /* This is used both at compile and run-time to check for <xxx> escapes, where
656 nigel 93 xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.
657 nigel 91
658     Arguments:
659     p points after the leading '<'
660     f file for error message
661    
662     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
663     */
664    
665     static int
666     check_newline(uschar *p, FILE *f)
667     {
668     if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
669     if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
670     if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
671 nigel 93 if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
672 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
673     return 0;
674     }
675    
676    
677    
678     /*************************************************
679 nigel 93 * Usage function *
680     *************************************************/
681    
/* Write the command-line summary to stdout. The -dfa and -p lines are
omitted when the corresponding support is compiled out (NODFA, NOPOSIX). */

static void
usage(void)
{
static const char *const text[] = {
  "Usage: pcretest [options] [<input> [<output>]]\n",
  " -b show compiled code (bytecode)\n",
  " -C show PCRE compile-time options and exit\n",
  " -d debug: show compiled code and information (-b and -i)\n",
#if !defined NODFA
  " -dfa force DFA matching for all subjects\n",
#endif
  " -help show usage information\n",
  " -i show information about compiled patterns\n",
  " -m output memory used information\n",
  " -o <n> set size of offsets vector to <n>\n",
#if !defined NOPOSIX
  " -p use POSIX interface\n",
#endif
  " -q quiet: do not output PCRE version number at start\n",
  " -S <n> set stack size to <n> megabytes\n",
  " -s output store (memory) used information\n",
  " -t time compilation and execution\n",
  " -t <n> time compilation and execution, repeating <n> times\n",
  " -tm time execution (matching) only\n",
  " -tm <n> time execution (matching) only, repeating <n> times\n"
};
size_t k;

for (k = 0; k < sizeof(text)/sizeof(text[0]); k++)
  fputs(text[k], stdout);
}
707    
708    
709    
710     /*************************************************
711 nigel 63 * Main Program *
712     *************************************************/
713 nigel 43
714 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
715     consist of a regular expression, in delimiters and optionally followed by
716     options, followed by a set of test data, terminated by an empty line. */
717    
718     int main(int argc, char **argv)
719     {
720     FILE *infile = stdin;
721     int options = 0;
722     int study_options = 0;
723     int op = 1;
724     int timeit = 0;
725 nigel 93 int timeitm = 0;
726 nigel 3 int showinfo = 0;
727 nigel 31 int showstore = 0;
728 nigel 87 int quiet = 0;
729 nigel 53 int size_offsets = 45;
730     int size_offsets_max;
731 nigel 77 int *offsets = NULL;
732 nigel 53 #if !defined NOPOSIX
733 nigel 3 int posix = 0;
734 nigel 53 #endif
735 nigel 3 int debug = 0;
736 nigel 11 int done = 0;
737 nigel 77 int all_use_dfa = 0;
738     int yield = 0;
739 nigel 91 int stack_size;
740 nigel 3
741 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
742     that 1024 is plenty long enough for the few names we'll be testing. */
743 nigel 69
744 nigel 91 uschar copynames[1024];
745     uschar getnames[1024];
746    
747     uschar *copynamesptr;
748     uschar *getnamesptr;
749    
750 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
751 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
752 nigel 69
753 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
754     dbuffer = (unsigned char *)malloc(buffer_size);
755     pbuffer = (unsigned char *)malloc(buffer_size);
756 nigel 69
757 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
758 nigel 3
759 nigel 93 outfile = stdout;
760    
761     /* The following _setmode() stuff is some Windows magic that tells its runtime
762     library to translate CRLF into a single LF character. At least, that's what
763     I've been told: never having used Windows I take this all on trust. Originally
764     it set 0x8000, but then I was advised that _O_BINARY was better. */
765    
766 nigel 75 #if defined(_WIN32) || defined(WIN32)
767 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
768     #endif
769 nigel 75
770 nigel 3 /* Scan options */
771    
772     while (argc > 1 && argv[op][0] == '-')
773     {
774 nigel 63 unsigned char *endptr;
775 nigel 53
776 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
777     showstore = 1;
778 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
779 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
780 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
781     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
782 nigel 79 #if !defined NODFA
783 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
784 nigel 79 #endif
785 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
786 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
787     *endptr == 0))
788 nigel 53 {
789     op++;
790     argc--;
791     }
792 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
793     {
794     int both = argv[op][2] == 0;
795     int temp;
796     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
797     *endptr == 0))
798     {
799     timeitm = temp;
800     op++;
801     argc--;
802     }
803     else timeitm = LOOPREPEAT;
804     if (both) timeit = timeitm;
805     }
806 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
807     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
808     *endptr == 0))
809     {
810 nigel 93 #if defined(_WIN32) || defined(WIN32)
811 nigel 91 printf("PCRE: -S not supported on this OS\n");
812     exit(1);
813     #else
814     int rc;
815     struct rlimit rlim;
816     getrlimit(RLIMIT_STACK, &rlim);
817     rlim.rlim_cur = stack_size * 1024 * 1024;
818     rc = setrlimit(RLIMIT_STACK, &rlim);
819     if (rc != 0)
820     {
821     printf("PCRE: setrlimit() failed with error %d\n", rc);
822     exit(1);
823     }
824     op++;
825     argc--;
826     #endif
827     }
828 nigel 53 #if !defined NOPOSIX
829 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
830 nigel 53 #endif
831 nigel 63 else if (strcmp(argv[op], "-C") == 0)
832     {
833     int rc;
834     printf("PCRE version %s\n", pcre_version());
835     printf("Compiled with\n");
836     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
837     printf(" %sUTF-8 support\n", rc? "" : "No ");
838 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
839     printf(" %sUnicode properties support\n", rc? "" : "No ");
840 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
841 nigel 91 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
842 nigel 93 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
843     (rc == -1)? "ANY" : "???");
844 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
845     printf(" Internal link size = %d\n", rc);
846     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
847     printf(" POSIX malloc threshold = %d\n", rc);
848     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
849     printf(" Default match limit = %d\n", rc);
850 nigel 87 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
851     printf(" Default recursion depth limit = %d\n", rc);
852 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
853     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
854 nigel 63 exit(0);
855     }
856 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
857     strcmp(argv[op], "--help") == 0)
858     {
859     usage();
860     goto EXIT;
861     }
862 nigel 3 else
863     {
864 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
865 nigel 93 usage();
866 nigel 77 yield = 1;
867     goto EXIT;
868 nigel 3 }
869     op++;
870     argc--;
871     }
872    
873 nigel 53 /* Get the store for the offsets vector, and remember what it was */
874    
875     size_offsets_max = size_offsets;
876 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
877 nigel 53 if (offsets == NULL)
878     {
879     printf("** Failed to get %d bytes of memory for offsets vector\n",
880     size_offsets_max * sizeof(int));
881 nigel 77 yield = 1;
882     goto EXIT;
883 nigel 53 }
884    
885 nigel 3 /* Sort out the input and output files */
886    
887     if (argc > 1)
888     {
889 nigel 93 infile = fopen(argv[op], INPUT_MODE);
890 nigel 3 if (infile == NULL)
891     {
892     printf("** Failed to open %s\n", argv[op]);
893 nigel 77 yield = 1;
894     goto EXIT;
895 nigel 3 }
896     }
897    
898     if (argc > 2)
899     {
900 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
901 nigel 3 if (outfile == NULL)
902     {
903     printf("** Failed to open %s\n", argv[op+1]);
904 nigel 77 yield = 1;
905     goto EXIT;
906 nigel 3 }
907     }
908    
909     /* Set alternative malloc function */
910    
911     pcre_malloc = new_malloc;
912 nigel 73 pcre_free = new_free;
913     pcre_stack_malloc = stack_malloc;
914     pcre_stack_free = stack_free;
915 nigel 3
916 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
917 nigel 3
918 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
919 nigel 3
920     /* Main loop */
921    
922 nigel 11 while (!done)
923 nigel 3 {
924     pcre *re = NULL;
925     pcre_extra *extra = NULL;
926 nigel 37
927     #if !defined NOPOSIX /* There are still compilers that require no indent */
928 nigel 3 regex_t preg;
929 nigel 45 int do_posix = 0;
930 nigel 37 #endif
931    
932 nigel 7 const char *error;
933 nigel 25 unsigned char *p, *pp, *ppp;
934 nigel 75 unsigned char *to_file = NULL;
935 nigel 53 const unsigned char *tables = NULL;
936 nigel 75 unsigned long int true_size, true_study_size = 0;
937     size_t size, regex_gotten_store;
938 nigel 3 int do_study = 0;
939 nigel 25 int do_debug = debug;
940 nigel 35 int do_G = 0;
941     int do_g = 0;
942 nigel 25 int do_showinfo = showinfo;
943 nigel 35 int do_showrest = 0;
944 nigel 75 int do_flip = 0;
945 nigel 93 int erroroffset, len, delimiter, poffset;
946 nigel 3
947 nigel 67 use_utf8 = 0;
948 nigel 63
949 nigel 3 if (infile == stdin) printf(" re> ");
950 nigel 91 if (extend_inputline(infile, buffer) == NULL) break;
951 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
952 nigel 63 fflush(outfile);
953 nigel 3
954     p = buffer;
955     while (isspace(*p)) p++;
956     if (*p == 0) continue;
957    
958 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
959 nigel 3
960 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
961     {
962 nigel 91 unsigned long int magic, get_options;
963 nigel 75 uschar sbuf[8];
964     FILE *f;
965    
966     p++;
967     pp = p + (int)strlen((char *)p);
968     while (isspace(pp[-1])) pp--;
969     *pp = 0;
970    
971     f = fopen((char *)p, "rb");
972     if (f == NULL)
973     {
974     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
975     continue;
976     }
977    
978     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
979    
980     true_size =
981     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
982     true_study_size =
983     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
984    
985     re = (real_pcre *)new_malloc(true_size);
986     regex_gotten_store = gotten_store;
987    
988     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
989    
990     magic = ((real_pcre *)re)->magic_number;
991     if (magic != MAGIC_NUMBER)
992     {
993     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
994     {
995     do_flip = 1;
996     }
997     else
998     {
999     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1000     fclose(f);
1001     continue;
1002     }
1003     }
1004    
1005     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1006     do_flip? " (byte-inverted)" : "", p);
1007    
1008     /* Need to know if UTF-8 for printing data strings */
1009    
1010 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1011     use_utf8 = (get_options & PCRE_UTF8) != 0;
1012 nigel 75
1013     /* Now see if there is any following study data */
1014    
1015     if (true_study_size != 0)
1016     {
1017     pcre_study_data *psd;
1018    
1019     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1020     extra->flags = PCRE_EXTRA_STUDY_DATA;
1021    
1022     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1023     extra->study_data = psd;
1024    
1025     if (fread(psd, 1, true_study_size, f) != true_study_size)
1026     {
1027     FAIL_READ:
1028     fprintf(outfile, "Failed to read data from %s\n", p);
1029     if (extra != NULL) new_free(extra);
1030     if (re != NULL) new_free(re);
1031     fclose(f);
1032     continue;
1033     }
1034     fprintf(outfile, "Study data loaded from %s\n", p);
1035     do_study = 1; /* To get the data output if requested */
1036     }
1037     else fprintf(outfile, "No study data\n");
1038    
1039     fclose(f);
1040     goto SHOW_INFO;
1041     }
1042    
1043     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1044     the pattern; if it isn't complete, read more. */
1045    
1046 nigel 3 delimiter = *p++;
1047    
1048 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1049 nigel 3 {
1050 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1051 nigel 3 goto SKIP_DATA;
1052     }
1053    
1054     pp = p;
1055 nigel 93 poffset = p - buffer;
1056 nigel 3
1057     for(;;)
1058     {
1059 nigel 29 while (*pp != 0)
1060     {
1061     if (*pp == '\\' && pp[1] != 0) pp++;
1062     else if (*pp == delimiter) break;
1063     pp++;
1064     }
1065 nigel 3 if (*pp != 0) break;
1066     if (infile == stdin) printf(" > ");
1067 nigel 91 if ((pp = extend_inputline(infile, pp)) == NULL)
1068 nigel 3 {
1069     fprintf(outfile, "** Unexpected EOF\n");
1070 nigel 11 done = 1;
1071     goto CONTINUE;
1072 nigel 3 }
1073 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1074 nigel 3 }
1075    
1076 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1077     pointer to the correct relative point in the buffer. */
1078    
1079     p = buffer + poffset;
1080    
1081 nigel 29 /* If the first character after the delimiter is backslash, make
1082     the pattern end with backslash. This is purely to provide a way
1083     of testing for the error message when a pattern ends with backslash. */
1084    
1085     if (pp[1] == '\\') *pp++ = '\\';
1086    
1087 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1088     for callouts. */
1089 nigel 3
1090     *pp++ = 0;
1091 nigel 75 strcpy((char *)pbuffer, (char *)p);
1092 nigel 3
1093     /* Look for options after final delimiter */
1094    
1095     options = 0;
1096     study_options = 0;
1097 nigel 31 log_store = showstore; /* default from command line */
1098    
1099 nigel 3 while (*pp != 0)
1100     {
1101     switch (*pp++)
1102     {
1103 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1104 nigel 35 case 'g': do_g = 1; break;
1105 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1106     case 'm': options |= PCRE_MULTILINE; break;
1107     case 's': options |= PCRE_DOTALL; break;
1108     case 'x': options |= PCRE_EXTENDED; break;
1109 nigel 25
1110 nigel 35 case '+': do_showrest = 1; break;
1111 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1112 nigel 93 case 'B': do_debug = 1; break;
1113 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1114 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1115 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1116 nigel 75 case 'F': do_flip = 1; break;
1117 nigel 35 case 'G': do_G = 1; break;
1118 nigel 25 case 'I': do_showinfo = 1; break;
1119 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1120 nigel 31 case 'M': log_store = 1; break;
1121 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1122 nigel 37
1123     #if !defined NOPOSIX
1124 nigel 3 case 'P': do_posix = 1; break;
1125 nigel 37 #endif
1126    
1127 nigel 3 case 'S': do_study = 1; break;
1128 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1129 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1130 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1131 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1132 nigel 25
1133     case 'L':
1134     ppp = pp;
1135 nigel 93 /* The '\r' test here is so that it works on Windows. */
1136     /* The '0' test is just in case this is an unterminated line. */
1137     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1138 nigel 25 *ppp = 0;
1139     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1140     {
1141     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1142     goto SKIP_DATA;
1143     }
1144 nigel 93 locale_set = 1;
1145 nigel 25 tables = pcre_maketables();
1146     pp = ppp;
1147     break;
1148    
1149 nigel 75 case '>':
1150     to_file = pp;
1151     while (*pp != 0) pp++;
1152     while (isspace(pp[-1])) pp--;
1153     *pp = 0;
1154     break;
1155    
1156 nigel 91 case '<':
1157     {
1158     int x = check_newline(pp, outfile);
1159     if (x == 0) goto SKIP_DATA;
1160     options |= x;
1161     while (*pp++ != '>');
1162     }
1163     break;
1164    
1165 nigel 77 case '\r': /* So that it works in Windows */
1166     case '\n':
1167     case ' ':
1168     break;
1169 nigel 75
1170 nigel 3 default:
1171     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1172     goto SKIP_DATA;
1173     }
1174     }
1175    
1176 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1177 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1178     local character tables. */
1179 nigel 3
1180 nigel 37 #if !defined NOPOSIX
1181 nigel 3 if (posix || do_posix)
1182     {
1183     int rc;
1184     int cflags = 0;
1185 nigel 75
1186 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1187     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1188 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1189 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1190     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1191    
1192 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1193    
1194     /* Compilation failed; go back for another re, skipping to blank line
1195     if non-interactive. */
1196    
1197     if (rc != 0)
1198     {
1199 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1200 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1201     goto SKIP_DATA;
1202     }
1203     }
1204    
1205     /* Handle compiling via the native interface */
1206    
1207     else
1208 nigel 37 #endif /* !defined NOPOSIX */
1209    
1210 nigel 3 {
1211 nigel 93 if (timeit > 0)
1212 nigel 3 {
1213     register int i;
1214     clock_t time_taken;
1215     clock_t start_time = clock();
1216 nigel 93 for (i = 0; i < timeit; i++)
1217 nigel 3 {
1218 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1219 nigel 3 if (re != NULL) free(re);
1220     }
1221     time_taken = clock() - start_time;
1222 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1223     (((double)time_taken * 1000.0) / (double)timeit) /
1224 nigel 63 (double)CLOCKS_PER_SEC);
1225 nigel 3 }
1226    
1227 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1228 nigel 3
1229     /* Compilation failed; go back for another re, skipping to blank line
1230     if non-interactive. */
1231    
1232     if (re == NULL)
1233     {
1234     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1235     SKIP_DATA:
1236     if (infile != stdin)
1237     {
1238     for (;;)
1239     {
1240 nigel 91 if (extend_inputline(infile, buffer) == NULL)
1241 nigel 11 {
1242     done = 1;
1243     goto CONTINUE;
1244     }
1245 nigel 3 len = (int)strlen((char *)buffer);
1246     while (len > 0 && isspace(buffer[len-1])) len--;
1247     if (len == 0) break;
1248     }
1249     fprintf(outfile, "\n");
1250     }
1251 nigel 25 goto CONTINUE;
1252 nigel 3 }
1253    
1254 nigel 43 /* Compilation succeeded; print data if required. There are now two
1255     info-returning functions. The old one has a limited interface and
1256     returns only limited data. Check that it agrees with the newer one. */
1257 nigel 3
1258 nigel 63 if (log_store)
1259     fprintf(outfile, "Memory allocation (code space): %d\n",
1260     (int)(gotten_store -
1261     sizeof(real_pcre) -
1262     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1263    
1264 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1265     and remember the store that was got. */
1266    
1267     true_size = ((real_pcre *)re)->size;
1268     regex_gotten_store = gotten_store;
1269    
1270     /* If /S was present, study the regexp to generate additional info to
1271     help with the matching. */
1272    
1273     if (do_study)
1274     {
1275 nigel 93 if (timeit > 0)
1276 nigel 75 {
1277     register int i;
1278     clock_t time_taken;
1279     clock_t start_time = clock();
1280 nigel 93 for (i = 0; i < timeit; i++)
1281 nigel 75 extra = pcre_study(re, study_options, &error);
1282     time_taken = clock() - start_time;
1283     if (extra != NULL) free(extra);
1284 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1285     (((double)time_taken * 1000.0) / (double)timeit) /
1286 nigel 75 (double)CLOCKS_PER_SEC);
1287     }
1288     extra = pcre_study(re, study_options, &error);
1289     if (error != NULL)
1290     fprintf(outfile, "Failed to study: %s\n", error);
1291     else if (extra != NULL)
1292     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1293     }
1294    
1295     /* If the 'F' option was present, we flip the bytes of all the integer
1296     fields in the regex data block and the study block. This is to make it
1297     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1298     compiled on a different architecture. */
1299    
1300     if (do_flip)
1301     {
1302     real_pcre *rre = (real_pcre *)re;
1303     rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1304     rre->size = byteflip(rre->size, sizeof(rre->size));
1305     rre->options = byteflip(rre->options, sizeof(rre->options));
1306     rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1307     rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1308     rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1309     rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1310     rre->name_table_offset = byteflip(rre->name_table_offset,
1311     sizeof(rre->name_table_offset));
1312     rre->name_entry_size = byteflip(rre->name_entry_size,
1313     sizeof(rre->name_entry_size));
1314     rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1315    
1316     if (extra != NULL)
1317     {
1318     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1319     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1320     rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1321     }
1322     }
1323    
1324     /* Extract information from the compiled data if required */
1325    
1326     SHOW_INFO:
1327    
1328 nigel 93 if (do_debug)
1329     {
1330     fprintf(outfile, "------------------------------------------------------------------\n");
1331     pcre_printint(re, outfile);
1332     }
1333    
1334 nigel 25 if (do_showinfo)
1335 nigel 3 {
1336 nigel 75 unsigned long int get_options, all_options;
1337 nigel 79 #if !defined NOINFOCHECK
1338 nigel 43 int old_first_char, old_options, old_count;
1339 nigel 79 #endif
1340 nigel 43 int count, backrefmax, first_char, need_char;
1341 nigel 63 int nameentrysize, namecount;
1342     const uschar *nametable;
1343 nigel 3
1344 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1345 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1346     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1347     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1348 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1349 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1350 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1351     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1352 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1353 nigel 43
1354 nigel 79 #if !defined NOINFOCHECK
1355 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1356 nigel 3 if (count < 0) fprintf(outfile,
1357 nigel 43 "Error %d from pcre_info()\n", count);
1358 nigel 3 else
1359     {
1360 nigel 43 if (old_count != count) fprintf(outfile,
1361     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1362     old_count);
1363 nigel 37
1364 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1365     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1366     first_char, old_first_char);
1367 nigel 37
1368 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1369     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1370     get_options, old_options);
1371 nigel 43 }
1372 nigel 79 #endif
1373 nigel 43
1374 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1375 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1376 nigel 77 (int)size, (int)regex_gotten_store);
1377 nigel 43
1378     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1379     if (backrefmax > 0)
1380     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1381 nigel 63
1382     if (namecount > 0)
1383     {
1384     fprintf(outfile, "Named capturing subpatterns:\n");
1385     while (namecount-- > 0)
1386     {
1387     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1388     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1389     GET2(nametable, 0));
1390     nametable += nameentrysize;
1391     }
1392     }
1393    
1394 nigel 75 /* The NOPARTIAL bit is a private bit in the options, so we have
1395     to fish it out via our back door */
1396    
1397     all_options = ((real_pcre *)re)->options;
1398     if (do_flip)
1399     {
1400     all_options = byteflip(all_options, sizeof(all_options));
1401 nigel 91 }
1402 nigel 75
1403     if ((all_options & PCRE_NOPARTIAL) != 0)
1404     fprintf(outfile, "Partial matching not supported\n");
1405    
1406 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1407 nigel 91 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1408 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1409     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1410     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1411     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1412 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1413 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1414     ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1415     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1416     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1417 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1418 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1419 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1420     ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1421 nigel 43
1422 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1423 nigel 91 {
1424     case PCRE_NEWLINE_CR:
1425     fprintf(outfile, "Forced newline sequence: CR\n");
1426     break;
1427 nigel 43
1428 nigel 91 case PCRE_NEWLINE_LF:
1429     fprintf(outfile, "Forced newline sequence: LF\n");
1430     break;
1431    
1432     case PCRE_NEWLINE_CRLF:
1433     fprintf(outfile, "Forced newline sequence: CRLF\n");
1434     break;
1435    
1436 nigel 93 case PCRE_NEWLINE_ANY:
1437     fprintf(outfile, "Forced newline sequence: ANY\n");
1438     break;
1439    
1440 nigel 91 default:
1441     break;
1442     }
1443    
1444 nigel 43 if (first_char == -1)
1445     {
1446 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1447 nigel 43 }
1448     else if (first_char < 0)
1449     {
1450     fprintf(outfile, "No first char\n");
1451     }
1452     else
1453     {
1454 nigel 63 int ch = first_char & 255;
1455 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1456 nigel 63 "" : " (caseless)";
1457 nigel 93 if (PRINTHEX(ch))
1458 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1459 nigel 3 else
1460 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1461 nigel 43 }
1462 nigel 37
1463 nigel 43 if (need_char < 0)
1464     {
1465     fprintf(outfile, "No need char\n");
1466 nigel 3 }
1467 nigel 43 else
1468     {
1469 nigel 63 int ch = need_char & 255;
1470 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1471 nigel 63 "" : " (caseless)";
1472 nigel 93 if (PRINTHEX(ch))
1473 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1474 nigel 43 else
1475 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1476 nigel 43 }
1477 nigel 75
1478     /* Don't output study size; at present it is in any case a fixed
1479     value, but it varies, depending on the computer architecture, and
1480     so messes up the test suite. (And with the /F option, it might be
1481     flipped.) */
1482    
1483     if (do_study)
1484     {
1485     if (extra == NULL)
1486     fprintf(outfile, "Study returned NULL\n");
1487     else
1488     {
1489     uschar *start_bits = NULL;
1490     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1491    
1492     if (start_bits == NULL)
1493     fprintf(outfile, "No starting byte set\n");
1494     else
1495     {
1496     int i;
1497     int c = 24;
1498     fprintf(outfile, "Starting byte set: ");
1499     for (i = 0; i < 256; i++)
1500     {
1501     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1502     {
1503     if (c > 75)
1504     {
1505     fprintf(outfile, "\n ");
1506     c = 2;
1507     }
1508 nigel 93 if (PRINTHEX(i) && i != ' ')
1509 nigel 75 {
1510     fprintf(outfile, "%c ", i);
1511     c += 2;
1512     }
1513     else
1514     {
1515     fprintf(outfile, "\\x%02x ", i);
1516     c += 5;
1517     }
1518     }
1519     }
1520     fprintf(outfile, "\n");
1521     }
1522     }
1523     }
1524 nigel 3 }
1525    
1526 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1527     that is all. The first 8 bytes of the file are the regex length and then
1528     the study length, in big-endian order. */
1529 nigel 3
1530 nigel 75 if (to_file != NULL)
1531 nigel 3 {
1532 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1533     if (f == NULL)
1534 nigel 3 {
1535 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1536 nigel 3 }
1537 nigel 75 else
1538     {
1539     uschar sbuf[8];
1540     sbuf[0] = (true_size >> 24) & 255;
1541     sbuf[1] = (true_size >> 16) & 255;
1542     sbuf[2] = (true_size >> 8) & 255;
1543     sbuf[3] = (true_size) & 255;
1544 nigel 3
1545 nigel 75 sbuf[4] = (true_study_size >> 24) & 255;
1546     sbuf[5] = (true_study_size >> 16) & 255;
1547     sbuf[6] = (true_study_size >> 8) & 255;
1548     sbuf[7] = (true_study_size) & 255;
1549 nigel 3
1550 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1551     fwrite(re, 1, true_size, f) < true_size)
1552     {
1553     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1554     }
1555 nigel 3 else
1556     {
1557 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1558     if (extra != NULL)
1559 nigel 3 {
1560 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1561     true_study_size)
1562 nigel 3 {
1563 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1564     strerror(errno));
1565 nigel 3 }
1566 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1567 nigel 93
1568 nigel 3 }
1569     }
1570 nigel 75 fclose(f);
1571 nigel 3 }
1572 nigel 77
1573     new_free(re);
1574     if (extra != NULL) new_free(extra);
1575     if (tables != NULL) new_free((void *)tables);
1576 nigel 75 continue; /* With next regex */
1577 nigel 3 }
1578 nigel 75 } /* End of non-POSIX compile */
1579 nigel 3
1580     /* Read data lines and test them */
1581    
1582     for (;;)
1583     {
1584 nigel 87 uschar *q;
1585     uschar *bptr = dbuffer;
1586 nigel 57 int *use_offsets = offsets;
1587 nigel 53 int use_size_offsets = size_offsets;
1588 nigel 63 int callout_data = 0;
1589     int callout_data_set = 0;
1590 nigel 3 int count, c;
1591 nigel 29 int copystrings = 0;
1592 nigel 63 int find_match_limit = 0;
1593 nigel 29 int getstrings = 0;
1594     int getlist = 0;
1595 nigel 39 int gmatched = 0;
1596 nigel 35 int start_offset = 0;
1597 nigel 41 int g_notempty = 0;
1598 nigel 77 int use_dfa = 0;
1599 nigel 3
1600     options = 0;
1601    
1602 nigel 91 *copynames = 0;
1603     *getnames = 0;
1604    
1605     copynamesptr = copynames;
1606     getnamesptr = getnames;
1607    
1608 nigel 63 pcre_callout = callout;
1609     first_callout = 1;
1610     callout_extra = 0;
1611     callout_count = 0;
1612     callout_fail_count = 999999;
1613     callout_fail_id = -1;
1614 nigel 73 show_malloc = 0;
1615 nigel 63
1616 nigel 91 if (extra != NULL) extra->flags &=
1617     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1618    
1619     len = 0;
1620     for (;;)
1621 nigel 11 {
1622 nigel 91 if (infile == stdin) printf("data> ");
1623     if (extend_inputline(infile, buffer + len) == NULL)
1624     {
1625     if (len > 0) break;
1626     done = 1;
1627     goto CONTINUE;
1628     }
1629     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1630     len = (int)strlen((char *)buffer);
1631     if (buffer[len-1] == '\n') break;
1632 nigel 11 }
1633 nigel 3
1634     while (len > 0 && isspace(buffer[len-1])) len--;
1635     buffer[len] = 0;
1636     if (len == 0) break;
1637    
1638     p = buffer;
1639     while (isspace(*p)) p++;
1640    
1641 nigel 9 q = dbuffer;
1642 nigel 3 while ((c = *p++) != 0)
1643     {
1644     int i = 0;
1645     int n = 0;
1646 nigel 63
1647 nigel 3 if (c == '\\') switch ((c = *p++))
1648     {
1649     case 'a': c = 7; break;
1650     case 'b': c = '\b'; break;
1651     case 'e': c = 27; break;
1652     case 'f': c = '\f'; break;
1653     case 'n': c = '\n'; break;
1654     case 'r': c = '\r'; break;
1655     case 't': c = '\t'; break;
1656     case 'v': c = '\v'; break;
1657    
1658     case '0': case '1': case '2': case '3':
1659     case '4': case '5': case '6': case '7':
1660     c -= '0';
1661     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1662     c = c * 8 + *p++ - '0';
1663 nigel 91
1664     #if !defined NOUTF8
1665     if (use_utf8 && c > 255)
1666     {
1667     unsigned char buff8[8];
1668     int ii, utn;
1669     utn = ord2utf8(c, buff8);
1670     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1671     c = buff8[ii]; /* Last byte */
1672     }
1673     #endif
1674 nigel 3 break;
1675    
1676     case 'x':
1677 nigel 49
1678     /* Handle \x{..} specially - new Perl thing for utf8 */
1679    
1680 nigel 79 #if !defined NOUTF8
1681 nigel 49 if (*p == '{')
1682     {
1683     unsigned char *pt = p;
1684     c = 0;
1685     while (isxdigit(*(++pt)))
1686     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1687     if (*pt == '}')
1688     {
1689 nigel 67 unsigned char buff8[8];
1690 nigel 49 int ii, utn;
1691 nigel 85 utn = ord2utf8(c, buff8);
1692 nigel 67 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1693     c = buff8[ii]; /* Last byte */
1694 nigel 49 p = pt + 1;
1695     break;
1696     }
1697     /* Not correct form; fall through */
1698     }
1699 nigel 79 #endif
1700 nigel 49
1701     /* Ordinary \x */
1702    
1703 nigel 3 c = 0;
1704     while (i++ < 2 && isxdigit(*p))
1705     {
1706     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1707     p++;
1708     }
1709     break;
1710    
1711 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1712 nigel 3 p--;
1713     continue;
1714    
1715 nigel 75 case '>':
1716     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1717     continue;
1718    
1719 nigel 3 case 'A': /* Option setting */
1720     options |= PCRE_ANCHORED;
1721     continue;
1722    
1723     case 'B':
1724     options |= PCRE_NOTBOL;
1725     continue;
1726    
1727 nigel 29 case 'C':
1728 nigel 63 if (isdigit(*p)) /* Set copy string */
1729     {
1730     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1731     copystrings |= 1 << n;
1732     }
1733     else if (isalnum(*p))
1734     {
1735 nigel 91 uschar *npp = copynamesptr;
1736 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1737 nigel 91 *npp++ = 0;
1738 nigel 67 *npp = 0;
1739 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1740 nigel 63 if (n < 0)
1741 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1742     copynamesptr = npp;
1743 nigel 63 }
1744     else if (*p == '+')
1745     {
1746     callout_extra = 1;
1747     p++;
1748     }
1749     else if (*p == '-')
1750     {
1751     pcre_callout = NULL;
1752     p++;
1753     }
1754     else if (*p == '!')
1755     {
1756     callout_fail_id = 0;
1757     p++;
1758     while(isdigit(*p))
1759     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1760     callout_fail_count = 0;
1761     if (*p == '!')
1762     {
1763     p++;
1764     while(isdigit(*p))
1765     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1766     }
1767     }
1768     else if (*p == '*')
1769     {
1770     int sign = 1;
1771     callout_data = 0;
1772     if (*(++p) == '-') { sign = -1; p++; }
1773     while(isdigit(*p))
1774     callout_data = callout_data * 10 + *p++ - '0';
1775     callout_data *= sign;
1776     callout_data_set = 1;
1777     }
1778 nigel 29 continue;
1779    
1780 nigel 79 #if !defined NODFA
1781 nigel 77 case 'D':
1782 nigel 79 #if !defined NOPOSIX
1783 nigel 77 if (posix || do_posix)
1784     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1785     else
1786 nigel 79 #endif
1787 nigel 77 use_dfa = 1;
1788     continue;
1789    
1790     case 'F':
1791     options |= PCRE_DFA_SHORTEST;
1792     continue;
1793 nigel 79 #endif
1794 nigel 77
1795 nigel 29 case 'G':
1796 nigel 63 if (isdigit(*p))
1797     {
1798     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1799     getstrings |= 1 << n;
1800     }
1801     else if (isalnum(*p))
1802     {
1803 nigel 91 uschar *npp = getnamesptr;
1804 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1805 nigel 91 *npp++ = 0;
1806 nigel 67 *npp = 0;
1807 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1808 nigel 63 if (n < 0)
1809 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1810     getnamesptr = npp;
1811 nigel 63 }
1812 nigel 29 continue;
1813    
1814     case 'L':
1815     getlist = 1;
1816     continue;
1817    
1818 nigel 63 case 'M':
1819     find_match_limit = 1;
1820     continue;
1821    
1822 nigel 37 case 'N':
1823     options |= PCRE_NOTEMPTY;
1824     continue;
1825    
1826 nigel 3 case 'O':
1827     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1828 nigel 53 if (n > size_offsets_max)
1829     {
1830     size_offsets_max = n;
1831 nigel 57 free(offsets);
1832 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1833 nigel 53 if (offsets == NULL)
1834     {
1835     printf("** Failed to get %d bytes of memory for offsets vector\n",
1836     size_offsets_max * sizeof(int));
1837 nigel 77 yield = 1;
1838     goto EXIT;
1839 nigel 53 }
1840     }
1841     use_size_offsets = n;
1842 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1843 nigel 3 continue;
1844    
1845 nigel 75 case 'P':
1846     options |= PCRE_PARTIAL;
1847     continue;
1848    
1849 nigel 91 case 'Q':
1850     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1851     if (extra == NULL)
1852     {
1853     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1854     extra->flags = 0;
1855     }
1856     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1857     extra->match_limit_recursion = n;
1858     continue;
1859    
1860     case 'q':
1861     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1862     if (extra == NULL)
1863     {
1864     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1865     extra->flags = 0;
1866     }
1867     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1868     extra->match_limit = n;
1869     continue;
1870    
1871 nigel 79 #if !defined NODFA
1872 nigel 77 case 'R':
1873     options |= PCRE_DFA_RESTART;
1874     continue;
1875 nigel 79 #endif
1876 nigel 77
1877 nigel 73 case 'S':
1878     show_malloc = 1;
1879     continue;
1880    
1881 nigel 3 case 'Z':
1882     options |= PCRE_NOTEOL;
1883     continue;
1884 nigel 71
1885     case '?':
1886     options |= PCRE_NO_UTF8_CHECK;
1887     continue;
1888 nigel 91
1889     case '<':
1890     {
1891     int x = check_newline(p, outfile);
1892     if (x == 0) goto NEXT_DATA;
1893     options |= x;
1894     while (*p++ != '>');
1895     }
1896     continue;
1897 nigel 3 }
1898 nigel 9 *q++ = c;
1899 nigel 3 }
1900 nigel 9 *q = 0;
1901     len = q - dbuffer;
1902 nigel 3
1903 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
1904     {
1905     printf("**Match limit not relevant for DFA matching: ignored\n");
1906     find_match_limit = 0;
1907     }
1908    
1909 nigel 3 /* Handle matching via the POSIX interface, which does not
1910 nigel 63 support timing or playing with the match limit or callout data. */
1911 nigel 3
1912 nigel 37 #if !defined NOPOSIX
1913 nigel 3 if (posix || do_posix)
1914     {
1915     int rc;
1916     int eflags = 0;
1917 nigel 63 regmatch_t *pmatch = NULL;
1918     if (use_size_offsets > 0)
1919 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1920 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1921     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1922    
1923 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1924 nigel 3
1925     if (rc != 0)
1926     {
1927 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1928 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1929     }
1930 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1931     != 0)
1932     {
1933     fprintf(outfile, "Matched with REG_NOSUB\n");
1934     }
1935 nigel 3 else
1936     {
1937 nigel 7 size_t i;
1938 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
1939 nigel 3 {
1940     if (pmatch[i].rm_so >= 0)
1941     {
1942 nigel 23 fprintf(outfile, "%2d: ", (int)i);
1943 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
1944     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1945 nigel 3 fprintf(outfile, "\n");
1946 nigel 35 if (i == 0 && do_showrest)
1947     {
1948     fprintf(outfile, " 0+ ");
1949 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1950     outfile);
1951 nigel 35 fprintf(outfile, "\n");
1952     }
1953 nigel 3 }
1954     }
1955     }
1956 nigel 53 free(pmatch);
1957 nigel 3 }
1958    
1959 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
1960 nigel 3
1961 nigel 37 else
1962     #endif /* !defined NOPOSIX */
1963    
1964 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
1965 nigel 3 {
1966 nigel 93 if (timeitm > 0)
1967 nigel 3 {
1968     register int i;
1969     clock_t time_taken;
1970     clock_t start_time = clock();
1971 nigel 77
1972 nigel 79 #if !defined NODFA
1973 nigel 77 if (all_use_dfa || use_dfa)
1974     {
1975     int workspace[1000];
1976 nigel 93 for (i = 0; i < timeitm; i++)
1977 nigel 77 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1978     options | g_notempty, use_offsets, use_size_offsets, workspace,
1979     sizeof(workspace)/sizeof(int));
1980     }
1981     else
1982 nigel 79 #endif
1983 nigel 77
1984 nigel 93 for (i = 0; i < timeitm; i++)
1985 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
1986 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1987 nigel 77
1988 nigel 3 time_taken = clock() - start_time;
1989 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
1990     (((double)time_taken * 1000.0) / (double)timeitm) /
1991 nigel 63 (double)CLOCKS_PER_SEC);
1992 nigel 3 }
1993    
1994 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
1995 nigel 87 varying limits in order to find the minimum value for the match limit and
1996     for the recursion limit. */
1997 nigel 63
1998     if (find_match_limit)
1999     {
2000     if (extra == NULL)
2001     {
2002 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2003 nigel 63 extra->flags = 0;
2004     }
2005    
2006 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2007 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2008     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2009     PCRE_ERROR_MATCHLIMIT, "match()");
2010 nigel 63
2011 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2012     options|g_notempty, use_offsets, use_size_offsets,
2013     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2014     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2015 nigel 63 }
2016    
2017     /* If callout_data is set, use the interface with additional data */
2018    
2019     else if (callout_data_set)
2020     {
2021     if (extra == NULL)
2022     {
2023 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2024 nigel 63 extra->flags = 0;
2025     }
2026     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2027 nigel 71 extra->callout_data = &callout_data;
2028 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2029     options | g_notempty, use_offsets, use_size_offsets);
2030     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2031     }
2032    
2033     /* The normal case is just to do the match once, with the default
2034     value of match_limit. */
2035    
2036 nigel 79 #if !defined NODFA
2037 nigel 77 else if (all_use_dfa || use_dfa)
2038     {
2039     int workspace[1000];
2040     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2041     options | g_notempty, use_offsets, use_size_offsets, workspace,
2042     sizeof(workspace)/sizeof(int));
2043     if (count == 0)
2044     {
2045     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2046     count = use_size_offsets/2;
2047     }
2048     }
2049 nigel 79 #endif
2050 nigel 77
2051 nigel 75 else
2052     {
2053     count = pcre_exec(re, extra, (char *)bptr, len,
2054     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2055 nigel 77 if (count == 0)
2056     {
2057     fprintf(outfile, "Matched, but too many substrings\n");
2058     count = use_size_offsets/3;
2059     }
2060 nigel 75 }
2061 nigel 3
2062 nigel 39 /* Matched */
2063    
2064 nigel 3 if (count >= 0)
2065     {
2066 nigel 93 int i, maxcount;
2067    
2068     #if !defined NODFA
2069     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2070     #endif
2071     maxcount = use_size_offsets/3;
2072    
2073     /* This is a check against a lunatic return value. */
2074    
2075     if (count > maxcount)
2076     {
2077     fprintf(outfile,
2078     "** PCRE error: returned count %d is too big for offset size %d\n",
2079     count, use_size_offsets);
2080     count = use_size_offsets/3;
2081     if (do_g || do_G)
2082     {
2083     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2084     do_g = do_G = FALSE; /* Break g/G loop */
2085     }
2086     }
2087    
2088 nigel 29 for (i = 0; i < count * 2; i += 2)
2089 nigel 3 {
2090 nigel 57 if (use_offsets[i] < 0)
2091 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2092     else
2093     {
2094     fprintf(outfile, "%2d: ", i/2);
2095 nigel 63 (void)pchars(bptr + use_offsets[i],
2096     use_offsets[i+1] - use_offsets[i], outfile);
2097 nigel 3 fprintf(outfile, "\n");
2098 nigel 35 if (i == 0)
2099     {
2100     if (do_showrest)
2101     {
2102     fprintf(outfile, " 0+ ");
2103 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2104     outfile);
2105 nigel 35 fprintf(outfile, "\n");
2106     }
2107     }
2108 nigel 3 }
2109     }
2110 nigel 29
2111     for (i = 0; i < 32; i++)
2112     {
2113     if ((copystrings & (1 << i)) != 0)
2114     {
2115 nigel 91 char copybuffer[256];
2116 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2117 nigel 37 i, copybuffer, sizeof(copybuffer));
2118 nigel 29 if (rc < 0)
2119     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2120     else
2121 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2122 nigel 29 }
2123     }
2124    
2125 nigel 91 for (copynamesptr = copynames;
2126     *copynamesptr != 0;
2127     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2128     {
2129     char copybuffer[256];
2130     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2131     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2132     if (rc < 0)
2133     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2134     else
2135     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2136     }
2137    
2138 nigel 29 for (i = 0; i < 32; i++)
2139     {
2140     if ((getstrings & (1 << i)) != 0)
2141     {
2142     const char *substring;
2143 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2144 nigel 29 i, &substring);
2145     if (rc < 0)
2146     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2147     else
2148     {
2149     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2150 nigel 49 pcre_free_substring(substring);
2151 nigel 29 }
2152     }
2153     }
2154    
2155 nigel 91 for (getnamesptr = getnames;
2156     *getnamesptr != 0;
2157     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2158     {
2159     const char *substring;
2160     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2161     count, (char *)getnamesptr, &substring);
2162     if (rc < 0)
2163     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2164     else
2165     {
2166     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2167     pcre_free_substring(substring);
2168     }
2169     }
2170    
2171 nigel 29 if (getlist)
2172     {
2173     const char **stringlist;
2174 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2175 nigel 29 &stringlist);
2176     if (rc < 0)
2177     fprintf(outfile, "get substring list failed %d\n", rc);
2178     else
2179     {
2180     for (i = 0; i < count; i++)
2181     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2182     if (stringlist[i] != NULL)
2183     fprintf(outfile, "string list not terminated by NULL\n");
2184 nigel 49 /* free((void *)stringlist); */
2185     pcre_free_substring_list(stringlist);
2186 nigel 29 }
2187     }
2188 nigel 39 }
2189 nigel 29
2190 nigel 75 /* There was a partial match */
2191    
2192     else if (count == PCRE_ERROR_PARTIAL)
2193     {
2194 nigel 77 fprintf(outfile, "Partial match");
2195 nigel 79 #if !defined NODFA
2196 nigel 77 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2197     fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2198     bptr + use_offsets[0]);
2199 nigel 79 #endif
2200 nigel 77 fprintf(outfile, "\n");
2201 nigel 75 break; /* Out of the /g loop */
2202     }
2203    
2204 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2205 nigel 47 g_notempty after a null match, this is not necessarily the end.
2206 nigel 73 We want to advance the start offset, and continue. In the case of UTF-8
2207     matching, the advance must be one character, not one byte. Fudge the
2208     offset values to achieve this. We won't be at the end of the string -
2209     that was checked before setting g_notempty. */
2210 nigel 39
2211 nigel 3 else
2212     {
2213 nigel 41 if (g_notempty != 0)
2214 nigel 35 {
2215 nigel 73 int onechar = 1;
2216 nigel 57 use_offsets[0] = start_offset;
2217 nigel 73 if (use_utf8)
2218     {
2219     while (start_offset + onechar < len)
2220     {
2221     int tb = bptr[start_offset+onechar];
2222     if (tb <= 127) break;
2223     tb &= 0xc0;
2224     if (tb != 0 && tb != 0xc0) onechar++;
2225     }
2226     }
2227     use_offsets[1] = start_offset + onechar;
2228 nigel 35 }
2229 nigel 41 else
2230     {
2231 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2232 nigel 41 {
2233 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
2234 nigel 41 }
2235 nigel 73 else fprintf(outfile, "Error %d\n", count);
2236 nigel 41 break; /* Out of the /g loop */
2237     }
2238 nigel 3 }
2239 nigel 35
2240 nigel 39 /* If not /g or /G we are done */
2241    
2242     if (!do_g && !do_G) break;
2243    
2244 nigel 41 /* If we have matched an empty string, first check to see if we are at
2245     the end of the subject. If so, the /g loop is over. Otherwise, mimic
2246     what Perl's /g option does. This turns out to be rather cunning. First
2247 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2248     same point. If this fails (picked up above) we advance to the next
2249     character. */
2250 nigel 39
2251 nigel 41 g_notempty = 0;
2252 nigel 57 if (use_offsets[0] == use_offsets[1])
2253 nigel 41 {
2254 nigel 57 if (use_offsets[0] == len) break;
2255 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2256 nigel 41 }
2257 nigel 39
2258     /* For /g, update the start offset, leaving the rest alone */
2259    
2260 nigel 57 if (do_g) start_offset = use_offsets[1];
2261 nigel 39
2262     /* For /G, update the pointer and length */
2263    
2264     else
2265 nigel 35 {
2266 nigel 57 bptr += use_offsets[1];
2267     len -= use_offsets[1];
2268 nigel 35 }
2269 nigel 39 } /* End of loop for /g and /G */
2270 nigel 91
2271     NEXT_DATA: continue;
2272 nigel 39 } /* End of loop for data lines */
2273 nigel 3
2274 nigel 11 CONTINUE:
2275 nigel 37
2276     #if !defined NOPOSIX
2277 nigel 3 if (posix || do_posix) regfree(&preg);
2278 nigel 37 #endif
2279    
2280 nigel 77 if (re != NULL) new_free(re);
2281     if (extra != NULL) new_free(extra);
2282 nigel 25 if (tables != NULL)
2283     {
2284 nigel 77 new_free((void *)tables);
2285 nigel 25 setlocale(LC_CTYPE, "C");
2286 nigel 93 locale_set = 0;
2287 nigel 25 }
2288 nigel 3 }
2289    
2290 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2291 nigel 77
2292     EXIT:
2293    
2294     if (infile != NULL && infile != stdin) fclose(infile);
2295     if (outfile != NULL && outfile != stdout) fclose(outfile);
2296    
2297     free(buffer);
2298     free(dbuffer);
2299     free(pbuffer);
2300     free(offsets);
2301    
2302     return yield;
2303 nigel 3 }
2304    
2305 nigel 77 /* End of pcretest.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12