/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 200 - (hide annotations) (download)
Wed Aug 1 09:10:40 2007 UTC (6 years, 8 months ago) by ph10
File MIME type: text/plain
File size: 68669 byte(s)
Correct errors in previous patch; tidy for test release.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 199 #include <config.h>
41 ph10 200 #endif
42 ph10 199
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50 nigel 3
51 nigel 93
52     /* A number of things vary for Windows builds. Originally, pcretest opened its
53     input and output without "b"; then I was told that "b" was needed in some
54     environments, so it was added for release 5.0 to both the input and output. (It
55     makes no difference on Unix-like systems.) Later I was told that it is wrong
56     for the input on Windows. I've now abstracted the modes into two macros that
57     are set here, to make it easier to fiddle with them, and removed "b" from the
58     input mode under Windows. */
59    
60     #if defined(_WIN32) || defined(WIN32)
61     #include <io.h> /* For _setmode() */
62     #include <fcntl.h> /* For _O_BINARY */
63     #define INPUT_MODE "r"
64     #define OUTPUT_MODE "wb"
65    
66     #else
67     #include <sys/time.h> /* These two includes are needed */
68     #include <sys/resource.h> /* for setrlimit(). */
69     #define INPUT_MODE "rb"
70     #define OUTPUT_MODE "wb"
71 nigel 91 #endif
72    
73 nigel 93
74 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
75     displaying the results of pcre_study() and we also need to know about the
76     internal macros, structures, and other internal data values; pcretest has
77     "inside information" compared to a program that strictly follows the PCRE API.
78 nigel 37
79 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
80     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81     appropriately for an application, not for building PCRE. */
82 nigel 77
83 ph10 145 #include "pcre.h"
84 nigel 77 #include "pcre_internal.h"
85    
86 nigel 85 /* We need access to the data tables that PCRE uses. So as not to have to keep
87     two copies, we include the source file here, changing the names of the external
88     symbols to prevent clashes. */
89 nigel 77
90 nigel 85 #define _pcre_utf8_table1 utf8_table1
91     #define _pcre_utf8_table1_size utf8_table1_size
92     #define _pcre_utf8_table2 utf8_table2
93     #define _pcre_utf8_table3 utf8_table3
94     #define _pcre_utf8_table4 utf8_table4
95     #define _pcre_utt utt
96     #define _pcre_utt_size utt_size
97     #define _pcre_OP_lengths OP_lengths
98    
99     #include "pcre_tables.c"
100    
101     /* We also need the pcre_printint() function for printing out compiled
102     patterns. This function is in a separate file so that it can be included in
103 nigel 93 pcre_compile.c when that module is compiled with debugging enabled.
104 nigel 85
105 nigel 93 The definition of the macro PRINTABLE, which determines whether to print an
106     output character as-is or as a hex value when showing compiled patterns, is
107     contained in this file. We use it here also, in cases when the locale has not
108     been explicitly changed, so as to get consistent output from systems that
109     differ in their output from isprint() even in the "C" locale. */
110    
111 nigel 85 #include "pcre_printint.src"
112    
113 nigel 93 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
114 nigel 85
115 nigel 93
116 nigel 37 /* It is possible to compile this test program without including support for
117     testing the POSIX interface, though this is not available via the standard
118     Makefile. */
119    
120     #if !defined NOPOSIX
121 nigel 3 #include "pcreposix.h"
122 nigel 37 #endif
123 nigel 3
124 ph10 107 /* It is also possible, for the benefit of the version currently imported into
125     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
126     interface to the DFA matcher (NODFA), and without the doublecheck of the old
127     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
128     UTF8 support if PCRE is built without it. */
129 nigel 79
130 ph10 107 #ifndef SUPPORT_UTF8
131     #ifndef NOUTF8
132     #define NOUTF8
133     #endif
134     #endif
135 nigel 79
136 ph10 107
137 nigel 85 /* Other parameters */
138    
139 nigel 3 #ifndef CLOCKS_PER_SEC
140     #ifdef CLK_TCK
141     #define CLOCKS_PER_SEC CLK_TCK
142     #else
143     #define CLOCKS_PER_SEC 100
144     #endif
145     #endif
146    
147 nigel 93 /* This is the default loop count for timing. */
148    
149 nigel 75 #define LOOPREPEAT 500000
150 nigel 3
151 nigel 85 /* Static variables */
152    
153 nigel 3 static FILE *outfile;
154     static int log_store = 0;
155 nigel 63 static int callout_count;
156     static int callout_extra;
157     static int callout_fail_count;
158     static int callout_fail_id;
159     static int first_callout;
160 nigel 93 static int locale_set = 0;
161 nigel 73 static int show_malloc;
162 nigel 67 static int use_utf8;
163 nigel 43 static size_t gotten_store;
164 nigel 3
165 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
166    
167     static int buffer_size = 50000;
168     static uschar *buffer = NULL;
169     static uschar *dbuffer = NULL;
170 nigel 75 static uschar *pbuffer = NULL;
171 nigel 3
172 nigel 75
173 nigel 49
174     /*************************************************
175 nigel 91 * Read or extend an input line *
176     *************************************************/
177    
178     /* Input lines are read into buffer, but both patterns and data lines can be
179     continued over multiple input lines. In addition, if the buffer fills up, we
180     want to automatically expand it so as to be able to handle extremely large
181     lines that are needed for certain stress tests. When the input buffer is
182     expanded, the other two buffers must also be expanded likewise, and the
183     contents of pbuffer, which are a copy of the input for callouts, must be
184     preserved (for when expansion happens for a data line). This is not the most
185     optimal way of handling this, but hey, this is just a test program!
186    
187     Arguments:
188     f the file to read
189     start where in buffer to start (this *must* be within buffer)
190    
191     Returns: pointer to the start of new data
192     could be a copy of start, or could be moved
193     NULL if no data read and EOF reached
194     */
195    
196     static uschar *
197     extend_inputline(FILE *f, uschar *start)
198     {
199     uschar *here = start;
200    
201     for (;;)
202     {
203     int rlen = buffer_size - (here - buffer);
204 nigel 93
205 nigel 91 if (rlen > 1000)
206     {
207     int dlen;
208     if (fgets((char *)here, rlen, f) == NULL)
209     return (here == start)? NULL : start;
210     dlen = (int)strlen((char *)here);
211     if (dlen > 0 && here[dlen - 1] == '\n') return start;
212     here += dlen;
213     }
214    
215     else
216     {
217     int new_buffer_size = 2*buffer_size;
218     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
219     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
220     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
221    
222     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
223     {
224     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
225     exit(1);
226     }
227    
228     memcpy(new_buffer, buffer, buffer_size);
229     memcpy(new_pbuffer, pbuffer, buffer_size);
230    
231     buffer_size = new_buffer_size;
232    
233     start = new_buffer + (start - buffer);
234     here = new_buffer + (here - buffer);
235    
236     free(buffer);
237     free(dbuffer);
238     free(pbuffer);
239    
240     buffer = new_buffer;
241     dbuffer = new_dbuffer;
242     pbuffer = new_pbuffer;
243     }
244     }
245    
246     return NULL; /* Control never gets here */
247     }
248    
249    
250    
251    
252    
253    
254    
/*************************************************
*          Read number from string               *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then a run of decimal digits is converted. A
value too large for an int is clamped to INT_MAX rather than being allowed to
overflow (signed overflow is undefined behaviour). All the digits are still
consumed in that case, so the caller's "*endptr == 0" test behaves as before.

Arguments:
  str          string to be converted
  endptr       where to put the end pointer (first character not consumed)

Returns:       the value as a non-negative int; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;

while (*str != 0 && isspace(*str)) str++;

while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* result*10 + digit fits in an int only if result <= (INT_MAX-digit)/10 */

  if (result > (INT_MAX - digit) / 10)
    result = INT_MAX;
  else
    result = result * 10 + digit;
  }

*endptr = str;
return result;
}
279    
280    
281    
282 nigel 49
283     /*************************************************
284     * Convert UTF-8 string to value *
285     *************************************************/
286    
287     /* This function takes one or more bytes that represents a UTF-8 character,
288     and returns the value of the character.
289    
290     Argument:
291 nigel 91 utf8bytes a pointer to the byte vector
292     vptr a pointer to an int to receive the value
293 nigel 49
294 nigel 91 Returns: > 0 => the number of bytes consumed
295     -6 to 0 => malformed UTF-8 character at offset = (-return)
296 nigel 49 */
297    
298 nigel 79 #if !defined NOUTF8
299    
300 nigel 67 static int
301 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
302 nigel 49 {
303 nigel 91 int c = *utf8bytes++;
304 nigel 49 int d = c;
305     int i, j, s;
306    
307     for (i = -1; i < 6; i++) /* i is number of additional bytes */
308     {
309     if ((d & 0x80) == 0) break;
310     d <<= 1;
311     }
312    
313     if (i == -1) { *vptr = c; return 1; } /* ascii character */
314     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
315    
316     /* i now has a value in the range 1-5 */
317    
318 nigel 59 s = 6*i;
319 nigel 85 d = (c & utf8_table3[i]) << s;
320 nigel 49
321     for (j = 0; j < i; j++)
322     {
323 nigel 91 c = *utf8bytes++;
324 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
325 nigel 59 s -= 6;
326 nigel 49 d |= (c & 0x3f) << s;
327     }
328    
329     /* Check that encoding was the correct unique one */
330    
331 nigel 85 for (j = 0; j < utf8_table1_size; j++)
332     if (d <= utf8_table1[j]) break;
333 nigel 49 if (j != i) return -(i+1);
334    
335     /* Valid value */
336    
337     *vptr = d;
338     return i+1;
339     }
340    
341 nigel 79 #endif
342 nigel 49
343    
344 nigel 79
345 nigel 63 /*************************************************
346 nigel 85 * Convert character value to UTF-8 *
347     *************************************************/
348    
349     /* This function takes an integer value in the range 0 - 0x7fffffff
350     and encodes it as a UTF-8 character in 0 to 6 bytes.
351    
352     Arguments:
353     cvalue the character value
354 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
355 nigel 85
356     Returns: number of characters placed in the buffer
357     */
358    
359 nigel 93 #if !defined NOUTF8
360    
361 nigel 85 static int
362 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
363 nigel 85 {
364     register int i, j;
365     for (i = 0; i < utf8_table1_size; i++)
366     if (cvalue <= utf8_table1[i]) break;
367 nigel 91 utf8bytes += i;
368 nigel 85 for (j = i; j > 0; j--)
369     {
370 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
371 nigel 85 cvalue >>= 6;
372     }
373 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
374 nigel 85 return i + 1;
375     }
376    
377 nigel 93 #endif
378 nigel 85
379    
380 nigel 93
381 nigel 85 /*************************************************
382 nigel 63 * Print character string *
383     *************************************************/
384 nigel 49
385 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
386     mode. Yields number of characters printed. If handed a NULL file, just counts
387     chars without printing. */
388 nigel 49
389 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
390 nigel 3 {
391 nigel 85 int c = 0;
392 nigel 63 int yield = 0;
393 nigel 3
394 nigel 63 while (length-- > 0)
395 nigel 3 {
396 nigel 79 #if !defined NOUTF8
397 nigel 67 if (use_utf8)
398 nigel 63 {
399     int rc = utf82ord(p, &c);
400 nigel 3
401 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
402     {
403     length -= rc - 1;
404     p += rc;
405 nigel 93 if (PRINTHEX(c))
406 nigel 63 {
407     if (f != NULL) fprintf(f, "%c", c);
408     yield++;
409     }
410     else
411     {
412 nigel 93 int n = 4;
413     if (f != NULL) fprintf(f, "\\x{%02x}", c);
414     yield += (n <= 0x000000ff)? 2 :
415     (n <= 0x00000fff)? 3 :
416     (n <= 0x0000ffff)? 4 :
417     (n <= 0x000fffff)? 5 : 6;
418 nigel 63 }
419     continue;
420     }
421     }
422 nigel 79 #endif
423 nigel 3
424 nigel 63 /* Not UTF-8, or malformed UTF-8 */
425    
426 nigel 93 c = *p++;
427     if (PRINTHEX(c))
428 nigel 3 {
429 nigel 63 if (f != NULL) fprintf(f, "%c", c);
430     yield++;
431 nigel 3 }
432 nigel 63 else
433 nigel 3 {
434 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
435     yield += 4;
436     }
437     }
438 nigel 3
439 nigel 63 return yield;
440     }
441 nigel 23
442 nigel 3
443 nigel 23
444 nigel 63 /*************************************************
445     * Callout function *
446     *************************************************/
447 nigel 3
448 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
449     the match. Yield zero unless more callouts than the fail count, or the callout
450     data is not zero. */
451 nigel 3
452 nigel 63 static int callout(pcre_callout_block *cb)
453     {
454     FILE *f = (first_callout | callout_extra)? outfile : NULL;
455 nigel 75 int i, pre_start, post_start, subject_length;
456 nigel 3
457 nigel 63 if (callout_extra)
458     {
459     fprintf(f, "Callout %d: last capture = %d\n",
460     cb->callout_number, cb->capture_last);
461 nigel 3
462 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
463     {
464     if (cb->offset_vector[i] < 0)
465     fprintf(f, "%2d: <unset>\n", i/2);
466     else
467     {
468     fprintf(f, "%2d: ", i/2);
469     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
470     cb->offset_vector[i+1] - cb->offset_vector[i], f);
471     fprintf(f, "\n");
472     }
473     }
474     }
475 nigel 3
476 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
477     datails. On subsequent calls in the same match, we use pchars just to find the
478     printed lengths of the substrings. */
479 nigel 3
480 nigel 63 if (f != NULL) fprintf(f, "--->");
481 nigel 3
482 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
483     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
484     cb->current_position - cb->start_match, f);
485 nigel 3
486 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
487    
488 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
489     cb->subject_length - cb->current_position, f);
490 nigel 3
491 nigel 63 if (f != NULL) fprintf(f, "\n");
492 nigel 9
493 nigel 63 /* Always print appropriate indicators, with callout number if not already
494 nigel 75 shown. For automatic callouts, show the pattern offset. */
495 nigel 3
496 nigel 75 if (cb->callout_number == 255)
497     {
498     fprintf(outfile, "%+3d ", cb->pattern_position);
499     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
500     }
501     else
502     {
503     if (callout_extra) fprintf(outfile, " ");
504     else fprintf(outfile, "%3d ", cb->callout_number);
505     }
506 nigel 3
507 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
508     fprintf(outfile, "^");
509 nigel 3
510 nigel 63 if (post_start > 0)
511     {
512     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
513     fprintf(outfile, "^");
514 nigel 3 }
515    
516 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
517     fprintf(outfile, " ");
518    
519     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
520     pbuffer + cb->pattern_position);
521    
522 nigel 63 fprintf(outfile, "\n");
523     first_callout = 0;
524 nigel 3
525 nigel 71 if (cb->callout_data != NULL)
526 nigel 49 {
527 nigel 71 int callout_data = *((int *)(cb->callout_data));
528     if (callout_data != 0)
529     {
530     fprintf(outfile, "Callout data = %d\n", callout_data);
531     return callout_data;
532     }
533 nigel 63 }
534 nigel 49
535 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
536     (++callout_count >= callout_fail_count)? 1 : 0;
537 nigel 3 }
538    
539    
540 nigel 63 /*************************************************
541 nigel 73 * Local malloc functions *
542 nigel 63 *************************************************/
543 nigel 3
544     /* Alternative malloc function, to test functionality and show the size of the
545     compiled re. */
546    
547     static void *new_malloc(size_t size)
548     {
549 nigel 73 void *block = malloc(size);
550 nigel 43 gotten_store = size;
551 nigel 73 if (show_malloc)
552 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
553 nigel 73 return block;
554 nigel 3 }
555    
556 nigel 73 static void new_free(void *block)
557     {
558     if (show_malloc)
559     fprintf(outfile, "free %p\n", block);
560     free(block);
561     }
562 nigel 3
563    
564 nigel 73 /* For recursion malloc/free, to test stacking calls */
565    
566     static void *stack_malloc(size_t size)
567     {
568     void *block = malloc(size);
569     if (show_malloc)
570 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
571 nigel 73 return block;
572     }
573    
574     static void stack_free(void *block)
575     {
576     if (show_malloc)
577     fprintf(outfile, "stack_free %p\n", block);
578     free(block);
579     }
580    
581    
582 nigel 63 /*************************************************
583     * Call pcre_fullinfo() *
584     *************************************************/
585 nigel 43
586     /* Get one piece of information from the pcre_fullinfo() function */
587    
588     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
589     {
590     int rc;
591     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
592     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
593     }
594    
595    
596    
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value      the value to flip; only its low 16 bits (n == 2) or low 32 bits
             (any other n) contribute to the result
  n          2 for a 2-byte value; anything else is treated as 4 bytes

Returns:     the value with its bytes in the opposite order
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int byte0 = value & 0xff;
unsigned long int byte1 = (value >> 8) & 0xff;
unsigned long int byte2, byte3;

if (n == 2) return (byte0 << 8) | byte1;

byte2 = (value >> 16) & 0xff;
byte3 = (value >> 24) & 0xff;

return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
610    
611    
612    
613    
614     /*************************************************
615 nigel 87 * Check match or recursion limit *
616     *************************************************/
617    
618     static int
619     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
620     int start_offset, int options, int *use_offsets, int use_size_offsets,
621     int flag, unsigned long int *limit, int errnumber, const char *msg)
622     {
623     int count;
624     int min = 0;
625     int mid = 64;
626     int max = -1;
627    
628     extra->flags |= flag;
629    
630     for (;;)
631     {
632     *limit = mid;
633    
634     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
635     use_offsets, use_size_offsets);
636    
637     if (count == errnumber)
638     {
639     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
640     min = mid;
641     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
642     }
643    
644     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
645     count == PCRE_ERROR_PARTIAL)
646     {
647     if (mid == min + 1)
648     {
649     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
650     break;
651     }
652     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
653     max = mid;
654     mid = (min + mid)/2;
655     }
656     else break; /* Some other error */
657     }
658    
659     extra->flags &= ~flag;
660     return count;
661     }
662    
663    
664    
665     /*************************************************
666 nigel 91 * Check newline indicator *
667     *************************************************/
668    
669     /* This is used both at compile and run-time to check for <xxx> escapes, where
670 ph10 149 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
671     no match.
672 nigel 91
673     Arguments:
674     p points after the leading '<'
675     f file for error message
676    
677     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
678     */
679    
680     static int
681     check_newline(uschar *p, FILE *f)
682     {
683     if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
684     if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
685     if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
686 ph10 149 if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
687 nigel 93 if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
688 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
689     return 0;
690     }
691    
692    
693    
/*************************************************
*                Usage function                  *
*************************************************/

/* Write the command-line summary to stdout. The -dfa and -p lines are left
out when pcretest is built with NODFA or NOPOSIX respectively. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input> [<output>]]\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n",
      stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n",
      stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s output store (memory) used information\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n",
      stdout);
}
723    
724    
725    
726     /*************************************************
727 nigel 63 * Main Program *
728     *************************************************/
729 nigel 43
730 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
731     consist of a regular expression, in delimiters and optionally followed by
732     options, followed by a set of test data, terminated by an empty line. */
733    
734     int main(int argc, char **argv)
735     {
736     FILE *infile = stdin;
737     int options = 0;
738     int study_options = 0;
739     int op = 1;
740     int timeit = 0;
741 nigel 93 int timeitm = 0;
742 nigel 3 int showinfo = 0;
743 nigel 31 int showstore = 0;
744 nigel 87 int quiet = 0;
745 nigel 53 int size_offsets = 45;
746     int size_offsets_max;
747 nigel 77 int *offsets = NULL;
748 nigel 53 #if !defined NOPOSIX
749 nigel 3 int posix = 0;
750 nigel 53 #endif
751 nigel 3 int debug = 0;
752 nigel 11 int done = 0;
753 nigel 77 int all_use_dfa = 0;
754     int yield = 0;
755 nigel 91 int stack_size;
756 nigel 3
757 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
758     that 1024 is plenty long enough for the few names we'll be testing. */
759 nigel 69
760 nigel 91 uschar copynames[1024];
761     uschar getnames[1024];
762    
763     uschar *copynamesptr;
764     uschar *getnamesptr;
765    
766 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
767 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
768 nigel 69
769 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
770     dbuffer = (unsigned char *)malloc(buffer_size);
771     pbuffer = (unsigned char *)malloc(buffer_size);
772 nigel 69
773 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
774 nigel 3
775 nigel 93 outfile = stdout;
776    
777     /* The following _setmode() stuff is some Windows magic that tells its runtime
778     library to translate CRLF into a single LF character. At least, that's what
779     I've been told: never having used Windows I take this all on trust. Originally
780     it set 0x8000, but then I was advised that _O_BINARY was better. */
781    
782 nigel 75 #if defined(_WIN32) || defined(WIN32)
783 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
784     #endif
785 nigel 75
786 nigel 3 /* Scan options */
787    
788     while (argc > 1 && argv[op][0] == '-')
789     {
790 nigel 63 unsigned char *endptr;
791 nigel 53
792 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
793     showstore = 1;
794 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
795 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
796 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
797     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
798 nigel 79 #if !defined NODFA
799 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
800 nigel 79 #endif
801 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
802 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
803     *endptr == 0))
804 nigel 53 {
805     op++;
806     argc--;
807     }
808 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
809     {
810     int both = argv[op][2] == 0;
811     int temp;
812     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
813     *endptr == 0))
814     {
815     timeitm = temp;
816     op++;
817     argc--;
818     }
819     else timeitm = LOOPREPEAT;
820     if (both) timeit = timeitm;
821     }
822 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
823     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
824     *endptr == 0))
825     {
826 nigel 93 #if defined(_WIN32) || defined(WIN32)
827 nigel 91 printf("PCRE: -S not supported on this OS\n");
828     exit(1);
829     #else
830     int rc;
831     struct rlimit rlim;
832     getrlimit(RLIMIT_STACK, &rlim);
833     rlim.rlim_cur = stack_size * 1024 * 1024;
834     rc = setrlimit(RLIMIT_STACK, &rlim);
835     if (rc != 0)
836     {
837     printf("PCRE: setrlimit() failed with error %d\n", rc);
838     exit(1);
839     }
840     op++;
841     argc--;
842     #endif
843     }
844 nigel 53 #if !defined NOPOSIX
845 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
846 nigel 53 #endif
847 nigel 63 else if (strcmp(argv[op], "-C") == 0)
848     {
849     int rc;
850     printf("PCRE version %s\n", pcre_version());
851     printf("Compiled with\n");
852     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
853     printf(" %sUTF-8 support\n", rc? "" : "No ");
854 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
855     printf(" %sUnicode properties support\n", rc? "" : "No ");
856 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
857 nigel 91 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
858 nigel 93 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
859 ph10 150 (rc == -2)? "ANYCRLF" :
860 nigel 93 (rc == -1)? "ANY" : "???");
861 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
862     printf(" Internal link size = %d\n", rc);
863     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
864     printf(" POSIX malloc threshold = %d\n", rc);
865     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
866     printf(" Default match limit = %d\n", rc);
867 nigel 87 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
868     printf(" Default recursion depth limit = %d\n", rc);
869 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
870     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
871 ph10 121 goto EXIT;
872 nigel 63 }
873 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
874     strcmp(argv[op], "--help") == 0)
875     {
876     usage();
877     goto EXIT;
878     }
879 nigel 3 else
880     {
881 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
882 nigel 93 usage();
883 nigel 77 yield = 1;
884     goto EXIT;
885 nigel 3 }
886     op++;
887     argc--;
888     }
889    
890 nigel 53 /* Get the store for the offsets vector, and remember what it was */
891    
892     size_offsets_max = size_offsets;
893 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
894 nigel 53 if (offsets == NULL)
895     {
896     printf("** Failed to get %d bytes of memory for offsets vector\n",
897 ph10 151 (int)(size_offsets_max * sizeof(int)));
898 nigel 77 yield = 1;
899     goto EXIT;
900 nigel 53 }
901    
902 nigel 3 /* Sort out the input and output files */
903    
904     if (argc > 1)
905     {
906 nigel 93 infile = fopen(argv[op], INPUT_MODE);
907 nigel 3 if (infile == NULL)
908     {
909     printf("** Failed to open %s\n", argv[op]);
910 nigel 77 yield = 1;
911     goto EXIT;
912 nigel 3 }
913     }
914    
915     if (argc > 2)
916     {
917 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
918 nigel 3 if (outfile == NULL)
919     {
920     printf("** Failed to open %s\n", argv[op+1]);
921 nigel 77 yield = 1;
922     goto EXIT;
923 nigel 3 }
924     }
925    
926     /* Set alternative malloc function */
927    
928     pcre_malloc = new_malloc;
929 nigel 73 pcre_free = new_free;
930     pcre_stack_malloc = stack_malloc;
931     pcre_stack_free = stack_free;
932 nigel 3
933 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
934 nigel 3
935 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
936 nigel 3
937     /* Main loop */
938    
939 nigel 11 while (!done)
940 nigel 3 {
941     pcre *re = NULL;
942     pcre_extra *extra = NULL;
943 nigel 37
944     #if !defined NOPOSIX /* There are still compilers that require no indent */
945 nigel 3 regex_t preg;
946 nigel 45 int do_posix = 0;
947 nigel 37 #endif
948    
949 nigel 7 const char *error;
950 nigel 25 unsigned char *p, *pp, *ppp;
951 nigel 75 unsigned char *to_file = NULL;
952 nigel 53 const unsigned char *tables = NULL;
953 nigel 75 unsigned long int true_size, true_study_size = 0;
954     size_t size, regex_gotten_store;
955 nigel 3 int do_study = 0;
956 nigel 25 int do_debug = debug;
957 ph10 123 int debug_lengths = 1;
958 nigel 35 int do_G = 0;
959     int do_g = 0;
960 nigel 25 int do_showinfo = showinfo;
961 nigel 35 int do_showrest = 0;
962 nigel 75 int do_flip = 0;
963 nigel 93 int erroroffset, len, delimiter, poffset;
964 nigel 3
965 nigel 67 use_utf8 = 0;
966 nigel 63
967 nigel 3 if (infile == stdin) printf(" re> ");
968 nigel 91 if (extend_inputline(infile, buffer) == NULL) break;
969 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
970 nigel 63 fflush(outfile);
971 nigel 3
972     p = buffer;
973     while (isspace(*p)) p++;
974     if (*p == 0) continue;
975    
976 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
977 nigel 3
978 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
979     {
980 nigel 91 unsigned long int magic, get_options;
981 nigel 75 uschar sbuf[8];
982     FILE *f;
983    
984     p++;
985     pp = p + (int)strlen((char *)p);
986     while (isspace(pp[-1])) pp--;
987     *pp = 0;
988    
989     f = fopen((char *)p, "rb");
990     if (f == NULL)
991     {
992     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
993     continue;
994     }
995    
996     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
997    
998     true_size =
999     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1000     true_study_size =
1001     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1002    
1003     re = (real_pcre *)new_malloc(true_size);
1004     regex_gotten_store = gotten_store;
1005    
1006     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1007    
1008     magic = ((real_pcre *)re)->magic_number;
1009     if (magic != MAGIC_NUMBER)
1010     {
1011     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1012     {
1013     do_flip = 1;
1014     }
1015     else
1016     {
1017     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1018     fclose(f);
1019     continue;
1020     }
1021     }
1022    
1023     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1024     do_flip? " (byte-inverted)" : "", p);
1025    
1026     /* Need to know if UTF-8 for printing data strings */
1027    
1028 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1029     use_utf8 = (get_options & PCRE_UTF8) != 0;
1030 nigel 75
1031     /* Now see if there is any following study data */
1032    
1033     if (true_study_size != 0)
1034     {
1035     pcre_study_data *psd;
1036    
1037     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1038     extra->flags = PCRE_EXTRA_STUDY_DATA;
1039    
1040     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1041     extra->study_data = psd;
1042    
1043     if (fread(psd, 1, true_study_size, f) != true_study_size)
1044     {
1045     FAIL_READ:
1046     fprintf(outfile, "Failed to read data from %s\n", p);
1047     if (extra != NULL) new_free(extra);
1048     if (re != NULL) new_free(re);
1049     fclose(f);
1050     continue;
1051     }
1052     fprintf(outfile, "Study data loaded from %s\n", p);
1053     do_study = 1; /* To get the data output if requested */
1054     }
1055     else fprintf(outfile, "No study data\n");
1056    
1057     fclose(f);
1058     goto SHOW_INFO;
1059     }
1060    
1061     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1062     the pattern; if it isn't complete, read more. */
1063    
1064 nigel 3 delimiter = *p++;
1065    
1066 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1067 nigel 3 {
1068 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1069 nigel 3 goto SKIP_DATA;
1070     }
1071    
1072     pp = p;
1073 nigel 93 poffset = p - buffer;
1074 nigel 3
1075     for(;;)
1076     {
1077 nigel 29 while (*pp != 0)
1078     {
1079     if (*pp == '\\' && pp[1] != 0) pp++;
1080     else if (*pp == delimiter) break;
1081     pp++;
1082     }
1083 nigel 3 if (*pp != 0) break;
1084     if (infile == stdin) printf(" > ");
1085 nigel 91 if ((pp = extend_inputline(infile, pp)) == NULL)
1086 nigel 3 {
1087     fprintf(outfile, "** Unexpected EOF\n");
1088 nigel 11 done = 1;
1089     goto CONTINUE;
1090 nigel 3 }
1091 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1092 nigel 3 }
1093    
1094 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1095     pointer to the correct relative point in the buffer. */
1096    
1097     p = buffer + poffset;
1098    
1099 nigel 29 /* If the first character after the delimiter is backslash, make
1100     the pattern end with backslash. This is purely to provide a way
1101     of testing for the error message when a pattern ends with backslash. */
1102    
1103     if (pp[1] == '\\') *pp++ = '\\';
1104    
1105 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1106     for callouts. */
1107 nigel 3
1108     *pp++ = 0;
1109 nigel 75 strcpy((char *)pbuffer, (char *)p);
1110 nigel 3
1111     /* Look for options after final delimiter */
1112    
1113     options = 0;
1114     study_options = 0;
1115 nigel 31 log_store = showstore; /* default from command line */
1116    
1117 nigel 3 while (*pp != 0)
1118     {
1119     switch (*pp++)
1120     {
1121 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1122 nigel 35 case 'g': do_g = 1; break;
1123 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1124     case 'm': options |= PCRE_MULTILINE; break;
1125     case 's': options |= PCRE_DOTALL; break;
1126     case 'x': options |= PCRE_EXTENDED; break;
1127 nigel 25
1128 nigel 35 case '+': do_showrest = 1; break;
1129 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1130 nigel 93 case 'B': do_debug = 1; break;
1131 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1132 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1133 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1134 nigel 75 case 'F': do_flip = 1; break;
1135 nigel 35 case 'G': do_G = 1; break;
1136 nigel 25 case 'I': do_showinfo = 1; break;
1137 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1138 nigel 31 case 'M': log_store = 1; break;
1139 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1140 nigel 37
1141     #if !defined NOPOSIX
1142 nigel 3 case 'P': do_posix = 1; break;
1143 nigel 37 #endif
1144    
1145 nigel 3 case 'S': do_study = 1; break;
1146 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1147 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1148 ph10 126 case 'Z': debug_lengths = 0; break;
1149 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1150 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1151 nigel 25
1152     case 'L':
1153     ppp = pp;
1154 nigel 93 /* The '\r' test here is so that it works on Windows. */
1155     /* The '0' test is just in case this is an unterminated line. */
1156     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1157 nigel 25 *ppp = 0;
1158     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1159     {
1160     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1161     goto SKIP_DATA;
1162     }
1163 nigel 93 locale_set = 1;
1164 nigel 25 tables = pcre_maketables();
1165     pp = ppp;
1166     break;
1167    
1168 nigel 75 case '>':
1169     to_file = pp;
1170     while (*pp != 0) pp++;
1171     while (isspace(pp[-1])) pp--;
1172     *pp = 0;
1173     break;
1174    
1175 nigel 91 case '<':
1176     {
1177     int x = check_newline(pp, outfile);
1178     if (x == 0) goto SKIP_DATA;
1179     options |= x;
1180     while (*pp++ != '>');
1181     }
1182     break;
1183    
1184 nigel 77 case '\r': /* So that it works in Windows */
1185     case '\n':
1186     case ' ':
1187     break;
1188 nigel 75
1189 nigel 3 default:
1190     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1191     goto SKIP_DATA;
1192     }
1193     }
1194    
1195 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1196 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1197     local character tables. */
1198 nigel 3
1199 nigel 37 #if !defined NOPOSIX
1200 nigel 3 if (posix || do_posix)
1201     {
1202     int rc;
1203     int cflags = 0;
1204 nigel 75
1205 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1206     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1207 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1208 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1209     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1210    
1211 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1212    
1213     /* Compilation failed; go back for another re, skipping to blank line
1214     if non-interactive. */
1215    
1216     if (rc != 0)
1217     {
1218 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1219 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1220     goto SKIP_DATA;
1221     }
1222     }
1223    
1224     /* Handle compiling via the native interface */
1225    
1226     else
1227 nigel 37 #endif /* !defined NOPOSIX */
1228    
1229 nigel 3 {
1230 nigel 93 if (timeit > 0)
1231 nigel 3 {
1232     register int i;
1233     clock_t time_taken;
1234     clock_t start_time = clock();
1235 nigel 93 for (i = 0; i < timeit; i++)
1236 nigel 3 {
1237 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1238 nigel 3 if (re != NULL) free(re);
1239     }
1240     time_taken = clock() - start_time;
1241 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1242     (((double)time_taken * 1000.0) / (double)timeit) /
1243 nigel 63 (double)CLOCKS_PER_SEC);
1244 nigel 3 }
1245    
1246 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1247 nigel 3
1248     /* Compilation failed; go back for another re, skipping to blank line
1249     if non-interactive. */
1250    
1251     if (re == NULL)
1252     {
1253     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1254     SKIP_DATA:
1255     if (infile != stdin)
1256     {
1257     for (;;)
1258     {
1259 nigel 91 if (extend_inputline(infile, buffer) == NULL)
1260 nigel 11 {
1261     done = 1;
1262     goto CONTINUE;
1263     }
1264 nigel 3 len = (int)strlen((char *)buffer);
1265     while (len > 0 && isspace(buffer[len-1])) len--;
1266     if (len == 0) break;
1267     }
1268     fprintf(outfile, "\n");
1269     }
1270 nigel 25 goto CONTINUE;
1271 nigel 3 }
1272    
1273 nigel 43 /* Compilation succeeded; print data if required. There are now two
1274     info-returning functions. The old one has a limited interface and
1275     returns only limited data. Check that it agrees with the newer one. */
1276 nigel 3
1277 nigel 63 if (log_store)
1278     fprintf(outfile, "Memory allocation (code space): %d\n",
1279     (int)(gotten_store -
1280     sizeof(real_pcre) -
1281     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1282    
1283 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1284     and remember the store that was got. */
1285    
1286     true_size = ((real_pcre *)re)->size;
1287     regex_gotten_store = gotten_store;
1288    
1289     /* If /S was present, study the regexp to generate additional info to
1290     help with the matching. */
1291    
1292     if (do_study)
1293     {
1294 nigel 93 if (timeit > 0)
1295 nigel 75 {
1296     register int i;
1297     clock_t time_taken;
1298     clock_t start_time = clock();
1299 nigel 93 for (i = 0; i < timeit; i++)
1300 nigel 75 extra = pcre_study(re, study_options, &error);
1301     time_taken = clock() - start_time;
1302     if (extra != NULL) free(extra);
1303 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1304     (((double)time_taken * 1000.0) / (double)timeit) /
1305 nigel 75 (double)CLOCKS_PER_SEC);
1306     }
1307     extra = pcre_study(re, study_options, &error);
1308     if (error != NULL)
1309     fprintf(outfile, "Failed to study: %s\n", error);
1310     else if (extra != NULL)
1311     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1312     }
1313    
1314     /* If the 'F' option was present, we flip the bytes of all the integer
1315     fields in the regex data block and the study block. This is to make it
1316     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1317     compiled on a different architecture. */
1318    
1319     if (do_flip)
1320     {
1321     real_pcre *rre = (real_pcre *)re;
1322     rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1323     rre->size = byteflip(rre->size, sizeof(rre->size));
1324     rre->options = byteflip(rre->options, sizeof(rre->options));
1325     rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1326     rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1327     rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1328     rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1329     rre->name_table_offset = byteflip(rre->name_table_offset,
1330     sizeof(rre->name_table_offset));
1331     rre->name_entry_size = byteflip(rre->name_entry_size,
1332     sizeof(rre->name_entry_size));
1333     rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1334    
1335     if (extra != NULL)
1336     {
1337     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1338     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1339     rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1340     }
1341     }
1342    
1343     /* Extract information from the compiled data if required */
1344    
1345     SHOW_INFO:
1346    
1347 nigel 93 if (do_debug)
1348     {
1349     fprintf(outfile, "------------------------------------------------------------------\n");
1350 ph10 116 pcre_printint(re, outfile, debug_lengths);
1351 nigel 93 }
1352    
1353 nigel 25 if (do_showinfo)
1354 nigel 3 {
1355 nigel 75 unsigned long int get_options, all_options;
1356 nigel 79 #if !defined NOINFOCHECK
1357 nigel 43 int old_first_char, old_options, old_count;
1358 nigel 79 #endif
1359 ph10 169 int count, backrefmax, first_char, need_char, okpartial, jchanged;
1360 nigel 63 int nameentrysize, namecount;
1361     const uschar *nametable;
1362 nigel 3
1363 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1364 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1365     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1366     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1367 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1368 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1369 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1370     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1371 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1372 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1373     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1374 nigel 43
1375 nigel 79 #if !defined NOINFOCHECK
1376 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1377 nigel 3 if (count < 0) fprintf(outfile,
1378 nigel 43 "Error %d from pcre_info()\n", count);
1379 nigel 3 else
1380     {
1381 nigel 43 if (old_count != count) fprintf(outfile,
1382     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1383     old_count);
1384 nigel 37
1385 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1386     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1387     first_char, old_first_char);
1388 nigel 37
1389 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1390     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1391     get_options, old_options);
1392 nigel 43 }
1393 nigel 79 #endif
1394 nigel 43
1395 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1396 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1397 nigel 77 (int)size, (int)regex_gotten_store);
1398 nigel 43
1399     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1400     if (backrefmax > 0)
1401     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1402 nigel 63
1403     if (namecount > 0)
1404     {
1405     fprintf(outfile, "Named capturing subpatterns:\n");
1406     while (namecount-- > 0)
1407     {
1408     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1409     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1410     GET2(nametable, 0));
1411     nametable += nameentrysize;
1412     }
1413     }
1414 ph10 172
1415 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1416 nigel 63
1417 nigel 75 all_options = ((real_pcre *)re)->options;
1418 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1419 nigel 75
1420 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1421 nigel 91 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1422 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1423     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1424     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1425     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1426 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1427 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1428     ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1429     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1430     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1431 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1432 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1433 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1434     ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1435 ph10 172
1436 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1437 nigel 43
1438 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1439 nigel 91 {
1440     case PCRE_NEWLINE_CR:
1441     fprintf(outfile, "Forced newline sequence: CR\n");
1442     break;
1443 nigel 43
1444 nigel 91 case PCRE_NEWLINE_LF:
1445     fprintf(outfile, "Forced newline sequence: LF\n");
1446     break;
1447    
1448     case PCRE_NEWLINE_CRLF:
1449     fprintf(outfile, "Forced newline sequence: CRLF\n");
1450     break;
1451    
1452 ph10 149 case PCRE_NEWLINE_ANYCRLF:
1453     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1454     break;
1455    
1456 nigel 93 case PCRE_NEWLINE_ANY:
1457     fprintf(outfile, "Forced newline sequence: ANY\n");
1458     break;
1459    
1460 nigel 91 default:
1461     break;
1462     }
1463    
1464 nigel 43 if (first_char == -1)
1465     {
1466 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1467 nigel 43 }
1468     else if (first_char < 0)
1469     {
1470     fprintf(outfile, "No first char\n");
1471     }
1472     else
1473     {
1474 nigel 63 int ch = first_char & 255;
1475 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1476 nigel 63 "" : " (caseless)";
1477 nigel 93 if (PRINTHEX(ch))
1478 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1479 nigel 3 else
1480 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1481 nigel 43 }
1482 nigel 37
1483 nigel 43 if (need_char < 0)
1484     {
1485     fprintf(outfile, "No need char\n");
1486 nigel 3 }
1487 nigel 43 else
1488     {
1489 nigel 63 int ch = need_char & 255;
1490 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1491 nigel 63 "" : " (caseless)";
1492 nigel 93 if (PRINTHEX(ch))
1493 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1494 nigel 43 else
1495 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1496 nigel 43 }
1497 nigel 75
1498     /* Don't output study size; at present it is in any case a fixed
1499     value, but it varies, depending on the computer architecture, and
1500     so messes up the test suite. (And with the /F option, it might be
1501     flipped.) */
1502    
1503     if (do_study)
1504     {
1505     if (extra == NULL)
1506     fprintf(outfile, "Study returned NULL\n");
1507     else
1508     {
1509     uschar *start_bits = NULL;
1510     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1511    
1512     if (start_bits == NULL)
1513     fprintf(outfile, "No starting byte set\n");
1514     else
1515     {
1516     int i;
1517     int c = 24;
1518     fprintf(outfile, "Starting byte set: ");
1519     for (i = 0; i < 256; i++)
1520     {
1521     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1522     {
1523     if (c > 75)
1524     {
1525     fprintf(outfile, "\n ");
1526     c = 2;
1527     }
1528 nigel 93 if (PRINTHEX(i) && i != ' ')
1529 nigel 75 {
1530     fprintf(outfile, "%c ", i);
1531     c += 2;
1532     }
1533     else
1534     {
1535     fprintf(outfile, "\\x%02x ", i);
1536     c += 5;
1537     }
1538     }
1539     }
1540     fprintf(outfile, "\n");
1541     }
1542     }
1543     }
1544 nigel 3 }
1545    
1546 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1547     that is all. The first 8 bytes of the file are the regex length and then
1548     the study length, in big-endian order. */
1549 nigel 3
1550 nigel 75 if (to_file != NULL)
1551 nigel 3 {
1552 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1553     if (f == NULL)
1554 nigel 3 {
1555 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1556 nigel 3 }
1557 nigel 75 else
1558     {
1559     uschar sbuf[8];
1560     sbuf[0] = (true_size >> 24) & 255;
1561     sbuf[1] = (true_size >> 16) & 255;
1562     sbuf[2] = (true_size >> 8) & 255;
1563     sbuf[3] = (true_size) & 255;
1564 nigel 3
1565 nigel 75 sbuf[4] = (true_study_size >> 24) & 255;
1566     sbuf[5] = (true_study_size >> 16) & 255;
1567     sbuf[6] = (true_study_size >> 8) & 255;
1568     sbuf[7] = (true_study_size) & 255;
1569 nigel 3
1570 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1571     fwrite(re, 1, true_size, f) < true_size)
1572     {
1573     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1574     }
1575 nigel 3 else
1576     {
1577 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1578     if (extra != NULL)
1579 nigel 3 {
1580 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1581     true_study_size)
1582 nigel 3 {
1583 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1584     strerror(errno));
1585 nigel 3 }
1586 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1587 nigel 93
1588 nigel 3 }
1589     }
1590 nigel 75 fclose(f);
1591 nigel 3 }
1592 nigel 77
1593     new_free(re);
1594     if (extra != NULL) new_free(extra);
1595     if (tables != NULL) new_free((void *)tables);
1596 nigel 75 continue; /* With next regex */
1597 nigel 3 }
1598 nigel 75 } /* End of non-POSIX compile */
1599 nigel 3
1600     /* Read data lines and test them */
1601    
1602     for (;;)
1603     {
1604 nigel 87 uschar *q;
1605 ph10 147 uschar *bptr;
1606 nigel 57 int *use_offsets = offsets;
1607 nigel 53 int use_size_offsets = size_offsets;
1608 nigel 63 int callout_data = 0;
1609     int callout_data_set = 0;
1610 nigel 3 int count, c;
1611 nigel 29 int copystrings = 0;
1612 nigel 63 int find_match_limit = 0;
1613 nigel 29 int getstrings = 0;
1614     int getlist = 0;
1615 nigel 39 int gmatched = 0;
1616 nigel 35 int start_offset = 0;
1617 nigel 41 int g_notempty = 0;
1618 nigel 77 int use_dfa = 0;
1619 nigel 3
1620     options = 0;
1621    
1622 nigel 91 *copynames = 0;
1623     *getnames = 0;
1624    
1625     copynamesptr = copynames;
1626     getnamesptr = getnames;
1627    
1628 nigel 63 pcre_callout = callout;
1629     first_callout = 1;
1630     callout_extra = 0;
1631     callout_count = 0;
1632     callout_fail_count = 999999;
1633     callout_fail_id = -1;
1634 nigel 73 show_malloc = 0;
1635 nigel 63
1636 nigel 91 if (extra != NULL) extra->flags &=
1637     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1638    
1639     len = 0;
1640     for (;;)
1641 nigel 11 {
1642 nigel 91 if (infile == stdin) printf("data> ");
1643     if (extend_inputline(infile, buffer + len) == NULL)
1644     {
1645     if (len > 0) break;
1646     done = 1;
1647     goto CONTINUE;
1648     }
1649     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1650     len = (int)strlen((char *)buffer);
1651     if (buffer[len-1] == '\n') break;
1652 nigel 11 }
1653 nigel 3
1654     while (len > 0 && isspace(buffer[len-1])) len--;
1655     buffer[len] = 0;
1656     if (len == 0) break;
1657    
1658     p = buffer;
1659     while (isspace(*p)) p++;
1660    
1661 ph10 147 bptr = q = dbuffer;
1662 nigel 3 while ((c = *p++) != 0)
1663     {
1664     int i = 0;
1665     int n = 0;
1666 nigel 63
1667 nigel 3 if (c == '\\') switch ((c = *p++))
1668     {
1669     case 'a': c = 7; break;
1670     case 'b': c = '\b'; break;
1671     case 'e': c = 27; break;
1672     case 'f': c = '\f'; break;
1673     case 'n': c = '\n'; break;
1674     case 'r': c = '\r'; break;
1675     case 't': c = '\t'; break;
1676     case 'v': c = '\v'; break;
1677    
1678     case '0': case '1': case '2': case '3':
1679     case '4': case '5': case '6': case '7':
1680     c -= '0';
1681     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1682     c = c * 8 + *p++ - '0';
1683 nigel 91
1684     #if !defined NOUTF8
1685     if (use_utf8 && c > 255)
1686     {
1687     unsigned char buff8[8];
1688     int ii, utn;
1689     utn = ord2utf8(c, buff8);
1690     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1691     c = buff8[ii]; /* Last byte */
1692     }
1693     #endif
1694 nigel 3 break;
1695    
1696     case 'x':
1697 nigel 49
1698     /* Handle \x{..} specially - new Perl thing for utf8 */
1699    
1700 nigel 79 #if !defined NOUTF8
1701 nigel 49 if (*p == '{')
1702     {
1703     unsigned char *pt = p;
1704     c = 0;
1705     while (isxdigit(*(++pt)))
1706     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1707     if (*pt == '}')
1708     {
1709 nigel 67 unsigned char buff8[8];
1710 nigel 49 int ii, utn;
1711 nigel 85 utn = ord2utf8(c, buff8);
1712 nigel 67 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1713     c = buff8[ii]; /* Last byte */
1714 nigel 49 p = pt + 1;
1715     break;
1716     }
1717     /* Not correct form; fall through */
1718     }
1719 nigel 79 #endif
1720 nigel 49
1721     /* Ordinary \x */
1722    
1723 nigel 3 c = 0;
1724     while (i++ < 2 && isxdigit(*p))
1725     {
1726     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1727     p++;
1728     }
1729     break;
1730    
1731 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1732 nigel 3 p--;
1733     continue;
1734    
1735 nigel 75 case '>':
1736     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1737     continue;
1738    
1739 nigel 3 case 'A': /* Option setting */
1740     options |= PCRE_ANCHORED;
1741     continue;
1742    
1743     case 'B':
1744     options |= PCRE_NOTBOL;
1745     continue;
1746    
1747 nigel 29 case 'C':
1748 nigel 63 if (isdigit(*p)) /* Set copy string */
1749     {
1750     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1751     copystrings |= 1 << n;
1752     }
1753     else if (isalnum(*p))
1754     {
1755 nigel 91 uschar *npp = copynamesptr;
1756 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1757 nigel 91 *npp++ = 0;
1758 nigel 67 *npp = 0;
1759 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1760 nigel 63 if (n < 0)
1761 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1762     copynamesptr = npp;
1763 nigel 63 }
1764     else if (*p == '+')
1765     {
1766     callout_extra = 1;
1767     p++;
1768     }
1769     else if (*p == '-')
1770     {
1771     pcre_callout = NULL;
1772     p++;
1773     }
1774     else if (*p == '!')
1775     {
1776     callout_fail_id = 0;
1777     p++;
1778     while(isdigit(*p))
1779     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1780     callout_fail_count = 0;
1781     if (*p == '!')
1782     {
1783     p++;
1784     while(isdigit(*p))
1785     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1786     }
1787     }
1788     else if (*p == '*')
1789     {
1790     int sign = 1;
1791     callout_data = 0;
1792     if (*(++p) == '-') { sign = -1; p++; }
1793     while(isdigit(*p))
1794     callout_data = callout_data * 10 + *p++ - '0';
1795     callout_data *= sign;
1796     callout_data_set = 1;
1797     }
1798 nigel 29 continue;
1799    
1800 nigel 79 #if !defined NODFA
1801 nigel 77 case 'D':
1802 nigel 79 #if !defined NOPOSIX
1803 nigel 77 if (posix || do_posix)
1804     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1805     else
1806 nigel 79 #endif
1807 nigel 77 use_dfa = 1;
1808     continue;
1809    
1810     case 'F':
1811     options |= PCRE_DFA_SHORTEST;
1812     continue;
1813 nigel 79 #endif
1814 nigel 77
1815 nigel 29 case 'G':
1816 nigel 63 if (isdigit(*p))
1817     {
1818     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1819     getstrings |= 1 << n;
1820     }
1821     else if (isalnum(*p))
1822     {
1823 nigel 91 uschar *npp = getnamesptr;
1824 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1825 nigel 91 *npp++ = 0;
1826 nigel 67 *npp = 0;
1827 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1828 nigel 63 if (n < 0)
1829 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1830     getnamesptr = npp;
1831 nigel 63 }
1832 nigel 29 continue;
1833    
1834     case 'L':
1835     getlist = 1;
1836     continue;
1837    
1838 nigel 63 case 'M':
1839     find_match_limit = 1;
1840     continue;
1841    
1842 nigel 37 case 'N':
1843     options |= PCRE_NOTEMPTY;
1844     continue;
1845    
1846 nigel 3 case 'O':
1847     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1848 nigel 53 if (n > size_offsets_max)
1849     {
1850     size_offsets_max = n;
1851 nigel 57 free(offsets);
1852 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1853 nigel 53 if (offsets == NULL)
1854     {
1855     printf("** Failed to get %d bytes of memory for offsets vector\n",
1856 ph10 151 (int)(size_offsets_max * sizeof(int)));
1857 nigel 77 yield = 1;
1858     goto EXIT;
1859 nigel 53 }
1860     }
1861     use_size_offsets = n;
1862 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1863 nigel 3 continue;
1864    
1865 nigel 75 case 'P':
1866     options |= PCRE_PARTIAL;
1867     continue;
1868    
1869 nigel 91 case 'Q':
1870     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1871     if (extra == NULL)
1872     {
1873     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1874     extra->flags = 0;
1875     }
1876     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1877     extra->match_limit_recursion = n;
1878     continue;
1879    
1880     case 'q':
1881     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1882     if (extra == NULL)
1883     {
1884     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1885     extra->flags = 0;
1886     }
1887     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1888     extra->match_limit = n;
1889     continue;
1890    
1891 nigel 79 #if !defined NODFA
1892 nigel 77 case 'R':
1893     options |= PCRE_DFA_RESTART;
1894     continue;
1895 nigel 79 #endif
1896 nigel 77
1897 nigel 73 case 'S':
1898     show_malloc = 1;
1899     continue;
1900    
1901 nigel 3 case 'Z':
1902     options |= PCRE_NOTEOL;
1903     continue;
1904 nigel 71
1905     case '?':
1906     options |= PCRE_NO_UTF8_CHECK;
1907     continue;
1908 nigel 91
1909     case '<':
1910     {
1911     int x = check_newline(p, outfile);
1912     if (x == 0) goto NEXT_DATA;
1913     options |= x;
1914     while (*p++ != '>');
1915     }
1916     continue;
1917 nigel 3 }
1918 nigel 9 *q++ = c;
1919 nigel 3 }
1920 nigel 9 *q = 0;
1921     len = q - dbuffer;
1922 nigel 3
1923 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
1924     {
1925     printf("**Match limit not relevant for DFA matching: ignored\n");
1926     find_match_limit = 0;
1927     }
1928    
1929 nigel 3 /* Handle matching via the POSIX interface, which does not
1930 nigel 63 support timing or playing with the match limit or callout data. */
1931 nigel 3
1932 nigel 37 #if !defined NOPOSIX
1933 nigel 3 if (posix || do_posix)
1934     {
1935     int rc;
1936     int eflags = 0;
1937 nigel 63 regmatch_t *pmatch = NULL;
1938     if (use_size_offsets > 0)
1939 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1940 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1941     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1942    
1943 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1944 nigel 3
1945     if (rc != 0)
1946     {
1947 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1948 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1949     }
1950 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1951     != 0)
1952     {
1953     fprintf(outfile, "Matched with REG_NOSUB\n");
1954     }
1955 nigel 3 else
1956     {
1957 nigel 7 size_t i;
1958 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
1959 nigel 3 {
1960     if (pmatch[i].rm_so >= 0)
1961     {
1962 nigel 23 fprintf(outfile, "%2d: ", (int)i);
1963 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
1964     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1965 nigel 3 fprintf(outfile, "\n");
1966 nigel 35 if (i == 0 && do_showrest)
1967     {
1968     fprintf(outfile, " 0+ ");
1969 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1970     outfile);
1971 nigel 35 fprintf(outfile, "\n");
1972     }
1973 nigel 3 }
1974     }
1975     }
1976 nigel 53 free(pmatch);
1977 nigel 3 }
1978    
1979 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
1980 nigel 3
1981 nigel 37 else
1982     #endif /* !defined NOPOSIX */
1983    
1984 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
1985 nigel 3 {
1986 nigel 93 if (timeitm > 0)
1987 nigel 3 {
1988     register int i;
1989     clock_t time_taken;
1990     clock_t start_time = clock();
1991 nigel 77
1992 nigel 79 #if !defined NODFA
1993 nigel 77 if (all_use_dfa || use_dfa)
1994     {
1995     int workspace[1000];
1996 nigel 93 for (i = 0; i < timeitm; i++)
1997 nigel 77 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1998     options | g_notempty, use_offsets, use_size_offsets, workspace,
1999     sizeof(workspace)/sizeof(int));
2000     }
2001     else
2002 nigel 79 #endif
2003 nigel 77
2004 nigel 93 for (i = 0; i < timeitm; i++)
2005 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2006 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2007 nigel 77
2008 nigel 3 time_taken = clock() - start_time;
2009 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2010     (((double)time_taken * 1000.0) / (double)timeitm) /
2011 nigel 63 (double)CLOCKS_PER_SEC);
2012 nigel 3 }
2013    
2014 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2015 nigel 87 varying limits in order to find the minimum value for the match limit and
2016     for the recursion limit. */
2017 nigel 63
2018     if (find_match_limit)
2019     {
2020     if (extra == NULL)
2021     {
2022 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2023 nigel 63 extra->flags = 0;
2024     }
2025    
2026 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2027 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2028     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2029     PCRE_ERROR_MATCHLIMIT, "match()");
2030 nigel 63
2031 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2032     options|g_notempty, use_offsets, use_size_offsets,
2033     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2034     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2035 nigel 63 }
2036    
2037     /* If callout_data is set, use the interface with additional data */
2038    
2039     else if (callout_data_set)
2040     {
2041     if (extra == NULL)
2042     {
2043 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2044 nigel 63 extra->flags = 0;
2045     }
2046     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2047 nigel 71 extra->callout_data = &callout_data;
2048 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2049     options | g_notempty, use_offsets, use_size_offsets);
2050     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2051     }
2052    
2053     /* The normal case is just to do the match once, with the default
2054     value of match_limit. */
2055    
2056 nigel 79 #if !defined NODFA
2057 nigel 77 else if (all_use_dfa || use_dfa)
2058     {
2059     int workspace[1000];
2060     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2061     options | g_notempty, use_offsets, use_size_offsets, workspace,
2062     sizeof(workspace)/sizeof(int));
2063     if (count == 0)
2064     {
2065     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2066     count = use_size_offsets/2;
2067     }
2068     }
2069 nigel 79 #endif
2070 nigel 77
2071 nigel 75 else
2072     {
2073     count = pcre_exec(re, extra, (char *)bptr, len,
2074     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2075 nigel 77 if (count == 0)
2076     {
2077     fprintf(outfile, "Matched, but too many substrings\n");
2078     count = use_size_offsets/3;
2079     }
2080 nigel 75 }
2081 nigel 3
2082 nigel 39 /* Matched */
2083    
2084 nigel 3 if (count >= 0)
2085     {
2086 nigel 93 int i, maxcount;
2087    
2088     #if !defined NODFA
2089     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2090     #endif
2091     maxcount = use_size_offsets/3;
2092    
2093     /* This is a check against a lunatic return value. */
2094    
2095     if (count > maxcount)
2096     {
2097     fprintf(outfile,
2098     "** PCRE error: returned count %d is too big for offset size %d\n",
2099     count, use_size_offsets);
2100     count = use_size_offsets/3;
2101     if (do_g || do_G)
2102     {
2103     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2104     do_g = do_G = FALSE; /* Break g/G loop */
2105     }
2106     }
2107    
2108 nigel 29 for (i = 0; i < count * 2; i += 2)
2109 nigel 3 {
2110 nigel 57 if (use_offsets[i] < 0)
2111 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2112     else
2113     {
2114     fprintf(outfile, "%2d: ", i/2);
2115 nigel 63 (void)pchars(bptr + use_offsets[i],
2116     use_offsets[i+1] - use_offsets[i], outfile);
2117 nigel 3 fprintf(outfile, "\n");
2118 nigel 35 if (i == 0)
2119     {
2120     if (do_showrest)
2121     {
2122     fprintf(outfile, " 0+ ");
2123 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2124     outfile);
2125 nigel 35 fprintf(outfile, "\n");
2126     }
2127     }
2128 nigel 3 }
2129     }
2130 nigel 29
2131     for (i = 0; i < 32; i++)
2132     {
2133     if ((copystrings & (1 << i)) != 0)
2134     {
2135 nigel 91 char copybuffer[256];
2136 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2137 nigel 37 i, copybuffer, sizeof(copybuffer));
2138 nigel 29 if (rc < 0)
2139     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2140     else
2141 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2142 nigel 29 }
2143     }
2144    
2145 nigel 91 for (copynamesptr = copynames;
2146     *copynamesptr != 0;
2147     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2148     {
2149     char copybuffer[256];
2150     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2151     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2152     if (rc < 0)
2153     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2154     else
2155     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2156     }
2157    
2158 nigel 29 for (i = 0; i < 32; i++)
2159     {
2160     if ((getstrings & (1 << i)) != 0)
2161     {
2162     const char *substring;
2163 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2164 nigel 29 i, &substring);
2165     if (rc < 0)
2166     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2167     else
2168     {
2169     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2170 nigel 49 pcre_free_substring(substring);
2171 nigel 29 }
2172     }
2173     }
2174    
2175 nigel 91 for (getnamesptr = getnames;
2176     *getnamesptr != 0;
2177     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2178     {
2179     const char *substring;
2180     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2181     count, (char *)getnamesptr, &substring);
2182     if (rc < 0)
2183     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2184     else
2185     {
2186     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2187     pcre_free_substring(substring);
2188     }
2189     }
2190    
2191 nigel 29 if (getlist)
2192     {
2193     const char **stringlist;
2194 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2195 nigel 29 &stringlist);
2196     if (rc < 0)
2197     fprintf(outfile, "get substring list failed %d\n", rc);
2198     else
2199     {
2200     for (i = 0; i < count; i++)
2201     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2202     if (stringlist[i] != NULL)
2203     fprintf(outfile, "string list not terminated by NULL\n");
2204 nigel 49 /* free((void *)stringlist); */
2205     pcre_free_substring_list(stringlist);
2206 nigel 29 }
2207     }
2208 nigel 39 }
2209 nigel 29
2210 nigel 75 /* There was a partial match */
2211    
2212     else if (count == PCRE_ERROR_PARTIAL)
2213     {
2214 nigel 77 fprintf(outfile, "Partial match");
2215 nigel 79 #if !defined NODFA
2216 nigel 77 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2217     fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2218     bptr + use_offsets[0]);
2219 nigel 79 #endif
2220 nigel 77 fprintf(outfile, "\n");
2221 nigel 75 break; /* Out of the /g loop */
2222     }
2223    
2224 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2225 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2226     to advance the start offset, and continue. We won't be at the end of the
2227     string - that was checked before setting g_notempty.
2228 nigel 39
2229 ph10 150 Complication arises in the case when the newline option is "any" or
2230 ph10 149 "anycrlf". If the previous match was at the end of a line terminated by
2231     CRLF, an advance of one character just passes the \r, whereas we should
2232     prefer the longer newline sequence, as does the code in pcre_exec().
2233     Fudge the offset value to achieve this.
2234 ph10 144
2235 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2236     character, not one byte. */
2237    
2238 nigel 3 else
2239     {
2240 nigel 41 if (g_notempty != 0)
2241 nigel 35 {
2242 nigel 73 int onechar = 1;
2243 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2244 nigel 57 use_offsets[0] = start_offset;
2245 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2246     {
2247     int d;
2248     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2249     obits = (d == '\r')? PCRE_NEWLINE_CR :
2250     (d == '\n')? PCRE_NEWLINE_LF :
2251     (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2252 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2253 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2254     }
2255 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2256 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2257 ph10 149 &&
2258 ph10 143 start_offset < len - 1 &&
2259     bptr[start_offset] == '\r' &&
2260     bptr[start_offset+1] == '\n')
2261 ph10 144 onechar++;
2262 ph10 143 else if (use_utf8)
2263 nigel 73 {
2264     while (start_offset + onechar < len)
2265     {
2266     int tb = bptr[start_offset+onechar];
2267     if (tb <= 127) break;
2268     tb &= 0xc0;
2269     if (tb != 0 && tb != 0xc0) onechar++;
2270     }
2271     }
2272     use_offsets[1] = start_offset + onechar;
2273 nigel 35 }
2274 nigel 41 else
2275     {
2276 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2277 nigel 41 {
2278 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
2279 nigel 41 }
2280 nigel 73 else fprintf(outfile, "Error %d\n", count);
2281 nigel 41 break; /* Out of the /g loop */
2282     }
2283 nigel 3 }
2284 nigel 35
2285 nigel 39 /* If not /g or /G we are done */
2286    
2287     if (!do_g && !do_G) break;
2288    
2289 nigel 41 /* If we have matched an empty string, first check to see if we are at
2290     the end of the subject. If so, the /g loop is over. Otherwise, mimic
2291     what Perl's /g option does. This turns out to be rather cunning. First
2292 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2293     same point. If this fails (picked up above) we advance to the next
2294 ph10 143 character. */
2295 ph10 142
2296 nigel 41 g_notempty = 0;
2297 ph10 142
2298 nigel 57 if (use_offsets[0] == use_offsets[1])
2299 nigel 41 {
2300 nigel 57 if (use_offsets[0] == len) break;
2301 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2302 nigel 41 }
2303 nigel 39
2304     /* For /g, update the start offset, leaving the rest alone */
2305    
2306 ph10 143 if (do_g) start_offset = use_offsets[1];
2307 nigel 39
2308     /* For /G, update the pointer and length */
2309    
2310     else
2311 nigel 35 {
2312 ph10 143 bptr += use_offsets[1];
2313     len -= use_offsets[1];
2314 nigel 35 }
2315 nigel 39 } /* End of loop for /g and /G */
2316 nigel 91
2317     NEXT_DATA: continue;
2318 nigel 39 } /* End of loop for data lines */
2319 nigel 3
2320 nigel 11 CONTINUE:
2321 nigel 37
2322     #if !defined NOPOSIX
2323 nigel 3 if (posix || do_posix) regfree(&preg);
2324 nigel 37 #endif
2325    
2326 nigel 77 if (re != NULL) new_free(re);
2327     if (extra != NULL) new_free(extra);
2328 nigel 25 if (tables != NULL)
2329     {
2330 nigel 77 new_free((void *)tables);
2331 nigel 25 setlocale(LC_CTYPE, "C");
2332 nigel 93 locale_set = 0;
2333 nigel 25 }
2334 nigel 3 }
2335    
2336 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2337 nigel 77
2338     EXIT:
2339    
2340     if (infile != NULL && infile != stdin) fclose(infile);
2341     if (outfile != NULL && outfile != stdout) fclose(outfile);
2342    
2343     free(buffer);
2344     free(dbuffer);
2345     free(pbuffer);
2346     free(offsets);
2347    
2348     return yield;
2349 nigel 3 }
2350    
2351 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12