/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Contents of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 146 - (hide annotations) (download)
Thu Apr 5 09:17:28 2007 UTC (7 years, 8 months ago) by ph10
Original Path: code/trunk/pcretest.c
File MIME type: text/plain
File size: 68194 byte(s)
Another buglet in pcretest for "any" newline and /g.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 nigel 3 #include <ctype.h>
40     #include <stdio.h>
41     #include <string.h>
42     #include <stdlib.h>
43     #include <time.h>
44 nigel 25 #include <locale.h>
45 nigel 75 #include <errno.h>
46 nigel 3
47 nigel 93
48     /* A number of things vary for Windows builds. Originally, pcretest opened its
49     input and output without "b"; then I was told that "b" was needed in some
50     environments, so it was added for release 5.0 to both the input and output. (It
51     makes no difference on Unix-like systems.) Later I was told that it is wrong
52     for the input on Windows. I've now abstracted the modes into two macros that
53     are set here, to make it easier to fiddle with them, and removed "b" from the
54     input mode under Windows. */
55    
56     #if defined(_WIN32) || defined(WIN32)
57     #include <io.h> /* For _setmode() */
58     #include <fcntl.h> /* For _O_BINARY */
59     #define INPUT_MODE "r"
60     #define OUTPUT_MODE "wb"
61    
62     #else
63     #include <sys/time.h> /* These two includes are needed */
64     #include <sys/resource.h> /* for setrlimit(). */
65     #define INPUT_MODE "rb"
66     #define OUTPUT_MODE "wb"
67 nigel 91 #endif
68    
69 nigel 93
70 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
71     displaying the results of pcre_study() and we also need to know about the
72     internal macros, structures, and other internal data values; pcretest has
73     "inside information" compared to a program that strictly follows the PCRE API.
74 nigel 37
75 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
76     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
77     appropriately for an application, not for building PCRE. */
78 nigel 77
79 ph10 145 #include "pcre.h"
80 nigel 77 #include "pcre_internal.h"
81    
82 nigel 85 /* We need access to the data tables that PCRE uses. So as not to have to keep
83     two copies, we include the source file here, changing the names of the external
84     symbols to prevent clashes. */
85 nigel 77
86 nigel 85 #define _pcre_utf8_table1 utf8_table1
87     #define _pcre_utf8_table1_size utf8_table1_size
88     #define _pcre_utf8_table2 utf8_table2
89     #define _pcre_utf8_table3 utf8_table3
90     #define _pcre_utf8_table4 utf8_table4
91     #define _pcre_utt utt
92     #define _pcre_utt_size utt_size
93     #define _pcre_OP_lengths OP_lengths
94    
95     #include "pcre_tables.c"
96    
97     /* We also need the pcre_printint() function for printing out compiled
98     patterns. This function is in a separate file so that it can be included in
99 nigel 93 pcre_compile.c when that module is compiled with debugging enabled.
100 nigel 85
101 nigel 93 The definition of the macro PRINTABLE, which determines whether to print an
102     output character as-is or as a hex value when showing compiled patterns, is
103     contained in this file. We use it here also, in cases when the locale has not
104     been explicitly changed, so as to get consistent output from systems that
105     differ in their output from isprint() even in the "C" locale. */
106    
107 nigel 85 #include "pcre_printint.src"
108    
109 nigel 93 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
110 nigel 85
111 nigel 93
112 nigel 37 /* It is possible to compile this test program without including support for
113     testing the POSIX interface, though this is not available via the standard
114     Makefile. */
115    
116     #if !defined NOPOSIX
117 nigel 3 #include "pcreposix.h"
118 nigel 37 #endif
119 nigel 3
120 ph10 107 /* It is also possible, for the benefit of the version currently imported into
121     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
122     interface to the DFA matcher (NODFA), and without the doublecheck of the old
123     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
124     UTF8 support if PCRE is built without it. */
125 nigel 79
126 ph10 107 #ifndef SUPPORT_UTF8
127     #ifndef NOUTF8
128     #define NOUTF8
129     #endif
130     #endif
131 nigel 79
132 ph10 107
133 nigel 85 /* Other parameters */
134    
135 nigel 3 #ifndef CLOCKS_PER_SEC
136     #ifdef CLK_TCK
137     #define CLOCKS_PER_SEC CLK_TCK
138     #else
139     #define CLOCKS_PER_SEC 100
140     #endif
141     #endif
142    
143 nigel 93 /* This is the default loop count for timing. */
144    
145 nigel 75 #define LOOPREPEAT 500000
146 nigel 3
147 nigel 85 /* Static variables */
148    
149 nigel 3 static FILE *outfile;
150     static int log_store = 0;
151 nigel 63 static int callout_count;
152     static int callout_extra;
153     static int callout_fail_count;
154     static int callout_fail_id;
155     static int first_callout;
156 nigel 93 static int locale_set = 0;
157 nigel 73 static int show_malloc;
158 nigel 67 static int use_utf8;
159 nigel 43 static size_t gotten_store;
160 nigel 3
161 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
162    
163     static int buffer_size = 50000;
164     static uschar *buffer = NULL;
165     static uschar *dbuffer = NULL;
166 nigel 75 static uschar *pbuffer = NULL;
167 nigel 3
168 nigel 75
169 nigel 49
170     /*************************************************
171 nigel 91 * Read or extend an input line *
172     *************************************************/
173    
174     /* Input lines are read into buffer, but both patterns and data lines can be
175     continued over multiple input lines. In addition, if the buffer fills up, we
176     want to automatically expand it so as to be able to handle extremely large
177     lines that are needed for certain stress tests. When the input buffer is
178     expanded, the other two buffers must also be expanded likewise, and the
179     contents of pbuffer, which are a copy of the input for callouts, must be
180     preserved (for when expansion happens for a data line). This is not the most
181     optimal way of handling this, but hey, this is just a test program!
182    
183     Arguments:
184     f the file to read
185     start where in buffer to start (this *must* be within buffer)
186    
187     Returns: pointer to the start of new data
188     could be a copy of start, or could be moved
189     NULL if no data read and EOF reached
190     */
191    
192     static uschar *
193     extend_inputline(FILE *f, uschar *start)
194     {
195     uschar *here = start;
196    
197     for (;;)
198     {
199     int rlen = buffer_size - (here - buffer);
200 nigel 93
201 nigel 91 if (rlen > 1000)
202     {
203     int dlen;
204     if (fgets((char *)here, rlen, f) == NULL)
205     return (here == start)? NULL : start;
206     dlen = (int)strlen((char *)here);
207     if (dlen > 0 && here[dlen - 1] == '\n') return start;
208     here += dlen;
209     }
210    
211     else
212     {
213     int new_buffer_size = 2*buffer_size;
214     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
215     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
216     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
217    
218     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
219     {
220     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
221     exit(1);
222     }
223    
224     memcpy(new_buffer, buffer, buffer_size);
225     memcpy(new_pbuffer, pbuffer, buffer_size);
226    
227     buffer_size = new_buffer_size;
228    
229     start = new_buffer + (start - buffer);
230     here = new_buffer + (here - buffer);
231    
232     free(buffer);
233     free(dbuffer);
234     free(pbuffer);
235    
236     buffer = new_buffer;
237     dbuffer = new_dbuffer;
238     pbuffer = new_pbuffer;
239     }
240     }
241    
242     return NULL; /* Control never gets here */
243     }
244    
245    
246    
247    
248    
249    
250    
/*************************************************
*        Read number from string                 *
*************************************************/

/* A hand-written decimal reader, used instead of strtoul() because SunOS4
does not have that function. It is only used for unpicking arguments, so it is
kept simple: leading white space is skipped, then a run of decimal digits is
accumulated. No sign is accepted and there is no overflow check.

Arguments:
  str        string to be converted
  endptr     where to put the end pointer (the first character that was not
               consumed)

Returns:     the value as an int; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
unsigned char *cursor = str;
int value = 0;

while (*cursor != 0 && isspace(*cursor)) cursor++;

for (; isdigit(*cursor); cursor++)
  value = value * 10 + (int)(*cursor - '0');

*endptr = cursor;
return value;
}
275    
276    
277    
278 nigel 49
279     /*************************************************
280     * Convert UTF-8 string to value *
281     *************************************************/
282    
283     /* This function takes one or more bytes that represents a UTF-8 character,
284     and returns the value of the character.
285    
286     Argument:
287 nigel 91 utf8bytes a pointer to the byte vector
288     vptr a pointer to an int to receive the value
289 nigel 49
290 nigel 91 Returns: > 0 => the number of bytes consumed
291     -6 to 0 => malformed UTF-8 character at offset = (-return)
292 nigel 49 */
293    
294 nigel 79 #if !defined NOUTF8
295    
296 nigel 67 static int
297 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
298 nigel 49 {
299 nigel 91 int c = *utf8bytes++;
300 nigel 49 int d = c;
301     int i, j, s;
302    
303     for (i = -1; i < 6; i++) /* i is number of additional bytes */
304     {
305     if ((d & 0x80) == 0) break;
306     d <<= 1;
307     }
308    
309     if (i == -1) { *vptr = c; return 1; } /* ascii character */
310     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
311    
312     /* i now has a value in the range 1-5 */
313    
314 nigel 59 s = 6*i;
315 nigel 85 d = (c & utf8_table3[i]) << s;
316 nigel 49
317     for (j = 0; j < i; j++)
318     {
319 nigel 91 c = *utf8bytes++;
320 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
321 nigel 59 s -= 6;
322 nigel 49 d |= (c & 0x3f) << s;
323     }
324    
325     /* Check that encoding was the correct unique one */
326    
327 nigel 85 for (j = 0; j < utf8_table1_size; j++)
328     if (d <= utf8_table1[j]) break;
329 nigel 49 if (j != i) return -(i+1);
330    
331     /* Valid value */
332    
333     *vptr = d;
334     return i+1;
335     }
336    
337 nigel 79 #endif
338 nigel 49
339    
340 nigel 79
341 nigel 63 /*************************************************
342 nigel 85 * Convert character value to UTF-8 *
343     *************************************************/
344    
345     /* This function takes an integer value in the range 0 - 0x7fffffff
346     and encodes it as a UTF-8 character in 0 to 6 bytes.
347    
348     Arguments:
349     cvalue the character value
350 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
351 nigel 85
352     Returns: number of characters placed in the buffer
353     */
354    
355 nigel 93 #if !defined NOUTF8
356    
357 nigel 85 static int
358 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
359 nigel 85 {
360     register int i, j;
361     for (i = 0; i < utf8_table1_size; i++)
362     if (cvalue <= utf8_table1[i]) break;
363 nigel 91 utf8bytes += i;
364 nigel 85 for (j = i; j > 0; j--)
365     {
366 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
367 nigel 85 cvalue >>= 6;
368     }
369 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
370 nigel 85 return i + 1;
371     }
372    
373 nigel 93 #endif
374 nigel 85
375    
376 nigel 93
377 nigel 85 /*************************************************
378 nigel 63 * Print character string *
379     *************************************************/
380 nigel 49
381 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
382     mode. Yields number of characters printed. If handed a NULL file, just counts
383     chars without printing. */
384 nigel 49
385 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
386 nigel 3 {
387 nigel 85 int c = 0;
388 nigel 63 int yield = 0;
389 nigel 3
390 nigel 63 while (length-- > 0)
391 nigel 3 {
392 nigel 79 #if !defined NOUTF8
393 nigel 67 if (use_utf8)
394 nigel 63 {
395     int rc = utf82ord(p, &c);
396 nigel 3
397 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
398     {
399     length -= rc - 1;
400     p += rc;
401 nigel 93 if (PRINTHEX(c))
402 nigel 63 {
403     if (f != NULL) fprintf(f, "%c", c);
404     yield++;
405     }
406     else
407     {
408 nigel 93 int n = 4;
409     if (f != NULL) fprintf(f, "\\x{%02x}", c);
410     yield += (n <= 0x000000ff)? 2 :
411     (n <= 0x00000fff)? 3 :
412     (n <= 0x0000ffff)? 4 :
413     (n <= 0x000fffff)? 5 : 6;
414 nigel 63 }
415     continue;
416     }
417     }
418 nigel 79 #endif
419 nigel 3
420 nigel 63 /* Not UTF-8, or malformed UTF-8 */
421    
422 nigel 93 c = *p++;
423     if (PRINTHEX(c))
424 nigel 3 {
425 nigel 63 if (f != NULL) fprintf(f, "%c", c);
426     yield++;
427 nigel 3 }
428 nigel 63 else
429 nigel 3 {
430 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
431     yield += 4;
432     }
433     }
434 nigel 3
435 nigel 63 return yield;
436     }
437 nigel 23
438 nigel 3
439 nigel 23
440 nigel 63 /*************************************************
441     * Callout function *
442     *************************************************/
443 nigel 3
444 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
445     the match. Yield zero unless more callouts than the fail count, or the callout
446     data is not zero. */
447 nigel 3
448 nigel 63 static int callout(pcre_callout_block *cb)
449     {
450     FILE *f = (first_callout | callout_extra)? outfile : NULL;
451 nigel 75 int i, pre_start, post_start, subject_length;
452 nigel 3
453 nigel 63 if (callout_extra)
454     {
455     fprintf(f, "Callout %d: last capture = %d\n",
456     cb->callout_number, cb->capture_last);
457 nigel 3
458 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
459     {
460     if (cb->offset_vector[i] < 0)
461     fprintf(f, "%2d: <unset>\n", i/2);
462     else
463     {
464     fprintf(f, "%2d: ", i/2);
465     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
466     cb->offset_vector[i+1] - cb->offset_vector[i], f);
467     fprintf(f, "\n");
468     }
469     }
470     }
471 nigel 3
472 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
473     datails. On subsequent calls in the same match, we use pchars just to find the
474     printed lengths of the substrings. */
475 nigel 3
476 nigel 63 if (f != NULL) fprintf(f, "--->");
477 nigel 3
478 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
479     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
480     cb->current_position - cb->start_match, f);
481 nigel 3
482 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
483    
484 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
485     cb->subject_length - cb->current_position, f);
486 nigel 3
487 nigel 63 if (f != NULL) fprintf(f, "\n");
488 nigel 9
489 nigel 63 /* Always print appropriate indicators, with callout number if not already
490 nigel 75 shown. For automatic callouts, show the pattern offset. */
491 nigel 3
492 nigel 75 if (cb->callout_number == 255)
493     {
494     fprintf(outfile, "%+3d ", cb->pattern_position);
495     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
496     }
497     else
498     {
499     if (callout_extra) fprintf(outfile, " ");
500     else fprintf(outfile, "%3d ", cb->callout_number);
501     }
502 nigel 3
503 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
504     fprintf(outfile, "^");
505 nigel 3
506 nigel 63 if (post_start > 0)
507     {
508     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
509     fprintf(outfile, "^");
510 nigel 3 }
511    
512 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
513     fprintf(outfile, " ");
514    
515     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
516     pbuffer + cb->pattern_position);
517    
518 nigel 63 fprintf(outfile, "\n");
519     first_callout = 0;
520 nigel 3
521 nigel 71 if (cb->callout_data != NULL)
522 nigel 49 {
523 nigel 71 int callout_data = *((int *)(cb->callout_data));
524     if (callout_data != 0)
525     {
526     fprintf(outfile, "Callout data = %d\n", callout_data);
527     return callout_data;
528     }
529 nigel 63 }
530 nigel 49
531 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
532     (++callout_count >= callout_fail_count)? 1 : 0;
533 nigel 3 }
534    
535    
536 nigel 63 /*************************************************
537 nigel 73 * Local malloc functions *
538 nigel 63 *************************************************/
539 nigel 3
540     /* Alternative malloc function, to test functionality and show the size of the
541     compiled re. */
542    
543     static void *new_malloc(size_t size)
544     {
545 nigel 73 void *block = malloc(size);
546 nigel 43 gotten_store = size;
547 nigel 73 if (show_malloc)
548 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
549 nigel 73 return block;
550 nigel 3 }
551    
552 nigel 73 static void new_free(void *block)
553     {
554     if (show_malloc)
555     fprintf(outfile, "free %p\n", block);
556     free(block);
557     }
558 nigel 3
559    
560 nigel 73 /* For recursion malloc/free, to test stacking calls */
561    
562     static void *stack_malloc(size_t size)
563     {
564     void *block = malloc(size);
565     if (show_malloc)
566 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
567 nigel 73 return block;
568     }
569    
570     static void stack_free(void *block)
571     {
572     if (show_malloc)
573     fprintf(outfile, "stack_free %p\n", block);
574     free(block);
575     }
576    
577    
578 nigel 63 /*************************************************
579     * Call pcre_fullinfo() *
580     *************************************************/
581 nigel 43
582     /* Get one piece of information from the pcre_fullinfo() function */
583    
584     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
585     {
586     int rc;
587     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
588     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
589     }
590    
591    
592    
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverses the byte order of a 2-byte or a 4-byte quantity.

Arguments:
  value      the value to flip; only the low-order 2 (or 4) bytes are used
  n          2 for a 2-byte flip; any other value gives a 4-byte flip

Returns:     the flipped value
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;           /* least significant byte */
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
606    
607    
608    
609    
610     /*************************************************
611 nigel 87 * Check match or recursion limit *
612     *************************************************/
613    
614     static int
615     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
616     int start_offset, int options, int *use_offsets, int use_size_offsets,
617     int flag, unsigned long int *limit, int errnumber, const char *msg)
618     {
619     int count;
620     int min = 0;
621     int mid = 64;
622     int max = -1;
623    
624     extra->flags |= flag;
625    
626     for (;;)
627     {
628     *limit = mid;
629    
630     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
631     use_offsets, use_size_offsets);
632    
633     if (count == errnumber)
634     {
635     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
636     min = mid;
637     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
638     }
639    
640     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
641     count == PCRE_ERROR_PARTIAL)
642     {
643     if (mid == min + 1)
644     {
645     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
646     break;
647     }
648     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
649     max = mid;
650     mid = (min + mid)/2;
651     }
652     else break; /* Some other error */
653     }
654    
655     extra->flags &= ~flag;
656     return count;
657     }
658    
659    
660    
661     /*************************************************
662 nigel 91 * Check newline indicator *
663     *************************************************/
664    
665     /* This is used both at compile and run-time to check for <xxx> escapes, where
666 nigel 93 xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.
667 nigel 91
668     Arguments:
669     p points after the leading '<'
670     f file for error message
671    
672     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
673     */
674    
675     static int
676     check_newline(uschar *p, FILE *f)
677     {
678     if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
679     if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
680     if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
681 nigel 93 if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
682 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
683     return 0;
684     }
685    
686    
687    
/*************************************************
*                Usage function                  *
*************************************************/

/* Writes the summary of the command line options to stdout. The -dfa and -p
lines are left out when the program is built with NODFA or NOPOSIX defined. */

static void
usage(void)
{
printf("Usage: pcretest [options] [<input> [<output>]]\n"
       " -b show compiled code (bytecode)\n"
       " -C show PCRE compile-time options and exit\n"
       " -d debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
printf(" -dfa force DFA matching for all subjects\n");
#endif
printf(" -help show usage information\n"
       " -i show information about compiled patterns\n"
       " -m output memory used information\n"
       " -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf(" -p use POSIX interface\n");
#endif
printf(" -q quiet: do not output PCRE version number at start\n"
       " -S <n> set stack size to <n> megabytes\n"
       " -s output store (memory) used information\n"
       " -t time compilation and execution\n"
       " -t <n> time compilation and execution, repeating <n> times\n"
       " -tm time execution (matching) only\n"
       " -tm <n> time execution (matching) only, repeating <n> times\n");
}
717    
718    
719    
720     /*************************************************
721 nigel 63 * Main Program *
722     *************************************************/
723 nigel 43
724 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
725     consist of a regular expression, in delimiters and optionally followed by
726     options, followed by a set of test data, terminated by an empty line. */
727    
728     int main(int argc, char **argv)
729     {
730     FILE *infile = stdin;
731     int options = 0;
732     int study_options = 0;
733     int op = 1;
734     int timeit = 0;
735 nigel 93 int timeitm = 0;
736 nigel 3 int showinfo = 0;
737 nigel 31 int showstore = 0;
738 nigel 87 int quiet = 0;
739 nigel 53 int size_offsets = 45;
740     int size_offsets_max;
741 nigel 77 int *offsets = NULL;
742 nigel 53 #if !defined NOPOSIX
743 nigel 3 int posix = 0;
744 nigel 53 #endif
745 nigel 3 int debug = 0;
746 nigel 11 int done = 0;
747 nigel 77 int all_use_dfa = 0;
748     int yield = 0;
749 nigel 91 int stack_size;
750 nigel 3
751 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
752     that 1024 is plenty long enough for the few names we'll be testing. */
753 nigel 69
754 nigel 91 uschar copynames[1024];
755     uschar getnames[1024];
756    
757     uschar *copynamesptr;
758     uschar *getnamesptr;
759    
760 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
761 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
762 nigel 69
763 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
764     dbuffer = (unsigned char *)malloc(buffer_size);
765     pbuffer = (unsigned char *)malloc(buffer_size);
766 nigel 69
767 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
768 nigel 3
769 nigel 93 outfile = stdout;
770    
771     /* The following _setmode() stuff is some Windows magic that tells its runtime
772     library to translate CRLF into a single LF character. At least, that's what
773     I've been told: never having used Windows I take this all on trust. Originally
774     it set 0x8000, but then I was advised that _O_BINARY was better. */
775    
776 nigel 75 #if defined(_WIN32) || defined(WIN32)
777 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
778     #endif
779 nigel 75
780 nigel 3 /* Scan options */
781    
782     while (argc > 1 && argv[op][0] == '-')
783     {
784 nigel 63 unsigned char *endptr;
785 nigel 53
786 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
787     showstore = 1;
788 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
789 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
790 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
791     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
792 nigel 79 #if !defined NODFA
793 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
794 nigel 79 #endif
795 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
796 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
797     *endptr == 0))
798 nigel 53 {
799     op++;
800     argc--;
801     }
802 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
803     {
804     int both = argv[op][2] == 0;
805     int temp;
806     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
807     *endptr == 0))
808     {
809     timeitm = temp;
810     op++;
811     argc--;
812     }
813     else timeitm = LOOPREPEAT;
814     if (both) timeit = timeitm;
815     }
816 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
817     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
818     *endptr == 0))
819     {
820 nigel 93 #if defined(_WIN32) || defined(WIN32)
821 nigel 91 printf("PCRE: -S not supported on this OS\n");
822     exit(1);
823     #else
824     int rc;
825     struct rlimit rlim;
826     getrlimit(RLIMIT_STACK, &rlim);
827     rlim.rlim_cur = stack_size * 1024 * 1024;
828     rc = setrlimit(RLIMIT_STACK, &rlim);
829     if (rc != 0)
830     {
831     printf("PCRE: setrlimit() failed with error %d\n", rc);
832     exit(1);
833     }
834     op++;
835     argc--;
836     #endif
837     }
838 nigel 53 #if !defined NOPOSIX
839 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
840 nigel 53 #endif
841 nigel 63 else if (strcmp(argv[op], "-C") == 0)
842     {
843     int rc;
844     printf("PCRE version %s\n", pcre_version());
845     printf("Compiled with\n");
846     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
847     printf(" %sUTF-8 support\n", rc? "" : "No ");
848 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
849     printf(" %sUnicode properties support\n", rc? "" : "No ");
850 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
851 nigel 91 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
852 nigel 93 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
853     (rc == -1)? "ANY" : "???");
854 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
855     printf(" Internal link size = %d\n", rc);
856     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
857     printf(" POSIX malloc threshold = %d\n", rc);
858     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
859     printf(" Default match limit = %d\n", rc);
860 nigel 87 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
861     printf(" Default recursion depth limit = %d\n", rc);
862 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
863     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
864 ph10 121 goto EXIT;
865 nigel 63 }
866 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
867     strcmp(argv[op], "--help") == 0)
868     {
869     usage();
870     goto EXIT;
871     }
872 nigel 3 else
873     {
874 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
875 nigel 93 usage();
876 nigel 77 yield = 1;
877     goto EXIT;
878 nigel 3 }
879     op++;
880     argc--;
881     }
882    
883 nigel 53 /* Get the store for the offsets vector, and remember what it was */
884    
885     size_offsets_max = size_offsets;
886 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
887 nigel 53 if (offsets == NULL)
888     {
889     printf("** Failed to get %d bytes of memory for offsets vector\n",
890     size_offsets_max * sizeof(int));
891 nigel 77 yield = 1;
892     goto EXIT;
893 nigel 53 }
894    
895 nigel 3 /* Sort out the input and output files */
896    
897     if (argc > 1)
898     {
899 nigel 93 infile = fopen(argv[op], INPUT_MODE);
900 nigel 3 if (infile == NULL)
901     {
902     printf("** Failed to open %s\n", argv[op]);
903 nigel 77 yield = 1;
904     goto EXIT;
905 nigel 3 }
906     }
907    
908     if (argc > 2)
909     {
910 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
911 nigel 3 if (outfile == NULL)
912     {
913     printf("** Failed to open %s\n", argv[op+1]);
914 nigel 77 yield = 1;
915     goto EXIT;
916 nigel 3 }
917     }
918    
919     /* Set alternative malloc function */
920    
921     pcre_malloc = new_malloc;
922 nigel 73 pcre_free = new_free;
923     pcre_stack_malloc = stack_malloc;
924     pcre_stack_free = stack_free;
925 nigel 3
926 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
927 nigel 3
928 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
929 nigel 3
930     /* Main loop */
931    
932 nigel 11 while (!done)
933 nigel 3 {
934     pcre *re = NULL;
935     pcre_extra *extra = NULL;
936 nigel 37
937     #if !defined NOPOSIX /* There are still compilers that require no indent */
938 nigel 3 regex_t preg;
939 nigel 45 int do_posix = 0;
940 nigel 37 #endif
941    
942 nigel 7 const char *error;
943 nigel 25 unsigned char *p, *pp, *ppp;
944 nigel 75 unsigned char *to_file = NULL;
945 nigel 53 const unsigned char *tables = NULL;
946 nigel 75 unsigned long int true_size, true_study_size = 0;
947     size_t size, regex_gotten_store;
948 nigel 3 int do_study = 0;
949 nigel 25 int do_debug = debug;
950 ph10 123 int debug_lengths = 1;
951 nigel 35 int do_G = 0;
952     int do_g = 0;
953 nigel 25 int do_showinfo = showinfo;
954 nigel 35 int do_showrest = 0;
955 nigel 75 int do_flip = 0;
956 nigel 93 int erroroffset, len, delimiter, poffset;
957 nigel 3
958 nigel 67 use_utf8 = 0;
959 nigel 63
960 nigel 3 if (infile == stdin) printf(" re> ");
961 nigel 91 if (extend_inputline(infile, buffer) == NULL) break;
962 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
963 nigel 63 fflush(outfile);
964 nigel 3
965     p = buffer;
966     while (isspace(*p)) p++;
967     if (*p == 0) continue;
968    
969 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
970 nigel 3
971 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
972     {
973 nigel 91 unsigned long int magic, get_options;
974 nigel 75 uschar sbuf[8];
975     FILE *f;
976    
977     p++;
978     pp = p + (int)strlen((char *)p);
979     while (isspace(pp[-1])) pp--;
980     *pp = 0;
981    
982     f = fopen((char *)p, "rb");
983     if (f == NULL)
984     {
985     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
986     continue;
987     }
988    
989     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
990    
991     true_size =
992     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
993     true_study_size =
994     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
995    
996     re = (real_pcre *)new_malloc(true_size);
997     regex_gotten_store = gotten_store;
998    
999     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1000    
1001     magic = ((real_pcre *)re)->magic_number;
1002     if (magic != MAGIC_NUMBER)
1003     {
1004     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1005     {
1006     do_flip = 1;
1007     }
1008     else
1009     {
1010     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1011     fclose(f);
1012     continue;
1013     }
1014     }
1015    
1016     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1017     do_flip? " (byte-inverted)" : "", p);
1018    
1019     /* Need to know if UTF-8 for printing data strings */
1020    
1021 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1022     use_utf8 = (get_options & PCRE_UTF8) != 0;
1023 nigel 75
1024     /* Now see if there is any following study data */
1025    
1026     if (true_study_size != 0)
1027     {
1028     pcre_study_data *psd;
1029    
1030     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1031     extra->flags = PCRE_EXTRA_STUDY_DATA;
1032    
1033     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1034     extra->study_data = psd;
1035    
1036     if (fread(psd, 1, true_study_size, f) != true_study_size)
1037     {
1038     FAIL_READ:
1039     fprintf(outfile, "Failed to read data from %s\n", p);
1040     if (extra != NULL) new_free(extra);
1041     if (re != NULL) new_free(re);
1042     fclose(f);
1043     continue;
1044     }
1045     fprintf(outfile, "Study data loaded from %s\n", p);
1046     do_study = 1; /* To get the data output if requested */
1047     }
1048     else fprintf(outfile, "No study data\n");
1049    
1050     fclose(f);
1051     goto SHOW_INFO;
1052     }
1053    
1054     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1055     the pattern; if it isn't complete, read more. */
1056    
1057 nigel 3 delimiter = *p++;
1058    
1059 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1060 nigel 3 {
1061 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1062 nigel 3 goto SKIP_DATA;
1063     }
1064    
1065     pp = p;
1066 nigel 93 poffset = p - buffer;
1067 nigel 3
1068     for(;;)
1069     {
1070 nigel 29 while (*pp != 0)
1071     {
1072     if (*pp == '\\' && pp[1] != 0) pp++;
1073     else if (*pp == delimiter) break;
1074     pp++;
1075     }
1076 nigel 3 if (*pp != 0) break;
1077     if (infile == stdin) printf(" > ");
1078 nigel 91 if ((pp = extend_inputline(infile, pp)) == NULL)
1079 nigel 3 {
1080     fprintf(outfile, "** Unexpected EOF\n");
1081 nigel 11 done = 1;
1082     goto CONTINUE;
1083 nigel 3 }
1084 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1085 nigel 3 }
1086    
1087 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1088     pointer to the correct relative point in the buffer. */
1089    
1090     p = buffer + poffset;
1091    
1092 nigel 29 /* If the first character after the delimiter is backslash, make
1093     the pattern end with backslash. This is purely to provide a way
1094     of testing for the error message when a pattern ends with backslash. */
1095    
1096     if (pp[1] == '\\') *pp++ = '\\';
1097    
1098 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1099     for callouts. */
1100 nigel 3
1101     *pp++ = 0;
1102 nigel 75 strcpy((char *)pbuffer, (char *)p);
1103 nigel 3
1104     /* Look for options after final delimiter */
1105    
1106     options = 0;
1107     study_options = 0;
1108 nigel 31 log_store = showstore; /* default from command line */
1109    
1110 nigel 3 while (*pp != 0)
1111     {
1112     switch (*pp++)
1113     {
1114 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1115 nigel 35 case 'g': do_g = 1; break;
1116 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1117     case 'm': options |= PCRE_MULTILINE; break;
1118     case 's': options |= PCRE_DOTALL; break;
1119     case 'x': options |= PCRE_EXTENDED; break;
1120 nigel 25
1121 nigel 35 case '+': do_showrest = 1; break;
1122 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1123 nigel 93 case 'B': do_debug = 1; break;
1124 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1125 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1126 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1127 nigel 75 case 'F': do_flip = 1; break;
1128 nigel 35 case 'G': do_G = 1; break;
1129 nigel 25 case 'I': do_showinfo = 1; break;
1130 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1131 nigel 31 case 'M': log_store = 1; break;
1132 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1133 nigel 37
1134     #if !defined NOPOSIX
1135 nigel 3 case 'P': do_posix = 1; break;
1136 nigel 37 #endif
1137    
1138 nigel 3 case 'S': do_study = 1; break;
1139 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1140 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1141 ph10 126 case 'Z': debug_lengths = 0; break;
1142 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1143 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1144 nigel 25
1145     case 'L':
1146     ppp = pp;
1147 nigel 93 /* The '\r' test here is so that it works on Windows. */
1148     /* The zero (NUL) test is just in case this is an unterminated line. */
1149     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1150 nigel 25 *ppp = 0;
1151     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1152     {
1153     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1154     goto SKIP_DATA;
1155     }
1156 nigel 93 locale_set = 1;
1157 nigel 25 tables = pcre_maketables();
1158     pp = ppp;
1159     break;
1160    
1161 nigel 75 case '>':
1162     to_file = pp;
1163     while (*pp != 0) pp++;
1164     while (isspace(pp[-1])) pp--;
1165     *pp = 0;
1166     break;
1167    
1168 nigel 91 case '<':
1169     {
1170     int x = check_newline(pp, outfile);
1171     if (x == 0) goto SKIP_DATA;
1172     options |= x;
1173     while (*pp++ != '>');
1174     }
1175     break;
1176    
1177 nigel 77 case '\r': /* So that it works in Windows */
1178     case '\n':
1179     case ' ':
1180     break;
1181 nigel 75
1182 nigel 3 default:
1183     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1184     goto SKIP_DATA;
1185     }
1186     }
1187    
1188 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1189 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1190     local character tables. */
1191 nigel 3
1192 nigel 37 #if !defined NOPOSIX
1193 nigel 3 if (posix || do_posix)
1194     {
1195     int rc;
1196     int cflags = 0;
1197 nigel 75
1198 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1199     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1200 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1201 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1202     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1203    
1204 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1205    
1206     /* Compilation failed; go back for another re, skipping to blank line
1207     if non-interactive. */
1208    
1209     if (rc != 0)
1210     {
1211 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1212 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1213     goto SKIP_DATA;
1214     }
1215     }
1216    
1217     /* Handle compiling via the native interface */
1218    
1219     else
1220 nigel 37 #endif /* !defined NOPOSIX */
1221    
1222 nigel 3 {
1223 nigel 93 if (timeit > 0)
1224 nigel 3 {
1225     register int i;
1226     clock_t time_taken;
1227     clock_t start_time = clock();
1228 nigel 93 for (i = 0; i < timeit; i++)
1229 nigel 3 {
1230 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1231 nigel 3 if (re != NULL) free(re);
1232     }
1233     time_taken = clock() - start_time;
1234 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1235     (((double)time_taken * 1000.0) / (double)timeit) /
1236 nigel 63 (double)CLOCKS_PER_SEC);
1237 nigel 3 }
1238    
1239 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1240 nigel 3
1241     /* Compilation failed; go back for another re, skipping to blank line
1242     if non-interactive. */
1243    
1244     if (re == NULL)
1245     {
1246     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1247     SKIP_DATA:
1248     if (infile != stdin)
1249     {
1250     for (;;)
1251     {
1252 nigel 91 if (extend_inputline(infile, buffer) == NULL)
1253 nigel 11 {
1254     done = 1;
1255     goto CONTINUE;
1256     }
1257 nigel 3 len = (int)strlen((char *)buffer);
1258     while (len > 0 && isspace(buffer[len-1])) len--;
1259     if (len == 0) break;
1260     }
1261     fprintf(outfile, "\n");
1262     }
1263 nigel 25 goto CONTINUE;
1264 nigel 3 }
1265    
1266 nigel 43 /* Compilation succeeded; print data if required. There are now two
1267     info-returning functions. The old one has a limited interface and
1268     returns only limited data. Check that it agrees with the newer one. */
1269 nigel 3
1270 nigel 63 if (log_store)
1271     fprintf(outfile, "Memory allocation (code space): %d\n",
1272     (int)(gotten_store -
1273     sizeof(real_pcre) -
1274     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1275    
1276 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1277     and remember the store that was got. */
1278    
1279     true_size = ((real_pcre *)re)->size;
1280     regex_gotten_store = gotten_store;
1281    
1282     /* If /S was present, study the regexp to generate additional info to
1283     help with the matching. */
1284    
1285     if (do_study)
1286     {
1287 nigel 93 if (timeit > 0)
1288 nigel 75 {
1289     register int i;
1290     clock_t time_taken;
1291     clock_t start_time = clock();
1292 nigel 93 for (i = 0; i < timeit; i++)
1293 nigel 75 extra = pcre_study(re, study_options, &error);
1294     time_taken = clock() - start_time;
1295     if (extra != NULL) free(extra);
1296 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1297     (((double)time_taken * 1000.0) / (double)timeit) /
1298 nigel 75 (double)CLOCKS_PER_SEC);
1299     }
1300     extra = pcre_study(re, study_options, &error);
1301     if (error != NULL)
1302     fprintf(outfile, "Failed to study: %s\n", error);
1303     else if (extra != NULL)
1304     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1305     }
1306    
1307     /* If the 'F' option was present, we flip the bytes of all the integer
1308     fields in the regex data block and the study block. This is to make it
1309     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1310     compiled on a different architecture. */
1311    
1312     if (do_flip)
1313     {
1314     real_pcre *rre = (real_pcre *)re;
1315     rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1316     rre->size = byteflip(rre->size, sizeof(rre->size));
1317     rre->options = byteflip(rre->options, sizeof(rre->options));
1318     rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1319     rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1320     rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1321     rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1322     rre->name_table_offset = byteflip(rre->name_table_offset,
1323     sizeof(rre->name_table_offset));
1324     rre->name_entry_size = byteflip(rre->name_entry_size,
1325     sizeof(rre->name_entry_size));
1326     rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1327    
1328     if (extra != NULL)
1329     {
1330     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1331     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1332     rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1333     }
1334     }
1335    
1336     /* Extract information from the compiled data if required */
1337    
1338     SHOW_INFO:
1339    
1340 nigel 93 if (do_debug)
1341     {
1342     fprintf(outfile, "------------------------------------------------------------------\n");
1343 ph10 116 pcre_printint(re, outfile, debug_lengths);
1344 nigel 93 }
1345    
1346 nigel 25 if (do_showinfo)
1347 nigel 3 {
1348 nigel 75 unsigned long int get_options, all_options;
1349 nigel 79 #if !defined NOINFOCHECK
1350 nigel 43 int old_first_char, old_options, old_count;
1351 nigel 79 #endif
1352 nigel 43 int count, backrefmax, first_char, need_char;
1353 nigel 63 int nameentrysize, namecount;
1354     const uschar *nametable;
1355 nigel 3
1356 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1357 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1358     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1359     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1360 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1361 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1362 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1363     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1364 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1365 nigel 43
1366 nigel 79 #if !defined NOINFOCHECK
1367 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1368 nigel 3 if (count < 0) fprintf(outfile,
1369 nigel 43 "Error %d from pcre_info()\n", count);
1370 nigel 3 else
1371     {
1372 nigel 43 if (old_count != count) fprintf(outfile,
1373     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1374     old_count);
1375 nigel 37
1376 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1377     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1378     first_char, old_first_char);
1379 nigel 37
1380 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1381     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1382     get_options, old_options);
1383 nigel 43 }
1384 nigel 79 #endif
1385 nigel 43
1386 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1387 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1388 nigel 77 (int)size, (int)regex_gotten_store);
1389 nigel 43
1390     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1391     if (backrefmax > 0)
1392     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1393 nigel 63
1394     if (namecount > 0)
1395     {
1396     fprintf(outfile, "Named capturing subpatterns:\n");
1397     while (namecount-- > 0)
1398     {
1399     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1400     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1401     GET2(nametable, 0));
1402     nametable += nameentrysize;
1403     }
1404     }
1405    
1406 nigel 75 /* The NOPARTIAL bit is a private bit in the options, so we have
1407     to fish it out via our back door */
1408    
1409     all_options = ((real_pcre *)re)->options;
1410     if (do_flip)
1411     {
1412     all_options = byteflip(all_options, sizeof(all_options));
1413 nigel 91 }
1414 nigel 75
1415     if ((all_options & PCRE_NOPARTIAL) != 0)
1416     fprintf(outfile, "Partial matching not supported\n");
1417    
1418 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1419 nigel 91 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1420 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1421     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1422     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1423     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1424 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1425 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1426     ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1427     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1428     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1429 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1430 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1431 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1432     ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1433 nigel 43
1434 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1435 nigel 91 {
1436     case PCRE_NEWLINE_CR:
1437     fprintf(outfile, "Forced newline sequence: CR\n");
1438     break;
1439 nigel 43
1440 nigel 91 case PCRE_NEWLINE_LF:
1441     fprintf(outfile, "Forced newline sequence: LF\n");
1442     break;
1443    
1444     case PCRE_NEWLINE_CRLF:
1445     fprintf(outfile, "Forced newline sequence: CRLF\n");
1446     break;
1447    
1448 nigel 93 case PCRE_NEWLINE_ANY:
1449     fprintf(outfile, "Forced newline sequence: ANY\n");
1450     break;
1451    
1452 nigel 91 default:
1453     break;
1454     }
1455    
1456 nigel 43 if (first_char == -1)
1457     {
1458 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1459 nigel 43 }
1460     else if (first_char < 0)
1461     {
1462     fprintf(outfile, "No first char\n");
1463     }
1464     else
1465     {
1466 nigel 63 int ch = first_char & 255;
1467 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1468 nigel 63 "" : " (caseless)";
1469 nigel 93 if (PRINTHEX(ch))
1470 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1471 nigel 3 else
1472 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1473 nigel 43 }
1474 nigel 37
1475 nigel 43 if (need_char < 0)
1476     {
1477     fprintf(outfile, "No need char\n");
1478 nigel 3 }
1479 nigel 43 else
1480     {
1481 nigel 63 int ch = need_char & 255;
1482 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1483 nigel 63 "" : " (caseless)";
1484 nigel 93 if (PRINTHEX(ch))
1485 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1486 nigel 43 else
1487 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1488 nigel 43 }
1489 nigel 75
1490     /* Don't output study size; at present it is in any case a fixed
1491     value, but it varies, depending on the computer architecture, and
1492     so messes up the test suite. (And with the /F option, it might be
1493     flipped.) */
1494    
1495     if (do_study)
1496     {
1497     if (extra == NULL)
1498     fprintf(outfile, "Study returned NULL\n");
1499     else
1500     {
1501     uschar *start_bits = NULL;
1502     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1503    
1504     if (start_bits == NULL)
1505     fprintf(outfile, "No starting byte set\n");
1506     else
1507     {
1508     int i;
1509     int c = 24;
1510     fprintf(outfile, "Starting byte set: ");
1511     for (i = 0; i < 256; i++)
1512     {
1513     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1514     {
1515     if (c > 75)
1516     {
1517     fprintf(outfile, "\n ");
1518     c = 2;
1519     }
1520 nigel 93 if (PRINTHEX(i) && i != ' ')
1521 nigel 75 {
1522     fprintf(outfile, "%c ", i);
1523     c += 2;
1524     }
1525     else
1526     {
1527     fprintf(outfile, "\\x%02x ", i);
1528     c += 5;
1529     }
1530     }
1531     }
1532     fprintf(outfile, "\n");
1533     }
1534     }
1535     }
1536 nigel 3 }
1537    
1538 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1539     that is all. The first 8 bytes of the file are the regex length and then
1540     the study length, in big-endian order. */
1541 nigel 3
1542 nigel 75 if (to_file != NULL)
1543 nigel 3 {
1544 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1545     if (f == NULL)
1546 nigel 3 {
1547 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1548 nigel 3 }
1549 nigel 75 else
1550     {
1551     uschar sbuf[8];
1552     sbuf[0] = (true_size >> 24) & 255;
1553     sbuf[1] = (true_size >> 16) & 255;
1554     sbuf[2] = (true_size >> 8) & 255;
1555     sbuf[3] = (true_size) & 255;
1556 nigel 3
1557 nigel 75 sbuf[4] = (true_study_size >> 24) & 255;
1558     sbuf[5] = (true_study_size >> 16) & 255;
1559     sbuf[6] = (true_study_size >> 8) & 255;
1560     sbuf[7] = (true_study_size) & 255;
1561 nigel 3
1562 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1563     fwrite(re, 1, true_size, f) < true_size)
1564     {
1565     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1566     }
1567 nigel 3 else
1568     {
1569 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1570     if (extra != NULL)
1571 nigel 3 {
1572 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1573     true_study_size)
1574 nigel 3 {
1575 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1576     strerror(errno));
1577 nigel 3 }
1578 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1579 nigel 93
1580 nigel 3 }
1581     }
1582 nigel 75 fclose(f);
1583 nigel 3 }
1584 nigel 77
1585     new_free(re);
1586     if (extra != NULL) new_free(extra);
1587     if (tables != NULL) new_free((void *)tables);
1588 nigel 75 continue; /* With next regex */
1589 nigel 3 }
1590 nigel 75 } /* End of non-POSIX compile */
1591 nigel 3
1592     /* Read data lines and test them */
1593    
1594     for (;;)
1595     {
1596 nigel 87 uschar *q;
1597     uschar *bptr = dbuffer;
1598 nigel 57 int *use_offsets = offsets;
1599 nigel 53 int use_size_offsets = size_offsets;
1600 nigel 63 int callout_data = 0;
1601     int callout_data_set = 0;
1602 nigel 3 int count, c;
1603 nigel 29 int copystrings = 0;
1604 nigel 63 int find_match_limit = 0;
1605 nigel 29 int getstrings = 0;
1606     int getlist = 0;
1607 nigel 39 int gmatched = 0;
1608 nigel 35 int start_offset = 0;
1609 nigel 41 int g_notempty = 0;
1610 nigel 77 int use_dfa = 0;
1611 nigel 3
1612     options = 0;
1613    
1614 nigel 91 *copynames = 0;
1615     *getnames = 0;
1616    
1617     copynamesptr = copynames;
1618     getnamesptr = getnames;
1619    
1620 nigel 63 pcre_callout = callout;
1621     first_callout = 1;
1622     callout_extra = 0;
1623     callout_count = 0;
1624     callout_fail_count = 999999;
1625     callout_fail_id = -1;
1626 nigel 73 show_malloc = 0;
1627 nigel 63
1628 nigel 91 if (extra != NULL) extra->flags &=
1629     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1630    
1631     len = 0;
1632     for (;;)
1633 nigel 11 {
1634 nigel 91 if (infile == stdin) printf("data> ");
1635     if (extend_inputline(infile, buffer + len) == NULL)
1636     {
1637     if (len > 0) break;
1638     done = 1;
1639     goto CONTINUE;
1640     }
1641     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1642     len = (int)strlen((char *)buffer);
1643     if (buffer[len-1] == '\n') break;
1644 nigel 11 }
1645 nigel 3
1646     while (len > 0 && isspace(buffer[len-1])) len--;
1647     buffer[len] = 0;
1648     if (len == 0) break;
1649    
1650     p = buffer;
1651     while (isspace(*p)) p++;
1652    
1653 nigel 9 q = dbuffer;
1654 nigel 3 while ((c = *p++) != 0)
1655     {
1656     int i = 0;
1657     int n = 0;
1658 nigel 63
1659 nigel 3 if (c == '\\') switch ((c = *p++))
1660     {
1661     case 'a': c = 7; break;
1662     case 'b': c = '\b'; break;
1663     case 'e': c = 27; break;
1664     case 'f': c = '\f'; break;
1665     case 'n': c = '\n'; break;
1666     case 'r': c = '\r'; break;
1667     case 't': c = '\t'; break;
1668     case 'v': c = '\v'; break;
1669    
1670     case '0': case '1': case '2': case '3':
1671     case '4': case '5': case '6': case '7':
1672     c -= '0';
1673     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1674     c = c * 8 + *p++ - '0';
1675 nigel 91
1676     #if !defined NOUTF8
1677     if (use_utf8 && c > 255)
1678     {
1679     unsigned char buff8[8];
1680     int ii, utn;
1681     utn = ord2utf8(c, buff8);
1682     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1683     c = buff8[ii]; /* Last byte */
1684     }
1685     #endif
1686 nigel 3 break;
1687    
1688     case 'x':
1689 nigel 49
1690     /* Handle \x{..} specially - new Perl thing for utf8 */
1691    
1692 nigel 79 #if !defined NOUTF8
1693 nigel 49 if (*p == '{')
1694     {
1695     unsigned char *pt = p;
1696     c = 0;
1697     while (isxdigit(*(++pt)))
1698     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1699     if (*pt == '}')
1700     {
1701 nigel 67 unsigned char buff8[8];
1702 nigel 49 int ii, utn;
1703 nigel 85 utn = ord2utf8(c, buff8);
1704 nigel 67 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1705     c = buff8[ii]; /* Last byte */
1706 nigel 49 p = pt + 1;
1707     break;
1708     }
1709     /* Not correct form; fall through */
1710     }
1711 nigel 79 #endif
1712 nigel 49
1713     /* Ordinary \x */
1714    
1715 nigel 3 c = 0;
1716     while (i++ < 2 && isxdigit(*p))
1717     {
1718     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1719     p++;
1720     }
1721     break;
1722    
1723 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1724 nigel 3 p--;
1725     continue;
1726    
1727 nigel 75 case '>':
1728     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1729     continue;
1730    
1731 nigel 3 case 'A': /* Option setting */
1732     options |= PCRE_ANCHORED;
1733     continue;
1734    
1735     case 'B':
1736     options |= PCRE_NOTBOL;
1737     continue;
1738    
1739 nigel 29 case 'C':
1740 nigel 63 if (isdigit(*p)) /* Set copy string */
1741     {
1742     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1743     copystrings |= 1 << n;
1744     }
1745     else if (isalnum(*p))
1746     {
1747 nigel 91 uschar *npp = copynamesptr;
1748 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1749 nigel 91 *npp++ = 0;
1750 nigel 67 *npp = 0;
1751 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1752 nigel 63 if (n < 0)
1753 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1754     copynamesptr = npp;
1755 nigel 63 }
1756     else if (*p == '+')
1757     {
1758     callout_extra = 1;
1759     p++;
1760     }
1761     else if (*p == '-')
1762     {
1763     pcre_callout = NULL;
1764     p++;
1765     }
1766     else if (*p == '!')
1767     {
1768     callout_fail_id = 0;
1769     p++;
1770     while(isdigit(*p))
1771     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1772     callout_fail_count = 0;
1773     if (*p == '!')
1774     {
1775     p++;
1776     while(isdigit(*p))
1777     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1778     }
1779     }
1780     else if (*p == '*')
1781     {
1782     int sign = 1;
1783     callout_data = 0;
1784     if (*(++p) == '-') { sign = -1; p++; }
1785     while(isdigit(*p))
1786     callout_data = callout_data * 10 + *p++ - '0';
1787     callout_data *= sign;
1788     callout_data_set = 1;
1789     }
1790 nigel 29 continue;
1791    
1792 nigel 79 #if !defined NODFA
1793 nigel 77 case 'D':
1794 nigel 79 #if !defined NOPOSIX
1795 nigel 77 if (posix || do_posix)
1796     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1797     else
1798 nigel 79 #endif
1799 nigel 77 use_dfa = 1;
1800     continue;
1801    
1802     case 'F':
1803     options |= PCRE_DFA_SHORTEST;
1804     continue;
1805 nigel 79 #endif
1806 nigel 77
1807 nigel 29 case 'G':
1808 nigel 63 if (isdigit(*p))
1809     {
1810     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1811     getstrings |= 1 << n;
1812     }
1813     else if (isalnum(*p))
1814     {
1815 nigel 91 uschar *npp = getnamesptr;
1816 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1817 nigel 91 *npp++ = 0;
1818 nigel 67 *npp = 0;
1819 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1820 nigel 63 if (n < 0)
1821 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1822     getnamesptr = npp;
1823 nigel 63 }
1824 nigel 29 continue;
1825    
1826     case 'L':
1827     getlist = 1;
1828     continue;
1829    
1830 nigel 63 case 'M':
1831     find_match_limit = 1;
1832     continue;
1833    
1834 nigel 37 case 'N':
1835     options |= PCRE_NOTEMPTY;
1836     continue;
1837    
1838 nigel 3 case 'O':
1839     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1840 nigel 53 if (n > size_offsets_max)
1841     {
1842     size_offsets_max = n;
1843 nigel 57 free(offsets);
1844 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1845 nigel 53 if (offsets == NULL)
1846     {
1847     printf("** Failed to get %d bytes of memory for offsets vector\n",
1848     size_offsets_max * sizeof(int));
1849 nigel 77 yield = 1;
1850     goto EXIT;
1851 nigel 53 }
1852     }
1853     use_size_offsets = n;
1854 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1855 nigel 3 continue;
1856    
1857 nigel 75 case 'P':
1858     options |= PCRE_PARTIAL;
1859     continue;
1860    
1861 nigel 91 case 'Q':
1862     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1863     if (extra == NULL)
1864     {
1865     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1866     extra->flags = 0;
1867     }
1868     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1869     extra->match_limit_recursion = n;
1870     continue;
1871    
1872     case 'q':
1873     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1874     if (extra == NULL)
1875     {
1876     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1877     extra->flags = 0;
1878     }
1879     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1880     extra->match_limit = n;
1881     continue;
1882    
1883 nigel 79 #if !defined NODFA
1884 nigel 77 case 'R':
1885     options |= PCRE_DFA_RESTART;
1886     continue;
1887 nigel 79 #endif
1888 nigel 77
1889 nigel 73 case 'S':
1890     show_malloc = 1;
1891     continue;
1892    
1893 nigel 3 case 'Z':
1894     options |= PCRE_NOTEOL;
1895     continue;
1896 nigel 71
1897     case '?':
1898     options |= PCRE_NO_UTF8_CHECK;
1899     continue;
1900 nigel 91
1901     case '<':
1902     {
1903     int x = check_newline(p, outfile);
1904     if (x == 0) goto NEXT_DATA;
1905     options |= x;
1906     while (*p++ != '>');
1907     }
1908     continue;
1909 nigel 3 }
1910 nigel 9 *q++ = c;
1911 nigel 3 }
1912 nigel 9 *q = 0;
1913     len = q - dbuffer;
1914 nigel 3
1915 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
1916     {
1917     printf("**Match limit not relevant for DFA matching: ignored\n");
1918     find_match_limit = 0;
1919     }
1920    
1921 nigel 3 /* Handle matching via the POSIX interface, which does not
1922 nigel 63 support timing or playing with the match limit or callout data. */
1923 nigel 3
1924 nigel 37 #if !defined NOPOSIX
1925 nigel 3 if (posix || do_posix)
1926     {
1927     int rc;
1928     int eflags = 0;
1929 nigel 63 regmatch_t *pmatch = NULL;
1930     if (use_size_offsets > 0)
1931 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1932 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1933     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1934    
1935 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1936 nigel 3
1937     if (rc != 0)
1938     {
1939 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1940 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1941     }
1942 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1943     != 0)
1944     {
1945     fprintf(outfile, "Matched with REG_NOSUB\n");
1946     }
1947 nigel 3 else
1948     {
1949 nigel 7 size_t i;
1950 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
1951 nigel 3 {
1952     if (pmatch[i].rm_so >= 0)
1953     {
1954 nigel 23 fprintf(outfile, "%2d: ", (int)i);
1955 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
1956     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1957 nigel 3 fprintf(outfile, "\n");
1958 nigel 35 if (i == 0 && do_showrest)
1959     {
1960     fprintf(outfile, " 0+ ");
1961 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1962     outfile);
1963 nigel 35 fprintf(outfile, "\n");
1964     }
1965 nigel 3 }
1966     }
1967     }
1968 nigel 53 free(pmatch);
1969 nigel 3 }
1970    
1971 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
1972 nigel 3
1973 nigel 37 else
1974     #endif /* !defined NOPOSIX */
1975    
1976 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
1977 nigel 3 {
1978 nigel 93 if (timeitm > 0)
1979 nigel 3 {
1980     register int i;
1981     clock_t time_taken;
1982     clock_t start_time = clock();
1983 nigel 77
1984 nigel 79 #if !defined NODFA
1985 nigel 77 if (all_use_dfa || use_dfa)
1986     {
1987     int workspace[1000];
1988 nigel 93 for (i = 0; i < timeitm; i++)
1989 nigel 77 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1990     options | g_notempty, use_offsets, use_size_offsets, workspace,
1991     sizeof(workspace)/sizeof(int));
1992     }
1993     else
1994 nigel 79 #endif
1995 nigel 77
1996 nigel 93 for (i = 0; i < timeitm; i++)
1997 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
1998 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1999 nigel 77
2000 nigel 3 time_taken = clock() - start_time;
2001 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2002     (((double)time_taken * 1000.0) / (double)timeitm) /
2003 nigel 63 (double)CLOCKS_PER_SEC);
2004 nigel 3 }
2005    
2006 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2007 nigel 87 varying limits in order to find the minimum value for the match limit and
2008     for the recursion limit. */
2009 nigel 63
2010     if (find_match_limit)
2011     {
2012     if (extra == NULL)
2013     {
2014 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2015 nigel 63 extra->flags = 0;
2016     }
2017    
2018 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2019 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2020     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2021     PCRE_ERROR_MATCHLIMIT, "match()");
2022 nigel 63
2023 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2024     options|g_notempty, use_offsets, use_size_offsets,
2025     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2026     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2027 nigel 63 }
2028    
2029     /* If callout_data is set, use the interface with additional data */
2030    
2031     else if (callout_data_set)
2032     {
2033     if (extra == NULL)
2034     {
2035 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2036 nigel 63 extra->flags = 0;
2037     }
2038     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2039 nigel 71 extra->callout_data = &callout_data;
2040 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2041     options | g_notempty, use_offsets, use_size_offsets);
2042     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2043     }
2044    
2045     /* The normal case is just to do the match once, with the default
2046     value of match_limit. */
2047    
2048 nigel 79 #if !defined NODFA
2049 nigel 77 else if (all_use_dfa || use_dfa)
2050     {
2051     int workspace[1000];
2052     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2053     options | g_notempty, use_offsets, use_size_offsets, workspace,
2054     sizeof(workspace)/sizeof(int));
2055     if (count == 0)
2056     {
2057     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2058     count = use_size_offsets/2;
2059     }
2060     }
2061 nigel 79 #endif
2062 nigel 77
2063 nigel 75 else
2064     {
2065     count = pcre_exec(re, extra, (char *)bptr, len,
2066     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2067 nigel 77 if (count == 0)
2068     {
2069     fprintf(outfile, "Matched, but too many substrings\n");
2070     count = use_size_offsets/3;
2071     }
2072 nigel 75 }
2073 nigel 3
2074 nigel 39 /* Matched */
2075    
2076 nigel 3 if (count >= 0)
2077     {
2078 nigel 93 int i, maxcount;
2079    
2080     #if !defined NODFA
2081     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2082     #endif
2083     maxcount = use_size_offsets/3;
2084    
2085     /* This is a check against a lunatic return value. */
2086    
2087     if (count > maxcount)
2088     {
2089     fprintf(outfile,
2090     "** PCRE error: returned count %d is too big for offset size %d\n",
2091     count, use_size_offsets);
2092     count = use_size_offsets/3;
2093     if (do_g || do_G)
2094     {
2095     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2096     do_g = do_G = FALSE; /* Break g/G loop */
2097     }
2098     }
2099    
2100 nigel 29 for (i = 0; i < count * 2; i += 2)
2101 nigel 3 {
2102 nigel 57 if (use_offsets[i] < 0)
2103 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2104     else
2105     {
2106     fprintf(outfile, "%2d: ", i/2);
2107 nigel 63 (void)pchars(bptr + use_offsets[i],
2108     use_offsets[i+1] - use_offsets[i], outfile);
2109 nigel 3 fprintf(outfile, "\n");
2110 nigel 35 if (i == 0)
2111     {
2112     if (do_showrest)
2113     {
2114     fprintf(outfile, " 0+ ");
2115 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2116     outfile);
2117 nigel 35 fprintf(outfile, "\n");
2118     }
2119     }
2120 nigel 3 }
2121     }
2122 nigel 29
2123     for (i = 0; i < 32; i++)
2124     {
2125     if ((copystrings & (1 << i)) != 0)
2126     {
2127 nigel 91 char copybuffer[256];
2128 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2129 nigel 37 i, copybuffer, sizeof(copybuffer));
2130 nigel 29 if (rc < 0)
2131     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2132     else
2133 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2134 nigel 29 }
2135     }
2136    
2137 nigel 91 for (copynamesptr = copynames;
2138     *copynamesptr != 0;
2139     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2140     {
2141     char copybuffer[256];
2142     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2143     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2144     if (rc < 0)
2145     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2146     else
2147     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2148     }
2149    
2150 nigel 29 for (i = 0; i < 32; i++)
2151     {
2152     if ((getstrings & (1 << i)) != 0)
2153     {
2154     const char *substring;
2155 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2156 nigel 29 i, &substring);
2157     if (rc < 0)
2158     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2159     else
2160     {
2161     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2162 nigel 49 pcre_free_substring(substring);
2163 nigel 29 }
2164     }
2165     }
2166    
2167 nigel 91 for (getnamesptr = getnames;
2168     *getnamesptr != 0;
2169     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2170     {
2171     const char *substring;
2172     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2173     count, (char *)getnamesptr, &substring);
2174     if (rc < 0)
2175     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2176     else
2177     {
2178     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2179     pcre_free_substring(substring);
2180     }
2181     }
2182    
2183 nigel 29 if (getlist)
2184     {
2185     const char **stringlist;
2186 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2187 nigel 29 &stringlist);
2188     if (rc < 0)
2189     fprintf(outfile, "get substring list failed %d\n", rc);
2190     else
2191     {
2192     for (i = 0; i < count; i++)
2193     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2194     if (stringlist[i] != NULL)
2195     fprintf(outfile, "string list not terminated by NULL\n");
2196 nigel 49 /* free((void *)stringlist); */
2197     pcre_free_substring_list(stringlist);
2198 nigel 29 }
2199     }
2200 nigel 39 }
2201 nigel 29
2202 nigel 75 /* There was a partial match */
2203    
2204     else if (count == PCRE_ERROR_PARTIAL)
2205     {
2206 nigel 77 fprintf(outfile, "Partial match");
2207 nigel 79 #if !defined NODFA
2208 nigel 77 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2209     fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2210     bptr + use_offsets[0]);
2211 nigel 79 #endif
2212 nigel 77 fprintf(outfile, "\n");
2213 nigel 75 break; /* Out of the /g loop */
2214     }
2215    
2216 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2217 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2218     to advance the start offset, and continue. We won't be at the end of the
2219     string - that was checked before setting g_notempty.
2220 nigel 39
2221 ph10 143 Complication arises in the case when the newline option is "any".
2222     If the previous match was at the end of a line terminated by CRLF, an
2223     advance of one character just passes the \r, whereas we should prefer the
2224     longer newline sequence, as does the code in pcre_exec(). Fudge the
2225     offset value to achieve this.
2226 ph10 144
2227 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2228     character, not one byte. */
2229    
2230 nigel 3 else
2231     {
2232 nigel 41 if (g_notempty != 0)
2233 nigel 35 {
2234 nigel 73 int onechar = 1;
2235 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2236 nigel 57 use_offsets[0] = start_offset;
2237 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2238     {
2239     int d;
2240     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2241     obits = (d == '\r')? PCRE_NEWLINE_CR :
2242     (d == '\n')? PCRE_NEWLINE_LF :
2243     (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2244     (d == -1)? PCRE_NEWLINE_ANY : 0;
2245     }
2246     if ((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY &&
2247 ph10 143 start_offset < len - 1 &&
2248     bptr[start_offset] == '\r' &&
2249     bptr[start_offset+1] == '\n')
2250 ph10 144 onechar++;
2251 ph10 143 else if (use_utf8)
2252 nigel 73 {
2253     while (start_offset + onechar < len)
2254     {
2255     int tb = bptr[start_offset+onechar];
2256     if (tb <= 127) break;
2257     tb &= 0xc0;
2258     if (tb != 0 && tb != 0xc0) onechar++;
2259     }
2260     }
2261     use_offsets[1] = start_offset + onechar;
2262 nigel 35 }
2263 nigel 41 else
2264     {
2265 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2266 nigel 41 {
2267 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
2268 nigel 41 }
2269 nigel 73 else fprintf(outfile, "Error %d\n", count);
2270 nigel 41 break; /* Out of the /g loop */
2271     }
2272 nigel 3 }
2273 nigel 35
2274 nigel 39 /* If not /g or /G we are done */
2275    
2276     if (!do_g && !do_G) break;
2277    
2278 nigel 41 /* If we have matched an empty string, first check to see if we are at
2279     the end of the subject. If so, the /g loop is over. Otherwise, mimic
2280     what Perl's /g option does. This turns out to be rather cunning. First
2281 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2282     same point. If this fails (picked up above) we advance to the next
2283 ph10 143 character. */
2284 ph10 142
2285 nigel 41 g_notempty = 0;
2286 ph10 142
2287 nigel 57 if (use_offsets[0] == use_offsets[1])
2288 nigel 41 {
2289 nigel 57 if (use_offsets[0] == len) break;
2290 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2291 nigel 41 }
2292 nigel 39
2293     /* For /g, update the start offset, leaving the rest alone */
2294    
2295 ph10 143 if (do_g) start_offset = use_offsets[1];
2296 nigel 39
2297     /* For /G, update the pointer and length */
2298    
2299     else
2300 nigel 35 {
2301 ph10 143 bptr += use_offsets[1];
2302     len -= use_offsets[1];
2303 nigel 35 }
2304 nigel 39 } /* End of loop for /g and /G */
2305 nigel 91
2306     NEXT_DATA: continue;
2307 nigel 39 } /* End of loop for data lines */
2308 nigel 3
2309 nigel 11 CONTINUE:
2310 nigel 37
2311     #if !defined NOPOSIX
2312 nigel 3 if (posix || do_posix) regfree(&preg);
2313 nigel 37 #endif
2314    
2315 nigel 77 if (re != NULL) new_free(re);
2316     if (extra != NULL) new_free(extra);
2317 nigel 25 if (tables != NULL)
2318     {
2319 nigel 77 new_free((void *)tables);
2320 nigel 25 setlocale(LC_CTYPE, "C");
2321 nigel 93 locale_set = 0;
2322 nigel 25 }
2323 nigel 3 }
2324    
2325 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2326 nigel 77
2327     EXIT:
2328    
2329     if (infile != NULL && infile != stdin) fclose(infile);
2330     if (outfile != NULL && outfile != stdout) fclose(outfile);
2331    
2332     free(buffer);
2333     free(dbuffer);
2334     free(pbuffer);
2335     free(offsets);
2336    
2337     return yield;
2338 nigel 3 }
2339    
2340 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12