/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 142 - (hide annotations) (download)
Fri Mar 30 15:55:18 2007 UTC (7 years, 4 months ago) by ph10
File MIME type: text/plain
File size: 67834 byte(s)
Trailing spaces.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
46 nigel 3
47 nigel 93
48     /* A number of things vary for Windows builds. Originally, pcretest opened its
49     input and output without "b"; then I was told that "b" was needed in some
50     environments, so it was added for release 5.0 to both the input and output. (It
51     makes no difference on Unix-like systems.) Later I was told that it is wrong
52     for the input on Windows. I've now abstracted the modes into two macros that
53     are set here, to make it easier to fiddle with them, and removed "b" from the
54     input mode under Windows. */
55    
56     #if defined(_WIN32) || defined(WIN32)
57     #include <io.h> /* For _setmode() */
58     #include <fcntl.h> /* For _O_BINARY */
59     #define INPUT_MODE "r"
60     #define OUTPUT_MODE "wb"
61    
62     #else
63     #include <sys/time.h> /* These two includes are needed */
64     #include <sys/resource.h> /* for setrlimit(). */
65     #define INPUT_MODE "rb"
66     #define OUTPUT_MODE "wb"
67 nigel 91 #endif
68    
69 nigel 93
70 nigel 63 #define PCRE_SPY /* For Win32 build, import data, not export */
71 nigel 37
72 nigel 85 /* We include pcre_internal.h because we need the internal info for displaying
73     the results of pcre_study() and we also need to know about the internal
74     macros, structures, and other internal data values; pcretest has "inside
75     information" compared to a program that strictly follows the PCRE API. */
76 nigel 77
77     #include "pcre_internal.h"
78    
79 nigel 85 /* We need access to the data tables that PCRE uses. So as not to have to keep
80     two copies, we include the source file here, changing the names of the external
81     symbols to prevent clashes. */
82 nigel 77
83 nigel 85 #define _pcre_utf8_table1 utf8_table1
84     #define _pcre_utf8_table1_size utf8_table1_size
85     #define _pcre_utf8_table2 utf8_table2
86     #define _pcre_utf8_table3 utf8_table3
87     #define _pcre_utf8_table4 utf8_table4
88     #define _pcre_utt utt
89     #define _pcre_utt_size utt_size
90     #define _pcre_OP_lengths OP_lengths
91    
92     #include "pcre_tables.c"
93    
94     /* We also need the pcre_printint() function for printing out compiled
95     patterns. This function is in a separate file so that it can be included in
96 nigel 93 pcre_compile.c when that module is compiled with debugging enabled.
97 nigel 85
98 nigel 93 The definition of the macro PRINTABLE, which determines whether to print an
99     output character as-is or as a hex value when showing compiled patterns, is
100     contained in this file. We use it here also, in cases when the locale has not
101     been explicitly changed, so as to get consistent output from systems that
102     differ in their output from isprint() even in the "C" locale. */
103    
104 nigel 85 #include "pcre_printint.src"
105    
106 nigel 93 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
107 nigel 85
108 nigel 93
109 nigel 37 /* It is possible to compile this test program without including support for
110     testing the POSIX interface, though this is not available via the standard
111     Makefile. */
112    
113     #if !defined NOPOSIX
114 nigel 3 #include "pcreposix.h"
115 nigel 37 #endif
116 nigel 3
117 ph10 107 /* It is also possible, for the benefit of the version currently imported into
118     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
119     interface to the DFA matcher (NODFA), and without the doublecheck of the old
120     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
121     UTF8 support if PCRE is built without it. */
122 nigel 79
123 ph10 107 #ifndef SUPPORT_UTF8
124     #ifndef NOUTF8
125     #define NOUTF8
126     #endif
127     #endif
128 nigel 79
129 ph10 107
130 nigel 85 /* Other parameters */
131    
132 nigel 3 #ifndef CLOCKS_PER_SEC
133     #ifdef CLK_TCK
134     #define CLOCKS_PER_SEC CLK_TCK
135     #else
136     #define CLOCKS_PER_SEC 100
137     #endif
138     #endif
139    
140 nigel 93 /* This is the default loop count for timing. */
141    
142 nigel 75 #define LOOPREPEAT 500000
143 nigel 3
144 nigel 85 /* Static variables */
145    
146 nigel 3 static FILE *outfile;            /* Where test output goes; main() starts it at stdout */
147     static int log_store = 0;        /* NOTE(review): not referenced in the code visible here - confirm use */
148 nigel 63 static int callout_count;       /* Incremented by callout() each time callout_fail_id is hit */
149     static int callout_extra;        /* Non-zero: callout() also lists the captured substrings */
150     static int callout_fail_count;   /* callout() returns 1 once callout_count reaches this */
151     static int callout_fail_id;      /* The callout number that callout_count applies to */
152     static int first_callout;        /* Non-zero until callout() has run once; it clears it */
153 nigel 93 static int locale_set = 0;      /* Non-zero: PRINTHEX uses isprint() instead of PRINTABLE */
154 nigel 73 static int show_malloc;         /* Non-zero: the malloc/free wrappers log each call */
155 nigel 67 static int use_utf8;            /* Non-zero: pchars() decodes the string as UTF-8 */
156 nigel 43 static size_t gotten_store;     /* Size passed to the most recent new_malloc() call */
157 nigel 3
158 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
159    
160     static int buffer_size = 50000;  /* Current size of each buffer; doubled by extend_inputline() */
161     static uschar *buffer = NULL;    /* Input lines are read into this buffer */
162     static uschar *dbuffer = NULL;   /* Resized alongside buffer; its use is outside the code visible here */
163 nigel 75 static uschar *pbuffer = NULL;  /* Copy of the input kept for callouts; callout() prints from it */
164 nigel 3
165 nigel 75
166 nigel 49
167     /*************************************************
168 nigel 91 * Read or extend an input line *
169     *************************************************/
170    
171     /* Input lines are read into buffer, but both patterns and data lines can be
172     continued over multiple input lines. In addition, if the buffer fills up, we
173     want to automatically expand it so as to be able to handle extremely large
174     lines that are needed for certain stress tests. When the input buffer is
175     expanded, the other two buffers must also be expanded likewise, and the
176     contents of pbuffer, which are a copy of the input for callouts, must be
177     preserved (for when expansion happens for a data line). This is not the most
178     optimal way of handling this, but hey, this is just a test program!
179    
180     Arguments:
181     f the file to read
182     start where in buffer to start (this *must* be within buffer)
183    
184     Returns: pointer to the start of new data
185     could be a copy of start, or could be moved
186     NULL if no data read and EOF reached
187     */
188    
189     static uschar *
190     extend_inputline(FILE *f, uschar *start)
191     {
192     uschar *here = start;
193    
194     for (;;)
195     {
196     int rlen = buffer_size - (here - buffer);
197 nigel 93
198 nigel 91 if (rlen > 1000)
199     {
200     int dlen;
201     if (fgets((char *)here, rlen, f) == NULL)
202     return (here == start)? NULL : start;
203     dlen = (int)strlen((char *)here);
204     if (dlen > 0 && here[dlen - 1] == '\n') return start;
205     here += dlen;
206     }
207    
208     else
209     {
210     int new_buffer_size = 2*buffer_size;
211     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
212     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
213     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
214    
215     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
216     {
217     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
218     exit(1);
219     }
220    
221     memcpy(new_buffer, buffer, buffer_size);
222     memcpy(new_pbuffer, pbuffer, buffer_size);
223    
224     buffer_size = new_buffer_size;
225    
226     start = new_buffer + (start - buffer);
227     here = new_buffer + (here - buffer);
228    
229     free(buffer);
230     free(dbuffer);
231     free(pbuffer);
232    
233     buffer = new_buffer;
234     dbuffer = new_dbuffer;
235     pbuffer = new_pbuffer;
236     }
237     }
238    
239     return NULL; /* Control never gets here */
240     }
241    
242    
243    
244    
245    
246    
247    
/*************************************************
*          Read number from string               *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, and then a run of decimal digits is converted.
If there are no digits the result is zero and the end pointer is left at the
first non-space character. A value too large for an int is clamped to INT_MAX
(the remaining digits are still consumed) instead of being allowed to overflow,
which is undefined behaviour for a signed type.

Arguments:
  str           string to be converted
  endptr        where to put the end pointer (first character not consumed)

Returns:        the value, as a non-negative int
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (*str != 0 && isspace(*str)) str++;
while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* result*10 + digit fits in an int only if result <= (INT_MAX - digit)/10.
  Once clamped, result stays at INT_MAX for any further digits. */

  if (result > (INT_MAX - digit)/10) result = INT_MAX;
    else result = result * 10 + digit;
  }
*endptr = str;
return(result);
}
272    
273    
274    
275 nigel 49
276     /*************************************************
277     * Convert UTF-8 string to value *
278     *************************************************/
279    
280     /* This function takes one or more bytes that represents a UTF-8 character,
281     and returns the value of the character.
282    
283     Argument:
284 nigel 91 utf8bytes a pointer to the byte vector
285     vptr a pointer to an int to receive the value
286 nigel 49
287 nigel 91 Returns: > 0 => the number of bytes consumed
288     -6 to 0 => malformed UTF-8 character at offset = (-return)
289 nigel 49 */
290    
291 nigel 79 #if !defined NOUTF8
292    
293 nigel 67 static int
294 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
295 nigel 49 {
296 nigel 91 int c = *utf8bytes++;
297 nigel 49 int d = c;
298     int i, j, s;
299    
300     for (i = -1; i < 6; i++) /* i is number of additional bytes */
301     {
302     if ((d & 0x80) == 0) break;
303     d <<= 1;
304     }
305    
306     if (i == -1) { *vptr = c; return 1; } /* ascii character */
307     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
308    
309     /* i now has a value in the range 1-5 */
310    
311 nigel 59 s = 6*i;
312 nigel 85 d = (c & utf8_table3[i]) << s;
313 nigel 49
314     for (j = 0; j < i; j++)
315     {
316 nigel 91 c = *utf8bytes++;
317 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
318 nigel 59 s -= 6;
319 nigel 49 d |= (c & 0x3f) << s;
320     }
321    
322     /* Check that encoding was the correct unique one */
323    
324 nigel 85 for (j = 0; j < utf8_table1_size; j++)
325     if (d <= utf8_table1[j]) break;
326 nigel 49 if (j != i) return -(i+1);
327    
328     /* Valid value */
329    
330     *vptr = d;
331     return i+1;
332     }
333    
334 nigel 79 #endif
335 nigel 49
336    
337 nigel 79
338 nigel 63 /*************************************************
339 nigel 85 * Convert character value to UTF-8 *
340     *************************************************/
341    
342     /* This function takes an integer value in the range 0 - 0x7fffffff
343     and encodes it as a UTF-8 character in 0 to 6 bytes.
344    
345     Arguments:
346     cvalue the character value
347 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
348 nigel 85
349     Returns: number of characters placed in the buffer
350     */
351    
352 nigel 93 #if !defined NOUTF8
353    
354 nigel 85 static int
355 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
356 nigel 85 {
357     register int i, j;
358     for (i = 0; i < utf8_table1_size; i++)
359     if (cvalue <= utf8_table1[i]) break;
360 nigel 91 utf8bytes += i;
361 nigel 85 for (j = i; j > 0; j--)
362     {
363 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
364 nigel 85 cvalue >>= 6;
365     }
366 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
367 nigel 85 return i + 1;
368     }
369    
370 nigel 93 #endif
371 nigel 85
372    
373 nigel 93
374 nigel 85 /*************************************************
375 nigel 63 * Print character string *
376     *************************************************/
377 nigel 49
378 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
379     mode. Yields number of characters printed. If handed a NULL file, just counts
380     chars without printing. */
381 nigel 49
382 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
383 nigel 3 {
384 nigel 85 int c = 0;
385 nigel 63 int yield = 0;
386 nigel 3
387 nigel 63 while (length-- > 0)
388 nigel 3 {
389 nigel 79 #if !defined NOUTF8
390 nigel 67 if (use_utf8)
391 nigel 63 {
392     int rc = utf82ord(p, &c);
393 nigel 3
394 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
395     {
396     length -= rc - 1;
397     p += rc;
398 nigel 93 if (PRINTHEX(c))
399 nigel 63 {
400     if (f != NULL) fprintf(f, "%c", c);
401     yield++;
402     }
403     else
404     {
405 nigel 93 int n = 4;
406     if (f != NULL) fprintf(f, "\\x{%02x}", c);
407     yield += (n <= 0x000000ff)? 2 :
408     (n <= 0x00000fff)? 3 :
409     (n <= 0x0000ffff)? 4 :
410     (n <= 0x000fffff)? 5 : 6;
411 nigel 63 }
412     continue;
413     }
414     }
415 nigel 79 #endif
416 nigel 3
417 nigel 63 /* Not UTF-8, or malformed UTF-8 */
418    
419 nigel 93 c = *p++;
420     if (PRINTHEX(c))
421 nigel 3 {
422 nigel 63 if (f != NULL) fprintf(f, "%c", c);
423     yield++;
424 nigel 3 }
425 nigel 63 else
426 nigel 3 {
427 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
428     yield += 4;
429     }
430     }
431 nigel 3
432 nigel 63 return yield;
433     }
434 nigel 23
435 nigel 3
436 nigel 23
437 nigel 63 /*************************************************
438     * Callout function *
439     *************************************************/
440 nigel 3
441 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
442     the match. Yield zero unless more callouts than the fail count, or the callout
443     data is not zero. */
444 nigel 3
445 nigel 63 static int callout(pcre_callout_block *cb)
446     {
447     FILE *f = (first_callout | callout_extra)? outfile : NULL;
448 nigel 75 int i, pre_start, post_start, subject_length;
449 nigel 3
450 nigel 63 if (callout_extra)
451     {
452     fprintf(f, "Callout %d: last capture = %d\n",
453     cb->callout_number, cb->capture_last);
454 nigel 3
455 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
456     {
457     if (cb->offset_vector[i] < 0)
458     fprintf(f, "%2d: <unset>\n", i/2);
459     else
460     {
461     fprintf(f, "%2d: ", i/2);
462     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
463     cb->offset_vector[i+1] - cb->offset_vector[i], f);
464     fprintf(f, "\n");
465     }
466     }
467     }
468 nigel 3
469 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
470     datails. On subsequent calls in the same match, we use pchars just to find the
471     printed lengths of the substrings. */
472 nigel 3
473 nigel 63 if (f != NULL) fprintf(f, "--->");
474 nigel 3
475 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
476     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
477     cb->current_position - cb->start_match, f);
478 nigel 3
479 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
480    
481 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
482     cb->subject_length - cb->current_position, f);
483 nigel 3
484 nigel 63 if (f != NULL) fprintf(f, "\n");
485 nigel 9
486 nigel 63 /* Always print appropriate indicators, with callout number if not already
487 nigel 75 shown. For automatic callouts, show the pattern offset. */
488 nigel 3
489 nigel 75 if (cb->callout_number == 255)
490     {
491     fprintf(outfile, "%+3d ", cb->pattern_position);
492     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
493     }
494     else
495     {
496     if (callout_extra) fprintf(outfile, " ");
497     else fprintf(outfile, "%3d ", cb->callout_number);
498     }
499 nigel 3
500 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
501     fprintf(outfile, "^");
502 nigel 3
503 nigel 63 if (post_start > 0)
504     {
505     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
506     fprintf(outfile, "^");
507 nigel 3 }
508    
509 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
510     fprintf(outfile, " ");
511    
512     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
513     pbuffer + cb->pattern_position);
514    
515 nigel 63 fprintf(outfile, "\n");
516     first_callout = 0;
517 nigel 3
518 nigel 71 if (cb->callout_data != NULL)
519 nigel 49 {
520 nigel 71 int callout_data = *((int *)(cb->callout_data));
521     if (callout_data != 0)
522     {
523     fprintf(outfile, "Callout data = %d\n", callout_data);
524     return callout_data;
525     }
526 nigel 63 }
527 nigel 49
528 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
529     (++callout_count >= callout_fail_count)? 1 : 0;
530 nigel 3 }
531    
532    
533 nigel 63 /*************************************************
534 nigel 73 * Local malloc functions *
535 nigel 63 *************************************************/
536 nigel 3
537     /* Alternative malloc function, to test functionality and show the size of the
538     compiled re. */
539    
540     static void *new_malloc(size_t size)
541     {
542 nigel 73 void *block = malloc(size);
543 nigel 43 gotten_store = size;
544 nigel 73 if (show_malloc)
545 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
546 nigel 73 return block;
547 nigel 3 }
548    
549 nigel 73 static void new_free(void *block)
550     {
551     if (show_malloc)
552     fprintf(outfile, "free %p\n", block);
553     free(block);
554     }
555 nigel 3
556    
557 nigel 73 /* For recursion malloc/free, to test stacking calls */
558    
559     static void *stack_malloc(size_t size)
560     {
561     void *block = malloc(size);
562     if (show_malloc)
563 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
564 nigel 73 return block;
565     }
566    
567     static void stack_free(void *block)
568     {
569     if (show_malloc)
570     fprintf(outfile, "stack_free %p\n", block);
571     free(block);
572     }
573    
574    
575 nigel 63 /*************************************************
576     * Call pcre_fullinfo() *
577     *************************************************/
578 nigel 43
579     /* Get one piece of information from the pcre_fullinfo() function */
580    
581     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
582     {
583     int rc;
584     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
585     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
586     }
587    
588    
589    
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value     the value to flip
  n         2 to swap the two low-order bytes (anything above them is
              dropped); any other value reverses the four low-order bytes

Returns:    the flipped value
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;            /* Least significant byte */
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2 = (value >> 16) & 0xff;
unsigned long int b3 = (value >> 24) & 0xff;

if (n == 2) return (b0 << 8) | b1;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
603    
604    
605    
606    
607     /*************************************************
608 nigel 87 * Check match or recursion limit *
609     *************************************************/
610    
611     static int
612     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
613     int start_offset, int options, int *use_offsets, int use_size_offsets,
614     int flag, unsigned long int *limit, int errnumber, const char *msg)
615     {
616     int count;
617     int min = 0;
618     int mid = 64;
619     int max = -1;
620    
621     extra->flags |= flag;
622    
623     for (;;)
624     {
625     *limit = mid;
626    
627     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
628     use_offsets, use_size_offsets);
629    
630     if (count == errnumber)
631     {
632     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
633     min = mid;
634     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
635     }
636    
637     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
638     count == PCRE_ERROR_PARTIAL)
639     {
640     if (mid == min + 1)
641     {
642     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
643     break;
644     }
645     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
646     max = mid;
647     mid = (min + mid)/2;
648     }
649     else break; /* Some other error */
650     }
651    
652     extra->flags &= ~flag;
653     return count;
654     }
655    
656    
657    
658     /*************************************************
659 nigel 91 * Check newline indicator *
660     *************************************************/
661    
662     /* This is used both at compile and run-time to check for <xxx> escapes, where
663 nigel 93 xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.
664 nigel 91
665     Arguments:
666     p points after the leading '<'
667     f file for error message
668    
669     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
670     */
671    
672     static int
673     check_newline(uschar *p, FILE *f)
674     {
675     if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
676     if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
677     if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
678 nigel 93 if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
679 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
680     return 0;
681     }
682    
683    
684    
/*************************************************
*             Usage function                     *
*************************************************/

/* Write a summary of the command line options to the standard output. The
lines for -dfa and -p are left out when the program is built with NODFA or
NOPOSIX respectively. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input> [<output>]]\n", stdout);
fputs("  -b       show compiled code (bytecode)\n", stdout);
fputs("  -C       show PCRE compile-time options and exit\n", stdout);
fputs("  -d       debug: show compiled code and information (-b and -i)\n",
  stdout);
#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif
fputs("  -help    show usage information\n", stdout);
fputs("  -i       show information about compiled patterns\n", stdout);
fputs("  -m       output memory used information\n", stdout);
fputs("  -o <n>   set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif
fputs("  -q       quiet: do not output PCRE version number at start\n",
  stdout);
fputs("  -S <n>   set stack size to <n> megabytes\n", stdout);
fputs("  -s       output store (memory) used information\n", stdout);
fputs("  -t       time compilation and execution\n", stdout);
fputs("  -t <n>   time compilation and execution, repeating <n> times\n",
  stdout);
fputs("  -tm      time execution (matching) only\n", stdout);
fputs("  -tm <n>  time execution (matching) only, repeating <n> times\n",
  stdout);
}
714    
715    
716    
717     /*************************************************
718 nigel 63 * Main Program *
719     *************************************************/
720 nigel 43
721 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
722     consist of a regular expression, in delimiters and optionally followed by
723     options, followed by a set of test data, terminated by an empty line. */
724    
725     int main(int argc, char **argv)
726     {
727     FILE *infile = stdin;
728     int options = 0;
729     int study_options = 0;
730     int op = 1;
731     int timeit = 0;
732 nigel 93 int timeitm = 0;
733 nigel 3 int showinfo = 0;
734 nigel 31 int showstore = 0;
735 nigel 87 int quiet = 0;
736 nigel 53 int size_offsets = 45;
737     int size_offsets_max;
738 nigel 77 int *offsets = NULL;
739 nigel 53 #if !defined NOPOSIX
740 nigel 3 int posix = 0;
741 nigel 53 #endif
742 nigel 3 int debug = 0;
743 nigel 11 int done = 0;
744 nigel 77 int all_use_dfa = 0;
745     int yield = 0;
746 nigel 91 int stack_size;
747 nigel 3
748 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
749     that 1024 is plenty long enough for the few names we'll be testing. */
750 nigel 69
751 nigel 91 uschar copynames[1024];
752     uschar getnames[1024];
753    
754     uschar *copynamesptr;
755     uschar *getnamesptr;
756    
757 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
758 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
759 nigel 69
760 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
761     dbuffer = (unsigned char *)malloc(buffer_size);
762     pbuffer = (unsigned char *)malloc(buffer_size);
763 nigel 69
764 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
765 nigel 3
766 nigel 93 outfile = stdout;
767    
768     /* The following _setmode() stuff is some Windows magic that tells its runtime
769     library to translate CRLF into a single LF character. At least, that's what
770     I've been told: never having used Windows I take this all on trust. Originally
771     it set 0x8000, but then I was advised that _O_BINARY was better. */
772    
773 nigel 75 #if defined(_WIN32) || defined(WIN32)
774 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
775     #endif
776 nigel 75
777 nigel 3 /* Scan options */
778    
779     while (argc > 1 && argv[op][0] == '-')
780     {
781 nigel 63 unsigned char *endptr;
782 nigel 53
783 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
784     showstore = 1;
785 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
786 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
787 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
788     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
789 nigel 79 #if !defined NODFA
790 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
791 nigel 79 #endif
792 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
793 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
794     *endptr == 0))
795 nigel 53 {
796     op++;
797     argc--;
798     }
799 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
800     {
801     int both = argv[op][2] == 0;
802     int temp;
803     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
804     *endptr == 0))
805     {
806     timeitm = temp;
807     op++;
808     argc--;
809     }
810     else timeitm = LOOPREPEAT;
811     if (both) timeit = timeitm;
812     }
813 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
814     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
815     *endptr == 0))
816     {
817 nigel 93 #if defined(_WIN32) || defined(WIN32)
818 nigel 91 printf("PCRE: -S not supported on this OS\n");
819     exit(1);
820     #else
821     int rc;
822     struct rlimit rlim;
823     getrlimit(RLIMIT_STACK, &rlim);
824     rlim.rlim_cur = stack_size * 1024 * 1024;
825     rc = setrlimit(RLIMIT_STACK, &rlim);
826     if (rc != 0)
827     {
828     printf("PCRE: setrlimit() failed with error %d\n", rc);
829     exit(1);
830     }
831     op++;
832     argc--;
833     #endif
834     }
835 nigel 53 #if !defined NOPOSIX
836 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
837 nigel 53 #endif
838 nigel 63 else if (strcmp(argv[op], "-C") == 0)
839     {
840     int rc;
841     printf("PCRE version %s\n", pcre_version());
842     printf("Compiled with\n");
843     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
844     printf(" %sUTF-8 support\n", rc? "" : "No ");
845 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
846     printf(" %sUnicode properties support\n", rc? "" : "No ");
847 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
848 nigel 91 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
849 nigel 93 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
850     (rc == -1)? "ANY" : "???");
851 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
852     printf(" Internal link size = %d\n", rc);
853     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
854     printf(" POSIX malloc threshold = %d\n", rc);
855     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
856     printf(" Default match limit = %d\n", rc);
857 nigel 87 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
858     printf(" Default recursion depth limit = %d\n", rc);
859 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
860     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
861 ph10 121 goto EXIT;
862 nigel 63 }
863 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
864     strcmp(argv[op], "--help") == 0)
865     {
866     usage();
867     goto EXIT;
868     }
869 nigel 3 else
870     {
871 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
872 nigel 93 usage();
873 nigel 77 yield = 1;
874     goto EXIT;
875 nigel 3 }
876     op++;
877     argc--;
878     }
879    
880 nigel 53 /* Get the store for the offsets vector, and remember what it was */
881    
882     size_offsets_max = size_offsets;
883 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
884 nigel 53 if (offsets == NULL)
885     {
886     printf("** Failed to get %d bytes of memory for offsets vector\n",
887     size_offsets_max * sizeof(int));
888 nigel 77 yield = 1;
889     goto EXIT;
890 nigel 53 }
891    
892 nigel 3 /* Sort out the input and output files */
893    
894     if (argc > 1)
895     {
896 nigel 93 infile = fopen(argv[op], INPUT_MODE);
897 nigel 3 if (infile == NULL)
898     {
899     printf("** Failed to open %s\n", argv[op]);
900 nigel 77 yield = 1;
901     goto EXIT;
902 nigel 3 }
903     }
904    
905     if (argc > 2)
906     {
907 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
908 nigel 3 if (outfile == NULL)
909     {
910     printf("** Failed to open %s\n", argv[op+1]);
911 nigel 77 yield = 1;
912     goto EXIT;
913 nigel 3 }
914     }
915    
916     /* Set alternative malloc function */
917    
918     pcre_malloc = new_malloc;
919 nigel 73 pcre_free = new_free;
920     pcre_stack_malloc = stack_malloc;
921     pcre_stack_free = stack_free;
922 nigel 3
923 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
924 nigel 3
925 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
926 nigel 3
927     /* Main loop */
928    
929 nigel 11 while (!done)
930 nigel 3 {
931     pcre *re = NULL;
932     pcre_extra *extra = NULL;
933 nigel 37
934     #if !defined NOPOSIX /* There are still compilers that require no indent */
935 nigel 3 regex_t preg;
936 nigel 45 int do_posix = 0;
937 nigel 37 #endif
938    
939 nigel 7 const char *error;
940 nigel 25 unsigned char *p, *pp, *ppp;
941 nigel 75 unsigned char *to_file = NULL;
942 nigel 53 const unsigned char *tables = NULL;
943 nigel 75 unsigned long int true_size, true_study_size = 0;
944     size_t size, regex_gotten_store;
945 nigel 3 int do_study = 0;
946 nigel 25 int do_debug = debug;
947 ph10 123 int debug_lengths = 1;
948 nigel 35 int do_G = 0;
949     int do_g = 0;
950 nigel 25 int do_showinfo = showinfo;
951 nigel 35 int do_showrest = 0;
952 nigel 75 int do_flip = 0;
953 nigel 93 int erroroffset, len, delimiter, poffset;
954 nigel 3
955 nigel 67 use_utf8 = 0;
956 nigel 63
957 nigel 3 if (infile == stdin) printf(" re> ");
958 nigel 91 if (extend_inputline(infile, buffer) == NULL) break;
959 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
960 nigel 63 fflush(outfile);
961 nigel 3
962     p = buffer;
963     while (isspace(*p)) p++;
964     if (*p == 0) continue;
965    
966 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
967 nigel 3
968 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
969     {
970 nigel 91 unsigned long int magic, get_options;
971 nigel 75 uschar sbuf[8];
972     FILE *f;
973    
974     p++;
975     pp = p + (int)strlen((char *)p);
976     while (isspace(pp[-1])) pp--;
977     *pp = 0;
978    
979     f = fopen((char *)p, "rb");
980     if (f == NULL)
981     {
982     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
983     continue;
984     }
985    
986     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
987    
988     true_size =
989     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
990     true_study_size =
991     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
992    
993     re = (real_pcre *)new_malloc(true_size);
994     regex_gotten_store = gotten_store;
995    
996     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
997    
998     magic = ((real_pcre *)re)->magic_number;
999     if (magic != MAGIC_NUMBER)
1000     {
1001     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1002     {
1003     do_flip = 1;
1004     }
1005     else
1006     {
1007     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1008     fclose(f);
1009     continue;
1010     }
1011     }
1012    
1013     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1014     do_flip? " (byte-inverted)" : "", p);
1015    
1016     /* Need to know if UTF-8 for printing data strings */
1017    
1018 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1019     use_utf8 = (get_options & PCRE_UTF8) != 0;
1020 nigel 75
1021     /* Now see if there is any following study data */
1022    
1023     if (true_study_size != 0)
1024     {
1025     pcre_study_data *psd;
1026    
1027     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1028     extra->flags = PCRE_EXTRA_STUDY_DATA;
1029    
1030     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1031     extra->study_data = psd;
1032    
1033     if (fread(psd, 1, true_study_size, f) != true_study_size)
1034     {
1035     FAIL_READ:
1036     fprintf(outfile, "Failed to read data from %s\n", p);
1037     if (extra != NULL) new_free(extra);
1038     if (re != NULL) new_free(re);
1039     fclose(f);
1040     continue;
1041     }
1042     fprintf(outfile, "Study data loaded from %s\n", p);
1043     do_study = 1; /* To get the data output if requested */
1044     }
1045     else fprintf(outfile, "No study data\n");
1046    
1047     fclose(f);
1048     goto SHOW_INFO;
1049     }
1050    
1051     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1052     the pattern; if it isn't complete, read more. */
1053    
1054 nigel 3 delimiter = *p++;
1055    
1056 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1057 nigel 3 {
1058 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1059 nigel 3 goto SKIP_DATA;
1060     }
1061    
1062     pp = p;
1063 nigel 93 poffset = p - buffer;
1064 nigel 3
1065     for(;;)
1066     {
1067 nigel 29 while (*pp != 0)
1068     {
1069     if (*pp == '\\' && pp[1] != 0) pp++;
1070     else if (*pp == delimiter) break;
1071     pp++;
1072     }
1073 nigel 3 if (*pp != 0) break;
1074     if (infile == stdin) printf(" > ");
1075 nigel 91 if ((pp = extend_inputline(infile, pp)) == NULL)
1076 nigel 3 {
1077     fprintf(outfile, "** Unexpected EOF\n");
1078 nigel 11 done = 1;
1079     goto CONTINUE;
1080 nigel 3 }
1081 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1082 nigel 3 }
1083    
1084 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1085     pointer to the correct relative point in the buffer. */
1086    
1087     p = buffer + poffset;
1088    
1089 nigel 29 /* If the first character after the delimiter is backslash, make
1090     the pattern end with backslash. This is purely to provide a way
1091     of testing for the error message when a pattern ends with backslash. */
1092    
1093     if (pp[1] == '\\') *pp++ = '\\';
1094    
1095 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1096     for callouts. */
1097 nigel 3
1098     *pp++ = 0;
1099 nigel 75 strcpy((char *)pbuffer, (char *)p);
1100 nigel 3
1101     /* Look for options after final delimiter */
1102    
1103     options = 0;
1104     study_options = 0;
1105 nigel 31 log_store = showstore; /* default from command line */
1106    
1107 nigel 3 while (*pp != 0)
1108     {
1109     switch (*pp++)
1110     {
1111 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1112 nigel 35 case 'g': do_g = 1; break;
1113 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1114     case 'm': options |= PCRE_MULTILINE; break;
1115     case 's': options |= PCRE_DOTALL; break;
1116     case 'x': options |= PCRE_EXTENDED; break;
1117 nigel 25
1118 nigel 35 case '+': do_showrest = 1; break;
1119 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1120 nigel 93 case 'B': do_debug = 1; break;
1121 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1122 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1123 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1124 nigel 75 case 'F': do_flip = 1; break;
1125 nigel 35 case 'G': do_G = 1; break;
1126 nigel 25 case 'I': do_showinfo = 1; break;
1127 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1128 nigel 31 case 'M': log_store = 1; break;
1129 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1130 nigel 37
1131     #if !defined NOPOSIX
1132 nigel 3 case 'P': do_posix = 1; break;
1133 nigel 37 #endif
1134    
1135 nigel 3 case 'S': do_study = 1; break;
1136 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1137 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1138 ph10 126 case 'Z': debug_lengths = 0; break;
1139 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1140 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1141 nigel 25
1142     case 'L':
1143     ppp = pp;
1144 nigel 93 /* The '\r' test here is so that it works on Windows. */
1145     /* The 0 (NUL) test is just in case this is an unterminated line. */
1146     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1147 nigel 25 *ppp = 0;
1148     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1149     {
1150     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1151     goto SKIP_DATA;
1152     }
1153 nigel 93 locale_set = 1;
1154 nigel 25 tables = pcre_maketables();
1155     pp = ppp;
1156     break;
1157    
1158 nigel 75 case '>':
1159     to_file = pp;
1160     while (*pp != 0) pp++;
1161     while (isspace(pp[-1])) pp--;
1162     *pp = 0;
1163     break;
1164    
1165 nigel 91 case '<':
1166     {
1167     int x = check_newline(pp, outfile);
1168     if (x == 0) goto SKIP_DATA;
1169     options |= x;
1170     while (*pp++ != '>');
1171     }
1172     break;
1173    
1174 nigel 77 case '\r': /* So that it works in Windows */
1175     case '\n':
1176     case ' ':
1177     break;
1178 nigel 75
1179 nigel 3 default:
1180     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1181     goto SKIP_DATA;
1182     }
1183     }
1184    
1185 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1186 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1187     local character tables. */
1188 nigel 3
1189 nigel 37 #if !defined NOPOSIX
1190 nigel 3 if (posix || do_posix)
1191     {
1192     int rc;
1193     int cflags = 0;
1194 nigel 75
1195 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1196     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1197 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1198 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1199     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1200    
1201 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1202    
1203     /* Compilation failed; go back for another re, skipping to blank line
1204     if non-interactive. */
1205    
1206     if (rc != 0)
1207     {
1208 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1209 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1210     goto SKIP_DATA;
1211     }
1212     }
1213    
1214     /* Handle compiling via the native interface */
1215    
1216     else
1217 nigel 37 #endif /* !defined NOPOSIX */
1218    
1219 nigel 3 {
1220 nigel 93 if (timeit > 0)
1221 nigel 3 {
1222     register int i;
1223     clock_t time_taken;
1224     clock_t start_time = clock();
1225 nigel 93 for (i = 0; i < timeit; i++)
1226 nigel 3 {
1227 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1228 nigel 3 if (re != NULL) free(re);
1229     }
1230     time_taken = clock() - start_time;
1231 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1232     (((double)time_taken * 1000.0) / (double)timeit) /
1233 nigel 63 (double)CLOCKS_PER_SEC);
1234 nigel 3 }
1235    
1236 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1237 nigel 3
1238     /* Compilation failed; go back for another re, skipping to blank line
1239     if non-interactive. */
1240    
1241     if (re == NULL)
1242     {
1243     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1244     SKIP_DATA:
1245     if (infile != stdin)
1246     {
1247     for (;;)
1248     {
1249 nigel 91 if (extend_inputline(infile, buffer) == NULL)
1250 nigel 11 {
1251     done = 1;
1252     goto CONTINUE;
1253     }
1254 nigel 3 len = (int)strlen((char *)buffer);
1255     while (len > 0 && isspace(buffer[len-1])) len--;
1256     if (len == 0) break;
1257     }
1258     fprintf(outfile, "\n");
1259     }
1260 nigel 25 goto CONTINUE;
1261 nigel 3 }
1262    
1263 nigel 43 /* Compilation succeeded; print data if required. There are now two
1264     info-returning functions. The old one has a limited interface and
1265     returns only limited data. Check that it agrees with the newer one. */
1266 nigel 3
1267 nigel 63 if (log_store)
1268     fprintf(outfile, "Memory allocation (code space): %d\n",
1269     (int)(gotten_store -
1270     sizeof(real_pcre) -
1271     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1272    
1273 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1274     and remember the store that was got. */
1275    
1276     true_size = ((real_pcre *)re)->size;
1277     regex_gotten_store = gotten_store;
1278    
1279     /* If /S was present, study the regexp to generate additional info to
1280     help with the matching. */
1281    
1282     if (do_study)
1283     {
1284 nigel 93 if (timeit > 0)
1285 nigel 75 {
1286     register int i;
1287     clock_t time_taken;
1288     clock_t start_time = clock();
1289 nigel 93 for (i = 0; i < timeit; i++)
1290 nigel 75 extra = pcre_study(re, study_options, &error);
1291     time_taken = clock() - start_time;
1292     if (extra != NULL) free(extra);
1293 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1294     (((double)time_taken * 1000.0) / (double)timeit) /
1295 nigel 75 (double)CLOCKS_PER_SEC);
1296     }
1297     extra = pcre_study(re, study_options, &error);
1298     if (error != NULL)
1299     fprintf(outfile, "Failed to study: %s\n", error);
1300     else if (extra != NULL)
1301     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1302     }
1303    
1304     /* If the 'F' option was present, we flip the bytes of all the integer
1305     fields in the regex data block and the study block. This is to make it
1306     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1307     compiled on a different architecture. */
1308    
1309     if (do_flip)
1310     {
1311     real_pcre *rre = (real_pcre *)re;
1312     rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1313     rre->size = byteflip(rre->size, sizeof(rre->size));
1314     rre->options = byteflip(rre->options, sizeof(rre->options));
1315     rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1316     rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1317     rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1318     rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1319     rre->name_table_offset = byteflip(rre->name_table_offset,
1320     sizeof(rre->name_table_offset));
1321     rre->name_entry_size = byteflip(rre->name_entry_size,
1322     sizeof(rre->name_entry_size));
1323     rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1324    
1325     if (extra != NULL)
1326     {
1327     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1328     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1329     rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1330     }
1331     }
1332    
1333     /* Extract information from the compiled data if required */
1334    
1335     SHOW_INFO:
1336    
1337 nigel 93 if (do_debug)
1338     {
1339     fprintf(outfile, "------------------------------------------------------------------\n");
1340 ph10 116 pcre_printint(re, outfile, debug_lengths);
1341 nigel 93 }
1342    
1343 nigel 25 if (do_showinfo)
1344 nigel 3 {
1345 nigel 75 unsigned long int get_options, all_options;
1346 nigel 79 #if !defined NOINFOCHECK
1347 nigel 43 int old_first_char, old_options, old_count;
1348 nigel 79 #endif
1349 nigel 43 int count, backrefmax, first_char, need_char;
1350 nigel 63 int nameentrysize, namecount;
1351     const uschar *nametable;
1352 nigel 3
1353 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1354 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1355     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1356     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1357 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1358 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1359 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1360     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1361 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1362 nigel 43
1363 nigel 79 #if !defined NOINFOCHECK
1364 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1365 nigel 3 if (count < 0) fprintf(outfile,
1366 nigel 43 "Error %d from pcre_info()\n", count);
1367 nigel 3 else
1368     {
1369 nigel 43 if (old_count != count) fprintf(outfile,
1370     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1371     old_count);
1372 nigel 37
1373 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1374     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1375     first_char, old_first_char);
1376 nigel 37
1377 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1378     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1379     get_options, old_options);
1380 nigel 43 }
1381 nigel 79 #endif
1382 nigel 43
1383 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1384 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1385 nigel 77 (int)size, (int)regex_gotten_store);
1386 nigel 43
1387     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1388     if (backrefmax > 0)
1389     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1390 nigel 63
1391     if (namecount > 0)
1392     {
1393     fprintf(outfile, "Named capturing subpatterns:\n");
1394     while (namecount-- > 0)
1395     {
1396     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1397     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1398     GET2(nametable, 0));
1399     nametable += nameentrysize;
1400     }
1401     }
1402    
1403 nigel 75 /* The NOPARTIAL bit is a private bit in the options, so we have
1404     to fish it out via our back door */
1405    
1406     all_options = ((real_pcre *)re)->options;
1407     if (do_flip)
1408     {
1409     all_options = byteflip(all_options, sizeof(all_options));
1410 nigel 91 }
1411 nigel 75
1412     if ((all_options & PCRE_NOPARTIAL) != 0)
1413     fprintf(outfile, "Partial matching not supported\n");
1414    
1415 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1416 nigel 91 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1417 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1418     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1419     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1420     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1421 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1422 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1423     ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1424     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1425     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1426 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1427 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1428 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1429     ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1430 nigel 43
1431 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1432 nigel 91 {
1433     case PCRE_NEWLINE_CR:
1434     fprintf(outfile, "Forced newline sequence: CR\n");
1435     break;
1436 nigel 43
1437 nigel 91 case PCRE_NEWLINE_LF:
1438     fprintf(outfile, "Forced newline sequence: LF\n");
1439     break;
1440    
1441     case PCRE_NEWLINE_CRLF:
1442     fprintf(outfile, "Forced newline sequence: CRLF\n");
1443     break;
1444    
1445 nigel 93 case PCRE_NEWLINE_ANY:
1446     fprintf(outfile, "Forced newline sequence: ANY\n");
1447     break;
1448    
1449 nigel 91 default:
1450     break;
1451     }
1452    
1453 nigel 43 if (first_char == -1)
1454     {
1455 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1456 nigel 43 }
1457     else if (first_char < 0)
1458     {
1459     fprintf(outfile, "No first char\n");
1460     }
1461     else
1462     {
1463 nigel 63 int ch = first_char & 255;
1464 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1465 nigel 63 "" : " (caseless)";
1466 nigel 93 if (PRINTHEX(ch))
1467 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1468 nigel 3 else
1469 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1470 nigel 43 }
1471 nigel 37
1472 nigel 43 if (need_char < 0)
1473     {
1474     fprintf(outfile, "No need char\n");
1475 nigel 3 }
1476 nigel 43 else
1477     {
1478 nigel 63 int ch = need_char & 255;
1479 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1480 nigel 63 "" : " (caseless)";
1481 nigel 93 if (PRINTHEX(ch))
1482 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1483 nigel 43 else
1484 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1485 nigel 43 }
1486 nigel 75
1487     /* Don't output study size; at present it is in any case a fixed
1488     value, but it varies, depending on the computer architecture, and
1489     so messes up the test suite. (And with the /F option, it might be
1490     flipped.) */
1491    
1492     if (do_study)
1493     {
1494     if (extra == NULL)
1495     fprintf(outfile, "Study returned NULL\n");
1496     else
1497     {
1498     uschar *start_bits = NULL;
1499     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1500    
1501     if (start_bits == NULL)
1502     fprintf(outfile, "No starting byte set\n");
1503     else
1504     {
1505     int i;
1506     int c = 24;
1507     fprintf(outfile, "Starting byte set: ");
1508     for (i = 0; i < 256; i++)
1509     {
1510     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1511     {
1512     if (c > 75)
1513     {
1514     fprintf(outfile, "\n ");
1515     c = 2;
1516     }
1517 nigel 93 if (PRINTHEX(i) && i != ' ')
1518 nigel 75 {
1519     fprintf(outfile, "%c ", i);
1520     c += 2;
1521     }
1522     else
1523     {
1524     fprintf(outfile, "\\x%02x ", i);
1525     c += 5;
1526     }
1527     }
1528     }
1529     fprintf(outfile, "\n");
1530     }
1531     }
1532     }
1533 nigel 3 }
1534    
1535 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1536     that is all. The first 8 bytes of the file are the regex length and then
1537     the study length, in big-endian order. */
1538 nigel 3
1539 nigel 75 if (to_file != NULL)
1540 nigel 3 {
1541 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1542     if (f == NULL)
1543 nigel 3 {
1544 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1545 nigel 3 }
1546 nigel 75 else
1547     {
1548     uschar sbuf[8];
1549     sbuf[0] = (true_size >> 24) & 255;
1550     sbuf[1] = (true_size >> 16) & 255;
1551     sbuf[2] = (true_size >> 8) & 255;
1552     sbuf[3] = (true_size) & 255;
1553 nigel 3
1554 nigel 75 sbuf[4] = (true_study_size >> 24) & 255;
1555     sbuf[5] = (true_study_size >> 16) & 255;
1556     sbuf[6] = (true_study_size >> 8) & 255;
1557     sbuf[7] = (true_study_size) & 255;
1558 nigel 3
1559 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1560     fwrite(re, 1, true_size, f) < true_size)
1561     {
1562     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1563     }
1564 nigel 3 else
1565     {
1566 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1567     if (extra != NULL)
1568 nigel 3 {
1569 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1570     true_study_size)
1571 nigel 3 {
1572 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1573     strerror(errno));
1574 nigel 3 }
1575 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1576 nigel 93
1577 nigel 3 }
1578     }
1579 nigel 75 fclose(f);
1580 nigel 3 }
1581 nigel 77
1582     new_free(re);
1583     if (extra != NULL) new_free(extra);
1584     if (tables != NULL) new_free((void *)tables);
1585 nigel 75 continue; /* With next regex */
1586 nigel 3 }
1587 nigel 75 } /* End of non-POSIX compile */
1588 nigel 3
1589     /* Read data lines and test them */
1590    
1591     for (;;)
1592     {
1593 nigel 87 uschar *q;
1594     uschar *bptr = dbuffer;
1595 nigel 57 int *use_offsets = offsets;
1596 nigel 53 int use_size_offsets = size_offsets;
1597 nigel 63 int callout_data = 0;
1598     int callout_data_set = 0;
1599 nigel 3 int count, c;
1600 nigel 29 int copystrings = 0;
1601 nigel 63 int find_match_limit = 0;
1602 nigel 29 int getstrings = 0;
1603     int getlist = 0;
1604 nigel 39 int gmatched = 0;
1605 nigel 35 int start_offset = 0;
1606 nigel 41 int g_notempty = 0;
1607 nigel 77 int use_dfa = 0;
1608 nigel 3
1609     options = 0;
1610    
1611 nigel 91 *copynames = 0;
1612     *getnames = 0;
1613    
1614     copynamesptr = copynames;
1615     getnamesptr = getnames;
1616    
1617 nigel 63 pcre_callout = callout;
1618     first_callout = 1;
1619     callout_extra = 0;
1620     callout_count = 0;
1621     callout_fail_count = 999999;
1622     callout_fail_id = -1;
1623 nigel 73 show_malloc = 0;
1624 nigel 63
1625 nigel 91 if (extra != NULL) extra->flags &=
1626     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1627    
1628     len = 0;
1629     for (;;)
1630 nigel 11 {
1631 nigel 91 if (infile == stdin) printf("data> ");
1632     if (extend_inputline(infile, buffer + len) == NULL)
1633     {
1634     if (len > 0) break;
1635     done = 1;
1636     goto CONTINUE;
1637     }
1638     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1639     len = (int)strlen((char *)buffer);
1640     if (buffer[len-1] == '\n') break;
1641 nigel 11 }
1642 nigel 3
1643     while (len > 0 && isspace(buffer[len-1])) len--;
1644     buffer[len] = 0;
1645     if (len == 0) break;
1646    
1647     p = buffer;
1648     while (isspace(*p)) p++;
1649    
1650 nigel 9 q = dbuffer;
1651 nigel 3 while ((c = *p++) != 0)
1652     {
1653     int i = 0;
1654     int n = 0;
1655 nigel 63
1656 nigel 3 if (c == '\\') switch ((c = *p++))
1657     {
1658     case 'a': c = 7; break;
1659     case 'b': c = '\b'; break;
1660     case 'e': c = 27; break;
1661     case 'f': c = '\f'; break;
1662     case 'n': c = '\n'; break;
1663     case 'r': c = '\r'; break;
1664     case 't': c = '\t'; break;
1665     case 'v': c = '\v'; break;
1666    
1667     case '0': case '1': case '2': case '3':
1668     case '4': case '5': case '6': case '7':
1669     c -= '0';
1670     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1671     c = c * 8 + *p++ - '0';
1672 nigel 91
1673     #if !defined NOUTF8
1674     if (use_utf8 && c > 255)
1675     {
1676     unsigned char buff8[8];
1677     int ii, utn;
1678     utn = ord2utf8(c, buff8);
1679     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1680     c = buff8[ii]; /* Last byte */
1681     }
1682     #endif
1683 nigel 3 break;
1684    
1685     case 'x':
1686 nigel 49
1687     /* Handle \x{..} specially - new Perl thing for utf8 */
1688    
1689 nigel 79 #if !defined NOUTF8
1690 nigel 49 if (*p == '{')
1691     {
1692     unsigned char *pt = p;
1693     c = 0;
1694     while (isxdigit(*(++pt)))
1695     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1696     if (*pt == '}')
1697     {
1698 nigel 67 unsigned char buff8[8];
1699 nigel 49 int ii, utn;
1700 nigel 85 utn = ord2utf8(c, buff8);
1701 nigel 67 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1702     c = buff8[ii]; /* Last byte */
1703 nigel 49 p = pt + 1;
1704     break;
1705     }
1706     /* Not correct form; fall through */
1707     }
1708 nigel 79 #endif
1709 nigel 49
1710     /* Ordinary \x */
1711    
1712 nigel 3 c = 0;
1713     while (i++ < 2 && isxdigit(*p))
1714     {
1715     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1716     p++;
1717     }
1718     break;
1719    
1720 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1721 nigel 3 p--;
1722     continue;
1723    
1724 nigel 75 case '>':
1725     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1726     continue;
1727    
1728 nigel 3 case 'A': /* Option setting */
1729     options |= PCRE_ANCHORED;
1730     continue;
1731    
1732     case 'B':
1733     options |= PCRE_NOTBOL;
1734     continue;
1735    
1736 nigel 29 case 'C':
1737 nigel 63 if (isdigit(*p)) /* Set copy string */
1738     {
1739     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1740     copystrings |= 1 << n;
1741     }
1742     else if (isalnum(*p))
1743     {
1744 nigel 91 uschar *npp = copynamesptr;
1745 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1746 nigel 91 *npp++ = 0;
1747 nigel 67 *npp = 0;
1748 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1749 nigel 63 if (n < 0)
1750 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1751     copynamesptr = npp;
1752 nigel 63 }
1753     else if (*p == '+')
1754     {
1755     callout_extra = 1;
1756     p++;
1757     }
1758     else if (*p == '-')
1759     {
1760     pcre_callout = NULL;
1761     p++;
1762     }
1763     else if (*p == '!')
1764     {
1765     callout_fail_id = 0;
1766     p++;
1767     while(isdigit(*p))
1768     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1769     callout_fail_count = 0;
1770     if (*p == '!')
1771     {
1772     p++;
1773     while(isdigit(*p))
1774     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1775     }
1776     }
1777     else if (*p == '*')
1778     {
1779     int sign = 1;
1780     callout_data = 0;
1781     if (*(++p) == '-') { sign = -1; p++; }
1782     while(isdigit(*p))
1783     callout_data = callout_data * 10 + *p++ - '0';
1784     callout_data *= sign;
1785     callout_data_set = 1;
1786     }
1787 nigel 29 continue;
1788    
1789 nigel 79 #if !defined NODFA
1790 nigel 77 case 'D':
1791 nigel 79 #if !defined NOPOSIX
1792 nigel 77 if (posix || do_posix)
1793     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1794     else
1795 nigel 79 #endif
1796 nigel 77 use_dfa = 1;
1797     continue;
1798    
1799     case 'F':
1800     options |= PCRE_DFA_SHORTEST;
1801     continue;
1802 nigel 79 #endif
1803 nigel 77
1804 nigel 29 case 'G':
1805 nigel 63 if (isdigit(*p))
1806     {
1807     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1808     getstrings |= 1 << n;
1809     }
1810     else if (isalnum(*p))
1811     {
1812 nigel 91 uschar *npp = getnamesptr;
1813 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1814 nigel 91 *npp++ = 0;
1815 nigel 67 *npp = 0;
1816 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1817 nigel 63 if (n < 0)
1818 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1819     getnamesptr = npp;
1820 nigel 63 }
1821 nigel 29 continue;
1822    
1823     case 'L':
1824     getlist = 1;
1825     continue;
1826    
1827 nigel 63 case 'M':
1828     find_match_limit = 1;
1829     continue;
1830    
1831 nigel 37 case 'N':
1832     options |= PCRE_NOTEMPTY;
1833     continue;
1834    
1835 nigel 3 case 'O':
1836     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1837 nigel 53 if (n > size_offsets_max)
1838     {
1839     size_offsets_max = n;
1840 nigel 57 free(offsets);
1841 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1842 nigel 53 if (offsets == NULL)
1843     {
1844     printf("** Failed to get %d bytes of memory for offsets vector\n",
1845     size_offsets_max * sizeof(int));
1846 nigel 77 yield = 1;
1847     goto EXIT;
1848 nigel 53 }
1849     }
1850     use_size_offsets = n;
1851 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1852 nigel 3 continue;
1853    
1854 nigel 75 case 'P':
1855     options |= PCRE_PARTIAL;
1856     continue;
1857    
1858 nigel 91 case 'Q':
1859     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1860     if (extra == NULL)
1861     {
1862     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1863     extra->flags = 0;
1864     }
1865     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1866     extra->match_limit_recursion = n;
1867     continue;
1868    
1869     case 'q':
1870     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1871     if (extra == NULL)
1872     {
1873     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1874     extra->flags = 0;
1875     }
1876     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1877     extra->match_limit = n;
1878     continue;
1879    
1880 nigel 79 #if !defined NODFA
1881 nigel 77 case 'R':
1882     options |= PCRE_DFA_RESTART;
1883     continue;
1884 nigel 79 #endif
1885 nigel 77
1886 nigel 73 case 'S':
1887     show_malloc = 1;
1888     continue;
1889    
1890 nigel 3 case 'Z':
1891     options |= PCRE_NOTEOL;
1892     continue;
1893 nigel 71
1894     case '?':
1895     options |= PCRE_NO_UTF8_CHECK;
1896     continue;
1897 nigel 91
1898     case '<':
1899     {
1900     int x = check_newline(p, outfile);
1901     if (x == 0) goto NEXT_DATA;
1902     options |= x;
1903     while (*p++ != '>');
1904     }
1905     continue;
1906 nigel 3 }
1907 nigel 9 *q++ = c;
1908 nigel 3 }
1909 nigel 9 *q = 0;
1910     len = q - dbuffer;
1911 nigel 3
1912 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
1913     {
1914     printf("**Match limit not relevant for DFA matching: ignored\n");
1915     find_match_limit = 0;
1916     }
1917    
1918 nigel 3 /* Handle matching via the POSIX interface, which does not
1919 nigel 63 support timing or playing with the match limit or callout data. */
1920 nigel 3
1921 nigel 37 #if !defined NOPOSIX
1922 nigel 3 if (posix || do_posix)
1923     {
1924     int rc;
1925     int eflags = 0;
1926 nigel 63 regmatch_t *pmatch = NULL;
1927     if (use_size_offsets > 0)
1928 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1929 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1930     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1931    
1932 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1933 nigel 3
1934     if (rc != 0)
1935     {
1936 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1937 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1938     }
1939 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1940     != 0)
1941     {
1942     fprintf(outfile, "Matched with REG_NOSUB\n");
1943     }
1944 nigel 3 else
1945     {
1946 nigel 7 size_t i;
1947 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
1948 nigel 3 {
1949     if (pmatch[i].rm_so >= 0)
1950     {
1951 nigel 23 fprintf(outfile, "%2d: ", (int)i);
1952 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
1953     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1954 nigel 3 fprintf(outfile, "\n");
1955 nigel 35 if (i == 0 && do_showrest)
1956     {
1957     fprintf(outfile, " 0+ ");
1958 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1959     outfile);
1960 nigel 35 fprintf(outfile, "\n");
1961     }
1962 nigel 3 }
1963     }
1964     }
1965 nigel 53 free(pmatch);
1966 nigel 3 }
1967    
1968 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
1969 nigel 3
1970 nigel 37 else
1971     #endif /* !defined NOPOSIX */
1972    
1973 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
1974 nigel 3 {
1975 ph10 142 int gany_fudge;
1976 nigel 93 if (timeitm > 0)
1977 nigel 3 {
1978     register int i;
1979     clock_t time_taken;
1980     clock_t start_time = clock();
1981 nigel 77
1982 nigel 79 #if !defined NODFA
1983 nigel 77 if (all_use_dfa || use_dfa)
1984     {
1985     int workspace[1000];
1986 nigel 93 for (i = 0; i < timeitm; i++)
1987 nigel 77 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1988     options | g_notempty, use_offsets, use_size_offsets, workspace,
1989     sizeof(workspace)/sizeof(int));
1990     }
1991     else
1992 nigel 79 #endif
1993 nigel 77
1994 nigel 93 for (i = 0; i < timeitm; i++)
1995 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
1996 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1997 nigel 77
1998 nigel 3 time_taken = clock() - start_time;
1999 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2000     (((double)time_taken * 1000.0) / (double)timeitm) /
2001 nigel 63 (double)CLOCKS_PER_SEC);
2002 nigel 3 }
2003    
2004 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2005 nigel 87 varying limits in order to find the minimum value for the match limit and
2006     for the recursion limit. */
2007 nigel 63
2008     if (find_match_limit)
2009     {
2010     if (extra == NULL)
2011     {
2012 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2013 nigel 63 extra->flags = 0;
2014     }
2015    
2016 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2017 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2018     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2019     PCRE_ERROR_MATCHLIMIT, "match()");
2020 nigel 63
2021 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2022     options|g_notempty, use_offsets, use_size_offsets,
2023     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2024     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2025 nigel 63 }
2026    
2027     /* If callout_data is set, use the interface with additional data */
2028    
2029     else if (callout_data_set)
2030     {
2031     if (extra == NULL)
2032     {
2033 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2034 nigel 63 extra->flags = 0;
2035     }
2036     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2037 nigel 71 extra->callout_data = &callout_data;
2038 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2039     options | g_notempty, use_offsets, use_size_offsets);
2040     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2041     }
2042    
2043     /* The normal case is just to do the match once, with the default
2044     value of match_limit. */
2045    
2046 nigel 79 #if !defined NODFA
2047 nigel 77 else if (all_use_dfa || use_dfa)
2048     {
2049     int workspace[1000];
2050     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2051     options | g_notempty, use_offsets, use_size_offsets, workspace,
2052     sizeof(workspace)/sizeof(int));
2053     if (count == 0)
2054     {
2055     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2056     count = use_size_offsets/2;
2057     }
2058     }
2059 nigel 79 #endif
2060 nigel 77
2061 nigel 75 else
2062     {
2063     count = pcre_exec(re, extra, (char *)bptr, len,
2064     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2065 nigel 77 if (count == 0)
2066     {
2067     fprintf(outfile, "Matched, but too many substrings\n");
2068     count = use_size_offsets/3;
2069     }
2070 nigel 75 }
2071 nigel 3
2072 nigel 39 /* Matched */
2073    
2074 nigel 3 if (count >= 0)
2075     {
2076 nigel 93 int i, maxcount;
2077    
2078     #if !defined NODFA
2079     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2080     #endif
2081     maxcount = use_size_offsets/3;
2082    
2083     /* This is a check against a lunatic return value. */
2084    
2085     if (count > maxcount)
2086     {
2087     fprintf(outfile,
2088     "** PCRE error: returned count %d is too big for offset size %d\n",
2089     count, use_size_offsets);
2090     count = use_size_offsets/3;
2091     if (do_g || do_G)
2092     {
2093     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2094     do_g = do_G = FALSE; /* Break g/G loop */
2095     }
2096     }
2097    
2098 nigel 29 for (i = 0; i < count * 2; i += 2)
2099 nigel 3 {
2100 nigel 57 if (use_offsets[i] < 0)
2101 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2102     else
2103     {
2104     fprintf(outfile, "%2d: ", i/2);
2105 nigel 63 (void)pchars(bptr + use_offsets[i],
2106     use_offsets[i+1] - use_offsets[i], outfile);
2107 nigel 3 fprintf(outfile, "\n");
2108 nigel 35 if (i == 0)
2109     {
2110     if (do_showrest)
2111     {
2112     fprintf(outfile, " 0+ ");
2113 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2114     outfile);
2115 nigel 35 fprintf(outfile, "\n");
2116     }
2117     }
2118 nigel 3 }
2119     }
2120 nigel 29
2121     for (i = 0; i < 32; i++)
2122     {
2123     if ((copystrings & (1 << i)) != 0)
2124     {
2125 nigel 91 char copybuffer[256];
2126 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2127 nigel 37 i, copybuffer, sizeof(copybuffer));
2128 nigel 29 if (rc < 0)
2129     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2130     else
2131 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2132 nigel 29 }
2133     }
2134    
2135 nigel 91 for (copynamesptr = copynames;
2136     *copynamesptr != 0;
2137     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2138     {
2139     char copybuffer[256];
2140     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2141     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2142     if (rc < 0)
2143     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2144     else
2145     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2146     }
2147    
2148 nigel 29 for (i = 0; i < 32; i++)
2149     {
2150     if ((getstrings & (1 << i)) != 0)
2151     {
2152     const char *substring;
2153 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2154 nigel 29 i, &substring);
2155     if (rc < 0)
2156     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2157     else
2158     {
2159     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2160 nigel 49 pcre_free_substring(substring);
2161 nigel 29 }
2162     }
2163     }
2164    
2165 nigel 91 for (getnamesptr = getnames;
2166     *getnamesptr != 0;
2167     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2168     {
2169     const char *substring;
2170     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2171     count, (char *)getnamesptr, &substring);
2172     if (rc < 0)
2173     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2174     else
2175     {
2176     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2177     pcre_free_substring(substring);
2178     }
2179     }
2180    
2181 nigel 29 if (getlist)
2182     {
2183     const char **stringlist;
2184 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2185 nigel 29 &stringlist);
2186     if (rc < 0)
2187     fprintf(outfile, "get substring list failed %d\n", rc);
2188     else
2189     {
2190     for (i = 0; i < count; i++)
2191     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2192     if (stringlist[i] != NULL)
2193     fprintf(outfile, "string list not terminated by NULL\n");
2194 nigel 49 /* free((void *)stringlist); */
2195     pcre_free_substring_list(stringlist);
2196 nigel 29 }
2197     }
2198 nigel 39 }
2199 nigel 29
2200 nigel 75 /* There was a partial match */
2201    
2202     else if (count == PCRE_ERROR_PARTIAL)
2203     {
2204 nigel 77 fprintf(outfile, "Partial match");
2205 nigel 79 #if !defined NODFA
2206 nigel 77 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2207     fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2208     bptr + use_offsets[0]);
2209 nigel 79 #endif
2210 nigel 77 fprintf(outfile, "\n");
2211 nigel 75 break; /* Out of the /g loop */
2212     }
2213    
2214 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2215 nigel 47 g_notempty after a null match, this is not necessarily the end.
2216 nigel 73 We want to advance the start offset, and continue. In the case of UTF-8
2217     matching, the advance must be one character, not one byte. Fudge the
2218     offset values to achieve this. We won't be at the end of the string -
2219     that was checked before setting g_notempty. */
2220 nigel 39
2221 nigel 3 else
2222     {
2223 nigel 41 if (g_notempty != 0)
2224 nigel 35 {
2225 nigel 73 int onechar = 1;
2226 nigel 57 use_offsets[0] = start_offset;
2227 nigel 73 if (use_utf8)
2228     {
2229     while (start_offset + onechar < len)
2230     {
2231     int tb = bptr[start_offset+onechar];
2232     if (tb <= 127) break;
2233     tb &= 0xc0;
2234     if (tb != 0 && tb != 0xc0) onechar++;
2235     }
2236     }
2237     use_offsets[1] = start_offset + onechar;
2238 nigel 35 }
2239 nigel 41 else
2240     {
2241 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2242 nigel 41 {
2243 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
2244 nigel 41 }
2245 nigel 73 else fprintf(outfile, "Error %d\n", count);
2246 nigel 41 break; /* Out of the /g loop */
2247     }
2248 nigel 3 }
2249 nigel 35
2250 nigel 39 /* If not /g or /G we are done */
2251    
2252     if (!do_g && !do_G) break;
2253    
2254 nigel 41 /* If we have matched an empty string, first check to see if we are at
2255     the end of the subject. If so, the /g loop is over. Otherwise, mimic
2256     what Perl's /g option does. This turns out to be rather cunning. First
2257 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2258     same point. If this fails (picked up above) we advance to the next
2259 ph10 142 character.
2260    
2261 ph10 141 Yet more complication arises in the case when the newline option is
2262 ph10 142 "any" and a pattern in multiline mode has to match at the start of a
2263     line. If a previous match was at the end of a line, an advance of one
2264     character just passes the \r, whereas we should prefer the longer newline
2265 ph10 141 sequence, as does the code in pcre_exec(). So we fudge it. */
2266 nigel 39
2267 nigel 41 g_notempty = 0;
2268 ph10 142 gany_fudge = 0;
2269    
2270 nigel 57 if (use_offsets[0] == use_offsets[1])
2271 nigel 41 {
2272 nigel 57 if (use_offsets[0] == len) break;
2273 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2274 ph10 141 if ((((real_pcre *)re)->options & PCRE_STARTLINE) != 0 &&
2275     (((real_pcre *)re)->options & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY &&
2276     use_offsets[0] < len - 1 &&
2277 ph10 142 bptr[use_offsets[0]] == '\r' &&
2278 ph10 141 bptr[use_offsets[0]+1] == '\n')
2279 ph10 142 gany_fudge = 1;
2280 nigel 41 }
2281 nigel 39
2282     /* For /g, update the start offset, leaving the rest alone */
2283    
2284 ph10 141 if (do_g) start_offset = use_offsets[1] + gany_fudge;
2285 nigel 39
2286     /* For /G, update the pointer and length */
2287    
2288     else
2289 nigel 35 {
2290 ph10 141 bptr += use_offsets[1] + gany_fudge;
2291     len -= use_offsets[1] + gany_fudge;
2292 nigel 35 }
2293 nigel 39 } /* End of loop for /g and /G */
2294 nigel 91
2295     NEXT_DATA: continue;
2296 nigel 39 } /* End of loop for data lines */
2297 nigel 3
2298 nigel 11 CONTINUE:
2299 nigel 37
2300     #if !defined NOPOSIX
2301 nigel 3 if (posix || do_posix) regfree(&preg);
2302 nigel 37 #endif
2303    
2304 nigel 77 if (re != NULL) new_free(re);
2305     if (extra != NULL) new_free(extra);
2306 nigel 25 if (tables != NULL)
2307     {
2308 nigel 77 new_free((void *)tables);
2309 nigel 25 setlocale(LC_CTYPE, "C");
2310 nigel 93 locale_set = 0;
2311 nigel 25 }
2312 nigel 3 }
2313    
2314 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2315 nigel 77
2316     EXIT:
2317    
2318     if (infile != NULL && infile != stdin) fclose(infile);
2319     if (outfile != NULL && outfile != stdout) fclose(outfile);
2320    
2321     free(buffer);
2322     free(dbuffer);
2323     free(pbuffer);
2324     free(offsets);
2325    
2326     return yield;
2327 nigel 3 }
2328    
2329 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12