/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 107 - (hide annotations) (download)
Wed Mar 7 11:02:28 2007 UTC (7 years, 9 months ago) by ph10
File MIME type: text/plain
File size: 66990 byte(s)
Added some additional #ifdef SUPPORT_UTF8 to minimize the code when UTF-8 
support is not compiled.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
46 nigel 3
47 nigel 93
48     /* A number of things vary for Windows builds. Originally, pcretest opened its
49     input and output without "b"; then I was told that "b" was needed in some
50     environments, so it was added for release 5.0 to both the input and output. (It
51     makes no difference on Unix-like systems.) Later I was told that it is wrong
52     for the input on Windows. I've now abstracted the modes into two macros that
53     are set here, to make it easier to fiddle with them, and removed "b" from the
54     input mode under Windows. */
55    
56     #if defined(_WIN32) || defined(WIN32)
57     #include <io.h> /* For _setmode() */
58     #include <fcntl.h> /* For _O_BINARY */
59     #define INPUT_MODE "r"
60     #define OUTPUT_MODE "wb"
61    
62     #else
63     #include <sys/time.h> /* These two includes are needed */
64     #include <sys/resource.h> /* for setrlimit(). */
65     #define INPUT_MODE "rb"
66     #define OUTPUT_MODE "wb"
67 nigel 91 #endif
68    
69 nigel 93
70 nigel 63 #define PCRE_SPY /* For Win32 build, import data, not export */
71 nigel 37
72 nigel 85 /* We include pcre_internal.h because we need the internal info for displaying
73     the results of pcre_study() and we also need to know about the internal
74     macros, structures, and other internal data values; pcretest has "inside
75     information" compared to a program that strictly follows the PCRE API. */
76 nigel 77
77     #include "pcre_internal.h"
78    
79 nigel 85 /* We need access to the data tables that PCRE uses. So as not to have to keep
80     two copies, we include the source file here, changing the names of the external
81     symbols to prevent clashes. */
82 nigel 77
83 nigel 85 #define _pcre_utf8_table1 utf8_table1
84     #define _pcre_utf8_table1_size utf8_table1_size
85     #define _pcre_utf8_table2 utf8_table2
86     #define _pcre_utf8_table3 utf8_table3
87     #define _pcre_utf8_table4 utf8_table4
88     #define _pcre_utt utt
89     #define _pcre_utt_size utt_size
90     #define _pcre_OP_lengths OP_lengths
91    
92     #include "pcre_tables.c"
93    
94     /* We also need the pcre_printint() function for printing out compiled
95     patterns. This function is in a separate file so that it can be included in
96 nigel 93 pcre_compile.c when that module is compiled with debugging enabled.
97 nigel 85
98 nigel 93 The definition of the macro PRINTABLE, which determines whether to print an
99     output character as-is or as a hex value when showing compiled patterns, is
100     contained in this file. We use it here also, in cases when the locale has not
101     been explicitly changed, so as to get consistent output from systems that
102     differ in their output from isprint() even in the "C" locale. */
103    
104 nigel 85 #include "pcre_printint.src"
105    
106 nigel 93 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
107 nigel 85
108 nigel 93
109 nigel 37 /* It is possible to compile this test program without including support for
110     testing the POSIX interface, though this is not available via the standard
111     Makefile. */
112    
113     #if !defined NOPOSIX
114 nigel 3 #include "pcreposix.h"
115 nigel 37 #endif
116 nigel 3
117 ph10 107 /* It is also possible, for the benefit of the version currently imported into
118     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
119     interface to the DFA matcher (NODFA), and without the doublecheck of the old
120     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
121     UTF8 support if PCRE is built without it. */
122 nigel 79
123 ph10 107 #ifndef SUPPORT_UTF8
124     #ifndef NOUTF8
125     #define NOUTF8
126     #endif
127     #endif
128 nigel 79
129 ph10 107
130 nigel 85 /* Other parameters */
131    
132 nigel 3 #ifndef CLOCKS_PER_SEC
133     #ifdef CLK_TCK
134     #define CLOCKS_PER_SEC CLK_TCK
135     #else
136     #define CLOCKS_PER_SEC 100
137     #endif
138     #endif
139    
140 nigel 93 /* This is the default loop count for timing. */
141    
142 nigel 75 #define LOOPREPEAT 500000
143 nigel 3
144 nigel 85 /* Static variables */
145    
/* Stream that receives all test output. It is file-scope so that the helper
functions in this file (the malloc wrappers, callout(), and so on) can write
to it as well as main(). */
static FILE *outfile;
static int log_store = 0;

/* State shared with callout(): callout_count is incremented each time the
callout whose number equals callout_fail_id is entered, and callout() returns
1 once that count reaches callout_fail_count. When callout_extra is non-zero,
callout() also lists the captured substrings. first_callout is cleared by
callout() after each call; while it (or callout_extra) is set, the subject
string is re-printed. */
static int callout_count;
static int callout_extra;
static int callout_fail_count;
static int callout_fail_id;
static int first_callout;

/* Selects isprint() rather than PRINTABLE() in the PRINTHEX() macro. */
static int locale_set = 0;

/* When non-zero, the malloc/free wrappers trace every call to outfile. */
static int show_malloc;

/* When non-zero, pchars() decodes its input as UTF-8. */
static int use_utf8;

/* Size passed to the most recent new_malloc() call. */
static size_t gotten_store;

/* The buffers grow automatically if very long input lines are encountered.
All three always have the same size (buffer_size); extend_inputline() replaces
them together, copying across the contents of buffer and pbuffer only. */

static int buffer_size = 50000;
static uschar *buffer = NULL;
static uschar *dbuffer = NULL;
static uschar *pbuffer = NULL;
164 nigel 3
165 nigel 75
166 nigel 49
167     /*************************************************
168 nigel 91 * Read or extend an input line *
169     *************************************************/
170    
171     /* Input lines are read into buffer, but both patterns and data lines can be
172     continued over multiple input lines. In addition, if the buffer fills up, we
173     want to automatically expand it so as to be able to handle extremely large
174     lines that are needed for certain stress tests. When the input buffer is
175     expanded, the other two buffers must also be expanded likewise, and the
176     contents of pbuffer, which are a copy of the input for callouts, must be
177     preserved (for when expansion happens for a data line). This is not the most
178     optimal way of handling this, but hey, this is just a test program!
179    
180     Arguments:
181     f the file to read
182     start where in buffer to start (this *must* be within buffer)
183    
184     Returns: pointer to the start of new data
185     could be a copy of start, or could be moved
186     NULL if no data read and EOF reached
187     */
188    
189     static uschar *
190     extend_inputline(FILE *f, uschar *start)
191     {
192     uschar *here = start;
193    
194     for (;;)
195     {
196     int rlen = buffer_size - (here - buffer);
197 nigel 93
198 nigel 91 if (rlen > 1000)
199     {
200     int dlen;
201     if (fgets((char *)here, rlen, f) == NULL)
202     return (here == start)? NULL : start;
203     dlen = (int)strlen((char *)here);
204     if (dlen > 0 && here[dlen - 1] == '\n') return start;
205     here += dlen;
206     }
207    
208     else
209     {
210     int new_buffer_size = 2*buffer_size;
211     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
212     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
213     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
214    
215     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
216     {
217     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
218     exit(1);
219     }
220    
221     memcpy(new_buffer, buffer, buffer_size);
222     memcpy(new_pbuffer, pbuffer, buffer_size);
223    
224     buffer_size = new_buffer_size;
225    
226     start = new_buffer + (start - buffer);
227     here = new_buffer + (here - buffer);
228    
229     free(buffer);
230     free(dbuffer);
231     free(pbuffer);
232    
233     buffer = new_buffer;
234     dbuffer = new_dbuffer;
235     pbuffer = new_pbuffer;
236     }
237     }
238    
239     return NULL; /* Control never gets here */
240     }
241    
242    
243    
244    
245    
246    
247    
248     /*************************************************
249 nigel 63 * Read number from string *
250     *************************************************/
251    
252     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
253     around with conditional compilation, just do the job by hand. It is only used
254 nigel 93 for unpicking arguments, so just keep it simple.
255 nigel 63
256     Arguments:
257     str string to be converted
258     endptr where to put the end pointer
259    
260     Returns: the converted value, as an int
261     */
262    
/* Convert a decimal digit string to an int, skipping leading white space.
No sign is recognized. If there are no digits, the result is 0 and *endptr
points at the first non-space character. A value too large for an int
saturates at INT_MAX instead of overflowing (signed overflow is undefined
behaviour); the remaining digits are still consumed so that *endptr ends up
after the whole number.

Arguments:
  str        string to be converted
  endptr     where to put the pointer to the first unconsumed character

Returns:     the converted value
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (*str != 0 && isspace(*str)) str++;
while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');
  if (result > (INT_MAX - digit)/10)
    result = INT_MAX;                  /* Would overflow: clamp */
  else
    result = result * 10 + digit;
  }
*endptr = str;
return(result);
}
272    
273    
274    
275 nigel 49
276     /*************************************************
277     * Convert UTF-8 string to value *
278     *************************************************/
279    
280     /* This function takes one or more bytes that represents a UTF-8 character,
281     and returns the value of the character.
282    
283     Argument:
284 nigel 91 utf8bytes a pointer to the byte vector
285     vptr a pointer to an int to receive the value
286 nigel 49
287 nigel 91 Returns: > 0 => the number of bytes consumed
288     -6 to 0 => malformed UTF-8 character at offset = (-return)
289 nigel 49 */
290    
291 nigel 79 #if !defined NOUTF8
292    
293 nigel 67 static int
294 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
295 nigel 49 {
296 nigel 91 int c = *utf8bytes++;
297 nigel 49 int d = c;
298     int i, j, s;
299    
300     for (i = -1; i < 6; i++) /* i is number of additional bytes */
301     {
302     if ((d & 0x80) == 0) break;
303     d <<= 1;
304     }
305    
306     if (i == -1) { *vptr = c; return 1; } /* ascii character */
307     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
308    
309     /* i now has a value in the range 1-5 */
310    
311 nigel 59 s = 6*i;
312 nigel 85 d = (c & utf8_table3[i]) << s;
313 nigel 49
314     for (j = 0; j < i; j++)
315     {
316 nigel 91 c = *utf8bytes++;
317 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
318 nigel 59 s -= 6;
319 nigel 49 d |= (c & 0x3f) << s;
320     }
321    
322     /* Check that encoding was the correct unique one */
323    
324 nigel 85 for (j = 0; j < utf8_table1_size; j++)
325     if (d <= utf8_table1[j]) break;
326 nigel 49 if (j != i) return -(i+1);
327    
328     /* Valid value */
329    
330     *vptr = d;
331     return i+1;
332     }
333    
334 nigel 79 #endif
335 nigel 49
336    
337 nigel 79
338 nigel 63 /*************************************************
339 nigel 85 * Convert character value to UTF-8 *
340     *************************************************/
341    
342     /* This function takes an integer value in the range 0 - 0x7fffffff
343     and encodes it as a UTF-8 character in 1 to 6 bytes.
344    
345     Arguments:
346     cvalue the character value
347 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
348 nigel 85
349     Returns: number of bytes placed in the buffer
350     */
351    
352 nigel 93 #if !defined NOUTF8
353    
354 nigel 85 static int
355 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
356 nigel 85 {
357     register int i, j;
358     for (i = 0; i < utf8_table1_size; i++)
359     if (cvalue <= utf8_table1[i]) break;
360 nigel 91 utf8bytes += i;
361 nigel 85 for (j = i; j > 0; j--)
362     {
363 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
364 nigel 85 cvalue >>= 6;
365     }
366 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
367 nigel 85 return i + 1;
368     }
369    
370 nigel 93 #endif
371 nigel 85
372    
373 nigel 93
374 nigel 85 /*************************************************
375 nigel 63 * Print character string *
376     *************************************************/
377 nigel 49
378 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
379     mode. Yields number of characters printed. If handed a NULL file, just counts
380     chars without printing. */
381 nigel 49
382 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
383 nigel 3 {
384 nigel 85 int c = 0;
385 nigel 63 int yield = 0;
386 nigel 3
387 nigel 63 while (length-- > 0)
388 nigel 3 {
389 nigel 79 #if !defined NOUTF8
390 nigel 67 if (use_utf8)
391 nigel 63 {
392     int rc = utf82ord(p, &c);
393 nigel 3
394 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
395     {
396     length -= rc - 1;
397     p += rc;
398 nigel 93 if (PRINTHEX(c))
399 nigel 63 {
400     if (f != NULL) fprintf(f, "%c", c);
401     yield++;
402     }
403     else
404     {
405 nigel 93 int n = 4;
406     if (f != NULL) fprintf(f, "\\x{%02x}", c);
407     yield += (n <= 0x000000ff)? 2 :
408     (n <= 0x00000fff)? 3 :
409     (n <= 0x0000ffff)? 4 :
410     (n <= 0x000fffff)? 5 : 6;
411 nigel 63 }
412     continue;
413     }
414     }
415 nigel 79 #endif
416 nigel 3
417 nigel 63 /* Not UTF-8, or malformed UTF-8 */
418    
419 nigel 93 c = *p++;
420     if (PRINTHEX(c))
421 nigel 3 {
422 nigel 63 if (f != NULL) fprintf(f, "%c", c);
423     yield++;
424 nigel 3 }
425 nigel 63 else
426 nigel 3 {
427 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
428     yield += 4;
429     }
430     }
431 nigel 3
432 nigel 63 return yield;
433     }
434 nigel 23
435 nigel 3
436 nigel 23
437 nigel 63 /*************************************************
438     * Callout function *
439     *************************************************/
440 nigel 3
441 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
442     the match. Yield zero unless more callouts than the fail count, or the callout
443     data is not zero. */
444 nigel 3
445 nigel 63 static int callout(pcre_callout_block *cb)
446     {
447     FILE *f = (first_callout | callout_extra)? outfile : NULL;
448 nigel 75 int i, pre_start, post_start, subject_length;
449 nigel 3
450 nigel 63 if (callout_extra)
451     {
452     fprintf(f, "Callout %d: last capture = %d\n",
453     cb->callout_number, cb->capture_last);
454 nigel 3
455 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
456     {
457     if (cb->offset_vector[i] < 0)
458     fprintf(f, "%2d: <unset>\n", i/2);
459     else
460     {
461     fprintf(f, "%2d: ", i/2);
462     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
463     cb->offset_vector[i+1] - cb->offset_vector[i], f);
464     fprintf(f, "\n");
465     }
466     }
467     }
468 nigel 3
469 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
470     datails. On subsequent calls in the same match, we use pchars just to find the
471     printed lengths of the substrings. */
472 nigel 3
473 nigel 63 if (f != NULL) fprintf(f, "--->");
474 nigel 3
475 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
476     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
477     cb->current_position - cb->start_match, f);
478 nigel 3
479 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
480    
481 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
482     cb->subject_length - cb->current_position, f);
483 nigel 3
484 nigel 63 if (f != NULL) fprintf(f, "\n");
485 nigel 9
486 nigel 63 /* Always print appropriate indicators, with callout number if not already
487 nigel 75 shown. For automatic callouts, show the pattern offset. */
488 nigel 3
489 nigel 75 if (cb->callout_number == 255)
490     {
491     fprintf(outfile, "%+3d ", cb->pattern_position);
492     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
493     }
494     else
495     {
496     if (callout_extra) fprintf(outfile, " ");
497     else fprintf(outfile, "%3d ", cb->callout_number);
498     }
499 nigel 3
500 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
501     fprintf(outfile, "^");
502 nigel 3
503 nigel 63 if (post_start > 0)
504     {
505     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
506     fprintf(outfile, "^");
507 nigel 3 }
508    
509 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
510     fprintf(outfile, " ");
511    
512     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
513     pbuffer + cb->pattern_position);
514    
515 nigel 63 fprintf(outfile, "\n");
516     first_callout = 0;
517 nigel 3
518 nigel 71 if (cb->callout_data != NULL)
519 nigel 49 {
520 nigel 71 int callout_data = *((int *)(cb->callout_data));
521     if (callout_data != 0)
522     {
523     fprintf(outfile, "Callout data = %d\n", callout_data);
524     return callout_data;
525     }
526 nigel 63 }
527 nigel 49
528 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
529     (++callout_count >= callout_fail_count)? 1 : 0;
530 nigel 3 }
531    
532    
533 nigel 63 /*************************************************
534 nigel 73 * Local malloc functions *
535 nigel 63 *************************************************/
536 nigel 3
537     /* Alternative malloc function, to test functionality and show the size of the
538     compiled re. */
539    
540     static void *new_malloc(size_t size)
541     {
542 nigel 73 void *block = malloc(size);
543 nigel 43 gotten_store = size;
544 nigel 73 if (show_malloc)
545 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
546 nigel 73 return block;
547 nigel 3 }
548    
549 nigel 73 static void new_free(void *block)
550     {
551     if (show_malloc)
552     fprintf(outfile, "free %p\n", block);
553     free(block);
554     }
555 nigel 3
556    
557 nigel 73 /* For recursion malloc/free, to test stacking calls */
558    
559     static void *stack_malloc(size_t size)
560     {
561     void *block = malloc(size);
562     if (show_malloc)
563 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
564 nigel 73 return block;
565     }
566    
567     static void stack_free(void *block)
568     {
569     if (show_malloc)
570     fprintf(outfile, "stack_free %p\n", block);
571     free(block);
572     }
573    
574    
575 nigel 63 /*************************************************
576     * Call pcre_fullinfo() *
577     *************************************************/
578 nigel 43
579     /* Get one piece of information from the pcre_fullinfo() function */
580    
581     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
582     {
583     int rc;
584     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
585     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
586     }
587    
588    
589    
590 nigel 63 /*************************************************
591 nigel 75 * Byte flipping function *
592     *************************************************/
593    
/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value      the value to flip; bits above those being flipped are dropped
  n          2 to swap the low two bytes; any other value reverses the low
             four bytes

Returns:     the flipped value
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
603    
604    
605    
606    
607     /*************************************************
608 nigel 87 * Check match or recursion limit *
609     *************************************************/
610    
611     static int
612     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
613     int start_offset, int options, int *use_offsets, int use_size_offsets,
614     int flag, unsigned long int *limit, int errnumber, const char *msg)
615     {
616     int count;
617     int min = 0;
618     int mid = 64;
619     int max = -1;
620    
621     extra->flags |= flag;
622    
623     for (;;)
624     {
625     *limit = mid;
626    
627     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
628     use_offsets, use_size_offsets);
629    
630     if (count == errnumber)
631     {
632     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
633     min = mid;
634     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
635     }
636    
637     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
638     count == PCRE_ERROR_PARTIAL)
639     {
640     if (mid == min + 1)
641     {
642     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
643     break;
644     }
645     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
646     max = mid;
647     mid = (min + mid)/2;
648     }
649     else break; /* Some other error */
650     }
651    
652     extra->flags &= ~flag;
653     return count;
654     }
655    
656    
657    
658     /*************************************************
659 nigel 91 * Check newline indicator *
660     *************************************************/
661    
662     /* This is used both at compile and run-time to check for <xxx> escapes, where
663 nigel 93 xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.
664 nigel 91
665     Arguments:
666     p points after the leading '<'
667     f file for error message
668    
669     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
670     */
671    
672     static int
673     check_newline(uschar *p, FILE *f)
674     {
675     if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
676     if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
677     if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
678 nigel 93 if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
679 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
680     return 0;
681     }
682    
683    
684    
685     /*************************************************
686 nigel 93 * Usage function *
687     *************************************************/
688    
/* Write the command-line summary to stdout. The -dfa and -p lines are left
out when NODFA or NOPOSIX, respectively, is defined. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input> [<output>]]\n", stdout);
fputs(" -b show compiled code (bytecode)\n", stdout);
fputs(" -C show PCRE compile-time options and exit\n", stdout);
fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n", stdout);
fputs(" -i show information about compiled patterns\n", stdout);
fputs(" -m output memory used information\n", stdout);
fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
fputs(" -s output store (memory) used information\n", stdout);
fputs(" -t time compilation and execution\n", stdout);
fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
fputs(" -tm time execution (matching) only\n", stdout);
fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
714    
715    
716    
717     /*************************************************
718 nigel 63 * Main Program *
719     *************************************************/
720 nigel 43
721 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
722     consist of a regular expression, in delimiters and optionally followed by
723     options, followed by a set of test data, terminated by an empty line. */
724    
725     int main(int argc, char **argv)
726     {
727     FILE *infile = stdin;
728     int options = 0;
729     int study_options = 0;
730     int op = 1;
731     int timeit = 0;
732 nigel 93 int timeitm = 0;
733 nigel 3 int showinfo = 0;
734 nigel 31 int showstore = 0;
735 nigel 87 int quiet = 0;
736 nigel 53 int size_offsets = 45;
737     int size_offsets_max;
738 nigel 77 int *offsets = NULL;
739 nigel 53 #if !defined NOPOSIX
740 nigel 3 int posix = 0;
741 nigel 53 #endif
742 nigel 3 int debug = 0;
743 nigel 11 int done = 0;
744 nigel 77 int all_use_dfa = 0;
745     int yield = 0;
746 nigel 91 int stack_size;
747 nigel 3
748 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
749     that 1024 is plenty long enough for the few names we'll be testing. */
750 nigel 69
751 nigel 91 uschar copynames[1024];
752     uschar getnames[1024];
753    
754     uschar *copynamesptr;
755     uschar *getnamesptr;
756    
757 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
758 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
759 nigel 69
760 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
761     dbuffer = (unsigned char *)malloc(buffer_size);
762     pbuffer = (unsigned char *)malloc(buffer_size);
763 nigel 69
764 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
765 nigel 3
766 nigel 93 outfile = stdout;
767    
768     /* The following _setmode() stuff is some Windows magic that tells its runtime
769     library to translate CRLF into a single LF character. At least, that's what
770     I've been told: never having used Windows I take this all on trust. Originally
771     it set 0x8000, but then I was advised that _O_BINARY was better. */
772    
773 nigel 75 #if defined(_WIN32) || defined(WIN32)
774 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
775     #endif
776 nigel 75
777 nigel 3 /* Scan options */
778    
779     while (argc > 1 && argv[op][0] == '-')
780     {
781 nigel 63 unsigned char *endptr;
782 nigel 53
783 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
784     showstore = 1;
785 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
786 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
787 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
788     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
789 nigel 79 #if !defined NODFA
790 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
791 nigel 79 #endif
792 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
793 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
794     *endptr == 0))
795 nigel 53 {
796     op++;
797     argc--;
798     }
799 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
800     {
801     int both = argv[op][2] == 0;
802     int temp;
803     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
804     *endptr == 0))
805     {
806     timeitm = temp;
807     op++;
808     argc--;
809     }
810     else timeitm = LOOPREPEAT;
811     if (both) timeit = timeitm;
812     }
813 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
814     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
815     *endptr == 0))
816     {
817 nigel 93 #if defined(_WIN32) || defined(WIN32)
818 nigel 91 printf("PCRE: -S not supported on this OS\n");
819     exit(1);
820     #else
821     int rc;
822     struct rlimit rlim;
823     getrlimit(RLIMIT_STACK, &rlim);
824     rlim.rlim_cur = stack_size * 1024 * 1024;
825     rc = setrlimit(RLIMIT_STACK, &rlim);
826     if (rc != 0)
827     {
828     printf("PCRE: setrlimit() failed with error %d\n", rc);
829     exit(1);
830     }
831     op++;
832     argc--;
833     #endif
834     }
835 nigel 53 #if !defined NOPOSIX
836 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
837 nigel 53 #endif
838 nigel 63 else if (strcmp(argv[op], "-C") == 0)
839     {
840     int rc;
841     printf("PCRE version %s\n", pcre_version());
842     printf("Compiled with\n");
843     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
844     printf(" %sUTF-8 support\n", rc? "" : "No ");
845 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
846     printf(" %sUnicode properties support\n", rc? "" : "No ");
847 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
848 nigel 91 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
849 nigel 93 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
850     (rc == -1)? "ANY" : "???");
851 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
852     printf(" Internal link size = %d\n", rc);
853     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
854     printf(" POSIX malloc threshold = %d\n", rc);
855     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
856     printf(" Default match limit = %d\n", rc);
857 nigel 87 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
858     printf(" Default recursion depth limit = %d\n", rc);
859 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
860     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
861 nigel 63 exit(0);
862     }
863 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
864     strcmp(argv[op], "--help") == 0)
865     {
866     usage();
867     goto EXIT;
868     }
869 nigel 3 else
870     {
871 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
872 nigel 93 usage();
873 nigel 77 yield = 1;
874     goto EXIT;
875 nigel 3 }
876     op++;
877     argc--;
878     }
879    
880 nigel 53 /* Get the store for the offsets vector, and remember what it was */
881    
882     size_offsets_max = size_offsets;
883 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
884 nigel 53 if (offsets == NULL)
885     {
886     printf("** Failed to get %d bytes of memory for offsets vector\n",
887     size_offsets_max * sizeof(int));
888 nigel 77 yield = 1;
889     goto EXIT;
890 nigel 53 }
891    
892 nigel 3 /* Sort out the input and output files */
893    
894     if (argc > 1)
895     {
896 nigel 93 infile = fopen(argv[op], INPUT_MODE);
897 nigel 3 if (infile == NULL)
898     {
899     printf("** Failed to open %s\n", argv[op]);
900 nigel 77 yield = 1;
901     goto EXIT;
902 nigel 3 }
903     }
904    
905     if (argc > 2)
906     {
907 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
908 nigel 3 if (outfile == NULL)
909     {
910     printf("** Failed to open %s\n", argv[op+1]);
911 nigel 77 yield = 1;
912     goto EXIT;
913 nigel 3 }
914     }
915    
916     /* Set alternative malloc function */
917    
918     pcre_malloc = new_malloc;
919 nigel 73 pcre_free = new_free;
920     pcre_stack_malloc = stack_malloc;
921     pcre_stack_free = stack_free;
922 nigel 3
923 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
924 nigel 3
925 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
926 nigel 3
927     /* Main loop */
928    
929 nigel 11 while (!done)
930 nigel 3 {
931     pcre *re = NULL;
932     pcre_extra *extra = NULL;
933 nigel 37
934     #if !defined NOPOSIX /* There are still compilers that require no indent */
935 nigel 3 regex_t preg;
936 nigel 45 int do_posix = 0;
937 nigel 37 #endif
938    
939 nigel 7 const char *error;
940 nigel 25 unsigned char *p, *pp, *ppp;
941 nigel 75 unsigned char *to_file = NULL;
942 nigel 53 const unsigned char *tables = NULL;
943 nigel 75 unsigned long int true_size, true_study_size = 0;
944     size_t size, regex_gotten_store;
945 nigel 3 int do_study = 0;
946 nigel 25 int do_debug = debug;
947 nigel 35 int do_G = 0;
948     int do_g = 0;
949 nigel 25 int do_showinfo = showinfo;
950 nigel 35 int do_showrest = 0;
951 nigel 75 int do_flip = 0;
952 nigel 93 int erroroffset, len, delimiter, poffset;
953 nigel 3
954 nigel 67 use_utf8 = 0;
955 nigel 63
956 nigel 3 if (infile == stdin) printf(" re> ");
957 nigel 91 if (extend_inputline(infile, buffer) == NULL) break;
958 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
959 nigel 63 fflush(outfile);
960 nigel 3
961     p = buffer;
962     while (isspace(*p)) p++;
963     if (*p == 0) continue;
964    
965 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
966 nigel 3
967 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
968     {
969 nigel 91 unsigned long int magic, get_options;
970 nigel 75 uschar sbuf[8];
971     FILE *f;
972    
973     p++;
974     pp = p + (int)strlen((char *)p);
975     while (isspace(pp[-1])) pp--;
976     *pp = 0;
977    
978     f = fopen((char *)p, "rb");
979     if (f == NULL)
980     {
981     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
982     continue;
983     }
984    
985     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
986    
987     true_size =
988     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
989     true_study_size =
990     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
991    
992     re = (real_pcre *)new_malloc(true_size);
993     regex_gotten_store = gotten_store;
994    
995     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
996    
997     magic = ((real_pcre *)re)->magic_number;
998     if (magic != MAGIC_NUMBER)
999     {
1000     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1001     {
1002     do_flip = 1;
1003     }
1004     else
1005     {
1006     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1007     fclose(f);
1008     continue;
1009     }
1010     }
1011    
1012     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1013     do_flip? " (byte-inverted)" : "", p);
1014    
1015     /* Need to know if UTF-8 for printing data strings */
1016    
1017 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1018     use_utf8 = (get_options & PCRE_UTF8) != 0;
1019 nigel 75
1020     /* Now see if there is any following study data */
1021    
1022     if (true_study_size != 0)
1023     {
1024     pcre_study_data *psd;
1025    
1026     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1027     extra->flags = PCRE_EXTRA_STUDY_DATA;
1028    
1029     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1030     extra->study_data = psd;
1031    
1032     if (fread(psd, 1, true_study_size, f) != true_study_size)
1033     {
1034     FAIL_READ:
1035     fprintf(outfile, "Failed to read data from %s\n", p);
1036     if (extra != NULL) new_free(extra);
1037     if (re != NULL) new_free(re);
1038     fclose(f);
1039     continue;
1040     }
1041     fprintf(outfile, "Study data loaded from %s\n", p);
1042     do_study = 1; /* To get the data output if requested */
1043     }
1044     else fprintf(outfile, "No study data\n");
1045    
1046     fclose(f);
1047     goto SHOW_INFO;
1048     }
1049    
1050     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1051     the pattern; if it isn't complete, read more. */
1052    
1053 nigel 3 delimiter = *p++;
1054    
1055 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1056 nigel 3 {
1057 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1058 nigel 3 goto SKIP_DATA;
1059     }
1060    
1061     pp = p;
1062 nigel 93 poffset = p - buffer;
1063 nigel 3
1064     for(;;)
1065     {
1066 nigel 29 while (*pp != 0)
1067     {
1068     if (*pp == '\\' && pp[1] != 0) pp++;
1069     else if (*pp == delimiter) break;
1070     pp++;
1071     }
1072 nigel 3 if (*pp != 0) break;
1073     if (infile == stdin) printf(" > ");
1074 nigel 91 if ((pp = extend_inputline(infile, pp)) == NULL)
1075 nigel 3 {
1076     fprintf(outfile, "** Unexpected EOF\n");
1077 nigel 11 done = 1;
1078     goto CONTINUE;
1079 nigel 3 }
1080 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1081 nigel 3 }
1082    
1083 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1084     pointer to the correct relative point in the buffer. */
1085    
1086     p = buffer + poffset;
1087    
1088 nigel 29 /* If the first character after the delimiter is backslash, make
1089     the pattern end with backslash. This is purely to provide a way
1090     of testing for the error message when a pattern ends with backslash. */
1091    
1092     if (pp[1] == '\\') *pp++ = '\\';
1093    
1094 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1095     for callouts. */
1096 nigel 3
1097     *pp++ = 0;
1098 nigel 75 strcpy((char *)pbuffer, (char *)p);
1099 nigel 3
1100     /* Look for options after final delimiter */
1101    
1102     options = 0;
1103     study_options = 0;
1104 nigel 31 log_store = showstore; /* default from command line */
1105    
1106 nigel 3 while (*pp != 0)
1107     {
1108     switch (*pp++)
1109     {
1110 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1111 nigel 35 case 'g': do_g = 1; break;
1112 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1113     case 'm': options |= PCRE_MULTILINE; break;
1114     case 's': options |= PCRE_DOTALL; break;
1115     case 'x': options |= PCRE_EXTENDED; break;
1116 nigel 25
1117 nigel 35 case '+': do_showrest = 1; break;
1118 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1119 nigel 93 case 'B': do_debug = 1; break;
1120 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1121 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1122 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1123 nigel 75 case 'F': do_flip = 1; break;
1124 nigel 35 case 'G': do_G = 1; break;
1125 nigel 25 case 'I': do_showinfo = 1; break;
1126 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1127 nigel 31 case 'M': log_store = 1; break;
1128 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1129 nigel 37
1130     #if !defined NOPOSIX
1131 nigel 3 case 'P': do_posix = 1; break;
1132 nigel 37 #endif
1133    
1134 nigel 3 case 'S': do_study = 1; break;
1135 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1136 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1137 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1138 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1139 nigel 25
1140     case 'L':
1141     ppp = pp;
1142 nigel 93 /* The '\r' test here is so that it works on Windows. */
1143     /* The 0 (NUL) test is just in case this is an unterminated line. */
1144     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1145 nigel 25 *ppp = 0;
1146     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1147     {
1148     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1149     goto SKIP_DATA;
1150     }
1151 nigel 93 locale_set = 1;
1152 nigel 25 tables = pcre_maketables();
1153     pp = ppp;
1154     break;
1155    
1156 nigel 75 case '>':
1157     to_file = pp;
1158     while (*pp != 0) pp++;
1159     while (isspace(pp[-1])) pp--;
1160     *pp = 0;
1161     break;
1162    
1163 nigel 91 case '<':
1164     {
1165     int x = check_newline(pp, outfile);
1166     if (x == 0) goto SKIP_DATA;
1167     options |= x;
1168     while (*pp++ != '>');
1169     }
1170     break;
1171    
1172 nigel 77 case '\r': /* So that it works in Windows */
1173     case '\n':
1174     case ' ':
1175     break;
1176 nigel 75
1177 nigel 3 default:
1178     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1179     goto SKIP_DATA;
1180     }
1181     }
1182    
1183 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1184 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1185     local character tables. */
1186 nigel 3
1187 nigel 37 #if !defined NOPOSIX
1188 nigel 3 if (posix || do_posix)
1189     {
1190     int rc;
1191     int cflags = 0;
1192 nigel 75
1193 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1194     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1195 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1196 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1197     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1198    
1199 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1200    
1201     /* Compilation failed; go back for another re, skipping to blank line
1202     if non-interactive. */
1203    
1204     if (rc != 0)
1205     {
1206 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1207 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1208     goto SKIP_DATA;
1209     }
1210     }
1211    
1212     /* Handle compiling via the native interface */
1213    
1214     else
1215 nigel 37 #endif /* !defined NOPOSIX */
1216    
1217 nigel 3 {
1218 nigel 93 if (timeit > 0)
1219 nigel 3 {
1220     register int i;
1221     clock_t time_taken;
1222     clock_t start_time = clock();
1223 nigel 93 for (i = 0; i < timeit; i++)
1224 nigel 3 {
1225 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1226 nigel 3 if (re != NULL) free(re);
1227     }
1228     time_taken = clock() - start_time;
1229 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1230     (((double)time_taken * 1000.0) / (double)timeit) /
1231 nigel 63 (double)CLOCKS_PER_SEC);
1232 nigel 3 }
1233    
1234 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1235 nigel 3
1236     /* Compilation failed; go back for another re, skipping to blank line
1237     if non-interactive. */
1238    
1239     if (re == NULL)
1240     {
1241     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1242     SKIP_DATA:
1243     if (infile != stdin)
1244     {
1245     for (;;)
1246     {
1247 nigel 91 if (extend_inputline(infile, buffer) == NULL)
1248 nigel 11 {
1249     done = 1;
1250     goto CONTINUE;
1251     }
1252 nigel 3 len = (int)strlen((char *)buffer);
1253     while (len > 0 && isspace(buffer[len-1])) len--;
1254     if (len == 0) break;
1255     }
1256     fprintf(outfile, "\n");
1257     }
1258 nigel 25 goto CONTINUE;
1259 nigel 3 }
1260    
1261 nigel 43 /* Compilation succeeded; print data if required. There are now two
1262     info-returning functions. The old one has a limited interface and
1263     returns only limited data. Check that it agrees with the newer one. */
1264 nigel 3
1265 nigel 63 if (log_store)
1266     fprintf(outfile, "Memory allocation (code space): %d\n",
1267     (int)(gotten_store -
1268     sizeof(real_pcre) -
1269     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1270    
1271 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1272     and remember the store that was got. */
1273    
1274     true_size = ((real_pcre *)re)->size;
1275     regex_gotten_store = gotten_store;
1276    
1277     /* If /S was present, study the regexp to generate additional info to
1278     help with the matching. */
1279    
1280     if (do_study)
1281     {
1282 nigel 93 if (timeit > 0)
1283 nigel 75 {
1284     register int i;
1285     clock_t time_taken;
1286     clock_t start_time = clock();
1287 nigel 93 for (i = 0; i < timeit; i++)
1288 nigel 75 extra = pcre_study(re, study_options, &error);
1289     time_taken = clock() - start_time;
1290     if (extra != NULL) free(extra);
1291 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1292     (((double)time_taken * 1000.0) / (double)timeit) /
1293 nigel 75 (double)CLOCKS_PER_SEC);
1294     }
1295     extra = pcre_study(re, study_options, &error);
1296     if (error != NULL)
1297     fprintf(outfile, "Failed to study: %s\n", error);
1298     else if (extra != NULL)
1299     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1300     }
1301    
1302     /* If the 'F' option was present, we flip the bytes of all the integer
1303     fields in the regex data block and the study block. This is to make it
1304     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1305     compiled on a different architecture. */
1306    
1307     if (do_flip)
1308     {
1309     real_pcre *rre = (real_pcre *)re;
1310     rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1311     rre->size = byteflip(rre->size, sizeof(rre->size));
1312     rre->options = byteflip(rre->options, sizeof(rre->options));
1313     rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1314     rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1315     rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1316     rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1317     rre->name_table_offset = byteflip(rre->name_table_offset,
1318     sizeof(rre->name_table_offset));
1319     rre->name_entry_size = byteflip(rre->name_entry_size,
1320     sizeof(rre->name_entry_size));
1321     rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1322    
1323     if (extra != NULL)
1324     {
1325     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1326     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1327     rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1328     }
1329     }
1330    
1331     /* Extract information from the compiled data if required */
1332    
1333     SHOW_INFO:
1334    
1335 nigel 93 if (do_debug)
1336     {
1337     fprintf(outfile, "------------------------------------------------------------------\n");
1338     pcre_printint(re, outfile);
1339     }
1340    
1341 nigel 25 if (do_showinfo)
1342 nigel 3 {
1343 nigel 75 unsigned long int get_options, all_options;
1344 nigel 79 #if !defined NOINFOCHECK
1345 nigel 43 int old_first_char, old_options, old_count;
1346 nigel 79 #endif
1347 nigel 43 int count, backrefmax, first_char, need_char;
1348 nigel 63 int nameentrysize, namecount;
1349     const uschar *nametable;
1350 nigel 3
1351 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1352 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1353     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1354     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1355 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1356 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1357 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1358     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1359 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1360 nigel 43
1361 nigel 79 #if !defined NOINFOCHECK
1362 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1363 nigel 3 if (count < 0) fprintf(outfile,
1364 nigel 43 "Error %d from pcre_info()\n", count);
1365 nigel 3 else
1366     {
1367 nigel 43 if (old_count != count) fprintf(outfile,
1368     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1369     old_count);
1370 nigel 37
1371 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1372     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1373     first_char, old_first_char);
1374 nigel 37
1375 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1376     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1377     get_options, old_options);
1378 nigel 43 }
1379 nigel 79 #endif
1380 nigel 43
1381 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1382 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1383 nigel 77 (int)size, (int)regex_gotten_store);
1384 nigel 43
1385     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1386     if (backrefmax > 0)
1387     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1388 nigel 63
1389     if (namecount > 0)
1390     {
1391     fprintf(outfile, "Named capturing subpatterns:\n");
1392     while (namecount-- > 0)
1393     {
1394     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1395     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1396     GET2(nametable, 0));
1397     nametable += nameentrysize;
1398     }
1399     }
1400    
1401 nigel 75 /* The NOPARTIAL bit is a private bit in the options, so we have
1402     to fish it out via our back door */
1403    
1404     all_options = ((real_pcre *)re)->options;
1405     if (do_flip)
1406     {
1407     all_options = byteflip(all_options, sizeof(all_options));
1408 nigel 91 }
1409 nigel 75
1410     if ((all_options & PCRE_NOPARTIAL) != 0)
1411     fprintf(outfile, "Partial matching not supported\n");
1412    
1413 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1414 nigel 91 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1415 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1416     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1417     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1418     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1419 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1420 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1421     ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1422     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1423     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1424 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1425 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1426 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1427     ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1428 nigel 43
1429 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1430 nigel 91 {
1431     case PCRE_NEWLINE_CR:
1432     fprintf(outfile, "Forced newline sequence: CR\n");
1433     break;
1434 nigel 43
1435 nigel 91 case PCRE_NEWLINE_LF:
1436     fprintf(outfile, "Forced newline sequence: LF\n");
1437     break;
1438    
1439     case PCRE_NEWLINE_CRLF:
1440     fprintf(outfile, "Forced newline sequence: CRLF\n");
1441     break;
1442    
1443 nigel 93 case PCRE_NEWLINE_ANY:
1444     fprintf(outfile, "Forced newline sequence: ANY\n");
1445     break;
1446    
1447 nigel 91 default:
1448     break;
1449     }
1450    
1451 nigel 43 if (first_char == -1)
1452     {
1453 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1454 nigel 43 }
1455     else if (first_char < 0)
1456     {
1457     fprintf(outfile, "No first char\n");
1458     }
1459     else
1460     {
1461 nigel 63 int ch = first_char & 255;
1462 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1463 nigel 63 "" : " (caseless)";
1464 nigel 93 if (PRINTHEX(ch))
1465 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1466 nigel 3 else
1467 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1468 nigel 43 }
1469 nigel 37
1470 nigel 43 if (need_char < 0)
1471     {
1472     fprintf(outfile, "No need char\n");
1473 nigel 3 }
1474 nigel 43 else
1475     {
1476 nigel 63 int ch = need_char & 255;
1477 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1478 nigel 63 "" : " (caseless)";
1479 nigel 93 if (PRINTHEX(ch))
1480 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1481 nigel 43 else
1482 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1483 nigel 43 }
1484 nigel 75
1485     /* Don't output study size; at present it is in any case a fixed
1486     value, but it varies, depending on the computer architecture, and
1487     so messes up the test suite. (And with the /F option, it might be
1488     flipped.) */
1489    
1490     if (do_study)
1491     {
1492     if (extra == NULL)
1493     fprintf(outfile, "Study returned NULL\n");
1494     else
1495     {
1496     uschar *start_bits = NULL;
1497     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1498    
1499     if (start_bits == NULL)
1500     fprintf(outfile, "No starting byte set\n");
1501     else
1502     {
1503     int i;
1504     int c = 24;
1505     fprintf(outfile, "Starting byte set: ");
1506     for (i = 0; i < 256; i++)
1507     {
1508     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1509     {
1510     if (c > 75)
1511     {
1512     fprintf(outfile, "\n ");
1513     c = 2;
1514     }
1515 nigel 93 if (PRINTHEX(i) && i != ' ')
1516 nigel 75 {
1517     fprintf(outfile, "%c ", i);
1518     c += 2;
1519     }
1520     else
1521     {
1522     fprintf(outfile, "\\x%02x ", i);
1523     c += 5;
1524     }
1525     }
1526     }
1527     fprintf(outfile, "\n");
1528     }
1529     }
1530     }
1531 nigel 3 }
1532    
1533 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1534     that is all. The first 8 bytes of the file are the regex length and then
1535     the study length, in big-endian order. */
1536 nigel 3
1537 nigel 75 if (to_file != NULL)
1538 nigel 3 {
1539 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1540     if (f == NULL)
1541 nigel 3 {
1542 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1543 nigel 3 }
1544 nigel 75 else
1545     {
1546     uschar sbuf[8];
1547     sbuf[0] = (true_size >> 24) & 255;
1548     sbuf[1] = (true_size >> 16) & 255;
1549     sbuf[2] = (true_size >> 8) & 255;
1550     sbuf[3] = (true_size) & 255;
1551 nigel 3
1552 nigel 75 sbuf[4] = (true_study_size >> 24) & 255;
1553     sbuf[5] = (true_study_size >> 16) & 255;
1554     sbuf[6] = (true_study_size >> 8) & 255;
1555     sbuf[7] = (true_study_size) & 255;
1556 nigel 3
1557 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1558     fwrite(re, 1, true_size, f) < true_size)
1559     {
1560     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1561     }
1562 nigel 3 else
1563     {
1564 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1565     if (extra != NULL)
1566 nigel 3 {
1567 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1568     true_study_size)
1569 nigel 3 {
1570 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1571     strerror(errno));
1572 nigel 3 }
1573 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1574 nigel 93
1575 nigel 3 }
1576     }
1577 nigel 75 fclose(f);
1578 nigel 3 }
1579 nigel 77
1580     new_free(re);
1581     if (extra != NULL) new_free(extra);
1582     if (tables != NULL) new_free((void *)tables);
1583 nigel 75 continue; /* With next regex */
1584 nigel 3 }
1585 nigel 75 } /* End of non-POSIX compile */
1586 nigel 3
1587     /* Read data lines and test them */
1588    
1589     for (;;)
1590     {
1591 nigel 87 uschar *q;
1592     uschar *bptr = dbuffer;
1593 nigel 57 int *use_offsets = offsets;
1594 nigel 53 int use_size_offsets = size_offsets;
1595 nigel 63 int callout_data = 0;
1596     int callout_data_set = 0;
1597 nigel 3 int count, c;
1598 nigel 29 int copystrings = 0;
1599 nigel 63 int find_match_limit = 0;
1600 nigel 29 int getstrings = 0;
1601     int getlist = 0;
1602 nigel 39 int gmatched = 0;
1603 nigel 35 int start_offset = 0;
1604 nigel 41 int g_notempty = 0;
1605 nigel 77 int use_dfa = 0;
1606 nigel 3
1607     options = 0;
1608    
1609 nigel 91 *copynames = 0;
1610     *getnames = 0;
1611    
1612     copynamesptr = copynames;
1613     getnamesptr = getnames;
1614    
1615 nigel 63 pcre_callout = callout;
1616     first_callout = 1;
1617     callout_extra = 0;
1618     callout_count = 0;
1619     callout_fail_count = 999999;
1620     callout_fail_id = -1;
1621 nigel 73 show_malloc = 0;
1622 nigel 63
1623 nigel 91 if (extra != NULL) extra->flags &=
1624     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1625    
1626     len = 0;
1627     for (;;)
1628 nigel 11 {
1629 nigel 91 if (infile == stdin) printf("data> ");
1630     if (extend_inputline(infile, buffer + len) == NULL)
1631     {
1632     if (len > 0) break;
1633     done = 1;
1634     goto CONTINUE;
1635     }
1636     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1637     len = (int)strlen((char *)buffer);
1638     if (buffer[len-1] == '\n') break;
1639 nigel 11 }
1640 nigel 3
1641     while (len > 0 && isspace(buffer[len-1])) len--;
1642     buffer[len] = 0;
1643     if (len == 0) break;
1644    
1645     p = buffer;
1646     while (isspace(*p)) p++;
1647    
1648 nigel 9 q = dbuffer;
1649 nigel 3 while ((c = *p++) != 0)
1650     {
1651     int i = 0;
1652     int n = 0;
1653 nigel 63
1654 nigel 3 if (c == '\\') switch ((c = *p++))
1655     {
1656     case 'a': c = 7; break;
1657     case 'b': c = '\b'; break;
1658     case 'e': c = 27; break;
1659     case 'f': c = '\f'; break;
1660     case 'n': c = '\n'; break;
1661     case 'r': c = '\r'; break;
1662     case 't': c = '\t'; break;
1663     case 'v': c = '\v'; break;
1664    
1665     case '0': case '1': case '2': case '3':
1666     case '4': case '5': case '6': case '7':
1667     c -= '0';
1668     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1669     c = c * 8 + *p++ - '0';
1670 nigel 91
1671     #if !defined NOUTF8
1672     if (use_utf8 && c > 255)
1673     {
1674     unsigned char buff8[8];
1675     int ii, utn;
1676     utn = ord2utf8(c, buff8);
1677     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1678     c = buff8[ii]; /* Last byte */
1679     }
1680     #endif
1681 nigel 3 break;
1682    
1683     case 'x':
1684 nigel 49
1685     /* Handle \x{..} specially - new Perl thing for utf8 */
1686    
1687 nigel 79 #if !defined NOUTF8
1688 nigel 49 if (*p == '{')
1689     {
1690     unsigned char *pt = p;
1691     c = 0;
1692     while (isxdigit(*(++pt)))
1693     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1694     if (*pt == '}')
1695     {
1696 nigel 67 unsigned char buff8[8];
1697 nigel 49 int ii, utn;
1698 nigel 85 utn = ord2utf8(c, buff8);
1699 nigel 67 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1700     c = buff8[ii]; /* Last byte */
1701 nigel 49 p = pt + 1;
1702     break;
1703     }
1704     /* Not correct form; fall through */
1705     }
1706 nigel 79 #endif
1707 nigel 49
1708     /* Ordinary \x */
1709    
1710 nigel 3 c = 0;
1711     while (i++ < 2 && isxdigit(*p))
1712     {
1713     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1714     p++;
1715     }
1716     break;
1717    
1718 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1719 nigel 3 p--;
1720     continue;
1721    
1722 nigel 75 case '>':
1723     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1724     continue;
1725    
1726 nigel 3 case 'A': /* Option setting */
1727     options |= PCRE_ANCHORED;
1728     continue;
1729    
1730     case 'B':
1731     options |= PCRE_NOTBOL;
1732     continue;
1733    
1734 nigel 29 case 'C':
1735 nigel 63 if (isdigit(*p)) /* Set copy string */
1736     {
1737     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1738     copystrings |= 1 << n;
1739     }
1740     else if (isalnum(*p))
1741     {
1742 nigel 91 uschar *npp = copynamesptr;
1743 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1744 nigel 91 *npp++ = 0;
1745 nigel 67 *npp = 0;
1746 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1747 nigel 63 if (n < 0)
1748 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1749     copynamesptr = npp;
1750 nigel 63 }
1751     else if (*p == '+')
1752     {
1753     callout_extra = 1;
1754     p++;
1755     }
1756     else if (*p == '-')
1757     {
1758     pcre_callout = NULL;
1759     p++;
1760     }
1761     else if (*p == '!')
1762     {
1763     callout_fail_id = 0;
1764     p++;
1765     while(isdigit(*p))
1766     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1767     callout_fail_count = 0;
1768     if (*p == '!')
1769     {
1770     p++;
1771     while(isdigit(*p))
1772     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1773     }
1774     }
1775     else if (*p == '*')
1776     {
1777     int sign = 1;
1778     callout_data = 0;
1779     if (*(++p) == '-') { sign = -1; p++; }
1780     while(isdigit(*p))
1781     callout_data = callout_data * 10 + *p++ - '0';
1782     callout_data *= sign;
1783     callout_data_set = 1;
1784     }
1785 nigel 29 continue;
1786    
1787 nigel 79 #if !defined NODFA
1788 nigel 77 case 'D':
1789 nigel 79 #if !defined NOPOSIX
1790 nigel 77 if (posix || do_posix)
1791     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1792     else
1793 nigel 79 #endif
1794 nigel 77 use_dfa = 1;
1795     continue;
1796    
1797     case 'F':
1798     options |= PCRE_DFA_SHORTEST;
1799     continue;
1800 nigel 79 #endif
1801 nigel 77
1802 nigel 29 case 'G':
1803 nigel 63 if (isdigit(*p))
1804     {
1805     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1806     getstrings |= 1 << n;
1807     }
1808     else if (isalnum(*p))
1809     {
1810 nigel 91 uschar *npp = getnamesptr;
1811 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1812 nigel 91 *npp++ = 0;
1813 nigel 67 *npp = 0;
1814 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1815 nigel 63 if (n < 0)
1816 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1817     getnamesptr = npp;
1818 nigel 63 }
1819 nigel 29 continue;
1820    
1821     case 'L':
1822     getlist = 1;
1823     continue;
1824    
1825 nigel 63 case 'M':
1826     find_match_limit = 1;
1827     continue;
1828    
1829 nigel 37 case 'N':
1830     options |= PCRE_NOTEMPTY;
1831     continue;
1832    
1833 nigel 3 case 'O':
1834     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1835 nigel 53 if (n > size_offsets_max)
1836     {
1837     size_offsets_max = n;
1838 nigel 57 free(offsets);
1839 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1840 nigel 53 if (offsets == NULL)
1841     {
1842     printf("** Failed to get %d bytes of memory for offsets vector\n",
1843     size_offsets_max * sizeof(int));
1844 nigel 77 yield = 1;
1845     goto EXIT;
1846 nigel 53 }
1847     }
1848     use_size_offsets = n;
1849 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1850 nigel 3 continue;
1851    
1852 nigel 75 case 'P':
1853     options |= PCRE_PARTIAL;
1854     continue;
1855    
1856 nigel 91 case 'Q':
1857     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1858     if (extra == NULL)
1859     {
1860     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1861     extra->flags = 0;
1862     }
1863     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1864     extra->match_limit_recursion = n;
1865     continue;
1866    
1867     case 'q':
1868     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1869     if (extra == NULL)
1870     {
1871     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1872     extra->flags = 0;
1873     }
1874     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1875     extra->match_limit = n;
1876     continue;
1877    
1878 nigel 79 #if !defined NODFA
1879 nigel 77 case 'R':
1880     options |= PCRE_DFA_RESTART;
1881     continue;
1882 nigel 79 #endif
1883 nigel 77
1884 nigel 73 case 'S':
1885     show_malloc = 1;
1886     continue;
1887    
1888 nigel 3 case 'Z':
1889     options |= PCRE_NOTEOL;
1890     continue;
1891 nigel 71
1892     case '?':
1893     options |= PCRE_NO_UTF8_CHECK;
1894     continue;
1895 nigel 91
1896     case '<':
1897     {
1898     int x = check_newline(p, outfile);
1899     if (x == 0) goto NEXT_DATA;
1900     options |= x;
1901     while (*p++ != '>');
1902     }
1903     continue;
1904 nigel 3 }
1905 nigel 9 *q++ = c;
1906 nigel 3 }
1907 nigel 9 *q = 0;
1908     len = q - dbuffer;
1909 nigel 3
1910 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
1911     {
1912     printf("**Match limit not relevant for DFA matching: ignored\n");
1913     find_match_limit = 0;
1914     }
1915    
1916 nigel 3 /* Handle matching via the POSIX interface, which does not
1917 nigel 63 support timing or playing with the match limit or callout data. */
1918 nigel 3
1919 nigel 37 #if !defined NOPOSIX
1920 nigel 3 if (posix || do_posix)
1921     {
1922     int rc;
1923     int eflags = 0;
1924 nigel 63 regmatch_t *pmatch = NULL;
1925     if (use_size_offsets > 0)
1926 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1927 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1928     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1929    
1930 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1931 nigel 3
1932     if (rc != 0)
1933     {
1934 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1935 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1936     }
1937 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1938     != 0)
1939     {
1940     fprintf(outfile, "Matched with REG_NOSUB\n");
1941     }
1942 nigel 3 else
1943     {
1944 nigel 7 size_t i;
1945 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
1946 nigel 3 {
1947     if (pmatch[i].rm_so >= 0)
1948     {
1949 nigel 23 fprintf(outfile, "%2d: ", (int)i);
1950 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
1951     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1952 nigel 3 fprintf(outfile, "\n");
1953 nigel 35 if (i == 0 && do_showrest)
1954     {
1955     fprintf(outfile, " 0+ ");
1956 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1957     outfile);
1958 nigel 35 fprintf(outfile, "\n");
1959     }
1960 nigel 3 }
1961     }
1962     }
1963 nigel 53 free(pmatch);
1964 nigel 3 }
1965    
1966 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
1967 nigel 3
1968 nigel 37 else
1969     #endif /* !defined NOPOSIX */
1970    
1971 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
1972 nigel 3 {
1973 nigel 93 if (timeitm > 0)
1974 nigel 3 {
1975     register int i;
1976     clock_t time_taken;
1977     clock_t start_time = clock();
1978 nigel 77
1979 nigel 79 #if !defined NODFA
1980 nigel 77 if (all_use_dfa || use_dfa)
1981     {
1982     int workspace[1000];
1983 nigel 93 for (i = 0; i < timeitm; i++)
1984 nigel 77 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1985     options | g_notempty, use_offsets, use_size_offsets, workspace,
1986     sizeof(workspace)/sizeof(int));
1987     }
1988     else
1989 nigel 79 #endif
1990 nigel 77
1991 nigel 93 for (i = 0; i < timeitm; i++)
1992 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
1993 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1994 nigel 77
1995 nigel 3 time_taken = clock() - start_time;
1996 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
1997     (((double)time_taken * 1000.0) / (double)timeitm) /
1998 nigel 63 (double)CLOCKS_PER_SEC);
1999 nigel 3 }
2000    
2001 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2002 nigel 87 varying limits in order to find the minimum value for the match limit and
2003     for the recursion limit. */
2004 nigel 63
2005     if (find_match_limit)
2006     {
2007     if (extra == NULL)
2008     {
2009 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2010 nigel 63 extra->flags = 0;
2011     }
2012    
2013 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2014 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2015     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2016     PCRE_ERROR_MATCHLIMIT, "match()");
2017 nigel 63
2018 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2019     options|g_notempty, use_offsets, use_size_offsets,
2020     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2021     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2022 nigel 63 }
2023    
2024     /* If callout_data is set, use the interface with additional data */
2025    
2026     else if (callout_data_set)
2027     {
2028     if (extra == NULL)
2029     {
2030 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2031 nigel 63 extra->flags = 0;
2032     }
2033     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2034 nigel 71 extra->callout_data = &callout_data;
2035 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2036     options | g_notempty, use_offsets, use_size_offsets);
2037     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2038     }
2039    
2040     /* The normal case is just to do the match once, with the default
2041     value of match_limit. */
2042    
2043 nigel 79 #if !defined NODFA
2044 nigel 77 else if (all_use_dfa || use_dfa)
2045     {
2046     int workspace[1000];
2047     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2048     options | g_notempty, use_offsets, use_size_offsets, workspace,
2049     sizeof(workspace)/sizeof(int));
2050     if (count == 0)
2051     {
2052     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2053     count = use_size_offsets/2;
2054     }
2055     }
2056 nigel 79 #endif
2057 nigel 77
2058 nigel 75 else
2059     {
2060     count = pcre_exec(re, extra, (char *)bptr, len,
2061     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2062 nigel 77 if (count == 0)
2063     {
2064     fprintf(outfile, "Matched, but too many substrings\n");
2065     count = use_size_offsets/3;
2066     }
2067 nigel 75 }
2068 nigel 3
2069 nigel 39 /* Matched */
2070    
2071 nigel 3 if (count >= 0)
2072     {
2073 nigel 93 int i, maxcount;
2074    
2075     #if !defined NODFA
2076     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2077     #endif
2078     maxcount = use_size_offsets/3;
2079    
2080     /* This is a check against a lunatic return value. */
2081    
2082     if (count > maxcount)
2083     {
2084     fprintf(outfile,
2085     "** PCRE error: returned count %d is too big for offset size %d\n",
2086     count, use_size_offsets);
2087     count = use_size_offsets/3;
2088     if (do_g || do_G)
2089     {
2090     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2091     do_g = do_G = FALSE; /* Break g/G loop */
2092     }
2093     }
2094    
2095 nigel 29 for (i = 0; i < count * 2; i += 2)
2096 nigel 3 {
2097 nigel 57 if (use_offsets[i] < 0)
2098 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2099     else
2100     {
2101     fprintf(outfile, "%2d: ", i/2);
2102 nigel 63 (void)pchars(bptr + use_offsets[i],
2103     use_offsets[i+1] - use_offsets[i], outfile);
2104 nigel 3 fprintf(outfile, "\n");
2105 nigel 35 if (i == 0)
2106     {
2107     if (do_showrest)
2108     {
2109     fprintf(outfile, " 0+ ");
2110 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2111     outfile);
2112 nigel 35 fprintf(outfile, "\n");
2113     }
2114     }
2115 nigel 3 }
2116     }
2117 nigel 29
2118     for (i = 0; i < 32; i++)
2119     {
2120     if ((copystrings & (1 << i)) != 0)
2121     {
2122 nigel 91 char copybuffer[256];
2123 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2124 nigel 37 i, copybuffer, sizeof(copybuffer));
2125 nigel 29 if (rc < 0)
2126     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2127     else
2128 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2129 nigel 29 }
2130     }
2131    
2132 nigel 91 for (copynamesptr = copynames;
2133     *copynamesptr != 0;
2134     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2135     {
2136     char copybuffer[256];
2137     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2138     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2139     if (rc < 0)
2140     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2141     else
2142     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2143     }
2144    
2145 nigel 29 for (i = 0; i < 32; i++)
2146     {
2147     if ((getstrings & (1 << i)) != 0)
2148     {
2149     const char *substring;
2150 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2151 nigel 29 i, &substring);
2152     if (rc < 0)
2153     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2154     else
2155     {
2156     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2157 nigel 49 pcre_free_substring(substring);
2158 nigel 29 }
2159     }
2160     }
2161    
2162 nigel 91 for (getnamesptr = getnames;
2163     *getnamesptr != 0;
2164     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2165     {
2166     const char *substring;
2167     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2168     count, (char *)getnamesptr, &substring);
2169     if (rc < 0)
2170     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2171     else
2172     {
2173     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2174     pcre_free_substring(substring);
2175     }
2176     }
2177    
2178 nigel 29 if (getlist)
2179     {
2180     const char **stringlist;
2181 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2182 nigel 29 &stringlist);
2183     if (rc < 0)
2184     fprintf(outfile, "get substring list failed %d\n", rc);
2185     else
2186     {
2187     for (i = 0; i < count; i++)
2188     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2189     if (stringlist[i] != NULL)
2190     fprintf(outfile, "string list not terminated by NULL\n");
2191 nigel 49 /* free((void *)stringlist); */
2192     pcre_free_substring_list(stringlist);
2193 nigel 29 }
2194     }
2195 nigel 39 }
2196 nigel 29
2197 nigel 75 /* There was a partial match */
2198    
2199     else if (count == PCRE_ERROR_PARTIAL)
2200     {
2201 nigel 77 fprintf(outfile, "Partial match");
2202 nigel 79 #if !defined NODFA
2203 nigel 77 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2204     fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2205     bptr + use_offsets[0]);
2206 nigel 79 #endif
2207 nigel 77 fprintf(outfile, "\n");
2208 nigel 75 break; /* Out of the /g loop */
2209     }
2210    
2211 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2212 nigel 47 g_notempty after a null match, this is not necessarily the end.
2213 nigel 73 We want to advance the start offset, and continue. In the case of UTF-8
2214     matching, the advance must be one character, not one byte. Fudge the
2215     offset values to achieve this. We won't be at the end of the string -
2216     that was checked before setting g_notempty. */
2217 nigel 39
2218 nigel 3 else
2219     {
2220 nigel 41 if (g_notempty != 0)
2221 nigel 35 {
2222 nigel 73 int onechar = 1;
2223 nigel 57 use_offsets[0] = start_offset;
2224 nigel 73 if (use_utf8)
2225     {
2226     while (start_offset + onechar < len)
2227     {
2228     int tb = bptr[start_offset+onechar];
2229     if (tb <= 127) break;
2230     tb &= 0xc0;
2231     if (tb != 0 && tb != 0xc0) onechar++;
2232     }
2233     }
2234     use_offsets[1] = start_offset + onechar;
2235 nigel 35 }
2236 nigel 41 else
2237     {
2238 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2239 nigel 41 {
2240 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
2241 nigel 41 }
2242 nigel 73 else fprintf(outfile, "Error %d\n", count);
2243 nigel 41 break; /* Out of the /g loop */
2244     }
2245 nigel 3 }
2246 nigel 35
2247 nigel 39 /* If not /g or /G we are done */
2248    
2249     if (!do_g && !do_G) break;
2250    
2251 nigel 41 /* If we have matched an empty string, first check to see if we are at
2252     the end of the subject. If so, the /g loop is over. Otherwise, mimic
2253     what Perl's /g option does. This turns out to be rather cunning. First
2254 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2255     same point. If this fails (picked up above) we advance to the next
2256     character. */
2257 nigel 39
2258 nigel 41 g_notempty = 0;
2259 nigel 57 if (use_offsets[0] == use_offsets[1])
2260 nigel 41 {
2261 nigel 57 if (use_offsets[0] == len) break;
2262 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2263 nigel 41 }
2264 nigel 39
2265     /* For /g, update the start offset, leaving the rest alone */
2266    
2267 nigel 57 if (do_g) start_offset = use_offsets[1];
2268 nigel 39
2269     /* For /G, update the pointer and length */
2270    
2271     else
2272 nigel 35 {
2273 nigel 57 bptr += use_offsets[1];
2274     len -= use_offsets[1];
2275 nigel 35 }
2276 nigel 39 } /* End of loop for /g and /G */
2277 nigel 91
2278     NEXT_DATA: continue;
2279 nigel 39 } /* End of loop for data lines */
2280 nigel 3
2281 nigel 11 CONTINUE:
2282 nigel 37
2283     #if !defined NOPOSIX
2284 nigel 3 if (posix || do_posix) regfree(&preg);
2285 nigel 37 #endif
2286    
2287 nigel 77 if (re != NULL) new_free(re);
2288     if (extra != NULL) new_free(extra);
2289 nigel 25 if (tables != NULL)
2290     {
2291 nigel 77 new_free((void *)tables);
2292 nigel 25 setlocale(LC_CTYPE, "C");
2293 nigel 93 locale_set = 0;
2294 nigel 25 }
2295 nigel 3 }
2296    
2297 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2298 nigel 77
2299     EXIT:
2300    
2301     if (infile != NULL && infile != stdin) fclose(infile);
2302     if (outfile != NULL && outfile != stdout) fclose(outfile);
2303    
2304     free(buffer);
2305     free(dbuffer);
2306     free(pbuffer);
2307     free(offsets);
2308    
2309     return yield;
2310 nigel 3 }
2311    
2312 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12