/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Contents of /code/branches/pcre16/pcretest.c

Parent Directory | Revision Log


Revision 210 - (hide annotations) (download)
Wed Aug 8 14:24:50 2007 UTC (7 years ago) by ph10
Original Path: code/trunk/pcretest.c
File MIME type: text/plain
File size: 68692 byte(s)
Add Perl 5.10's backtracking verbs.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 199 #include <config.h>
41 ph10 200 #endif
42 ph10 199
43 nigel 3 #include <ctype.h>
44     #include <stdio.h>
45     #include <string.h>
46     #include <stdlib.h>
47     #include <time.h>
48 nigel 25 #include <locale.h>
49 nigel 75 #include <errno.h>
50 nigel 3
51 nigel 93
/* File-open modes differ between Windows and everything else. Historically
pcretest opened both files without "b"; "b" was then added to both for release
5.0 (it makes no difference on Unix-like systems), and later it turned out to
be wrong for the input file on Windows. The two modes are therefore kept in
macros so that they are easy to adjust: output is always binary, input is
binary everywhere except Windows. The platform-specific headers needed later
in the file are pulled in here as well. */

#if !defined(_WIN32) && !defined(WIN32)

#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /*   for setrlimit(). */
#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"

#else

#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

#endif
72    
73 nigel 93
74 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
75     displaying the results of pcre_study() and we also need to know about the
76     internal macros, structures, and other internal data values; pcretest has
77     "inside information" compared to a program that strictly follows the PCRE API.
78 nigel 37
79 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
80     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81     appropriately for an application, not for building PCRE. */
82 nigel 77
83 ph10 145 #include "pcre.h"
84 nigel 77 #include "pcre_internal.h"
85    
86 nigel 85 /* We need access to the data tables that PCRE uses. So as not to have to keep
87     two copies, we include the source file here, changing the names of the external
88     symbols to prevent clashes. */
89 nigel 77
90 nigel 85 #define _pcre_utf8_table1 utf8_table1
91     #define _pcre_utf8_table1_size utf8_table1_size
92     #define _pcre_utf8_table2 utf8_table2
93     #define _pcre_utf8_table3 utf8_table3
94     #define _pcre_utf8_table4 utf8_table4
95     #define _pcre_utt utt
96     #define _pcre_utt_size utt_size
97     #define _pcre_OP_lengths OP_lengths
98    
99     #include "pcre_tables.c"
100    
101     /* We also need the pcre_printint() function for printing out compiled
102     patterns. This function is in a separate file so that it can be included in
103 nigel 93 pcre_compile.c when that module is compiled with debugging enabled.
104 nigel 85
105 nigel 93 The definition of the macro PRINTABLE, which determines whether to print an
106     output character as-is or as a hex value when showing compiled patterns, is
107     contained in this file. We uses it here also, in cases when the locale has not
108     been explicitly changed, so as to get consistent output from systems that
109     differ in their output from isprint() even in the "C" locale. */
110    
111 nigel 85 #include "pcre_printint.src"
112    
113 nigel 93 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
114 nigel 85
115 nigel 93
116 nigel 37 /* It is possible to compile this test program without including support for
117     testing the POSIX interface, though this is not available via the standard
118     Makefile. */
119    
120     #if !defined NOPOSIX
121 nigel 3 #include "pcreposix.h"
122 nigel 37 #endif
123 nigel 3
/* For the benefit of the version currently imported into Exim, pcretest can
also be built without UTF-8 support (define NOUTF8), without the interface to
the DFA matcher (define NODFA), and without the doublecheck of the old "info"
function (define NOINFOCHECK). UTF-8 support is cut out automatically when
PCRE itself was built without it, i.e. when SUPPORT_UTF8 is not defined. */

#if !defined SUPPORT_UTF8 && !defined NOUTF8
#define NOUTF8
#endif
135 nigel 79
136 ph10 107
/* Timing parameters. If the C library does not supply CLOCKS_PER_SEC, fall
back to CLK_TCK when that exists, and to 100 otherwise. */

#if !defined CLOCKS_PER_SEC && defined CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#endif

#if !defined CLOCKS_PER_SEC
#define CLOCKS_PER_SEC 100
#endif

/* Default number of repetitions used when timing is requested without an
explicit count. */

#define LOOPREPEAT 500000
150 nigel 3
151 nigel 85 /* Static variables */
152    
153 nigel 3 static FILE *outfile;
154     static int log_store = 0;
155 nigel 63 static int callout_count;
156     static int callout_extra;
157     static int callout_fail_count;
158     static int callout_fail_id;
159 ph10 210 static int debug_lengths;
160 nigel 63 static int first_callout;
161 nigel 93 static int locale_set = 0;
162 nigel 73 static int show_malloc;
163 nigel 67 static int use_utf8;
164 nigel 43 static size_t gotten_store;
165 nigel 3
166 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
167    
168     static int buffer_size = 50000;
169     static uschar *buffer = NULL;
170     static uschar *dbuffer = NULL;
171 nigel 75 static uschar *pbuffer = NULL;
172 nigel 3
173 nigel 75
174 nigel 49
175     /*************************************************
176 nigel 91 * Read or extend an input line *
177     *************************************************/
178    
179     /* Input lines are read into buffer, but both patterns and data lines can be
180     continued over multiple input lines. In addition, if the buffer fills up, we
181     want to automatically expand it so as to be able to handle extremely large
182     lines that are needed for certain stress tests. When the input buffer is
183     expanded, the other two buffers must also be expanded likewise, and the
184     contents of pbuffer, which are a copy of the input for callouts, must be
185     preserved (for when expansion happens for a data line). This is not the most
186     optimal way of handling this, but hey, this is just a test program!
187    
188     Arguments:
189     f the file to read
190     start where in buffer to start (this *must* be within buffer)
191    
192     Returns: pointer to the start of new data
193     could be a copy of start, or could be moved
194     NULL if no data read and EOF reached
195     */
196    
197     static uschar *
198     extend_inputline(FILE *f, uschar *start)
199     {
200     uschar *here = start;
201    
202     for (;;)
203     {
204     int rlen = buffer_size - (here - buffer);
205 nigel 93
206 nigel 91 if (rlen > 1000)
207     {
208     int dlen;
209     if (fgets((char *)here, rlen, f) == NULL)
210     return (here == start)? NULL : start;
211     dlen = (int)strlen((char *)here);
212     if (dlen > 0 && here[dlen - 1] == '\n') return start;
213     here += dlen;
214     }
215    
216     else
217     {
218     int new_buffer_size = 2*buffer_size;
219     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
220     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
221     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
222    
223     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
224     {
225     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
226     exit(1);
227     }
228    
229     memcpy(new_buffer, buffer, buffer_size);
230     memcpy(new_pbuffer, pbuffer, buffer_size);
231    
232     buffer_size = new_buffer_size;
233    
234     start = new_buffer + (start - buffer);
235     here = new_buffer + (here - buffer);
236    
237     free(buffer);
238     free(dbuffer);
239     free(pbuffer);
240    
241     buffer = new_buffer;
242     dbuffer = new_dbuffer;
243     pbuffer = new_pbuffer;
244     }
245     }
246    
247     return NULL; /* Control never gets here */
248     }
249    
250    
251    
252    
253    
254    
255    
256     /*************************************************
257 nigel 63 * Read number from string *
258     *************************************************/
259    
/* Converts a decimal number at the start of a string. We don't use strtoul()
because SunOS4 doesn't have it. Rather than mess around with conditional
compilation, just do the job by hand. It is only used for unpicking arguments,
so it is kept simple: leading white space is skipped, then as many decimal
digits as are present are accumulated. If there are no digits the result is
zero. A value too large for an int is clamped to the largest int instead of
overflowing (signed overflow is undefined behaviour in C).

Arguments:
  str         string to be converted
  endptr      where to put the end pointer (the first character that was
                not consumed)

Returns:      the value, as a non-negative int
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
/* Largest int, obtained without needing <limits.h>; this relies on int and
unsigned int having the same number of value-plus-sign bits, which holds on
all the usual platforms. */

const int int_max = (int)(~0u >> 1);
int result = 0;

while (*str != 0 && isspace(*str)) str++;

while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* result*10 + digit fits exactly when result <= (int_max - digit)/10 */

  if (result > (int_max - digit) / 10) result = int_max;
    else result = result * 10 + digit;
  }

*endptr = str;
return result;
}
280    
281    
282    
283 nigel 49
284     /*************************************************
285     * Convert UTF-8 string to value *
286     *************************************************/
287    
288     /* This function takes one or more bytes that represents a UTF-8 character,
289     and returns the value of the character.
290    
291     Argument:
292 nigel 91 utf8bytes a pointer to the byte vector
293     vptr a pointer to an int to receive the value
294 nigel 49
295 nigel 91 Returns: > 0 => the number of bytes consumed
296     -6 to 0 => malformed UTF-8 character at offset = (-return)
297 nigel 49 */
298    
299 nigel 79 #if !defined NOUTF8
300    
301 nigel 67 static int
302 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
303 nigel 49 {
304 nigel 91 int c = *utf8bytes++;
305 nigel 49 int d = c;
306     int i, j, s;
307    
308     for (i = -1; i < 6; i++) /* i is number of additional bytes */
309     {
310     if ((d & 0x80) == 0) break;
311     d <<= 1;
312     }
313    
314     if (i == -1) { *vptr = c; return 1; } /* ascii character */
315     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
316    
317     /* i now has a value in the range 1-5 */
318    
319 nigel 59 s = 6*i;
320 nigel 85 d = (c & utf8_table3[i]) << s;
321 nigel 49
322     for (j = 0; j < i; j++)
323     {
324 nigel 91 c = *utf8bytes++;
325 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
326 nigel 59 s -= 6;
327 nigel 49 d |= (c & 0x3f) << s;
328     }
329    
330     /* Check that encoding was the correct unique one */
331    
332 nigel 85 for (j = 0; j < utf8_table1_size; j++)
333     if (d <= utf8_table1[j]) break;
334 nigel 49 if (j != i) return -(i+1);
335    
336     /* Valid value */
337    
338     *vptr = d;
339     return i+1;
340     }
341    
342 nigel 79 #endif
343 nigel 49
344    
345 nigel 79
346 nigel 63 /*************************************************
347 nigel 85 * Convert character value to UTF-8 *
348     *************************************************/
349    
350     /* This function takes an integer value in the range 0 - 0x7fffffff
351     and encodes it as a UTF-8 character in 0 to 6 bytes.
352    
353     Arguments:
354     cvalue the character value
355 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
356 nigel 85
357     Returns: number of characters placed in the buffer
358     */
359    
360 nigel 93 #if !defined NOUTF8
361    
362 nigel 85 static int
363 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
364 nigel 85 {
365     register int i, j;
366     for (i = 0; i < utf8_table1_size; i++)
367     if (cvalue <= utf8_table1[i]) break;
368 nigel 91 utf8bytes += i;
369 nigel 85 for (j = i; j > 0; j--)
370     {
371 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
372 nigel 85 cvalue >>= 6;
373     }
374 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
375 nigel 85 return i + 1;
376     }
377    
378 nigel 93 #endif
379 nigel 85
380    
381 nigel 93
382 nigel 85 /*************************************************
383 nigel 63 * Print character string *
384     *************************************************/
385 nigel 49
386 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
387     mode. Yields number of characters printed. If handed a NULL file, just counts
388     chars without printing. */
389 nigel 49
390 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
391 nigel 3 {
392 nigel 85 int c = 0;
393 nigel 63 int yield = 0;
394 nigel 3
395 nigel 63 while (length-- > 0)
396 nigel 3 {
397 nigel 79 #if !defined NOUTF8
398 nigel 67 if (use_utf8)
399 nigel 63 {
400     int rc = utf82ord(p, &c);
401 nigel 3
402 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
403     {
404     length -= rc - 1;
405     p += rc;
406 nigel 93 if (PRINTHEX(c))
407 nigel 63 {
408     if (f != NULL) fprintf(f, "%c", c);
409     yield++;
410     }
411     else
412     {
413 nigel 93 int n = 4;
414     if (f != NULL) fprintf(f, "\\x{%02x}", c);
415     yield += (n <= 0x000000ff)? 2 :
416     (n <= 0x00000fff)? 3 :
417     (n <= 0x0000ffff)? 4 :
418     (n <= 0x000fffff)? 5 : 6;
419 nigel 63 }
420     continue;
421     }
422     }
423 nigel 79 #endif
424 nigel 3
425 nigel 63 /* Not UTF-8, or malformed UTF-8 */
426    
427 nigel 93 c = *p++;
428     if (PRINTHEX(c))
429 nigel 3 {
430 nigel 63 if (f != NULL) fprintf(f, "%c", c);
431     yield++;
432 nigel 3 }
433 nigel 63 else
434 nigel 3 {
435 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
436     yield += 4;
437     }
438     }
439 nigel 3
440 nigel 63 return yield;
441     }
442 nigel 23
443 nigel 3
444 nigel 23
445 nigel 63 /*************************************************
446     * Callout function *
447     *************************************************/
448 nigel 3
449 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
450     the match. Yield zero unless more callouts than the fail count, or the callout
451     data is not zero. */
452 nigel 3
453 nigel 63 static int callout(pcre_callout_block *cb)
454     {
455     FILE *f = (first_callout | callout_extra)? outfile : NULL;
456 nigel 75 int i, pre_start, post_start, subject_length;
457 nigel 3
458 nigel 63 if (callout_extra)
459     {
460     fprintf(f, "Callout %d: last capture = %d\n",
461     cb->callout_number, cb->capture_last);
462 nigel 3
463 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
464     {
465     if (cb->offset_vector[i] < 0)
466     fprintf(f, "%2d: <unset>\n", i/2);
467     else
468     {
469     fprintf(f, "%2d: ", i/2);
470     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
471     cb->offset_vector[i+1] - cb->offset_vector[i], f);
472     fprintf(f, "\n");
473     }
474     }
475     }
476 nigel 3
477 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
478     datails. On subsequent calls in the same match, we use pchars just to find the
479     printed lengths of the substrings. */
480 nigel 3
481 nigel 63 if (f != NULL) fprintf(f, "--->");
482 nigel 3
483 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
484     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
485     cb->current_position - cb->start_match, f);
486 nigel 3
487 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
488    
489 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
490     cb->subject_length - cb->current_position, f);
491 nigel 3
492 nigel 63 if (f != NULL) fprintf(f, "\n");
493 nigel 9
494 nigel 63 /* Always print appropriate indicators, with callout number if not already
495 nigel 75 shown. For automatic callouts, show the pattern offset. */
496 nigel 3
497 nigel 75 if (cb->callout_number == 255)
498     {
499     fprintf(outfile, "%+3d ", cb->pattern_position);
500     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
501     }
502     else
503     {
504     if (callout_extra) fprintf(outfile, " ");
505     else fprintf(outfile, "%3d ", cb->callout_number);
506     }
507 nigel 3
508 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
509     fprintf(outfile, "^");
510 nigel 3
511 nigel 63 if (post_start > 0)
512     {
513     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
514     fprintf(outfile, "^");
515 nigel 3 }
516    
517 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
518     fprintf(outfile, " ");
519    
520     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
521     pbuffer + cb->pattern_position);
522    
523 nigel 63 fprintf(outfile, "\n");
524     first_callout = 0;
525 nigel 3
526 nigel 71 if (cb->callout_data != NULL)
527 nigel 49 {
528 nigel 71 int callout_data = *((int *)(cb->callout_data));
529     if (callout_data != 0)
530     {
531     fprintf(outfile, "Callout data = %d\n", callout_data);
532     return callout_data;
533     }
534 nigel 63 }
535 nigel 49
536 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
537     (++callout_count >= callout_fail_count)? 1 : 0;
538 nigel 3 }
539    
540    
541 nigel 63 /*************************************************
542 nigel 73 * Local malloc functions *
543 nigel 63 *************************************************/
544 nigel 3
545     /* Alternative malloc function, to test functionality and show the size of the
546     compiled re. */
547    
548     static void *new_malloc(size_t size)
549     {
550 nigel 73 void *block = malloc(size);
551 nigel 43 gotten_store = size;
552 nigel 73 if (show_malloc)
553 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
554 nigel 73 return block;
555 nigel 3 }
556    
557 nigel 73 static void new_free(void *block)
558     {
559     if (show_malloc)
560     fprintf(outfile, "free %p\n", block);
561     free(block);
562     }
563 nigel 3
564    
565 nigel 73 /* For recursion malloc/free, to test stacking calls */
566    
567     static void *stack_malloc(size_t size)
568     {
569     void *block = malloc(size);
570     if (show_malloc)
571 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
572 nigel 73 return block;
573     }
574    
575     static void stack_free(void *block)
576     {
577     if (show_malloc)
578     fprintf(outfile, "stack_free %p\n", block);
579     free(block);
580     }
581    
582    
583 nigel 63 /*************************************************
584     * Call pcre_fullinfo() *
585     *************************************************/
586 nigel 43
587     /* Get one piece of information from the pcre_fullinfo() function */
588    
589     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
590     {
591     int rc;
592     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
593     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
594     }
595    
596    
597    
598 nigel 63 /*************************************************
599 nigel 75 * Byte flipping function *
600     *************************************************/
601    
/* Reverses the byte order of a 2-byte or a 4-byte quantity held in the low
end of an unsigned long. Any bits above those bytes are discarded.

Arguments:
  value      the value to be flipped
  n          2 to swap the two low-order bytes; any other value reverses
               the four low-order bytes

Returns:     the flipped value
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
611    
612    
613    
614    
615     /*************************************************
616 nigel 87 * Check match or recursion limit *
617     *************************************************/
618    
619     static int
620     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
621     int start_offset, int options, int *use_offsets, int use_size_offsets,
622     int flag, unsigned long int *limit, int errnumber, const char *msg)
623     {
624     int count;
625     int min = 0;
626     int mid = 64;
627     int max = -1;
628    
629     extra->flags |= flag;
630    
631     for (;;)
632     {
633     *limit = mid;
634    
635     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
636     use_offsets, use_size_offsets);
637    
638     if (count == errnumber)
639     {
640     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
641     min = mid;
642     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
643     }
644    
645     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
646     count == PCRE_ERROR_PARTIAL)
647     {
648     if (mid == min + 1)
649     {
650     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
651     break;
652     }
653     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
654     max = mid;
655     mid = (min + mid)/2;
656     }
657     else break; /* Some other error */
658     }
659    
660     extra->flags &= ~flag;
661     return count;
662     }
663    
664    
665    
666     /*************************************************
667 nigel 91 * Check newline indicator *
668     *************************************************/
669    
670     /* This is used both at compile and run-time to check for <xxx> escapes, where
671 ph10 149 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
672     no match.
673 nigel 91
674     Arguments:
675     p points after the leading '<'
676     f file for error message
677    
678     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
679     */
680    
681     static int
682     check_newline(uschar *p, FILE *f)
683     {
684     if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
685     if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
686     if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
687 ph10 149 if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
688 nigel 93 if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
689 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
690     return 0;
691     }
692    
693    
694    
695     /*************************************************
696 nigel 93 * Usage function *
697     *************************************************/
698    
/* Writes the command line summary to the standard output. The -dfa and -p
lines are left out when pcretest is built with NODFA or NOPOSIX defined,
respectively. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input> [<output>]]\n", stdout);
fputs(" -b show compiled code (bytecode)\n", stdout);
fputs(" -C show PCRE compile-time options and exit\n", stdout);
fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n", stdout);
fputs(" -i show information about compiled patterns\n", stdout);
fputs(" -m output memory used information\n", stdout);
fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
fputs(" -s output store (memory) used information\n", stdout);
fputs(" -t time compilation and execution\n", stdout);
fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
fputs(" -tm time execution (matching) only\n", stdout);
fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
724    
725    
726    
727     /*************************************************
728 nigel 63 * Main Program *
729     *************************************************/
730 nigel 43
731 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
732     consist of a regular expression, in delimiters and optionally followed by
733     options, followed by a set of test data, terminated by an empty line. */
734    
735     int main(int argc, char **argv)
736     {
737     FILE *infile = stdin;
738     int options = 0;
739     int study_options = 0;
740     int op = 1;
741     int timeit = 0;
742 nigel 93 int timeitm = 0;
743 nigel 3 int showinfo = 0;
744 nigel 31 int showstore = 0;
745 nigel 87 int quiet = 0;
746 nigel 53 int size_offsets = 45;
747     int size_offsets_max;
748 nigel 77 int *offsets = NULL;
749 nigel 53 #if !defined NOPOSIX
750 nigel 3 int posix = 0;
751 nigel 53 #endif
752 nigel 3 int debug = 0;
753 nigel 11 int done = 0;
754 nigel 77 int all_use_dfa = 0;
755     int yield = 0;
756 nigel 91 int stack_size;
757 nigel 3
758 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
759     that 1024 is plenty long enough for the few names we'll be testing. */
760 nigel 69
761 nigel 91 uschar copynames[1024];
762     uschar getnames[1024];
763    
764     uschar *copynamesptr;
765     uschar *getnamesptr;
766    
767 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
768 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
769 nigel 69
770 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
771     dbuffer = (unsigned char *)malloc(buffer_size);
772     pbuffer = (unsigned char *)malloc(buffer_size);
773 nigel 69
774 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
775 nigel 3
776 nigel 93 outfile = stdout;
777    
778     /* The following _setmode() stuff is some Windows magic that tells its runtime
779     library to translate CRLF into a single LF character. At least, that's what
780     I've been told: never having used Windows I take this all on trust. Originally
781     it set 0x8000, but then I was advised that _O_BINARY was better. */
782    
783 nigel 75 #if defined(_WIN32) || defined(WIN32)
784 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
785     #endif
786 nigel 75
787 nigel 3 /* Scan options */
788    
789     while (argc > 1 && argv[op][0] == '-')
790     {
791 nigel 63 unsigned char *endptr;
792 nigel 53
793 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
794     showstore = 1;
795 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
796 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
797 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
798     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
799 nigel 79 #if !defined NODFA
800 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
801 nigel 79 #endif
802 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
803 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
804     *endptr == 0))
805 nigel 53 {
806     op++;
807     argc--;
808     }
809 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
810     {
811     int both = argv[op][2] == 0;
812     int temp;
813     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
814     *endptr == 0))
815     {
816     timeitm = temp;
817     op++;
818     argc--;
819     }
820     else timeitm = LOOPREPEAT;
821     if (both) timeit = timeitm;
822     }
823 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
824     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
825     *endptr == 0))
826     {
827 nigel 93 #if defined(_WIN32) || defined(WIN32)
828 nigel 91 printf("PCRE: -S not supported on this OS\n");
829     exit(1);
830     #else
831     int rc;
832     struct rlimit rlim;
833     getrlimit(RLIMIT_STACK, &rlim);
834     rlim.rlim_cur = stack_size * 1024 * 1024;
835     rc = setrlimit(RLIMIT_STACK, &rlim);
836     if (rc != 0)
837     {
838     printf("PCRE: setrlimit() failed with error %d\n", rc);
839     exit(1);
840     }
841     op++;
842     argc--;
843     #endif
844     }
845 nigel 53 #if !defined NOPOSIX
846 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
847 nigel 53 #endif
848 nigel 63 else if (strcmp(argv[op], "-C") == 0)
849     {
850     int rc;
851     printf("PCRE version %s\n", pcre_version());
852     printf("Compiled with\n");
853     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
854     printf(" %sUTF-8 support\n", rc? "" : "No ");
855 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
856     printf(" %sUnicode properties support\n", rc? "" : "No ");
857 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
858 nigel 91 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
859 nigel 93 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
860 ph10 150 (rc == -2)? "ANYCRLF" :
861 nigel 93 (rc == -1)? "ANY" : "???");
862 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
863     printf(" Internal link size = %d\n", rc);
864     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
865     printf(" POSIX malloc threshold = %d\n", rc);
866     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
867     printf(" Default match limit = %d\n", rc);
868 nigel 87 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
869     printf(" Default recursion depth limit = %d\n", rc);
870 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
871     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
872 ph10 121 goto EXIT;
873 nigel 63 }
874 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
875     strcmp(argv[op], "--help") == 0)
876     {
877     usage();
878     goto EXIT;
879     }
880 nigel 3 else
881     {
882 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
883 nigel 93 usage();
884 nigel 77 yield = 1;
885     goto EXIT;
886 nigel 3 }
887     op++;
888     argc--;
889     }
890    
891 nigel 53 /* Get the store for the offsets vector, and remember what it was */
892    
893     size_offsets_max = size_offsets;
894 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
895 nigel 53 if (offsets == NULL)
896     {
897     printf("** Failed to get %d bytes of memory for offsets vector\n",
898 ph10 151 (int)(size_offsets_max * sizeof(int)));
899 nigel 77 yield = 1;
900     goto EXIT;
901 nigel 53 }
902    
903 nigel 3 /* Sort out the input and output files */
904    
905     if (argc > 1)
906     {
907 nigel 93 infile = fopen(argv[op], INPUT_MODE);
908 nigel 3 if (infile == NULL)
909     {
910     printf("** Failed to open %s\n", argv[op]);
911 nigel 77 yield = 1;
912     goto EXIT;
913 nigel 3 }
914     }
915    
916     if (argc > 2)
917     {
918 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
919 nigel 3 if (outfile == NULL)
920     {
921     printf("** Failed to open %s\n", argv[op+1]);
922 nigel 77 yield = 1;
923     goto EXIT;
924 nigel 3 }
925     }
926    
927     /* Set alternative malloc function */
928    
929     pcre_malloc = new_malloc;
930 nigel 73 pcre_free = new_free;
931     pcre_stack_malloc = stack_malloc;
932     pcre_stack_free = stack_free;
933 nigel 3
934 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
935 nigel 3
936 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
937 nigel 3
938     /* Main loop */
939    
940 nigel 11 while (!done)
941 nigel 3 {
942     pcre *re = NULL;
943     pcre_extra *extra = NULL;
944 nigel 37
945     #if !defined NOPOSIX /* There are still compilers that require no indent */
946 nigel 3 regex_t preg;
947 nigel 45 int do_posix = 0;
948 nigel 37 #endif
949    
950 nigel 7 const char *error;
951 nigel 25 unsigned char *p, *pp, *ppp;
952 nigel 75 unsigned char *to_file = NULL;
953 nigel 53 const unsigned char *tables = NULL;
954 nigel 75 unsigned long int true_size, true_study_size = 0;
955     size_t size, regex_gotten_store;
956 nigel 3 int do_study = 0;
957 nigel 25 int do_debug = debug;
958 nigel 35 int do_G = 0;
959     int do_g = 0;
960 nigel 25 int do_showinfo = showinfo;
961 nigel 35 int do_showrest = 0;
962 nigel 75 int do_flip = 0;
963 nigel 93 int erroroffset, len, delimiter, poffset;
964 nigel 3
965 nigel 67 use_utf8 = 0;
966 ph10 210 debug_lengths = 1;
967 nigel 63
968 nigel 3 if (infile == stdin) printf(" re> ");
969 nigel 91 if (extend_inputline(infile, buffer) == NULL) break;
970 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
971 nigel 63 fflush(outfile);
972 nigel 3
973     p = buffer;
974     while (isspace(*p)) p++;
975     if (*p == 0) continue;
976    
977 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
978 nigel 3
979 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
980     {
981 nigel 91 unsigned long int magic, get_options;
982 nigel 75 uschar sbuf[8];
983     FILE *f;
984    
985     p++;
986     pp = p + (int)strlen((char *)p);
987     while (isspace(pp[-1])) pp--;
988     *pp = 0;
989    
990     f = fopen((char *)p, "rb");
991     if (f == NULL)
992     {
993     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
994     continue;
995     }
996    
997     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
998    
999     true_size =
1000     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1001     true_study_size =
1002     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1003    
1004     re = (real_pcre *)new_malloc(true_size);
1005     regex_gotten_store = gotten_store;
1006    
1007     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1008    
1009     magic = ((real_pcre *)re)->magic_number;
1010     if (magic != MAGIC_NUMBER)
1011     {
1012     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1013     {
1014     do_flip = 1;
1015     }
1016     else
1017     {
1018     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1019     fclose(f);
1020     continue;
1021     }
1022     }
1023    
1024     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1025     do_flip? " (byte-inverted)" : "", p);
1026    
1027     /* Need to know if UTF-8 for printing data strings */
1028    
1029 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1030     use_utf8 = (get_options & PCRE_UTF8) != 0;
1031 nigel 75
1032     /* Now see if there is any following study data */
1033    
1034     if (true_study_size != 0)
1035     {
1036     pcre_study_data *psd;
1037    
1038     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1039     extra->flags = PCRE_EXTRA_STUDY_DATA;
1040    
1041     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1042     extra->study_data = psd;
1043    
1044     if (fread(psd, 1, true_study_size, f) != true_study_size)
1045     {
1046     FAIL_READ:
1047     fprintf(outfile, "Failed to read data from %s\n", p);
1048     if (extra != NULL) new_free(extra);
1049     if (re != NULL) new_free(re);
1050     fclose(f);
1051     continue;
1052     }
1053     fprintf(outfile, "Study data loaded from %s\n", p);
1054     do_study = 1; /* To get the data output if requested */
1055     }
1056     else fprintf(outfile, "No study data\n");
1057    
1058     fclose(f);
1059     goto SHOW_INFO;
1060     }
1061    
1062     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1063     the pattern; if it isn't complete, read more. */
1064    
1065 nigel 3 delimiter = *p++;
1066    
1067 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1068 nigel 3 {
1069 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1070 nigel 3 goto SKIP_DATA;
1071     }
1072    
1073     pp = p;
1074 nigel 93 poffset = p - buffer;
1075 nigel 3
1076     for(;;)
1077     {
1078 nigel 29 while (*pp != 0)
1079     {
1080     if (*pp == '\\' && pp[1] != 0) pp++;
1081     else if (*pp == delimiter) break;
1082     pp++;
1083     }
1084 nigel 3 if (*pp != 0) break;
1085     if (infile == stdin) printf(" > ");
1086 nigel 91 if ((pp = extend_inputline(infile, pp)) == NULL)
1087 nigel 3 {
1088     fprintf(outfile, "** Unexpected EOF\n");
1089 nigel 11 done = 1;
1090     goto CONTINUE;
1091 nigel 3 }
1092 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1093 nigel 3 }
1094    
1095 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1096     pointer to the correct relative point in the buffer. */
1097    
1098     p = buffer + poffset;
1099    
1100 nigel 29 /* If the first character after the delimiter is backslash, make
1101     the pattern end with backslash. This is purely to provide a way
1102     of testing for the error message when a pattern ends with backslash. */
1103    
1104     if (pp[1] == '\\') *pp++ = '\\';
1105    
1106 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1107     for callouts. */
1108 nigel 3
1109     *pp++ = 0;
1110 nigel 75 strcpy((char *)pbuffer, (char *)p);
1111 nigel 3
1112     /* Look for options after final delimiter */
1113    
1114     options = 0;
1115     study_options = 0;
1116 nigel 31 log_store = showstore; /* default from command line */
1117    
1118 nigel 3 while (*pp != 0)
1119     {
1120     switch (*pp++)
1121     {
1122 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1123 nigel 35 case 'g': do_g = 1; break;
1124 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1125     case 'm': options |= PCRE_MULTILINE; break;
1126     case 's': options |= PCRE_DOTALL; break;
1127     case 'x': options |= PCRE_EXTENDED; break;
1128 nigel 25
1129 nigel 35 case '+': do_showrest = 1; break;
1130 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1131 nigel 93 case 'B': do_debug = 1; break;
1132 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1133 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1134 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1135 nigel 75 case 'F': do_flip = 1; break;
1136 nigel 35 case 'G': do_G = 1; break;
1137 nigel 25 case 'I': do_showinfo = 1; break;
1138 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1139 nigel 31 case 'M': log_store = 1; break;
1140 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1141 nigel 37
1142     #if !defined NOPOSIX
1143 nigel 3 case 'P': do_posix = 1; break;
1144 nigel 37 #endif
1145    
1146 nigel 3 case 'S': do_study = 1; break;
1147 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1148 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1149 ph10 126 case 'Z': debug_lengths = 0; break;
1150 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1151 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1152 nigel 25
1153     case 'L':
1154     ppp = pp;
1155 nigel 93 /* The '\r' test here is so that it works on Windows. */
1156     /* The '0' test is just in case this is an unterminated line. */
1157     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1158 nigel 25 *ppp = 0;
1159     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1160     {
1161     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1162     goto SKIP_DATA;
1163     }
1164 nigel 93 locale_set = 1;
1165 nigel 25 tables = pcre_maketables();
1166     pp = ppp;
1167     break;
1168    
1169 nigel 75 case '>':
1170     to_file = pp;
1171     while (*pp != 0) pp++;
1172     while (isspace(pp[-1])) pp--;
1173     *pp = 0;
1174     break;
1175    
1176 nigel 91 case '<':
1177     {
1178     int x = check_newline(pp, outfile);
1179     if (x == 0) goto SKIP_DATA;
1180     options |= x;
1181     while (*pp++ != '>');
1182     }
1183     break;
1184    
1185 nigel 77 case '\r': /* So that it works in Windows */
1186     case '\n':
1187     case ' ':
1188     break;
1189 nigel 75
1190 nigel 3 default:
1191     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1192     goto SKIP_DATA;
1193     }
1194     }
1195    
1196 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1197 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1198     local character tables. */
1199 nigel 3
1200 nigel 37 #if !defined NOPOSIX
1201 nigel 3 if (posix || do_posix)
1202     {
1203     int rc;
1204     int cflags = 0;
1205 nigel 75
1206 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1207     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1208 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1209 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1210     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1211    
1212 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1213    
1214     /* Compilation failed; go back for another re, skipping to blank line
1215     if non-interactive. */
1216    
1217     if (rc != 0)
1218     {
1219 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1220 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1221     goto SKIP_DATA;
1222     }
1223     }
1224    
1225     /* Handle compiling via the native interface */
1226    
1227     else
1228 nigel 37 #endif /* !defined NOPOSIX */
1229    
1230 nigel 3 {
1231 nigel 93 if (timeit > 0)
1232 nigel 3 {
1233     register int i;
1234     clock_t time_taken;
1235     clock_t start_time = clock();
1236 nigel 93 for (i = 0; i < timeit; i++)
1237 nigel 3 {
1238 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1239 nigel 3 if (re != NULL) free(re);
1240     }
1241     time_taken = clock() - start_time;
1242 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1243     (((double)time_taken * 1000.0) / (double)timeit) /
1244 nigel 63 (double)CLOCKS_PER_SEC);
1245 nigel 3 }
1246    
1247 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1248 nigel 3
1249     /* Compilation failed; go back for another re, skipping to blank line
1250     if non-interactive. */
1251    
1252     if (re == NULL)
1253     {
1254     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1255     SKIP_DATA:
1256     if (infile != stdin)
1257     {
1258     for (;;)
1259     {
1260 nigel 91 if (extend_inputline(infile, buffer) == NULL)
1261 nigel 11 {
1262     done = 1;
1263     goto CONTINUE;
1264     }
1265 nigel 3 len = (int)strlen((char *)buffer);
1266     while (len > 0 && isspace(buffer[len-1])) len--;
1267     if (len == 0) break;
1268     }
1269     fprintf(outfile, "\n");
1270     }
1271 nigel 25 goto CONTINUE;
1272 nigel 3 }
1273    
1274 nigel 43 /* Compilation succeeded; print data if required. There are now two
1275     info-returning functions. The old one has a limited interface and
1276     returns only limited data. Check that it agrees with the newer one. */
1277 nigel 3
1278 nigel 63 if (log_store)
1279     fprintf(outfile, "Memory allocation (code space): %d\n",
1280     (int)(gotten_store -
1281     sizeof(real_pcre) -
1282     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1283    
1284 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1285     and remember the store that was got. */
1286    
1287     true_size = ((real_pcre *)re)->size;
1288     regex_gotten_store = gotten_store;
1289    
1290     /* If /S was present, study the regexp to generate additional info to
1291     help with the matching. */
1292    
1293     if (do_study)
1294     {
1295 nigel 93 if (timeit > 0)
1296 nigel 75 {
1297     register int i;
1298     clock_t time_taken;
1299     clock_t start_time = clock();
1300 nigel 93 for (i = 0; i < timeit; i++)
1301 nigel 75 extra = pcre_study(re, study_options, &error);
1302     time_taken = clock() - start_time;
1303     if (extra != NULL) free(extra);
1304 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1305     (((double)time_taken * 1000.0) / (double)timeit) /
1306 nigel 75 (double)CLOCKS_PER_SEC);
1307     }
1308     extra = pcre_study(re, study_options, &error);
1309     if (error != NULL)
1310     fprintf(outfile, "Failed to study: %s\n", error);
1311     else if (extra != NULL)
1312     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1313     }
1314    
1315     /* If the 'F' option was present, we flip the bytes of all the integer
1316     fields in the regex data block and the study block. This is to make it
1317     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1318     compiled on a different architecture. */
1319    
1320     if (do_flip)
1321     {
1322     real_pcre *rre = (real_pcre *)re;
1323     rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1324     rre->size = byteflip(rre->size, sizeof(rre->size));
1325     rre->options = byteflip(rre->options, sizeof(rre->options));
1326     rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1327     rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1328     rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1329     rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1330     rre->name_table_offset = byteflip(rre->name_table_offset,
1331     sizeof(rre->name_table_offset));
1332     rre->name_entry_size = byteflip(rre->name_entry_size,
1333     sizeof(rre->name_entry_size));
1334     rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1335    
1336     if (extra != NULL)
1337     {
1338     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1339     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1340     rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1341     }
1342     }
1343    
1344     /* Extract information from the compiled data if required */
1345    
1346     SHOW_INFO:
1347    
1348 nigel 93 if (do_debug)
1349     {
1350     fprintf(outfile, "------------------------------------------------------------------\n");
1351 ph10 116 pcre_printint(re, outfile, debug_lengths);
1352 nigel 93 }
1353    
1354 nigel 25 if (do_showinfo)
1355 nigel 3 {
1356 nigel 75 unsigned long int get_options, all_options;
1357 nigel 79 #if !defined NOINFOCHECK
1358 nigel 43 int old_first_char, old_options, old_count;
1359 nigel 79 #endif
1360 ph10 169 int count, backrefmax, first_char, need_char, okpartial, jchanged;
1361 nigel 63 int nameentrysize, namecount;
1362     const uschar *nametable;
1363 nigel 3
1364 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1365 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1366     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1367     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1368 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1369 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1370 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1371     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1372 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1373 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1374     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1375 nigel 43
1376 nigel 79 #if !defined NOINFOCHECK
1377 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1378 nigel 3 if (count < 0) fprintf(outfile,
1379 nigel 43 "Error %d from pcre_info()\n", count);
1380 nigel 3 else
1381     {
1382 nigel 43 if (old_count != count) fprintf(outfile,
1383     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1384     old_count);
1385 nigel 37
1386 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1387     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1388     first_char, old_first_char);
1389 nigel 37
1390 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1391     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1392     get_options, old_options);
1393 nigel 43 }
1394 nigel 79 #endif
1395 nigel 43
1396 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1397 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1398 nigel 77 (int)size, (int)regex_gotten_store);
1399 nigel 43
1400     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1401     if (backrefmax > 0)
1402     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1403 nigel 63
1404     if (namecount > 0)
1405     {
1406     fprintf(outfile, "Named capturing subpatterns:\n");
1407     while (namecount-- > 0)
1408     {
1409     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1410     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1411     GET2(nametable, 0));
1412     nametable += nameentrysize;
1413     }
1414     }
1415 ph10 172
1416 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1417 nigel 63
1418 nigel 75 all_options = ((real_pcre *)re)->options;
1419 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1420 nigel 75
1421 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1422 nigel 91 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1423 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1424     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1425     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1426     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1427 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1428 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1429     ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1430     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1431     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1432 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1433 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1434 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1435     ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1436 ph10 172
1437 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1438 nigel 43
1439 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1440 nigel 91 {
1441     case PCRE_NEWLINE_CR:
1442     fprintf(outfile, "Forced newline sequence: CR\n");
1443     break;
1444 nigel 43
1445 nigel 91 case PCRE_NEWLINE_LF:
1446     fprintf(outfile, "Forced newline sequence: LF\n");
1447     break;
1448    
1449     case PCRE_NEWLINE_CRLF:
1450     fprintf(outfile, "Forced newline sequence: CRLF\n");
1451     break;
1452    
1453 ph10 149 case PCRE_NEWLINE_ANYCRLF:
1454     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1455     break;
1456    
1457 nigel 93 case PCRE_NEWLINE_ANY:
1458     fprintf(outfile, "Forced newline sequence: ANY\n");
1459     break;
1460    
1461 nigel 91 default:
1462     break;
1463     }
1464    
1465 nigel 43 if (first_char == -1)
1466     {
1467 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1468 nigel 43 }
1469     else if (first_char < 0)
1470     {
1471     fprintf(outfile, "No first char\n");
1472     }
1473     else
1474     {
1475 nigel 63 int ch = first_char & 255;
1476 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1477 nigel 63 "" : " (caseless)";
1478 nigel 93 if (PRINTHEX(ch))
1479 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1480 nigel 3 else
1481 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1482 nigel 43 }
1483 nigel 37
1484 nigel 43 if (need_char < 0)
1485     {
1486     fprintf(outfile, "No need char\n");
1487 nigel 3 }
1488 nigel 43 else
1489     {
1490 nigel 63 int ch = need_char & 255;
1491 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1492 nigel 63 "" : " (caseless)";
1493 nigel 93 if (PRINTHEX(ch))
1494 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1495 nigel 43 else
1496 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1497 nigel 43 }
1498 nigel 75
1499     /* Don't output study size; at present it is in any case a fixed
1500     value, but it varies, depending on the computer architecture, and
1501     so messes up the test suite. (And with the /F option, it might be
1502     flipped.) */
1503    
1504     if (do_study)
1505     {
1506     if (extra == NULL)
1507     fprintf(outfile, "Study returned NULL\n");
1508     else
1509     {
1510     uschar *start_bits = NULL;
1511     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1512    
1513     if (start_bits == NULL)
1514     fprintf(outfile, "No starting byte set\n");
1515     else
1516     {
1517     int i;
1518     int c = 24;
1519     fprintf(outfile, "Starting byte set: ");
1520     for (i = 0; i < 256; i++)
1521     {
1522     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1523     {
1524     if (c > 75)
1525     {
1526     fprintf(outfile, "\n ");
1527     c = 2;
1528     }
1529 nigel 93 if (PRINTHEX(i) && i != ' ')
1530 nigel 75 {
1531     fprintf(outfile, "%c ", i);
1532     c += 2;
1533     }
1534     else
1535     {
1536     fprintf(outfile, "\\x%02x ", i);
1537     c += 5;
1538     }
1539     }
1540     }
1541     fprintf(outfile, "\n");
1542     }
1543     }
1544     }
1545 nigel 3 }
1546    
1547 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1548     that is all. The first 8 bytes of the file are the regex length and then
1549     the study length, in big-endian order. */
1550 nigel 3
1551 nigel 75 if (to_file != NULL)
1552 nigel 3 {
1553 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1554     if (f == NULL)
1555 nigel 3 {
1556 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1557 nigel 3 }
1558 nigel 75 else
1559     {
1560     uschar sbuf[8];
1561     sbuf[0] = (true_size >> 24) & 255;
1562     sbuf[1] = (true_size >> 16) & 255;
1563     sbuf[2] = (true_size >> 8) & 255;
1564     sbuf[3] = (true_size) & 255;
1565 nigel 3
1566 nigel 75 sbuf[4] = (true_study_size >> 24) & 255;
1567     sbuf[5] = (true_study_size >> 16) & 255;
1568     sbuf[6] = (true_study_size >> 8) & 255;
1569     sbuf[7] = (true_study_size) & 255;
1570 nigel 3
1571 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1572     fwrite(re, 1, true_size, f) < true_size)
1573     {
1574     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1575     }
1576 nigel 3 else
1577     {
1578 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1579     if (extra != NULL)
1580 nigel 3 {
1581 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1582     true_study_size)
1583 nigel 3 {
1584 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1585     strerror(errno));
1586 nigel 3 }
1587 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1588 nigel 93
1589 nigel 3 }
1590     }
1591 nigel 75 fclose(f);
1592 nigel 3 }
1593 nigel 77
1594     new_free(re);
1595     if (extra != NULL) new_free(extra);
1596     if (tables != NULL) new_free((void *)tables);
1597 nigel 75 continue; /* With next regex */
1598 nigel 3 }
1599 nigel 75 } /* End of non-POSIX compile */
1600 nigel 3
1601     /* Read data lines and test them */
1602    
1603     for (;;)
1604     {
1605 nigel 87 uschar *q;
1606 ph10 147 uschar *bptr;
1607 nigel 57 int *use_offsets = offsets;
1608 nigel 53 int use_size_offsets = size_offsets;
1609 nigel 63 int callout_data = 0;
1610     int callout_data_set = 0;
1611 nigel 3 int count, c;
1612 nigel 29 int copystrings = 0;
1613 nigel 63 int find_match_limit = 0;
1614 nigel 29 int getstrings = 0;
1615     int getlist = 0;
1616 nigel 39 int gmatched = 0;
1617 nigel 35 int start_offset = 0;
1618 nigel 41 int g_notempty = 0;
1619 nigel 77 int use_dfa = 0;
1620 nigel 3
1621     options = 0;
1622    
1623 nigel 91 *copynames = 0;
1624     *getnames = 0;
1625    
1626     copynamesptr = copynames;
1627     getnamesptr = getnames;
1628    
1629 nigel 63 pcre_callout = callout;
1630     first_callout = 1;
1631     callout_extra = 0;
1632     callout_count = 0;
1633     callout_fail_count = 999999;
1634     callout_fail_id = -1;
1635 nigel 73 show_malloc = 0;
1636 nigel 63
1637 nigel 91 if (extra != NULL) extra->flags &=
1638     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1639    
1640     len = 0;
1641     for (;;)
1642 nigel 11 {
1643 nigel 91 if (infile == stdin) printf("data> ");
1644     if (extend_inputline(infile, buffer + len) == NULL)
1645     {
1646     if (len > 0) break;
1647     done = 1;
1648     goto CONTINUE;
1649     }
1650     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1651     len = (int)strlen((char *)buffer);
1652     if (buffer[len-1] == '\n') break;
1653 nigel 11 }
1654 nigel 3
1655     while (len > 0 && isspace(buffer[len-1])) len--;
1656     buffer[len] = 0;
1657     if (len == 0) break;
1658    
1659     p = buffer;
1660     while (isspace(*p)) p++;
1661    
1662 ph10 147 bptr = q = dbuffer;
1663 nigel 3 while ((c = *p++) != 0)
1664     {
1665     int i = 0;
1666     int n = 0;
1667 nigel 63
1668 nigel 3 if (c == '\\') switch ((c = *p++))
1669     {
1670     case 'a': c = 7; break;
1671     case 'b': c = '\b'; break;
1672     case 'e': c = 27; break;
1673     case 'f': c = '\f'; break;
1674     case 'n': c = '\n'; break;
1675     case 'r': c = '\r'; break;
1676     case 't': c = '\t'; break;
1677     case 'v': c = '\v'; break;
1678    
1679     case '0': case '1': case '2': case '3':
1680     case '4': case '5': case '6': case '7':
1681     c -= '0';
1682     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1683     c = c * 8 + *p++ - '0';
1684 nigel 91
1685     #if !defined NOUTF8
1686     if (use_utf8 && c > 255)
1687     {
1688     unsigned char buff8[8];
1689     int ii, utn;
1690     utn = ord2utf8(c, buff8);
1691     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1692     c = buff8[ii]; /* Last byte */
1693     }
1694     #endif
1695 nigel 3 break;
1696    
1697     case 'x':
1698 nigel 49
1699     /* Handle \x{..} specially - new Perl thing for utf8 */
1700    
1701 nigel 79 #if !defined NOUTF8
1702 nigel 49 if (*p == '{')
1703     {
1704     unsigned char *pt = p;
1705     c = 0;
1706     while (isxdigit(*(++pt)))
1707     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1708     if (*pt == '}')
1709     {
1710 nigel 67 unsigned char buff8[8];
1711 nigel 49 int ii, utn;
1712 nigel 85 utn = ord2utf8(c, buff8);
1713 nigel 67 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1714     c = buff8[ii]; /* Last byte */
1715 nigel 49 p = pt + 1;
1716     break;
1717     }
1718     /* Not correct form; fall through */
1719     }
1720 nigel 79 #endif
1721 nigel 49
1722     /* Ordinary \x */
1723    
1724 nigel 3 c = 0;
1725     while (i++ < 2 && isxdigit(*p))
1726     {
1727     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1728     p++;
1729     }
1730     break;
1731    
1732 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1733 nigel 3 p--;
1734     continue;
1735    
1736 nigel 75 case '>':
1737     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1738     continue;
1739    
1740 nigel 3 case 'A': /* Option setting */
1741     options |= PCRE_ANCHORED;
1742     continue;
1743    
1744     case 'B':
1745     options |= PCRE_NOTBOL;
1746     continue;
1747    
1748 nigel 29 case 'C':
1749 nigel 63 if (isdigit(*p)) /* Set copy string */
1750     {
1751     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1752     copystrings |= 1 << n;
1753     }
1754     else if (isalnum(*p))
1755     {
1756 nigel 91 uschar *npp = copynamesptr;
1757 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1758 nigel 91 *npp++ = 0;
1759 nigel 67 *npp = 0;
1760 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1761 nigel 63 if (n < 0)
1762 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1763     copynamesptr = npp;
1764 nigel 63 }
1765     else if (*p == '+')
1766     {
1767     callout_extra = 1;
1768     p++;
1769     }
1770     else if (*p == '-')
1771     {
1772     pcre_callout = NULL;
1773     p++;
1774     }
1775     else if (*p == '!')
1776     {
1777     callout_fail_id = 0;
1778     p++;
1779     while(isdigit(*p))
1780     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1781     callout_fail_count = 0;
1782     if (*p == '!')
1783     {
1784     p++;
1785     while(isdigit(*p))
1786     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1787     }
1788     }
1789     else if (*p == '*')
1790     {
1791     int sign = 1;
1792     callout_data = 0;
1793     if (*(++p) == '-') { sign = -1; p++; }
1794     while(isdigit(*p))
1795     callout_data = callout_data * 10 + *p++ - '0';
1796     callout_data *= sign;
1797     callout_data_set = 1;
1798     }
1799 nigel 29 continue;
1800    
1801 nigel 79 #if !defined NODFA
1802 nigel 77 case 'D':
1803 nigel 79 #if !defined NOPOSIX
1804 nigel 77 if (posix || do_posix)
1805     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1806     else
1807 nigel 79 #endif
1808 nigel 77 use_dfa = 1;
1809     continue;
1810    
1811     case 'F':
1812     options |= PCRE_DFA_SHORTEST;
1813     continue;
1814 nigel 79 #endif
1815 nigel 77
1816 nigel 29 case 'G':
1817 nigel 63 if (isdigit(*p))
1818     {
1819     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1820     getstrings |= 1 << n;
1821     }
1822     else if (isalnum(*p))
1823     {
1824 nigel 91 uschar *npp = getnamesptr;
1825 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1826 nigel 91 *npp++ = 0;
1827 nigel 67 *npp = 0;
1828 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1829 nigel 63 if (n < 0)
1830 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1831     getnamesptr = npp;
1832 nigel 63 }
1833 nigel 29 continue;
1834    
1835     case 'L':
1836     getlist = 1;
1837     continue;
1838    
1839 nigel 63 case 'M':
1840     find_match_limit = 1;
1841     continue;
1842    
1843 nigel 37 case 'N':
1844     options |= PCRE_NOTEMPTY;
1845     continue;
1846    
1847 nigel 3 case 'O':
1848     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1849 nigel 53 if (n > size_offsets_max)
1850     {
1851     size_offsets_max = n;
1852 nigel 57 free(offsets);
1853 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1854 nigel 53 if (offsets == NULL)
1855     {
1856     printf("** Failed to get %d bytes of memory for offsets vector\n",
1857 ph10 151 (int)(size_offsets_max * sizeof(int)));
1858 nigel 77 yield = 1;
1859     goto EXIT;
1860 nigel 53 }
1861     }
1862     use_size_offsets = n;
1863 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1864 nigel 3 continue;
1865    
1866 nigel 75 case 'P':
1867     options |= PCRE_PARTIAL;
1868     continue;
1869    
1870 nigel 91 case 'Q':
1871     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1872     if (extra == NULL)
1873     {
1874     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1875     extra->flags = 0;
1876     }
1877     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1878     extra->match_limit_recursion = n;
1879     continue;
1880    
1881     case 'q':
1882     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1883     if (extra == NULL)
1884     {
1885     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1886     extra->flags = 0;
1887     }
1888     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1889     extra->match_limit = n;
1890     continue;
1891    
1892 nigel 79 #if !defined NODFA
1893 nigel 77 case 'R':
1894     options |= PCRE_DFA_RESTART;
1895     continue;
1896 nigel 79 #endif
1897 nigel 77
1898 nigel 73 case 'S':
1899     show_malloc = 1;
1900     continue;
1901    
1902 nigel 3 case 'Z':
1903     options |= PCRE_NOTEOL;
1904     continue;
1905 nigel 71
1906     case '?':
1907     options |= PCRE_NO_UTF8_CHECK;
1908     continue;
1909 nigel 91
1910     case '<':
1911     {
1912     int x = check_newline(p, outfile);
1913     if (x == 0) goto NEXT_DATA;
1914     options |= x;
1915     while (*p++ != '>');
1916     }
1917     continue;
1918 nigel 3 }
1919 nigel 9 *q++ = c;
1920 nigel 3 }
1921 nigel 9 *q = 0;
1922     len = q - dbuffer;
1923 nigel 3
1924 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
1925     {
1926     printf("**Match limit not relevant for DFA matching: ignored\n");
1927     find_match_limit = 0;
1928     }
1929    
1930 nigel 3 /* Handle matching via the POSIX interface, which does not
1931 nigel 63 support timing or playing with the match limit or callout data. */
1932 nigel 3
1933 nigel 37 #if !defined NOPOSIX
1934 nigel 3 if (posix || do_posix)
1935     {
1936     int rc;
1937     int eflags = 0;
1938 nigel 63 regmatch_t *pmatch = NULL;
1939     if (use_size_offsets > 0)
1940 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1941 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1942     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1943    
1944 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1945 nigel 3
1946     if (rc != 0)
1947     {
1948 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1949 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1950     }
1951 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1952     != 0)
1953     {
1954     fprintf(outfile, "Matched with REG_NOSUB\n");
1955     }
1956 nigel 3 else
1957     {
1958 nigel 7 size_t i;
1959 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
1960 nigel 3 {
1961     if (pmatch[i].rm_so >= 0)
1962     {
1963 nigel 23 fprintf(outfile, "%2d: ", (int)i);
1964 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
1965     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1966 nigel 3 fprintf(outfile, "\n");
1967 nigel 35 if (i == 0 && do_showrest)
1968     {
1969     fprintf(outfile, " 0+ ");
1970 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1971     outfile);
1972 nigel 35 fprintf(outfile, "\n");
1973     }
1974 nigel 3 }
1975     }
1976     }
1977 nigel 53 free(pmatch);
1978 nigel 3 }
1979    
1980 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
1981 nigel 3
1982 nigel 37 else
1983     #endif /* !defined NOPOSIX */
1984    
1985 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
1986 nigel 3 {
1987 nigel 93 if (timeitm > 0)
1988 nigel 3 {
1989     register int i;
1990     clock_t time_taken;
1991     clock_t start_time = clock();
1992 nigel 77
1993 nigel 79 #if !defined NODFA
1994 nigel 77 if (all_use_dfa || use_dfa)
1995     {
1996     int workspace[1000];
1997 nigel 93 for (i = 0; i < timeitm; i++)
1998 nigel 77 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1999     options | g_notempty, use_offsets, use_size_offsets, workspace,
2000     sizeof(workspace)/sizeof(int));
2001     }
2002     else
2003 nigel 79 #endif
2004 nigel 77
2005 nigel 93 for (i = 0; i < timeitm; i++)
2006 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2007 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2008 nigel 77
2009 nigel 3 time_taken = clock() - start_time;
2010 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2011     (((double)time_taken * 1000.0) / (double)timeitm) /
2012 nigel 63 (double)CLOCKS_PER_SEC);
2013 nigel 3 }
2014    
2015 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2016 nigel 87 varying limits in order to find the minimum value for the match limit and
2017     for the recursion limit. */
2018 nigel 63
2019     if (find_match_limit)
2020     {
2021     if (extra == NULL)
2022     {
2023 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2024 nigel 63 extra->flags = 0;
2025     }
2026    
2027 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2028 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2029     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2030     PCRE_ERROR_MATCHLIMIT, "match()");
2031 nigel 63
2032 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2033     options|g_notempty, use_offsets, use_size_offsets,
2034     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2035     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2036 nigel 63 }
2037    
2038     /* If callout_data is set, use the interface with additional data */
2039    
2040     else if (callout_data_set)
2041     {
2042     if (extra == NULL)
2043     {
2044 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2045 nigel 63 extra->flags = 0;
2046     }
2047     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2048 nigel 71 extra->callout_data = &callout_data;
2049 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2050     options | g_notempty, use_offsets, use_size_offsets);
2051     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2052     }
2053    
2054     /* The normal case is just to do the match once, with the default
2055     value of match_limit. */
2056    
2057 nigel 79 #if !defined NODFA
2058 nigel 77 else if (all_use_dfa || use_dfa)
2059     {
2060     int workspace[1000];
2061     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2062     options | g_notempty, use_offsets, use_size_offsets, workspace,
2063     sizeof(workspace)/sizeof(int));
2064     if (count == 0)
2065     {
2066     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2067     count = use_size_offsets/2;
2068     }
2069     }
2070 nigel 79 #endif
2071 nigel 77
2072 nigel 75 else
2073     {
2074     count = pcre_exec(re, extra, (char *)bptr, len,
2075     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2076 nigel 77 if (count == 0)
2077     {
2078     fprintf(outfile, "Matched, but too many substrings\n");
2079     count = use_size_offsets/3;
2080     }
2081 nigel 75 }
2082 nigel 3
2083 nigel 39 /* Matched */
2084    
2085 nigel 3 if (count >= 0)
2086     {
2087 nigel 93 int i, maxcount;
2088    
2089     #if !defined NODFA
2090     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2091     #endif
2092     maxcount = use_size_offsets/3;
2093    
2094     /* This is a check against a lunatic return value. */
2095    
2096     if (count > maxcount)
2097     {
2098     fprintf(outfile,
2099     "** PCRE error: returned count %d is too big for offset size %d\n",
2100     count, use_size_offsets);
2101     count = use_size_offsets/3;
2102     if (do_g || do_G)
2103     {
2104     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2105     do_g = do_G = FALSE; /* Break g/G loop */
2106     }
2107     }
2108    
2109 nigel 29 for (i = 0; i < count * 2; i += 2)
2110 nigel 3 {
2111 nigel 57 if (use_offsets[i] < 0)
2112 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2113     else
2114     {
2115     fprintf(outfile, "%2d: ", i/2);
2116 nigel 63 (void)pchars(bptr + use_offsets[i],
2117     use_offsets[i+1] - use_offsets[i], outfile);
2118 nigel 3 fprintf(outfile, "\n");
2119 nigel 35 if (i == 0)
2120     {
2121     if (do_showrest)
2122     {
2123     fprintf(outfile, " 0+ ");
2124 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2125     outfile);
2126 nigel 35 fprintf(outfile, "\n");
2127     }
2128     }
2129 nigel 3 }
2130     }
2131 nigel 29
2132     for (i = 0; i < 32; i++)
2133     {
2134     if ((copystrings & (1 << i)) != 0)
2135     {
2136 nigel 91 char copybuffer[256];
2137 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2138 nigel 37 i, copybuffer, sizeof(copybuffer));
2139 nigel 29 if (rc < 0)
2140     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2141     else
2142 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2143 nigel 29 }
2144     }
2145    
2146 nigel 91 for (copynamesptr = copynames;
2147     *copynamesptr != 0;
2148     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2149     {
2150     char copybuffer[256];
2151     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2152     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2153     if (rc < 0)
2154     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2155     else
2156     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2157     }
2158    
2159 nigel 29 for (i = 0; i < 32; i++)
2160     {
2161     if ((getstrings & (1 << i)) != 0)
2162     {
2163     const char *substring;
2164 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2165 nigel 29 i, &substring);
2166     if (rc < 0)
2167     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2168     else
2169     {
2170     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2171 nigel 49 pcre_free_substring(substring);
2172 nigel 29 }
2173     }
2174     }
2175    
2176 nigel 91 for (getnamesptr = getnames;
2177     *getnamesptr != 0;
2178     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2179     {
2180     const char *substring;
2181     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2182     count, (char *)getnamesptr, &substring);
2183     if (rc < 0)
2184     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2185     else
2186     {
2187     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2188     pcre_free_substring(substring);
2189     }
2190     }
2191    
2192 nigel 29 if (getlist)
2193     {
2194     const char **stringlist;
2195 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2196 nigel 29 &stringlist);
2197     if (rc < 0)
2198     fprintf(outfile, "get substring list failed %d\n", rc);
2199     else
2200     {
2201     for (i = 0; i < count; i++)
2202     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2203     if (stringlist[i] != NULL)
2204     fprintf(outfile, "string list not terminated by NULL\n");
2205 nigel 49 /* free((void *)stringlist); */
2206     pcre_free_substring_list(stringlist);
2207 nigel 29 }
2208     }
2209 nigel 39 }
2210 nigel 29
2211 nigel 75 /* There was a partial match */
2212    
2213     else if (count == PCRE_ERROR_PARTIAL)
2214     {
2215 nigel 77 fprintf(outfile, "Partial match");
2216 nigel 79 #if !defined NODFA
2217 nigel 77 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2218     fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2219     bptr + use_offsets[0]);
2220 nigel 79 #endif
2221 nigel 77 fprintf(outfile, "\n");
2222 nigel 75 break; /* Out of the /g loop */
2223     }
2224    
2225 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2226 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2227     to advance the start offset, and continue. We won't be at the end of the
2228     string - that was checked before setting g_notempty.
2229 nigel 39
2230 ph10 150 Complication arises in the case when the newline option is "any" or
2231 ph10 149 "anycrlf". If the previous match was at the end of a line terminated by
2232     CRLF, an advance of one character just passes the \r, whereas we should
2233     prefer the longer newline sequence, as does the code in pcre_exec().
2234     Fudge the offset value to achieve this.
2235 ph10 144
2236 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2237     character, not one byte. */
2238    
2239 nigel 3 else
2240     {
2241 nigel 41 if (g_notempty != 0)
2242 nigel 35 {
2243 nigel 73 int onechar = 1;
2244 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2245 nigel 57 use_offsets[0] = start_offset;
2246 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2247     {
2248     int d;
2249     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2250     obits = (d == '\r')? PCRE_NEWLINE_CR :
2251     (d == '\n')? PCRE_NEWLINE_LF :
2252     (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2253 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2254 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2255     }
2256 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2257 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2258 ph10 149 &&
2259 ph10 143 start_offset < len - 1 &&
2260     bptr[start_offset] == '\r' &&
2261     bptr[start_offset+1] == '\n')
2262 ph10 144 onechar++;
2263 ph10 143 else if (use_utf8)
2264 nigel 73 {
2265     while (start_offset + onechar < len)
2266     {
2267     int tb = bptr[start_offset+onechar];
2268     if (tb <= 127) break;
2269     tb &= 0xc0;
2270     if (tb != 0 && tb != 0xc0) onechar++;
2271     }
2272     }
2273     use_offsets[1] = start_offset + onechar;
2274 nigel 35 }
2275 nigel 41 else
2276     {
2277 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2278 nigel 41 {
2279 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
2280 nigel 41 }
2281 nigel 73 else fprintf(outfile, "Error %d\n", count);
2282 nigel 41 break; /* Out of the /g loop */
2283     }
2284 nigel 3 }
2285 nigel 35
2286 nigel 39 /* If not /g or /G we are done */
2287    
2288     if (!do_g && !do_G) break;
2289    
2290 nigel 41 /* If we have matched an empty string, first check to see if we are at
2291     the end of the subject. If so, the /g loop is over. Otherwise, mimic
2292     what Perl's /g option does. This turns out to be rather cunning. First
2293 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2294     same point. If this fails (picked up above) we advance to the next
2295 ph10 143 character. */
2296 ph10 142
2297 nigel 41 g_notempty = 0;
2298 ph10 142
2299 nigel 57 if (use_offsets[0] == use_offsets[1])
2300 nigel 41 {
2301 nigel 57 if (use_offsets[0] == len) break;
2302 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2303 nigel 41 }
2304 nigel 39
2305     /* For /g, update the start offset, leaving the rest alone */
2306    
2307 ph10 143 if (do_g) start_offset = use_offsets[1];
2308 nigel 39
2309     /* For /G, update the pointer and length */
2310    
2311     else
2312 nigel 35 {
2313 ph10 143 bptr += use_offsets[1];
2314     len -= use_offsets[1];
2315 nigel 35 }
2316 nigel 39 } /* End of loop for /g and /G */
2317 nigel 91
2318     NEXT_DATA: continue;
2319 nigel 39 } /* End of loop for data lines */
2320 nigel 3
2321 nigel 11 CONTINUE:
2322 nigel 37
2323     #if !defined NOPOSIX
2324 nigel 3 if (posix || do_posix) regfree(&preg);
2325 nigel 37 #endif
2326    
2327 nigel 77 if (re != NULL) new_free(re);
2328     if (extra != NULL) new_free(extra);
2329 nigel 25 if (tables != NULL)
2330     {
2331 nigel 77 new_free((void *)tables);
2332 nigel 25 setlocale(LC_CTYPE, "C");
2333 nigel 93 locale_set = 0;
2334 nigel 25 }
2335 nigel 3 }
2336    
2337 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2338 nigel 77
2339     EXIT:
2340    
2341     if (infile != NULL && infile != stdin) fclose(infile);
2342     if (outfile != NULL && outfile != stdout) fclose(outfile);
2343    
2344     free(buffer);
2345     free(dbuffer);
2346     free(pbuffer);
2347     free(offsets);
2348    
2349     return yield;
2350 nigel 3 }
2351    
2352 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12