/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 226 - (hide annotations) (download)
Tue Aug 21 11:46:08 2007 UTC (7 years, 1 month ago) by ph10
File MIME type: text/plain
File size: 68848 byte(s)
Don't advance by 2 if explicit \r or \n in the pattern. Add 
PCRE_INFO_HASCRORLF.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 199 #include <config.h>
41 ph10 200 #endif
42 ph10 199
43 nigel 3 #include <ctype.h>
44     #include <stdio.h>
45     #include <string.h>
46     #include <stdlib.h>
47     #include <time.h>
48 nigel 25 #include <locale.h>
49 nigel 75 #include <errno.h>
50 nigel 3
51 nigel 93
52     /* A number of things vary for Windows builds. Originally, pcretest opened its
53     input and output without "b"; then I was told that "b" was needed in some
54     environments, so it was added for release 5.0 to both the input and output. (It
55     makes no difference on Unix-like systems.) Later I was told that it is wrong
56     for the input on Windows. I've now abstracted the modes into two macros that
57     are set here, to make it easier to fiddle with them, and removed "b" from the
58     input mode under Windows. */
59    
60     #if defined(_WIN32) || defined(WIN32)
61     #include <io.h> /* For _setmode() */
62     #include <fcntl.h> /* For _O_BINARY */
63     #define INPUT_MODE "r"
64     #define OUTPUT_MODE "wb"
65    
66     #else
67     #include <sys/time.h> /* These two includes are needed */
68     #include <sys/resource.h> /* for setrlimit(). */
69     #define INPUT_MODE "rb"
70     #define OUTPUT_MODE "wb"
71 nigel 91 #endif
72    
73 nigel 93
74 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
75     displaying the results of pcre_study() and we also need to know about the
76     internal macros, structures, and other internal data values; pcretest has
77     "inside information" compared to a program that strictly follows the PCRE API.
78 nigel 37
79 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
80     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81     appropriately for an application, not for building PCRE. */
82 nigel 77
83 ph10 145 #include "pcre.h"
84 nigel 77 #include "pcre_internal.h"
85    
86 nigel 85 /* We need access to the data tables that PCRE uses. So as not to have to keep
87     two copies, we include the source file here, changing the names of the external
88     symbols to prevent clashes. */
89 nigel 77
90 nigel 85 #define _pcre_utf8_table1 utf8_table1
91     #define _pcre_utf8_table1_size utf8_table1_size
92     #define _pcre_utf8_table2 utf8_table2
93     #define _pcre_utf8_table3 utf8_table3
94     #define _pcre_utf8_table4 utf8_table4
95     #define _pcre_utt utt
96     #define _pcre_utt_size utt_size
97     #define _pcre_OP_lengths OP_lengths
98    
99     #include "pcre_tables.c"
100    
101     /* We also need the pcre_printint() function for printing out compiled
102     patterns. This function is in a separate file so that it can be included in
103 nigel 93 pcre_compile.c when that module is compiled with debugging enabled.
104 nigel 85
105 nigel 93 The definition of the macro PRINTABLE, which determines whether to print an
106     output character as-is or as a hex value when showing compiled patterns, is
107     contained in this file. We use it here also, in cases when the locale has not
108     been explicitly changed, so as to get consistent output from systems that
109     differ in their output from isprint() even in the "C" locale. */
110    
111 nigel 85 #include "pcre_printint.src"
112    
113 nigel 93 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
114 nigel 85
115 nigel 93
116 nigel 37 /* It is possible to compile this test program without including support for
117     testing the POSIX interface, though this is not available via the standard
118     Makefile. */
119    
120     #if !defined NOPOSIX
121 nigel 3 #include "pcreposix.h"
122 nigel 37 #endif
123 nigel 3
124 ph10 107 /* It is also possible, for the benefit of the version currently imported into
125     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
126     interface to the DFA matcher (NODFA), and without the doublecheck of the old
127     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
128     UTF8 support if PCRE is built without it. */
129 nigel 79
130 ph10 107 #ifndef SUPPORT_UTF8
131     #ifndef NOUTF8
132     #define NOUTF8
133     #endif
134     #endif
135 nigel 79
136 ph10 107
137 nigel 85 /* Other parameters */
138    
139 nigel 3 #ifndef CLOCKS_PER_SEC
140     #ifdef CLK_TCK
141     #define CLOCKS_PER_SEC CLK_TCK
142     #else
143     #define CLOCKS_PER_SEC 100
144     #endif
145     #endif
146    
147 nigel 93 /* This is the default loop count for timing. */
148    
149 nigel 75 #define LOOPREPEAT 500000
150 nigel 3
151 nigel 85 /* Static variables */
152    
153 nigel 3 static FILE *outfile;            /* Stream that receives all test output; main() points it at stdout or the named output file */
154     static int log_store = 0;
155 nigel 63 static int callout_count;        /* Incremented by callout() each time the callout numbered callout_fail_id is reached */
156     static int callout_extra;        /* Non-zero: callout() also lists the captured substrings */
157     static int callout_fail_count;   /* callout() returns 1 once callout_count reaches this value */
158     static int callout_fail_id;      /* Callout number that is subject to the forced-failure count */
159 ph10 210 static int debug_lengths;
160 nigel 63 static int first_callout;        /* Non-zero until callout() has printed the subject once; callout() clears it */
161 nigel 93 static int locale_set = 0;       /* Non-zero: PRINTHEX uses isprint() rather than PRINTABLE() */
162 nigel 73 static int show_malloc;          /* Non-zero: the malloc/free wrappers log every call to outfile */
163 nigel 67 static int use_utf8;             /* Non-zero: pchars() decodes its input as UTF-8 */
164 nigel 43 static size_t gotten_store;      /* Size passed to the most recent new_malloc() call */
165 nigel 3
166 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
167    
168     static int buffer_size = 50000;
169     static uschar *buffer = NULL;
170     static uschar *dbuffer = NULL;
171 nigel 75 static uschar *pbuffer = NULL;
172 nigel 3
173 nigel 75
174 nigel 49
175     /*************************************************
176 nigel 91 * Read or extend an input line *
177     *************************************************/
178    
179     /* Input lines are read into buffer, but both patterns and data lines can be
180     continued over multiple input lines. In addition, if the buffer fills up, we
181     want to automatically expand it so as to be able to handle extremely large
182     lines that are needed for certain stress tests. When the input buffer is
183     expanded, the other two buffers must also be expanded likewise, and the
184     contents of pbuffer, which are a copy of the input for callouts, must be
185     preserved (for when expansion happens for a data line). This is not the most
186     optimal way of handling this, but hey, this is just a test program!
187    
188     Arguments:
189     f the file to read
190     start where in buffer to start (this *must* be within buffer)
191    
192     Returns: pointer to the start of new data
193     could be a copy of start, or could be moved
194     NULL if no data read and EOF reached
195     */
196    
197     static uschar *
198     extend_inputline(FILE *f, uschar *start)
199     {
200     uschar *here = start;
201    
202     for (;;)
203     {
204     int rlen = buffer_size - (here - buffer);
205 nigel 93
206 nigel 91 if (rlen > 1000)
207     {
208     int dlen;
209     if (fgets((char *)here, rlen, f) == NULL)
210     return (here == start)? NULL : start;
211     dlen = (int)strlen((char *)here);
212     if (dlen > 0 && here[dlen - 1] == '\n') return start;
213     here += dlen;
214     }
215    
216     else
217     {
218     int new_buffer_size = 2*buffer_size;
219     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
220     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
221     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
222    
223     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
224     {
225     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
226     exit(1);
227     }
228    
229     memcpy(new_buffer, buffer, buffer_size);
230     memcpy(new_pbuffer, pbuffer, buffer_size);
231    
232     buffer_size = new_buffer_size;
233    
234     start = new_buffer + (start - buffer);
235     here = new_buffer + (here - buffer);
236    
237     free(buffer);
238     free(dbuffer);
239     free(pbuffer);
240    
241     buffer = new_buffer;
242     dbuffer = new_dbuffer;
243     pbuffer = new_pbuffer;
244     }
245     }
246    
247     return NULL; /* Control never gets here */
248     }
249    
250    
251    
252    
253    
254    
255    
256     /*************************************************
257 nigel 63 * Read number from string *
258     *************************************************/
259    
260     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
261     around with conditional compilation, just do the job by hand. It is only used
262 nigel 93 for unpicking arguments, so just keep it simple.
263 nigel 63
264     Arguments:
265     str string to be converted
266     endptr where to put the end pointer
267    
268     Returns: the converted value as an int (0 if no digits are found)
269     */
270    
/* Convert an optional run of white space followed by decimal digits into an
int. *endptr is set to the first character that was not consumed; if there are
no digits the result is 0. */

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int total = 0;
unsigned char *scan = str;

/* Step over leading white space, stopping at the terminating zero. */

for (; *scan != 0 && isspace(*scan); scan++) ;

/* Accumulate the decimal digits. */

for (; isdigit(*scan); scan++)
  total = total * 10 + (int)(*scan - '0');

*endptr = scan;
return total;
}
280    
281    
282    
283 nigel 49
284     /*************************************************
285     * Convert UTF-8 string to value *
286     *************************************************/
287    
288     /* This function takes one or more bytes that represents a UTF-8 character,
289     and returns the value of the character.
290    
291     Argument:
292 nigel 91 utf8bytes a pointer to the byte vector
293     vptr a pointer to an int to receive the value
294 nigel 49
295 nigel 91 Returns: > 0 => the number of bytes consumed
296     -6 to 0 => malformed UTF-8 character at offset = (-return)
297 nigel 49 */
298    
299 nigel 79 #if !defined NOUTF8
300    
301 nigel 67 static int
302 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
303 nigel 49 {
304 nigel 91 int c = *utf8bytes++;
305 nigel 49 int d = c;
306     int i, j, s;
307    
308     for (i = -1; i < 6; i++) /* i is number of additional bytes */
309     {
310     if ((d & 0x80) == 0) break;
311     d <<= 1;
312     }
313    
314     if (i == -1) { *vptr = c; return 1; } /* ascii character */
315     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
316    
317     /* i now has a value in the range 1-5 */
318    
319 nigel 59 s = 6*i;
320 nigel 85 d = (c & utf8_table3[i]) << s;
321 nigel 49
322     for (j = 0; j < i; j++)
323     {
324 nigel 91 c = *utf8bytes++;
325 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
326 nigel 59 s -= 6;
327 nigel 49 d |= (c & 0x3f) << s;
328     }
329    
330     /* Check that encoding was the correct unique one */
331    
332 nigel 85 for (j = 0; j < utf8_table1_size; j++)
333     if (d <= utf8_table1[j]) break;
334 nigel 49 if (j != i) return -(i+1);
335    
336     /* Valid value */
337    
338     *vptr = d;
339     return i+1;
340     }
341    
342 nigel 79 #endif
343 nigel 49
344    
345 nigel 79
346 nigel 63 /*************************************************
347 nigel 85 * Convert character value to UTF-8 *
348     *************************************************/
349    
350     /* This function takes an integer value in the range 0 - 0x7fffffff
351     and encodes it as a UTF-8 character in 1 to 6 bytes.
352    
353     Arguments:
354     cvalue the character value
355 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
356 nigel 85
357     Returns: number of bytes placed in the buffer
358     */
359    
360 nigel 93 #if !defined NOUTF8
361    
362 nigel 85 static int
363 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
364 nigel 85 {
365     register int i, j;
366     for (i = 0; i < utf8_table1_size; i++)
367     if (cvalue <= utf8_table1[i]) break;
368 nigel 91 utf8bytes += i;
369 nigel 85 for (j = i; j > 0; j--)
370     {
371 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
372 nigel 85 cvalue >>= 6;
373     }
374 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
375 nigel 85 return i + 1;
376     }
377    
378 nigel 93 #endif
379 nigel 85
380    
381 nigel 93
382 nigel 85 /*************************************************
383 nigel 63 * Print character string *
384     *************************************************/
385 nigel 49
386 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
387     mode. Yields number of characters printed. If handed a NULL file, just counts
388     chars without printing. */
389 nigel 49
390 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
391 nigel 3 {
392 nigel 85 int c = 0;
393 nigel 63 int yield = 0;
394 nigel 3
395 nigel 63 while (length-- > 0)
396 nigel 3 {
397 nigel 79 #if !defined NOUTF8
398 nigel 67 if (use_utf8)
399 nigel 63 {
400     int rc = utf82ord(p, &c);
401 nigel 3
402 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
403     {
404     length -= rc - 1;
405     p += rc;
406 nigel 93 if (PRINTHEX(c))
407 nigel 63 {
408     if (f != NULL) fprintf(f, "%c", c);
409     yield++;
410     }
411     else
412     {
413 nigel 93 int n = 4;
414     if (f != NULL) fprintf(f, "\\x{%02x}", c);
415     yield += (n <= 0x000000ff)? 2 :
416     (n <= 0x00000fff)? 3 :
417     (n <= 0x0000ffff)? 4 :
418     (n <= 0x000fffff)? 5 : 6;
419 nigel 63 }
420     continue;
421     }
422     }
423 nigel 79 #endif
424 nigel 3
425 nigel 63 /* Not UTF-8, or malformed UTF-8 */
426    
427 nigel 93 c = *p++;
428     if (PRINTHEX(c))
429 nigel 3 {
430 nigel 63 if (f != NULL) fprintf(f, "%c", c);
431     yield++;
432 nigel 3 }
433 nigel 63 else
434 nigel 3 {
435 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
436     yield += 4;
437     }
438     }
439 nigel 3
440 nigel 63 return yield;
441     }
442 nigel 23
443 nigel 3
444 nigel 23
445 nigel 63 /*************************************************
446     * Callout function *
447     *************************************************/
448 nigel 3
449 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
450     the match. Yield zero unless more callouts than the fail count, or the callout
451     data is not zero. */
452 nigel 3
453 nigel 63 static int callout(pcre_callout_block *cb)
454     {
455     FILE *f = (first_callout | callout_extra)? outfile : NULL;
456 nigel 75 int i, pre_start, post_start, subject_length;
457 nigel 3
458 nigel 63 if (callout_extra)
459     {
460     fprintf(f, "Callout %d: last capture = %d\n",
461     cb->callout_number, cb->capture_last);
462 nigel 3
463 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
464     {
465     if (cb->offset_vector[i] < 0)
466     fprintf(f, "%2d: <unset>\n", i/2);
467     else
468     {
469     fprintf(f, "%2d: ", i/2);
470     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
471     cb->offset_vector[i+1] - cb->offset_vector[i], f);
472     fprintf(f, "\n");
473     }
474     }
475     }
476 nigel 3
477 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
478     datails. On subsequent calls in the same match, we use pchars just to find the
479     printed lengths of the substrings. */
480 nigel 3
481 nigel 63 if (f != NULL) fprintf(f, "--->");
482 nigel 3
483 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
484     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
485     cb->current_position - cb->start_match, f);
486 nigel 3
487 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
488    
489 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
490     cb->subject_length - cb->current_position, f);
491 nigel 3
492 nigel 63 if (f != NULL) fprintf(f, "\n");
493 nigel 9
494 nigel 63 /* Always print appropriate indicators, with callout number if not already
495 nigel 75 shown. For automatic callouts, show the pattern offset. */
496 nigel 3
497 nigel 75 if (cb->callout_number == 255)
498     {
499     fprintf(outfile, "%+3d ", cb->pattern_position);
500     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
501     }
502     else
503     {
504     if (callout_extra) fprintf(outfile, " ");
505     else fprintf(outfile, "%3d ", cb->callout_number);
506     }
507 nigel 3
508 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
509     fprintf(outfile, "^");
510 nigel 3
511 nigel 63 if (post_start > 0)
512     {
513     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
514     fprintf(outfile, "^");
515 nigel 3 }
516    
517 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
518     fprintf(outfile, " ");
519    
520     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
521     pbuffer + cb->pattern_position);
522    
523 nigel 63 fprintf(outfile, "\n");
524     first_callout = 0;
525 nigel 3
526 nigel 71 if (cb->callout_data != NULL)
527 nigel 49 {
528 nigel 71 int callout_data = *((int *)(cb->callout_data));
529     if (callout_data != 0)
530     {
531     fprintf(outfile, "Callout data = %d\n", callout_data);
532     return callout_data;
533     }
534 nigel 63 }
535 nigel 49
536 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
537     (++callout_count >= callout_fail_count)? 1 : 0;
538 nigel 3 }
539    
540    
541 nigel 63 /*************************************************
542 nigel 73 * Local malloc functions *
543 nigel 63 *************************************************/
544 nigel 3
545     /* Alternative malloc function, to test functionality and show the size of the
546     compiled re. */
547    
548     static void *new_malloc(size_t size)
549     {
550 nigel 73 void *block = malloc(size);
551 nigel 43 gotten_store = size;
552 nigel 73 if (show_malloc)
553 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
554 nigel 73 return block;
555 nigel 3 }
556    
557 nigel 73 static void new_free(void *block)
558     {
559     if (show_malloc)
560     fprintf(outfile, "free %p\n", block);
561     free(block);
562     }
563 nigel 3
564    
565 nigel 73 /* For recursion malloc/free, to test stacking calls */
566    
567     static void *stack_malloc(size_t size)
568     {
569     void *block = malloc(size);
570     if (show_malloc)
571 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
572 nigel 73 return block;
573     }
574    
575     static void stack_free(void *block)
576     {
577     if (show_malloc)
578     fprintf(outfile, "stack_free %p\n", block);
579     free(block);
580     }
581    
582    
583 nigel 63 /*************************************************
584     * Call pcre_fullinfo() *
585     *************************************************/
586 nigel 43
587     /* Get one piece of information from the pcre_fullinfo() function */
588    
589     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
590     {
591     int rc;
592     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
593     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
594     }
595    
596    
597    
598 nigel 63 /*************************************************
599 nigel 75 * Byte flipping function *
600     *************************************************/
601    
/* Reverse the byte order of a value. When n is 2 the two low-order bytes are
exchanged; for any other n the four low-order bytes are reversed. Bits above
those bytes do not appear in the result. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
611    
612    
613    
614    
615     /*************************************************
616 nigel 87 * Check match or recursion limit *
617     *************************************************/
618    
619     static int
620     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
621     int start_offset, int options, int *use_offsets, int use_size_offsets,
622     int flag, unsigned long int *limit, int errnumber, const char *msg)
623     {
624     int count;
625     int min = 0;
626     int mid = 64;
627     int max = -1;
628    
629     extra->flags |= flag;
630    
631     for (;;)
632     {
633     *limit = mid;
634    
635     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
636     use_offsets, use_size_offsets);
637    
638     if (count == errnumber)
639     {
640     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
641     min = mid;
642     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
643     }
644    
645     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
646     count == PCRE_ERROR_PARTIAL)
647     {
648     if (mid == min + 1)
649     {
650     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
651     break;
652     }
653     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
654     max = mid;
655     mid = (min + mid)/2;
656     }
657     else break; /* Some other error */
658     }
659    
660     extra->flags &= ~flag;
661     return count;
662     }
663    
664    
665    
666     /*************************************************
667 nigel 91 * Check newline indicator *
668     *************************************************/
669    
670     /* This is used both at compile and run-time to check for <xxx> escapes, where
671 ph10 149 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
672     no match.
673 nigel 91
674     Arguments:
675     p points after the leading '<'
676     f file for error message
677    
678     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
679     */
680    
681     static int
682     check_newline(uschar *p, FILE *f)
683     {
684     if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
685     if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
686     if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
687 ph10 149 if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
688 nigel 93 if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
689 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
690     return 0;
691     }
692    
693    
694    
695     /*************************************************
696 nigel 93 * Usage function *
697     *************************************************/
698    
/* Write the command line summary to stdout. The -dfa and -p lines appear only
when the corresponding support is compiled in. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input> [<output>]]\n", stdout);
fputs(" -b show compiled code (bytecode)\n", stdout);
fputs(" -C show PCRE compile-time options and exit\n", stdout);
fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n", stdout);
fputs(" -i show information about compiled patterns\n", stdout);
fputs(" -m output memory used information\n", stdout);
fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
fputs(" -s output store (memory) used information\n", stdout);
fputs(" -t time compilation and execution\n", stdout);
fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
fputs(" -tm time execution (matching) only\n", stdout);
fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
724    
725    
726    
727     /*************************************************
728 nigel 63 * Main Program *
729     *************************************************/
730 nigel 43
731 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
732     consist of a regular expression, in delimiters and optionally followed by
733     options, followed by a set of test data, terminated by an empty line. */
734    
735     int main(int argc, char **argv)
736     {
737     FILE *infile = stdin;
738     int options = 0;
739     int study_options = 0;
740     int op = 1;
741     int timeit = 0;
742 nigel 93 int timeitm = 0;
743 nigel 3 int showinfo = 0;
744 nigel 31 int showstore = 0;
745 nigel 87 int quiet = 0;
746 nigel 53 int size_offsets = 45;
747     int size_offsets_max;
748 nigel 77 int *offsets = NULL;
749 nigel 53 #if !defined NOPOSIX
750 nigel 3 int posix = 0;
751 nigel 53 #endif
752 nigel 3 int debug = 0;
753 nigel 11 int done = 0;
754 nigel 77 int all_use_dfa = 0;
755     int yield = 0;
756 nigel 91 int stack_size;
757 nigel 3
758 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
759     that 1024 is plenty long enough for the few names we'll be testing. */
760 nigel 69
761 nigel 91 uschar copynames[1024];
762     uschar getnames[1024];
763    
764     uschar *copynamesptr;
765     uschar *getnamesptr;
766    
767 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
768 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
769 nigel 69
770 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
771     dbuffer = (unsigned char *)malloc(buffer_size);
772     pbuffer = (unsigned char *)malloc(buffer_size);
773 nigel 69
774 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
775 nigel 3
776 nigel 93 outfile = stdout;
777    
778     /* The following _setmode() stuff is some Windows magic that tells its runtime
779     library to translate CRLF into a single LF character. At least, that's what
780     I've been told: never having used Windows I take this all on trust. Originally
781     it set 0x8000, but then I was advised that _O_BINARY was better. */
782    
783 nigel 75 #if defined(_WIN32) || defined(WIN32)
784 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
785     #endif
786 nigel 75
787 nigel 3 /* Scan options */
788    
789     while (argc > 1 && argv[op][0] == '-')
790     {
791 nigel 63 unsigned char *endptr;
792 nigel 53
793 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
794     showstore = 1;
795 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
796 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
797 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
798     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
799 nigel 79 #if !defined NODFA
800 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
801 nigel 79 #endif
802 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
803 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
804     *endptr == 0))
805 nigel 53 {
806     op++;
807     argc--;
808     }
809 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
810     {
811     int both = argv[op][2] == 0;
812     int temp;
813     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
814     *endptr == 0))
815     {
816     timeitm = temp;
817     op++;
818     argc--;
819     }
820     else timeitm = LOOPREPEAT;
821     if (both) timeit = timeitm;
822     }
823 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
824     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
825     *endptr == 0))
826     {
827 nigel 93 #if defined(_WIN32) || defined(WIN32)
828 nigel 91 printf("PCRE: -S not supported on this OS\n");
829     exit(1);
830     #else
831     int rc;
832     struct rlimit rlim;
833     getrlimit(RLIMIT_STACK, &rlim);
834     rlim.rlim_cur = stack_size * 1024 * 1024;
835     rc = setrlimit(RLIMIT_STACK, &rlim);
836     if (rc != 0)
837     {
838     printf("PCRE: setrlimit() failed with error %d\n", rc);
839     exit(1);
840     }
841     op++;
842     argc--;
843     #endif
844     }
845 nigel 53 #if !defined NOPOSIX
846 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
847 nigel 53 #endif
848 nigel 63 else if (strcmp(argv[op], "-C") == 0)
849     {
850     int rc;
851     printf("PCRE version %s\n", pcre_version());
852     printf("Compiled with\n");
853     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
854     printf(" %sUTF-8 support\n", rc? "" : "No ");
855 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
856     printf(" %sUnicode properties support\n", rc? "" : "No ");
857 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
858 nigel 91 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
859 nigel 93 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
860 ph10 150 (rc == -2)? "ANYCRLF" :
861 nigel 93 (rc == -1)? "ANY" : "???");
862 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
863     printf(" Internal link size = %d\n", rc);
864     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
865     printf(" POSIX malloc threshold = %d\n", rc);
866     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
867     printf(" Default match limit = %d\n", rc);
868 nigel 87 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
869     printf(" Default recursion depth limit = %d\n", rc);
870 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
871     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
872 ph10 121 goto EXIT;
873 nigel 63 }
874 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
875     strcmp(argv[op], "--help") == 0)
876     {
877     usage();
878     goto EXIT;
879     }
880 nigel 3 else
881     {
882 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
883 nigel 93 usage();
884 nigel 77 yield = 1;
885     goto EXIT;
886 nigel 3 }
887     op++;
888     argc--;
889     }
890    
891 nigel 53 /* Get the store for the offsets vector, and remember what it was */
892    
893     size_offsets_max = size_offsets;
894 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
895 nigel 53 if (offsets == NULL)
896     {
897     printf("** Failed to get %d bytes of memory for offsets vector\n",
898 ph10 151 (int)(size_offsets_max * sizeof(int)));
899 nigel 77 yield = 1;
900     goto EXIT;
901 nigel 53 }
902    
903 nigel 3 /* Sort out the input and output files */
904    
905     if (argc > 1)
906     {
907 nigel 93 infile = fopen(argv[op], INPUT_MODE);
908 nigel 3 if (infile == NULL)
909     {
910     printf("** Failed to open %s\n", argv[op]);
911 nigel 77 yield = 1;
912     goto EXIT;
913 nigel 3 }
914     }
915    
916     if (argc > 2)
917     {
918 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
919 nigel 3 if (outfile == NULL)
920     {
921     printf("** Failed to open %s\n", argv[op+1]);
922 nigel 77 yield = 1;
923     goto EXIT;
924 nigel 3 }
925     }
926    
927     /* Set alternative malloc function */
928    
929     pcre_malloc = new_malloc;
930 nigel 73 pcre_free = new_free;
931     pcre_stack_malloc = stack_malloc;
932     pcre_stack_free = stack_free;
933 nigel 3
934 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
935 nigel 3
936 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
937 nigel 3
938     /* Main loop */
939    
940 nigel 11 while (!done)
941 nigel 3 {
942     pcre *re = NULL;
943     pcre_extra *extra = NULL;
944 nigel 37
945     #if !defined NOPOSIX /* There are still compilers that require no indent */
946 nigel 3 regex_t preg;
947 nigel 45 int do_posix = 0;
948 nigel 37 #endif
949    
950 nigel 7 const char *error;
951 nigel 25 unsigned char *p, *pp, *ppp;
952 nigel 75 unsigned char *to_file = NULL;
953 nigel 53 const unsigned char *tables = NULL;
954 nigel 75 unsigned long int true_size, true_study_size = 0;
955     size_t size, regex_gotten_store;
956 nigel 3 int do_study = 0;
957 nigel 25 int do_debug = debug;
958 nigel 35 int do_G = 0;
959     int do_g = 0;
960 nigel 25 int do_showinfo = showinfo;
961 nigel 35 int do_showrest = 0;
962 nigel 75 int do_flip = 0;
963 nigel 93 int erroroffset, len, delimiter, poffset;
964 nigel 3
965 nigel 67 use_utf8 = 0;
966 ph10 211 debug_lengths = 1;
967 nigel 63
968 nigel 3 if (infile == stdin) printf(" re> ");
969 nigel 91 if (extend_inputline(infile, buffer) == NULL) break;
970 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
971 nigel 63 fflush(outfile);
972 nigel 3
973     p = buffer;
974     while (isspace(*p)) p++;
975     if (*p == 0) continue;
976    
977 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
978 nigel 3
979 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
980     {
981 nigel 91 unsigned long int magic, get_options;
982 nigel 75 uschar sbuf[8];
983     FILE *f;
984    
985     p++;
986     pp = p + (int)strlen((char *)p);
987     while (isspace(pp[-1])) pp--;
988     *pp = 0;
989    
990     f = fopen((char *)p, "rb");
991     if (f == NULL)
992     {
993     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
994     continue;
995     }
996    
997     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
998    
999     true_size =
1000     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1001     true_study_size =
1002     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1003    
1004     re = (real_pcre *)new_malloc(true_size);
1005     regex_gotten_store = gotten_store;
1006    
1007     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1008    
1009     magic = ((real_pcre *)re)->magic_number;
1010     if (magic != MAGIC_NUMBER)
1011     {
1012     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1013     {
1014     do_flip = 1;
1015     }
1016     else
1017     {
1018     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1019     fclose(f);
1020     continue;
1021     }
1022     }
1023    
1024     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1025     do_flip? " (byte-inverted)" : "", p);
1026    
1027     /* Need to know if UTF-8 for printing data strings */
1028    
1029 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1030     use_utf8 = (get_options & PCRE_UTF8) != 0;
1031 nigel 75
1032     /* Now see if there is any following study data */
1033    
1034     if (true_study_size != 0)
1035     {
1036     pcre_study_data *psd;
1037    
1038     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1039     extra->flags = PCRE_EXTRA_STUDY_DATA;
1040    
1041     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1042     extra->study_data = psd;
1043    
1044     if (fread(psd, 1, true_study_size, f) != true_study_size)
1045     {
1046     FAIL_READ:
1047     fprintf(outfile, "Failed to read data from %s\n", p);
1048     if (extra != NULL) new_free(extra);
1049     if (re != NULL) new_free(re);
1050     fclose(f);
1051     continue;
1052     }
1053     fprintf(outfile, "Study data loaded from %s\n", p);
1054     do_study = 1; /* To get the data output if requested */
1055     }
1056     else fprintf(outfile, "No study data\n");
1057    
1058     fclose(f);
1059     goto SHOW_INFO;
1060     }
1061    
1062     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1063     the pattern; if it isn't complete, read more. */
1064    
1065 nigel 3 delimiter = *p++;
1066    
1067 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1068 nigel 3 {
1069 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1070 nigel 3 goto SKIP_DATA;
1071     }
1072    
1073     pp = p;
1074 nigel 93 poffset = p - buffer;
1075 nigel 3
1076     for(;;)
1077     {
1078 nigel 29 while (*pp != 0)
1079     {
1080     if (*pp == '\\' && pp[1] != 0) pp++;
1081     else if (*pp == delimiter) break;
1082     pp++;
1083     }
1084 nigel 3 if (*pp != 0) break;
1085     if (infile == stdin) printf(" > ");
1086 nigel 91 if ((pp = extend_inputline(infile, pp)) == NULL)
1087 nigel 3 {
1088     fprintf(outfile, "** Unexpected EOF\n");
1089 nigel 11 done = 1;
1090     goto CONTINUE;
1091 nigel 3 }
1092 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1093 nigel 3 }
1094    
1095 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1096     pointer to the correct relative point in the buffer. */
1097    
1098     p = buffer + poffset;
1099    
1100 nigel 29 /* If the first character after the delimiter is backslash, make
1101     the pattern end with backslash. This is purely to provide a way
1102     of testing for the error message when a pattern ends with backslash. */
1103    
1104     if (pp[1] == '\\') *pp++ = '\\';
1105    
1106 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1107     for callouts. */
1108 nigel 3
1109     *pp++ = 0;
1110 nigel 75 strcpy((char *)pbuffer, (char *)p);
1111 nigel 3
1112     /* Look for options after final delimiter */
1113    
1114     options = 0;
1115     study_options = 0;
1116 nigel 31 log_store = showstore; /* default from command line */
1117    
1118 nigel 3 while (*pp != 0)
1119     {
1120     switch (*pp++)
1121     {
1122 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1123 nigel 35 case 'g': do_g = 1; break;
1124 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1125     case 'm': options |= PCRE_MULTILINE; break;
1126     case 's': options |= PCRE_DOTALL; break;
1127     case 'x': options |= PCRE_EXTENDED; break;
1128 nigel 25
1129 nigel 35 case '+': do_showrest = 1; break;
1130 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1131 nigel 93 case 'B': do_debug = 1; break;
1132 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1133 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1134 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1135 nigel 75 case 'F': do_flip = 1; break;
1136 nigel 35 case 'G': do_G = 1; break;
1137 nigel 25 case 'I': do_showinfo = 1; break;
1138 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1139 nigel 31 case 'M': log_store = 1; break;
1140 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1141 nigel 37
1142     #if !defined NOPOSIX
1143 nigel 3 case 'P': do_posix = 1; break;
1144 nigel 37 #endif
1145    
1146 nigel 3 case 'S': do_study = 1; break;
1147 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1148 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1149 ph10 126 case 'Z': debug_lengths = 0; break;
1150 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1151 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1152 nigel 25
1153     case 'L':
1154     ppp = pp;
1155 nigel 93 /* The '\r' test here is so that it works on Windows. */
1156     /* The test for a zero byte is just in case this is an unterminated line. */
1157     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1158 nigel 25 *ppp = 0;
1159     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1160     {
1161     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1162     goto SKIP_DATA;
1163     }
1164 nigel 93 locale_set = 1;
1165 nigel 25 tables = pcre_maketables();
1166     pp = ppp;
1167     break;
1168    
1169 nigel 75 case '>':
1170     to_file = pp;
1171     while (*pp != 0) pp++;
1172     while (isspace(pp[-1])) pp--;
1173     *pp = 0;
1174     break;
1175    
1176 nigel 91 case '<':
1177     {
1178     int x = check_newline(pp, outfile);
1179     if (x == 0) goto SKIP_DATA;
1180     options |= x;
1181     while (*pp++ != '>');
1182     }
1183     break;
1184    
1185 nigel 77 case '\r': /* So that it works in Windows */
1186     case '\n':
1187     case ' ':
1188     break;
1189 nigel 75
1190 nigel 3 default:
1191     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1192     goto SKIP_DATA;
1193     }
1194     }
1195    
1196 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1197 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1198     local character tables. */
1199 nigel 3
1200 nigel 37 #if !defined NOPOSIX
1201 nigel 3 if (posix || do_posix)
1202     {
1203     int rc;
1204     int cflags = 0;
1205 nigel 75
1206 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1207     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1208 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1209 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1210     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1211    
1212 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1213    
1214     /* Compilation failed; go back for another re, skipping to blank line
1215     if non-interactive. */
1216    
1217     if (rc != 0)
1218     {
1219 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1220 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1221     goto SKIP_DATA;
1222     }
1223     }
1224    
1225     /* Handle compiling via the native interface */
1226    
1227     else
1228 nigel 37 #endif /* !defined NOPOSIX */
1229    
1230 nigel 3 {
1231 nigel 93 if (timeit > 0)
1232 nigel 3 {
1233     register int i;
1234     clock_t time_taken;
1235     clock_t start_time = clock();
1236 nigel 93 for (i = 0; i < timeit; i++)
1237 nigel 3 {
1238 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1239 nigel 3 if (re != NULL) free(re);
1240     }
1241     time_taken = clock() - start_time;
1242 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1243     (((double)time_taken * 1000.0) / (double)timeit) /
1244 nigel 63 (double)CLOCKS_PER_SEC);
1245 nigel 3 }
1246    
1247 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1248 nigel 3
1249     /* Compilation failed; go back for another re, skipping to blank line
1250     if non-interactive. */
1251    
1252     if (re == NULL)
1253     {
1254     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1255     SKIP_DATA:
1256     if (infile != stdin)
1257     {
1258     for (;;)
1259     {
1260 nigel 91 if (extend_inputline(infile, buffer) == NULL)
1261 nigel 11 {
1262     done = 1;
1263     goto CONTINUE;
1264     }
1265 nigel 3 len = (int)strlen((char *)buffer);
1266     while (len > 0 && isspace(buffer[len-1])) len--;
1267     if (len == 0) break;
1268     }
1269     fprintf(outfile, "\n");
1270     }
1271 nigel 25 goto CONTINUE;
1272 nigel 3 }
1273    
1274 nigel 43 /* Compilation succeeded; print data if required. There are now two
1275     info-returning functions. The old one has a limited interface and
1276     returns only limited data. Check that it agrees with the newer one. */
1277 nigel 3
1278 nigel 63 if (log_store)
1279     fprintf(outfile, "Memory allocation (code space): %d\n",
1280     (int)(gotten_store -
1281     sizeof(real_pcre) -
1282     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1283    
1284 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1285     and remember the store that was got. */
1286    
1287     true_size = ((real_pcre *)re)->size;
1288     regex_gotten_store = gotten_store;
1289    
1290     /* If /S was present, study the regexp to generate additional info to
1291     help with the matching. */
1292    
1293     if (do_study)
1294     {
1295 nigel 93 if (timeit > 0)
1296 nigel 75 {
1297     register int i;
1298     clock_t time_taken;
1299     clock_t start_time = clock();
1300 nigel 93 for (i = 0; i < timeit; i++)
1301 nigel 75 extra = pcre_study(re, study_options, &error);
1302     time_taken = clock() - start_time;
1303     if (extra != NULL) free(extra);
1304 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1305     (((double)time_taken * 1000.0) / (double)timeit) /
1306 nigel 75 (double)CLOCKS_PER_SEC);
1307     }
1308     extra = pcre_study(re, study_options, &error);
1309     if (error != NULL)
1310     fprintf(outfile, "Failed to study: %s\n", error);
1311     else if (extra != NULL)
1312     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1313     }
1314    
1315     /* If the 'F' option was present, we flip the bytes of all the integer
1316     fields in the regex data block and the study block. This is to make it
1317     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1318     compiled on a different architecture. */
1319    
1320     if (do_flip)
1321     {
1322     real_pcre *rre = (real_pcre *)re;
1323     rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1324     rre->size = byteflip(rre->size, sizeof(rre->size));
1325     rre->options = byteflip(rre->options, sizeof(rre->options));
1326     rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1327     rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1328     rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1329     rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1330     rre->name_table_offset = byteflip(rre->name_table_offset,
1331     sizeof(rre->name_table_offset));
1332     rre->name_entry_size = byteflip(rre->name_entry_size,
1333     sizeof(rre->name_entry_size));
1334     rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1335    
1336     if (extra != NULL)
1337     {
1338     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1339     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1340     rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1341     }
1342     }
1343    
1344     /* Extract information from the compiled data if required */
1345    
1346     SHOW_INFO:
1347    
1348 nigel 93 if (do_debug)
1349     {
1350     fprintf(outfile, "------------------------------------------------------------------\n");
1351 ph10 116 pcre_printint(re, outfile, debug_lengths);
1352 nigel 93 }
1353    
1354 nigel 25 if (do_showinfo)
1355 nigel 3 {
1356 nigel 75 unsigned long int get_options, all_options;
1357 nigel 79 #if !defined NOINFOCHECK
1358 nigel 43 int old_first_char, old_options, old_count;
1359 nigel 79 #endif
1360 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1361     hascrorlf;
1362 nigel 63 int nameentrysize, namecount;
1363     const uschar *nametable;
1364 nigel 3
1365 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1366 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1367     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1368     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1369 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1370 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1371 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1372     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1373 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1374 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1375     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1376 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1377 nigel 43
1378 nigel 79 #if !defined NOINFOCHECK
1379 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1380 nigel 3 if (count < 0) fprintf(outfile,
1381 nigel 43 "Error %d from pcre_info()\n", count);
1382 nigel 3 else
1383     {
1384 nigel 43 if (old_count != count) fprintf(outfile,
1385     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1386     old_count);
1387 nigel 37
1388 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1389     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1390     first_char, old_first_char);
1391 nigel 37
1392 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1393     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1394     get_options, old_options);
1395 nigel 43 }
1396 nigel 79 #endif
1397 nigel 43
1398 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1399 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1400 nigel 77 (int)size, (int)regex_gotten_store);
1401 nigel 43
1402     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1403     if (backrefmax > 0)
1404     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1405 nigel 63
1406     if (namecount > 0)
1407     {
1408     fprintf(outfile, "Named capturing subpatterns:\n");
1409     while (namecount-- > 0)
1410     {
1411     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1412     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1413     GET2(nametable, 0));
1414     nametable += nameentrysize;
1415     }
1416     }
1417 ph10 172
1418 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1419 ph10 226 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1420 nigel 63
1421 nigel 75 all_options = ((real_pcre *)re)->options;
1422 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1423 nigel 75
1424 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1425 nigel 91 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1426 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1427     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1428     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1429     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1430 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1431 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1432     ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1433     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1434     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1435 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1436 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1437 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1438     ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1439 ph10 172
1440 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1441 nigel 43
1442 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1443 nigel 91 {
1444     case PCRE_NEWLINE_CR:
1445     fprintf(outfile, "Forced newline sequence: CR\n");
1446     break;
1447 nigel 43
1448 nigel 91 case PCRE_NEWLINE_LF:
1449     fprintf(outfile, "Forced newline sequence: LF\n");
1450     break;
1451    
1452     case PCRE_NEWLINE_CRLF:
1453     fprintf(outfile, "Forced newline sequence: CRLF\n");
1454     break;
1455    
1456 ph10 149 case PCRE_NEWLINE_ANYCRLF:
1457     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1458     break;
1459    
1460 nigel 93 case PCRE_NEWLINE_ANY:
1461     fprintf(outfile, "Forced newline sequence: ANY\n");
1462     break;
1463    
1464 nigel 91 default:
1465     break;
1466     }
1467    
1468 nigel 43 if (first_char == -1)
1469     {
1470 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1471 nigel 43 }
1472     else if (first_char < 0)
1473     {
1474     fprintf(outfile, "No first char\n");
1475     }
1476     else
1477     {
1478 nigel 63 int ch = first_char & 255;
1479 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1480 nigel 63 "" : " (caseless)";
1481 nigel 93 if (PRINTHEX(ch))
1482 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1483 nigel 3 else
1484 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1485 nigel 43 }
1486 nigel 37
1487 nigel 43 if (need_char < 0)
1488     {
1489     fprintf(outfile, "No need char\n");
1490 nigel 3 }
1491 nigel 43 else
1492     {
1493 nigel 63 int ch = need_char & 255;
1494 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1495 nigel 63 "" : " (caseless)";
1496 nigel 93 if (PRINTHEX(ch))
1497 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1498 nigel 43 else
1499 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1500 nigel 43 }
1501 nigel 75
1502     /* Don't output study size; at present it is in any case a fixed
1503     value, but it varies, depending on the computer architecture, and
1504     so messes up the test suite. (And with the /F option, it might be
1505     flipped.) */
1506    
1507     if (do_study)
1508     {
1509     if (extra == NULL)
1510     fprintf(outfile, "Study returned NULL\n");
1511     else
1512     {
1513     uschar *start_bits = NULL;
1514     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1515    
1516     if (start_bits == NULL)
1517     fprintf(outfile, "No starting byte set\n");
1518     else
1519     {
1520     int i;
1521     int c = 24;
1522     fprintf(outfile, "Starting byte set: ");
1523     for (i = 0; i < 256; i++)
1524     {
1525     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1526     {
1527     if (c > 75)
1528     {
1529     fprintf(outfile, "\n ");
1530     c = 2;
1531     }
1532 nigel 93 if (PRINTHEX(i) && i != ' ')
1533 nigel 75 {
1534     fprintf(outfile, "%c ", i);
1535     c += 2;
1536     }
1537     else
1538     {
1539     fprintf(outfile, "\\x%02x ", i);
1540     c += 5;
1541     }
1542     }
1543     }
1544     fprintf(outfile, "\n");
1545     }
1546     }
1547     }
1548 nigel 3 }
1549    
1550 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1551     that is all. The first 8 bytes of the file are the regex length and then
1552     the study length, in big-endian order. */
1553 nigel 3
1554 nigel 75 if (to_file != NULL)
1555 nigel 3 {
1556 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1557     if (f == NULL)
1558 nigel 3 {
1559 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1560 nigel 3 }
1561 nigel 75 else
1562     {
1563     uschar sbuf[8];
1564     sbuf[0] = (true_size >> 24) & 255;
1565     sbuf[1] = (true_size >> 16) & 255;
1566     sbuf[2] = (true_size >> 8) & 255;
1567     sbuf[3] = (true_size) & 255;
1568 nigel 3
1569 nigel 75 sbuf[4] = (true_study_size >> 24) & 255;
1570     sbuf[5] = (true_study_size >> 16) & 255;
1571     sbuf[6] = (true_study_size >> 8) & 255;
1572     sbuf[7] = (true_study_size) & 255;
1573 nigel 3
1574 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1575     fwrite(re, 1, true_size, f) < true_size)
1576     {
1577     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1578     }
1579 nigel 3 else
1580     {
1581 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1582     if (extra != NULL)
1583 nigel 3 {
1584 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1585     true_study_size)
1586 nigel 3 {
1587 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1588     strerror(errno));
1589 nigel 3 }
1590 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1591 nigel 93
1592 nigel 3 }
1593     }
1594 nigel 75 fclose(f);
1595 nigel 3 }
1596 nigel 77
1597     new_free(re);
1598     if (extra != NULL) new_free(extra);
1599     if (tables != NULL) new_free((void *)tables);
1600 nigel 75 continue; /* With next regex */
1601 nigel 3 }
1602 nigel 75 } /* End of non-POSIX compile */
1603 nigel 3
1604     /* Read data lines and test them */
1605    
1606     for (;;)
1607     {
1608 nigel 87 uschar *q;
1609 ph10 147 uschar *bptr;
1610 nigel 57 int *use_offsets = offsets;
1611 nigel 53 int use_size_offsets = size_offsets;
1612 nigel 63 int callout_data = 0;
1613     int callout_data_set = 0;
1614 nigel 3 int count, c;
1615 nigel 29 int copystrings = 0;
1616 nigel 63 int find_match_limit = 0;
1617 nigel 29 int getstrings = 0;
1618     int getlist = 0;
1619 nigel 39 int gmatched = 0;
1620 nigel 35 int start_offset = 0;
1621 nigel 41 int g_notempty = 0;
1622 nigel 77 int use_dfa = 0;
1623 nigel 3
1624     options = 0;
1625    
1626 nigel 91 *copynames = 0;
1627     *getnames = 0;
1628    
1629     copynamesptr = copynames;
1630     getnamesptr = getnames;
1631    
1632 nigel 63 pcre_callout = callout;
1633     first_callout = 1;
1634     callout_extra = 0;
1635     callout_count = 0;
1636     callout_fail_count = 999999;
1637     callout_fail_id = -1;
1638 nigel 73 show_malloc = 0;
1639 nigel 63
1640 nigel 91 if (extra != NULL) extra->flags &=
1641     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1642    
1643     len = 0;
1644     for (;;)
1645 nigel 11 {
1646 nigel 91 if (infile == stdin) printf("data> ");
1647     if (extend_inputline(infile, buffer + len) == NULL)
1648     {
1649     if (len > 0) break;
1650     done = 1;
1651     goto CONTINUE;
1652     }
1653     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1654     len = (int)strlen((char *)buffer);
1655     if (buffer[len-1] == '\n') break;
1656 nigel 11 }
1657 nigel 3
1658     while (len > 0 && isspace(buffer[len-1])) len--;
1659     buffer[len] = 0;
1660     if (len == 0) break;
1661    
1662     p = buffer;
1663     while (isspace(*p)) p++;
1664    
1665 ph10 147 bptr = q = dbuffer;
1666 nigel 3 while ((c = *p++) != 0)
1667     {
1668     int i = 0;
1669     int n = 0;
1670 nigel 63
1671 nigel 3 if (c == '\\') switch ((c = *p++))
1672     {
1673     case 'a': c = 7; break;
1674     case 'b': c = '\b'; break;
1675     case 'e': c = 27; break;
1676     case 'f': c = '\f'; break;
1677     case 'n': c = '\n'; break;
1678     case 'r': c = '\r'; break;
1679     case 't': c = '\t'; break;
1680     case 'v': c = '\v'; break;
1681    
1682     case '0': case '1': case '2': case '3':
1683     case '4': case '5': case '6': case '7':
1684     c -= '0';
1685     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1686     c = c * 8 + *p++ - '0';
1687 nigel 91
1688     #if !defined NOUTF8
1689     if (use_utf8 && c > 255)
1690     {
1691     unsigned char buff8[8];
1692     int ii, utn;
1693     utn = ord2utf8(c, buff8);
1694     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1695     c = buff8[ii]; /* Last byte */
1696     }
1697     #endif
1698 nigel 3 break;
1699    
1700     case 'x':
1701 nigel 49
1702     /* Handle \x{..} specially - new Perl thing for utf8 */
1703    
1704 nigel 79 #if !defined NOUTF8
1705 nigel 49 if (*p == '{')
1706     {
1707     unsigned char *pt = p;
1708     c = 0;
1709     while (isxdigit(*(++pt)))
1710     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1711     if (*pt == '}')
1712     {
1713 nigel 67 unsigned char buff8[8];
1714 nigel 49 int ii, utn;
1715 nigel 85 utn = ord2utf8(c, buff8);
1716 nigel 67 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1717     c = buff8[ii]; /* Last byte */
1718 nigel 49 p = pt + 1;
1719     break;
1720     }
1721     /* Not correct form; fall through */
1722     }
1723 nigel 79 #endif
1724 nigel 49
1725     /* Ordinary \x */
1726    
1727 nigel 3 c = 0;
1728     while (i++ < 2 && isxdigit(*p))
1729     {
1730     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1731     p++;
1732     }
1733     break;
1734    
1735 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1736 nigel 3 p--;
1737     continue;
1738    
1739 nigel 75 case '>':
1740     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1741     continue;
1742    
1743 nigel 3 case 'A': /* Option setting */
1744     options |= PCRE_ANCHORED;
1745     continue;
1746    
1747     case 'B':
1748     options |= PCRE_NOTBOL;
1749     continue;
1750    
1751 nigel 29 case 'C':
1752 nigel 63 if (isdigit(*p)) /* Set copy string */
1753     {
1754     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1755     copystrings |= 1 << n;
1756     }
1757     else if (isalnum(*p))
1758     {
1759 nigel 91 uschar *npp = copynamesptr;
1760 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1761 nigel 91 *npp++ = 0;
1762 nigel 67 *npp = 0;
1763 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1764 nigel 63 if (n < 0)
1765 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1766     copynamesptr = npp;
1767 nigel 63 }
1768     else if (*p == '+')
1769     {
1770     callout_extra = 1;
1771     p++;
1772     }
1773     else if (*p == '-')
1774     {
1775     pcre_callout = NULL;
1776     p++;
1777     }
1778     else if (*p == '!')
1779     {
1780     callout_fail_id = 0;
1781     p++;
1782     while(isdigit(*p))
1783     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1784     callout_fail_count = 0;
1785     if (*p == '!')
1786     {
1787     p++;
1788     while(isdigit(*p))
1789     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1790     }
1791     }
1792     else if (*p == '*')
1793     {
1794     int sign = 1;
1795     callout_data = 0;
1796     if (*(++p) == '-') { sign = -1; p++; }
1797     while(isdigit(*p))
1798     callout_data = callout_data * 10 + *p++ - '0';
1799     callout_data *= sign;
1800     callout_data_set = 1;
1801     }
1802 nigel 29 continue;
1803    
1804 nigel 79 #if !defined NODFA
1805 nigel 77 case 'D':
1806 nigel 79 #if !defined NOPOSIX
1807 nigel 77 if (posix || do_posix)
1808     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1809     else
1810 nigel 79 #endif
1811 nigel 77 use_dfa = 1;
1812     continue;
1813    
1814     case 'F':
1815     options |= PCRE_DFA_SHORTEST;
1816     continue;
1817 nigel 79 #endif
1818 nigel 77
1819 nigel 29 case 'G':
1820 nigel 63 if (isdigit(*p))
1821     {
1822     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1823     getstrings |= 1 << n;
1824     }
1825     else if (isalnum(*p))
1826     {
1827 nigel 91 uschar *npp = getnamesptr;
1828 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1829 nigel 91 *npp++ = 0;
1830 nigel 67 *npp = 0;
1831 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1832 nigel 63 if (n < 0)
1833 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1834     getnamesptr = npp;
1835 nigel 63 }
1836 nigel 29 continue;
1837    
1838     case 'L':
1839     getlist = 1;
1840     continue;
1841    
1842 nigel 63 case 'M':
1843     find_match_limit = 1;
1844     continue;
1845    
1846 nigel 37 case 'N':
1847     options |= PCRE_NOTEMPTY;
1848     continue;
1849    
1850 nigel 3 case 'O':
1851     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1852 nigel 53 if (n > size_offsets_max)
1853     {
1854     size_offsets_max = n;
1855 nigel 57 free(offsets);
1856 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1857 nigel 53 if (offsets == NULL)
1858     {
1859     printf("** Failed to get %d bytes of memory for offsets vector\n",
1860 ph10 151 (int)(size_offsets_max * sizeof(int)));
1861 nigel 77 yield = 1;
1862     goto EXIT;
1863 nigel 53 }
1864     }
1865     use_size_offsets = n;
1866 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1867 nigel 3 continue;
1868    
1869 nigel 75 case 'P':
1870     options |= PCRE_PARTIAL;
1871     continue;
1872    
1873 nigel 91 case 'Q':
1874     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1875     if (extra == NULL)
1876     {
1877     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1878     extra->flags = 0;
1879     }
1880     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1881     extra->match_limit_recursion = n;
1882     continue;
1883    
1884     case 'q':
1885     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1886     if (extra == NULL)
1887     {
1888     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1889     extra->flags = 0;
1890     }
1891     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1892     extra->match_limit = n;
1893     continue;
1894    
1895 nigel 79 #if !defined NODFA
1896 nigel 77 case 'R':
1897     options |= PCRE_DFA_RESTART;
1898     continue;
1899 nigel 79 #endif
1900 nigel 77
1901 nigel 73 case 'S':
1902     show_malloc = 1;
1903     continue;
1904    
1905 nigel 3 case 'Z':
1906     options |= PCRE_NOTEOL;
1907     continue;
1908 nigel 71
1909     case '?':
1910     options |= PCRE_NO_UTF8_CHECK;
1911     continue;
1912 nigel 91
1913     case '<':
1914     {
1915     int x = check_newline(p, outfile);
1916     if (x == 0) goto NEXT_DATA;
1917     options |= x;
1918     while (*p++ != '>');
1919     }
1920     continue;
1921 nigel 3 }
1922 nigel 9 *q++ = c;
1923 nigel 3 }
1924 nigel 9 *q = 0;
1925     len = q - dbuffer;
1926 nigel 3
1927 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
1928     {
1929     printf("**Match limit not relevant for DFA matching: ignored\n");
1930     find_match_limit = 0;
1931     }
1932    
1933 nigel 3 /* Handle matching via the POSIX interface, which does not
1934 nigel 63 support timing or playing with the match limit or callout data. */
1935 nigel 3
1936 nigel 37 #if !defined NOPOSIX
1937 nigel 3 if (posix || do_posix)
1938     {
1939     int rc;
1940     int eflags = 0;
1941 nigel 63 regmatch_t *pmatch = NULL;
1942     if (use_size_offsets > 0)
1943 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1944 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1945     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1946    
1947 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1948 nigel 3
1949     if (rc != 0)
1950     {
1951 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1952 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1953     }
1954 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1955     != 0)
1956     {
1957     fprintf(outfile, "Matched with REG_NOSUB\n");
1958     }
1959 nigel 3 else
1960     {
1961 nigel 7 size_t i;
1962 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
1963 nigel 3 {
1964     if (pmatch[i].rm_so >= 0)
1965     {
1966 nigel 23 fprintf(outfile, "%2d: ", (int)i);
1967 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
1968     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1969 nigel 3 fprintf(outfile, "\n");
1970 nigel 35 if (i == 0 && do_showrest)
1971     {
1972     fprintf(outfile, " 0+ ");
1973 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1974     outfile);
1975 nigel 35 fprintf(outfile, "\n");
1976     }
1977 nigel 3 }
1978     }
1979     }
1980 nigel 53 free(pmatch);
1981 nigel 3 }
1982    
1983 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
1984 nigel 3
1985 nigel 37 else
1986     #endif /* !defined NOPOSIX */
1987    
1988 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
1989 nigel 3 {
1990 nigel 93 if (timeitm > 0)
1991 nigel 3 {
1992     register int i;
1993     clock_t time_taken;
1994     clock_t start_time = clock();
1995 nigel 77
1996 nigel 79 #if !defined NODFA
1997 nigel 77 if (all_use_dfa || use_dfa)
1998     {
1999     int workspace[1000];
2000 nigel 93 for (i = 0; i < timeitm; i++)
2001 nigel 77 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2002     options | g_notempty, use_offsets, use_size_offsets, workspace,
2003     sizeof(workspace)/sizeof(int));
2004     }
2005     else
2006 nigel 79 #endif
2007 nigel 77
2008 nigel 93 for (i = 0; i < timeitm; i++)
2009 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2010 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2011 nigel 77
2012 nigel 3 time_taken = clock() - start_time;
2013 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2014     (((double)time_taken * 1000.0) / (double)timeitm) /
2015 nigel 63 (double)CLOCKS_PER_SEC);
2016 nigel 3 }
2017    
2018 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2019 nigel 87 varying limits in order to find the minimum value for the match limit and
2020     for the recursion limit. */
2021 nigel 63
2022     if (find_match_limit)
2023     {
2024     if (extra == NULL)
2025     {
2026 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2027 nigel 63 extra->flags = 0;
2028     }
2029    
2030 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2031 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2032     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2033     PCRE_ERROR_MATCHLIMIT, "match()");
2034 nigel 63
2035 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2036     options|g_notempty, use_offsets, use_size_offsets,
2037     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2038     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2039 nigel 63 }
2040    
2041     /* If callout_data is set, use the interface with additional data */
2042    
2043     else if (callout_data_set)
2044     {
2045     if (extra == NULL)
2046     {
2047 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2048 nigel 63 extra->flags = 0;
2049     }
2050     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2051 nigel 71 extra->callout_data = &callout_data;
2052 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2053     options | g_notempty, use_offsets, use_size_offsets);
2054     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2055     }
2056    
2057     /* The normal case is just to do the match once, with the default
2058     value of match_limit. */
2059    
2060 nigel 79 #if !defined NODFA
2061 nigel 77 else if (all_use_dfa || use_dfa)
2062     {
2063     int workspace[1000];
2064     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2065     options | g_notempty, use_offsets, use_size_offsets, workspace,
2066     sizeof(workspace)/sizeof(int));
2067     if (count == 0)
2068     {
2069     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2070     count = use_size_offsets/2;
2071     }
2072     }
2073 nigel 79 #endif
2074 nigel 77
2075 nigel 75 else
2076     {
2077     count = pcre_exec(re, extra, (char *)bptr, len,
2078     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2079 nigel 77 if (count == 0)
2080     {
2081     fprintf(outfile, "Matched, but too many substrings\n");
2082     count = use_size_offsets/3;
2083     }
2084 nigel 75 }
2085 nigel 3
2086 nigel 39 /* Matched */
2087    
2088 nigel 3 if (count >= 0)
2089     {
2090 nigel 93 int i, maxcount;
2091    
2092     #if !defined NODFA
2093     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2094     #endif
2095     maxcount = use_size_offsets/3;
2096    
2097     /* This is a check against a lunatic return value. */
2098    
2099     if (count > maxcount)
2100     {
2101     fprintf(outfile,
2102     "** PCRE error: returned count %d is too big for offset size %d\n",
2103     count, use_size_offsets);
2104     count = use_size_offsets/3;
2105     if (do_g || do_G)
2106     {
2107     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2108     do_g = do_G = FALSE; /* Break g/G loop */
2109     }
2110     }
2111    
2112 nigel 29 for (i = 0; i < count * 2; i += 2)
2113 nigel 3 {
2114 nigel 57 if (use_offsets[i] < 0)
2115 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2116     else
2117     {
2118     fprintf(outfile, "%2d: ", i/2);
2119 nigel 63 (void)pchars(bptr + use_offsets[i],
2120     use_offsets[i+1] - use_offsets[i], outfile);
2121 nigel 3 fprintf(outfile, "\n");
2122 nigel 35 if (i == 0)
2123     {
2124     if (do_showrest)
2125     {
2126     fprintf(outfile, " 0+ ");
2127 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2128     outfile);
2129 nigel 35 fprintf(outfile, "\n");
2130     }
2131     }
2132 nigel 3 }
2133     }
2134 nigel 29
2135     for (i = 0; i < 32; i++)
2136     {
2137     if ((copystrings & (1 << i)) != 0)
2138     {
2139 nigel 91 char copybuffer[256];
2140 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2141 nigel 37 i, copybuffer, sizeof(copybuffer));
2142 nigel 29 if (rc < 0)
2143     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2144     else
2145 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2146 nigel 29 }
2147     }
2148    
2149 nigel 91 for (copynamesptr = copynames;
2150     *copynamesptr != 0;
2151     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2152     {
2153     char copybuffer[256];
2154     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2155     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2156     if (rc < 0)
2157     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2158     else
2159     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2160     }
2161    
2162 nigel 29 for (i = 0; i < 32; i++)
2163     {
2164     if ((getstrings & (1 << i)) != 0)
2165     {
2166     const char *substring;
2167 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2168 nigel 29 i, &substring);
2169     if (rc < 0)
2170     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2171     else
2172     {
2173     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2174 nigel 49 pcre_free_substring(substring);
2175 nigel 29 }
2176     }
2177     }
2178    
2179 nigel 91 for (getnamesptr = getnames;
2180     *getnamesptr != 0;
2181     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2182     {
2183     const char *substring;
2184     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2185     count, (char *)getnamesptr, &substring);
2186     if (rc < 0)
2187     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2188     else
2189     {
2190     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2191     pcre_free_substring(substring);
2192     }
2193     }
2194    
2195 nigel 29 if (getlist)
2196     {
2197     const char **stringlist;
2198 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2199 nigel 29 &stringlist);
2200     if (rc < 0)
2201     fprintf(outfile, "get substring list failed %d\n", rc);
2202     else
2203     {
2204     for (i = 0; i < count; i++)
2205     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2206     if (stringlist[i] != NULL)
2207     fprintf(outfile, "string list not terminated by NULL\n");
2208 nigel 49 /* free((void *)stringlist); */
2209     pcre_free_substring_list(stringlist);
2210 nigel 29 }
2211     }
2212 nigel 39 }
2213 nigel 29
2214 nigel 75 /* There was a partial match */
2215    
2216     else if (count == PCRE_ERROR_PARTIAL)
2217     {
2218 nigel 77 fprintf(outfile, "Partial match");
2219 nigel 79 #if !defined NODFA
2220 nigel 77 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2221     fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2222     bptr + use_offsets[0]);
2223 nigel 79 #endif
2224 nigel 77 fprintf(outfile, "\n");
2225 nigel 75 break; /* Out of the /g loop */
2226     }
2227    
2228 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2229 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2230     to advance the start offset, and continue. We won't be at the end of the
2231     string - that was checked before setting g_notempty.
2232 nigel 39
2233 ph10 150 Complication arises in the case when the newline option is "any" or
2234 ph10 149 "anycrlf". If the previous match was at the end of a line terminated by
2235     CRLF, an advance of one character just passes the \r, whereas we should
2236     prefer the longer newline sequence, as does the code in pcre_exec().
2237     Fudge the offset value to achieve this.
2238 ph10 144
2239 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2240     character, not one byte. */
2241    
2242 nigel 3 else
2243     {
2244 nigel 41 if (g_notempty != 0)
2245 nigel 35 {
2246 nigel 73 int onechar = 1;
2247 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2248 nigel 57 use_offsets[0] = start_offset;
2249 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2250     {
2251     int d;
2252     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2253     obits = (d == '\r')? PCRE_NEWLINE_CR :
2254     (d == '\n')? PCRE_NEWLINE_LF :
2255     (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2256 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2257 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2258     }
2259 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2260 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2261 ph10 149 &&
2262 ph10 143 start_offset < len - 1 &&
2263     bptr[start_offset] == '\r' &&
2264     bptr[start_offset+1] == '\n')
2265 ph10 144 onechar++;
2266 ph10 143 else if (use_utf8)
2267 nigel 73 {
2268     while (start_offset + onechar < len)
2269     {
2270     int tb = bptr[start_offset+onechar];
2271     if (tb <= 127) break;
2272     tb &= 0xc0;
2273     if (tb != 0 && tb != 0xc0) onechar++;
2274     }
2275     }
2276     use_offsets[1] = start_offset + onechar;
2277 nigel 35 }
2278 nigel 41 else
2279     {
2280 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2281 nigel 41 {
2282 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
2283 nigel 41 }
2284 nigel 73 else fprintf(outfile, "Error %d\n", count);
2285 nigel 41 break; /* Out of the /g loop */
2286     }
2287 nigel 3 }
2288 nigel 35
2289 nigel 39 /* If not /g or /G we are done */
2290    
2291     if (!do_g && !do_G) break;
2292    
2293 nigel 41 /* If we have matched an empty string, first check to see if we are at
2294     the end of the subject. If so, the /g loop is over. Otherwise, mimic
2295     what Perl's /g option does. This turns out to be rather cunning. First
2296 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2297     same point. If this fails (picked up above) we advance to the next
2298 ph10 143 character. */
2299 ph10 142
2300 nigel 41 g_notempty = 0;
2301 ph10 142
2302 nigel 57 if (use_offsets[0] == use_offsets[1])
2303 nigel 41 {
2304 nigel 57 if (use_offsets[0] == len) break;
2305 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2306 nigel 41 }
2307 nigel 39
2308     /* For /g, update the start offset, leaving the rest alone */
2309    
2310 ph10 143 if (do_g) start_offset = use_offsets[1];
2311 nigel 39
2312     /* For /G, update the pointer and length */
2313    
2314     else
2315 nigel 35 {
2316 ph10 143 bptr += use_offsets[1];
2317     len -= use_offsets[1];
2318 nigel 35 }
2319 nigel 39 } /* End of loop for /g and /G */
2320 nigel 91
2321     NEXT_DATA: continue;
2322 nigel 39 } /* End of loop for data lines */
2323 nigel 3
2324 nigel 11 CONTINUE:
2325 nigel 37
2326     #if !defined NOPOSIX
2327 nigel 3 if (posix || do_posix) regfree(&preg);
2328 nigel 37 #endif
2329    
2330 nigel 77 if (re != NULL) new_free(re);
2331     if (extra != NULL) new_free(extra);
2332 nigel 25 if (tables != NULL)
2333     {
2334 nigel 77 new_free((void *)tables);
2335 nigel 25 setlocale(LC_CTYPE, "C");
2336 nigel 93 locale_set = 0;
2337 nigel 25 }
2338 nigel 3 }
2339    
2340 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2341 nigel 77
2342     EXIT:
2343    
2344     if (infile != NULL && infile != stdin) fclose(infile);
2345     if (outfile != NULL && outfile != stdout) fclose(outfile);
2346    
2347     free(buffer);
2348     free(dbuffer);
2349     free(pbuffer);
2350     free(offsets);
2351    
2352     return yield;
2353 nigel 3 }
2354    
2355 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12