/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 230 - (hide annotations) (download)
Mon Sep 10 13:23:56 2007 UTC (7 years, 1 month ago) by ph10
File MIME type: text/plain
File size: 69401 byte(s)
(1) Move internal flags out of the options field, to make room.
(2) \r and \n must be explicit to trigger the special CRLF handline exception.
(3) (?J) at the start now sets JCHANGED as well as DUPNAMES.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 199 #include <config.h>
41 ph10 200 #endif
42 ph10 199
43 nigel 3 #include <ctype.h>
44     #include <stdio.h>
45     #include <string.h>
46     #include <stdlib.h>
47     #include <time.h>
48 nigel 25 #include <locale.h>
49 nigel 75 #include <errno.h>
50 nigel 3
51 nigel 93
52     /* A number of things vary for Windows builds. Originally, pcretest opened its
53     input and output without "b"; then I was told that "b" was needed in some
54     environments, so it was added for release 5.0 to both the input and output. (It
55     makes no difference on Unix-like systems.) Later I was told that it is wrong
56     for the input on Windows. I've now abstracted the modes into two macros that
57     are set here, to make it easier to fiddle with them, and removed "b" from the
58     input mode under Windows. */
59    
60     #if defined(_WIN32) || defined(WIN32)
61     #include <io.h> /* For _setmode() */
62     #include <fcntl.h> /* For _O_BINARY */
63     #define INPUT_MODE "r"
64     #define OUTPUT_MODE "wb"
65    
66     #else
67     #include <sys/time.h> /* These two includes are needed */
68     #include <sys/resource.h> /* for setrlimit(). */
69     #define INPUT_MODE "rb"
70     #define OUTPUT_MODE "wb"
71 nigel 91 #endif
72    
73 nigel 93
74 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
75     displaying the results of pcre_study() and we also need to know about the
76     internal macros, structures, and other internal data values; pcretest has
77     "inside information" compared to a program that strictly follows the PCRE API.
78 nigel 37
79 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
80     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81     appropriately for an application, not for building PCRE. */
82 nigel 77
83 ph10 145 #include "pcre.h"
84 nigel 77 #include "pcre_internal.h"
85    
86 nigel 85 /* We need access to the data tables that PCRE uses. So as not to have to keep
87     two copies, we include the source file here, changing the names of the external
88     symbols to prevent clashes. */
89 nigel 77
90 nigel 85 #define _pcre_utf8_table1 utf8_table1
91     #define _pcre_utf8_table1_size utf8_table1_size
92     #define _pcre_utf8_table2 utf8_table2
93     #define _pcre_utf8_table3 utf8_table3
94     #define _pcre_utf8_table4 utf8_table4
95     #define _pcre_utt utt
96     #define _pcre_utt_size utt_size
97     #define _pcre_OP_lengths OP_lengths
98    
99     #include "pcre_tables.c"
100    
101     /* We also need the pcre_printint() function for printing out compiled
102     patterns. This function is in a separate file so that it can be included in
103 nigel 93 pcre_compile.c when that module is compiled with debugging enabled.
104 nigel 85
105 nigel 93 The definition of the macro PRINTABLE, which determines whether to print an
106     output character as-is or as a hex value when showing compiled patterns, is
107     contained in this file. We use it here also, in cases when the locale has not
108     been explicitly changed, so as to get consistent output from systems that
109     differ in their output from isprint() even in the "C" locale. */
110    
111 nigel 85 #include "pcre_printint.src"
112    
113 nigel 93 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
114 nigel 85
115 nigel 93
116 nigel 37 /* It is possible to compile this test program without including support for
117     testing the POSIX interface, though this is not available via the standard
118     Makefile. */
119    
120     #if !defined NOPOSIX
121 nigel 3 #include "pcreposix.h"
122 nigel 37 #endif
123 nigel 3
124 ph10 107 /* It is also possible, for the benefit of the version currently imported into
125     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
126     interface to the DFA matcher (NODFA), and without the doublecheck of the old
127     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
128     UTF8 support if PCRE is built without it. */
129 nigel 79
130 ph10 107 #ifndef SUPPORT_UTF8
131     #ifndef NOUTF8
132     #define NOUTF8
133     #endif
134     #endif
135 nigel 79
136 ph10 107
137 nigel 85 /* Other parameters */
138    
139 nigel 3 #ifndef CLOCKS_PER_SEC
140     #ifdef CLK_TCK
141     #define CLOCKS_PER_SEC CLK_TCK
142     #else
143     #define CLOCKS_PER_SEC 100
144     #endif
145     #endif
146    
147 nigel 93 /* This is the default loop count for timing. */
148    
149 nigel 75 #define LOOPREPEAT 500000
150 nigel 3
151 nigel 85 /* Static variables */
152    
153 nigel 3 static FILE *outfile; /* Stream written to by the callout and malloc-tracing helpers; main() starts it at stdout */
154     static int log_store = 0;
155 nigel 63 static int callout_count; /* Incremented by callout() each time the callout number equals callout_fail_id */
156     static int callout_extra; /* Non-zero makes callout() print the capture details on every call */
157     static int callout_fail_count; /* callout() yields 1 once callout_count reaches this value */
158     static int callout_fail_id; /* Callout number that is counted against callout_fail_count */
159 ph10 210 static int debug_lengths;
160 nigel 63 static int first_callout; /* Non-zero makes callout() echo the subject; callout() clears it before returning */
161 nigel 93 static int locale_set = 0; /* Selects isprint() rather than PRINTABLE() inside PRINTHEX() */
162 nigel 73 static int show_malloc; /* Non-zero makes the malloc/free wrappers trace each call to outfile */
163 nigel 67 static int use_utf8; /* Non-zero makes pchars() decode its input as UTF-8 */
164 nigel 43 static size_t gotten_store; /* Size passed to the most recent new_malloc() call */
165 nigel 3
166 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
167    
168     static int buffer_size = 50000; /* Current size in bytes of each of the three buffers below */
169     static uschar *buffer = NULL; /* Input lines are read into this buffer */
170     static uschar *dbuffer = NULL; /* Reallocated together with buffer; its use is not shown in this section */
171 nigel 75 static uschar *pbuffer = NULL; /* Copy of the input that callout() prints pattern items from */
172 nigel 3
173 nigel 75
174 nigel 49
175     /*************************************************
176 nigel 91 * Read or extend an input line *
177     *************************************************/
178    
179     /* Input lines are read into buffer, but both patterns and data lines can be
180     continued over multiple input lines. In addition, if the buffer fills up, we
181     want to automatically expand it so as to be able to handle extremely large
182     lines that are needed for certain stress tests. When the input buffer is
183     expanded, the other two buffers must also be expanded likewise, and the
184     contents of pbuffer, which are a copy of the input for callouts, must be
185     preserved (for when expansion happens for a data line). This is not the most
186     optimal way of handling this, but hey, this is just a test program!
187    
188     Arguments:
189     f the file to read
190     start where in buffer to start (this *must* be within buffer)
191    
192     Returns: pointer to the start of new data
193     could be a copy of start, or could be moved
194     NULL if no data read and EOF reached
195     */
196    
197     static uschar *
198     extend_inputline(FILE *f, uschar *start)
199     {
200     uschar *here = start;
201    
202     for (;;)
203     {
204     int rlen = buffer_size - (here - buffer);
205 nigel 93
206 nigel 91 if (rlen > 1000)
207     {
208     int dlen;
209     if (fgets((char *)here, rlen, f) == NULL)
210     return (here == start)? NULL : start;
211     dlen = (int)strlen((char *)here);
212     if (dlen > 0 && here[dlen - 1] == '\n') return start;
213     here += dlen;
214     }
215    
216     else
217     {
218     int new_buffer_size = 2*buffer_size;
219     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
220     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
221     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
222    
223     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
224     {
225     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
226     exit(1);
227     }
228    
229     memcpy(new_buffer, buffer, buffer_size);
230     memcpy(new_pbuffer, pbuffer, buffer_size);
231    
232     buffer_size = new_buffer_size;
233    
234     start = new_buffer + (start - buffer);
235     here = new_buffer + (here - buffer);
236    
237     free(buffer);
238     free(dbuffer);
239     free(pbuffer);
240    
241     buffer = new_buffer;
242     dbuffer = new_dbuffer;
243     pbuffer = new_pbuffer;
244     }
245     }
246    
247     return NULL; /* Control never gets here */
248     }
249    
250    
251    
252    
253    
254    
255    
256     /*************************************************
257 nigel 63 * Read number from string *
258     *************************************************/
259    
260     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
261     around with conditional compilation, just do the job by hand. It is only used
262 nigel 93 for unpicking arguments, so just keep it simple.
263 nigel 63
264     Arguments:
265     str string to be converted
266     endptr where to put the end pointer
267    
268     Returns: the converted value, as an int
269     */
270    
/* Convert a run of decimal digits, optionally preceded by white space, into
an int. Values too large for an int saturate at the largest int instead of
overflowing; the digits are still consumed, so *endptr is unaffected by the
clamping.

Arguments:
  str      string to be converted
  endptr   receives a pointer to the first character after the digits; when
           there are no digits this is the first non-space character

Returns:   the converted value, 0 if no digits were found */

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  /* Largest usable int, obtained without <limits.h>: UINT_MAX >> 1 can never
  exceed INT_MAX. */
  const int limit = (int)(~0u >> 1);
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  while (isdigit(*str))
  {
    int digit = (int)(*str++ - '0');

    /* result * 10 + digit would exceed limit exactly when this holds. */
    if (result > (limit - digit) / 10)
      result = limit;
    else
      result = result * 10 + digit;
  }

  *endptr = str;
  return result;
}
280    
281    
282    
283 nigel 49
284     /*************************************************
285     * Convert UTF-8 string to value *
286     *************************************************/
287    
288     /* This function takes one or more bytes that represents a UTF-8 character,
289     and returns the value of the character.
290    
291     Argument:
292 nigel 91 utf8bytes a pointer to the byte vector
293     vptr a pointer to an int to receive the value
294 nigel 49
295 nigel 91 Returns: > 0 => the number of bytes consumed
296     -6 to 0 => malformed UTF-8 character at offset = (-return)
297 nigel 49 */
298    
299 nigel 79 #if !defined NOUTF8
300    
301 nigel 67 static int
302 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
303 nigel 49 {
304 nigel 91 int c = *utf8bytes++;
305 nigel 49 int d = c;
306     int i, j, s;
307    
308     for (i = -1; i < 6; i++) /* i is number of additional bytes */
309     {
310     if ((d & 0x80) == 0) break;
311     d <<= 1;
312     }
313    
314     if (i == -1) { *vptr = c; return 1; } /* ascii character */
315     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
316    
317     /* i now has a value in the range 1-5 */
318    
319 nigel 59 s = 6*i;
320 nigel 85 d = (c & utf8_table3[i]) << s;
321 nigel 49
322     for (j = 0; j < i; j++)
323     {
324 nigel 91 c = *utf8bytes++;
325 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
326 nigel 59 s -= 6;
327 nigel 49 d |= (c & 0x3f) << s;
328     }
329    
330     /* Check that encoding was the correct unique one */
331    
332 nigel 85 for (j = 0; j < utf8_table1_size; j++)
333     if (d <= utf8_table1[j]) break;
334 nigel 49 if (j != i) return -(i+1);
335    
336     /* Valid value */
337    
338     *vptr = d;
339     return i+1;
340     }
341    
342 nigel 79 #endif
343 nigel 49
344    
345 nigel 79
346 nigel 63 /*************************************************
347 nigel 85 * Convert character value to UTF-8 *
348     *************************************************/
349    
350     /* This function takes an integer value in the range 0 - 0x7fffffff
351     and encodes it as a UTF-8 character in 1 to 6 bytes.
352    
353     Arguments:
354     cvalue the character value
355 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
356 nigel 85
357     Returns: number of bytes placed in the buffer
358     */
359    
360 nigel 93 #if !defined NOUTF8
361    
362 nigel 85 static int
363 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
364 nigel 85 {
365     register int i, j;
366     for (i = 0; i < utf8_table1_size; i++)
367     if (cvalue <= utf8_table1[i]) break;
368 nigel 91 utf8bytes += i;
369 nigel 85 for (j = i; j > 0; j--)
370     {
371 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
372 nigel 85 cvalue >>= 6;
373     }
374 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
375 nigel 85 return i + 1;
376     }
377    
378 nigel 93 #endif
379 nigel 85
380    
381 nigel 93
382 nigel 85 /*************************************************
383 nigel 63 * Print character string *
384     *************************************************/
385 nigel 49
386 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
387     mode. Yields number of characters printed. If handed a NULL file, just counts
388     chars without printing. */
389 nigel 49
390 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
391 nigel 3 {
392 nigel 85 int c = 0;
393 nigel 63 int yield = 0;
394 nigel 3
395 nigel 63 while (length-- > 0)
396 nigel 3 {
397 nigel 79 #if !defined NOUTF8
398 nigel 67 if (use_utf8)
399 nigel 63 {
400     int rc = utf82ord(p, &c);
401 nigel 3
402 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
403     {
404     length -= rc - 1;
405     p += rc;
406 nigel 93 if (PRINTHEX(c))
407 nigel 63 {
408     if (f != NULL) fprintf(f, "%c", c);
409     yield++;
410     }
411     else
412     {
413 nigel 93 int n = 4;
414     if (f != NULL) fprintf(f, "\\x{%02x}", c);
415     yield += (n <= 0x000000ff)? 2 :
416     (n <= 0x00000fff)? 3 :
417     (n <= 0x0000ffff)? 4 :
418     (n <= 0x000fffff)? 5 : 6;
419 nigel 63 }
420     continue;
421     }
422     }
423 nigel 79 #endif
424 nigel 3
425 nigel 63 /* Not UTF-8, or malformed UTF-8 */
426    
427 nigel 93 c = *p++;
428     if (PRINTHEX(c))
429 nigel 3 {
430 nigel 63 if (f != NULL) fprintf(f, "%c", c);
431     yield++;
432 nigel 3 }
433 nigel 63 else
434 nigel 3 {
435 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
436     yield += 4;
437     }
438     }
439 nigel 3
440 nigel 63 return yield;
441     }
442 nigel 23
443 nigel 3
444 nigel 23
445 nigel 63 /*************************************************
446     * Callout function *
447     *************************************************/
448 nigel 3
449 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
450     the match. Yield zero unless more callouts than the fail count, or the callout
451     data is not zero. */
452 nigel 3
453 nigel 63 static int callout(pcre_callout_block *cb)
454     {
455     FILE *f = (first_callout | callout_extra)? outfile : NULL;
456 nigel 75 int i, pre_start, post_start, subject_length;
457 nigel 3
458 nigel 63 if (callout_extra)
459     {
460     fprintf(f, "Callout %d: last capture = %d\n",
461     cb->callout_number, cb->capture_last);
462 nigel 3
463 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
464     {
465     if (cb->offset_vector[i] < 0)
466     fprintf(f, "%2d: <unset>\n", i/2);
467     else
468     {
469     fprintf(f, "%2d: ", i/2);
470     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
471     cb->offset_vector[i+1] - cb->offset_vector[i], f);
472     fprintf(f, "\n");
473     }
474     }
475     }
476 nigel 3
477 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
478     datails. On subsequent calls in the same match, we use pchars just to find the
479     printed lengths of the substrings. */
480 nigel 3
481 nigel 63 if (f != NULL) fprintf(f, "--->");
482 nigel 3
483 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
484     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
485     cb->current_position - cb->start_match, f);
486 nigel 3
487 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
488    
489 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
490     cb->subject_length - cb->current_position, f);
491 nigel 3
492 nigel 63 if (f != NULL) fprintf(f, "\n");
493 nigel 9
494 nigel 63 /* Always print appropriate indicators, with callout number if not already
495 nigel 75 shown. For automatic callouts, show the pattern offset. */
496 nigel 3
497 nigel 75 if (cb->callout_number == 255)
498     {
499     fprintf(outfile, "%+3d ", cb->pattern_position);
500     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
501     }
502     else
503     {
504     if (callout_extra) fprintf(outfile, " ");
505     else fprintf(outfile, "%3d ", cb->callout_number);
506     }
507 nigel 3
508 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
509     fprintf(outfile, "^");
510 nigel 3
511 nigel 63 if (post_start > 0)
512     {
513     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
514     fprintf(outfile, "^");
515 nigel 3 }
516    
517 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
518     fprintf(outfile, " ");
519    
520     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
521     pbuffer + cb->pattern_position);
522    
523 nigel 63 fprintf(outfile, "\n");
524     first_callout = 0;
525 nigel 3
526 nigel 71 if (cb->callout_data != NULL)
527 nigel 49 {
528 nigel 71 int callout_data = *((int *)(cb->callout_data));
529     if (callout_data != 0)
530     {
531     fprintf(outfile, "Callout data = %d\n", callout_data);
532     return callout_data;
533     }
534 nigel 63 }
535 nigel 49
536 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
537     (++callout_count >= callout_fail_count)? 1 : 0;
538 nigel 3 }
539    
540    
541 nigel 63 /*************************************************
542 nigel 73 * Local malloc functions *
543 nigel 63 *************************************************/
544 nigel 3
545     /* Alternative malloc function, to test functionality and show the size of the
546     compiled re. */
547    
548     static void *new_malloc(size_t size)
549     {
550 nigel 73 void *block = malloc(size);
551 nigel 43 gotten_store = size;
552 nigel 73 if (show_malloc)
553 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
554 nigel 73 return block;
555 nigel 3 }
556    
557 nigel 73 static void new_free(void *block)
558     {
559     if (show_malloc)
560     fprintf(outfile, "free %p\n", block);
561     free(block);
562     }
563 nigel 3
564    
565 nigel 73 /* For recursion malloc/free, to test stacking calls */
566    
567     static void *stack_malloc(size_t size)
568     {
569     void *block = malloc(size);
570     if (show_malloc)
571 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
572 nigel 73 return block;
573     }
574    
575     static void stack_free(void *block)
576     {
577     if (show_malloc)
578     fprintf(outfile, "stack_free %p\n", block);
579     free(block);
580     }
581    
582    
583 nigel 63 /*************************************************
584     * Call pcre_fullinfo() *
585     *************************************************/
586 nigel 43
587     /* Get one piece of information from the pcre_fullinfo() function */
588    
589     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
590     {
591     int rc;
592     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
593     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
594     }
595    
596    
597    
598 nigel 63 /*************************************************
599 nigel 75 * Byte flipping function *
600     *************************************************/
601    
/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value    the value to flip; only its low 2 (n == 2) or 4 bytes are used
  n        2 swaps the low two bytes; any other value reverses the low four

Returns:   the flipped value */

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int b0 = value & 0xff;
  unsigned long int b1 = (value >> 8) & 0xff;
  unsigned long int b2 = (value >> 16) & 0xff;
  unsigned long int b3 = (value >> 24) & 0xff;

  if (n == 2) return (b0 << 8) | b1;

  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
611    
612    
613    
614    
615     /*************************************************
616 nigel 87 * Check match or recursion limit *
617     *************************************************/
618    
619     static int
620     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
621     int start_offset, int options, int *use_offsets, int use_size_offsets,
622     int flag, unsigned long int *limit, int errnumber, const char *msg)
623     {
624     int count;
625     int min = 0;
626     int mid = 64;
627     int max = -1;
628    
629     extra->flags |= flag;
630    
631     for (;;)
632     {
633     *limit = mid;
634    
635     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
636     use_offsets, use_size_offsets);
637    
638     if (count == errnumber)
639     {
640     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
641     min = mid;
642     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
643     }
644    
645     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
646     count == PCRE_ERROR_PARTIAL)
647     {
648     if (mid == min + 1)
649     {
650     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
651     break;
652     }
653     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
654     max = mid;
655     mid = (min + mid)/2;
656     }
657     else break; /* Some other error */
658     }
659    
660     extra->flags &= ~flag;
661     return count;
662     }
663    
664    
665    
666     /*************************************************
667 ph10 227 * Case-independent strncmp() function *
668     *************************************************/
669    
670     /*
671     Arguments:
672     s first string
673     t second string
674     n number of characters to compare
675    
676     Returns: < 0, = 0, or > 0, according to the comparison
677     */
678    
679     static int
680     strncmpic(uschar *s, uschar *t, int n)
681     {
682     while (n--)
683     {
684     int c = tolower(*s++) - tolower(*t++);
685     if (c) return c;
686     }
687     return 0;
688     }
689    
690    
691    
692     /*************************************************
693 nigel 91 * Check newline indicator *
694     *************************************************/
695    
696     /* This is used both at compile and run-time to check for <xxx> escapes, where
697 ph10 149 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
698     no match.
699 nigel 91
700     Arguments:
701     p points after the leading '<'
702     f file for error message
703    
704     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
705     */
706    
707     static int
708     check_newline(uschar *p, FILE *f)
709     {
710 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
711     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
712     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
713     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
714     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
715 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
716     return 0;
717     }
718    
719    
720    
721     /*************************************************
722 nigel 93 * Usage function *
723     *************************************************/
724    
/* Write the command-line summary to stdout. The -dfa and -p lines appear only
when the corresponding support is compiled in (NODFA / NOPOSIX not defined). */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input> [<output>]]\n", stdout);
  fputs(" -b show compiled code (bytecode)\n", stdout);
  fputs(" -C show PCRE compile-time options and exit\n", stdout);
  fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
  fputs(" -help show usage information\n", stdout);
  fputs(" -i show information about compiled patterns\n", stdout);
  fputs(" -m output memory used information\n", stdout);
  fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif
  fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
  fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
  fputs(" -s output store (memory) used information\n", stdout);
  fputs(" -t time compilation and execution\n", stdout);
  fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
  fputs(" -tm time execution (matching) only\n", stdout);
  fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
750    
751    
752    
753     /*************************************************
754 nigel 63 * Main Program *
755     *************************************************/
756 nigel 43
757 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
758     consist of a regular expression, in delimiters and optionally followed by
759     options, followed by a set of test data, terminated by an empty line. */
760    
761     int main(int argc, char **argv)
762     {
763     FILE *infile = stdin;
764     int options = 0;
765     int study_options = 0;
766     int op = 1;
767     int timeit = 0;
768 nigel 93 int timeitm = 0;
769 nigel 3 int showinfo = 0;
770 nigel 31 int showstore = 0;
771 nigel 87 int quiet = 0;
772 nigel 53 int size_offsets = 45;
773     int size_offsets_max;
774 nigel 77 int *offsets = NULL;
775 nigel 53 #if !defined NOPOSIX
776 nigel 3 int posix = 0;
777 nigel 53 #endif
778 nigel 3 int debug = 0;
779 nigel 11 int done = 0;
780 nigel 77 int all_use_dfa = 0;
781     int yield = 0;
782 nigel 91 int stack_size;
783 nigel 3
784 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
785     that 1024 is plenty long enough for the few names we'll be testing. */
786 nigel 69
787 nigel 91 uschar copynames[1024];
788     uschar getnames[1024];
789    
790     uschar *copynamesptr;
791     uschar *getnamesptr;
792    
793 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
794 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
795 nigel 69
796 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
797     dbuffer = (unsigned char *)malloc(buffer_size);
798     pbuffer = (unsigned char *)malloc(buffer_size);
799 nigel 69
800 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
801 nigel 3
802 nigel 93 outfile = stdout;
803    
804     /* The following _setmode() stuff is some Windows magic that tells its runtime
805     library to translate CRLF into a single LF character. At least, that's what
806     I've been told: never having used Windows I take this all on trust. Originally
807     it set 0x8000, but then I was advised that _O_BINARY was better. */
808    
809 nigel 75 #if defined(_WIN32) || defined(WIN32)
810 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
811     #endif
812 nigel 75
813 nigel 3 /* Scan options */
814    
815     while (argc > 1 && argv[op][0] == '-')
816     {
817 nigel 63 unsigned char *endptr;
818 nigel 53
819 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
820     showstore = 1;
821 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
822 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
823 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
824     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
825 nigel 79 #if !defined NODFA
826 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
827 nigel 79 #endif
828 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
829 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
830     *endptr == 0))
831 nigel 53 {
832     op++;
833     argc--;
834     }
835 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
836     {
837     int both = argv[op][2] == 0;
838     int temp;
839     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
840     *endptr == 0))
841     {
842     timeitm = temp;
843     op++;
844     argc--;
845     }
846     else timeitm = LOOPREPEAT;
847     if (both) timeit = timeitm;
848     }
849 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
850     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
851     *endptr == 0))
852     {
853 nigel 93 #if defined(_WIN32) || defined(WIN32)
854 nigel 91 printf("PCRE: -S not supported on this OS\n");
855     exit(1);
856     #else
857     int rc;
858     struct rlimit rlim;
859     getrlimit(RLIMIT_STACK, &rlim);
860     rlim.rlim_cur = stack_size * 1024 * 1024;
861     rc = setrlimit(RLIMIT_STACK, &rlim);
862     if (rc != 0)
863     {
864     printf("PCRE: setrlimit() failed with error %d\n", rc);
865     exit(1);
866     }
867     op++;
868     argc--;
869     #endif
870     }
871 nigel 53 #if !defined NOPOSIX
872 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
873 nigel 53 #endif
874 nigel 63 else if (strcmp(argv[op], "-C") == 0)
875     {
876     int rc;
877     printf("PCRE version %s\n", pcre_version());
878     printf("Compiled with\n");
879     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
880     printf(" %sUTF-8 support\n", rc? "" : "No ");
881 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
882     printf(" %sUnicode properties support\n", rc? "" : "No ");
883 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
884 nigel 91 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
885 nigel 93 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
886 ph10 150 (rc == -2)? "ANYCRLF" :
887 nigel 93 (rc == -1)? "ANY" : "???");
888 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
889     printf(" Internal link size = %d\n", rc);
890     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
891     printf(" POSIX malloc threshold = %d\n", rc);
892     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
893     printf(" Default match limit = %d\n", rc);
894 nigel 87 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
895     printf(" Default recursion depth limit = %d\n", rc);
896 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
897     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
898 ph10 121 goto EXIT;
899 nigel 63 }
900 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
901     strcmp(argv[op], "--help") == 0)
902     {
903     usage();
904     goto EXIT;
905     }
906 nigel 3 else
907     {
908 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
909 nigel 93 usage();
910 nigel 77 yield = 1;
911     goto EXIT;
912 nigel 3 }
913     op++;
914     argc--;
915     }
916    
917 nigel 53 /* Get the store for the offsets vector, and remember what it was */
918    
919     size_offsets_max = size_offsets;
920 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
921 nigel 53 if (offsets == NULL)
922     {
923     printf("** Failed to get %d bytes of memory for offsets vector\n",
924 ph10 151 (int)(size_offsets_max * sizeof(int)));
925 nigel 77 yield = 1;
926     goto EXIT;
927 nigel 53 }
928    
929 nigel 3 /* Sort out the input and output files */
930    
931     if (argc > 1)
932     {
933 nigel 93 infile = fopen(argv[op], INPUT_MODE);
934 nigel 3 if (infile == NULL)
935     {
936     printf("** Failed to open %s\n", argv[op]);
937 nigel 77 yield = 1;
938     goto EXIT;
939 nigel 3 }
940     }
941    
942     if (argc > 2)
943     {
944 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
945 nigel 3 if (outfile == NULL)
946     {
947     printf("** Failed to open %s\n", argv[op+1]);
948 nigel 77 yield = 1;
949     goto EXIT;
950 nigel 3 }
951     }
952    
953     /* Set alternative malloc function */
954    
955     pcre_malloc = new_malloc;
956 nigel 73 pcre_free = new_free;
957     pcre_stack_malloc = stack_malloc;
958     pcre_stack_free = stack_free;
959 nigel 3
960 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
961 nigel 3
962 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
963 nigel 3
964     /* Main loop */
965    
966 nigel 11 while (!done)
967 nigel 3 {
968     pcre *re = NULL;
969     pcre_extra *extra = NULL;
970 nigel 37
971     #if !defined NOPOSIX /* There are still compilers that require no indent */
972 nigel 3 regex_t preg;
973 nigel 45 int do_posix = 0;
974 nigel 37 #endif
975    
976 nigel 7 const char *error;
977 nigel 25 unsigned char *p, *pp, *ppp;
978 nigel 75 unsigned char *to_file = NULL;
979 nigel 53 const unsigned char *tables = NULL;
980 nigel 75 unsigned long int true_size, true_study_size = 0;
981     size_t size, regex_gotten_store;
982 nigel 3 int do_study = 0;
983 nigel 25 int do_debug = debug;
984 nigel 35 int do_G = 0;
985     int do_g = 0;
986 nigel 25 int do_showinfo = showinfo;
987 nigel 35 int do_showrest = 0;
988 nigel 75 int do_flip = 0;
989 nigel 93 int erroroffset, len, delimiter, poffset;
990 nigel 3
991 nigel 67 use_utf8 = 0;
992 ph10 211 debug_lengths = 1;
993 nigel 63
994 nigel 3 if (infile == stdin) printf(" re> ");
995 nigel 91 if (extend_inputline(infile, buffer) == NULL) break;
996 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
997 nigel 63 fflush(outfile);
998 nigel 3
999     p = buffer;
1000     while (isspace(*p)) p++;
1001     if (*p == 0) continue;
1002    
1003 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1004 nigel 3
1005 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1006     {
1007 nigel 91 unsigned long int magic, get_options;
1008 nigel 75 uschar sbuf[8];
1009     FILE *f;
1010    
1011     p++;
1012     pp = p + (int)strlen((char *)p);
1013     while (isspace(pp[-1])) pp--;
1014     *pp = 0;
1015    
1016     f = fopen((char *)p, "rb");
1017     if (f == NULL)
1018     {
1019     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1020     continue;
1021     }
1022    
1023     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1024    
1025     true_size =
1026     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1027     true_study_size =
1028     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1029    
1030     re = (real_pcre *)new_malloc(true_size);
1031     regex_gotten_store = gotten_store;
1032    
1033     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1034    
1035     magic = ((real_pcre *)re)->magic_number;
1036     if (magic != MAGIC_NUMBER)
1037     {
1038     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1039     {
1040     do_flip = 1;
1041     }
1042     else
1043     {
1044     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1045     fclose(f);
1046     continue;
1047     }
1048     }
1049    
1050     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1051     do_flip? " (byte-inverted)" : "", p);
1052    
1053     /* Need to know if UTF-8 for printing data strings */
1054    
1055 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1056     use_utf8 = (get_options & PCRE_UTF8) != 0;
1057 nigel 75
1058     /* Now see if there is any following study data */
1059    
1060     if (true_study_size != 0)
1061     {
1062     pcre_study_data *psd;
1063    
1064     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1065     extra->flags = PCRE_EXTRA_STUDY_DATA;
1066    
1067     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1068     extra->study_data = psd;
1069    
1070     if (fread(psd, 1, true_study_size, f) != true_study_size)
1071     {
1072     FAIL_READ:
1073     fprintf(outfile, "Failed to read data from %s\n", p);
1074     if (extra != NULL) new_free(extra);
1075     if (re != NULL) new_free(re);
1076     fclose(f);
1077     continue;
1078     }
1079     fprintf(outfile, "Study data loaded from %s\n", p);
1080     do_study = 1; /* To get the data output if requested */
1081     }
1082     else fprintf(outfile, "No study data\n");
1083    
1084     fclose(f);
1085     goto SHOW_INFO;
1086     }
1087    
1088     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1089     the pattern; if it isn't complete, read more. */
1090    
1091 nigel 3 delimiter = *p++;
1092    
1093 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1094 nigel 3 {
1095 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1096 nigel 3 goto SKIP_DATA;
1097     }
1098    
1099     pp = p;
1100 nigel 93 poffset = p - buffer;
1101 nigel 3
1102     for(;;)
1103     {
1104 nigel 29 while (*pp != 0)
1105     {
1106     if (*pp == '\\' && pp[1] != 0) pp++;
1107     else if (*pp == delimiter) break;
1108     pp++;
1109     }
1110 nigel 3 if (*pp != 0) break;
1111     if (infile == stdin) printf(" > ");
1112 nigel 91 if ((pp = extend_inputline(infile, pp)) == NULL)
1113 nigel 3 {
1114     fprintf(outfile, "** Unexpected EOF\n");
1115 nigel 11 done = 1;
1116     goto CONTINUE;
1117 nigel 3 }
1118 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1119 nigel 3 }
1120    
1121 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1122     pointer to the correct relative point in the buffer. */
1123    
1124     p = buffer + poffset;
1125    
1126 nigel 29 /* If the first character after the delimiter is backslash, make
1127     the pattern end with backslash. This is purely to provide a way
1128     of testing for the error message when a pattern ends with backslash. */
1129    
1130     if (pp[1] == '\\') *pp++ = '\\';
1131    
1132 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1133     for callouts. */
1134 nigel 3
1135     *pp++ = 0;
1136 nigel 75 strcpy((char *)pbuffer, (char *)p);
1137 nigel 3
1138     /* Look for options after final delimiter */
1139    
1140     options = 0;
1141     study_options = 0;
1142 nigel 31 log_store = showstore; /* default from command line */
1143    
1144 nigel 3 while (*pp != 0)
1145     {
1146     switch (*pp++)
1147     {
1148 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1149 nigel 35 case 'g': do_g = 1; break;
1150 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1151     case 'm': options |= PCRE_MULTILINE; break;
1152     case 's': options |= PCRE_DOTALL; break;
1153     case 'x': options |= PCRE_EXTENDED; break;
1154 nigel 25
1155 nigel 35 case '+': do_showrest = 1; break;
1156 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1157 nigel 93 case 'B': do_debug = 1; break;
1158 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1159 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1160 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1161 nigel 75 case 'F': do_flip = 1; break;
1162 nigel 35 case 'G': do_G = 1; break;
1163 nigel 25 case 'I': do_showinfo = 1; break;
1164 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1165 nigel 31 case 'M': log_store = 1; break;
1166 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1167 nigel 37
1168     #if !defined NOPOSIX
1169 nigel 3 case 'P': do_posix = 1; break;
1170 nigel 37 #endif
1171    
1172 nigel 3 case 'S': do_study = 1; break;
1173 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1174 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1175 ph10 126 case 'Z': debug_lengths = 0; break;
1176 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1177 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1178 nigel 25
1179     case 'L':
1180     ppp = pp;
1181 nigel 93 /* The '\r' test here is so that it works on Windows. */
1182     /* The '0' test is just in case this is an unterminated line. */
1183     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1184 nigel 25 *ppp = 0;
1185     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1186     {
1187     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1188     goto SKIP_DATA;
1189     }
1190 nigel 93 locale_set = 1;
1191 nigel 25 tables = pcre_maketables();
1192     pp = ppp;
1193     break;
1194    
1195 nigel 75 case '>':
1196     to_file = pp;
1197     while (*pp != 0) pp++;
1198     while (isspace(pp[-1])) pp--;
1199     *pp = 0;
1200     break;
1201    
1202 nigel 91 case '<':
1203     {
1204     int x = check_newline(pp, outfile);
1205     if (x == 0) goto SKIP_DATA;
1206     options |= x;
1207     while (*pp++ != '>');
1208     }
1209     break;
1210    
1211 nigel 77 case '\r': /* So that it works in Windows */
1212     case '\n':
1213     case ' ':
1214     break;
1215 nigel 75
1216 nigel 3 default:
1217     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1218     goto SKIP_DATA;
1219     }
1220     }
1221    
1222 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1223 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1224     local character tables. */
1225 nigel 3
1226 nigel 37 #if !defined NOPOSIX
1227 nigel 3 if (posix || do_posix)
1228     {
1229     int rc;
1230     int cflags = 0;
1231 nigel 75
1232 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1233     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1234 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1235 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1236     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1237    
1238 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1239    
1240     /* Compilation failed; go back for another re, skipping to blank line
1241     if non-interactive. */
1242    
1243     if (rc != 0)
1244     {
1245 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1246 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1247     goto SKIP_DATA;
1248     }
1249     }
1250    
1251     /* Handle compiling via the native interface */
1252    
1253     else
1254 nigel 37 #endif /* !defined NOPOSIX */
1255    
1256 nigel 3 {
1257 nigel 93 if (timeit > 0)
1258 nigel 3 {
1259     register int i;
1260     clock_t time_taken;
1261     clock_t start_time = clock();
1262 nigel 93 for (i = 0; i < timeit; i++)
1263 nigel 3 {
1264 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1265 nigel 3 if (re != NULL) free(re);
1266     }
1267     time_taken = clock() - start_time;
1268 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1269     (((double)time_taken * 1000.0) / (double)timeit) /
1270 nigel 63 (double)CLOCKS_PER_SEC);
1271 nigel 3 }
1272    
1273 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1274 nigel 3
1275     /* Compilation failed; go back for another re, skipping to blank line
1276     if non-interactive. */
1277    
1278     if (re == NULL)
1279     {
1280     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1281     SKIP_DATA:
1282     if (infile != stdin)
1283     {
1284     for (;;)
1285     {
1286 nigel 91 if (extend_inputline(infile, buffer) == NULL)
1287 nigel 11 {
1288     done = 1;
1289     goto CONTINUE;
1290     }
1291 nigel 3 len = (int)strlen((char *)buffer);
1292     while (len > 0 && isspace(buffer[len-1])) len--;
1293     if (len == 0) break;
1294     }
1295     fprintf(outfile, "\n");
1296     }
1297 nigel 25 goto CONTINUE;
1298 nigel 3 }
1299    
1300 nigel 43 /* Compilation succeeded; print data if required. There are now two
1301     info-returning functions. The old one has a limited interface and
1302     returns only limited data. Check that it agrees with the newer one. */
1303 nigel 3
1304 nigel 63 if (log_store)
1305     fprintf(outfile, "Memory allocation (code space): %d\n",
1306     (int)(gotten_store -
1307     sizeof(real_pcre) -
1308     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1309    
1310 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1311     and remember the store that was got. */
1312    
1313     true_size = ((real_pcre *)re)->size;
1314     regex_gotten_store = gotten_store;
1315    
1316     /* If /S was present, study the regexp to generate additional info to
1317     help with the matching. */
1318    
1319     if (do_study)
1320     {
1321 nigel 93 if (timeit > 0)
1322 nigel 75 {
1323     register int i;
1324     clock_t time_taken;
1325     clock_t start_time = clock();
1326 nigel 93 for (i = 0; i < timeit; i++)
1327 nigel 75 extra = pcre_study(re, study_options, &error);
1328     time_taken = clock() - start_time;
1329     if (extra != NULL) free(extra);
1330 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1331     (((double)time_taken * 1000.0) / (double)timeit) /
1332 nigel 75 (double)CLOCKS_PER_SEC);
1333     }
1334     extra = pcre_study(re, study_options, &error);
1335     if (error != NULL)
1336     fprintf(outfile, "Failed to study: %s\n", error);
1337     else if (extra != NULL)
1338     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1339     }
1340    
1341     /* If the 'F' option was present, we flip the bytes of all the integer
1342     fields in the regex data block and the study block. This is to make it
1343     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1344     compiled on a different architecture. */
1345    
1346     if (do_flip)
1347     {
1348     real_pcre *rre = (real_pcre *)re;
1349     rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1350     rre->size = byteflip(rre->size, sizeof(rre->size));
1351     rre->options = byteflip(rre->options, sizeof(rre->options));
1352 ph10 230 rre->flags = byteflip(rre->flags, sizeof(rre->flags));
1353 nigel 75 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1354     rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1355     rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1356     rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1357     rre->name_table_offset = byteflip(rre->name_table_offset,
1358     sizeof(rre->name_table_offset));
1359     rre->name_entry_size = byteflip(rre->name_entry_size,
1360     sizeof(rre->name_entry_size));
1361     rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1362    
1363     if (extra != NULL)
1364     {
1365     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1366     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1367     rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1368     }
1369     }
1370    
1371     /* Extract information from the compiled data if required */
1372    
1373     SHOW_INFO:
1374    
1375 nigel 93 if (do_debug)
1376     {
1377     fprintf(outfile, "------------------------------------------------------------------\n");
1378 ph10 116 pcre_printint(re, outfile, debug_lengths);
1379 nigel 93 }
1380    
1381 nigel 25 if (do_showinfo)
1382 nigel 3 {
1383 nigel 75 unsigned long int get_options, all_options;
1384 nigel 79 #if !defined NOINFOCHECK
1385 nigel 43 int old_first_char, old_options, old_count;
1386 nigel 79 #endif
1387 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1388 ph10 227 hascrorlf;
1389 nigel 63 int nameentrysize, namecount;
1390     const uschar *nametable;
1391 nigel 3
1392 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1393 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1394     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1395     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1396 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1397 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1398 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1399     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1400 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1401 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1402     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1403 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1404 nigel 43
1405 nigel 79 #if !defined NOINFOCHECK
1406 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1407 nigel 3 if (count < 0) fprintf(outfile,
1408 nigel 43 "Error %d from pcre_info()\n", count);
1409 nigel 3 else
1410     {
1411 nigel 43 if (old_count != count) fprintf(outfile,
1412     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1413     old_count);
1414 nigel 37
1415 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1416     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1417     first_char, old_first_char);
1418 nigel 37
1419 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1420     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1421     get_options, old_options);
1422 nigel 43 }
1423 nigel 79 #endif
1424 nigel 43
1425 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1426 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1427 nigel 77 (int)size, (int)regex_gotten_store);
1428 nigel 43
1429     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1430     if (backrefmax > 0)
1431     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1432 nigel 63
1433     if (namecount > 0)
1434     {
1435     fprintf(outfile, "Named capturing subpatterns:\n");
1436     while (namecount-- > 0)
1437     {
1438     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1439     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1440     GET2(nametable, 0));
1441     nametable += nameentrysize;
1442     }
1443     }
1444 ph10 172
1445 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1446 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1447 nigel 63
1448 nigel 75 all_options = ((real_pcre *)re)->options;
1449 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1450 nigel 75
1451 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1452 nigel 91 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1453 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1454     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1455     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1456     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1457 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1458 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1459     ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1460     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1461     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1462 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1463 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1464 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1465     ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1466 ph10 172
1467 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1468 nigel 43
1469 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1470 nigel 91 {
1471     case PCRE_NEWLINE_CR:
1472     fprintf(outfile, "Forced newline sequence: CR\n");
1473     break;
1474 nigel 43
1475 nigel 91 case PCRE_NEWLINE_LF:
1476     fprintf(outfile, "Forced newline sequence: LF\n");
1477     break;
1478    
1479     case PCRE_NEWLINE_CRLF:
1480     fprintf(outfile, "Forced newline sequence: CRLF\n");
1481     break;
1482    
1483 ph10 149 case PCRE_NEWLINE_ANYCRLF:
1484     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1485     break;
1486    
1487 nigel 93 case PCRE_NEWLINE_ANY:
1488     fprintf(outfile, "Forced newline sequence: ANY\n");
1489     break;
1490    
1491 nigel 91 default:
1492     break;
1493     }
1494    
1495 nigel 43 if (first_char == -1)
1496     {
1497 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1498 nigel 43 }
1499     else if (first_char < 0)
1500     {
1501     fprintf(outfile, "No first char\n");
1502     }
1503     else
1504     {
1505 nigel 63 int ch = first_char & 255;
1506 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1507 nigel 63 "" : " (caseless)";
1508 nigel 93 if (PRINTHEX(ch))
1509 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1510 nigel 3 else
1511 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1512 nigel 43 }
1513 nigel 37
1514 nigel 43 if (need_char < 0)
1515     {
1516     fprintf(outfile, "No need char\n");
1517 nigel 3 }
1518 nigel 43 else
1519     {
1520 nigel 63 int ch = need_char & 255;
1521 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1522 nigel 63 "" : " (caseless)";
1523 nigel 93 if (PRINTHEX(ch))
1524 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1525 nigel 43 else
1526 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1527 nigel 43 }
1528 nigel 75
1529     /* Don't output study size; at present it is in any case a fixed
1530     value, but it varies, depending on the computer architecture, and
1531     so messes up the test suite. (And with the /F option, it might be
1532     flipped.) */
1533    
1534     if (do_study)
1535     {
1536     if (extra == NULL)
1537     fprintf(outfile, "Study returned NULL\n");
1538     else
1539     {
1540     uschar *start_bits = NULL;
1541     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1542    
1543     if (start_bits == NULL)
1544     fprintf(outfile, "No starting byte set\n");
1545     else
1546     {
1547     int i;
1548     int c = 24;
1549     fprintf(outfile, "Starting byte set: ");
1550     for (i = 0; i < 256; i++)
1551     {
1552     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1553     {
1554     if (c > 75)
1555     {
1556     fprintf(outfile, "\n ");
1557     c = 2;
1558     }
1559 nigel 93 if (PRINTHEX(i) && i != ' ')
1560 nigel 75 {
1561     fprintf(outfile, "%c ", i);
1562     c += 2;
1563     }
1564     else
1565     {
1566     fprintf(outfile, "\\x%02x ", i);
1567     c += 5;
1568     }
1569     }
1570     }
1571     fprintf(outfile, "\n");
1572     }
1573     }
1574     }
1575 nigel 3 }
1576    
1577 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1578     that is all. The first 8 bytes of the file are the regex length and then
1579     the study length, in big-endian order. */
1580 nigel 3
1581 nigel 75 if (to_file != NULL)
1582 nigel 3 {
1583 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1584     if (f == NULL)
1585 nigel 3 {
1586 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1587 nigel 3 }
1588 nigel 75 else
1589     {
1590     uschar sbuf[8];
1591     sbuf[0] = (true_size >> 24) & 255;
1592     sbuf[1] = (true_size >> 16) & 255;
1593     sbuf[2] = (true_size >> 8) & 255;
1594     sbuf[3] = (true_size) & 255;
1595 nigel 3
1596 nigel 75 sbuf[4] = (true_study_size >> 24) & 255;
1597     sbuf[5] = (true_study_size >> 16) & 255;
1598     sbuf[6] = (true_study_size >> 8) & 255;
1599     sbuf[7] = (true_study_size) & 255;
1600 nigel 3
1601 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1602     fwrite(re, 1, true_size, f) < true_size)
1603     {
1604     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1605     }
1606 nigel 3 else
1607     {
1608 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1609     if (extra != NULL)
1610 nigel 3 {
1611 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1612     true_study_size)
1613 nigel 3 {
1614 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1615     strerror(errno));
1616 nigel 3 }
1617 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1618 nigel 93
1619 nigel 3 }
1620     }
1621 nigel 75 fclose(f);
1622 nigel 3 }
1623 nigel 77
1624     new_free(re);
1625     if (extra != NULL) new_free(extra);
1626     if (tables != NULL) new_free((void *)tables);
1627 nigel 75 continue; /* With next regex */
1628 nigel 3 }
1629 nigel 75 } /* End of non-POSIX compile */
1630 nigel 3
1631     /* Read data lines and test them */
1632    
1633     for (;;)
1634     {
1635 nigel 87 uschar *q;
1636 ph10 147 uschar *bptr;
1637 nigel 57 int *use_offsets = offsets;
1638 nigel 53 int use_size_offsets = size_offsets;
1639 nigel 63 int callout_data = 0;
1640     int callout_data_set = 0;
1641 nigel 3 int count, c;
1642 nigel 29 int copystrings = 0;
1643 nigel 63 int find_match_limit = 0;
1644 nigel 29 int getstrings = 0;
1645     int getlist = 0;
1646 nigel 39 int gmatched = 0;
1647 nigel 35 int start_offset = 0;
1648 nigel 41 int g_notempty = 0;
1649 nigel 77 int use_dfa = 0;
1650 nigel 3
1651     options = 0;
1652    
1653 nigel 91 *copynames = 0;
1654     *getnames = 0;
1655    
1656     copynamesptr = copynames;
1657     getnamesptr = getnames;
1658    
1659 nigel 63 pcre_callout = callout;
1660     first_callout = 1;
1661     callout_extra = 0;
1662     callout_count = 0;
1663     callout_fail_count = 999999;
1664     callout_fail_id = -1;
1665 nigel 73 show_malloc = 0;
1666 nigel 63
1667 nigel 91 if (extra != NULL) extra->flags &=
1668     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1669    
1670     len = 0;
1671     for (;;)
1672 nigel 11 {
1673 nigel 91 if (infile == stdin) printf("data> ");
1674     if (extend_inputline(infile, buffer + len) == NULL)
1675     {
1676     if (len > 0) break;
1677     done = 1;
1678     goto CONTINUE;
1679     }
1680     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1681     len = (int)strlen((char *)buffer);
1682     if (buffer[len-1] == '\n') break;
1683 nigel 11 }
1684 nigel 3
1685     while (len > 0 && isspace(buffer[len-1])) len--;
1686     buffer[len] = 0;
1687     if (len == 0) break;
1688    
1689     p = buffer;
1690     while (isspace(*p)) p++;
1691    
1692 ph10 147 bptr = q = dbuffer;
1693 nigel 3 while ((c = *p++) != 0)
1694     {
1695     int i = 0;
1696     int n = 0;
1697 nigel 63
1698 nigel 3 if (c == '\\') switch ((c = *p++))
1699     {
1700     case 'a': c = 7; break;
1701     case 'b': c = '\b'; break;
1702     case 'e': c = 27; break;
1703     case 'f': c = '\f'; break;
1704     case 'n': c = '\n'; break;
1705     case 'r': c = '\r'; break;
1706     case 't': c = '\t'; break;
1707     case 'v': c = '\v'; break;
1708    
1709     case '0': case '1': case '2': case '3':
1710     case '4': case '5': case '6': case '7':
1711     c -= '0';
1712     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1713     c = c * 8 + *p++ - '0';
1714 nigel 91
1715     #if !defined NOUTF8
1716     if (use_utf8 && c > 255)
1717     {
1718     unsigned char buff8[8];
1719     int ii, utn;
1720     utn = ord2utf8(c, buff8);
1721     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1722     c = buff8[ii]; /* Last byte */
1723     }
1724     #endif
1725 nigel 3 break;
1726    
1727     case 'x':
1728 nigel 49
1729     /* Handle \x{..} specially - new Perl thing for utf8 */
1730    
1731 nigel 79 #if !defined NOUTF8
1732 nigel 49 if (*p == '{')
1733     {
1734     unsigned char *pt = p;
1735     c = 0;
1736     while (isxdigit(*(++pt)))
1737     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1738     if (*pt == '}')
1739     {
1740 nigel 67 unsigned char buff8[8];
1741 nigel 49 int ii, utn;
1742 nigel 85 utn = ord2utf8(c, buff8);
1743 nigel 67 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1744     c = buff8[ii]; /* Last byte */
1745 nigel 49 p = pt + 1;
1746     break;
1747     }
1748     /* Not correct form; fall through */
1749     }
1750 nigel 79 #endif
1751 nigel 49
1752     /* Ordinary \x */
1753    
1754 nigel 3 c = 0;
1755     while (i++ < 2 && isxdigit(*p))
1756     {
1757     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1758     p++;
1759     }
1760     break;
1761    
1762 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1763 nigel 3 p--;
1764     continue;
1765    
1766 nigel 75 case '>':
1767     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1768     continue;
1769    
1770 nigel 3 case 'A': /* Option setting */
1771     options |= PCRE_ANCHORED;
1772     continue;
1773    
1774     case 'B':
1775     options |= PCRE_NOTBOL;
1776     continue;
1777    
1778 nigel 29 case 'C':
1779 nigel 63 if (isdigit(*p)) /* Set copy string */
1780     {
1781     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1782     copystrings |= 1 << n;
1783     }
1784     else if (isalnum(*p))
1785     {
1786 nigel 91 uschar *npp = copynamesptr;
1787 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1788 nigel 91 *npp++ = 0;
1789 nigel 67 *npp = 0;
1790 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1791 nigel 63 if (n < 0)
1792 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1793     copynamesptr = npp;
1794 nigel 63 }
1795     else if (*p == '+')
1796     {
1797     callout_extra = 1;
1798     p++;
1799     }
1800     else if (*p == '-')
1801     {
1802     pcre_callout = NULL;
1803     p++;
1804     }
1805     else if (*p == '!')
1806     {
1807     callout_fail_id = 0;
1808     p++;
1809     while(isdigit(*p))
1810     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1811     callout_fail_count = 0;
1812     if (*p == '!')
1813     {
1814     p++;
1815     while(isdigit(*p))
1816     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1817     }
1818     }
1819     else if (*p == '*')
1820     {
1821     int sign = 1;
1822     callout_data = 0;
1823     if (*(++p) == '-') { sign = -1; p++; }
1824     while(isdigit(*p))
1825     callout_data = callout_data * 10 + *p++ - '0';
1826     callout_data *= sign;
1827     callout_data_set = 1;
1828     }
1829 nigel 29 continue;
1830    
1831 nigel 79 #if !defined NODFA
1832 nigel 77 case 'D':
1833 nigel 79 #if !defined NOPOSIX
1834 nigel 77 if (posix || do_posix)
1835     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1836     else
1837 nigel 79 #endif
1838 nigel 77 use_dfa = 1;
1839     continue;
1840    
1841     case 'F':
1842     options |= PCRE_DFA_SHORTEST;
1843     continue;
1844 nigel 79 #endif
1845 nigel 77
1846 nigel 29 case 'G':
1847 nigel 63 if (isdigit(*p))
1848     {
1849     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1850     getstrings |= 1 << n;
1851     }
1852     else if (isalnum(*p))
1853     {
1854 nigel 91 uschar *npp = getnamesptr;
1855 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1856 nigel 91 *npp++ = 0;
1857 nigel 67 *npp = 0;
1858 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1859 nigel 63 if (n < 0)
1860 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1861     getnamesptr = npp;
1862 nigel 63 }
1863 nigel 29 continue;
1864    
1865     case 'L':
1866     getlist = 1;
1867     continue;
1868    
1869 nigel 63 case 'M':
1870     find_match_limit = 1;
1871     continue;
1872    
1873 nigel 37 case 'N':
1874     options |= PCRE_NOTEMPTY;
1875     continue;
1876    
1877 nigel 3 case 'O':
1878     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1879 nigel 53 if (n > size_offsets_max)
1880     {
1881     size_offsets_max = n;
1882 nigel 57 free(offsets);
1883 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1884 nigel 53 if (offsets == NULL)
1885     {
1886     printf("** Failed to get %d bytes of memory for offsets vector\n",
1887 ph10 151 (int)(size_offsets_max * sizeof(int)));
1888 nigel 77 yield = 1;
1889     goto EXIT;
1890 nigel 53 }
1891     }
1892     use_size_offsets = n;
1893 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1894 nigel 3 continue;
1895    
1896 nigel 75 case 'P':
1897     options |= PCRE_PARTIAL;
1898     continue;
1899    
1900 nigel 91 case 'Q':
1901     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1902     if (extra == NULL)
1903     {
1904     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1905     extra->flags = 0;
1906     }
1907     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1908     extra->match_limit_recursion = n;
1909     continue;
1910    
1911     case 'q':
1912     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1913     if (extra == NULL)
1914     {
1915     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1916     extra->flags = 0;
1917     }
1918     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1919     extra->match_limit = n;
1920     continue;
1921    
1922 nigel 79 #if !defined NODFA
1923 nigel 77 case 'R':
1924     options |= PCRE_DFA_RESTART;
1925     continue;
1926 nigel 79 #endif
1927 nigel 77
1928 nigel 73 case 'S':
1929     show_malloc = 1;
1930     continue;
1931    
1932 nigel 3 case 'Z':
1933     options |= PCRE_NOTEOL;
1934     continue;
1935 nigel 71
1936     case '?':
1937     options |= PCRE_NO_UTF8_CHECK;
1938     continue;
1939 nigel 91
1940     case '<':
1941     {
1942     int x = check_newline(p, outfile);
1943     if (x == 0) goto NEXT_DATA;
1944     options |= x;
1945     while (*p++ != '>');
1946     }
1947     continue;
1948 nigel 3 }
1949 nigel 9 *q++ = c;
1950 nigel 3 }
1951 nigel 9 *q = 0;
1952     len = q - dbuffer;
1953 nigel 3
1954 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
1955     {
1956     printf("**Match limit not relevant for DFA matching: ignored\n");
1957     find_match_limit = 0;
1958     }
1959    
1960 nigel 3 /* Handle matching via the POSIX interface, which does not
1961 nigel 63 support timing or playing with the match limit or callout data. */
1962 nigel 3
1963 nigel 37 #if !defined NOPOSIX
1964 nigel 3 if (posix || do_posix)
1965     {
1966     int rc;
1967     int eflags = 0;
1968 nigel 63 regmatch_t *pmatch = NULL;
1969     if (use_size_offsets > 0)
1970 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1971 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1972     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1973    
1974 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1975 nigel 3
1976     if (rc != 0)
1977     {
1978 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1979 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1980     }
1981 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1982     != 0)
1983     {
1984     fprintf(outfile, "Matched with REG_NOSUB\n");
1985     }
1986 nigel 3 else
1987     {
1988 nigel 7 size_t i;
1989 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
1990 nigel 3 {
1991     if (pmatch[i].rm_so >= 0)
1992     {
1993 nigel 23 fprintf(outfile, "%2d: ", (int)i);
1994 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
1995     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1996 nigel 3 fprintf(outfile, "\n");
1997 nigel 35 if (i == 0 && do_showrest)
1998     {
1999     fprintf(outfile, " 0+ ");
2000 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2001     outfile);
2002 nigel 35 fprintf(outfile, "\n");
2003     }
2004 nigel 3 }
2005     }
2006     }
2007 nigel 53 free(pmatch);
2008 nigel 3 }
2009    
2010 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2011 nigel 3
2012 nigel 37 else
2013     #endif /* !defined NOPOSIX */
2014    
2015 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2016 nigel 3 {
2017 nigel 93 if (timeitm > 0)
2018 nigel 3 {
2019     register int i;
2020     clock_t time_taken;
2021     clock_t start_time = clock();
2022 nigel 77
2023 nigel 79 #if !defined NODFA
2024 nigel 77 if (all_use_dfa || use_dfa)
2025     {
2026     int workspace[1000];
2027 nigel 93 for (i = 0; i < timeitm; i++)
2028 nigel 77 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2029     options | g_notempty, use_offsets, use_size_offsets, workspace,
2030     sizeof(workspace)/sizeof(int));
2031     }
2032     else
2033 nigel 79 #endif
2034 nigel 77
2035 nigel 93 for (i = 0; i < timeitm; i++)
2036 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2037 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2038 nigel 77
2039 nigel 3 time_taken = clock() - start_time;
2040 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2041     (((double)time_taken * 1000.0) / (double)timeitm) /
2042 nigel 63 (double)CLOCKS_PER_SEC);
2043 nigel 3 }
2044    
2045 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2046 nigel 87 varying limits in order to find the minimum value for the match limit and
2047     for the recursion limit. */
2048 nigel 63
2049     if (find_match_limit)
2050     {
2051     if (extra == NULL)
2052     {
2053 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2054 nigel 63 extra->flags = 0;
2055     }
2056    
2057 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2058 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2059     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2060     PCRE_ERROR_MATCHLIMIT, "match()");
2061 nigel 63
2062 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2063     options|g_notempty, use_offsets, use_size_offsets,
2064     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2065     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2066 nigel 63 }
2067    
2068     /* If callout_data is set, use the interface with additional data */
2069    
2070     else if (callout_data_set)
2071     {
2072     if (extra == NULL)
2073     {
2074 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2075 nigel 63 extra->flags = 0;
2076     }
2077     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2078 nigel 71 extra->callout_data = &callout_data;
2079 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2080     options | g_notempty, use_offsets, use_size_offsets);
2081     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2082     }
2083    
2084     /* The normal case is just to do the match once, with the default
2085     value of match_limit. */
2086    
2087 nigel 79 #if !defined NODFA
2088 nigel 77 else if (all_use_dfa || use_dfa)
2089     {
2090     int workspace[1000];
2091     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2092     options | g_notempty, use_offsets, use_size_offsets, workspace,
2093     sizeof(workspace)/sizeof(int));
2094     if (count == 0)
2095     {
2096     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2097     count = use_size_offsets/2;
2098     }
2099     }
2100 nigel 79 #endif
2101 nigel 77
2102 nigel 75 else
2103     {
2104     count = pcre_exec(re, extra, (char *)bptr, len,
2105     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2106 nigel 77 if (count == 0)
2107     {
2108     fprintf(outfile, "Matched, but too many substrings\n");
2109     count = use_size_offsets/3;
2110     }
2111 nigel 75 }
2112 nigel 3
2113 nigel 39 /* Matched */
2114    
2115 nigel 3 if (count >= 0)
2116     {
2117 nigel 93 int i, maxcount;
2118    
2119     #if !defined NODFA
2120     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2121     #endif
2122     maxcount = use_size_offsets/3;
2123    
2124     /* This is a check against a lunatic return value. */
2125    
2126     if (count > maxcount)
2127     {
2128     fprintf(outfile,
2129     "** PCRE error: returned count %d is too big for offset size %d\n",
2130     count, use_size_offsets);
2131     count = use_size_offsets/3;
2132     if (do_g || do_G)
2133     {
2134     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2135     do_g = do_G = FALSE; /* Break g/G loop */
2136     }
2137     }
2138    
2139 nigel 29 for (i = 0; i < count * 2; i += 2)
2140 nigel 3 {
2141 nigel 57 if (use_offsets[i] < 0)
2142 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2143     else
2144     {
2145     fprintf(outfile, "%2d: ", i/2);
2146 nigel 63 (void)pchars(bptr + use_offsets[i],
2147     use_offsets[i+1] - use_offsets[i], outfile);
2148 nigel 3 fprintf(outfile, "\n");
2149 nigel 35 if (i == 0)
2150     {
2151     if (do_showrest)
2152     {
2153     fprintf(outfile, " 0+ ");
2154 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2155     outfile);
2156 nigel 35 fprintf(outfile, "\n");
2157     }
2158     }
2159 nigel 3 }
2160     }
2161 nigel 29
2162     for (i = 0; i < 32; i++)
2163     {
2164     if ((copystrings & (1 << i)) != 0)
2165     {
2166 nigel 91 char copybuffer[256];
2167 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2168 nigel 37 i, copybuffer, sizeof(copybuffer));
2169 nigel 29 if (rc < 0)
2170     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2171     else
2172 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2173 nigel 29 }
2174     }
2175    
2176 nigel 91 for (copynamesptr = copynames;
2177     *copynamesptr != 0;
2178     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2179     {
2180     char copybuffer[256];
2181     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2182     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2183     if (rc < 0)
2184     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2185     else
2186     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2187     }
2188    
2189 nigel 29 for (i = 0; i < 32; i++)
2190     {
2191     if ((getstrings & (1 << i)) != 0)
2192     {
2193     const char *substring;
2194 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2195 nigel 29 i, &substring);
2196     if (rc < 0)
2197     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2198     else
2199     {
2200     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2201 nigel 49 pcre_free_substring(substring);
2202 nigel 29 }
2203     }
2204     }
2205    
2206 nigel 91 for (getnamesptr = getnames;
2207     *getnamesptr != 0;
2208     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2209     {
2210     const char *substring;
2211     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2212     count, (char *)getnamesptr, &substring);
2213     if (rc < 0)
2214     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2215     else
2216     {
2217     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2218     pcre_free_substring(substring);
2219     }
2220     }
2221    
2222 nigel 29 if (getlist)
2223     {
2224     const char **stringlist;
2225 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2226 nigel 29 &stringlist);
2227     if (rc < 0)
2228     fprintf(outfile, "get substring list failed %d\n", rc);
2229     else
2230     {
2231     for (i = 0; i < count; i++)
2232     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2233     if (stringlist[i] != NULL)
2234     fprintf(outfile, "string list not terminated by NULL\n");
2235 nigel 49 /* free((void *)stringlist); */
2236     pcre_free_substring_list(stringlist);
2237 nigel 29 }
2238     }
2239 nigel 39 }
2240 nigel 29
2241 nigel 75 /* There was a partial match */
2242    
2243     else if (count == PCRE_ERROR_PARTIAL)
2244     {
2245 nigel 77 fprintf(outfile, "Partial match");
2246 nigel 79 #if !defined NODFA
2247 nigel 77 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2248     fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2249     bptr + use_offsets[0]);
2250 nigel 79 #endif
2251 nigel 77 fprintf(outfile, "\n");
2252 nigel 75 break; /* Out of the /g loop */
2253     }
2254    
2255 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2256 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2257     to advance the start offset, and continue. We won't be at the end of the
2258     string - that was checked before setting g_notempty.
2259 nigel 39
2260 ph10 150 Complication arises in the case when the newline option is "any" or
2261 ph10 149 "anycrlf". If the previous match was at the end of a line terminated by
2262     CRLF, an advance of one character just passes the \r, whereas we should
2263     prefer the longer newline sequence, as does the code in pcre_exec().
2264     Fudge the offset value to achieve this.
2265 ph10 144
2266 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2267     character, not one byte. */
2268    
2269 nigel 3 else
2270     {
2271 nigel 41 if (g_notempty != 0)
2272 nigel 35 {
2273 nigel 73 int onechar = 1;
2274 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2275 nigel 57 use_offsets[0] = start_offset;
2276 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2277     {
2278     int d;
2279     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2280     obits = (d == '\r')? PCRE_NEWLINE_CR :
2281     (d == '\n')? PCRE_NEWLINE_LF :
2282     (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2283 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2284 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2285     }
2286 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2287 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2288 ph10 149 &&
2289 ph10 143 start_offset < len - 1 &&
2290     bptr[start_offset] == '\r' &&
2291     bptr[start_offset+1] == '\n')
2292 ph10 144 onechar++;
2293 ph10 143 else if (use_utf8)
2294 nigel 73 {
2295     while (start_offset + onechar < len)
2296     {
2297     int tb = bptr[start_offset+onechar];
2298     if (tb <= 127) break;
2299     tb &= 0xc0;
2300     if (tb != 0 && tb != 0xc0) onechar++;
2301     }
2302     }
2303     use_offsets[1] = start_offset + onechar;
2304 nigel 35 }
2305 nigel 41 else
2306     {
2307 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2308 nigel 41 {
2309 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
2310 nigel 41 }
2311 nigel 73 else fprintf(outfile, "Error %d\n", count);
2312 nigel 41 break; /* Out of the /g loop */
2313     }
2314 nigel 3 }
2315 nigel 35
2316 nigel 39 /* If not /g or /G we are done */
2317    
2318     if (!do_g && !do_G) break;
2319    
2320 nigel 41 /* If we have matched an empty string, first check to see if we are at
2321     the end of the subject. If so, the /g loop is over. Otherwise, mimic
2322     what Perl's /g option does. This turns out to be rather cunning. First
2323 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2324     same point. If this fails (picked up above) we advance to the next
2325 ph10 143 character. */
2326 ph10 142
2327 nigel 41 g_notempty = 0;
2328 ph10 142
2329 nigel 57 if (use_offsets[0] == use_offsets[1])
2330 nigel 41 {
2331 nigel 57 if (use_offsets[0] == len) break;
2332 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2333 nigel 41 }
2334 nigel 39
2335     /* For /g, update the start offset, leaving the rest alone */
2336    
2337 ph10 143 if (do_g) start_offset = use_offsets[1];
2338 nigel 39
2339     /* For /G, update the pointer and length */
2340    
2341     else
2342 nigel 35 {
2343 ph10 143 bptr += use_offsets[1];
2344     len -= use_offsets[1];
2345 nigel 35 }
2346 nigel 39 } /* End of loop for /g and /G */
2347 nigel 91
2348     NEXT_DATA: continue;
2349 nigel 39 } /* End of loop for data lines */
2350 nigel 3
2351 nigel 11 CONTINUE:
2352 nigel 37
2353     #if !defined NOPOSIX
2354 nigel 3 if (posix || do_posix) regfree(&preg);
2355 nigel 37 #endif
2356    
2357 nigel 77 if (re != NULL) new_free(re);
2358     if (extra != NULL) new_free(extra);
2359 nigel 25 if (tables != NULL)
2360     {
2361 nigel 77 new_free((void *)tables);
2362 nigel 25 setlocale(LC_CTYPE, "C");
2363 nigel 93 locale_set = 0;
2364 nigel 25 }
2365 nigel 3 }
2366    
2367 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2368 nigel 77
2369     EXIT:
2370    
2371     if (infile != NULL && infile != stdin) fclose(infile);
2372     if (outfile != NULL && outfile != stdout) fclose(outfile);
2373    
2374     free(buffer);
2375     free(dbuffer);
2376     free(pbuffer);
2377     free(offsets);
2378    
2379     return yield;
2380 nigel 3 }
2381    
2382 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12