/[pcre]/code/tags/pcre-7.0/pcretest.c
ViewVC logotype

Contents of /code/tags/pcre-7.0/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 91 - (hide annotations) (download)
Sat Feb 24 21:41:34 2007 UTC (7 years, 7 months ago) by nigel
Original Path: code/trunk/pcretest.c
File MIME type: text/plain
File size: 62873 byte(s)
Load pcre-6.7 into code/trunk.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 nigel 3 #include <ctype.h>
40     #include <stdio.h>
41     #include <string.h>
42     #include <stdlib.h>
43     #include <time.h>
44 nigel 25 #include <locale.h>
45 nigel 75 #include <errno.h>
46 nigel 3
47 nigel 91 #ifndef _WIN32
48     #include <sys/resource.h>
49     #endif
50    
51 nigel 63 #define PCRE_SPY /* For Win32 build, import data, not export */
52 nigel 37
53 nigel 85 /* We include pcre_internal.h because we need the internal info for displaying
54     the results of pcre_study() and we also need to know about the internal
55     macros, structures, and other internal data values; pcretest has "inside
56     information" compared to a program that strictly follows the PCRE API. */
57 nigel 77
58     #include "pcre_internal.h"
59    
60 nigel 85 /* We need access to the data tables that PCRE uses. So as not to have to keep
61     two copies, we include the source file here, changing the names of the external
62     symbols to prevent clashes. */
63 nigel 77
64 nigel 85 #define _pcre_utf8_table1 utf8_table1
65     #define _pcre_utf8_table1_size utf8_table1_size
66     #define _pcre_utf8_table2 utf8_table2
67     #define _pcre_utf8_table3 utf8_table3
68     #define _pcre_utf8_table4 utf8_table4
69     #define _pcre_utt utt
70     #define _pcre_utt_size utt_size
71     #define _pcre_OP_lengths OP_lengths
72    
73     #include "pcre_tables.c"
74    
75     /* We also need the pcre_printint() function for printing out compiled
76     patterns. This function is in a separate file so that it can be included in
77     pcre_compile.c when that module is compiled with debugging enabled. */
78    
79     #include "pcre_printint.src"
80    
81    
82 nigel 37 /* It is possible to compile this test program without including support for
83     testing the POSIX interface, though this is not available via the standard
84     Makefile. */
85    
86     #if !defined NOPOSIX
87 nigel 3 #include "pcreposix.h"
88 nigel 37 #endif
89 nigel 3
90 nigel 79 /* It is also possible, for the benefit of the version imported into Exim, to
91     build pcretest without support for UTF8 (define NOUTF8), without the interface
92     to the DFA matcher (NODFA), and without the doublecheck of the old "info"
93     function (define NOINFOCHECK). */
94    
95    
96 nigel 85 /* Other parameters */
97    
98 nigel 3 #ifndef CLOCKS_PER_SEC
99     #ifdef CLK_TCK
100     #define CLOCKS_PER_SEC CLK_TCK
101     #else
102     #define CLOCKS_PER_SEC 100
103     #endif
104     #endif
105    
106 nigel 75 #define LOOPREPEAT 500000
107 nigel 3
108 nigel 85 /* Static variables */
109    
/* Output stream for results. It is file-scope so that the malloc wrappers and
the callout function can write to it. */
110 nigel 3 static FILE *outfile;
111     static int log_store = 0;
/* Callout state: callout_count is incremented by callout() each time the
callout number equals callout_fail_id, and the callout reports failure once it
reaches callout_fail_count. A non-zero callout_extra makes callout() print the
captured substrings; first_callout is cleared after the first callout. */
112 nigel 63 static int callout_count;
113     static int callout_extra;
114     static int callout_fail_count;
115     static int callout_fail_id;
116     static int first_callout;
/* When non-zero, the malloc/free wrappers log each call to outfile. */
117 nigel 73 static int show_malloc;
/* When non-zero, pchars() decodes its input as UTF-8. */
118 nigel 67 static int use_utf8;
/* Size of the most recent request made through new_malloc(). */
119 nigel 43 static size_t gotten_store;
120 nigel 3
121 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
122    
/* buffer_size is the current size of each of the three buffers below;
extend_inputline() doubles all three together. buffer receives the input
lines; pbuffer holds the copy of the pattern that callout() prints from.
NOTE(review): dbuffer's use is not visible in this part of the file. */
123     static int buffer_size = 50000;
124     static uschar *buffer = NULL;
125     static uschar *dbuffer = NULL;
126 nigel 75 static uschar *pbuffer = NULL;
127 nigel 3
128 nigel 75
129 nigel 49
130     /*************************************************
131 nigel 91 * Read or extend an input line *
132     *************************************************/
133    
134     /* Input lines are read into buffer, but both patterns and data lines can be
135     continued over multiple input lines. In addition, if the buffer fills up, we
136     want to automatically expand it so as to be able to handle extremely large
137     lines that are needed for certain stress tests. When the input buffer is
138     expanded, the other two buffers must also be expanded likewise, and the
139     contents of pbuffer, which are a copy of the input for callouts, must be
140     preserved (for when expansion happens for a data line). This is not the most
141     optimal way of handling this, but hey, this is just a test program!
142    
143     Arguments:
144     f the file to read
145     start where in buffer to start (this *must* be within buffer)
146    
147     Returns: pointer to the start of new data
148     could be a copy of start, or could be moved
149     NULL if no data read and EOF reached
150     */
151    
152     static uschar *
153     extend_inputline(FILE *f, uschar *start)
154     {
155     uschar *here = start;
156    
157     for (;;)
158     {
159     int rlen = buffer_size - (here - buffer);
160     if (rlen > 1000)
161     {
162     int dlen;
163     if (fgets((char *)here, rlen, f) == NULL)
164     return (here == start)? NULL : start;
165     dlen = (int)strlen((char *)here);
166     if (dlen > 0 && here[dlen - 1] == '\n') return start;
167     here += dlen;
168     }
169    
170     else
171     {
172     int new_buffer_size = 2*buffer_size;
173     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
174     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
175     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
176    
177     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
178     {
179     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
180     exit(1);
181     }
182    
183     memcpy(new_buffer, buffer, buffer_size);
184     memcpy(new_pbuffer, pbuffer, buffer_size);
185    
186     buffer_size = new_buffer_size;
187    
188     start = new_buffer + (start - buffer);
189     here = new_buffer + (here - buffer);
190    
191     free(buffer);
192     free(dbuffer);
193     free(pbuffer);
194    
195     buffer = new_buffer;
196     dbuffer = new_dbuffer;
197     pbuffer = new_pbuffer;
198     }
199     }
200    
201     return NULL; /* Control never gets here */
202     }
203    
204    
205    
206    
207    
208    
209    
210     /*************************************************
211 nigel 63 * Read number from string *
212     *************************************************/
213    
214     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
215     around with conditional compilation, just do the job by hand. It is only used
216     for unpicking the -o argument, so just keep it simple.
217    
218     Arguments:
219     str string to be converted
220     endptr where to put the end pointer
221    
222     Returns: the value, as an int
223     */
224    
/* Convert a decimal string to an int by hand (strtoul() is deliberately not
used). Leading white space is skipped, then digits are accumulated until the
first non-digit. A value too large for an int saturates at the largest int
instead of overflowing, because signed overflow is undefined behaviour.

Arguments:
  str      string to be converted
  endptr   where to put the pointer to the first unconsumed character

Returns:   the converted value (0 if there are no digits)
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
const int int_max = (int)(~0u >> 1);
int result = 0;

while (*str != 0 && isspace(*str)) str++;

while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* result*10 + digit would exceed int_max exactly when this test is true. */

  if (result > (int_max - digit) / 10) result = int_max;
    else result = result * 10 + digit;
  }

*endptr = str;
return result;
}
234    
235    
236    
237 nigel 49
238     /*************************************************
239     * Convert UTF-8 string to value *
240     *************************************************/
241    
242     /* This function takes one or more bytes that represents a UTF-8 character,
243     and returns the value of the character.
244    
245     Argument:
246 nigel 91 utf8bytes a pointer to the byte vector
247     vptr a pointer to an int to receive the value
248 nigel 49
249 nigel 91 Returns: > 0 => the number of bytes consumed
250     -6 to 0 => malformed UTF-8 character at offset = (-return)
251 nigel 49 */
252    
253 nigel 79 #if !defined NOUTF8
254    
255 nigel 67 static int
256 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
257 nigel 49 {
258 nigel 91 int c = *utf8bytes++;
259 nigel 49 int d = c;
260     int i, j, s;
261    
262     for (i = -1; i < 6; i++) /* i is number of additional bytes */
263     {
264     if ((d & 0x80) == 0) break;
265     d <<= 1;
266     }
267    
268     if (i == -1) { *vptr = c; return 1; } /* ascii character */
269     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
270    
271     /* i now has a value in the range 1-5 */
272    
273 nigel 59 s = 6*i;
274 nigel 85 d = (c & utf8_table3[i]) << s;
275 nigel 49
276     for (j = 0; j < i; j++)
277     {
278 nigel 91 c = *utf8bytes++;
279 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
280 nigel 59 s -= 6;
281 nigel 49 d |= (c & 0x3f) << s;
282     }
283    
284     /* Check that encoding was the correct unique one */
285    
286 nigel 85 for (j = 0; j < utf8_table1_size; j++)
287     if (d <= utf8_table1[j]) break;
288 nigel 49 if (j != i) return -(i+1);
289    
290     /* Valid value */
291    
292     *vptr = d;
293     return i+1;
294     }
295    
296 nigel 79 #endif
297 nigel 49
298    
299 nigel 79
300 nigel 63 /*************************************************
301 nigel 85 * Convert character value to UTF-8 *
302     *************************************************/
303    
304     /* This function takes an integer value in the range 0 - 0x7fffffff
305     and encodes it as a UTF-8 character in 1 to 6 bytes.
306    
307     Arguments:
308     cvalue the character value
309 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
310 nigel 85
311     Returns: number of characters placed in the buffer
312     */
313    
314     static int
315 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
316 nigel 85 {
317     register int i, j;
318     for (i = 0; i < utf8_table1_size; i++)
319     if (cvalue <= utf8_table1[i]) break;
320 nigel 91 utf8bytes += i;
321 nigel 85 for (j = i; j > 0; j--)
322     {
323 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
324 nigel 85 cvalue >>= 6;
325     }
326 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
327 nigel 85 return i + 1;
328     }
329    
330    
331    
332     /*************************************************
333 nigel 63 * Print character string *
334     *************************************************/
335 nigel 49
336 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
337     mode. Yields number of characters printed. If handed a NULL file, just counts
338     chars without printing. */
339 nigel 49
340 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
341 nigel 3 {
342 nigel 85 int c = 0;
343 nigel 63 int yield = 0;
344 nigel 3
345 nigel 63 while (length-- > 0)
346 nigel 3 {
347 nigel 79 #if !defined NOUTF8
348 nigel 67 if (use_utf8)
349 nigel 63 {
350     int rc = utf82ord(p, &c);
351 nigel 3
352 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
353     {
354     length -= rc - 1;
355     p += rc;
356     if (c < 256 && isprint(c))
357     {
358     if (f != NULL) fprintf(f, "%c", c);
359     yield++;
360     }
361     else
362     {
363     int n;
364     if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
365     yield += n;
366     }
367     continue;
368     }
369     }
370 nigel 79 #endif
371 nigel 3
372 nigel 63 /* Not UTF-8, or malformed UTF-8 */
373    
374     if (isprint(c = *(p++)))
375 nigel 3 {
376 nigel 63 if (f != NULL) fprintf(f, "%c", c);
377     yield++;
378 nigel 3 }
379 nigel 63 else
380 nigel 3 {
381 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
382     yield += 4;
383     }
384     }
385 nigel 3
386 nigel 63 return yield;
387     }
388 nigel 23
389 nigel 3
390 nigel 23
391 nigel 63 /*************************************************
392     * Callout function *
393     *************************************************/
394 nigel 3
395 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
396     the match. Yield zero unless more callouts than the fail count, or the callout
397     data is not zero. */
398 nigel 3
399 nigel 63 static int callout(pcre_callout_block *cb)
400     {
401     FILE *f = (first_callout | callout_extra)? outfile : NULL;
402 nigel 75 int i, pre_start, post_start, subject_length;
403 nigel 3
404 nigel 63 if (callout_extra)
405     {
406     fprintf(f, "Callout %d: last capture = %d\n",
407     cb->callout_number, cb->capture_last);
408 nigel 3
409 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
410     {
411     if (cb->offset_vector[i] < 0)
412     fprintf(f, "%2d: <unset>\n", i/2);
413     else
414     {
415     fprintf(f, "%2d: ", i/2);
416     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
417     cb->offset_vector[i+1] - cb->offset_vector[i], f);
418     fprintf(f, "\n");
419     }
420     }
421     }
422 nigel 3
423 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
424     datails. On subsequent calls in the same match, we use pchars just to find the
425     printed lengths of the substrings. */
426 nigel 3
427 nigel 63 if (f != NULL) fprintf(f, "--->");
428 nigel 3
429 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
430     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
431     cb->current_position - cb->start_match, f);
432 nigel 3
433 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
434    
435 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
436     cb->subject_length - cb->current_position, f);
437 nigel 3
438 nigel 63 if (f != NULL) fprintf(f, "\n");
439 nigel 9
440 nigel 63 /* Always print appropriate indicators, with callout number if not already
441 nigel 75 shown. For automatic callouts, show the pattern offset. */
442 nigel 3
443 nigel 75 if (cb->callout_number == 255)
444     {
445     fprintf(outfile, "%+3d ", cb->pattern_position);
446     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
447     }
448     else
449     {
450     if (callout_extra) fprintf(outfile, " ");
451     else fprintf(outfile, "%3d ", cb->callout_number);
452     }
453 nigel 3
454 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
455     fprintf(outfile, "^");
456 nigel 3
457 nigel 63 if (post_start > 0)
458     {
459     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
460     fprintf(outfile, "^");
461 nigel 3 }
462    
463 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
464     fprintf(outfile, " ");
465    
466     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
467     pbuffer + cb->pattern_position);
468    
469 nigel 63 fprintf(outfile, "\n");
470     first_callout = 0;
471 nigel 3
472 nigel 71 if (cb->callout_data != NULL)
473 nigel 49 {
474 nigel 71 int callout_data = *((int *)(cb->callout_data));
475     if (callout_data != 0)
476     {
477     fprintf(outfile, "Callout data = %d\n", callout_data);
478     return callout_data;
479     }
480 nigel 63 }
481 nigel 49
482 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
483     (++callout_count >= callout_fail_count)? 1 : 0;
484 nigel 3 }
485    
486    
487 nigel 63 /*************************************************
488 nigel 73 * Local malloc functions *
489 nigel 63 *************************************************/
490 nigel 3
491     /* Alternative malloc function, to test functionality and show the size of the
492     compiled re. */
493    
494     static void *new_malloc(size_t size)
495     {
496 nigel 73 void *block = malloc(size);
497 nigel 43 gotten_store = size;
498 nigel 73 if (show_malloc)
499 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
500 nigel 73 return block;
501 nigel 3 }
502    
503 nigel 73 static void new_free(void *block)
504     {
505     if (show_malloc)
506     fprintf(outfile, "free %p\n", block);
507     free(block);
508     }
509 nigel 3
510    
511 nigel 73 /* For recursion malloc/free, to test stacking calls */
512    
513     static void *stack_malloc(size_t size)
514     {
515     void *block = malloc(size);
516     if (show_malloc)
517 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
518 nigel 73 return block;
519     }
520    
521     static void stack_free(void *block)
522     {
523     if (show_malloc)
524     fprintf(outfile, "stack_free %p\n", block);
525     free(block);
526     }
527    
528    
529 nigel 63 /*************************************************
530     * Call pcre_fullinfo() *
531     *************************************************/
532 nigel 43
533     /* Get one piece of information from the pcre_fullinfo() function */
534    
535     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
536     {
537     int rc;
538     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
539     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
540     }
541    
542    
543    
544 nigel 63 /*************************************************
545 nigel 75 * Byte flipping function *
546     *************************************************/
547    
/* Reverse the byte order of the low-order bytes of a value.

Arguments:
  value    the value to flip
  n        2 to swap the two low-order bytes; any other value reverses the
           four low-order bytes

Returns:   the flipped value; bytes above those flipped are discarded
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
int width = (n == 2)? 2 : 4;
unsigned long int flipped = 0;
int k;

/* Move byte k (counting from the low end) to position width-1-k. */

for (k = 0; k < width; k++)
  {
  unsigned long int byte = (value >> (8*k)) & 0xff;
  flipped |= byte << (8*(width - 1 - k));
  }

return flipped;
}
557    
558    
559    
560    
561     /*************************************************
562 nigel 87 * Check match or recursion limit *
563     *************************************************/
564    
565     static int
566     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
567     int start_offset, int options, int *use_offsets, int use_size_offsets,
568     int flag, unsigned long int *limit, int errnumber, const char *msg)
569     {
570     int count;
571     int min = 0;
572     int mid = 64;
573     int max = -1;
574    
575     extra->flags |= flag;
576    
577     for (;;)
578     {
579     *limit = mid;
580    
581     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
582     use_offsets, use_size_offsets);
583    
584     if (count == errnumber)
585     {
586     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
587     min = mid;
588     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
589     }
590    
591     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
592     count == PCRE_ERROR_PARTIAL)
593     {
594     if (mid == min + 1)
595     {
596     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
597     break;
598     }
599     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
600     max = mid;
601     mid = (min + mid)/2;
602     }
603     else break; /* Some other error */
604     }
605    
606     extra->flags &= ~flag;
607     return count;
608     }
609    
610    
611    
612     /*************************************************
613 nigel 91 * Check newline indicator *
614     *************************************************/
615    
616     /* This is used both at compile and run-time to check for <xxx> escapes, where
617     xxx is LF, CR, or CRLF. Print a message and return 0 if there is no match.
618    
619     Arguments:
620     p points after the leading '<'
621     f file for error message
622    
623     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
624     */
625    
626     static int
627     check_newline(uschar *p, FILE *f)
628     {
629     if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
630     if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
631     if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
632     fprintf(f, "Unknown newline type at: <%s\n", p);
633     return 0;
634     }
635    
636    
637    
638     /*************************************************
639 nigel 63 * Main Program *
640     *************************************************/
641 nigel 43
642 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
643     consist of a regular expression, in delimiters and optionally followed by
644     options, followed by a set of test data, terminated by an empty line. */
645    
646     int main(int argc, char **argv)
647     {
648     FILE *infile = stdin;
649     int options = 0;
650     int study_options = 0;
651     int op = 1;
652     int timeit = 0;
653     int showinfo = 0;
654 nigel 31 int showstore = 0;
655 nigel 87 int quiet = 0;
656 nigel 53 int size_offsets = 45;
657     int size_offsets_max;
658 nigel 77 int *offsets = NULL;
659 nigel 53 #if !defined NOPOSIX
660 nigel 3 int posix = 0;
661 nigel 53 #endif
662 nigel 3 int debug = 0;
663 nigel 11 int done = 0;
664 nigel 77 int all_use_dfa = 0;
665     int yield = 0;
666 nigel 91 int stack_size;
667 nigel 3
668 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
669     that 1024 is plenty long enough for the few names we'll be testing. */
670 nigel 69
671 nigel 91 uschar copynames[1024];
672     uschar getnames[1024];
673    
674     uschar *copynamesptr;
675     uschar *getnamesptr;
676    
677 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
678 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
679 nigel 69
680 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
681     dbuffer = (unsigned char *)malloc(buffer_size);
682     pbuffer = (unsigned char *)malloc(buffer_size);
683 nigel 69
684 nigel 75 /* The outfile variable is static so that new_malloc can use it. The _setmode()
685     stuff is some magic that I don't understand, but which apparently does good
686     things in Windows. It's related to line terminations. */
687 nigel 3
688 nigel 75 #if defined(_WIN32) || defined(WIN32)
689     _setmode( _fileno( stdout ), 0x8000 );
690     #endif /* defined(_WIN32) || defined(WIN32) */
691    
692 nigel 3 outfile = stdout;
693    
694     /* Scan options */
695    
696     while (argc > 1 && argv[op][0] == '-')
697     {
698 nigel 63 unsigned char *endptr;
699 nigel 53
700 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
701     showstore = 1;
702 nigel 3 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
703 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
704 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
705     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
706 nigel 79 #if !defined NODFA
707 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
708 nigel 79 #endif
709 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
710 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
711     *endptr == 0))
712 nigel 53 {
713     op++;
714     argc--;
715     }
716 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
717     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
718     *endptr == 0))
719     {
720     #ifdef _WIN32
721     printf("PCRE: -S not supported on this OS\n");
722     exit(1);
723     #else
724     int rc;
725     struct rlimit rlim;
726     getrlimit(RLIMIT_STACK, &rlim);
727     rlim.rlim_cur = stack_size * 1024 * 1024;
728     rc = setrlimit(RLIMIT_STACK, &rlim);
729     if (rc != 0)
730     {
731     printf("PCRE: setrlimit() failed with error %d\n", rc);
732     exit(1);
733     }
734     op++;
735     argc--;
736     #endif
737     }
738 nigel 53 #if !defined NOPOSIX
739 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
740 nigel 53 #endif
741 nigel 63 else if (strcmp(argv[op], "-C") == 0)
742     {
743     int rc;
744     printf("PCRE version %s\n", pcre_version());
745     printf("Compiled with\n");
746     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
747     printf(" %sUTF-8 support\n", rc? "" : "No ");
748 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
749     printf(" %sUnicode properties support\n", rc? "" : "No ");
750 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
751 nigel 91 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
752     (rc == '\n')? "LF" : "CRLF");
753 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
754     printf(" Internal link size = %d\n", rc);
755     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
756     printf(" POSIX malloc threshold = %d\n", rc);
757     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
758     printf(" Default match limit = %d\n", rc);
759 nigel 87 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
760     printf(" Default recursion depth limit = %d\n", rc);
761 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
762     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
763 nigel 63 exit(0);
764     }
765 nigel 3 else
766     {
767 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
768 nigel 91 printf("Usage: pcretest [options] [<input> [<output>]]\n");
769 nigel 63 printf(" -C show PCRE compile-time options and exit\n");
770 nigel 77 printf(" -d debug: show compiled code; implies -i\n");
771 nigel 79 #if !defined NODFA
772 nigel 77 printf(" -dfa force DFA matching for all subjects\n");
773 nigel 79 #endif
774 nigel 77 printf(" -i show information about compiled pattern\n"
775 nigel 75 " -m output memory used information\n"
776 nigel 53 " -o <n> set size of offsets vector to <n>\n");
777     #if !defined NOPOSIX
778     printf(" -p use POSIX interface\n");
779     #endif
780 nigel 91 printf(" -S <n> set stack size to <n> megabytes\n");
781 nigel 75 printf(" -s output store (memory) used information\n"
782 nigel 53 " -t time compilation and execution\n");
783 nigel 77 yield = 1;
784     goto EXIT;
785 nigel 3 }
786     op++;
787     argc--;
788     }
789    
790 nigel 53 /* Get the store for the offsets vector, and remember what it was */
791    
792     size_offsets_max = size_offsets;
793 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
794 nigel 53 if (offsets == NULL)
795     {
796     printf("** Failed to get %d bytes of memory for offsets vector\n",
797     size_offsets_max * sizeof(int));
798 nigel 77 yield = 1;
799     goto EXIT;
800 nigel 53 }
801    
802 nigel 3 /* Sort out the input and output files */
803    
804     if (argc > 1)
805     {
806 nigel 75 infile = fopen(argv[op], "rb");
807 nigel 3 if (infile == NULL)
808     {
809     printf("** Failed to open %s\n", argv[op]);
810 nigel 77 yield = 1;
811     goto EXIT;
812 nigel 3 }
813     }
814    
815     if (argc > 2)
816     {
817 nigel 75 outfile = fopen(argv[op+1], "wb");
818 nigel 3 if (outfile == NULL)
819     {
820     printf("** Failed to open %s\n", argv[op+1]);
821 nigel 77 yield = 1;
822     goto EXIT;
823 nigel 3 }
824     }
825    
826     /* Set alternative malloc function */
827    
828     pcre_malloc = new_malloc;
829 nigel 73 pcre_free = new_free;
830     pcre_stack_malloc = stack_malloc;
831     pcre_stack_free = stack_free;
832 nigel 3
833 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
834 nigel 3
835 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
836 nigel 3
837     /* Main loop */
838    
839 nigel 11 while (!done)
840 nigel 3 {
841     pcre *re = NULL;
842     pcre_extra *extra = NULL;
843 nigel 37
844     #if !defined NOPOSIX /* There are still compilers that require no indent */
845 nigel 3 regex_t preg;
846 nigel 45 int do_posix = 0;
847 nigel 37 #endif
848    
849 nigel 7 const char *error;
850 nigel 25 unsigned char *p, *pp, *ppp;
851 nigel 75 unsigned char *to_file = NULL;
852 nigel 53 const unsigned char *tables = NULL;
853 nigel 75 unsigned long int true_size, true_study_size = 0;
854     size_t size, regex_gotten_store;
855 nigel 3 int do_study = 0;
856 nigel 25 int do_debug = debug;
857 nigel 35 int do_G = 0;
858     int do_g = 0;
859 nigel 25 int do_showinfo = showinfo;
860 nigel 35 int do_showrest = 0;
861 nigel 75 int do_flip = 0;
862 nigel 3 int erroroffset, len, delimiter;
863    
864 nigel 67 use_utf8 = 0;
865 nigel 63
866 nigel 3 if (infile == stdin) printf(" re> ");
867 nigel 91 if (extend_inputline(infile, buffer) == NULL) break;
868 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
869 nigel 63 fflush(outfile);
870 nigel 3
871     p = buffer;
872     while (isspace(*p)) p++;
873     if (*p == 0) continue;
874    
875 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
876 nigel 3
877 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
878     {
879 nigel 91 unsigned long int magic, get_options;
880 nigel 75 uschar sbuf[8];
881     FILE *f;
882    
883     p++;
884     pp = p + (int)strlen((char *)p);
885     while (isspace(pp[-1])) pp--;
886     *pp = 0;
887    
888     f = fopen((char *)p, "rb");
889     if (f == NULL)
890     {
891     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
892     continue;
893     }
894    
895     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
896    
897     true_size =
898     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
899     true_study_size =
900     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
901    
902     re = (real_pcre *)new_malloc(true_size);
903     regex_gotten_store = gotten_store;
904    
905     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
906    
907     magic = ((real_pcre *)re)->magic_number;
908     if (magic != MAGIC_NUMBER)
909     {
910     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
911     {
912     do_flip = 1;
913     }
914     else
915     {
916     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
917     fclose(f);
918     continue;
919     }
920     }
921    
922     fprintf(outfile, "Compiled regex%s loaded from %s\n",
923     do_flip? " (byte-inverted)" : "", p);
924    
925     /* Need to know if UTF-8 for printing data strings */
926    
927 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
928     use_utf8 = (get_options & PCRE_UTF8) != 0;
929 nigel 75
930     /* Now see if there is any following study data */
931    
932     if (true_study_size != 0)
933     {
934     pcre_study_data *psd;
935    
936     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
937     extra->flags = PCRE_EXTRA_STUDY_DATA;
938    
939     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
940     extra->study_data = psd;
941    
942     if (fread(psd, 1, true_study_size, f) != true_study_size)
943     {
944     FAIL_READ:
945     fprintf(outfile, "Failed to read data from %s\n", p);
946     if (extra != NULL) new_free(extra);
947     if (re != NULL) new_free(re);
948     fclose(f);
949     continue;
950     }
951     fprintf(outfile, "Study data loaded from %s\n", p);
952     do_study = 1; /* To get the data output if requested */
953     }
954     else fprintf(outfile, "No study data\n");
955    
956     fclose(f);
957     goto SHOW_INFO;
958     }
959    
960     /* In-line pattern (the usual case). Get the delimiter and seek the end of
961     the pattern; if it isn't complete, read more. */
962    
963 nigel 3 delimiter = *p++;
964    
965 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
966 nigel 3 {
967 nigel 29 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
968 nigel 3 goto SKIP_DATA;
969     }
970    
971     pp = p;
972    
973     for(;;)
974     {
975 nigel 29 while (*pp != 0)
976     {
977     if (*pp == '\\' && pp[1] != 0) pp++;
978     else if (*pp == delimiter) break;
979     pp++;
980     }
981 nigel 3 if (*pp != 0) break;
982     if (infile == stdin) printf(" > ");
983 nigel 91 if ((pp = extend_inputline(infile, pp)) == NULL)
984 nigel 3 {
985     fprintf(outfile, "** Unexpected EOF\n");
986 nigel 11 done = 1;
987     goto CONTINUE;
988 nigel 3 }
989 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
990 nigel 3 }
991    
992 nigel 29 /* If the first character after the delimiter is backslash, make
993     the pattern end with backslash. This is purely to provide a way
994     of testing for the error message when a pattern ends with backslash. */
995    
996     if (pp[1] == '\\') *pp++ = '\\';
997    
998 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
999     for callouts. */
1000 nigel 3
1001     *pp++ = 0;
1002 nigel 75 strcpy((char *)pbuffer, (char *)p);
1003 nigel 3
1004     /* Look for options after final delimiter */
1005    
1006     options = 0;
1007     study_options = 0;
1008 nigel 31 log_store = showstore; /* default from command line */
1009    
1010 nigel 3 while (*pp != 0)
1011     {
1012     switch (*pp++)
1013     {
1014 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1015 nigel 35 case 'g': do_g = 1; break;
1016 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1017     case 'm': options |= PCRE_MULTILINE; break;
1018     case 's': options |= PCRE_DOTALL; break;
1019     case 'x': options |= PCRE_EXTENDED; break;
1020 nigel 25
1021 nigel 35 case '+': do_showrest = 1; break;
1022 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1023 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1024 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1025 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1026 nigel 75 case 'F': do_flip = 1; break;
1027 nigel 35 case 'G': do_G = 1; break;
1028 nigel 25 case 'I': do_showinfo = 1; break;
1029 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1030 nigel 31 case 'M': log_store = 1; break;
1031 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1032 nigel 37
1033     #if !defined NOPOSIX
1034 nigel 3 case 'P': do_posix = 1; break;
1035 nigel 37 #endif
1036    
1037 nigel 3 case 'S': do_study = 1; break;
1038 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1039 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1040 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1041 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1042 nigel 25
1043     case 'L':
1044     ppp = pp;
1045 nigel 77 /* The '\r' test here is so that it works on Windows */
1046     while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1047 nigel 25 *ppp = 0;
1048     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1049     {
1050     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1051     goto SKIP_DATA;
1052     }
1053     tables = pcre_maketables();
1054     pp = ppp;
1055     break;
1056    
1057 nigel 75 case '>':
1058     to_file = pp;
1059     while (*pp != 0) pp++;
1060     while (isspace(pp[-1])) pp--;
1061     *pp = 0;
1062     break;
1063    
1064 nigel 91 case '<':
1065     {
1066     int x = check_newline(pp, outfile);
1067     if (x == 0) goto SKIP_DATA;
1068     options |= x;
1069     while (*pp++ != '>');
1070     }
1071     break;
1072    
1073 nigel 77 case '\r': /* So that it works in Windows */
1074     case '\n':
1075     case ' ':
1076     break;
1077 nigel 75
1078 nigel 3 default:
1079     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1080     goto SKIP_DATA;
1081     }
1082     }
1083    
1084 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1085 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1086     local character tables. */
1087 nigel 3
1088 nigel 37 #if !defined NOPOSIX
1089 nigel 3 if (posix || do_posix)
1090     {
1091     int rc;
1092     int cflags = 0;
1093 nigel 75
1094 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1095     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1096 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1097 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1098     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1099    
1100 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1101    
1102     /* Compilation failed; go back for another re, skipping to blank line
1103     if non-interactive. */
1104    
1105     if (rc != 0)
1106     {
1107 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1108 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1109     goto SKIP_DATA;
1110     }
1111     }
1112    
1113     /* Handle compiling via the native interface */
1114    
1115     else
1116 nigel 37 #endif /* !defined NOPOSIX */
1117    
1118 nigel 3 {
1119     if (timeit)
1120     {
1121     register int i;
1122     clock_t time_taken;
1123     clock_t start_time = clock();
1124 nigel 23 for (i = 0; i < LOOPREPEAT; i++)
1125 nigel 3 {
1126 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1127 nigel 3 if (re != NULL) free(re);
1128     }
1129     time_taken = clock() - start_time;
1130 nigel 27 fprintf(outfile, "Compile time %.3f milliseconds\n",
1131 nigel 63 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1132     (double)CLOCKS_PER_SEC);
1133 nigel 3 }
1134    
1135 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1136 nigel 3
1137     /* Compilation failed; go back for another re, skipping to blank line
1138     if non-interactive. */
1139    
1140     if (re == NULL)
1141     {
1142     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1143     SKIP_DATA:
1144     if (infile != stdin)
1145     {
1146     for (;;)
1147     {
1148 nigel 91 if (extend_inputline(infile, buffer) == NULL)
1149 nigel 11 {
1150     done = 1;
1151     goto CONTINUE;
1152     }
1153 nigel 3 len = (int)strlen((char *)buffer);
1154     while (len > 0 && isspace(buffer[len-1])) len--;
1155     if (len == 0) break;
1156     }
1157     fprintf(outfile, "\n");
1158     }
1159 nigel 25 goto CONTINUE;
1160 nigel 3 }
1161    
1162 nigel 43 /* Compilation succeeded; print data if required. There are now two
1163     info-returning functions. The old one has a limited interface and
1164     returns only limited data. Check that it agrees with the newer one. */
1165 nigel 3
1166 nigel 63 if (log_store)
1167     fprintf(outfile, "Memory allocation (code space): %d\n",
1168     (int)(gotten_store -
1169     sizeof(real_pcre) -
1170     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1171    
1172 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1173     and remember the store that was got. */
1174    
1175     true_size = ((real_pcre *)re)->size;
1176     regex_gotten_store = gotten_store;
1177    
1178     /* If /S was present, study the regexp to generate additional info to
1179     help with the matching. */
1180    
1181     if (do_study)
1182     {
1183     if (timeit)
1184     {
1185     register int i;
1186     clock_t time_taken;
1187     clock_t start_time = clock();
1188     for (i = 0; i < LOOPREPEAT; i++)
1189     extra = pcre_study(re, study_options, &error);
1190     time_taken = clock() - start_time;
1191     if (extra != NULL) free(extra);
1192     fprintf(outfile, " Study time %.3f milliseconds\n",
1193     (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1194     (double)CLOCKS_PER_SEC);
1195     }
1196     extra = pcre_study(re, study_options, &error);
1197     if (error != NULL)
1198     fprintf(outfile, "Failed to study: %s\n", error);
1199     else if (extra != NULL)
1200     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1201     }
1202    
1203     /* If the 'F' option was present, we flip the bytes of all the integer
1204     fields in the regex data block and the study block. This is to make it
1205     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1206     compiled on a different architecture. */
1207    
1208     if (do_flip)
1209     {
1210     real_pcre *rre = (real_pcre *)re;
1211     rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1212     rre->size = byteflip(rre->size, sizeof(rre->size));
1213     rre->options = byteflip(rre->options, sizeof(rre->options));
1214     rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1215     rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1216     rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1217     rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1218     rre->name_table_offset = byteflip(rre->name_table_offset,
1219     sizeof(rre->name_table_offset));
1220     rre->name_entry_size = byteflip(rre->name_entry_size,
1221     sizeof(rre->name_entry_size));
1222     rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1223    
1224     if (extra != NULL)
1225     {
1226     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1227     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1228     rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1229     }
1230     }
1231    
1232     /* Extract information from the compiled data if required */
1233    
1234     SHOW_INFO:
1235    
1236 nigel 25 if (do_showinfo)
1237 nigel 3 {
1238 nigel 75 unsigned long int get_options, all_options;
1239 nigel 79 #if !defined NOINFOCHECK
1240 nigel 43 int old_first_char, old_options, old_count;
1241 nigel 79 #endif
1242 nigel 43 int count, backrefmax, first_char, need_char;
1243 nigel 63 int nameentrysize, namecount;
1244     const uschar *nametable;
1245 nigel 3
1246 nigel 63 if (do_debug)
1247     {
1248     fprintf(outfile, "------------------------------------------------------------------\n");
1249 nigel 85 pcre_printint(re, outfile);
1250 nigel 63 }
1251 nigel 3
1252 nigel 53 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1253 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1254     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1255     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1256 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1257 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1258 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1259     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1260 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1261 nigel 43
1262 nigel 79 #if !defined NOINFOCHECK
1263 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1264 nigel 3 if (count < 0) fprintf(outfile,
1265 nigel 43 "Error %d from pcre_info()\n", count);
1266 nigel 3 else
1267     {
1268 nigel 43 if (old_count != count) fprintf(outfile,
1269     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1270     old_count);
1271 nigel 37
1272 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1273     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1274     first_char, old_first_char);
1275 nigel 37
1276 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1277     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1278     get_options, old_options);
1279 nigel 43 }
1280 nigel 79 #endif
1281 nigel 43
1282 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1283 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1284 nigel 77 (int)size, (int)regex_gotten_store);
1285 nigel 43
1286     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1287     if (backrefmax > 0)
1288     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1289 nigel 63
1290     if (namecount > 0)
1291     {
1292     fprintf(outfile, "Named capturing subpatterns:\n");
1293     while (namecount-- > 0)
1294     {
1295     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1296     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1297     GET2(nametable, 0));
1298     nametable += nameentrysize;
1299     }
1300     }
1301    
1302 nigel 75 /* The NOPARTIAL bit is a private bit in the options, so we have
1303     to fish it out via our back door */
1304    
1305     all_options = ((real_pcre *)re)->options;
1306     if (do_flip)
1307     {
1308     all_options = byteflip(all_options, sizeof(all_options));
1309 nigel 91 }
1310 nigel 75
1311     if ((all_options & PCRE_NOPARTIAL) != 0)
1312     fprintf(outfile, "Partial matching not supported\n");
1313    
1314 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1315 nigel 91 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1316 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1317     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1318     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1319     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1320 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1321 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1322     ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1323     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1324     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1325 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1326 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1327 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1328     ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1329 nigel 43
1330 nigel 91 switch (get_options & PCRE_NEWLINE_CRLF)
1331     {
1332     case PCRE_NEWLINE_CR:
1333     fprintf(outfile, "Forced newline sequence: CR\n");
1334     break;
1335 nigel 43
1336 nigel 91 case PCRE_NEWLINE_LF:
1337     fprintf(outfile, "Forced newline sequence: LF\n");
1338     break;
1339    
1340     case PCRE_NEWLINE_CRLF:
1341     fprintf(outfile, "Forced newline sequence: CRLF\n");
1342     break;
1343    
1344     default:
1345     break;
1346     }
1347    
1348 nigel 43 if (first_char == -1)
1349     {
1350 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1351 nigel 43 }
1352     else if (first_char < 0)
1353     {
1354     fprintf(outfile, "No first char\n");
1355     }
1356     else
1357     {
1358 nigel 63 int ch = first_char & 255;
1359 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1360 nigel 63 "" : " (caseless)";
1361     if (isprint(ch))
1362     fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1363 nigel 3 else
1364 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1365 nigel 43 }
1366 nigel 37
1367 nigel 43 if (need_char < 0)
1368     {
1369     fprintf(outfile, "No need char\n");
1370 nigel 3 }
1371 nigel 43 else
1372     {
1373 nigel 63 int ch = need_char & 255;
1374 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1375 nigel 63 "" : " (caseless)";
1376     if (isprint(ch))
1377     fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1378 nigel 43 else
1379 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1380 nigel 43 }
1381 nigel 75
1382     /* Don't output study size; at present it is in any case a fixed
1383     value, but it varies, depending on the computer architecture, and
1384     so messes up the test suite. (And with the /F option, it might be
1385     flipped.) */
1386    
1387     if (do_study)
1388     {
1389     if (extra == NULL)
1390     fprintf(outfile, "Study returned NULL\n");
1391     else
1392     {
1393     uschar *start_bits = NULL;
1394     new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1395    
1396     if (start_bits == NULL)
1397     fprintf(outfile, "No starting byte set\n");
1398     else
1399     {
1400     int i;
1401     int c = 24;
1402     fprintf(outfile, "Starting byte set: ");
1403     for (i = 0; i < 256; i++)
1404     {
1405     if ((start_bits[i/8] & (1<<(i&7))) != 0)
1406     {
1407     if (c > 75)
1408     {
1409     fprintf(outfile, "\n ");
1410     c = 2;
1411     }
1412     if (isprint(i) && i != ' ')
1413     {
1414     fprintf(outfile, "%c ", i);
1415     c += 2;
1416     }
1417     else
1418     {
1419     fprintf(outfile, "\\x%02x ", i);
1420     c += 5;
1421     }
1422     }
1423     }
1424     fprintf(outfile, "\n");
1425     }
1426     }
1427     }
1428 nigel 3 }
1429    
1430 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
1431     that is all. The first 8 bytes of the file are the regex length and then
1432     the study length, in big-endian order. */
1433 nigel 3
1434 nigel 75 if (to_file != NULL)
1435 nigel 3 {
1436 nigel 75 FILE *f = fopen((char *)to_file, "wb");
1437     if (f == NULL)
1438 nigel 3 {
1439 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1440 nigel 3 }
1441 nigel 75 else
1442     {
1443     uschar sbuf[8];
1444     sbuf[0] = (true_size >> 24) & 255;
1445     sbuf[1] = (true_size >> 16) & 255;
1446     sbuf[2] = (true_size >> 8) & 255;
1447     sbuf[3] = (true_size) & 255;
1448 nigel 3
1449 nigel 75 sbuf[4] = (true_study_size >> 24) & 255;
1450     sbuf[5] = (true_study_size >> 16) & 255;
1451     sbuf[6] = (true_study_size >> 8) & 255;
1452     sbuf[7] = (true_study_size) & 255;
1453 nigel 3
1454 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
1455     fwrite(re, 1, true_size, f) < true_size)
1456     {
1457     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1458     }
1459 nigel 3 else
1460     {
1461 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1462     if (extra != NULL)
1463 nigel 3 {
1464 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
1465     true_study_size)
1466 nigel 3 {
1467 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
1468     strerror(errno));
1469 nigel 3 }
1470 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
1471 nigel 3 }
1472     }
1473 nigel 75 fclose(f);
1474 nigel 3 }
1475 nigel 77
1476     new_free(re);
1477     if (extra != NULL) new_free(extra);
1478     if (tables != NULL) new_free((void *)tables);
1479 nigel 75 continue; /* With next regex */
1480 nigel 3 }
1481 nigel 75 } /* End of non-POSIX compile */
1482 nigel 3
1483     /* Read data lines and test them */
1484    
1485     for (;;)
1486     {
1487 nigel 87 uschar *q;
1488     uschar *bptr = dbuffer;
1489 nigel 57 int *use_offsets = offsets;
1490 nigel 53 int use_size_offsets = size_offsets;
1491 nigel 63 int callout_data = 0;
1492     int callout_data_set = 0;
1493 nigel 3 int count, c;
1494 nigel 29 int copystrings = 0;
1495 nigel 63 int find_match_limit = 0;
1496 nigel 29 int getstrings = 0;
1497     int getlist = 0;
1498 nigel 39 int gmatched = 0;
1499 nigel 35 int start_offset = 0;
1500 nigel 41 int g_notempty = 0;
1501 nigel 77 int use_dfa = 0;
1502 nigel 3
1503     options = 0;
1504    
1505 nigel 91 *copynames = 0;
1506     *getnames = 0;
1507    
1508     copynamesptr = copynames;
1509     getnamesptr = getnames;
1510    
1511 nigel 63 pcre_callout = callout;
1512     first_callout = 1;
1513     callout_extra = 0;
1514     callout_count = 0;
1515     callout_fail_count = 999999;
1516     callout_fail_id = -1;
1517 nigel 73 show_malloc = 0;
1518 nigel 63
1519 nigel 91 if (extra != NULL) extra->flags &=
1520     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1521    
1522     len = 0;
1523     for (;;)
1524 nigel 11 {
1525 nigel 91 if (infile == stdin) printf("data> ");
1526     if (extend_inputline(infile, buffer + len) == NULL)
1527     {
1528     if (len > 0) break;
1529     done = 1;
1530     goto CONTINUE;
1531     }
1532     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1533     len = (int)strlen((char *)buffer);
1534     if (buffer[len-1] == '\n') break;
1535 nigel 11 }
1536 nigel 3
1537     while (len > 0 && isspace(buffer[len-1])) len--;
1538     buffer[len] = 0;
1539     if (len == 0) break;
1540    
1541     p = buffer;
1542     while (isspace(*p)) p++;
1543    
1544 nigel 9 q = dbuffer;
1545 nigel 3 while ((c = *p++) != 0)
1546     {
1547     int i = 0;
1548     int n = 0;
1549 nigel 63
1550 nigel 3 if (c == '\\') switch ((c = *p++))
1551     {
1552     case 'a': c = 7; break;
1553     case 'b': c = '\b'; break;
1554     case 'e': c = 27; break;
1555     case 'f': c = '\f'; break;
1556     case 'n': c = '\n'; break;
1557     case 'r': c = '\r'; break;
1558     case 't': c = '\t'; break;
1559     case 'v': c = '\v'; break;
1560    
1561     case '0': case '1': case '2': case '3':
1562     case '4': case '5': case '6': case '7':
1563     c -= '0';
1564     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1565     c = c * 8 + *p++ - '0';
1566 nigel 91
1567     #if !defined NOUTF8
1568     if (use_utf8 && c > 255)
1569     {
1570     unsigned char buff8[8];
1571     int ii, utn;
1572     utn = ord2utf8(c, buff8);
1573     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1574     c = buff8[ii]; /* Last byte */
1575     }
1576     #endif
1577 nigel 3 break;
1578    
1579     case 'x':
1580 nigel 49
1581     /* Handle \x{..} specially - new Perl thing for utf8 */
1582    
1583 nigel 79 #if !defined NOUTF8
1584 nigel 49 if (*p == '{')
1585     {
1586     unsigned char *pt = p;
1587     c = 0;
1588     while (isxdigit(*(++pt)))
1589     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1590     if (*pt == '}')
1591     {
1592 nigel 67 unsigned char buff8[8];
1593 nigel 49 int ii, utn;
1594 nigel 85 utn = ord2utf8(c, buff8);
1595 nigel 67 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1596     c = buff8[ii]; /* Last byte */
1597 nigel 49 p = pt + 1;
1598     break;
1599     }
1600     /* Not correct form; fall through */
1601     }
1602 nigel 79 #endif
1603 nigel 49
1604     /* Ordinary \x */
1605    
1606 nigel 3 c = 0;
1607     while (i++ < 2 && isxdigit(*p))
1608     {
1609     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1610     p++;
1611     }
1612     break;
1613    
1614 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
1615 nigel 3 p--;
1616     continue;
1617    
1618 nigel 75 case '>':
1619     while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1620     continue;
1621    
1622 nigel 3 case 'A': /* Option setting */
1623     options |= PCRE_ANCHORED;
1624     continue;
1625    
1626     case 'B':
1627     options |= PCRE_NOTBOL;
1628     continue;
1629    
1630 nigel 29 case 'C':
1631 nigel 63 if (isdigit(*p)) /* Set copy string */
1632     {
1633     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1634     copystrings |= 1 << n;
1635     }
1636     else if (isalnum(*p))
1637     {
1638 nigel 91 uschar *npp = copynamesptr;
1639 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1640 nigel 91 *npp++ = 0;
1641 nigel 67 *npp = 0;
1642 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1643 nigel 63 if (n < 0)
1644 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1645     copynamesptr = npp;
1646 nigel 63 }
1647     else if (*p == '+')
1648     {
1649     callout_extra = 1;
1650     p++;
1651     }
1652     else if (*p == '-')
1653     {
1654     pcre_callout = NULL;
1655     p++;
1656     }
1657     else if (*p == '!')
1658     {
1659     callout_fail_id = 0;
1660     p++;
1661     while(isdigit(*p))
1662     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1663     callout_fail_count = 0;
1664     if (*p == '!')
1665     {
1666     p++;
1667     while(isdigit(*p))
1668     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1669     }
1670     }
1671     else if (*p == '*')
1672     {
1673     int sign = 1;
1674     callout_data = 0;
1675     if (*(++p) == '-') { sign = -1; p++; }
1676     while(isdigit(*p))
1677     callout_data = callout_data * 10 + *p++ - '0';
1678     callout_data *= sign;
1679     callout_data_set = 1;
1680     }
1681 nigel 29 continue;
1682    
1683 nigel 79 #if !defined NODFA
1684 nigel 77 case 'D':
1685 nigel 79 #if !defined NOPOSIX
1686 nigel 77 if (posix || do_posix)
1687     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1688     else
1689 nigel 79 #endif
1690 nigel 77 use_dfa = 1;
1691     continue;
1692    
1693     case 'F':
1694     options |= PCRE_DFA_SHORTEST;
1695     continue;
1696 nigel 79 #endif
1697 nigel 77
1698 nigel 29 case 'G':
1699 nigel 63 if (isdigit(*p))
1700     {
1701     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1702     getstrings |= 1 << n;
1703     }
1704     else if (isalnum(*p))
1705     {
1706 nigel 91 uschar *npp = getnamesptr;
1707 nigel 67 while (isalnum(*p)) *npp++ = *p++;
1708 nigel 91 *npp++ = 0;
1709 nigel 67 *npp = 0;
1710 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1711 nigel 63 if (n < 0)
1712 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1713     getnamesptr = npp;
1714 nigel 63 }
1715 nigel 29 continue;
1716    
1717     case 'L':
1718     getlist = 1;
1719     continue;
1720    
1721 nigel 63 case 'M':
1722     find_match_limit = 1;
1723     continue;
1724    
1725 nigel 37 case 'N':
1726     options |= PCRE_NOTEMPTY;
1727     continue;
1728    
1729 nigel 3 case 'O':
1730     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1731 nigel 53 if (n > size_offsets_max)
1732     {
1733     size_offsets_max = n;
1734 nigel 57 free(offsets);
1735 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1736 nigel 53 if (offsets == NULL)
1737     {
1738     printf("** Failed to get %d bytes of memory for offsets vector\n",
1739     size_offsets_max * sizeof(int));
1740 nigel 77 yield = 1;
1741     goto EXIT;
1742 nigel 53 }
1743     }
1744     use_size_offsets = n;
1745 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1746 nigel 3 continue;
1747    
1748 nigel 75 case 'P':
1749     options |= PCRE_PARTIAL;
1750     continue;
1751    
1752 nigel 91 case 'Q':
1753     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1754     if (extra == NULL)
1755     {
1756     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1757     extra->flags = 0;
1758     }
1759     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1760     extra->match_limit_recursion = n;
1761     continue;
1762    
1763     case 'q':
1764     while(isdigit(*p)) n = n * 10 + *p++ - '0';
1765     if (extra == NULL)
1766     {
1767     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1768     extra->flags = 0;
1769     }
1770     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1771     extra->match_limit = n;
1772     continue;
1773    
1774 nigel 79 #if !defined NODFA
1775 nigel 77 case 'R':
1776     options |= PCRE_DFA_RESTART;
1777     continue;
1778 nigel 79 #endif
1779 nigel 77
1780 nigel 73 case 'S':
1781     show_malloc = 1;
1782     continue;
1783    
1784 nigel 3 case 'Z':
1785     options |= PCRE_NOTEOL;
1786     continue;
1787 nigel 71
1788     case '?':
1789     options |= PCRE_NO_UTF8_CHECK;
1790     continue;
1791 nigel 91
1792     case '<':
1793     {
1794     int x = check_newline(p, outfile);
1795     if (x == 0) goto NEXT_DATA;
1796     options |= x;
1797     while (*p++ != '>');
1798     }
1799     continue;
1800 nigel 3 }
1801 nigel 9 *q++ = c;
1802 nigel 3 }
1803 nigel 9 *q = 0;
1804     len = q - dbuffer;
1805 nigel 3
1806 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
1807     {
1808     printf("**Match limit not relevant for DFA matching: ignored\n");
1809     find_match_limit = 0;
1810     }
1811    
1812 nigel 3 /* Handle matching via the POSIX interface, which does not
1813 nigel 63 support timing or playing with the match limit or callout data. */
1814 nigel 3
1815 nigel 37 #if !defined NOPOSIX
1816 nigel 3 if (posix || do_posix)
1817     {
1818     int rc;
1819     int eflags = 0;
1820 nigel 63 regmatch_t *pmatch = NULL;
1821     if (use_size_offsets > 0)
1822 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1823 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1824     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1825    
1826 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1827 nigel 3
1828     if (rc != 0)
1829     {
1830 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1831 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1832     }
1833 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1834     != 0)
1835     {
1836     fprintf(outfile, "Matched with REG_NOSUB\n");
1837     }
1838 nigel 3 else
1839     {
1840 nigel 7 size_t i;
1841 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
1842 nigel 3 {
1843     if (pmatch[i].rm_so >= 0)
1844     {
1845 nigel 23 fprintf(outfile, "%2d: ", (int)i);
1846 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
1847     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1848 nigel 3 fprintf(outfile, "\n");
1849 nigel 35 if (i == 0 && do_showrest)
1850     {
1851     fprintf(outfile, " 0+ ");
1852 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1853     outfile);
1854 nigel 35 fprintf(outfile, "\n");
1855     }
1856 nigel 3 }
1857     }
1858     }
1859 nigel 53 free(pmatch);
1860 nigel 3 }
1861    
1862 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
1863 nigel 3
1864 nigel 37 else
1865     #endif /* !defined NOPOSIX */
1866    
1867 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
1868 nigel 3 {
1869     if (timeit)
1870     {
1871     register int i;
1872     clock_t time_taken;
1873     clock_t start_time = clock();
1874 nigel 77
1875 nigel 79 #if !defined NODFA
1876 nigel 77 if (all_use_dfa || use_dfa)
1877     {
1878     int workspace[1000];
1879     for (i = 0; i < LOOPREPEAT; i++)
1880     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1881     options | g_notempty, use_offsets, use_size_offsets, workspace,
1882     sizeof(workspace)/sizeof(int));
1883     }
1884     else
1885 nigel 79 #endif
1886 nigel 77
1887 nigel 27 for (i = 0; i < LOOPREPEAT; i++)
1888 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
1889 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1890 nigel 77
1891 nigel 3 time_taken = clock() - start_time;
1892 nigel 27 fprintf(outfile, "Execute time %.3f milliseconds\n",
1893 nigel 63 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1894     (double)CLOCKS_PER_SEC);
1895 nigel 3 }
1896    
1897 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
1898 nigel 87 varying limits in order to find the minimum value for the match limit and
1899     for the recursion limit. */
1900 nigel 63
1901     if (find_match_limit)
1902     {
1903     if (extra == NULL)
1904     {
1905 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1906 nigel 63 extra->flags = 0;
1907     }
1908    
1909 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
1910 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
1911     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
1912     PCRE_ERROR_MATCHLIMIT, "match()");
1913 nigel 63
1914 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
1915     options|g_notempty, use_offsets, use_size_offsets,
1916     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
1917     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
1918 nigel 63 }
1919    
1920     /* If callout_data is set, use the interface with additional data */
1921    
1922     else if (callout_data_set)
1923     {
1924     if (extra == NULL)
1925     {
1926 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1927 nigel 63 extra->flags = 0;
1928     }
1929     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1930 nigel 71 extra->callout_data = &callout_data;
1931 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1932     options | g_notempty, use_offsets, use_size_offsets);
1933     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1934     }
1935    
1936     /* The normal case is just to do the match once, with the default
1937     value of match_limit. */
1938    
1939 nigel 79 #if !defined NODFA
1940 nigel 77 else if (all_use_dfa || use_dfa)
1941     {
1942     int workspace[1000];
1943     count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1944     options | g_notempty, use_offsets, use_size_offsets, workspace,
1945     sizeof(workspace)/sizeof(int));
1946     if (count == 0)
1947     {
1948     fprintf(outfile, "Matched, but too many subsidiary matches\n");
1949     count = use_size_offsets/2;
1950     }
1951     }
1952 nigel 79 #endif
1953 nigel 77
1954 nigel 75 else
1955     {
1956     count = pcre_exec(re, extra, (char *)bptr, len,
1957     start_offset, options | g_notempty, use_offsets, use_size_offsets);
1958 nigel 77 if (count == 0)
1959     {
1960     fprintf(outfile, "Matched, but too many substrings\n");
1961     count = use_size_offsets/3;
1962     }
1963 nigel 75 }
1964 nigel 3
1965 nigel 39 /* Matched */
1966    
1967 nigel 3 if (count >= 0)
1968     {
1969     int i;
1970 nigel 29 for (i = 0; i < count * 2; i += 2)
1971 nigel 3 {
1972 nigel 57 if (use_offsets[i] < 0)
1973 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
1974     else
1975     {
1976     fprintf(outfile, "%2d: ", i/2);
1977 nigel 63 (void)pchars(bptr + use_offsets[i],
1978     use_offsets[i+1] - use_offsets[i], outfile);
1979 nigel 3 fprintf(outfile, "\n");
1980 nigel 35 if (i == 0)
1981     {
1982     if (do_showrest)
1983     {
1984     fprintf(outfile, " 0+ ");
1985 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1986     outfile);
1987 nigel 35 fprintf(outfile, "\n");
1988     }
1989     }
1990 nigel 3 }
1991     }
1992 nigel 29
1993     for (i = 0; i < 32; i++)
1994     {
1995     if ((copystrings & (1 << i)) != 0)
1996     {
1997 nigel 91 char copybuffer[256];
1998 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1999 nigel 37 i, copybuffer, sizeof(copybuffer));
2000 nigel 29 if (rc < 0)
2001     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2002     else
2003 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2004 nigel 29 }
2005     }
2006    
2007 nigel 91 for (copynamesptr = copynames;
2008     *copynamesptr != 0;
2009     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2010     {
2011     char copybuffer[256];
2012     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2013     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2014     if (rc < 0)
2015     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2016     else
2017     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2018     }
2019    
2020 nigel 29 for (i = 0; i < 32; i++)
2021     {
2022     if ((getstrings & (1 << i)) != 0)
2023     {
2024     const char *substring;
2025 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2026 nigel 29 i, &substring);
2027     if (rc < 0)
2028     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2029     else
2030     {
2031     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2032 nigel 49 pcre_free_substring(substring);
2033 nigel 29 }
2034     }
2035     }
2036    
2037 nigel 91 for (getnamesptr = getnames;
2038     *getnamesptr != 0;
2039     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2040     {
2041     const char *substring;
2042     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2043     count, (char *)getnamesptr, &substring);
2044     if (rc < 0)
2045     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2046     else
2047     {
2048     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2049     pcre_free_substring(substring);
2050     }
2051     }
2052    
2053 nigel 29 if (getlist)
2054     {
2055     const char **stringlist;
2056 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2057 nigel 29 &stringlist);
2058     if (rc < 0)
2059     fprintf(outfile, "get substring list failed %d\n", rc);
2060     else
2061     {
2062     for (i = 0; i < count; i++)
2063     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2064     if (stringlist[i] != NULL)
2065     fprintf(outfile, "string list not terminated by NULL\n");
2066 nigel 49 /* free((void *)stringlist); */
2067     pcre_free_substring_list(stringlist);
2068 nigel 29 }
2069     }
2070 nigel 39 }
2071 nigel 29
2072 nigel 75 /* There was a partial match */
2073    
2074     else if (count == PCRE_ERROR_PARTIAL)
2075     {
2076 nigel 77 fprintf(outfile, "Partial match");
2077 nigel 79 #if !defined NODFA
2078 nigel 77 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2079     fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2080     bptr + use_offsets[0]);
2081 nigel 79 #endif
2082 nigel 77 fprintf(outfile, "\n");
2083 nigel 75 break; /* Out of the /g loop */
2084     }
2085    
2086 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2087 nigel 47 g_notempty after a null match, this is not necessarily the end.
2088 nigel 73 We want to advance the start offset, and continue. In the case of UTF-8
2089     matching, the advance must be one character, not one byte. Fudge the
2090     offset values to achieve this. We won't be at the end of the string -
2091     that was checked before setting g_notempty. */
2092 nigel 39
2093 nigel 3 else
2094     {
2095 nigel 41 if (g_notempty != 0)
2096 nigel 35 {
2097 nigel 73 int onechar = 1;
2098 nigel 57 use_offsets[0] = start_offset;
2099 nigel 73 if (use_utf8)
2100     {
2101     while (start_offset + onechar < len)
2102     {
2103     int tb = bptr[start_offset+onechar];
2104     if (tb <= 127) break;
2105     tb &= 0xc0;
2106     if (tb != 0 && tb != 0xc0) onechar++;
2107     }
2108     }
2109     use_offsets[1] = start_offset + onechar;
2110 nigel 35 }
2111 nigel 41 else
2112     {
2113 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2114 nigel 41 {
2115 nigel 73 if (gmatched == 0) fprintf(outfile, "No match\n");
2116 nigel 41 }
2117 nigel 73 else fprintf(outfile, "Error %d\n", count);
2118 nigel 41 break; /* Out of the /g loop */
2119     }
2120 nigel 3 }
2121 nigel 35
2122 nigel 39 /* If not /g or /G we are done */
2123    
2124     if (!do_g && !do_G) break;
2125    
2126 nigel 41 /* If we have matched an empty string, first check to see if we are at
2127     the end of the subject. If so, the /g loop is over. Otherwise, mimic
2128     what Perl's /g option does. This turns out to be rather cunning. First
2129 nigel 47 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2130     same point. If this fails (picked up above) we advance to the next
2131     character. */
2132 nigel 39
2133 nigel 41 g_notempty = 0;
2134 nigel 57 if (use_offsets[0] == use_offsets[1])
2135 nigel 41 {
2136 nigel 57 if (use_offsets[0] == len) break;
2137 nigel 47 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2138 nigel 41 }
2139 nigel 39
2140     /* For /g, update the start offset, leaving the rest alone */
2141    
2142 nigel 57 if (do_g) start_offset = use_offsets[1];
2143 nigel 39
2144     /* For /G, update the pointer and length */
2145    
2146     else
2147 nigel 35 {
2148 nigel 57 bptr += use_offsets[1];
2149     len -= use_offsets[1];
2150 nigel 35 }
2151 nigel 39 } /* End of loop for /g and /G */
2152 nigel 91
2153     NEXT_DATA: continue;
2154 nigel 39 } /* End of loop for data lines */
2155 nigel 3
2156 nigel 11 CONTINUE:
2157 nigel 37
2158     #if !defined NOPOSIX
2159 nigel 3 if (posix || do_posix) regfree(&preg);
2160 nigel 37 #endif
2161    
2162 nigel 77 if (re != NULL) new_free(re);
2163     if (extra != NULL) new_free(extra);
2164 nigel 25 if (tables != NULL)
2165     {
2166 nigel 77 new_free((void *)tables);
2167 nigel 25 setlocale(LC_CTYPE, "C");
2168     }
2169 nigel 3 }
2170    
2171 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2172 nigel 77
2173     EXIT:
2174    
2175     if (infile != NULL && infile != stdin) fclose(infile);
2176     if (outfile != NULL && outfile != stdout) fclose(outfile);
2177    
2178     free(buffer);
2179     free(dbuffer);
2180     free(pbuffer);
2181     free(offsets);
2182    
2183     return yield;
2184 nigel 3 }
2185    
2186 nigel 77 /* End of pcretest.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12