/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 689 - (hide annotations) (download)
Fri Sep 9 10:34:57 2011 UTC (2 years, 11 months ago) by ph10
File MIME type: text/plain
File size: 93634 byte(s)
Patch to RunTest for use with simulations; further JIT code/test tidies.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
43 nigel 3 #include <ctype.h>
44     #include <stdio.h>
45     #include <string.h>
46     #include <stdlib.h>
47     #include <time.h>
48 nigel 25 #include <locale.h>
49 nigel 75 #include <errno.h>
50 nigel 3
51 ph10 287 #ifdef SUPPORT_LIBREADLINE
52 ph10 343 #ifdef HAVE_UNISTD_H
53 ph10 287 #include <unistd.h>
54 ph10 343 #endif
55 ph10 287 #include <readline/readline.h>
56     #include <readline/history.h>
57     #endif
58 nigel 93
59 ph10 287
60 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
61     input and output without "b"; then I was told that "b" was needed in some
62     environments, so it was added for release 5.0 to both the input and output. (It
63     makes no difference on Unix-like systems.) Later I was told that it is wrong
64     for the input on Windows. I've now abstracted the modes into two macros that
65     are set here, to make it easier to fiddle with them, and removed "b" from the
66     input mode under Windows. */
67    
68     #if defined(_WIN32) || defined(WIN32)
69     #include <io.h> /* For _setmode() */
70     #include <fcntl.h> /* For _O_BINARY */
71     #define INPUT_MODE "r"
72     #define OUTPUT_MODE "wb"
73    
74 ph10 411 #ifndef isatty
75     #define isatty _isatty /* This is what Windows calls them, I'm told, */
76     #endif /* though in some environments they seem to */
77     /* be already defined, hence the #ifndefs. */
78     #ifndef fileno
79 ph10 343 #define fileno _fileno
80 ph10 411 #endif
81 ph10 343
82 ph10 580 /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84     #ifdef __BORLANDC__
85     #define _setmode(handle, mode) setmode(handle, mode)
86     #endif
87    
88     /* Not Windows */
89    
90 nigel 93 #else
91     #include <sys/time.h> /* These two includes are needed */
92     #include <sys/resource.h> /* for setrlimit(). */
93     #define INPUT_MODE "rb"
94     #define OUTPUT_MODE "wb"
95 nigel 91 #endif
96    
97 nigel 93
98 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
99     displaying the results of pcre_study() and we also need to know about the
100     internal macros, structures, and other internal data values; pcretest has
101     "inside information" compared to a program that strictly follows the PCRE API.
102 nigel 37
103 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105     appropriately for an application, not for building PCRE. */
106 nigel 77
107 ph10 145 #include "pcre.h"
108 nigel 77 #include "pcre_internal.h"
109    
110 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
111     to keep two copies, we include the source file here, changing the names of the
112     external symbols to prevent clashes. */
113 nigel 77
114 ph10 351 #define _pcre_ucp_gentype ucp_gentype
115 ph10 667 #define _pcre_ucp_typerange ucp_typerange
116 nigel 85 #define _pcre_utf8_table1 utf8_table1
117     #define _pcre_utf8_table1_size utf8_table1_size
118     #define _pcre_utf8_table2 utf8_table2
119     #define _pcre_utf8_table3 utf8_table3
120     #define _pcre_utf8_table4 utf8_table4
121 ph10 667 #define _pcre_utf8_char_sizes utf8_char_sizes
122 nigel 85 #define _pcre_utt utt
123     #define _pcre_utt_size utt_size
124 ph10 240 #define _pcre_utt_names utt_names
125 nigel 85 #define _pcre_OP_lengths OP_lengths
126    
127     #include "pcre_tables.c"
128    
129     /* We also need the pcre_printint() function for printing out compiled
130     patterns. This function is in a separate file so that it can be included in
131 ph10 507 pcre_compile.c when that module is compiled with debugging enabled. It needs to
132 ph10 498 know which case is being compiled. */
133 nigel 85
134 ph10 498 #define COMPILING_PCRETEST
135     #include "pcre_printint.src"
136    
137     /* The definition of the macro PRINTABLE, which determines whether to print an
138 nigel 93 output character as-is or as a hex value when showing compiled patterns, is
139 ph10 498 contained in the printint.src file. We use it here also, in cases when the
140     locale has not been explicitly changed, so as to get consistent output from
141     systems that differ in their output from isprint() even in the "C" locale. */
142 nigel 93
143     #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
144 nigel 85
145 nigel 37 /* It is possible to compile this test program without including support for
146     testing the POSIX interface, though this is not available via the standard
147     Makefile. */
148    
149     #if !defined NOPOSIX
150 nigel 3 #include "pcreposix.h"
151 nigel 37 #endif
152 nigel 3
153 ph10 107 /* It is also possible, for the benefit of the version currently imported into
154     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
155     interface to the DFA matcher (NODFA), and without the doublecheck of the old
156     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
157     UTF8 support if PCRE is built without it. */
158 nigel 79
159 ph10 107 #ifndef SUPPORT_UTF8
160     #ifndef NOUTF8
161     #define NOUTF8
162     #endif
163     #endif
164 nigel 79
165 ph10 107
166 nigel 85 /* Other parameters */
167    
168 nigel 3 #ifndef CLOCKS_PER_SEC
169     #ifdef CLK_TCK
170     #define CLOCKS_PER_SEC CLK_TCK
171     #else
172     #define CLOCKS_PER_SEC 100
173     #endif
174     #endif
175    
176 nigel 93 /* This is the default loop count for timing. */
177    
178 nigel 75 #define LOOPREPEAT 500000
179 nigel 3
180 nigel 85 /* Static variables */
181    
182 nigel 3 static FILE *outfile;
183     static int log_store = 0;
184 nigel 63 static int callout_count;
185     static int callout_extra;
186     static int callout_fail_count;
187     static int callout_fail_id;
188 ph10 210 static int debug_lengths;
189 nigel 63 static int first_callout;
190 nigel 93 static int locale_set = 0;
191 nigel 73 static int show_malloc;
192 nigel 67 static int use_utf8;
193 nigel 43 static size_t gotten_store;
194 ph10 645 static const unsigned char *last_callout_mark = NULL;
195 nigel 3
196 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
197    
198     static int buffer_size = 50000;
199     static uschar *buffer = NULL;
200     static uschar *dbuffer = NULL;
201 nigel 75 static uschar *pbuffer = NULL;
202 nigel 3
203 ph10 598 /* Textual explanations for runtime error codes */
204 nigel 75
205 ph10 598 static const char *errtexts[] = {
206     NULL, /* 0 is no error */
207     NULL, /* NOMATCH is handled specially */
208     "NULL argument passed",
209     "bad option value",
210     "magic number missing",
211     "unknown opcode - pattern overwritten?",
212     "no more memory",
213 ph10 654 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
214 ph10 598 "match limit exceeded",
215     "callout error code",
216     NULL, /* BADUTF8 is handled specially */
217     "bad UTF-8 offset",
218     NULL, /* PARTIAL is handled specially */
219     "not used - internal error",
220     "internal error - pattern overwritten?",
221     "bad count value",
222     "item unsupported for DFA matching",
223     "backreference condition or recursion test not supported for DFA matching",
224     "match limit not supported for DFA matching",
225     "workspace size exceeded in DFA matching",
226 ph10 654 "too much recursion for DFA matching",
227 ph10 598 "recursion limit exceeded",
228     "not used - internal error",
229     "invalid combination of newline options",
230     "bad offset value",
231 ph10 642 NULL, /* SHORTUTF8 is handled specially */
232 ph10 676 "nested recursion at the same subject position",
233     "JIT stack limit reached"
234 ph10 598 };
235    
236 ph10 654
237 ph10 541 /*************************************************
238     * Alternate character tables *
239     *************************************************/
240 nigel 49
241 ph10 545 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
242     using the default tables of the library. However, the T option can be used to
243     select alternate sets of tables, for different kinds of testing. Note also that
244 ph10 541 the L (locale) option also adjusts the tables. */
245    
246 ph10 545 /* This is the set of tables distributed as default with PCRE. It recognizes
247 ph10 541 only ASCII characters. */
248    
249     static const unsigned char tables0[] = {
250    
251     /* This table is a lower casing table. */
252    
253     0, 1, 2, 3, 4, 5, 6, 7,
254     8, 9, 10, 11, 12, 13, 14, 15,
255     16, 17, 18, 19, 20, 21, 22, 23,
256     24, 25, 26, 27, 28, 29, 30, 31,
257     32, 33, 34, 35, 36, 37, 38, 39,
258     40, 41, 42, 43, 44, 45, 46, 47,
259     48, 49, 50, 51, 52, 53, 54, 55,
260     56, 57, 58, 59, 60, 61, 62, 63,
261     64, 97, 98, 99,100,101,102,103,
262     104,105,106,107,108,109,110,111,
263     112,113,114,115,116,117,118,119,
264     120,121,122, 91, 92, 93, 94, 95,
265     96, 97, 98, 99,100,101,102,103,
266     104,105,106,107,108,109,110,111,
267     112,113,114,115,116,117,118,119,
268     120,121,122,123,124,125,126,127,
269     128,129,130,131,132,133,134,135,
270     136,137,138,139,140,141,142,143,
271     144,145,146,147,148,149,150,151,
272     152,153,154,155,156,157,158,159,
273     160,161,162,163,164,165,166,167,
274     168,169,170,171,172,173,174,175,
275     176,177,178,179,180,181,182,183,
276     184,185,186,187,188,189,190,191,
277     192,193,194,195,196,197,198,199,
278     200,201,202,203,204,205,206,207,
279     208,209,210,211,212,213,214,215,
280     216,217,218,219,220,221,222,223,
281     224,225,226,227,228,229,230,231,
282     232,233,234,235,236,237,238,239,
283     240,241,242,243,244,245,246,247,
284     248,249,250,251,252,253,254,255,
285    
286     /* This table is a case flipping table. */
287    
288     0, 1, 2, 3, 4, 5, 6, 7,
289     8, 9, 10, 11, 12, 13, 14, 15,
290     16, 17, 18, 19, 20, 21, 22, 23,
291     24, 25, 26, 27, 28, 29, 30, 31,
292     32, 33, 34, 35, 36, 37, 38, 39,
293     40, 41, 42, 43, 44, 45, 46, 47,
294     48, 49, 50, 51, 52, 53, 54, 55,
295     56, 57, 58, 59, 60, 61, 62, 63,
296     64, 97, 98, 99,100,101,102,103,
297     104,105,106,107,108,109,110,111,
298     112,113,114,115,116,117,118,119,
299     120,121,122, 91, 92, 93, 94, 95,
300     96, 65, 66, 67, 68, 69, 70, 71,
301     72, 73, 74, 75, 76, 77, 78, 79,
302     80, 81, 82, 83, 84, 85, 86, 87,
303     88, 89, 90,123,124,125,126,127,
304     128,129,130,131,132,133,134,135,
305     136,137,138,139,140,141,142,143,
306     144,145,146,147,148,149,150,151,
307     152,153,154,155,156,157,158,159,
308     160,161,162,163,164,165,166,167,
309     168,169,170,171,172,173,174,175,
310     176,177,178,179,180,181,182,183,
311     184,185,186,187,188,189,190,191,
312     192,193,194,195,196,197,198,199,
313     200,201,202,203,204,205,206,207,
314     208,209,210,211,212,213,214,215,
315     216,217,218,219,220,221,222,223,
316     224,225,226,227,228,229,230,231,
317     232,233,234,235,236,237,238,239,
318     240,241,242,243,244,245,246,247,
319     248,249,250,251,252,253,254,255,
320    
321     /* This table contains bit maps for various character classes. Each map is 32
322     bytes long and the bits run from the least significant end of each byte. The
323     classes that have their own maps are: space, xdigit, digit, upper, lower, word,
324     graph, print, punct, and cntrl. Other classes are built from combinations. */
325    
326     0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
327     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
328     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330    
331     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
332     0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
333     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
335    
336     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
337     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
338     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
340    
341     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
342     0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
343     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
344     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
345    
346     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
347     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
348     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
349     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
350    
351     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
352     0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
353     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
354     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
355    
356     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
357     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
358     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
359     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360    
361     0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
362     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
363     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
364     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365    
366     0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
367     0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
368     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
369     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
370    
371     0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
372     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
373     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
374     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
375    
376     /* This table identifies various classes of character by individual bits:
377     0x01 white space character
378     0x02 letter
379     0x04 decimal digit
380     0x08 hexadecimal digit
381     0x10 alphanumeric or '_'
382     0x80 regular expression metacharacter or binary zero
383     */
384    
385     0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
386     0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
387     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
388     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
389     0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
390     0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
391     0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
392     0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
393     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
394     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
395     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
396     0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
397     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
398     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
399     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
400     0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
401     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
402     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
403     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
404     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
405     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
406     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
407     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
408     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
409     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
410     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
411     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
412     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
413     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
414     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
415     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
416     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
417    
418 ph10 545 /* This is a set of tables that came originally from a Windows user. It seems to
419     be at least an approximation of ISO 8859. In particular, there are characters
420 ph10 541 greater than 128 that are marked as spaces, letters, etc. */
421    
422     static const unsigned char tables1[] = {
423     0,1,2,3,4,5,6,7,
424     8,9,10,11,12,13,14,15,
425     16,17,18,19,20,21,22,23,
426     24,25,26,27,28,29,30,31,
427     32,33,34,35,36,37,38,39,
428     40,41,42,43,44,45,46,47,
429     48,49,50,51,52,53,54,55,
430     56,57,58,59,60,61,62,63,
431     64,97,98,99,100,101,102,103,
432     104,105,106,107,108,109,110,111,
433     112,113,114,115,116,117,118,119,
434     120,121,122,91,92,93,94,95,
435     96,97,98,99,100,101,102,103,
436     104,105,106,107,108,109,110,111,
437     112,113,114,115,116,117,118,119,
438     120,121,122,123,124,125,126,127,
439     128,129,130,131,132,133,134,135,
440     136,137,138,139,140,141,142,143,
441     144,145,146,147,148,149,150,151,
442     152,153,154,155,156,157,158,159,
443     160,161,162,163,164,165,166,167,
444     168,169,170,171,172,173,174,175,
445     176,177,178,179,180,181,182,183,
446     184,185,186,187,188,189,190,191,
447     224,225,226,227,228,229,230,231,
448     232,233,234,235,236,237,238,239,
449     240,241,242,243,244,245,246,215,
450     248,249,250,251,252,253,254,223,
451     224,225,226,227,228,229,230,231,
452     232,233,234,235,236,237,238,239,
453     240,241,242,243,244,245,246,247,
454     248,249,250,251,252,253,254,255,
455     0,1,2,3,4,5,6,7,
456     8,9,10,11,12,13,14,15,
457     16,17,18,19,20,21,22,23,
458     24,25,26,27,28,29,30,31,
459     32,33,34,35,36,37,38,39,
460     40,41,42,43,44,45,46,47,
461     48,49,50,51,52,53,54,55,
462     56,57,58,59,60,61,62,63,
463     64,97,98,99,100,101,102,103,
464     104,105,106,107,108,109,110,111,
465     112,113,114,115,116,117,118,119,
466     120,121,122,91,92,93,94,95,
467     96,65,66,67,68,69,70,71,
468     72,73,74,75,76,77,78,79,
469     80,81,82,83,84,85,86,87,
470     88,89,90,123,124,125,126,127,
471     128,129,130,131,132,133,134,135,
472     136,137,138,139,140,141,142,143,
473     144,145,146,147,148,149,150,151,
474     152,153,154,155,156,157,158,159,
475     160,161,162,163,164,165,166,167,
476     168,169,170,171,172,173,174,175,
477     176,177,178,179,180,181,182,183,
478     184,185,186,187,188,189,190,191,
479     224,225,226,227,228,229,230,231,
480     232,233,234,235,236,237,238,239,
481     240,241,242,243,244,245,246,215,
482     248,249,250,251,252,253,254,223,
483     192,193,194,195,196,197,198,199,
484     200,201,202,203,204,205,206,207,
485     208,209,210,211,212,213,214,247,
486     216,217,218,219,220,221,222,255,
487     0,62,0,0,1,0,0,0,
488     0,0,0,0,0,0,0,0,
489     32,0,0,0,1,0,0,0,
490     0,0,0,0,0,0,0,0,
491     0,0,0,0,0,0,255,3,
492     126,0,0,0,126,0,0,0,
493     0,0,0,0,0,0,0,0,
494     0,0,0,0,0,0,0,0,
495     0,0,0,0,0,0,255,3,
496     0,0,0,0,0,0,0,0,
497     0,0,0,0,0,0,12,2,
498     0,0,0,0,0,0,0,0,
499     0,0,0,0,0,0,0,0,
500     254,255,255,7,0,0,0,0,
501     0,0,0,0,0,0,0,0,
502     255,255,127,127,0,0,0,0,
503     0,0,0,0,0,0,0,0,
504     0,0,0,0,254,255,255,7,
505     0,0,0,0,0,4,32,4,
506     0,0,0,128,255,255,127,255,
507     0,0,0,0,0,0,255,3,
508     254,255,255,135,254,255,255,7,
509     0,0,0,0,0,4,44,6,
510     255,255,127,255,255,255,127,255,
511     0,0,0,0,254,255,255,255,
512     255,255,255,255,255,255,255,127,
513     0,0,0,0,254,255,255,255,
514     255,255,255,255,255,255,255,255,
515     0,2,0,0,255,255,255,255,
516     255,255,255,255,255,255,255,127,
517     0,0,0,0,255,255,255,255,
518     255,255,255,255,255,255,255,255,
519     0,0,0,0,254,255,0,252,
520     1,0,0,248,1,0,0,120,
521     0,0,0,0,254,255,255,255,
522     0,0,128,0,0,0,128,0,
523     255,255,255,255,0,0,0,0,
524     0,0,0,0,0,0,0,128,
525     255,255,255,255,0,0,0,0,
526     0,0,0,0,0,0,0,0,
527     128,0,0,0,0,0,0,0,
528     0,1,1,0,1,1,0,0,
529     0,0,0,0,0,0,0,0,
530     0,0,0,0,0,0,0,0,
531     1,0,0,0,128,0,0,0,
532     128,128,128,128,0,0,128,0,
533     28,28,28,28,28,28,28,28,
534     28,28,0,0,0,0,0,128,
535     0,26,26,26,26,26,26,18,
536     18,18,18,18,18,18,18,18,
537     18,18,18,18,18,18,18,18,
538     18,18,18,128,128,0,128,16,
539     0,26,26,26,26,26,26,18,
540     18,18,18,18,18,18,18,18,
541     18,18,18,18,18,18,18,18,
542     18,18,18,128,128,0,0,0,
543     0,0,0,0,0,1,0,0,
544     0,0,0,0,0,0,0,0,
545     0,0,0,0,0,0,0,0,
546     0,0,0,0,0,0,0,0,
547     1,0,0,0,0,0,0,0,
548     0,0,18,0,0,0,0,0,
549     0,0,20,20,0,18,0,0,
550     0,20,18,0,0,0,0,0,
551     18,18,18,18,18,18,18,18,
552     18,18,18,18,18,18,18,18,
553     18,18,18,18,18,18,18,0,
554     18,18,18,18,18,18,18,18,
555     18,18,18,18,18,18,18,18,
556     18,18,18,18,18,18,18,18,
557     18,18,18,18,18,18,18,0,
558     18,18,18,18,18,18,18,18
559     };
560    
561    
562    
563 ph10 558
564     #ifndef HAVE_STRERROR
565 nigel 49 /*************************************************
566 ph10 558 * Provide strerror() for non-ANSI libraries *
567     *************************************************/
568    
569     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
570     in their libraries, but can provide the same facility by this simple
571     alternative function. */
572    
573     extern int sys_nerr;
574     extern char *sys_errlist[];
575    
576     char *
577     strerror(int n)
578     {
579     if (n < 0 || n >= sys_nerr) return "unknown error number";
580     return sys_errlist[n];
581     }
582     #endif /* HAVE_STRERROR */
583    
584    
585 ph10 667 /*************************************************
586     * JIT memory callback *
587     *************************************************/
588 ph10 558
589 ph10 667 static pcre_jit_stack* jit_callback(void *arg)
590     {
591     return (pcre_jit_stack *)arg;
592     }
593 ph10 558
594 ph10 667
595 ph10 558 /*************************************************
596 nigel 91 * Read or extend an input line *
597     *************************************************/
598    
599     /* Input lines are read into buffer, but both patterns and data lines can be
600     continued over multiple input lines. In addition, if the buffer fills up, we
601     want to automatically expand it so as to be able to handle extremely large
602     lines that are needed for certain stress tests. When the input buffer is
603     expanded, the other two buffers must also be expanded likewise, and the
604     contents of pbuffer, which are a copy of the input for callouts, must be
605     preserved (for when expansion happens for a data line). This is not the most
606     optimal way of handling this, but hey, this is just a test program!
607    
608     Arguments:
609     f the file to read
610     start where in buffer to start (this *must* be within buffer)
611 ph10 287 prompt for stdin or readline()
612 nigel 91
613     Returns: pointer to the start of new data
614     could be a copy of start, or could be moved
615     NULL if no data read and EOF reached
616     */
617    
618     static uschar *
619 ph10 287 extend_inputline(FILE *f, uschar *start, const char *prompt)
620 nigel 91 {
621     uschar *here = start;
622    
623     for (;;)
624     {
625 ph10 530 int rlen = (int)(buffer_size - (here - buffer));
626 nigel 93
627 nigel 91 if (rlen > 1000)
628     {
629     int dlen;
630 ph10 289
631 ph10 287 /* If libreadline support is required, use readline() to read a line if the
632     input is a terminal. Note that readline() removes the trailing newline, so
633     we must put it back again, to be compatible with fgets(). */
634 ph10 289
635 ph10 287 #ifdef SUPPORT_LIBREADLINE
636     if (isatty(fileno(f)))
637     {
638 ph10 289 size_t len;
639 ph10 287 char *s = readline(prompt);
640     if (s == NULL) return (here == start)? NULL : start;
641     len = strlen(s);
642 ph10 289 if (len > 0) add_history(s);
643 ph10 287 if (len > rlen - 1) len = rlen - 1;
644     memcpy(here, s, len);
645     here[len] = '\n';
646 ph10 289 here[len+1] = 0;
647     free(s);
648 ph10 287 }
649 ph10 289 else
650     #endif
651    
652 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
653 ph10 289
654 ph10 287 {
655 ph10 516 if (f == stdin) printf("%s", prompt);
656 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
657     return (here == start)? NULL : start;
658 ph10 289 }
659    
660 nigel 91 dlen = (int)strlen((char *)here);
661     if (dlen > 0 && here[dlen - 1] == '\n') return start;
662     here += dlen;
663     }
664    
665     else
666     {
667     int new_buffer_size = 2*buffer_size;
668     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
669     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
670     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
671    
672     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
673     {
674     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
675     exit(1);
676     }
677    
678     memcpy(new_buffer, buffer, buffer_size);
679     memcpy(new_pbuffer, pbuffer, buffer_size);
680    
681     buffer_size = new_buffer_size;
682    
683     start = new_buffer + (start - buffer);
684     here = new_buffer + (here - buffer);
685    
686     free(buffer);
687     free(dbuffer);
688     free(pbuffer);
689    
690     buffer = new_buffer;
691     dbuffer = new_dbuffer;
692     pbuffer = new_pbuffer;
693     }
694     }
695    
696     return NULL; /* Control never gets here */
697     }
698    
699    
700    
701    
702    
703    
704    
/*************************************************
*        Read number from string                 *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then a run of decimal digits is converted. If
there are no digits the result is 0 and *endptr is left pointing at the first
non-space character. A value too large for an int does not overflow (which
would be undefined behaviour); it saturates at the largest value the
accumulator is allowed to reach, and the remaining digits are still consumed.

Arguments:
  str         string to be converted
  endptr      where to put the end pointer

Returns:      the value, as a non-negative int
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
/* Largest value we allow; on ordinary implementations this is INT_MAX, and it
is never larger. Computed this way to avoid depending on <limits.h>. */

const unsigned int limit = (unsigned int)~0u >> 1;
unsigned int result = 0;

while (isspace(*str)) str++;
while (isdigit(*str))
  {
  unsigned int digit = (unsigned int)(*str++ - '0');
  if (result > (limit - digit) / 10) result = limit;   /* Saturate */
    else result = result * 10 + digit;
  }

*endptr = str;
return (int)result;
}
729    
730    
731    
732 nigel 49
733     /*************************************************
734     * Convert UTF-8 string to value *
735     *************************************************/
736    
737     /* This function takes one or more bytes that represents a UTF-8 character,
738     and returns the value of the character.
739    
740     Argument:
741 nigel 91 utf8bytes a pointer to the byte vector
742     vptr a pointer to an int to receive the value
743 nigel 49
744 nigel 91 Returns: > 0 => the number of bytes consumed
745     -6 to 0 => malformed UTF-8 character at offset = (-return)
746 nigel 49 */
747    
748 nigel 79 #if !defined NOUTF8
749    
750 nigel 67 static int
751 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
752 nigel 49 {
753 nigel 91 int c = *utf8bytes++;
754 nigel 49 int d = c;
755     int i, j, s;
756    
757     for (i = -1; i < 6; i++) /* i is number of additional bytes */
758     {
759     if ((d & 0x80) == 0) break;
760     d <<= 1;
761     }
762    
763     if (i == -1) { *vptr = c; return 1; } /* ascii character */
764     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
765    
766     /* i now has a value in the range 1-5 */
767    
768 nigel 59 s = 6*i;
769 nigel 85 d = (c & utf8_table3[i]) << s;
770 nigel 49
771     for (j = 0; j < i; j++)
772     {
773 nigel 91 c = *utf8bytes++;
774 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
775 nigel 59 s -= 6;
776 nigel 49 d |= (c & 0x3f) << s;
777     }
778    
779     /* Check that encoding was the correct unique one */
780    
781 nigel 85 for (j = 0; j < utf8_table1_size; j++)
782     if (d <= utf8_table1[j]) break;
783 nigel 49 if (j != i) return -(i+1);
784    
785     /* Valid value */
786    
787     *vptr = d;
788     return i+1;
789     }
790    
791 nigel 79 #endif
792 nigel 49
793    
794 nigel 79
795 nigel 63 /*************************************************
796 nigel 85 * Convert character value to UTF-8 *
797     *************************************************/
798    
799     /* This function takes an integer value in the range 0 - 0x7fffffff
800     and encodes it as a UTF-8 character in 1 to 6 bytes.
801    
802     Arguments:
803     cvalue the character value
804 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
805 nigel 85
806     Returns: number of characters placed in the buffer
807     */
808    
809 nigel 93 #if !defined NOUTF8
810    
811 nigel 85 static int
812 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
813 nigel 85 {
814     register int i, j;
815     for (i = 0; i < utf8_table1_size; i++)
816     if (cvalue <= utf8_table1[i]) break;
817 nigel 91 utf8bytes += i;
818 nigel 85 for (j = i; j > 0; j--)
819     {
820 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
821 nigel 85 cvalue >>= 6;
822     }
823 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
824 nigel 85 return i + 1;
825     }
826    
827 nigel 93 #endif
828 nigel 85
829    
830 nigel 93
831 nigel 85 /*************************************************
832 nigel 63 * Print character string *
833     *************************************************/
834 nigel 49
835 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
836     mode. Yields number of characters printed. If handed a NULL file, just counts
837     chars without printing. */
838 nigel 49
839 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
840 nigel 3 {
841 nigel 85 int c = 0;
842 nigel 63 int yield = 0;
843 nigel 3
844 nigel 63 while (length-- > 0)
845 nigel 3 {
846 nigel 79 #if !defined NOUTF8
847 nigel 67 if (use_utf8)
848 nigel 63 {
849     int rc = utf82ord(p, &c);
850 nigel 3
851 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
852     {
853     length -= rc - 1;
854     p += rc;
855 nigel 93 if (PRINTHEX(c))
856 nigel 63 {
857     if (f != NULL) fprintf(f, "%c", c);
858     yield++;
859     }
860     else
861     {
862 nigel 93 int n = 4;
863     if (f != NULL) fprintf(f, "\\x{%02x}", c);
864     yield += (n <= 0x000000ff)? 2 :
865     (n <= 0x00000fff)? 3 :
866     (n <= 0x0000ffff)? 4 :
867     (n <= 0x000fffff)? 5 : 6;
868 nigel 63 }
869     continue;
870     }
871     }
872 nigel 79 #endif
873 nigel 3
874 nigel 63 /* Not UTF-8, or malformed UTF-8 */
875    
876 nigel 93 c = *p++;
877     if (PRINTHEX(c))
878 nigel 3 {
879 nigel 63 if (f != NULL) fprintf(f, "%c", c);
880     yield++;
881 nigel 3 }
882 nigel 63 else
883 nigel 3 {
884 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
885     yield += 4;
886     }
887     }
888 nigel 3
889 nigel 63 return yield;
890     }
891 nigel 23
892 nigel 3
893 nigel 23
894 nigel 63 /*************************************************
895     * Callout function *
896     *************************************************/
897 nigel 3
898 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
899     the match. Yield zero unless more callouts than the fail count, or the callout
900     data is not zero. */
901 nigel 3
902 nigel 63 static int callout(pcre_callout_block *cb)
903     {
904     FILE *f = (first_callout | callout_extra)? outfile : NULL;
905 nigel 75 int i, pre_start, post_start, subject_length;
906 nigel 3
907 nigel 63 if (callout_extra)
908     {
909     fprintf(f, "Callout %d: last capture = %d\n",
910     cb->callout_number, cb->capture_last);
911 nigel 3
912 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
913     {
914     if (cb->offset_vector[i] < 0)
915     fprintf(f, "%2d: <unset>\n", i/2);
916     else
917     {
918     fprintf(f, "%2d: ", i/2);
919     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
920     cb->offset_vector[i+1] - cb->offset_vector[i], f);
921     fprintf(f, "\n");
922     }
923     }
924     }
925 nigel 3
926 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
927     datails. On subsequent calls in the same match, we use pchars just to find the
928     printed lengths of the substrings. */
929 nigel 3
930 nigel 63 if (f != NULL) fprintf(f, "--->");
931 nigel 3
932 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
933     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
934     cb->current_position - cb->start_match, f);
935 nigel 3
936 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
937    
938 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
939     cb->subject_length - cb->current_position, f);
940 nigel 3
941 nigel 63 if (f != NULL) fprintf(f, "\n");
942 nigel 9
943 nigel 63 /* Always print appropriate indicators, with callout number if not already
944 nigel 75 shown. For automatic callouts, show the pattern offset. */
945 nigel 3
946 nigel 75 if (cb->callout_number == 255)
947     {
948     fprintf(outfile, "%+3d ", cb->pattern_position);
949     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
950     }
951     else
952     {
953     if (callout_extra) fprintf(outfile, " ");
954     else fprintf(outfile, "%3d ", cb->callout_number);
955     }
956 nigel 3
957 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
958     fprintf(outfile, "^");
959 nigel 3
960 nigel 63 if (post_start > 0)
961     {
962     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
963     fprintf(outfile, "^");
964 nigel 3 }
965    
966 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
967     fprintf(outfile, " ");
968    
969     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
970     pbuffer + cb->pattern_position);
971    
972 nigel 63 fprintf(outfile, "\n");
973     first_callout = 0;
974 nigel 3
975 ph10 654 if (cb->mark != last_callout_mark)
976 ph10 645 {
977 ph10 654 fprintf(outfile, "Latest Mark: %s\n",
978 ph10 645 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
979 ph10 654 last_callout_mark = cb->mark;
980     }
981 ph10 645
982 nigel 71 if (cb->callout_data != NULL)
983 nigel 49 {
984 nigel 71 int callout_data = *((int *)(cb->callout_data));
985     if (callout_data != 0)
986     {
987     fprintf(outfile, "Callout data = %d\n", callout_data);
988     return callout_data;
989     }
990 nigel 63 }
991 nigel 49
992 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
993     (++callout_count >= callout_fail_count)? 1 : 0;
994 nigel 3 }
995    
996    
997 nigel 63 /*************************************************
998 nigel 73 * Local malloc functions *
999 nigel 63 *************************************************/
1000 nigel 3
1001 ph10 667 /* Alternative malloc function, to test functionality and save the size of a
1002     compiled re. The show_malloc variable is set only during matching. */
1003 nigel 3
1004     static void *new_malloc(size_t size)
1005     {
1006 nigel 73 void *block = malloc(size);
1007 nigel 43 gotten_store = size;
1008 nigel 73 if (show_malloc)
1009 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1010 nigel 73 return block;
1011 nigel 3 }
1012    
1013 nigel 73 static void new_free(void *block)
1014     {
1015     if (show_malloc)
1016     fprintf(outfile, "free %p\n", block);
1017     free(block);
1018     }
1019 nigel 3
1020 nigel 73 /* For recursion malloc/free, to test stacking calls */
1021    
1022     static void *stack_malloc(size_t size)
1023     {
1024     void *block = malloc(size);
1025     if (show_malloc)
1026 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1027 nigel 73 return block;
1028     }
1029    
1030     static void stack_free(void *block)
1031     {
1032     if (show_malloc)
1033     fprintf(outfile, "stack_free %p\n", block);
1034     free(block);
1035     }
1036    
1037    
1038 nigel 63 /*************************************************
1039     * Call pcre_fullinfo() *
1040     *************************************************/
1041 nigel 43
1042     /* Get one piece of information from the pcre_fullinfo() function */
1043    
1044     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1045     {
1046     int rc;
1047     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1048     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1049     }
1050    
1051    
1052    
1053 nigel 63 /*************************************************
1054 nigel 75 * Byte flipping function *
1055     *************************************************/
1056    
/* Reverse the byte order of a value.

Arguments:
  value    the value to flip
  n        2 to swap the low two bytes; any other value reverses the low
             four bytes

Returns:   the flipped value; bits above those handled are dropped
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int flipped;

if (n == 2)
  {
  flipped  = (value << 8) & 0xff00;
  flipped |= (value >> 8) & 0x00ff;
  return flipped;
  }

flipped  = (value << 24) & 0xff000000;
flipped |= (value <<  8) & 0x00ff0000;
flipped |= (value >>  8) & 0x0000ff00;
flipped |= (value >> 24) & 0x000000ff;
return flipped;
}
1066    
1067    
1068    
1069    
1070     /*************************************************
1071 nigel 87 * Check match or recursion limit *
1072     *************************************************/
1073    
1074     static int
1075     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1076     int start_offset, int options, int *use_offsets, int use_size_offsets,
1077     int flag, unsigned long int *limit, int errnumber, const char *msg)
1078     {
1079     int count;
1080     int min = 0;
1081     int mid = 64;
1082     int max = -1;
1083    
1084     extra->flags |= flag;
1085    
1086     for (;;)
1087     {
1088     *limit = mid;
1089    
1090     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1091     use_offsets, use_size_offsets);
1092    
1093     if (count == errnumber)
1094     {
1095     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1096     min = mid;
1097     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1098     }
1099    
1100     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1101     count == PCRE_ERROR_PARTIAL)
1102     {
1103     if (mid == min + 1)
1104     {
1105     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1106     break;
1107     }
1108     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1109     max = mid;
1110     mid = (min + mid)/2;
1111     }
1112     else break; /* Some other error */
1113     }
1114    
1115     extra->flags &= ~flag;
1116     return count;
1117     }
1118    
1119    
1120    
1121     /*************************************************
1122 ph10 227 * Case-independent strncmp() function *
1123     *************************************************/
1124    
1125     /*
1126     Arguments:
1127     s first string
1128     t second string
1129     n number of characters to compare
1130    
1131     Returns: < 0, = 0, or > 0, according to the comparison
1132     */
1133    
1134     static int
1135     strncmpic(uschar *s, uschar *t, int n)
1136     {
1137     while (n--)
1138     {
1139     int c = tolower(*s++) - tolower(*t++);
1140     if (c) return c;
1141     }
1142     return 0;
1143     }
1144    
1145    
1146    
1147     /*************************************************
1148 nigel 91 * Check newline indicator *
1149     *************************************************/
1150    
1151 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1152     a message and return 0 if there is no match.
1153 nigel 91
1154     Arguments:
1155     p points after the leading '<'
1156     f file for error message
1157    
1158     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1159     */
1160    
1161     static int
1162     check_newline(uschar *p, FILE *f)
1163     {
1164 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1165     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1166     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1167     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1168     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1169 ph10 231 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1170     if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1171 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
1172     return 0;
1173     }
1174    
1175    
1176    
1177     /*************************************************
1178 nigel 93 * Usage function *
1179     *************************************************/
1180    
/* Writes the command-line summary to the standard output. Lines for optional
features appear only when the feature is compiled in. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n", stdout);
fputs("Input and output default to stdin and stdout.\n", stdout);
#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
fputs("\nOptions:\n", stdout);
fputs(" -b show compiled code (bytecode)\n", stdout);
fputs(" -C show PCRE compile-time options and exit\n", stdout);
fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n", stdout);
fputs(" -i show information about compiled patterns\n", stdout);
fputs(" -M find MATCH_LIMIT minimum for each subject\n", stdout);
fputs(" -m output memory used information\n", stdout);
fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
fputs(" -s force each pattern to be studied at basic level\n", stdout);
fputs(" -s+ force each pattern to be studied, using JIT if available\n", stdout);
fputs(" -t time compilation and execution\n", stdout);
fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
fputs(" -tm time execution (matching) only\n", stdout);
fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1215    
1216    
1217    
1218     /*************************************************
1219 nigel 63 * Main Program *
1220     *************************************************/
1221 nigel 43
1222 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
1223     consist of a regular expression, in delimiters and optionally followed by
1224     options, followed by a set of test data, terminated by an empty line. */
1225    
1226     int main(int argc, char **argv)
1227     {
1228     FILE *infile = stdin;
1229     int options = 0;
1230     int study_options = 0;
1231 ph10 386 int default_find_match_limit = FALSE;
1232 nigel 3 int op = 1;
1233     int timeit = 0;
1234 nigel 93 int timeitm = 0;
1235 nigel 3 int showinfo = 0;
1236 nigel 31 int showstore = 0;
1237 ph10 667 int force_study = -1;
1238     int force_study_options = 0;
1239 nigel 87 int quiet = 0;
1240 nigel 53 int size_offsets = 45;
1241     int size_offsets_max;
1242 nigel 77 int *offsets = NULL;
1243 nigel 53 #if !defined NOPOSIX
1244 nigel 3 int posix = 0;
1245 nigel 53 #endif
1246 nigel 3 int debug = 0;
1247 nigel 11 int done = 0;
1248 nigel 77 int all_use_dfa = 0;
1249     int yield = 0;
1250 nigel 91 int stack_size;
1251 nigel 3
1252 ph10 667 pcre_jit_stack *jit_stack = NULL;
1253    
1254    
1255 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
1256     that 1024 is plenty long enough for the few names we'll be testing. */
1257 nigel 69
1258 nigel 91 uschar copynames[1024];
1259     uschar getnames[1024];
1260    
1261     uschar *copynamesptr;
1262     uschar *getnamesptr;
1263    
1264 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
1265 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
1266 nigel 69
1267 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
1268     dbuffer = (unsigned char *)malloc(buffer_size);
1269     pbuffer = (unsigned char *)malloc(buffer_size);
1270 nigel 69
1271 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
1272 nigel 3
1273 nigel 93 outfile = stdout;
1274    
1275     /* The following _setmode() stuff is some Windows magic that tells its runtime
1276     library to translate CRLF into a single LF character. At least, that's what
1277     I've been told: never having used Windows I take this all on trust. Originally
1278     it set 0x8000, but then I was advised that _O_BINARY was better. */
1279    
1280 nigel 75 #if defined(_WIN32) || defined(WIN32)
1281 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
1282     #endif
1283 nigel 75
1284 nigel 3 /* Scan options */
1285    
1286     while (argc > 1 && argv[op][0] == '-')
1287     {
1288 nigel 63 unsigned char *endptr;
1289 nigel 53
1290 ph10 606 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1291 ph10 667 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1292     else if (strcmp(argv[op], "-s+") == 0)
1293     {
1294     force_study = 1;
1295     force_study_options = PCRE_STUDY_JIT_COMPILE;
1296     }
1297 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1298 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1299 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1300     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1301 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1302 nigel 79 #if !defined NODFA
1303 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1304 nigel 79 #endif
1305 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1306 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1307     *endptr == 0))
1308 nigel 53 {
1309     op++;
1310     argc--;
1311     }
1312 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1313     {
1314     int both = argv[op][2] == 0;
1315     int temp;
1316     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1317     *endptr == 0))
1318     {
1319     timeitm = temp;
1320     op++;
1321     argc--;
1322     }
1323     else timeitm = LOOPREPEAT;
1324     if (both) timeit = timeitm;
1325     }
1326 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1327     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1328     *endptr == 0))
1329     {
1330 ph10 641 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1331 nigel 91 printf("PCRE: -S not supported on this OS\n");
1332     exit(1);
1333     #else
1334     int rc;
1335     struct rlimit rlim;
1336     getrlimit(RLIMIT_STACK, &rlim);
1337     rlim.rlim_cur = stack_size * 1024 * 1024;
1338     rc = setrlimit(RLIMIT_STACK, &rlim);
1339     if (rc != 0)
1340     {
1341     printf("PCRE: setrlimit() failed with error %d\n", rc);
1342     exit(1);
1343     }
1344     op++;
1345     argc--;
1346     #endif
1347     }
1348 nigel 53 #if !defined NOPOSIX
1349 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1350 nigel 53 #endif
1351 nigel 63 else if (strcmp(argv[op], "-C") == 0)
1352     {
1353     int rc;
1354 ph10 392 unsigned long int lrc;
1355 nigel 63 printf("PCRE version %s\n", pcre_version());
1356     printf("Compiled with\n");
1357     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1358     printf(" %sUTF-8 support\n", rc? "" : "No ");
1359 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1360     printf(" %sUnicode properties support\n", rc? "" : "No ");
1361 ph10 667 (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1362 ph10 674 if (rc)
1363 ph10 689 printf(" Just-in-time compiler support\n");
1364 ph10 674 else
1365     printf(" No just-in-time compiler support\n");
1366 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1367 ph10 391 /* Note that these values are always the ASCII values, even
1368 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
1369 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1370     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1371 ph10 150 (rc == -2)? "ANYCRLF" :
1372 nigel 93 (rc == -1)? "ANY" : "???");
1373 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1374     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1375     "all Unicode newlines");
1376 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1377     printf(" Internal link size = %d\n", rc);
1378     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1379     printf(" POSIX malloc threshold = %d\n", rc);
1380 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1381     printf(" Default match limit = %ld\n", lrc);
1382     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1383     printf(" Default recursion depth limit = %ld\n", lrc);
1384 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1385     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1386 ph10 121 goto EXIT;
1387 nigel 63 }
1388 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
1389     strcmp(argv[op], "--help") == 0)
1390     {
1391     usage();
1392     goto EXIT;
1393     }
1394 nigel 3 else
1395     {
1396 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
1397 nigel 93 usage();
1398 nigel 77 yield = 1;
1399     goto EXIT;
1400 nigel 3 }
1401     op++;
1402     argc--;
1403     }
1404    
1405 nigel 53 /* Get the store for the offsets vector, and remember what it was */
1406    
1407     size_offsets_max = size_offsets;
1408 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1409 nigel 53 if (offsets == NULL)
1410     {
1411     printf("** Failed to get %d bytes of memory for offsets vector\n",
1412 ph10 151 (int)(size_offsets_max * sizeof(int)));
1413 nigel 77 yield = 1;
1414     goto EXIT;
1415 nigel 53 }
1416    
1417 nigel 3 /* Sort out the input and output files */
1418    
1419     if (argc > 1)
1420     {
1421 nigel 93 infile = fopen(argv[op], INPUT_MODE);
1422 nigel 3 if (infile == NULL)
1423     {
1424     printf("** Failed to open %s\n", argv[op]);
1425 nigel 77 yield = 1;
1426     goto EXIT;
1427 nigel 3 }
1428     }
1429    
1430     if (argc > 2)
1431     {
1432 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
1433 nigel 3 if (outfile == NULL)
1434     {
1435     printf("** Failed to open %s\n", argv[op+1]);
1436 nigel 77 yield = 1;
1437     goto EXIT;
1438 nigel 3 }
1439     }
1440    
1441     /* Set alternative malloc function */
1442    
1443     pcre_malloc = new_malloc;
1444 nigel 73 pcre_free = new_free;
1445     pcre_stack_malloc = stack_malloc;
1446     pcre_stack_free = stack_free;
1447 nigel 3
1448 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1449 nigel 3
1450 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1451 nigel 3
1452     /* Main loop */
1453    
1454 nigel 11 while (!done)
1455 nigel 3 {
1456     pcre *re = NULL;
1457     pcre_extra *extra = NULL;
1458 nigel 37
1459     #if !defined NOPOSIX /* There are still compilers that require no indent */
1460 nigel 3 regex_t preg;
1461 nigel 45 int do_posix = 0;
1462 nigel 37 #endif
1463    
1464 nigel 7 const char *error;
1465 ph10 512 unsigned char *markptr;
1466 nigel 25 unsigned char *p, *pp, *ppp;
1467 nigel 75 unsigned char *to_file = NULL;
1468 nigel 53 const unsigned char *tables = NULL;
1469 nigel 75 unsigned long int true_size, true_study_size = 0;
1470     size_t size, regex_gotten_store;
1471 ph10 654 int do_allcaps = 0;
1472 ph10 512 int do_mark = 0;
1473 nigel 3 int do_study = 0;
1474 ph10 654 int no_force_study = 0;
1475 nigel 25 int do_debug = debug;
1476 nigel 35 int do_G = 0;
1477     int do_g = 0;
1478 nigel 25 int do_showinfo = showinfo;
1479 nigel 35 int do_showrest = 0;
1480 ph10 616 int do_showcaprest = 0;
1481 nigel 75 int do_flip = 0;
1482 nigel 93 int erroroffset, len, delimiter, poffset;
1483 nigel 3
1484 nigel 67 use_utf8 = 0;
1485 ph10 211 debug_lengths = 1;
1486 nigel 63
1487 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1488 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1489 nigel 63 fflush(outfile);
1490 nigel 3
1491     p = buffer;
1492     while (isspace(*p)) p++;
1493     if (*p == 0) continue;
1494    
1495 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1496 nigel 3
1497 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1498     {
1499 nigel 91 unsigned long int magic, get_options;
1500 nigel 75 uschar sbuf[8];
1501     FILE *f;
1502    
1503     p++;
1504     pp = p + (int)strlen((char *)p);
1505     while (isspace(pp[-1])) pp--;
1506     *pp = 0;
1507    
1508     f = fopen((char *)p, "rb");
1509     if (f == NULL)
1510     {
1511     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1512     continue;
1513     }
1514    
1515     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1516    
1517     true_size =
1518     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1519     true_study_size =
1520     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1521    
1522     re = (real_pcre *)new_malloc(true_size);
1523     regex_gotten_store = gotten_store;
1524    
1525     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1526    
1527     magic = ((real_pcre *)re)->magic_number;
1528     if (magic != MAGIC_NUMBER)
1529     {
1530     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1531     {
1532     do_flip = 1;
1533     }
1534     else
1535     {
1536     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1537     fclose(f);
1538     continue;
1539     }
1540     }
1541    
1542 ph10 612 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1543 nigel 75 do_flip? " (byte-inverted)" : "", p);
1544    
1545     /* Need to know if UTF-8 for printing data strings */
1546    
1547 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1548     use_utf8 = (get_options & PCRE_UTF8) != 0;
1549 nigel 75
1550 ph10 612 /* Now see if there is any following study data. */
1551 nigel 75
1552     if (true_study_size != 0)
1553     {
1554     pcre_study_data *psd;
1555    
1556     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1557     extra->flags = PCRE_EXTRA_STUDY_DATA;
1558    
1559     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1560     extra->study_data = psd;
1561    
1562     if (fread(psd, 1, true_study_size, f) != true_study_size)
1563     {
1564     FAIL_READ:
1565     fprintf(outfile, "Failed to read data from %s\n", p);
1566 ph10 667 if (extra != NULL) pcre_free_study(extra);
1567 nigel 75 if (re != NULL) new_free(re);
1568     fclose(f);
1569     continue;
1570     }
1571     fprintf(outfile, "Study data loaded from %s\n", p);
1572     do_study = 1; /* To get the data output if requested */
1573     }
1574     else fprintf(outfile, "No study data\n");
1575    
1576     fclose(f);
1577     goto SHOW_INFO;
1578     }
1579    
1580     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1581     the pattern; if is isn't complete, read more. */
1582    
1583 nigel 3 delimiter = *p++;
1584    
1585 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1586 nigel 3 {
1587 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1588 nigel 3 goto SKIP_DATA;
1589     }
1590    
1591     pp = p;
1592 ph10 530 poffset = (int)(p - buffer);
1593 nigel 3
1594     for(;;)
1595     {
1596 nigel 29 while (*pp != 0)
1597     {
1598     if (*pp == '\\' && pp[1] != 0) pp++;
1599     else if (*pp == delimiter) break;
1600     pp++;
1601     }
1602 nigel 3 if (*pp != 0) break;
1603 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1604 nigel 3 {
1605     fprintf(outfile, "** Unexpected EOF\n");
1606 nigel 11 done = 1;
1607     goto CONTINUE;
1608 nigel 3 }
1609 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1610 nigel 3 }
1611    
1612 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1613     pointer to the correct relative point in the buffer. */
1614    
1615     p = buffer + poffset;
1616    
1617 nigel 29 /* If the first character after the delimiter is backslash, make
1618     the pattern end with backslash. This is purely to provide a way
1619     of testing for the error message when a pattern ends with backslash. */
1620    
1621     if (pp[1] == '\\') *pp++ = '\\';
1622    
1623 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1624     for callouts. */
1625 nigel 3
1626     *pp++ = 0;
1627 nigel 75 strcpy((char *)pbuffer, (char *)p);
1628 nigel 3
1629     /* Look for options after final delimiter */
1630    
1631     options = 0;
1632 nigel 31 log_store = showstore; /* default from command line */
1633    
1634 nigel 3 while (*pp != 0)
1635     {
1636     switch (*pp++)
1637     {
1638 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1639 nigel 35 case 'g': do_g = 1; break;
1640 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1641     case 'm': options |= PCRE_MULTILINE; break;
1642     case 's': options |= PCRE_DOTALL; break;
1643     case 'x': options |= PCRE_EXTENDED; break;
1644 nigel 25
1645 ph10 616 case '+':
1646 ph10 654 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1647 ph10 616 break;
1648 ph10 654
1649     case '=': do_allcaps = 1; break;
1650 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1651 nigel 93 case 'B': do_debug = 1; break;
1652 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1653 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1654 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1655 nigel 75 case 'F': do_flip = 1; break;
1656 nigel 35 case 'G': do_G = 1; break;
1657 nigel 25 case 'I': do_showinfo = 1; break;
1658 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1659 ph10 512 case 'K': do_mark = 1; break;
1660 nigel 31 case 'M': log_store = 1; break;
1661 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1662 nigel 37
1663     #if !defined NOPOSIX
1664 nigel 3 case 'P': do_posix = 1; break;
1665 nigel 37 #endif
1666    
1667 ph10 654 case 'S':
1668 ph10 667 if (do_study == 0)
1669 ph10 612 {
1670 ph10 667 do_study = 1;
1671     if (*pp == '+')
1672     {
1673     study_options |= PCRE_STUDY_JIT_COMPILE;
1674     pp++;
1675     }
1676     }
1677     else
1678     {
1679 ph10 612 do_study = 0;
1680     no_force_study = 1;
1681 ph10 654 }
1682 ph10 612 break;
1683    
1684 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1685 ph10 535 case 'W': options |= PCRE_UCP; break;
1686 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1687 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1688 ph10 126 case 'Z': debug_lengths = 0; break;
1689 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1690 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1691 ph10 545
1692 ph10 541 case 'T':
1693     switch (*pp++)
1694     {
1695     case '0': tables = tables0; break;
1696     case '1': tables = tables1; break;
1697 ph10 545
1698 ph10 541 case '\r':
1699     case '\n':
1700 ph10 545 case ' ':
1701     case 0:
1702 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
1703 ph10 545 goto SKIP_DATA;
1704    
1705     default:
1706 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1707 ph10 545 goto SKIP_DATA;
1708 ph10 541 }
1709 ph10 545 break;
1710 nigel 25
1711     case 'L':
1712     ppp = pp;
1713 nigel 93 /* The '\r' test here is so that it works on Windows. */
1714     /* The '0' test is just in case this is an unterminated line. */
1715     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1716 nigel 25 *ppp = 0;
1717     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1718     {
1719     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1720     goto SKIP_DATA;
1721     }
1722 nigel 93 locale_set = 1;
1723 nigel 25 tables = pcre_maketables();
1724     pp = ppp;
1725     break;
1726    
1727 nigel 75 case '>':
1728     to_file = pp;
1729     while (*pp != 0) pp++;
1730     while (isspace(pp[-1])) pp--;
1731     *pp = 0;
1732     break;
1733    
1734 nigel 91 case '<':
1735     {
1736 ph10 518 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1737 ph10 336 {
1738     options |= PCRE_JAVASCRIPT_COMPAT;
1739 ph10 345 pp += 3;
1740 ph10 336 }
1741     else
1742 ph10 345 {
1743 ph10 336 int x = check_newline(pp, outfile);
1744     if (x == 0) goto SKIP_DATA;
1745     options |= x;
1746     while (*pp++ != '>');
1747 ph10 345 }
1748 nigel 91 }
1749     break;
1750    
1751 nigel 77 case '\r': /* So that it works in Windows */
1752     case '\n':
1753     case ' ':
1754     break;
1755 nigel 75
1756 nigel 3 default:
1757     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1758     goto SKIP_DATA;
1759     }
1760     }
1761    
1762 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1763 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1764     local character tables. */
1765 nigel 3
1766 nigel 37 #if !defined NOPOSIX
1767 nigel 3 if (posix || do_posix)
1768     {
1769     int rc;
1770     int cflags = 0;
1771 nigel 75
1772 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1773     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1774 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1775 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1776     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1777 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1778 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1779 nigel 87
1780 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1781    
1782     /* Compilation failed; go back for another re, skipping to blank line
1783     if non-interactive. */
1784    
1785     if (rc != 0)
1786     {
1787 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1788 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1789     goto SKIP_DATA;
1790     }
1791     }
1792    
1793     /* Handle compiling via the native interface */
1794    
1795     else
1796 nigel 37 #endif /* !defined NOPOSIX */
1797    
1798 nigel 3 {
1799 ph10 412 unsigned long int get_options;
1800 ph10 416
1801 nigel 93 if (timeit > 0)
1802 nigel 3 {
1803     register int i;
1804     clock_t time_taken;
1805     clock_t start_time = clock();
1806 nigel 93 for (i = 0; i < timeit; i++)
1807 nigel 3 {
1808 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1809 nigel 3 if (re != NULL) free(re);
1810     }
1811     time_taken = clock() - start_time;
1812 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1813     (((double)time_taken * 1000.0) / (double)timeit) /
1814 nigel 63 (double)CLOCKS_PER_SEC);
1815 nigel 3 }
1816    
1817 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1818 nigel 3
1819     /* Compilation failed; go back for another re, skipping to blank line
1820     if non-interactive. */
1821    
1822     if (re == NULL)
1823     {
1824     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1825     SKIP_DATA:
1826     if (infile != stdin)
1827     {
1828     for (;;)
1829     {
1830 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
1831 nigel 11 {
1832     done = 1;
1833     goto CONTINUE;
1834     }
1835 nigel 3 len = (int)strlen((char *)buffer);
1836     while (len > 0 && isspace(buffer[len-1])) len--;
1837     if (len == 0) break;
1838     }
1839     fprintf(outfile, "\n");
1840     }
1841 nigel 25 goto CONTINUE;
1842 nigel 3 }
1843 ph10 416
1844     /* Compilation succeeded. It is now possible to set the UTF-8 option from
1845     within the regex; check for this so that we know how to process the data
1846 ph10 412 lines. */
1847 ph10 416
1848 ph10 412 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1849     if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1850 nigel 3
1851 ph10 412 /* Print information if required. There are now two info-returning
1852     functions. The old one has a limited interface and returns only limited
1853     data. Check that it agrees with the newer one. */
1854 nigel 3
1855 nigel 63 if (log_store)
1856     fprintf(outfile, "Memory allocation (code space): %d\n",
1857     (int)(gotten_store -
1858     sizeof(real_pcre) -
1859     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1860    
1861 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1862     and remember the store that was got. */
1863    
1864     true_size = ((real_pcre *)re)->size;
1865     regex_gotten_store = gotten_store;
1866    
1867 ph10 612 /* If -s or /S was present, study the regex to generate additional info to
1868 ph10 654 help with the matching, unless the pattern has the SS option, which
1869 ph10 612 suppresses the effect of /S (used for a few test patterns where studying is
1870     never sensible). */
1871 nigel 75
1872 ph10 667 if (do_study || (force_study >= 0 && !no_force_study))
1873 nigel 75 {
1874 nigel 93 if (timeit > 0)
1875 nigel 75 {
1876     register int i;
1877     clock_t time_taken;
1878     clock_t start_time = clock();
1879 nigel 93 for (i = 0; i < timeit; i++)
1880 ph10 667 extra = pcre_study(re, study_options | force_study_options, &error);
1881 nigel 75 time_taken = clock() - start_time;
1882 ph10 667 if (extra != NULL) pcre_free_study(extra);
1883 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1884     (((double)time_taken * 1000.0) / (double)timeit) /
1885 nigel 75 (double)CLOCKS_PER_SEC);
1886     }
1887 ph10 667 extra = pcre_study(re, study_options | force_study_options, &error);
1888 nigel 75 if (error != NULL)
1889     fprintf(outfile, "Failed to study: %s\n", error);
1890     else if (extra != NULL)
1891     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1892     }
1893 ph10 512
1894 ph10 510 /* If /K was present, we set up for handling MARK data. */
1895 ph10 512
1896 ph10 510 if (do_mark)
1897     {
1898     if (extra == NULL)
1899     {
1900     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1901     extra->flags = 0;
1902     }
1903 ph10 512 extra->mark = &markptr;
1904 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
1905 ph10 512 }
1906 nigel 75
1907     /* If the 'F' option was present, we flip the bytes of all the integer
1908     fields in the regex data block and the study block. This is to make it
1909     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1910     compiled on a different architecture. */
1911    
1912     if (do_flip)
1913     {
1914     real_pcre *rre = (real_pcre *)re;
1915 ph10 259 rre->magic_number =
1916 ph10 255 byteflip(rre->magic_number, sizeof(rre->magic_number));
1917 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
1918     rre->options = byteflip(rre->options, sizeof(rre->options));
1919 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1920 ph10 259 rre->top_bracket =
1921 ph10 255 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1922 ph10 259 rre->top_backref =
1923 ph10 255 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1924 ph10 259 rre->first_byte =
1925 ph10 255 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1926 ph10 259 rre->req_byte =
1927 ph10 255 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1928     rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1929 nigel 75 sizeof(rre->name_table_offset));
1930 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1931 nigel 75 sizeof(rre->name_entry_size));
1932 ph10 259 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1933 ph10 255 sizeof(rre->name_count));
1934 nigel 75
1935     if (extra != NULL)
1936     {
1937     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1938     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1939 ph10 455 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1940     rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1941 nigel 75 }
1942     }
1943    
1944     /* Extract information from the compiled data if required */
1945    
1946     SHOW_INFO:
1947    
1948 nigel 93 if (do_debug)
1949     {
1950     fprintf(outfile, "------------------------------------------------------------------\n");
1951 ph10 116 pcre_printint(re, outfile, debug_lengths);
1952 nigel 93 }
1953 ph10 416
1954 ph10 412 /* We already have the options in get_options (see above) */
1955 nigel 93
1956 nigel 25 if (do_showinfo)
1957 nigel 3 {
1958 ph10 412 unsigned long int all_options;
1959 nigel 79 #if !defined NOINFOCHECK
1960 nigel 43 int old_first_char, old_options, old_count;
1961 nigel 79 #endif
1962 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1963 ph10 227 hascrorlf;
1964 nigel 63 int nameentrysize, namecount;
1965     const uschar *nametable;
1966 nigel 3
1967 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1968     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1969     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1970 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1971 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1972 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1973     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1974 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1975 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1976     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1977 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1978 nigel 43
1979 nigel 79 #if !defined NOINFOCHECK
1980 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1981 nigel 3 if (count < 0) fprintf(outfile,
1982 nigel 43 "Error %d from pcre_info()\n", count);
1983 nigel 3 else
1984     {
1985 nigel 43 if (old_count != count) fprintf(outfile,
1986     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1987     old_count);
1988 nigel 37
1989 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1990     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1991     first_char, old_first_char);
1992 nigel 37
1993 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1994     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1995     get_options, old_options);
1996 nigel 43 }
1997 nigel 79 #endif
1998 nigel 43
1999 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
2000 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2001 nigel 77 (int)size, (int)regex_gotten_store);
2002 nigel 43
2003     fprintf(outfile, "Capturing subpattern count = %d\n", count);
2004     if (backrefmax > 0)
2005     fprintf(outfile, "Max back reference = %d\n", backrefmax);
2006 nigel 63
2007     if (namecount > 0)
2008     {
2009     fprintf(outfile, "Named capturing subpatterns:\n");
2010     while (namecount-- > 0)
2011     {
2012     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
2013     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2014     GET2(nametable, 0));
2015     nametable += nameentrysize;
2016     }
2017     }
2018 ph10 172
2019 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2020 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2021 nigel 63
2022 nigel 75 all_options = ((real_pcre *)re)->options;
2023 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2024 nigel 75
2025 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
2026 ph10 576 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2027 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2028     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2029     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2030     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2031 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2032 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2033 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2034     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2035 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2036     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2037     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2038 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2039 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2040 ph10 518 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2041 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2042 ph10 576 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2043 nigel 91 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2044 ph10 172
2045 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2046 nigel 43
2047 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
2048 nigel 91 {
2049     case PCRE_NEWLINE_CR:
2050     fprintf(outfile, "Forced newline sequence: CR\n");
2051     break;
2052 nigel 43
2053 nigel 91 case PCRE_NEWLINE_LF:
2054     fprintf(outfile, "Forced newline sequence: LF\n");
2055     break;
2056    
2057     case PCRE_NEWLINE_CRLF:
2058     fprintf(outfile, "Forced newline sequence: CRLF\n");
2059     break;
2060    
2061 ph10 149 case PCRE_NEWLINE_ANYCRLF:
2062     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2063     break;
2064    
2065 nigel 93 case PCRE_NEWLINE_ANY:
2066     fprintf(outfile, "Forced newline sequence: ANY\n");
2067     break;
2068    
2069 nigel 91 default:
2070     break;
2071     }
2072    
2073 nigel 43 if (first_char == -1)
2074     {
2075 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
2076 nigel 43 }
2077     else if (first_char < 0)
2078     {
2079     fprintf(outfile, "No first char\n");
2080     }
2081     else
2082     {
2083 nigel 63 int ch = first_char & 255;
2084 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2085 nigel 63 "" : " (caseless)";
2086 nigel 93 if (PRINTHEX(ch))
2087 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2088 nigel 3 else
2089 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2090 nigel 43 }
2091 nigel 37
2092 nigel 43 if (need_char < 0)
2093     {
2094     fprintf(outfile, "No need char\n");
2095 nigel 3 }
2096 nigel 43 else
2097     {
2098 nigel 63 int ch = need_char & 255;
2099 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2100 nigel 63 "" : " (caseless)";
2101 nigel 93 if (PRINTHEX(ch))
2102 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2103 nigel 43 else
2104 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2105 nigel 43 }
2106 nigel 75
2107     /* Don't output study size; at present it is in any case a fixed
2108     value, but it varies, depending on the computer architecture, and
2109     so messes up the test suite. (And with the /F option, it might be
2110 ph10 654 flipped.) If study was forced by an external -s, don't show this
2111 ph10 612 information unless -i or -d was also present. This means that, except
2112     when auto-callouts are involved, the output from runs with and without
2113     -s should be identical. */
2114 nigel 75
2115 ph10 667 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2116 nigel 75 {
2117     if (extra == NULL)
2118     fprintf(outfile, "Study returned NULL\n");
2119     else
2120     {
2121     uschar *start_bits = NULL;
2122 ph10 455 int minlength;
2123 ph10 461
2124 ph10 455 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2125 ph10 461 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2126    
2127 nigel 75 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2128     if (start_bits == NULL)
2129 ph10 455 fprintf(outfile, "No set of starting bytes\n");
2130 nigel 75 else
2131     {
2132     int i;
2133     int c = 24;
2134     fprintf(outfile, "Starting byte set: ");
2135     for (i = 0; i < 256; i++)
2136     {
2137     if ((start_bits[i/8] & (1<<(i&7))) != 0)
2138     {
2139     if (c > 75)
2140     {
2141     fprintf(outfile, "\n ");
2142     c = 2;
2143     }
2144 nigel 93 if (PRINTHEX(i) && i != ' ')
2145 nigel 75 {
2146     fprintf(outfile, "%c ", i);
2147     c += 2;
2148     }
2149     else
2150     {
2151     fprintf(outfile, "\\x%02x ", i);
2152     c += 5;
2153     }
2154     }
2155     }
2156     fprintf(outfile, "\n");
2157     }
2158     }
2159 ph10 667
2160     /* Show this only if the JIT was set by /S, not by -s. */
2161    
2162     if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2163     {
2164     int jit;
2165     new_info(re, extra, PCRE_INFO_JIT, &jit);
2166     if (jit)
2167     fprintf(outfile, "JIT study was successful\n");
2168     else
2169     #ifdef SUPPORT_JIT
2170     fprintf(outfile, "JIT study was not successful\n");
2171     #else
2172     fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2173     #endif
2174     }
2175 nigel 75 }
2176 nigel 3 }
2177    
2178 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
2179     that is all. The first 8 bytes of the file are the regex length and then
2180     the study length, in big-endian order. */
2181 nigel 3
2182 nigel 75 if (to_file != NULL)
2183 nigel 3 {
2184 nigel 75 FILE *f = fopen((char *)to_file, "wb");
2185     if (f == NULL)
2186 nigel 3 {
2187 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2188 nigel 3 }
2189 nigel 75 else
2190     {
2191     uschar sbuf[8];
2192 ph10 255 sbuf[0] = (uschar)((true_size >> 24) & 255);
2193     sbuf[1] = (uschar)((true_size >> 16) & 255);
2194     sbuf[2] = (uschar)((true_size >> 8) & 255);
2195     sbuf[3] = (uschar)((true_size) & 255);
2196 ph10 259
2197 ph10 255 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2198     sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2199     sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2200     sbuf[7] = (uschar)((true_study_size) & 255);
2201 nigel 3
2202 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
2203     fwrite(re, 1, true_size, f) < true_size)
2204     {
2205     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2206     }
2207 nigel 3 else
2208     {
2209 ph10 612 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2210 ph10 654
2211 ph10 658 /* If there is study data, write it. */
2212 ph10 654
2213 nigel 75 if (extra != NULL)
2214 nigel 3 {
2215 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
2216     true_study_size)
2217 nigel 3 {
2218 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
2219     strerror(errno));
2220 nigel 3 }
2221 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
2222 nigel 3 }
2223     }
2224 nigel 75 fclose(f);
2225 nigel 3 }
2226 nigel 77
2227     new_free(re);
2228 ph10 667 if (extra != NULL) pcre_free_study(extra);
2229 ph10 545 if (locale_set)
2230 ph10 541 {
2231     new_free((void *)tables);
2232     setlocale(LC_CTYPE, "C");
2233 ph10 545 locale_set = 0;
2234     }
2235 nigel 75 continue; /* With next regex */
2236 nigel 3 }
2237 nigel 75 } /* End of non-POSIX compile */
2238 nigel 3
2239     /* Read data lines and test them */
2240    
2241     for (;;)
2242     {
2243 nigel 87 uschar *q;
2244 ph10 147 uschar *bptr;
2245 nigel 57 int *use_offsets = offsets;
2246 nigel 53 int use_size_offsets = size_offsets;
2247 nigel 63 int callout_data = 0;
2248     int callout_data_set = 0;
2249 nigel 3 int count, c;
2250 nigel 29 int copystrings = 0;
2251 ph10 386 int find_match_limit = default_find_match_limit;
2252 nigel 29 int getstrings = 0;
2253     int getlist = 0;
2254 nigel 39 int gmatched = 0;
2255 nigel 35 int start_offset = 0;
2256 ph10 579 int start_offset_sign = 1;
2257 nigel 41 int g_notempty = 0;
2258 nigel 77 int use_dfa = 0;
2259 nigel 3
2260     options = 0;
2261    
2262 nigel 91 *copynames = 0;
2263     *getnames = 0;
2264    
2265     copynamesptr = copynames;
2266     getnamesptr = getnames;
2267    
2268 nigel 63 pcre_callout = callout;
2269     first_callout = 1;
2270 ph10 654 last_callout_mark = NULL;
2271 nigel 63 callout_extra = 0;
2272     callout_count = 0;
2273     callout_fail_count = 999999;
2274     callout_fail_id = -1;
2275 nigel 73 show_malloc = 0;
2276 nigel 63
2277 nigel 91 if (extra != NULL) extra->flags &=
2278     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2279    
2280     len = 0;
2281     for (;;)
2282 nigel 11 {
2283 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2284 nigel 91 {
2285 ph10 537 if (len > 0) /* Reached EOF without hitting a newline */
2286     {
2287 ph10 545 fprintf(outfile, "\n");
2288 ph10 537 break;
2289 ph10 545 }
2290 nigel 91 done = 1;
2291     goto CONTINUE;
2292     }
2293     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2294     len = (int)strlen((char *)buffer);
2295     if (buffer[len-1] == '\n') break;
2296 nigel 11 }
2297 nigel 3
2298     while (len > 0 && isspace(buffer[len-1])) len--;
2299     buffer[len] = 0;
2300     if (len == 0) break;
2301    
2302     p = buffer;
2303     while (isspace(*p)) p++;
2304    
2305 ph10 147 bptr = q = dbuffer;
2306 nigel 3 while ((c = *p++) != 0)
2307     {
2308     int i = 0;
2309     int n = 0;
2310 nigel 63
2311 nigel 3 if (c == '\\') switch ((c = *p++))
2312     {
2313     case 'a': c = 7; break;
2314     case 'b': c = '\b'; break;
2315     case 'e': c = 27; break;
2316     case 'f': c = '\f'; break;
2317     case 'n': c = '\n'; break;
2318     case 'r': c = '\r'; break;
2319     case 't': c = '\t'; break;
2320     case 'v': c = '\v'; break;
2321    
2322     case '0': case '1': case '2': case '3':
2323     case '4': case '5': case '6': case '7':
2324     c -= '0';
2325     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2326     c = c * 8 + *p++ - '0';
2327 nigel 91
2328     #if !defined NOUTF8
2329     if (use_utf8 && c > 255)
2330     {
2331     unsigned char buff8[8];
2332     int ii, utn;
2333     utn = ord2utf8(c, buff8);
2334     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2335     c = buff8[ii]; /* Last byte */
2336     }
2337     #endif
2338 nigel 3 break;
2339    
2340     case 'x':
2341 nigel 49
2342     /* Handle \x{..} specially - new Perl thing for utf8 */
2343    
2344 nigel 79 #if !defined NOUTF8
2345 nigel 49 if (*p == '{')
2346     {
2347     unsigned char *pt = p;
2348     c = 0;
2349     while (isxdigit(*(++pt)))
2350     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2351     if (*pt == '}')
2352     {
2353 nigel 67 unsigned char buff8[8];
2354 nigel 49 int ii, utn;
2355 ph10 355 if (use_utf8)
2356 ph10 358 {
2357 ph10 355 utn = ord2utf8(c, buff8);
2358     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2359     c = buff8[ii]; /* Last byte */
2360     }
2361     else
2362     {
2363 ph10 358 if (c > 255)
2364 ph10 355 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2365     "UTF-8 mode is not enabled.\n"
2366     "** Truncation will probably give the wrong result.\n", c);
2367 ph10 358 }
2368 nigel 49 p = pt + 1;
2369     break;
2370     }
2371     /* Not correct form; fall through */
2372     }
2373 nigel 79 #endif
2374 nigel 49
2375     /* Ordinary \x */
2376    
2377 nigel 3 c = 0;
2378     while (i++ < 2 && isxdigit(*p))
2379     {
2380     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2381     p++;
2382     }
2383     break;
2384    
2385 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
2386 nigel 3 p--;
2387     continue;
2388    
2389 nigel 75 case '>':
2390 ph10 579 if (*p == '-')
2391 ph10 567 {
2392     start_offset_sign = -1;
2393     p++;
2394 ph10 579 }
2395 nigel 75 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2396 ph10 579 start_offset *= start_offset_sign;
2397 nigel 75 continue;
2398    
2399 nigel 3 case 'A': /* Option setting */
2400     options |= PCRE_ANCHORED;
2401     continue;
2402    
2403     case 'B':
2404     options |= PCRE_NOTBOL;
2405     continue;
2406    
2407 nigel 29 case 'C':
2408 nigel 63 if (isdigit(*p)) /* Set copy string */
2409     {
2410     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2411     copystrings |= 1 << n;
2412     }
2413     else if (isalnum(*p))
2414     {
2415 nigel 91 uschar *npp = copynamesptr;
2416 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2417 nigel 91 *npp++ = 0;
2418 nigel 67 *npp = 0;
2419 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2420 nigel 63 if (n < 0)
2421 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2422     copynamesptr = npp;
2423 nigel 63 }
2424     else if (*p == '+')
2425     {
2426     callout_extra = 1;
2427     p++;
2428     }
2429     else if (*p == '-')
2430     {
2431     pcre_callout = NULL;
2432     p++;
2433     }
2434     else if (*p == '!')
2435     {
2436     callout_fail_id = 0;
2437     p++;
2438     while(isdigit(*p))
2439     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2440     callout_fail_count = 0;
2441     if (*p == '!')
2442     {
2443     p++;
2444     while(isdigit(*p))
2445     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2446     }
2447     }
2448     else if (*p == '*')
2449     {
2450     int sign = 1;
2451     callout_data = 0;
2452     if (*(++p) == '-') { sign = -1; p++; }
2453     while(isdigit(*p))
2454     callout_data = callout_data * 10 + *p++ - '0';
2455     callout_data *= sign;
2456     callout_data_set = 1;
2457     }
2458 nigel 29 continue;
2459    
2460 nigel 79 #if !defined NODFA
2461 nigel 77 case 'D':
2462 nigel 79 #if !defined NOPOSIX
2463 nigel 77 if (posix || do_posix)
2464     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2465     else
2466 nigel 79 #endif
2467 nigel 77 use_dfa = 1;
2468     continue;
2469 ph10 553 #endif
2470 nigel 77
2471 ph10 553 #if !defined NODFA
2472 nigel 77 case 'F':
2473     options |= PCRE_DFA_SHORTEST;
2474     continue;
2475 nigel 79 #endif
2476 nigel 77
2477 nigel 29 case 'G':
2478 nigel 63 if (isdigit(*p))
2479     {
2480     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2481     getstrings |= 1 << n;
2482     }
2483     else if (isalnum(*p))
2484     {
2485 nigel 91 uschar *npp = getnamesptr;
2486 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2487 nigel 91 *npp++ = 0;
2488 nigel 67 *npp = 0;
2489 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2490 nigel 63 if (n < 0)
2491 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2492     getnamesptr = npp;
2493 nigel 63 }
2494 nigel 29 continue;
2495 ph10 667
2496     case 'J':
2497     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2498     if (extra != NULL
2499     && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2500     && extra->executable_jit != NULL)
2501     {
2502     if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2503     jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2504 ph10 675 pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2505 ph10 667 }
2506     continue;
2507 nigel 29
2508     case 'L':
2509     getlist = 1;
2510     continue;
2511    
2512 nigel 63 case 'M':
2513     find_match_limit = 1;
2514     continue;
2515    
2516 nigel 37 case 'N':
2517 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
2518     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2519 ph10 461 else
2520 ph10 442 options |= PCRE_NOTEMPTY;
2521 nigel 37 continue;
2522    
2523 nigel 3 case 'O':
2524     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2525 nigel 53 if (n > size_offsets_max)
2526     {
2527     size_offsets_max = n;
2528 nigel 57 free(offsets);
2529 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2530 nigel 53 if (offsets == NULL)
2531     {
2532     printf("** Failed to get %d bytes of memory for offsets vector\n",
2533 ph10 151 (int)(size_offsets_max * sizeof(int)));
2534 nigel 77 yield = 1;
2535     goto EXIT;
2536 nigel 53 }
2537     }
2538     use_size_offsets = n;
2539 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2540 nigel 3 continue;
2541    
2542 nigel 75 case 'P':
2543 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2544 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2545 nigel 75 continue;
2546    
2547 nigel 91 case 'Q':
2548     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2549     if (extra == NULL)
2550     {
2551     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2552     extra->flags = 0;
2553     }
2554     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2555     extra->match_limit_recursion = n;
2556     continue;
2557    
2558     case 'q':
2559     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2560     if (extra == NULL)
2561     {
2562     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2563     extra->flags = 0;
2564     }
2565     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2566     extra->match_limit = n;
2567     continue;
2568    
2569 nigel 79 #if !defined NODFA
2570 nigel 77 case 'R':
2571     options |= PCRE_DFA_RESTART;
2572     continue;
2573 nigel 79 #endif
2574 nigel 77
2575 nigel 73 case 'S':
2576     show_malloc = 1;
2577     continue;
2578 ph10 392
2579 ph10 389 case 'Y':
2580     options |= PCRE_NO_START_OPTIMIZE;
2581 ph10 392 continue;
2582 nigel 73
2583 nigel 3 case 'Z':
2584     options |= PCRE_NOTEOL;
2585     continue;
2586 nigel 71
2587     case '?':
2588     options |= PCRE_NO_UTF8_CHECK;
2589     continue;
2590 nigel 91
2591     case '<':
2592     {
2593     int x = check_newline(p, outfile);
2594     if (x == 0) goto NEXT_DATA;
2595     options |= x;
2596     while (*p++ != '>');
2597     }
2598     continue;
2599 nigel 3 }
2600 nigel 9 *q++ = c;
2601 nigel 3 }
2602 nigel 9 *q = 0;
2603 ph10 530 len = (int)(q - dbuffer);
2604 ph10 545
2605 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
2606 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2607 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
2608 ph10 371
2609 ph10 363 #if !defined NOPOSIX
2610     if (posix || do_posix)
2611     {
2612     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2613 ph10 371 bptr += buffer_size - len - 1;
2614 ph10 363 }
2615 ph10 371 else
2616     #endif
2617 ph10 363 {
2618     memmove(bptr + buffer_size - len, bptr, len);
2619 ph10 371 bptr += buffer_size - len;
2620     }
2621 nigel 3
2622 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
2623     {
2624     printf("**Match limit not relevant for DFA matching: ignored\n");
2625     find_match_limit = 0;
2626     }
2627    
2628 nigel 3 /* Handle matching via the POSIX interface, which does not
2629 nigel 63 support timing or playing with the match limit or callout data. */
2630 nigel 3
2631 nigel 37 #if !defined NOPOSIX
2632 nigel 3 if (posix || do_posix)
2633     {
2634     int rc;
2635     int eflags = 0;
2636 nigel 63 regmatch_t *pmatch = NULL;
2637     if (use_size_offsets > 0)
2638 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2639 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2640     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2641 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2642 nigel 3
2643 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2644 nigel 3
2645     if (rc != 0)
2646     {
2647 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2648 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2649     }
2650 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2651     != 0)
2652     {
2653     fprintf(outfile, "Matched with REG_NOSUB\n");
2654     }
2655 nigel 3 else
2656     {
2657 nigel 7 size_t i;
2658 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
2659 nigel 3 {
2660     if (pmatch[i].rm_so >= 0)
2661     {
2662 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2663 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2664     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2665 nigel 3 fprintf(outfile, "\n");
2666 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
2667 nigel 35 {
2668 ph10 616 fprintf(outfile, "%2d+ ", (int)i);
2669 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2670     outfile);
2671 nigel 35 fprintf(outfile, "\n");
2672     }
2673 nigel 3 }
2674     }
2675     }
2676 nigel 53 free(pmatch);
2677 nigel 3 }
2678    
2679 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2680 nigel 3
2681 nigel 37 else
2682     #endif /* !defined NOPOSIX */
2683    
2684 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2685 nigel 3 {
2686 ph10 512 markptr = NULL;
2687    
2688 nigel 93 if (timeitm > 0)
2689 nigel 3 {
2690     register int i;
2691     clock_t time_taken;
2692     clock_t start_time = clock();
2693 nigel 77
2694 nigel 79 #if !defined NODFA
2695 nigel 77 if (all_use_dfa || use_dfa)
2696     {
2697     int workspace[1000];
2698 nigel 93 for (i = 0; i < timeitm; i++)
2699 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2700 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2701     sizeof(workspace)/sizeof(int));
2702     }
2703     else
2704 nigel 79 #endif
2705 nigel 77
2706 nigel 93 for (i = 0; i < timeitm; i++)
2707 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2708 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2709 nigel 77
2710 nigel 3 time_taken = clock() - start_time;
2711 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2712     (((double)time_taken * 1000.0) / (double)timeitm) /
2713 nigel 63 (double)CLOCKS_PER_SEC);
2714 nigel 3 }
2715    
2716 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2717 nigel 87 varying limits in order to find the minimum value for the match limit and
2718 ph10 667 for the recursion limit. The match limits are relevant only to the normal
2719     running of pcre_exec(), so disable the JIT optimization. This makes it
2720     possible to run the same set of tests with and without JIT externally
2721     requested. */
2722 nigel 63
2723     if (find_match_limit)
2724     {
2725     if (extra == NULL)
2726     {
2727 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2728 nigel 63 extra->flags = 0;
2729     }
2730 ph10 667 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
2731    
2732 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2733 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2734     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2735     PCRE_ERROR_MATCHLIMIT, "match()");
2736 nigel 63
2737 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2738     options|g_notempty, use_offsets, use_size_offsets,
2739     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2740     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2741 nigel 63 }
2742    
2743     /* If callout_data is set, use the interface with additional data */
2744    
2745     else if (callout_data_set)
2746     {
2747     if (extra == NULL)
2748     {
2749 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2750 nigel 63 extra->flags = 0;
2751     }
2752     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2753 nigel 71 extra->callout_data = &callout_data;
2754 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2755     options | g_notempty, use_offsets, use_size_offsets);
2756     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2757     }
2758    
2759     /* The normal case is just to do the match once, with the default
2760     value of match_limit. */
2761    
2762 nigel 79 #if !defined NODFA
2763 nigel 77 else if (all_use_dfa || use_dfa)
2764     {
2765     int workspace[1000];
2766 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2767 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2768     sizeof(workspace)/sizeof(int));
2769     if (count == 0)
2770     {
2771     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2772     count = use_size_offsets/2;
2773     }
2774     }
2775 nigel 79 #endif
2776 nigel 77
2777 nigel 75 else
2778     {
2779     count = pcre_exec(re, extra, (char *)bptr, len,
2780     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2781 nigel 77 if (count == 0)
2782     {
2783     fprintf(outfile, "Matched, but too many substrings\n");
2784     count = use_size_offsets/3;
2785     }
2786 nigel 75 }
2787 nigel 3
2788 nigel 39 /* Matched */
2789    
2790 nigel 3 if (count >= 0)
2791     {
2792 nigel 93 int i, maxcount;
2793    
2794     #if !defined NODFA
2795     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2796     #endif
2797     maxcount = use_size_offsets/3;
2798    
2799     /* This is a check against a lunatic return value. */
2800    
2801     if (count > maxcount)
2802     {
2803     fprintf(outfile,
2804     "** PCRE error: returned count %d is too big for offset size %d\n",
2805     count, use_size_offsets);
2806     count = use_size_offsets/3;
2807     if (do_g || do_G)
2808     {
2809     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2810     do_g = do_G = FALSE; /* Break g/G loop */
2811     }
2812     }
2813 ph10 654
2814 ph10 626 /* do_allcaps requests showing of all captures in the pattern, to check
2815     unset ones at the end. */
2816 ph10 654
2817 ph10 626 if (do_allcaps)
2818     {
2819     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2820 ph10 654 count++; /* Allow for full match */
2821     if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2822     }
2823 nigel 93
2824 ph10 626 /* Output the captured substrings */
2825 ph10 654
2826 nigel 29 for (i = 0; i < count * 2; i += 2)
2827 nigel 3 {
2828 nigel 57 if (use_offsets[i] < 0)
2829 ph10 654 {
2830 ph10 626 if (use_offsets[i] != -1)
2831     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2832 ph10 654 use_offsets[i], i);
2833 ph10 626 if (use_offsets[i+1] != -1)
2834     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2835 ph10 654 use_offsets[i+1], i+1);
2836 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2837 ph10 654 }
2838 nigel 3 else
2839     {
2840     fprintf(outfile, "%2d: ", i/2);
2841 nigel 63 (void)pchars(bptr + use_offsets[i],
2842     use_offsets[i+1] - use_offsets[i], outfile);
2843 nigel 3 fprintf(outfile, "\n");
2844 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
2845 nigel 35 {
2846 ph10 616 fprintf(outfile, "%2d+ ", i/2);
2847     (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2848     outfile);
2849     fprintf(outfile, "\n");
2850 nigel 35 }
2851 nigel 3 }
2852     }
2853 ph10 512
2854 ph10 510 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2855 nigel 29
2856     for (i = 0; i < 32; i++)
2857     {
2858     if ((copystrings & (1 << i)) != 0)
2859     {
2860 nigel 91 char copybuffer[256];
2861 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2862 nigel 37 i, copybuffer, sizeof(copybuffer));
2863 nigel 29 if (rc < 0)
2864     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2865     else
2866 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2867 nigel 29 }
2868     }
2869    
2870 nigel 91 for (copynamesptr = copynames;
2871     *copynamesptr != 0;
2872     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2873     {
2874     char copybuffer[256];
2875     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2876     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2877     if (rc < 0)
2878     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2879     else
2880     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2881     }
2882    
2883 nigel 29 for (i = 0; i < 32; i++)
2884     {
2885     if ((getstrings & (1 << i)) != 0)
2886     {
2887     const char *substring;
2888 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2889 nigel 29 i, &substring);
2890     if (rc < 0)
2891     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2892     else
2893     {
2894     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2895 nigel 49 pcre_free_substring(substring);
2896 nigel 29 }
2897     }
2898     }
2899    
2900 nigel 91 for (getnamesptr = getnames;
2901     *getnamesptr != 0;
2902     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2903     {
2904     const char *substring;
2905     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2906     count, (char *)getnamesptr, &substring);
2907     if (rc < 0)
2908     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2909     else
2910     {
2911     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2912     pcre_free_substring(substring);
2913     }
2914     }
2915    
2916 nigel 29 if (getlist)
2917     {
2918     const char **stringlist;
2919 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2920 nigel 29 &stringlist);
2921     if (rc < 0)
2922     fprintf(outfile, "get substring list failed %d\n", rc);
2923     else
2924     {
2925     for (i = 0; i < count; i++)
2926     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2927     if (stringlist[i] != NULL)
2928     fprintf(outfile, "string list not terminated by NULL\n");
2929 nigel 49 pcre_free_substring_list(stringlist);
2930 nigel 29 }
2931     }
2932 nigel 39 }
2933 nigel 29
2934 nigel 75 /* There was a partial match */
2935    
2936     else if (count == PCRE_ERROR_PARTIAL)
2937     {
2938 ph10 510 if (markptr == NULL) fprintf(outfile, "Partial match");
2939     else fprintf(outfile, "Partial match, mark=%s", markptr);
2940 ph10 426 if (use_size_offsets > 1)
2941     {
2942     fprintf(outfile, ": ");
2943     pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2944 ph10 461 outfile);
2945     }
2946 nigel 77 fprintf(outfile, "\n");
2947 nigel 75 break; /* Out of the /g loop */
2948     }
2949    
2950 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2951 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2952     to advance the start offset, and continue. We won't be at the end of the
2953     string - that was checked before setting g_notempty.
2954 nigel 39
2955 ph10 566 Complication arises in the case when the newline convention is "any",
2956 ph10 579 "crlf", or "anycrlf". If the previous match was at the end of a line
2957     terminated by CRLF, an advance of one character just passes the \r,
2958 ph10 566 whereas we should prefer the longer newline sequence, as does the code in
2959 ph10 579 pcre_exec(). Fudge the offset value to achieve this. We check for a
2960     newline setting in the pattern; if none was set, use pcre_config() to
2961 ph10 566 find the default.
2962 ph10 144
2963 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2964     character, not one byte. */
2965    
2966 nigel 3 else
2967     {
2968 nigel 41 if (g_notempty != 0)
2969 nigel 35 {
2970 nigel 73 int onechar = 1;
2971 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2972 nigel 57 use_offsets[0] = start_offset;
2973 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2974     {
2975     int d;
2976     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2977 ph10 391 /* Note that these values are always the ASCII ones, even in
2978     EBCDIC environments. CR = 13, NL = 10. */
2979     obits = (d == 13)? PCRE_NEWLINE_CR :
2980     (d == 10)? PCRE_NEWLINE_LF :
2981     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2982 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2983 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2984     }
2985 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2986 ph10 566 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2987 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2988 ph10 149 &&
2989 ph10 143 start_offset < len - 1 &&
2990     bptr[start_offset] == '\r' &&
2991     bptr[start_offset+1] == '\n')
2992 ph10 144 onechar++;
2993 ph10 143 else if (use_utf8)
2994 nigel 73 {
2995     while (start_offset + onechar < len)
2996     {
2997 ph10 566 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2998 ph10 579 onechar++;
2999 nigel 73 }
3000     }
3001     use_offsets[1] = start_offset + onechar;
3002 nigel 35 }
3003 nigel 41 else
3004     {
3005 ph10 598 switch(count)
3006 ph10 654 {
3007 ph10 598 case PCRE_ERROR_NOMATCH:
3008 ph10 512 if (gmatched == 0)
3009 ph10 510 {
3010     if (markptr == NULL) fprintf(outfile, "No match\n");
3011     else fprintf(outfile, "No match, mark = %s\n", markptr);
3012 ph10 512 }
3013 ph10 598 break;
3014 ph10 654
3015 ph10 598 case PCRE_ERROR_BADUTF8:
3016     case PCRE_ERROR_SHORTUTF8:
3017     fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3018     (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3019     if (use_size_offsets >= 2)
3020 ph10 654 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3021 ph10 598 use_offsets[1]);
3022 ph10 654 fprintf(outfile, "\n");
3023     break;
3024    
3025 ph10 598 default:
3026 ph10 654 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3027 ph10 604 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3028 ph10 654 else
3029     fprintf(outfile, "Error %d (Unexpected value)\n", count);
3030 ph10 598 break;
3031 nigel 41 }
3032 ph10 654
3033 nigel 41 break; /* Out of the /g loop */
3034     }
3035 nigel 3 }
3036 nigel 35
3037 nigel 39 /* If not /g or /G we are done */
3038    
3039     if (!do_g && !do_G) break;
3040    
3041 nigel 41 /* If we have matched an empty string, first check to see if we are at
3042 ph10 442 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3043     Perl's /g option does. This turns out to be rather cunning. First we set
3044     PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3045 nigel 47 same point. If this fails (picked up above) we advance to the next
3046 ph10 143 character. */
3047 ph10 142
3048 nigel 41 g_notempty = 0;
3049 ph10 142
3050 nigel 57 if (use_offsets[0] == use_offsets[1])
3051 nigel 41 {
3052 nigel 57 if (use_offsets[0] == len) break;
3053 ph10 442 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3054 nigel 41 }
3055 nigel 39
3056     /* For /g, update the start offset, leaving the rest alone */
3057    
3058 ph10 143 if (do_g) start_offset = use_offsets[1];
3059 nigel 39
3060     /* For /G, update the pointer and length */
3061    
3062     else
3063 nigel 35 {
3064 ph10 143 bptr += use_offsets[1];
3065     len -= use_offsets[1];
3066 nigel 35 }
3067 nigel 39 } /* End of loop for /g and /G */
3068 nigel 91
3069     NEXT_DATA: continue;
3070 nigel 39 } /* End of loop for data lines */
3071 nigel 3
3072 nigel 11 CONTINUE:
3073 nigel 37
3074     #if !defined NOPOSIX
3075 nigel 3 if (posix || do_posix) regfree(&preg);
3076 nigel 37 #endif
3077    
3078 nigel 77 if (re != NULL) new_free(re);
3079 ph10 667 if (extra != NULL) pcre_free_study(extra);
3080 ph10 541 if (locale_set)
3081 nigel 25 {
3082 nigel 77 new_free((void *)tables);
3083 nigel 25 setlocale(LC_CTYPE, "C");
3084 nigel 93 locale_set = 0;
3085 nigel 25 }
3086 ph10 667 if (jit_stack != NULL)
3087     {
3088     pcre_jit_stack_free(jit_stack);
3089     jit_stack = NULL;
3090     }
3091 nigel 3 }
3092    
3093 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
3094 nigel 77
3095     EXIT:
3096    
3097     if (infile != NULL && infile != stdin) fclose(infile);
3098     if (outfile != NULL && outfile != stdout) fclose(outfile);
3099    
3100     free(buffer);
3101     free(dbuffer);
3102     free(pbuffer);
3103     free(offsets);
3104    
3105     return yield;
3106 nigel 3 }
3107    
3108 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12