/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 676 - (hide annotations) (download)
Sat Aug 27 15:53:04 2011 UTC (3 years ago) by ph10
File MIME type: text/plain
File size: 94287 byte(s)
Give PCRE_ERROR_JIT_STACKLIMIT when JIT runs out of stack.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50 nigel 3
51 ph10 287 #ifdef SUPPORT_LIBREADLINE
52 ph10 343 #ifdef HAVE_UNISTD_H
53 ph10 287 #include <unistd.h>
54 ph10 343 #endif
55 ph10 287 #include <readline/readline.h>
56     #include <readline/history.h>
57     #endif
58 nigel 93
59 ph10 287
60 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
61     input and output without "b"; then I was told that "b" was needed in some
62     environments, so it was added for release 5.0 to both the input and output. (It
63     makes no difference on Unix-like systems.) Later I was told that it is wrong
64     for the input on Windows. I've now abstracted the modes into two macros that
65     are set here, to make it easier to fiddle with them, and removed "b" from the
66     input mode under Windows. */
67    
68     #if defined(_WIN32) || defined(WIN32)
69     #include <io.h> /* For _setmode() */
70     #include <fcntl.h> /* For _O_BINARY */
71     #define INPUT_MODE "r"
72     #define OUTPUT_MODE "wb"
73    
74 ph10 411 #ifndef isatty
75     #define isatty _isatty /* This is what Windows calls them, I'm told, */
76     #endif /* though in some environments they seem to */
77     /* be already defined, hence the #ifndefs. */
78     #ifndef fileno
79 ph10 343 #define fileno _fileno
80 ph10 411 #endif
81 ph10 343
82 ph10 580 /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84     #ifdef __BORLANDC__
85     #define _setmode(handle, mode) setmode(handle, mode)
86     #endif
87    
88     /* Not Windows */
89    
90 nigel 93 #else
91     #include <sys/time.h> /* These two includes are needed */
92     #include <sys/resource.h> /* for setrlimit(). */
93     #define INPUT_MODE "rb"
94     #define OUTPUT_MODE "wb"
95 nigel 91 #endif
96    
97 nigel 93
98 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
99     displaying the results of pcre_study() and we also need to know about the
100     internal macros, structures, and other internal data values; pcretest has
101     "inside information" compared to a program that strictly follows the PCRE API.
102 nigel 37
103 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105     appropriately for an application, not for building PCRE. */
106 nigel 77
107 ph10 145 #include "pcre.h"
108 nigel 77 #include "pcre_internal.h"
109    
110 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
111     to keep two copies, we include the source file here, changing the names of the
112     external symbols to prevent clashes. */
113 nigel 77
114 ph10 351 #define _pcre_ucp_gentype ucp_gentype
115 ph10 667 #define _pcre_ucp_typerange ucp_typerange
116 nigel 85 #define _pcre_utf8_table1 utf8_table1
117     #define _pcre_utf8_table1_size utf8_table1_size
118     #define _pcre_utf8_table2 utf8_table2
119     #define _pcre_utf8_table3 utf8_table3
120     #define _pcre_utf8_table4 utf8_table4
121 ph10 667 #define _pcre_utf8_char_sizes utf8_char_sizes
122 nigel 85 #define _pcre_utt utt
123     #define _pcre_utt_size utt_size
124 ph10 240 #define _pcre_utt_names utt_names
125 nigel 85 #define _pcre_OP_lengths OP_lengths
126    
127     #include "pcre_tables.c"
128    
129     /* We also need the pcre_printint() function for printing out compiled
130     patterns. This function is in a separate file so that it can be included in
131 ph10 507 pcre_compile.c when that module is compiled with debugging enabled. It needs to
132 ph10 498 know which case is being compiled. */
133 nigel 85
134 ph10 498 #define COMPILING_PCRETEST
135     #include "pcre_printint.src"
136    
137     /* The definition of the macro PRINTABLE, which determines whether to print an
138 nigel 93 output character as-is or as a hex value when showing compiled patterns, is
139 ph10 498 contained in the printint.src file. We use it here also, in cases when the
140     locale has not been explicitly changed, so as to get consistent output from
141     systems that differ in their output from isprint() even in the "C" locale. */
142 nigel 93
143     #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
144 nigel 85
145 nigel 37 /* It is possible to compile this test program without including support for
146     testing the POSIX interface, though this is not available via the standard
147     Makefile. */
148    
149     #if !defined NOPOSIX
150 nigel 3 #include "pcreposix.h"
151 nigel 37 #endif
152 nigel 3
153 ph10 107 /* It is also possible, for the benefit of the version currently imported into
154     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
155     interface to the DFA matcher (NODFA), and without the doublecheck of the old
156     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
157     UTF8 support if PCRE is built without it. */
158 nigel 79
159 ph10 107 #ifndef SUPPORT_UTF8
160     #ifndef NOUTF8
161     #define NOUTF8
162     #endif
163     #endif
164 nigel 79
165 ph10 107
166 nigel 85 /* Other parameters */
167    
168 nigel 3 #ifndef CLOCKS_PER_SEC
169     #ifdef CLK_TCK
170     #define CLOCKS_PER_SEC CLK_TCK
171     #else
172     #define CLOCKS_PER_SEC 100
173     #endif
174     #endif
175    
176 nigel 93 /* This is the default loop count for timing. */
177    
178 nigel 75 #define LOOPREPEAT 500000
179 nigel 3
180 nigel 85 /* Static variables */
181    
182 nigel 3 static FILE *outfile;
183     static int log_store = 0;
184 nigel 63 static int callout_count;
185     static int callout_extra;
186     static int callout_fail_count;
187     static int callout_fail_id;
188 ph10 210 static int debug_lengths;
189 nigel 63 static int first_callout;
190 nigel 93 static int locale_set = 0;
191 nigel 73 static int show_malloc;
192 nigel 67 static int use_utf8;
193 nigel 43 static size_t gotten_store;
194 ph10 645 static const unsigned char *last_callout_mark = NULL;
195 nigel 3
196 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
197    
198     static int buffer_size = 50000;
199     static uschar *buffer = NULL;
200     static uschar *dbuffer = NULL;
201 nigel 75 static uschar *pbuffer = NULL;
202 nigel 3
203 ph10 598 /* Textual explanations for runtime error codes */
204 nigel 75
/* Human-readable text for each runtime error, one entry per table index.
A NULL entry marks a code that has no generic text here: the inline comments
say whether it is handled specially or never returned.
NOTE(review): presumably the index is the negated PCRE_ERROR_xxx value returned
by the matching functions - confirm against pcre.h and the lookup site. */

static const char *errtexts[] = {
  /*  0 */ NULL,  /* 0 is no error */
  /*  1 */ NULL,  /* NOMATCH is handled specially */
  /*  2 */ "NULL argument passed",
  /*  3 */ "bad option value",
  /*  4 */ "magic number missing",
  /*  5 */ "unknown opcode - pattern overwritten?",
  /*  6 */ "no more memory",
  /*  7 */ NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  /*  8 */ "match limit exceeded",
  /*  9 */ "callout error code",
  /* 10 */ NULL,  /* BADUTF8 is handled specially */
  /* 11 */ "bad UTF-8 offset",
  /* 12 */ NULL,  /* PARTIAL is handled specially */
  /* 13 */ "not used - internal error",
  /* 14 */ "internal error - pattern overwritten?",
  /* 15 */ "bad count value",
  /* 16 */ "item unsupported for DFA matching",
  /* 17 */ "backreference condition or recursion test not supported for DFA matching",
  /* 18 */ "match limit not supported for DFA matching",
  /* 19 */ "workspace size exceeded in DFA matching",
  /* 20 */ "too much recursion for DFA matching",
  /* 21 */ "recursion limit exceeded",
  /* 22 */ "not used - internal error",
  /* 23 */ "invalid combination of newline options",
  /* 24 */ "bad offset value",
  /* 25 */ NULL,  /* SHORTUTF8 is handled specially */
  /* 26 */ "nested recursion at the same subject position",
  /* 27 */ "JIT stack limit reached"
};
235    
236 ph10 654
237 ph10 541 /*************************************************
238     * Alternate character tables *
239     *************************************************/
240 nigel 49
241 ph10 545 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
242     using the default tables of the library. However, the T option can be used to
243     select alternate sets of tables, for different kinds of testing. Note also that
244 ph10 541 the L (locale) option also adjusts the tables. */
245    
246 ph10 545 /* This is the set of tables distributed as default with PCRE. It recognizes
247 ph10 541 only ASCII characters. */
248    
249     static const unsigned char tables0[] = {
250    
251     /* This table is a lower casing table. */
252    
253     0, 1, 2, 3, 4, 5, 6, 7,
254     8, 9, 10, 11, 12, 13, 14, 15,
255     16, 17, 18, 19, 20, 21, 22, 23,
256     24, 25, 26, 27, 28, 29, 30, 31,
257     32, 33, 34, 35, 36, 37, 38, 39,
258     40, 41, 42, 43, 44, 45, 46, 47,
259     48, 49, 50, 51, 52, 53, 54, 55,
260     56, 57, 58, 59, 60, 61, 62, 63,
261     64, 97, 98, 99,100,101,102,103,
262     104,105,106,107,108,109,110,111,
263     112,113,114,115,116,117,118,119,
264     120,121,122, 91, 92, 93, 94, 95,
265     96, 97, 98, 99,100,101,102,103,
266     104,105,106,107,108,109,110,111,
267     112,113,114,115,116,117,118,119,
268     120,121,122,123,124,125,126,127,
269     128,129,130,131,132,133,134,135,
270     136,137,138,139,140,141,142,143,
271     144,145,146,147,148,149,150,151,
272     152,153,154,155,156,157,158,159,
273     160,161,162,163,164,165,166,167,
274     168,169,170,171,172,173,174,175,
275     176,177,178,179,180,181,182,183,
276     184,185,186,187,188,189,190,191,
277     192,193,194,195,196,197,198,199,
278     200,201,202,203,204,205,206,207,
279     208,209,210,211,212,213,214,215,
280     216,217,218,219,220,221,222,223,
281     224,225,226,227,228,229,230,231,
282     232,233,234,235,236,237,238,239,
283     240,241,242,243,244,245,246,247,
284     248,249,250,251,252,253,254,255,
285    
286     /* This table is a case flipping table. */
287    
288     0, 1, 2, 3, 4, 5, 6, 7,
289     8, 9, 10, 11, 12, 13, 14, 15,
290     16, 17, 18, 19, 20, 21, 22, 23,
291     24, 25, 26, 27, 28, 29, 30, 31,
292     32, 33, 34, 35, 36, 37, 38, 39,
293     40, 41, 42, 43, 44, 45, 46, 47,
294     48, 49, 50, 51, 52, 53, 54, 55,
295     56, 57, 58, 59, 60, 61, 62, 63,
296     64, 97, 98, 99,100,101,102,103,
297     104,105,106,107,108,109,110,111,
298     112,113,114,115,116,117,118,119,
299     120,121,122, 91, 92, 93, 94, 95,
300     96, 65, 66, 67, 68, 69, 70, 71,
301     72, 73, 74, 75, 76, 77, 78, 79,
302     80, 81, 82, 83, 84, 85, 86, 87,
303     88, 89, 90,123,124,125,126,127,
304     128,129,130,131,132,133,134,135,
305     136,137,138,139,140,141,142,143,
306     144,145,146,147,148,149,150,151,
307     152,153,154,155,156,157,158,159,
308     160,161,162,163,164,165,166,167,
309     168,169,170,171,172,173,174,175,
310     176,177,178,179,180,181,182,183,
311     184,185,186,187,188,189,190,191,
312     192,193,194,195,196,197,198,199,
313     200,201,202,203,204,205,206,207,
314     208,209,210,211,212,213,214,215,
315     216,217,218,219,220,221,222,223,
316     224,225,226,227,228,229,230,231,
317     232,233,234,235,236,237,238,239,
318     240,241,242,243,244,245,246,247,
319     248,249,250,251,252,253,254,255,
320    
321     /* This table contains bit maps for various character classes. Each map is 32
322     bytes long and the bits run from the least significant end of each byte. The
323     classes that have their own maps are: space, xdigit, digit, upper, lower, word,
324     graph, print, punct, and cntrl. Other classes are built from combinations. */
325    
326     0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
327     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
328     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330    
331     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
332     0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
333     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
335    
336     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
337     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
338     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
340    
341     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
342     0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
343     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
344     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
345    
346     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
347     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
348     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
349     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
350    
351     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
352     0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
353     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
354     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
355    
356     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
357     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
358     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
359     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360    
361     0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
362     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
363     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
364     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365    
366     0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
367     0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
368     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
369     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
370    
371     0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
372     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
373     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
374     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
375    
376     /* This table identifies various classes of character by individual bits:
377     0x01 white space character
378     0x02 letter
379     0x04 decimal digit
380     0x08 hexadecimal digit
381     0x10 alphanumeric or '_'
382     0x80 regular expression metacharacter or binary zero
383     */
384    
385     0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
386     0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
387     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
388     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
389     0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
390     0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
391     0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
392     0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
393     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
394     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
395     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
396     0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
397     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
398     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
399     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
400     0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
401     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
402     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
403     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
404     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
405     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
406     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
407     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
408     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
409     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
410     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
411     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
412     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
413     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
414     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
415     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
416     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
417    
418 ph10 545 /* This is a set of tables that came originally from a Windows user. It seems to
419     be at least an approximation of ISO 8859. In particular, there are characters
420 ph10 541 greater than 128 that are marked as spaces, letters, etc. */
421    
422     static const unsigned char tables1[] = {
423     0,1,2,3,4,5,6,7,
424     8,9,10,11,12,13,14,15,
425     16,17,18,19,20,21,22,23,
426     24,25,26,27,28,29,30,31,
427     32,33,34,35,36,37,38,39,
428     40,41,42,43,44,45,46,47,
429     48,49,50,51,52,53,54,55,
430     56,57,58,59,60,61,62,63,
431     64,97,98,99,100,101,102,103,
432     104,105,106,107,108,109,110,111,
433     112,113,114,115,116,117,118,119,
434     120,121,122,91,92,93,94,95,
435     96,97,98,99,100,101,102,103,
436     104,105,106,107,108,109,110,111,
437     112,113,114,115,116,117,118,119,
438     120,121,122,123,124,125,126,127,
439     128,129,130,131,132,133,134,135,
440     136,137,138,139,140,141,142,143,
441     144,145,146,147,148,149,150,151,
442     152,153,154,155,156,157,158,159,
443     160,161,162,163,164,165,166,167,
444     168,169,170,171,172,173,174,175,
445     176,177,178,179,180,181,182,183,
446     184,185,186,187,188,189,190,191,
447     224,225,226,227,228,229,230,231,
448     232,233,234,235,236,237,238,239,
449     240,241,242,243,244,245,246,215,
450     248,249,250,251,252,253,254,223,
451     224,225,226,227,228,229,230,231,
452     232,233,234,235,236,237,238,239,
453     240,241,242,243,244,245,246,247,
454     248,249,250,251,252,253,254,255,
455     0,1,2,3,4,5,6,7,
456     8,9,10,11,12,13,14,15,
457     16,17,18,19,20,21,22,23,
458     24,25,26,27,28,29,30,31,
459     32,33,34,35,36,37,38,39,
460     40,41,42,43,44,45,46,47,
461     48,49,50,51,52,53,54,55,
462     56,57,58,59,60,61,62,63,
463     64,97,98,99,100,101,102,103,
464     104,105,106,107,108,109,110,111,
465     112,113,114,115,116,117,118,119,
466     120,121,122,91,92,93,94,95,
467     96,65,66,67,68,69,70,71,
468     72,73,74,75,76,77,78,79,
469     80,81,82,83,84,85,86,87,
470     88,89,90,123,124,125,126,127,
471     128,129,130,131,132,133,134,135,
472     136,137,138,139,140,141,142,143,
473     144,145,146,147,148,149,150,151,
474     152,153,154,155,156,157,158,159,
475     160,161,162,163,164,165,166,167,
476     168,169,170,171,172,173,174,175,
477     176,177,178,179,180,181,182,183,
478     184,185,186,187,188,189,190,191,
479     224,225,226,227,228,229,230,231,
480     232,233,234,235,236,237,238,239,
481     240,241,242,243,244,245,246,215,
482     248,249,250,251,252,253,254,223,
483     192,193,194,195,196,197,198,199,
484     200,201,202,203,204,205,206,207,
485     208,209,210,211,212,213,214,247,
486     216,217,218,219,220,221,222,255,
487     0,62,0,0,1,0,0,0,
488     0,0,0,0,0,0,0,0,
489     32,0,0,0,1,0,0,0,
490     0,0,0,0,0,0,0,0,
491     0,0,0,0,0,0,255,3,
492     126,0,0,0,126,0,0,0,
493     0,0,0,0,0,0,0,0,
494     0,0,0,0,0,0,0,0,
495     0,0,0,0,0,0,255,3,
496     0,0,0,0,0,0,0,0,
497     0,0,0,0,0,0,12,2,
498     0,0,0,0,0,0,0,0,
499     0,0,0,0,0,0,0,0,
500     254,255,255,7,0,0,0,0,
501     0,0,0,0,0,0,0,0,
502     255,255,127,127,0,0,0,0,
503     0,0,0,0,0,0,0,0,
504     0,0,0,0,254,255,255,7,
505     0,0,0,0,0,4,32,4,
506     0,0,0,128,255,255,127,255,
507     0,0,0,0,0,0,255,3,
508     254,255,255,135,254,255,255,7,
509     0,0,0,0,0,4,44,6,
510     255,255,127,255,255,255,127,255,
511     0,0,0,0,254,255,255,255,
512     255,255,255,255,255,255,255,127,
513     0,0,0,0,254,255,255,255,
514     255,255,255,255,255,255,255,255,
515     0,2,0,0,255,255,255,255,
516     255,255,255,255,255,255,255,127,
517     0,0,0,0,255,255,255,255,
518     255,255,255,255,255,255,255,255,
519     0,0,0,0,254,255,0,252,
520     1,0,0,248,1,0,0,120,
521     0,0,0,0,254,255,255,255,
522     0,0,128,0,0,0,128,0,
523     255,255,255,255,0,0,0,0,
524     0,0,0,0,0,0,0,128,
525     255,255,255,255,0,0,0,0,
526     0,0,0,0,0,0,0,0,
527     128,0,0,0,0,0,0,0,
528     0,1,1,0,1,1,0,0,
529     0,0,0,0,0,0,0,0,
530     0,0,0,0,0,0,0,0,
531     1,0,0,0,128,0,0,0,
532     128,128,128,128,0,0,128,0,
533     28,28,28,28,28,28,28,28,
534     28,28,0,0,0,0,0,128,
535     0,26,26,26,26,26,26,18,
536     18,18,18,18,18,18,18,18,
537     18,18,18,18,18,18,18,18,
538     18,18,18,128,128,0,128,16,
539     0,26,26,26,26,26,26,18,
540     18,18,18,18,18,18,18,18,
541     18,18,18,18,18,18,18,18,
542     18,18,18,128,128,0,0,0,
543     0,0,0,0,0,1,0,0,
544     0,0,0,0,0,0,0,0,
545     0,0,0,0,0,0,0,0,
546     0,0,0,0,0,0,0,0,
547     1,0,0,0,0,0,0,0,
548     0,0,18,0,0,0,0,0,
549     0,0,20,20,0,18,0,0,
550     0,20,18,0,0,0,0,0,
551     18,18,18,18,18,18,18,18,
552     18,18,18,18,18,18,18,18,
553     18,18,18,18,18,18,18,0,
554     18,18,18,18,18,18,18,18,
555     18,18,18,18,18,18,18,18,
556     18,18,18,18,18,18,18,18,
557     18,18,18,18,18,18,18,0,
558     18,18,18,18,18,18,18,18
559     };
560    
561    
562    
563 ph10 558
564     #ifndef HAVE_STRERROR
565 nigel 49 /*************************************************
566 ph10 558 * Provide strerror() for non-ANSI libraries *
567     *************************************************/
568    
569     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
570     in their libraries, but can provide the same facility by this simple
571     alternative function. */
572    
/* The C library's own error-text table and its entry count; they are only
declared here, the library provides them. */

extern int sys_nerr;
extern char *sys_errlist[];

/* Fallback strerror() for libraries that lack one.

Argument:
  n           an errno value

Returns:      the library's text for n when 0 <= n < sys_nerr, otherwise the
              fixed string "unknown error number"
*/

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
582     #endif /* HAVE_STRERROR */
583    
584    
585 ph10 667 /*************************************************
586     * JIT memory callback *
587     *************************************************/
588 ph10 558
589 ph10 667 static pcre_jit_stack* jit_callback(void *arg)
590     {
591     return (pcre_jit_stack *)arg;
592     }
593 ph10 558
594 ph10 667
595 ph10 558 /*************************************************
596 nigel 91 * Read or extend an input line *
597     *************************************************/
598    
599     /* Input lines are read into buffer, but both patterns and data lines can be
600     continued over multiple input lines. In addition, if the buffer fills up, we
601     want to automatically expand it so as to be able to handle extremely large
602     lines that are needed for certain stress tests. When the input buffer is
603     expanded, the other two buffers must also be expanded likewise, and the
604     contents of pbuffer, which are a copy of the input for callouts, must be
605     preserved (for when expansion happens for a data line). This is not the most
606     optimal way of handling this, but hey, this is just a test program!
607    
608     Arguments:
609     f the file to read
610     start where in buffer to start (this *must* be within buffer)
611 ph10 287 prompt for stdin or readline()
612 nigel 91
613     Returns: pointer to the start of new data
614     could be a copy of start, or could be moved
615     NULL if no data read and EOF reached
616     */
617    
618     static uschar *
619 ph10 287 extend_inputline(FILE *f, uschar *start, const char *prompt)
620 nigel 91 {
621     uschar *here = start;
622    
623     for (;;)
624     {
625 ph10 530 int rlen = (int)(buffer_size - (here - buffer));
626 nigel 93
627 nigel 91 if (rlen > 1000)
628     {
629     int dlen;
630 ph10 289
631 ph10 287 /* If libreadline support is required, use readline() to read a line if the
632     input is a terminal. Note that readline() removes the trailing newline, so
633     we must put it back again, to be compatible with fgets(). */
634 ph10 289
635 ph10 287 #ifdef SUPPORT_LIBREADLINE
636     if (isatty(fileno(f)))
637     {
638 ph10 289 size_t len;
639 ph10 287 char *s = readline(prompt);
640     if (s == NULL) return (here == start)? NULL : start;
641     len = strlen(s);
642 ph10 289 if (len > 0) add_history(s);
643 ph10 287 if (len > rlen - 1) len = rlen - 1;
644     memcpy(here, s, len);
645     here[len] = '\n';
646 ph10 289 here[len+1] = 0;
647     free(s);
648 ph10 287 }
649 ph10 289 else
650     #endif
651    
652 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
653 ph10 289
654 ph10 287 {
655 ph10 516 if (f == stdin) printf("%s", prompt);
656 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
657     return (here == start)? NULL : start;
658 ph10 289 }
659    
660 nigel 91 dlen = (int)strlen((char *)here);
661     if (dlen > 0 && here[dlen - 1] == '\n') return start;
662     here += dlen;
663     }
664    
665     else
666     {
667     int new_buffer_size = 2*buffer_size;
668     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
669     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
670     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
671    
672     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
673     {
674     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
675     exit(1);
676     }
677    
678     memcpy(new_buffer, buffer, buffer_size);
679     memcpy(new_pbuffer, pbuffer, buffer_size);
680    
681     buffer_size = new_buffer_size;
682    
683     start = new_buffer + (start - buffer);
684     here = new_buffer + (here - buffer);
685    
686     free(buffer);
687     free(dbuffer);
688     free(pbuffer);
689    
690     buffer = new_buffer;
691     dbuffer = new_dbuffer;
692     pbuffer = new_pbuffer;
693     }
694     }
695    
696     return NULL; /* Control never gets here */
697     }
698    
699    
700    
701    
702    
703    
704    
705     /*************************************************
706 nigel 63 * Read number from string *
707     *************************************************/
708    
709     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
710     around with conditional compilation, just do the job by hand. It is only used
711 nigel 93 for unpicking arguments, so just keep it simple.
712 nigel 63
713     Arguments:
714     str string to be converted
715     endptr where to put the end pointer
716    
717     Returns: the converted value, as an int
718     */
719    
/* Convert a run of decimal digits, after optional leading white space, into
an int.

Arguments:
  str         string to be converted
  endptr      receives a pointer to the first character after the digits
              (or after the white space, if no digits follow)

Returns:      the converted value; 0 if there are no digits; INT_MAX if the
              digits describe a value too large for an int (the remaining
              digits are still consumed)
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;

while (*str != 0 && isspace(*str)) str++;

while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* result*10 + digit would overflow (undefined behaviour for a signed int)
  exactly when result exceeds this bound, so saturate instead. */

  if (result > (INT_MAX - digit) / 10)
    result = INT_MAX;
  else
    result = result * 10 + digit;
  }

*endptr = str;
return result;
}
729    
730    
731    
732 nigel 49
733     /*************************************************
734     * Convert UTF-8 string to value *
735     *************************************************/
736    
737     /* This function takes one or more bytes that represents a UTF-8 character,
738     and returns the value of the character.
739    
740     Argument:
741 nigel 91 utf8bytes a pointer to the byte vector
742     vptr a pointer to an int to receive the value
743 nigel 49
744 nigel 91 Returns: > 0 => the number of bytes consumed
745     -6 to 0 => malformed UTF-8 character at offset = (-return)
746 nigel 49 */
747    
748 nigel 79 #if !defined NOUTF8
749    
750 nigel 67 static int
751 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
752 nigel 49 {
753 nigel 91 int c = *utf8bytes++;
754 nigel 49 int d = c;
755     int i, j, s;
756    
757     for (i = -1; i < 6; i++) /* i is number of additional bytes */
758     {
759     if ((d & 0x80) == 0) break;
760     d <<= 1;
761     }
762    
763     if (i == -1) { *vptr = c; return 1; } /* ascii character */
764     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
765    
766     /* i now has a value in the range 1-5 */
767    
768 nigel 59 s = 6*i;
769 nigel 85 d = (c & utf8_table3[i]) << s;
770 nigel 49
771     for (j = 0; j < i; j++)
772     {
773 nigel 91 c = *utf8bytes++;
774 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
775 nigel 59 s -= 6;
776 nigel 49 d |= (c & 0x3f) << s;
777     }
778    
779     /* Check that encoding was the correct unique one */
780    
781 nigel 85 for (j = 0; j < utf8_table1_size; j++)
782     if (d <= utf8_table1[j]) break;
783 nigel 49 if (j != i) return -(i+1);
784    
785     /* Valid value */
786    
787     *vptr = d;
788     return i+1;
789     }
790    
791 nigel 79 #endif
792 nigel 49
793    
794 nigel 79
795 nigel 63 /*************************************************
796 nigel 85 * Convert character value to UTF-8 *
797     *************************************************/
798    
799     /* This function takes an integer value in the range 0 - 0x7fffffff
800     and encodes it as a UTF-8 character in 1 to 6 bytes.
801    
802     Arguments:
803     cvalue the character value
804 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
805 nigel 85
806     Returns: number of bytes placed in the buffer
807     */
808    
809 nigel 93 #if !defined NOUTF8
810    
811 nigel 85 static int
812 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
813 nigel 85 {
814     register int i, j;
815     for (i = 0; i < utf8_table1_size; i++)
816     if (cvalue <= utf8_table1[i]) break;
817 nigel 91 utf8bytes += i;
818 nigel 85 for (j = i; j > 0; j--)
819     {
820 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
821 nigel 85 cvalue >>= 6;
822     }
823 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
824 nigel 85 return i + 1;
825     }
826    
827 nigel 93 #endif
828 nigel 85
829    
830 nigel 93
831 nigel 85 /*************************************************
832 nigel 63 * Print character string *
833     *************************************************/
834 nigel 49
835 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
836     mode. Yields number of characters printed. If handed a NULL file, just counts
837     chars without printing. */
838 nigel 49
839 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
840 nigel 3 {
841 nigel 85 int c = 0;
842 nigel 63 int yield = 0;
843 nigel 3
844 nigel 63 while (length-- > 0)
845 nigel 3 {
846 nigel 79 #if !defined NOUTF8
847 nigel 67 if (use_utf8)
848 nigel 63 {
849     int rc = utf82ord(p, &c);
850 nigel 3
851 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
852     {
853     length -= rc - 1;
854     p += rc;
855 nigel 93 if (PRINTHEX(c))
856 nigel 63 {
857     if (f != NULL) fprintf(f, "%c", c);
858     yield++;
859     }
860     else
861     {
862 nigel 93 int n = 4;
863     if (f != NULL) fprintf(f, "\\x{%02x}", c);
864     yield += (n <= 0x000000ff)? 2 :
865     (n <= 0x00000fff)? 3 :
866     (n <= 0x0000ffff)? 4 :
867     (n <= 0x000fffff)? 5 : 6;
868 nigel 63 }
869     continue;
870     }
871     }
872 nigel 79 #endif
873 nigel 3
874 nigel 63 /* Not UTF-8, or malformed UTF-8 */
875    
876 nigel 93 c = *p++;
877     if (PRINTHEX(c))
878 nigel 3 {
879 nigel 63 if (f != NULL) fprintf(f, "%c", c);
880     yield++;
881 nigel 3 }
882 nigel 63 else
883 nigel 3 {
884 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
885     yield += 4;
886     }
887     }
888 nigel 3
889 nigel 63 return yield;
890     }
891 nigel 23
892 nigel 3
893 nigel 23
894 nigel 63 /*************************************************
895     * Callout function *
896     *************************************************/
897 nigel 3
898 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
899     the match. Yield zero unless more callouts than the fail count, or the callout
900     data is not zero. */
901 nigel 3
902 nigel 63 static int callout(pcre_callout_block *cb)
903     {
904     FILE *f = (first_callout | callout_extra)? outfile : NULL;
905 nigel 75 int i, pre_start, post_start, subject_length;
906 nigel 3
907 nigel 63 if (callout_extra)
908     {
909     fprintf(f, "Callout %d: last capture = %d\n",
910     cb->callout_number, cb->capture_last);
911 nigel 3
912 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
913     {
914     if (cb->offset_vector[i] < 0)
915     fprintf(f, "%2d: <unset>\n", i/2);
916     else
917     {
918     fprintf(f, "%2d: ", i/2);
919     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
920     cb->offset_vector[i+1] - cb->offset_vector[i], f);
921     fprintf(f, "\n");
922     }
923     }
924     }
925 nigel 3
926 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
927     datails. On subsequent calls in the same match, we use pchars just to find the
928     printed lengths of the substrings. */
929 nigel 3
930 nigel 63 if (f != NULL) fprintf(f, "--->");
931 nigel 3
932 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
933     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
934     cb->current_position - cb->start_match, f);
935 nigel 3
936 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
937    
938 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
939     cb->subject_length - cb->current_position, f);
940 nigel 3
941 nigel 63 if (f != NULL) fprintf(f, "\n");
942 nigel 9
943 nigel 63 /* Always print appropriate indicators, with callout number if not already
944 nigel 75 shown. For automatic callouts, show the pattern offset. */
945 nigel 3
946 nigel 75 if (cb->callout_number == 255)
947     {
948     fprintf(outfile, "%+3d ", cb->pattern_position);
949     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
950     }
951     else
952     {
953     if (callout_extra) fprintf(outfile, " ");
954     else fprintf(outfile, "%3d ", cb->callout_number);
955     }
956 nigel 3
957 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
958     fprintf(outfile, "^");
959 nigel 3
960 nigel 63 if (post_start > 0)
961     {
962     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
963     fprintf(outfile, "^");
964 nigel 3 }
965    
966 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
967     fprintf(outfile, " ");
968    
969     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
970     pbuffer + cb->pattern_position);
971    
972 nigel 63 fprintf(outfile, "\n");
973     first_callout = 0;
974 nigel 3
975 ph10 654 if (cb->mark != last_callout_mark)
976 ph10 645 {
977 ph10 654 fprintf(outfile, "Latest Mark: %s\n",
978 ph10 645 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
979 ph10 654 last_callout_mark = cb->mark;
980     }
981 ph10 645
982 nigel 71 if (cb->callout_data != NULL)
983 nigel 49 {
984 nigel 71 int callout_data = *((int *)(cb->callout_data));
985     if (callout_data != 0)
986     {
987     fprintf(outfile, "Callout data = %d\n", callout_data);
988     return callout_data;
989     }
990 nigel 63 }
991 nigel 49
992 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
993     (++callout_count >= callout_fail_count)? 1 : 0;
994 nigel 3 }
995    
996    
997 nigel 63 /*************************************************
998 nigel 73 * Local malloc functions *
999 nigel 63 *************************************************/
1000 nigel 3
1001 ph10 667 /* Alternative malloc function, to test functionality and save the size of a
1002     compiled re. The show_malloc variable is set only during matching. */
1003 nigel 3
1004     static void *new_malloc(size_t size)
1005     {
1006 nigel 73 void *block = malloc(size);
1007 nigel 43 gotten_store = size;
1008 nigel 73 if (show_malloc)
1009 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1010 nigel 73 return block;
1011 nigel 3 }
1012    
1013 nigel 73 static void new_free(void *block)
1014     {
1015     if (show_malloc)
1016     fprintf(outfile, "free %p\n", block);
1017     free(block);
1018     }
1019 nigel 3
1020 nigel 73 /* For recursion malloc/free, to test stacking calls */
1021    
1022     static void *stack_malloc(size_t size)
1023     {
1024     void *block = malloc(size);
1025     if (show_malloc)
1026 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1027 nigel 73 return block;
1028     }
1029    
1030     static void stack_free(void *block)
1031     {
1032     if (show_malloc)
1033     fprintf(outfile, "stack_free %p\n", block);
1034     free(block);
1035     }
1036    
1037    
1038 nigel 63 /*************************************************
1039     * Call pcre_fullinfo() *
1040     *************************************************/
1041 nigel 43
1042     /* Get one piece of information from the pcre_fullinfo() function */
1043    
1044     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1045     {
1046     int rc;
1047     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1048     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1049     }
1050    
1051    
1052    
1053 nigel 63 /*************************************************
1054 ph10 674 * Check for supported JIT architecture *
1055     *************************************************/
1056    
1057     /* If it won't JIT-compile a very simple regex, return FALSE. */
1058    
1059     static int check_jit_arch(void)
1060     {
1061     const char *error;
1062     int erroffset, rc;
1063     pcre *re = pcre_compile("abc", 0, &error, &erroffset, NULL);
1064     pcre_extra *extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);
1065     rc = extra != NULL && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
1066     extra->executable_jit != NULL;
1067     pcre_free_study(extra);
1068     free(re);
1069     return rc;
1070     }
1071    
1072    
1073     /*************************************************
1074 nigel 75 * Byte flipping function *
1075     *************************************************/
1076    
/* Reverse the byte order of a 2-byte or 4-byte quantity held in the low-order
bytes of value. Any n other than 2 is treated as a 4-byte flip; bits above the
bytes being flipped are discarded. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1086    
1087    
1088    
1089    
1090     /*************************************************
1091 nigel 87 * Check match or recursion limit *
1092     *************************************************/
1093    
1094     static int
1095     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1096     int start_offset, int options, int *use_offsets, int use_size_offsets,
1097     int flag, unsigned long int *limit, int errnumber, const char *msg)
1098     {
1099     int count;
1100     int min = 0;
1101     int mid = 64;
1102     int max = -1;
1103    
1104     extra->flags |= flag;
1105    
1106     for (;;)
1107     {
1108     *limit = mid;
1109    
1110     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1111     use_offsets, use_size_offsets);
1112    
1113     if (count == errnumber)
1114     {
1115     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1116     min = mid;
1117     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1118     }
1119    
1120     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1121     count == PCRE_ERROR_PARTIAL)
1122     {
1123     if (mid == min + 1)
1124     {
1125     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1126     break;
1127     }
1128     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1129     max = mid;
1130     mid = (min + mid)/2;
1131     }
1132     else break; /* Some other error */
1133     }
1134    
1135     extra->flags &= ~flag;
1136     return count;
1137     }
1138    
1139    
1140    
1141     /*************************************************
1142 ph10 227 * Case-independent strncmp() function *
1143     *************************************************/
1144    
1145     /*
1146     Arguments:
1147     s first string
1148     t second string
1149     n number of characters to compare
1150    
1151     Returns: < 0, = 0, or > 0, according to the comparison
1152     */
1153    
1154     static int
1155     strncmpic(uschar *s, uschar *t, int n)
1156     {
1157     while (n--)
1158     {
1159     int c = tolower(*s++) - tolower(*t++);
1160     if (c) return c;
1161     }
1162     return 0;
1163     }
1164    
1165    
1166    
1167     /*************************************************
1168 nigel 91 * Check newline indicator *
1169     *************************************************/
1170    
1171 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1172     a message and return 0 if there is no match.
1173 nigel 91
1174     Arguments:
1175     p points after the leading '<'
1176     f file for error message
1177    
1178     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1179     */
1180    
1181     static int
1182     check_newline(uschar *p, FILE *f)
1183     {
1184 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1185     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1186     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1187     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1188     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1189 ph10 231 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1190     if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1191 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
1192     return 0;
1193     }
1194    
1195    
1196    
1197     /*************************************************
1198 nigel 93 * Usage function *
1199     *************************************************/
1200    
/* Write the command line synopsis and the list of options to stdout. Which
lines appear depends on SUPPORT_LIBREADLINE, NODFA and NOPOSIX. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);
#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
fputs("\nOptions:\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1235    
1236    
1237    
1238     /*************************************************
1239 nigel 63 * Main Program *
1240     *************************************************/
1241 nigel 43
1242 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
1243     consist of a regular expression, in delimiters and optionally followed by
1244     options, followed by a set of test data, terminated by an empty line. */
1245    
1246     int main(int argc, char **argv)
1247     {
1248     FILE *infile = stdin;
1249     int options = 0;
1250     int study_options = 0;
1251 ph10 386 int default_find_match_limit = FALSE;
1252 nigel 3 int op = 1;
1253     int timeit = 0;
1254 nigel 93 int timeitm = 0;
1255 nigel 3 int showinfo = 0;
1256 nigel 31 int showstore = 0;
1257 ph10 667 int force_study = -1;
1258     int force_study_options = 0;
1259 nigel 87 int quiet = 0;
1260 nigel 53 int size_offsets = 45;
1261     int size_offsets_max;
1262 nigel 77 int *offsets = NULL;
1263 nigel 53 #if !defined NOPOSIX
1264 nigel 3 int posix = 0;
1265 nigel 53 #endif
1266 nigel 3 int debug = 0;
1267 nigel 11 int done = 0;
1268 nigel 77 int all_use_dfa = 0;
1269     int yield = 0;
1270 nigel 91 int stack_size;
1271 nigel 3
1272 ph10 667 pcre_jit_stack *jit_stack = NULL;
1273    
1274    
1275 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
1276     that 1024 is plenty long enough for the few names we'll be testing. */
1277 nigel 69
1278 nigel 91 uschar copynames[1024];
1279     uschar getnames[1024];
1280    
1281     uschar *copynamesptr;
1282     uschar *getnamesptr;
1283    
1284 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
1285 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
1286 nigel 69
1287 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
1288     dbuffer = (unsigned char *)malloc(buffer_size);
1289     pbuffer = (unsigned char *)malloc(buffer_size);
1290 nigel 69
1291 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
1292 nigel 3
1293 nigel 93 outfile = stdout;
1294    
1295     /* The following _setmode() stuff is some Windows magic that tells its runtime
1296     library to translate CRLF into a single LF character. At least, that's what
1297     I've been told: never having used Windows I take this all on trust. Originally
1298     it set 0x8000, but then I was advised that _O_BINARY was better. */
1299    
1300 nigel 75 #if defined(_WIN32) || defined(WIN32)
1301 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
1302     #endif
1303 nigel 75
1304 nigel 3 /* Scan options */
1305    
1306     while (argc > 1 && argv[op][0] == '-')
1307     {
1308 nigel 63 unsigned char *endptr;
1309 nigel 53
1310 ph10 606 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1311 ph10 667 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1312     else if (strcmp(argv[op], "-s+") == 0)
1313     {
1314     force_study = 1;
1315     force_study_options = PCRE_STUDY_JIT_COMPILE;
1316     }
1317 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1318 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1319 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1320     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1321 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1322 nigel 79 #if !defined NODFA
1323 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1324 nigel 79 #endif
1325 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1326 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1327     *endptr == 0))
1328 nigel 53 {
1329     op++;
1330     argc--;
1331     }
1332 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1333     {
1334     int both = argv[op][2] == 0;
1335     int temp;
1336     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1337     *endptr == 0))
1338     {
1339     timeitm = temp;
1340     op++;
1341     argc--;
1342     }
1343     else timeitm = LOOPREPEAT;
1344     if (both) timeit = timeitm;
1345     }
1346 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1347     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1348     *endptr == 0))
1349     {
1350 ph10 641 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1351 nigel 91 printf("PCRE: -S not supported on this OS\n");
1352     exit(1);
1353     #else
1354     int rc;
1355     struct rlimit rlim;
1356     getrlimit(RLIMIT_STACK, &rlim);
1357     rlim.rlim_cur = stack_size * 1024 * 1024;
1358     rc = setrlimit(RLIMIT_STACK, &rlim);
1359     if (rc != 0)
1360     {
1361     printf("PCRE: setrlimit() failed with error %d\n", rc);
1362     exit(1);
1363     }
1364     op++;
1365     argc--;
1366     #endif
1367     }
1368 nigel 53 #if !defined NOPOSIX
1369 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1370 nigel 53 #endif
1371 nigel 63 else if (strcmp(argv[op], "-C") == 0)
1372     {
1373     int rc;
1374 ph10 392 unsigned long int lrc;
1375 nigel 63 printf("PCRE version %s\n", pcre_version());
1376     printf("Compiled with\n");
1377     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1378     printf(" %sUTF-8 support\n", rc? "" : "No ");
1379 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1380     printf(" %sUnicode properties support\n", rc? "" : "No ");
1381 ph10 667 (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1382 ph10 674 if (rc)
1383     printf(" Just-in-time compiler support%s\n", check_jit_arch()?
1384     "" : " (but this architecture is unsupported)");
1385     else
1386     printf(" No just-in-time compiler support\n");
1387 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1388 ph10 391 /* Note that these values are always the ASCII values, even
1389 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
1390 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1391     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1392 ph10 150 (rc == -2)? "ANYCRLF" :
1393 nigel 93 (rc == -1)? "ANY" : "???");
1394 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1395     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1396     "all Unicode newlines");
1397 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1398     printf(" Internal link size = %d\n", rc);
1399     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1400     printf(" POSIX malloc threshold = %d\n", rc);
1401 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1402     printf(" Default match limit = %ld\n", lrc);
1403     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1404     printf(" Default recursion depth limit = %ld\n", lrc);
1405 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1406     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1407 ph10 121 goto EXIT;
1408 nigel 63 }
1409 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
1410     strcmp(argv[op], "--help") == 0)
1411     {
1412     usage();
1413     goto EXIT;
1414     }
1415 nigel 3 else
1416     {
1417 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
1418 nigel 93 usage();
1419 nigel 77 yield = 1;
1420     goto EXIT;
1421 nigel 3 }
1422     op++;
1423     argc--;
1424     }
1425    
1426 nigel 53 /* Get the store for the offsets vector, and remember what it was */
1427    
1428     size_offsets_max = size_offsets;
1429 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1430 nigel 53 if (offsets == NULL)
1431     {
1432     printf("** Failed to get %d bytes of memory for offsets vector\n",
1433 ph10 151 (int)(size_offsets_max * sizeof(int)));
1434 nigel 77 yield = 1;
1435     goto EXIT;
1436 nigel 53 }
1437    
1438 nigel 3 /* Sort out the input and output files */
1439    
1440     if (argc > 1)
1441     {
1442 nigel 93 infile = fopen(argv[op], INPUT_MODE);
1443 nigel 3 if (infile == NULL)
1444     {
1445     printf("** Failed to open %s\n", argv[op]);
1446 nigel 77 yield = 1;
1447     goto EXIT;
1448 nigel 3 }
1449     }
1450    
1451     if (argc > 2)
1452     {
1453 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
1454 nigel 3 if (outfile == NULL)
1455     {
1456     printf("** Failed to open %s\n", argv[op+1]);
1457 nigel 77 yield = 1;
1458     goto EXIT;
1459 nigel 3 }
1460     }
1461    
1462     /* Set alternative malloc function */
1463    
1464     pcre_malloc = new_malloc;
1465 nigel 73 pcre_free = new_free;
1466     pcre_stack_malloc = stack_malloc;
1467     pcre_stack_free = stack_free;
1468 nigel 3
1469 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1470 nigel 3
1471 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1472 nigel 3
1473     /* Main loop */
1474    
1475 nigel 11 while (!done)
1476 nigel 3 {
1477     pcre *re = NULL;
1478     pcre_extra *extra = NULL;
1479 nigel 37
1480     #if !defined NOPOSIX /* There are still compilers that require no indent */
1481 nigel 3 regex_t preg;
1482 nigel 45 int do_posix = 0;
1483 nigel 37 #endif
1484    
1485 nigel 7 const char *error;
1486 ph10 512 unsigned char *markptr;
1487 nigel 25 unsigned char *p, *pp, *ppp;
1488 nigel 75 unsigned char *to_file = NULL;
1489 nigel 53 const unsigned char *tables = NULL;
1490 nigel 75 unsigned long int true_size, true_study_size = 0;
1491     size_t size, regex_gotten_store;
1492 ph10 654 int do_allcaps = 0;
1493 ph10 512 int do_mark = 0;
1494 nigel 3 int do_study = 0;
1495 ph10 654 int no_force_study = 0;
1496 nigel 25 int do_debug = debug;
1497 nigel 35 int do_G = 0;
1498     int do_g = 0;
1499 nigel 25 int do_showinfo = showinfo;
1500 nigel 35 int do_showrest = 0;
1501 ph10 616 int do_showcaprest = 0;
1502 nigel 75 int do_flip = 0;
1503 nigel 93 int erroroffset, len, delimiter, poffset;
1504 nigel 3
1505 nigel 67 use_utf8 = 0;
1506 ph10 211 debug_lengths = 1;
1507 nigel 63
1508 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1509 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1510 nigel 63 fflush(outfile);
1511 nigel 3
1512     p = buffer;
1513     while (isspace(*p)) p++;
1514     if (*p == 0) continue;
1515    
1516 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1517 nigel 3
1518 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1519     {
1520 nigel 91 unsigned long int magic, get_options;
1521 nigel 75 uschar sbuf[8];
1522     FILE *f;
1523    
1524     p++;
1525     pp = p + (int)strlen((char *)p);
1526     while (isspace(pp[-1])) pp--;
1527     *pp = 0;
1528    
1529     f = fopen((char *)p, "rb");
1530     if (f == NULL)
1531     {
1532     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1533     continue;
1534     }
1535    
1536     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1537    
1538     true_size =
1539     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1540     true_study_size =
1541     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1542    
1543     re = (real_pcre *)new_malloc(true_size);
1544     regex_gotten_store = gotten_store;
1545    
1546     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1547    
1548     magic = ((real_pcre *)re)->magic_number;
1549     if (magic != MAGIC_NUMBER)
1550     {
1551     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1552     {
1553     do_flip = 1;
1554     }
1555     else
1556     {
1557     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1558     fclose(f);
1559     continue;
1560     }
1561     }
1562    
1563 ph10 612 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1564 nigel 75 do_flip? " (byte-inverted)" : "", p);
1565    
1566     /* Need to know if UTF-8 for printing data strings */
1567    
1568 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1569     use_utf8 = (get_options & PCRE_UTF8) != 0;
1570 nigel 75
1571 ph10 612 /* Now see if there is any following study data. */
1572 nigel 75
1573     if (true_study_size != 0)
1574     {
1575     pcre_study_data *psd;
1576    
1577     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1578     extra->flags = PCRE_EXTRA_STUDY_DATA;
1579    
1580     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1581     extra->study_data = psd;
1582    
1583     if (fread(psd, 1, true_study_size, f) != true_study_size)
1584     {
1585     FAIL_READ:
1586     fprintf(outfile, "Failed to read data from %s\n", p);
1587 ph10 667 if (extra != NULL) pcre_free_study(extra);
1588 nigel 75 if (re != NULL) new_free(re);
1589     fclose(f);
1590     continue;
1591     }
1592     fprintf(outfile, "Study data loaded from %s\n", p);
1593     do_study = 1; /* To get the data output if requested */
1594     }
1595     else fprintf(outfile, "No study data\n");
1596    
1597     fclose(f);
1598     goto SHOW_INFO;
1599     }
1600    
1601     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1602     the pattern; if is isn't complete, read more. */
1603    
1604 nigel 3 delimiter = *p++;
1605    
1606 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1607 nigel 3 {
1608 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1609 nigel 3 goto SKIP_DATA;
1610     }
1611    
1612     pp = p;
1613 ph10 530 poffset = (int)(p - buffer);
1614 nigel 3
1615     for(;;)
1616     {
1617 nigel 29 while (*pp != 0)
1618     {
1619     if (*pp == '\\' && pp[1] != 0) pp++;
1620     else if (*pp == delimiter) break;
1621     pp++;
1622     }
1623 nigel 3 if (*pp != 0) break;
1624 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1625 nigel 3 {
1626     fprintf(outfile, "** Unexpected EOF\n");
1627 nigel 11 done = 1;
1628     goto CONTINUE;
1629 nigel 3 }
1630 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1631 nigel 3 }
1632    
1633 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1634     pointer to the correct relative point in the buffer. */
1635    
1636     p = buffer + poffset;
1637    
1638 nigel 29 /* If the first character after the delimiter is backslash, make
1639     the pattern end with backslash. This is purely to provide a way
1640     of testing for the error message when a pattern ends with backslash. */
1641    
1642     if (pp[1] == '\\') *pp++ = '\\';
1643    
1644 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1645     for callouts. */
1646 nigel 3
1647     *pp++ = 0;
1648 nigel 75 strcpy((char *)pbuffer, (char *)p);
1649 nigel 3
1650     /* Look for options after final delimiter */
1651    
1652     options = 0;
1653 nigel 31 log_store = showstore; /* default from command line */
1654    
1655 nigel 3 while (*pp != 0)
1656     {
1657     switch (*pp++)
1658     {
1659 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1660 nigel 35 case 'g': do_g = 1; break;
1661 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1662     case 'm': options |= PCRE_MULTILINE; break;
1663     case 's': options |= PCRE_DOTALL; break;
1664     case 'x': options |= PCRE_EXTENDED; break;
1665 nigel 25
1666 ph10 616 case '+':
1667 ph10 654 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1668 ph10 616 break;
1669 ph10 654
1670     case '=': do_allcaps = 1; break;
1671 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1672 nigel 93 case 'B': do_debug = 1; break;
1673 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1674 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1675 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1676 nigel 75 case 'F': do_flip = 1; break;
1677 nigel 35 case 'G': do_G = 1; break;
1678 nigel 25 case 'I': do_showinfo = 1; break;
1679 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1680 ph10 512 case 'K': do_mark = 1; break;
1681 nigel 31 case 'M': log_store = 1; break;
1682 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1683 nigel 37
1684     #if !defined NOPOSIX
1685 nigel 3 case 'P': do_posix = 1; break;
1686 nigel 37 #endif
1687    
1688 ph10 654 case 'S':
1689 ph10 667 if (do_study == 0)
1690 ph10 612 {
1691 ph10 667 do_study = 1;
1692     if (*pp == '+')
1693     {
1694     study_options |= PCRE_STUDY_JIT_COMPILE;
1695     pp++;
1696     }
1697     }
1698     else
1699     {
1700 ph10 612 do_study = 0;
1701     no_force_study = 1;
1702 ph10 654 }
1703 ph10 612 break;
1704    
1705 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1706 ph10 535 case 'W': options |= PCRE_UCP; break;
1707 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1708 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1709 ph10 126 case 'Z': debug_lengths = 0; break;
1710 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1711 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1712 ph10 545
1713 ph10 541 case 'T':
1714     switch (*pp++)
1715     {
1716     case '0': tables = tables0; break;
1717     case '1': tables = tables1; break;
1718 ph10 545
1719 ph10 541 case '\r':
1720     case '\n':
1721 ph10 545 case ' ':
1722     case 0:
1723 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
1724 ph10 545 goto SKIP_DATA;
1725    
1726     default:
1727 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1728 ph10 545 goto SKIP_DATA;
1729 ph10 541 }
1730 ph10 545 break;
1731 nigel 25
1732     case 'L':
1733     ppp = pp;
1734 nigel 93 /* The '\r' test here is so that it works on Windows. */
1735     /* The '0' test is just in case this is an unterminated line. */
1736     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1737 nigel 25 *ppp = 0;
1738     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1739     {
1740     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1741     goto SKIP_DATA;
1742     }
1743 nigel 93 locale_set = 1;
1744 nigel 25 tables = pcre_maketables();
1745     pp = ppp;
1746     break;
1747    
1748 nigel 75 case '>':
1749     to_file = pp;
1750     while (*pp != 0) pp++;
1751     while (isspace(pp[-1])) pp--;
1752     *pp = 0;
1753     break;
1754    
1755 nigel 91 case '<':
1756     {
1757 ph10 518 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1758 ph10 336 {
1759     options |= PCRE_JAVASCRIPT_COMPAT;
1760 ph10 345 pp += 3;
1761 ph10 336 }
1762     else
1763 ph10 345 {
1764 ph10 336 int x = check_newline(pp, outfile);
1765     if (x == 0) goto SKIP_DATA;
1766     options |= x;
1767     while (*pp++ != '>');
1768 ph10 345 }
1769 nigel 91 }
1770     break;
1771    
1772 nigel 77 case '\r': /* So that it works in Windows */
1773     case '\n':
1774     case ' ':
1775     break;
1776 nigel 75
1777 nigel 3 default:
1778     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1779     goto SKIP_DATA;
1780     }
1781     }
1782    
1783 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1784 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1785     local character tables. */
1786 nigel 3
1787 nigel 37 #if !defined NOPOSIX
1788 nigel 3 if (posix || do_posix)
1789     {
1790     int rc;
1791     int cflags = 0;
1792 nigel 75
1793 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1794     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1795 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1796 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1797     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1798 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1799 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1800 nigel 87
1801 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1802    
1803     /* Compilation failed; go back for another re, skipping to blank line
1804     if non-interactive. */
1805    
1806     if (rc != 0)
1807     {
1808 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1809 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1810     goto SKIP_DATA;
1811     }
1812     }
1813    
1814     /* Handle compiling via the native interface */
1815    
1816     else
1817 nigel 37 #endif /* !defined NOPOSIX */
1818    
1819 nigel 3 {
1820 ph10 412 unsigned long int get_options;
1821 ph10 416
1822 nigel 93 if (timeit > 0)
1823 nigel 3 {
1824     register int i;
1825     clock_t time_taken;
1826     clock_t start_time = clock();
1827 nigel 93 for (i = 0; i < timeit; i++)
1828 nigel 3 {
1829 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1830 nigel 3 if (re != NULL) free(re);
1831     }
1832     time_taken = clock() - start_time;
1833 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1834     (((double)time_taken * 1000.0) / (double)timeit) /
1835 nigel 63 (double)CLOCKS_PER_SEC);
1836 nigel 3 }
1837    
1838 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1839 nigel 3
1840     /* Compilation failed; go back for another re, skipping to blank line
1841     if non-interactive. */
1842    
1843     if (re == NULL)
1844     {
1845     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1846     SKIP_DATA:
1847     if (infile != stdin)
1848     {
1849     for (;;)
1850     {
1851 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
1852 nigel 11 {
1853     done = 1;
1854     goto CONTINUE;
1855     }
1856 nigel 3 len = (int)strlen((char *)buffer);
1857     while (len > 0 && isspace(buffer[len-1])) len--;
1858     if (len == 0) break;
1859     }
1860     fprintf(outfile, "\n");
1861     }
1862 nigel 25 goto CONTINUE;
1863 nigel 3 }
1864 ph10 416
1865     /* Compilation succeeded. It is now possible to set the UTF-8 option from
1866     within the regex; check for this so that we know how to process the data
1867 ph10 412 lines. */
1868 ph10 416
1869 ph10 412 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1870     if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1871 nigel 3
1872 ph10 412 /* Print information if required. There are now two info-returning
1873     functions. The old one has a limited interface and returns only limited
1874     data. Check that it agrees with the newer one. */
1875 nigel 3
1876 nigel 63 if (log_store)
1877     fprintf(outfile, "Memory allocation (code space): %d\n",
1878     (int)(gotten_store -
1879     sizeof(real_pcre) -
1880     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1881    
1882 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1883     and remember the store that was got. */
1884    
1885     true_size = ((real_pcre *)re)->size;
1886     regex_gotten_store = gotten_store;
1887    
1888 ph10 612 /* If -s or /S was present, study the regex to generate additional info to
1889 ph10 654 help with the matching, unless the pattern has the SS option, which
1890 ph10 612 suppresses studying altogether, even when -s was given (used for a few test
1891     patterns where studying is never sensible). */
1892 nigel 75
1893 ph10 667 if (do_study || (force_study >= 0 && !no_force_study))
1894 nigel 75 {
1895 nigel 93 if (timeit > 0)
1896 nigel 75 {
1897     register int i;
1898     clock_t time_taken;
1899     clock_t start_time = clock();
1900 nigel 93 for (i = 0; i < timeit; i++)
1901 ph10 667 extra = pcre_study(re, study_options | force_study_options, &error);
1902 nigel 75 time_taken = clock() - start_time;
1903 ph10 667 if (extra != NULL) pcre_free_study(extra);
1904 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1905     (((double)time_taken * 1000.0) / (double)timeit) /
1906 nigel 75 (double)CLOCKS_PER_SEC);
1907     }
1908 ph10 667 extra = pcre_study(re, study_options | force_study_options, &error);
1909 nigel 75 if (error != NULL)
1910     fprintf(outfile, "Failed to study: %s\n", error);
1911     else if (extra != NULL)
1912     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1913     }
1914 ph10 512
1915 ph10 510 /* If /K was present, we set up for handling MARK data. */
1916 ph10 512
1917 ph10 510 if (do_mark)
1918     {
1919     if (extra == NULL)
1920     {
1921     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1922     extra->flags = 0;
1923     }
1924 ph10 512 extra->mark = &markptr;
1925 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
1926 ph10 512 }
1927 nigel 75
1928     /* If the 'F' option was present, we flip the bytes of all the integer
1929     fields in the regex data block and the study block. This is to make it
1930     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1931     compiled on a different architecture. */
1932    
1933     if (do_flip)
1934     {
1935     real_pcre *rre = (real_pcre *)re;
1936 ph10 259 rre->magic_number =
1937 ph10 255 byteflip(rre->magic_number, sizeof(rre->magic_number));
1938 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
1939     rre->options = byteflip(rre->options, sizeof(rre->options));
1940 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1941 ph10 259 rre->top_bracket =
1942 ph10 255 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1943 ph10 259 rre->top_backref =
1944 ph10 255 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1945 ph10 259 rre->first_byte =
1946 ph10 255 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1947 ph10 259 rre->req_byte =
1948 ph10 255 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1949     rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1950 nigel 75 sizeof(rre->name_table_offset));
1951 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1952 nigel 75 sizeof(rre->name_entry_size));
1953 ph10 259 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1954 ph10 255 sizeof(rre->name_count));
1955 nigel 75
1956     if (extra != NULL)
1957     {
1958     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1959     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1960 ph10 455 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1961     rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1962 nigel 75 }
1963     }
1964    
1965     /* Extract information from the compiled data if required */
1966    
1967     SHOW_INFO:
1968    
1969 nigel 93 if (do_debug)
1970     {
1971     fprintf(outfile, "------------------------------------------------------------------\n");
1972 ph10 116 pcre_printint(re, outfile, debug_lengths);
1973 nigel 93 }
1974 ph10 416
1975 ph10 412 /* We already have the options in get_options (see above) */
1976 nigel 93
1977 nigel 25 if (do_showinfo)
1978 nigel 3 {
1979 ph10 412 unsigned long int all_options;
1980 nigel 79 #if !defined NOINFOCHECK
1981 nigel 43 int old_first_char, old_options, old_count;
1982 nigel 79 #endif
1983 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1984 ph10 227 hascrorlf;
1985 nigel 63 int nameentrysize, namecount;
1986     const uschar *nametable;
1987 nigel 3
1988 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1989     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1990     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1991 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1992 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1993 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1994     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1995 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1996 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1997     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1998 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1999 nigel 43
2000 nigel 79 #if !defined NOINFOCHECK
2001 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
2002 nigel 3 if (count < 0) fprintf(outfile,
2003 nigel 43 "Error %d from pcre_info()\n", count);
2004 nigel 3 else
2005     {
2006 nigel 43 if (old_count != count) fprintf(outfile,
2007     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2008     old_count);
2009 nigel 37
2010 nigel 43 if (old_first_char != first_char) fprintf(outfile,
2011     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2012     first_char, old_first_char);
2013 nigel 37
2014 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
2015     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2016     get_options, old_options);
2017 nigel 43 }
2018 nigel 79 #endif
2019 nigel 43
2020 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
2021 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2022 nigel 77 (int)size, (int)regex_gotten_store);
2023 nigel 43
2024     fprintf(outfile, "Capturing subpattern count = %d\n", count);
2025     if (backrefmax > 0)
2026     fprintf(outfile, "Max back reference = %d\n", backrefmax);
2027 nigel 63
2028     if (namecount > 0)
2029     {
2030     fprintf(outfile, "Named capturing subpatterns:\n");
2031     while (namecount-- > 0)
2032     {
2033     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
2034     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2035     GET2(nametable, 0));
2036     nametable += nameentrysize;
2037     }
2038     }
2039 ph10 172
2040 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2041 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2042 nigel 63
2043 nigel 75 all_options = ((real_pcre *)re)->options;
2044 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2045 nigel 75
2046 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
2047 ph10 576 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2048 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2049     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2050     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2051     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2052 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2053 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2054 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2055     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2056 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2057     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2058     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2059 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2060 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2061 ph10 518 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2062 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2063 ph10 576 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2064 nigel 91 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2065 ph10 172
2066 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2067 nigel 43
2068 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
2069 nigel 91 {
2070     case PCRE_NEWLINE_CR:
2071     fprintf(outfile, "Forced newline sequence: CR\n");
2072     break;
2073 nigel 43
2074 nigel 91 case PCRE_NEWLINE_LF:
2075     fprintf(outfile, "Forced newline sequence: LF\n");
2076     break;
2077    
2078     case PCRE_NEWLINE_CRLF:
2079     fprintf(outfile, "Forced newline sequence: CRLF\n");
2080     break;
2081    
2082 ph10 149 case PCRE_NEWLINE_ANYCRLF:
2083     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2084     break;
2085    
2086 nigel 93 case PCRE_NEWLINE_ANY:
2087     fprintf(outfile, "Forced newline sequence: ANY\n");
2088     break;
2089    
2090 nigel 91 default:
2091     break;
2092     }
2093    
2094 nigel 43 if (first_char == -1)
2095     {
2096 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
2097 nigel 43 }
2098     else if (first_char < 0)
2099     {
2100     fprintf(outfile, "No first char\n");
2101     }
2102     else
2103     {
2104 nigel 63 int ch = first_char & 255;
2105 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2106 nigel 63 "" : " (caseless)";
2107 nigel 93 if (PRINTHEX(ch))
2108 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2109 nigel 3 else
2110 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2111 nigel 43 }
2112 nigel 37
2113 nigel 43 if (need_char < 0)
2114     {
2115     fprintf(outfile, "No need char\n");
2116 nigel 3 }
2117 nigel 43 else
2118     {
2119 nigel 63 int ch = need_char & 255;
2120 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2121 nigel 63 "" : " (caseless)";
2122 nigel 93 if (PRINTHEX(ch))
2123 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2124 nigel 43 else
2125 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2126 nigel 43 }
2127 nigel 75
2128     /* Don't output study size; at present it is in any case a fixed
2129     value, but it varies, depending on the computer architecture, and
2130     so messes up the test suite. (And with the /F option, it might be
2131 ph10 654 flipped.) If study was forced by an external -s, don't show this
2132 ph10 612 information unless -i or -d was also present. This means that, except
2133     when auto-callouts are involved, the output from runs with and without
2134     -s should be identical. */
2135 nigel 75
2136 ph10 667 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2137 nigel 75 {
2138     if (extra == NULL)
2139     fprintf(outfile, "Study returned NULL\n");
2140     else
2141     {
2142     uschar *start_bits = NULL;
2143 ph10 455 int minlength;
2144 ph10 461
2145 ph10 455 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2146 ph10 461 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2147    
2148 nigel 75 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2149     if (start_bits == NULL)
2150 ph10 455 fprintf(outfile, "No set of starting bytes\n");
2151 nigel 75 else
2152     {
2153     int i;
2154     int c = 24;
2155     fprintf(outfile, "Starting byte set: ");
2156     for (i = 0; i < 256; i++)
2157     {
2158     if ((start_bits[i/8] & (1<<(i&7))) != 0)
2159     {
2160     if (c > 75)
2161     {
2162     fprintf(outfile, "\n ");
2163     c = 2;
2164     }
2165 nigel 93 if (PRINTHEX(i) && i != ' ')
2166 nigel 75 {
2167     fprintf(outfile, "%c ", i);
2168     c += 2;
2169     }
2170     else
2171     {
2172     fprintf(outfile, "\\x%02x ", i);
2173     c += 5;
2174     }
2175     }
2176     }
2177     fprintf(outfile, "\n");
2178     }
2179     }
2180 ph10 667
2181     /* Show this only if the JIT was set by /S, not by -s. */
2182    
2183     if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2184     {
2185     int jit;
2186     new_info(re, extra, PCRE_INFO_JIT, &jit);
2187     if (jit)
2188     fprintf(outfile, "JIT study was successful\n");
2189     else
2190     #ifdef SUPPORT_JIT
2191     fprintf(outfile, "JIT study was not successful\n");
2192     #else
2193     fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2194     #endif
2195     }
2196 nigel 75 }
2197 nigel 3 }
2198    
2199 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
2200     that is all. The first 8 bytes of the file are the regex length and then
2201     the study length, in big-endian order. */
2202 nigel 3
2203 nigel 75 if (to_file != NULL)
2204 nigel 3 {
2205 nigel 75 FILE *f = fopen((char *)to_file, "wb");
2206     if (f == NULL)
2207 nigel 3 {
2208 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2209 nigel 3 }
2210 nigel 75 else
2211     {
2212     uschar sbuf[8];
2213 ph10 255 sbuf[0] = (uschar)((true_size >> 24) & 255);
2214     sbuf[1] = (uschar)((true_size >> 16) & 255);
2215     sbuf[2] = (uschar)((true_size >> 8) & 255);
2216     sbuf[3] = (uschar)((true_size) & 255);
2217 ph10 259
2218 ph10 255 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2219     sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2220     sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2221     sbuf[7] = (uschar)((true_study_size) & 255);
2222 nigel 3
2223 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
2224     fwrite(re, 1, true_size, f) < true_size)
2225     {
2226     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2227     }
2228 nigel 3 else
2229     {
2230 ph10 612 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2231 ph10 654
2232 ph10 658 /* If there is study data, write it. */
2233 ph10 654
2234 nigel 75 if (extra != NULL)
2235 nigel 3 {
2236 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
2237     true_study_size)
2238 nigel 3 {
2239 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
2240     strerror(errno));
2241 nigel 3 }
2242 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
2243 nigel 3 }
2244     }
2245 nigel 75 fclose(f);
2246 nigel 3 }
2247 nigel 77
2248     new_free(re);
2249 ph10 667 if (extra != NULL) pcre_free_study(extra);
2250 ph10 545 if (locale_set)
2251 ph10 541 {
2252     new_free((void *)tables);
2253     setlocale(LC_CTYPE, "C");
2254 ph10 545 locale_set = 0;
2255     }
2256 nigel 75 continue; /* With next regex */
2257 nigel 3 }
2258 nigel 75 } /* End of non-POSIX compile */
2259 nigel 3
2260     /* Read data lines and test them */
2261    
2262     for (;;)
2263     {
2264 nigel 87 uschar *q;
2265 ph10 147 uschar *bptr;
2266 nigel 57 int *use_offsets = offsets;
2267 nigel 53 int use_size_offsets = size_offsets;
2268 nigel 63 int callout_data = 0;
2269     int callout_data_set = 0;
2270 nigel 3 int count, c;
2271 nigel 29 int copystrings = 0;
2272 ph10 386 int find_match_limit = default_find_match_limit;
2273 nigel 29 int getstrings = 0;
2274     int getlist = 0;
2275 nigel 39 int gmatched = 0;
2276 nigel 35 int start_offset = 0;
2277 ph10 579 int start_offset_sign = 1;
2278 nigel 41 int g_notempty = 0;
2279 nigel 77 int use_dfa = 0;
2280 nigel 3
2281     options = 0;
2282    
2283 nigel 91 *copynames = 0;
2284     *getnames = 0;
2285    
2286     copynamesptr = copynames;
2287     getnamesptr = getnames;
2288    
2289 nigel 63 pcre_callout = callout;
2290     first_callout = 1;
2291 ph10 654 last_callout_mark = NULL;
2292 nigel 63 callout_extra = 0;
2293     callout_count = 0;
2294     callout_fail_count = 999999;
2295     callout_fail_id = -1;
2296 nigel 73 show_malloc = 0;
2297 nigel 63
2298 nigel 91 if (extra != NULL) extra->flags &=
2299     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2300    
2301     len = 0;
2302     for (;;)
2303 nigel 11 {
2304 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2305 nigel 91 {
2306 ph10 537 if (len > 0) /* Reached EOF without hitting a newline */
2307     {
2308 ph10 545 fprintf(outfile, "\n");
2309 ph10 537 break;
2310 ph10 545 }
2311 nigel 91 done = 1;
2312     goto CONTINUE;
2313     }
2314     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2315     len = (int)strlen((char *)buffer);
2316     if (buffer[len-1] == '\n') break;
2317 nigel 11 }
2318 nigel 3
2319     while (len > 0 && isspace(buffer[len-1])) len--;
2320     buffer[len] = 0;
2321     if (len == 0) break;
2322    
2323     p = buffer;
2324     while (isspace(*p)) p++;
2325    
2326 ph10 147 bptr = q = dbuffer;
2327 nigel 3 while ((c = *p++) != 0)
2328     {
2329     int i = 0;
2330     int n = 0;
2331 nigel 63
2332 nigel 3 if (c == '\\') switch ((c = *p++))
2333     {
2334     case 'a': c = 7; break;
2335     case 'b': c = '\b'; break;
2336     case 'e': c = 27; break;
2337     case 'f': c = '\f'; break;
2338     case 'n': c = '\n'; break;
2339     case 'r': c = '\r'; break;
2340     case 't': c = '\t'; break;
2341     case 'v': c = '\v'; break;
2342    
2343     case '0': case '1': case '2': case '3':
2344     case '4': case '5': case '6': case '7':
2345     c -= '0';
2346     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2347     c = c * 8 + *p++ - '0';
2348 nigel 91
2349     #if !defined NOUTF8
2350     if (use_utf8 && c > 255)
2351     {
2352     unsigned char buff8[8];
2353     int ii, utn;
2354     utn = ord2utf8(c, buff8);
2355     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2356     c = buff8[ii]; /* Last byte */
2357     }
2358     #endif
2359 nigel 3 break;
2360    
2361     case 'x':
2362 nigel 49
2363     /* Handle \x{..} specially - new Perl thing for utf8 */
2364    
2365 nigel 79 #if !defined NOUTF8
2366 nigel 49 if (*p == '{')
2367     {
2368     unsigned char *pt = p;
2369     c = 0;
2370     while (isxdigit(*(++pt)))
2371     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2372     if (*pt == '}')
2373     {
2374 nigel 67 unsigned char buff8[8];
2375 nigel 49 int ii, utn;
2376 ph10 355 if (use_utf8)
2377 ph10 358 {
2378 ph10 355 utn = ord2utf8(c, buff8);
2379     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2380     c = buff8[ii]; /* Last byte */
2381     }
2382     else
2383     {
2384 ph10 358 if (c > 255)
2385 ph10 355 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2386     "UTF-8 mode is not enabled.\n"
2387     "** Truncation will probably give the wrong result.\n", c);
2388 ph10 358 }
2389 nigel 49 p = pt + 1;
2390     break;
2391     }
2392     /* Not correct form; fall through */
2393     }
2394 nigel 79 #endif
2395 nigel 49
2396     /* Ordinary \x */
2397    
2398 nigel 3 c = 0;
2399     while (i++ < 2 && isxdigit(*p))
2400     {
2401     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2402     p++;
2403     }
2404     break;
2405    
2406 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
2407 nigel 3 p--;
2408     continue;
2409    
2410 nigel 75 case '>':
2411 ph10 579 if (*p == '-')
2412 ph10 567 {
2413     start_offset_sign = -1;
2414     p++;
2415 ph10 579 }
2416 nigel 75 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2417 ph10 579 start_offset *= start_offset_sign;
2418 nigel 75 continue;
2419    
2420 nigel 3 case 'A': /* Option setting */
2421     options |= PCRE_ANCHORED;
2422     continue;
2423    
2424     case 'B':
2425     options |= PCRE_NOTBOL;
2426     continue;
2427    
2428 nigel 29 case 'C':
2429 nigel 63 if (isdigit(*p)) /* Set copy string */
2430     {
2431     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2432     copystrings |= 1 << n;
2433     }
2434     else if (isalnum(*p))
2435     {
2436 nigel 91 uschar *npp = copynamesptr;
2437 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2438 nigel 91 *npp++ = 0;
2439 nigel 67 *npp = 0;
2440 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2441 nigel 63 if (n < 0)
2442 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2443     copynamesptr = npp;
2444 nigel 63 }
2445     else if (*p == '+')
2446     {
2447     callout_extra = 1;
2448     p++;
2449     }
2450     else if (*p == '-')
2451     {
2452     pcre_callout = NULL;
2453     p++;
2454     }
2455     else if (*p == '!')
2456     {
2457     callout_fail_id = 0;
2458     p++;
2459     while(isdigit(*p))
2460     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2461     callout_fail_count = 0;
2462     if (*p == '!')
2463     {
2464     p++;
2465     while(isdigit(*p))
2466     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2467     }
2468     }
2469     else if (*p == '*')
2470     {
2471     int sign = 1;
2472     callout_data = 0;
2473     if (*(++p) == '-') { sign = -1; p++; }
2474     while(isdigit(*p))
2475     callout_data = callout_data * 10 + *p++ - '0';
2476     callout_data *= sign;
2477     callout_data_set = 1;
2478     }
2479 nigel 29 continue;
2480    
2481 nigel 79 #if !defined NODFA
2482 nigel 77 case 'D':
2483 nigel 79 #if !defined NOPOSIX
2484 nigel 77 if (posix || do_posix)
2485     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2486     else
2487 nigel 79 #endif
2488 nigel 77 use_dfa = 1;
2489     continue;
2490 ph10 553 #endif
2491 nigel 77
2492 ph10 553 #if !defined NODFA
2493 nigel 77 case 'F':
2494     options |= PCRE_DFA_SHORTEST;
2495     continue;
2496 nigel 79 #endif
2497 nigel 77
2498 nigel 29 case 'G':
2499 nigel 63 if (isdigit(*p))
2500     {
2501     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2502     getstrings |= 1 << n;
2503     }
2504     else if (isalnum(*p))
2505     {
2506 nigel 91 uschar *npp = getnamesptr;
2507 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2508 nigel 91 *npp++ = 0;
2509 nigel 67 *npp = 0;
2510 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2511 nigel 63 if (n < 0)
2512 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2513     getnamesptr = npp;
2514 nigel 63 }
2515 nigel 29 continue;
2516 ph10 667
2517     case 'J':
2518     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2519     if (extra != NULL
2520     && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2521     && extra->executable_jit != NULL)
2522     {
2523     if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2524     jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2525 ph10 675 pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2526 ph10 667 }
2527     continue;
2528 nigel 29
2529     case 'L':
2530     getlist = 1;
2531     continue;
2532    
2533 nigel 63 case 'M':
2534     find_match_limit = 1;
2535     continue;
2536    
2537 nigel 37 case 'N':
2538 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
2539     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2540 ph10 461 else
2541 ph10 442 options |= PCRE_NOTEMPTY;
2542 nigel 37 continue;
2543    
2544 nigel 3 case 'O':
2545     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2546 nigel 53 if (n > size_offsets_max)
2547     {
2548     size_offsets_max = n;
2549 nigel 57 free(offsets);
2550 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2551 nigel 53 if (offsets == NULL)
2552     {
2553     printf("** Failed to get %d bytes of memory for offsets vector\n",
2554 ph10 151 (int)(size_offsets_max * sizeof(int)));
2555 nigel 77 yield = 1;
2556     goto EXIT;
2557 nigel 53 }
2558     }
2559     use_size_offsets = n;
2560 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2561 nigel 3 continue;
2562    
2563 nigel 75 case 'P':
2564 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2565 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2566 nigel 75 continue;
2567    
2568 nigel 91 case 'Q':
2569     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2570     if (extra == NULL)
2571     {
2572     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2573     extra->flags = 0;
2574     }
2575     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2576     extra->match_limit_recursion = n;
2577     continue;
2578    
2579     case 'q':
2580     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2581     if (extra == NULL)
2582     {
2583     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2584     extra->flags = 0;
2585     }
2586     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2587     extra->match_limit = n;
2588     continue;
2589    
2590 nigel 79 #if !defined NODFA
2591 nigel 77 case 'R':
2592     options |= PCRE_DFA_RESTART;
2593     continue;
2594 nigel 79 #endif
2595 nigel 77
2596 nigel 73 case 'S':
2597     show_malloc = 1;
2598     continue;
2599 ph10 392
2600 ph10 389 case 'Y':
2601     options |= PCRE_NO_START_OPTIMIZE;
2602 ph10 392 continue;
2603 nigel 73
2604 nigel 3 case 'Z':
2605     options |= PCRE_NOTEOL;
2606     continue;
2607 nigel 71
2608     case '?':
2609     options |= PCRE_NO_UTF8_CHECK;
2610     continue;
2611 nigel 91
2612     case '<':
2613     {
2614     int x = check_newline(p, outfile);
2615     if (x == 0) goto NEXT_DATA;
2616     options |= x;
2617     while (*p++ != '>');
2618     }
2619     continue;
2620 nigel 3 }
2621 nigel 9 *q++ = c;
2622 nigel 3 }
2623 nigel 9 *q = 0;
2624 ph10 530 len = (int)(q - dbuffer);
2625 ph10 545
2626 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
2627 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2628 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
2629 ph10 371
2630 ph10 363 #if !defined NOPOSIX
2631     if (posix || do_posix)
2632     {
2633     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2634 ph10 371 bptr += buffer_size - len - 1;
2635 ph10 363 }
2636 ph10 371 else
2637     #endif
2638 ph10 363 {
2639     memmove(bptr + buffer_size - len, bptr, len);
2640 ph10 371 bptr += buffer_size - len;
2641     }
2642 nigel 3
2643 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
2644     {
2645     printf("**Match limit not relevant for DFA matching: ignored\n");
2646     find_match_limit = 0;
2647     }
2648    
2649 nigel 3 /* Handle matching via the POSIX interface, which does not
2650 nigel 63 support timing or playing with the match limit or callout data. */
2651 nigel 3
2652 nigel 37 #if !defined NOPOSIX
2653 nigel 3 if (posix || do_posix)
2654     {
2655     int rc;
2656     int eflags = 0;
2657 nigel 63 regmatch_t *pmatch = NULL;
2658     if (use_size_offsets > 0)
2659 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2660 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2661     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2662 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2663 nigel 3
2664 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2665 nigel 3
2666     if (rc != 0)
2667     {
2668 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2669 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2670     }
2671 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2672     != 0)
2673     {
2674     fprintf(outfile, "Matched with REG_NOSUB\n");
2675     }
2676 nigel 3 else
2677     {
2678 nigel 7 size_t i;
2679 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
2680 nigel 3 {
2681     if (pmatch[i].rm_so >= 0)
2682     {
2683 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2684 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2685     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2686 nigel 3 fprintf(outfile, "\n");
2687 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
2688 nigel 35 {
2689 ph10 616 fprintf(outfile, "%2d+ ", (int)i);
2690 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2691     outfile);
2692 nigel 35 fprintf(outfile, "\n");
2693     }
2694 nigel 3 }
2695     }
2696     }
2697 nigel 53 free(pmatch);
2698 nigel 3 }
2699    
2700 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2701 nigel 3
2702 nigel 37 else
2703     #endif /* !defined NOPOSIX */
2704    
2705 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2706 nigel 3 {
2707 ph10 512 markptr = NULL;
2708    
2709 nigel 93 if (timeitm > 0)
2710 nigel 3 {
2711     register int i;
2712     clock_t time_taken;
2713     clock_t start_time = clock();
2714 nigel 77
2715 nigel 79 #if !defined NODFA
2716 nigel 77 if (all_use_dfa || use_dfa)
2717     {
2718     int workspace[1000];
2719 nigel 93 for (i = 0; i < timeitm; i++)
2720 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2721 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2722     sizeof(workspace)/sizeof(int));
2723     }
2724     else
2725 nigel 79 #endif
2726 nigel 77
2727 nigel 93 for (i = 0; i < timeitm; i++)
2728 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2729 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2730 nigel 77
2731 nigel 3 time_taken = clock() - start_time;
2732 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2733     (((double)time_taken * 1000.0) / (double)timeitm) /
2734 nigel 63 (double)CLOCKS_PER_SEC);
2735 nigel 3 }
2736    
2737 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2738 nigel 87 varying limits in order to find the minimum value for the match limit and
2739 ph10 667 for the recursion limit. The match limits are relevant only to the normal
2740     running of pcre_exec(), so disable the JIT optimization. This makes it
2741     possible to run the same set of tests with and without JIT externally
2742     requested. */
2743 nigel 63
2744     if (find_match_limit)
2745     {
2746     if (extra == NULL)
2747     {
2748 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2749 nigel 63 extra->flags = 0;
2750     }
2751 ph10 667 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
2752    
2753 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2754 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2755     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2756     PCRE_ERROR_MATCHLIMIT, "match()");
2757 nigel 63
2758 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2759     options|g_notempty, use_offsets, use_size_offsets,
2760     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2761     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2762 nigel 63 }
2763    
2764     /* If callout_data is set, use the interface with additional data */
2765    
2766     else if (callout_data_set)
2767     {
2768     if (extra == NULL)
2769     {
2770 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2771 nigel 63 extra->flags = 0;
2772     }
2773     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2774 nigel 71 extra->callout_data = &callout_data;
2775 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2776     options | g_notempty, use_offsets, use_size_offsets);
2777     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2778     }
2779    
2780     /* The normal case is just to do the match once, with the default
2781     value of match_limit. */
2782    
2783 nigel 79 #if !defined NODFA
2784 nigel 77 else if (all_use_dfa || use_dfa)
2785     {
2786     int workspace[1000];
2787 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2788 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2789     sizeof(workspace)/sizeof(int));
2790     if (count == 0)
2791     {
2792     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2793     count = use_size_offsets/2;
2794     }
2795     }
2796 nigel 79 #endif
2797 nigel 77
2798 nigel 75 else
2799     {
2800     count = pcre_exec(re, extra, (char *)bptr, len,
2801     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2802 nigel 77 if (count == 0)
2803     {
2804     fprintf(outfile, "Matched, but too many substrings\n");
2805     count = use_size_offsets/3;
2806     }
2807 nigel 75 }
2808 nigel 3
2809 nigel 39 /* Matched */
2810    
2811 nigel 3 if (count >= 0)
2812     {
2813 nigel 93 int i, maxcount;
2814    
2815     #if !defined NODFA
2816     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2817     #endif
2818     maxcount = use_size_offsets/3;
2819    
2820     /* This is a check against a lunatic return value. */
2821    
2822     if (count > maxcount)
2823     {
2824     fprintf(outfile,
2825     "** PCRE error: returned count %d is too big for offset size %d\n",
2826     count, use_size_offsets);
2827     count = use_size_offsets/3;
2828     if (do_g || do_G)
2829     {
2830     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2831     do_g = do_G = FALSE; /* Break g/G loop */
2832     }
2833     }
2834 ph10 654
2835 ph10 626 /* do_allcaps requests showing of all captures in the pattern, to check
2836     unset ones at the end. */
2837 ph10 654
2838 ph10 626 if (do_allcaps)
2839     {
2840     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2841 ph10 654 count++; /* Allow for full match */
2842     if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2843     }
2844 nigel 93
2845 ph10 626 /* Output the captured substrings */
2846 ph10 654
2847 nigel 29 for (i = 0; i < count * 2; i += 2)
2848 nigel 3 {
2849 nigel 57 if (use_offsets[i] < 0)
2850 ph10 654 {
2851 ph10 626 if (use_offsets[i] != -1)
2852     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2853 ph10 654 use_offsets[i], i);
2854 ph10 626 if (use_offsets[i+1] != -1)
2855     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2856 ph10 654 use_offsets[i+1], i+1);
2857 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2858 ph10 654 }
2859 nigel 3 else
2860     {
2861     fprintf(outfile, "%2d: ", i/2);
2862 nigel 63 (void)pchars(bptr + use_offsets[i],
2863     use_offsets[i+1] - use_offsets[i], outfile);
2864 nigel 3 fprintf(outfile, "\n");
2865 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
2866 nigel 35 {
2867 ph10 616 fprintf(outfile, "%2d+ ", i/2);
2868     (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2869     outfile);
2870     fprintf(outfile, "\n");
2871 nigel 35 }
2872 nigel 3 }
2873     }
2874 ph10 512
2875 ph10 510 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2876 nigel 29
2877     for (i = 0; i < 32; i++)
2878     {
2879     if ((copystrings & (1 << i)) != 0)
2880     {
2881 nigel 91 char copybuffer[256];
2882 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2883 nigel 37 i, copybuffer, sizeof(copybuffer));
2884 nigel 29 if (rc < 0)
2885     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2886     else
2887 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2888 nigel 29 }
2889     }
2890    
2891 nigel 91 for (copynamesptr = copynames;
2892     *copynamesptr != 0;
2893     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2894     {
2895     char copybuffer[256];
2896     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2897     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2898     if (rc < 0)
2899     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2900     else
2901     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2902     }
2903    
2904 nigel 29 for (i = 0; i < 32; i++)
2905     {
2906     if ((getstrings & (1 << i)) != 0)
2907     {
2908     const char *substring;
2909 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2910 nigel 29 i, &substring);
2911     if (rc < 0)
2912     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2913     else
2914     {
2915     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2916 nigel 49 pcre_free_substring(substring);
2917 nigel 29 }
2918     }
2919     }
2920    
2921 nigel 91 for (getnamesptr = getnames;
2922     *getnamesptr != 0;
2923     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2924     {
2925     const char *substring;
2926     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2927     count, (char *)getnamesptr, &substring);
2928     if (rc < 0)
2929     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2930     else
2931     {
2932     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2933     pcre_free_substring(substring);
2934     }
2935     }
2936    
2937 nigel 29 if (getlist)
2938     {
2939     const char **stringlist;
2940 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2941 nigel 29 &stringlist);
2942     if (rc < 0)
2943     fprintf(outfile, "get substring list failed %d\n", rc);
2944     else
2945     {
2946     for (i = 0; i < count; i++)
2947     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2948     if (stringlist[i] != NULL)
2949     fprintf(outfile, "string list not terminated by NULL\n");
2950 nigel 49 pcre_free_substring_list(stringlist);
2951 nigel 29 }
2952     }
2953 nigel 39 }
2954 nigel 29
2955 nigel 75 /* There was a partial match */
2956    
2957     else if (count == PCRE_ERROR_PARTIAL)
2958     {
2959 ph10 510 if (markptr == NULL) fprintf(outfile, "Partial match");
2960     else fprintf(outfile, "Partial match, mark=%s", markptr);
2961 ph10 426 if (use_size_offsets > 1)
2962     {
2963     fprintf(outfile, ": ");
2964     pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2965 ph10 461 outfile);
2966     }
2967 nigel 77 fprintf(outfile, "\n");
2968 nigel 75 break; /* Out of the /g loop */
2969     }
2970    
2971 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2972 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2973     to advance the start offset, and continue. We won't be at the end of the
2974     string - that was checked before setting g_notempty.
2975 nigel 39
2976 ph10 566 Complication arises in the case when the newline convention is "any",
2977 ph10 579 "crlf", or "anycrlf". If the previous match was at the end of a line
2978     terminated by CRLF, an advance of one character just passes the \r,
2979 ph10 566 whereas we should prefer the longer newline sequence, as does the code in
2980 ph10 579 pcre_exec(). Fudge the offset value to achieve this. We check for a
2981     newline setting in the pattern; if none was set, use pcre_config() to
2982 ph10 566 find the default.
2983 ph10 144
2984 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2985     character, not one byte. */
2986    
2987 nigel 3 else
2988     {
2989 nigel 41 if (g_notempty != 0)
2990 nigel 35 {
2991 nigel 73 int onechar = 1;
2992 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2993 nigel 57 use_offsets[0] = start_offset;
2994 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2995     {
2996     int d;
2997     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2998 ph10 391 /* Note that these values are always the ASCII ones, even in
2999     EBCDIC environments. CR = 13, NL = 10. */
3000     obits = (d == 13)? PCRE_NEWLINE_CR :
3001     (d == 10)? PCRE_NEWLINE_LF :
3002     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3003 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
3004 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
3005     }
3006 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3007 ph10 566 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3008 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3009 ph10 149 &&
3010 ph10 143 start_offset < len - 1 &&
3011     bptr[start_offset] == '\r' &&
3012     bptr[start_offset+1] == '\n')
3013 ph10 144 onechar++;
3014 ph10 143 else if (use_utf8)
3015 nigel 73 {
3016     while (start_offset + onechar < len)
3017     {
3018 ph10 566 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3019 ph10 579 onechar++;
3020 nigel 73 }
3021     }
3022     use_offsets[1] = start_offset + onechar;
3023 nigel 35 }
3024 nigel 41 else
3025     {
3026 ph10 598 switch(count)
3027 ph10 654 {
3028 ph10 598 case PCRE_ERROR_NOMATCH:
3029 ph10 512 if (gmatched == 0)
3030 ph10 510 {
3031     if (markptr == NULL) fprintf(outfile, "No match\n");
3032     else fprintf(outfile, "No match, mark = %s\n", markptr);
3033 ph10 512 }
3034 ph10 598 break;
3035 ph10 654
3036 ph10 598 case PCRE_ERROR_BADUTF8:
3037     case PCRE_ERROR_SHORTUTF8:
3038     fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3039     (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3040     if (use_size_offsets >= 2)
3041 ph10 654 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3042 ph10 598 use_offsets[1]);
3043 ph10 654 fprintf(outfile, "\n");
3044     break;
3045    
3046 ph10 598 default:
3047 ph10 654 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3048 ph10 604 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3049 ph10 654 else
3050     fprintf(outfile, "Error %d (Unexpected value)\n", count);
3051 ph10 598 break;
3052 nigel 41 }
3053 ph10 654
3054 nigel 41 break; /* Out of the /g loop */
3055     }
3056 nigel 3 }
3057 nigel 35
3058 nigel 39 /* If not /g or /G we are done */
3059    
3060     if (!do_g && !do_G) break;
3061    
3062 nigel 41 /* If we have matched an empty string, first check to see if we are at
3063 ph10 442 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3064     Perl's /g option does. This turns out to be rather cunning. First we set
3065     PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3066 nigel 47 same point. If this fails (picked up above) we advance to the next
3067 ph10 143 character. */
3068 ph10 142
3069 nigel 41 g_notempty = 0;
3070 ph10 142
3071 nigel 57 if (use_offsets[0] == use_offsets[1])
3072 nigel 41 {
3073 nigel 57 if (use_offsets[0] == len) break;
3074 ph10 442 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3075 nigel 41 }
3076 nigel 39
3077     /* For /g, update the start offset, leaving the rest alone */
3078    
3079 ph10 143 if (do_g) start_offset = use_offsets[1];
3080 nigel 39
3081     /* For /G, update the pointer and length */
3082    
3083     else
3084 nigel 35 {
3085 ph10 143 bptr += use_offsets[1];
3086     len -= use_offsets[1];
3087 nigel 35 }
3088 nigel 39 } /* End of loop for /g and /G */
3089 nigel 91
3090     NEXT_DATA: continue;
3091 nigel 39 } /* End of loop for data lines */
3092 nigel 3
3093 nigel 11 CONTINUE:
3094 nigel 37
3095     #if !defined NOPOSIX
3096 nigel 3 if (posix || do_posix) regfree(&preg);
3097 nigel 37 #endif
3098    
3099 nigel 77 if (re != NULL) new_free(re);
3100 ph10 667 if (extra != NULL) pcre_free_study(extra);
3101 ph10 541 if (locale_set)
3102 nigel 25 {
3103 nigel 77 new_free((void *)tables);
3104 nigel 25 setlocale(LC_CTYPE, "C");
3105 nigel 93 locale_set = 0;
3106 nigel 25 }
3107 ph10 667 if (jit_stack != NULL)
3108     {
3109     pcre_jit_stack_free(jit_stack);
3110     jit_stack = NULL;
3111     }
3112 nigel 3 }
3113    
3114 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
3115 nigel 77
3116     EXIT:
3117    
3118     if (infile != NULL && infile != stdin) fclose(infile);
3119     if (outfile != NULL && outfile != stdout) fclose(outfile);
3120    
3121     free(buffer);
3122     free(dbuffer);
3123     free(pbuffer);
3124     free(offsets);
3125    
3126     return yield;
3127 nigel 3 }
3128    
3129 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12