/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 675 - (hide annotations) (download)
Sat Aug 27 10:18:46 2011 UTC (3 years, 3 months ago) by ph10
File MIME type: text/plain
File size: 94257 byte(s)
Change pcre_assign_jit_callback to pcre_assign_jit_stack.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50 nigel 3
51 ph10 287 #ifdef SUPPORT_LIBREADLINE
52 ph10 343 #ifdef HAVE_UNISTD_H
53 ph10 287 #include <unistd.h>
54 ph10 343 #endif
55 ph10 287 #include <readline/readline.h>
56     #include <readline/history.h>
57     #endif
58 nigel 93
59 ph10 287
60 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
61     input and output without "b"; then I was told that "b" was needed in some
62     environments, so it was added for release 5.0 to both the input and output. (It
63     makes no difference on Unix-like systems.) Later I was told that it is wrong
64     for the input on Windows. I've now abstracted the modes into two macros that
65     are set here, to make it easier to fiddle with them, and removed "b" from the
66     input mode under Windows. */
67    
68     #if defined(_WIN32) || defined(WIN32)
69     #include <io.h> /* For _setmode() */
70     #include <fcntl.h> /* For _O_BINARY */
71     #define INPUT_MODE "r"
72     #define OUTPUT_MODE "wb"
73    
74 ph10 411 #ifndef isatty
75     #define isatty _isatty /* This is what Windows calls them, I'm told, */
76     #endif /* though in some environments they seem to */
77     /* be already defined, hence the #ifndefs. */
78     #ifndef fileno
79 ph10 343 #define fileno _fileno
80 ph10 411 #endif
81 ph10 343
82 ph10 580 /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84     #ifdef __BORLANDC__
85     #define _setmode(handle, mode) setmode(handle, mode)
86     #endif
87    
88     /* Not Windows */
89    
90 nigel 93 #else
91     #include <sys/time.h> /* These two includes are needed */
92     #include <sys/resource.h> /* for setrlimit(). */
93     #define INPUT_MODE "rb"
94     #define OUTPUT_MODE "wb"
95 nigel 91 #endif
96    
97 nigel 93
98 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
99     displaying the results of pcre_study() and we also need to know about the
100     internal macros, structures, and other internal data values; pcretest has
101     "inside information" compared to a program that strictly follows the PCRE API.
102 nigel 37
103 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105     appropriately for an application, not for building PCRE. */
106 nigel 77
107 ph10 145 #include "pcre.h"
108 nigel 77 #include "pcre_internal.h"
109    
110 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
111     to keep two copies, we include the source file here, changing the names of the
112     external symbols to prevent clashes. */
113 nigel 77
114 ph10 351 #define _pcre_ucp_gentype ucp_gentype
115 ph10 667 #define _pcre_ucp_typerange ucp_typerange
116 nigel 85 #define _pcre_utf8_table1 utf8_table1
117     #define _pcre_utf8_table1_size utf8_table1_size
118     #define _pcre_utf8_table2 utf8_table2
119     #define _pcre_utf8_table3 utf8_table3
120     #define _pcre_utf8_table4 utf8_table4
121 ph10 667 #define _pcre_utf8_char_sizes utf8_char_sizes
122 nigel 85 #define _pcre_utt utt
123     #define _pcre_utt_size utt_size
124 ph10 240 #define _pcre_utt_names utt_names
125 nigel 85 #define _pcre_OP_lengths OP_lengths
126    
127     #include "pcre_tables.c"
128    
129     /* We also need the pcre_printint() function for printing out compiled
130     patterns. This function is in a separate file so that it can be included in
131 ph10 507 pcre_compile.c when that module is compiled with debugging enabled. It needs to
132 ph10 498 know which case is being compiled. */
133 nigel 85
134 ph10 498 #define COMPILING_PCRETEST
135     #include "pcre_printint.src"
136    
137     /* The definition of the macro PRINTABLE, which determines whether to print an
138 nigel 93 output character as-is or as a hex value when showing compiled patterns, is
139 ph10 498 contained in the printint.src file. We use it here also, in cases when the
140     locale has not been explicitly changed, so as to get consistent output from
141     systems that differ in their output from isprint() even in the "C" locale. */
142 nigel 93
143     #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
144 nigel 85
145 nigel 37 /* It is possible to compile this test program without including support for
146     testing the POSIX interface, though this is not available via the standard
147     Makefile. */
148    
149     #if !defined NOPOSIX
150 nigel 3 #include "pcreposix.h"
151 nigel 37 #endif
152 nigel 3
153 ph10 107 /* It is also possible, for the benefit of the version currently imported into
154     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
155     interface to the DFA matcher (NODFA), and without the doublecheck of the old
156     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
157     UTF8 support if PCRE is built without it. */
158 nigel 79
159 ph10 107 #ifndef SUPPORT_UTF8
160     #ifndef NOUTF8
161     #define NOUTF8
162     #endif
163     #endif
164 nigel 79
165 ph10 107
166 nigel 85 /* Other parameters */
167    
168 nigel 3 #ifndef CLOCKS_PER_SEC
169     #ifdef CLK_TCK
170     #define CLOCKS_PER_SEC CLK_TCK
171     #else
172     #define CLOCKS_PER_SEC 100
173     #endif
174     #endif
175    
176 nigel 93 /* This is the default loop count for timing. */
177    
178 nigel 75 #define LOOPREPEAT 500000
179 nigel 3
180 nigel 85 /* Static variables */
181    
/* File-scope state shared by the functions in this file. Only the variables
whose use is visible nearby are annotated below. */
182 nigel 3 static FILE *outfile;
183     static int log_store = 0;
184 nigel 63 static int callout_count;
185     static int callout_extra;
186     static int callout_fail_count;
187     static int callout_fail_id;
188 ph10 210 static int debug_lengths;
189 nigel 63 static int first_callout;
/* locale_set is tested by the PRINTHEX() macro: non-zero selects isprint(),
zero selects PRINTABLE(). */
190 nigel 93 static int locale_set = 0;
191 nigel 73 static int show_malloc;
/* use_utf8 is tested by pchars() to decide whether to decode UTF-8. */
192 nigel 67 static int use_utf8;
193 nigel 43 static size_t gotten_store;
194 ph10 645 static const unsigned char *last_callout_mark = NULL;
195 nigel 3
196 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
197    
/* buffer_size is the current size of each of the three buffers below;
extend_inputline() doubles it and reallocates all three together. */
198     static int buffer_size = 50000;
199     static uschar *buffer = NULL;
200     static uschar *dbuffer = NULL;
201 nigel 75 static uschar *pbuffer = NULL;
202 nigel 3
203 ph10 598 /* Textual explanations for runtime error codes */
204 nigel 75
/* Table of 27 message strings (indices 0-26). A NULL entry marks a code that,
per its inline comment, is either not an error, is handled specially, or is
never returned. NOTE(review): presumably indexed by the negated PCRE_ERROR_xxx
return code - the use site is outside this view, so confirm there. */
205 ph10 598 static const char *errtexts[] = {
206     NULL, /* 0 is no error */
207     NULL, /* NOMATCH is handled specially */
208     "NULL argument passed",
209     "bad option value",
210     "magic number missing",
211     "unknown opcode - pattern overwritten?",
212     "no more memory",
213 ph10 654 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
214 ph10 598 "match limit exceeded",
215     "callout error code",
216     NULL, /* BADUTF8 is handled specially */
217     "bad UTF-8 offset",
218     NULL, /* PARTIAL is handled specially */
219     "not used - internal error",
220     "internal error - pattern overwritten?",
221     "bad count value",
222     "item unsupported for DFA matching",
223     "backreference condition or recursion test not supported for DFA matching",
224     "match limit not supported for DFA matching",
225     "workspace size exceeded in DFA matching",
226 ph10 654 "too much recursion for DFA matching",
227 ph10 598 "recursion limit exceeded",
228     "not used - internal error",
229     "invalid combination of newline options",
230     "bad offset value",
231 ph10 642 NULL, /* SHORTUTF8 is handled specially */
232     "nested recursion at the same subject position"
233 ph10 598 };
234    
235 ph10 654
236 ph10 541 /*************************************************
237     * Alternate character tables *
238     *************************************************/
239 nigel 49
240 ph10 545 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
241     using the default tables of the library. However, the T option can be used to
242     select alternate sets of tables, for different kinds of testing. Note also that
243 ph10 541 the L (locale) option also adjusts the tables. */
244    
245 ph10 545 /* This is the set of tables distributed as default with PCRE. It recognizes
246 ph10 541 only ASCII characters. */
247    
/* Layout: 256-byte lower casing table, 256-byte case flipping table, ten
32-byte character class bit maps (320 bytes), then a 256-byte table of
per-character type flags. */
248     static const unsigned char tables0[] = {
249    
250     /* This table is a lower casing table. */
251    
252     0, 1, 2, 3, 4, 5, 6, 7,
253     8, 9, 10, 11, 12, 13, 14, 15,
254     16, 17, 18, 19, 20, 21, 22, 23,
255     24, 25, 26, 27, 28, 29, 30, 31,
256     32, 33, 34, 35, 36, 37, 38, 39,
257     40, 41, 42, 43, 44, 45, 46, 47,
258     48, 49, 50, 51, 52, 53, 54, 55,
259     56, 57, 58, 59, 60, 61, 62, 63,
260     64, 97, 98, 99,100,101,102,103,
261     104,105,106,107,108,109,110,111,
262     112,113,114,115,116,117,118,119,
263     120,121,122, 91, 92, 93, 94, 95,
264     96, 97, 98, 99,100,101,102,103,
265     104,105,106,107,108,109,110,111,
266     112,113,114,115,116,117,118,119,
267     120,121,122,123,124,125,126,127,
268     128,129,130,131,132,133,134,135,
269     136,137,138,139,140,141,142,143,
270     144,145,146,147,148,149,150,151,
271     152,153,154,155,156,157,158,159,
272     160,161,162,163,164,165,166,167,
273     168,169,170,171,172,173,174,175,
274     176,177,178,179,180,181,182,183,
275     184,185,186,187,188,189,190,191,
276     192,193,194,195,196,197,198,199,
277     200,201,202,203,204,205,206,207,
278     208,209,210,211,212,213,214,215,
279     216,217,218,219,220,221,222,223,
280     224,225,226,227,228,229,230,231,
281     232,233,234,235,236,237,238,239,
282     240,241,242,243,244,245,246,247,
283     248,249,250,251,252,253,254,255,
284    
285     /* This table is a case flipping table. */
286    
287     0, 1, 2, 3, 4, 5, 6, 7,
288     8, 9, 10, 11, 12, 13, 14, 15,
289     16, 17, 18, 19, 20, 21, 22, 23,
290     24, 25, 26, 27, 28, 29, 30, 31,
291     32, 33, 34, 35, 36, 37, 38, 39,
292     40, 41, 42, 43, 44, 45, 46, 47,
293     48, 49, 50, 51, 52, 53, 54, 55,
294     56, 57, 58, 59, 60, 61, 62, 63,
295     64, 97, 98, 99,100,101,102,103,
296     104,105,106,107,108,109,110,111,
297     112,113,114,115,116,117,118,119,
298     120,121,122, 91, 92, 93, 94, 95,
299     96, 65, 66, 67, 68, 69, 70, 71,
300     72, 73, 74, 75, 76, 77, 78, 79,
301     80, 81, 82, 83, 84, 85, 86, 87,
302     88, 89, 90,123,124,125,126,127,
303     128,129,130,131,132,133,134,135,
304     136,137,138,139,140,141,142,143,
305     144,145,146,147,148,149,150,151,
306     152,153,154,155,156,157,158,159,
307     160,161,162,163,164,165,166,167,
308     168,169,170,171,172,173,174,175,
309     176,177,178,179,180,181,182,183,
310     184,185,186,187,188,189,190,191,
311     192,193,194,195,196,197,198,199,
312     200,201,202,203,204,205,206,207,
313     208,209,210,211,212,213,214,215,
314     216,217,218,219,220,221,222,223,
315     224,225,226,227,228,229,230,231,
316     232,233,234,235,236,237,238,239,
317     240,241,242,243,244,245,246,247,
318     248,249,250,251,252,253,254,255,
319    
320     /* This table contains bit maps for various character classes. Each map is 32
321     bytes long and the bits run from the least significant end of each byte. The
322     classes that have their own maps are: space, xdigit, digit, upper, lower, word,
323     graph, print, punct, and cntrl. Other classes are built from combinations. */
324    
/* space: characters 9-13 and 32 */
325     0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
326     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
327     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
328     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329    
/* xdigit: 0-9, A-F, a-f */
330     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
331     0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
332     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
333     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334    
/* digit: 0-9 */
335     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
336     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
337     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
338     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339    
/* upper: A-Z */
340     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
341     0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
342     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
343     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
344    
/* lower: a-z */
345     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
346     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
347     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
348     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
349    
/* word: 0-9, A-Z, a-z and '_' */
350     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
351     0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
352     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
353     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
354    
/* graph: characters 33-126 */
355     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
356     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
357     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
358     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
359    
/* print: characters 32-126 */
360     0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
361     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
362     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
363     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
364    
/* punct */
365     0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
366     0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
367     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
368     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
369    
/* cntrl: characters 0-31 and 127 */
370     0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
371     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
372     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
373     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
374    
375     /* This table identifies various classes of character by individual bits:
376     0x01 white space character
377     0x02 letter
378     0x04 decimal digit
379     0x08 hexadecimal digit
380     0x10 alphanumeric or '_'
381     0x80 regular expression metacharacter or binary zero
382     */
383    
384     0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
385     0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
386     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
387     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
388     0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
389     0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
390     0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
391     0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
392     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
393     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
394     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
395     0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
396     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
397     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
398     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
399     0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
400     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
401     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
402     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
403     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
404     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
405     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
406     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
407     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
408     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
409     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
410     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
411     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
412     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
413     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
414     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
415     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
416    
417 ph10 545 /* This is a set of tables that came originally from a Windows user. It seems to
418     be at least an approximation of ISO 8859. In particular, there are characters
419 ph10 541 greater than 128 that are marked as spaces, letters, etc. */
420    
/* The data below is written in decimal and falls into four consecutive
sections of 256, 256, 320 and 256 bytes, the same sizes as the four sections
of tables0. */
421     static const unsigned char tables1[] = {

/* Section 1 (256 bytes): lower casing table. Unlike tables0, entries 192-214
and 216-222 map to the values 32 higher. */

422     0,1,2,3,4,5,6,7,
423     8,9,10,11,12,13,14,15,
424     16,17,18,19,20,21,22,23,
425     24,25,26,27,28,29,30,31,
426     32,33,34,35,36,37,38,39,
427     40,41,42,43,44,45,46,47,
428     48,49,50,51,52,53,54,55,
429     56,57,58,59,60,61,62,63,
430     64,97,98,99,100,101,102,103,
431     104,105,106,107,108,109,110,111,
432     112,113,114,115,116,117,118,119,
433     120,121,122,91,92,93,94,95,
434     96,97,98,99,100,101,102,103,
435     104,105,106,107,108,109,110,111,
436     112,113,114,115,116,117,118,119,
437     120,121,122,123,124,125,126,127,
438     128,129,130,131,132,133,134,135,
439     136,137,138,139,140,141,142,143,
440     144,145,146,147,148,149,150,151,
441     152,153,154,155,156,157,158,159,
442     160,161,162,163,164,165,166,167,
443     168,169,170,171,172,173,174,175,
444     176,177,178,179,180,181,182,183,
445     184,185,186,187,188,189,190,191,
446     224,225,226,227,228,229,230,231,
447     232,233,234,235,236,237,238,239,
448     240,241,242,243,244,245,246,215,
449     248,249,250,251,252,253,254,223,
450     224,225,226,227,228,229,230,231,
451     232,233,234,235,236,237,238,239,
452     240,241,242,243,244,245,246,247,
453     248,249,250,251,252,253,254,255,

/* Section 2 (256 bytes): case flipping table. */

454     0,1,2,3,4,5,6,7,
455     8,9,10,11,12,13,14,15,
456     16,17,18,19,20,21,22,23,
457     24,25,26,27,28,29,30,31,
458     32,33,34,35,36,37,38,39,
459     40,41,42,43,44,45,46,47,
460     48,49,50,51,52,53,54,55,
461     56,57,58,59,60,61,62,63,
462     64,97,98,99,100,101,102,103,
463     104,105,106,107,108,109,110,111,
464     112,113,114,115,116,117,118,119,
465     120,121,122,91,92,93,94,95,
466     96,65,66,67,68,69,70,71,
467     72,73,74,75,76,77,78,79,
468     80,81,82,83,84,85,86,87,
469     88,89,90,123,124,125,126,127,
470     128,129,130,131,132,133,134,135,
471     136,137,138,139,140,141,142,143,
472     144,145,146,147,148,149,150,151,
473     152,153,154,155,156,157,158,159,
474     160,161,162,163,164,165,166,167,
475     168,169,170,171,172,173,174,175,
476     176,177,178,179,180,181,182,183,
477     184,185,186,187,188,189,190,191,
478     224,225,226,227,228,229,230,231,
479     232,233,234,235,236,237,238,239,
480     240,241,242,243,244,245,246,215,
481     248,249,250,251,252,253,254,223,
482     192,193,194,195,196,197,198,199,
483     200,201,202,203,204,205,206,207,
484     208,209,210,211,212,213,214,247,
485     216,217,218,219,220,221,222,255,

/* Section 3 (320 bytes): ten 32-byte character class bit maps, four rows
each. NOTE(review): presumably in the same class order as tables0 (space,
xdigit, digit, upper, lower, word, graph, print, punct, cntrl) - confirm. */

486     0,62,0,0,1,0,0,0,
487     0,0,0,0,0,0,0,0,
488     32,0,0,0,1,0,0,0,
489     0,0,0,0,0,0,0,0,
490     0,0,0,0,0,0,255,3,
491     126,0,0,0,126,0,0,0,
492     0,0,0,0,0,0,0,0,
493     0,0,0,0,0,0,0,0,
494     0,0,0,0,0,0,255,3,
495     0,0,0,0,0,0,0,0,
496     0,0,0,0,0,0,12,2,
497     0,0,0,0,0,0,0,0,
498     0,0,0,0,0,0,0,0,
499     254,255,255,7,0,0,0,0,
500     0,0,0,0,0,0,0,0,
501     255,255,127,127,0,0,0,0,
502     0,0,0,0,0,0,0,0,
503     0,0,0,0,254,255,255,7,
504     0,0,0,0,0,4,32,4,
505     0,0,0,128,255,255,127,255,
506     0,0,0,0,0,0,255,3,
507     254,255,255,135,254,255,255,7,
508     0,0,0,0,0,4,44,6,
509     255,255,127,255,255,255,127,255,
510     0,0,0,0,254,255,255,255,
511     255,255,255,255,255,255,255,127,
512     0,0,0,0,254,255,255,255,
513     255,255,255,255,255,255,255,255,
514     0,2,0,0,255,255,255,255,
515     255,255,255,255,255,255,255,127,
516     0,0,0,0,255,255,255,255,
517     255,255,255,255,255,255,255,255,
518     0,0,0,0,254,255,0,252,
519     1,0,0,248,1,0,0,120,
520     0,0,0,0,254,255,255,255,
521     0,0,128,0,0,0,128,0,
522     255,255,255,255,0,0,0,0,
523     0,0,0,0,0,0,0,128,
524     255,255,255,255,0,0,0,0,
525     0,0,0,0,0,0,0,0,

/* Section 4 (256 bytes): per-character type flags, one byte per character
code. The values for 0-127 equal those in tables0 written in decimal
(for example 28 == 0x1c, 26 == 0x1a, 18 == 0x12, 128 == 0x80). */

526     128,0,0,0,0,0,0,0,
527     0,1,1,0,1,1,0,0,
528     0,0,0,0,0,0,0,0,
529     0,0,0,0,0,0,0,0,
530     1,0,0,0,128,0,0,0,
531     128,128,128,128,0,0,128,0,
532     28,28,28,28,28,28,28,28,
533     28,28,0,0,0,0,0,128,
534     0,26,26,26,26,26,26,18,
535     18,18,18,18,18,18,18,18,
536     18,18,18,18,18,18,18,18,
537     18,18,18,128,128,0,128,16,
538     0,26,26,26,26,26,26,18,
539     18,18,18,18,18,18,18,18,
540     18,18,18,18,18,18,18,18,
541     18,18,18,128,128,0,0,0,
542     0,0,0,0,0,1,0,0,
543     0,0,0,0,0,0,0,0,
544     0,0,0,0,0,0,0,0,
545     0,0,0,0,0,0,0,0,
546     1,0,0,0,0,0,0,0,
547     0,0,18,0,0,0,0,0,
548     0,0,20,20,0,18,0,0,
549     0,20,18,0,0,0,0,0,
550     18,18,18,18,18,18,18,18,
551     18,18,18,18,18,18,18,18,
552     18,18,18,18,18,18,18,0,
553     18,18,18,18,18,18,18,18,
554     18,18,18,18,18,18,18,18,
555     18,18,18,18,18,18,18,18,
556     18,18,18,18,18,18,18,0,
557     18,18,18,18,18,18,18,18
558     };
559    
560    
561    
562 ph10 558
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback used only when the build does not define HAVE_STRERROR. Some old
C libraries (SunOS4 is one example) lack strerror() but do export the
sys_nerr / sys_errlist pair, which is enough to provide the same service.

Argument:
  n          an error number

Returns:     the library's message for n, or a fixed "unknown" text when n
             lies outside the range 0 .. sys_nerr-1
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
582    
583    
584 ph10 667 /*************************************************
585     * JIT memory callback *
586     *************************************************/
587 ph10 558
588 ph10 667 static pcre_jit_stack* jit_callback(void *arg)
589     {
590     return (pcre_jit_stack *)arg;
591     }
592 ph10 558
593 ph10 667
594 ph10 558 /*************************************************
595 nigel 91 * Read or extend an input line *
596     *************************************************/
597    
598     /* Input lines are read into buffer, but both patterns and data lines can be
599     continued over multiple input lines. In addition, if the buffer fills up, we
600     want to automatically expand it so as to be able to handle extremely large
601     lines that are needed for certain stress tests. When the input buffer is
602     expanded, the other two buffers must also be expanded likewise, and the
603     contents of pbuffer, which are a copy of the input for callouts, must be
604     preserved (for when expansion happens for a data line). This is not the most
605     optimal way of handling this, but hey, this is just a test program!
606    
607     Arguments:
608     f the file to read
609     start where in buffer to start (this *must* be within buffer)
610 ph10 287 prompt for stdin or readline()
611 nigel 91
612     Returns: pointer to the start of new data
613     could be a copy of start, or could be moved
614     NULL if no data read and EOF reached
615     */
616    
617     static uschar *
618 ph10 287 extend_inputline(FILE *f, uschar *start, const char *prompt)
619 nigel 91 {
620     uschar *here = start;
621    
622     for (;;)
623     {
624 ph10 530 int rlen = (int)(buffer_size - (here - buffer));
625 nigel 93
626 nigel 91 if (rlen > 1000)
627     {
628     int dlen;
629 ph10 289
630 ph10 287 /* If libreadline support is required, use readline() to read a line if the
631     input is a terminal. Note that readline() removes the trailing newline, so
632     we must put it back again, to be compatible with fgets(). */
633 ph10 289
634 ph10 287 #ifdef SUPPORT_LIBREADLINE
635     if (isatty(fileno(f)))
636     {
637 ph10 289 size_t len;
638 ph10 287 char *s = readline(prompt);
639     if (s == NULL) return (here == start)? NULL : start;
640     len = strlen(s);
641 ph10 289 if (len > 0) add_history(s);
642 ph10 287 if (len > rlen - 1) len = rlen - 1;
643     memcpy(here, s, len);
644     here[len] = '\n';
645 ph10 289 here[len+1] = 0;
646     free(s);
647 ph10 287 }
648 ph10 289 else
649     #endif
650    
651 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
652 ph10 289
653 ph10 287 {
654 ph10 516 if (f == stdin) printf("%s", prompt);
655 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
656     return (here == start)? NULL : start;
657 ph10 289 }
658    
659 nigel 91 dlen = (int)strlen((char *)here);
660     if (dlen > 0 && here[dlen - 1] == '\n') return start;
661     here += dlen;
662     }
663    
664     else
665     {
666     int new_buffer_size = 2*buffer_size;
667     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
668     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
669     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
670    
671     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
672     {
673     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
674     exit(1);
675     }
676    
677     memcpy(new_buffer, buffer, buffer_size);
678     memcpy(new_pbuffer, pbuffer, buffer_size);
679    
680     buffer_size = new_buffer_size;
681    
682     start = new_buffer + (start - buffer);
683     here = new_buffer + (here - buffer);
684    
685     free(buffer);
686     free(dbuffer);
687     free(pbuffer);
688    
689     buffer = new_buffer;
690     dbuffer = new_dbuffer;
691     pbuffer = new_pbuffer;
692     }
693     }
694    
695     return NULL; /* Control never gets here */
696     }
697    
698    
699    
700    
701    
702    
703    
/*************************************************
*         Read number from string                *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then a run of decimal digits is converted. If
there are no digits the result is zero. A value too large for an int is
clamped to INT_MAX rather than being allowed to overflow, which would be
undefined behaviour for a signed type.

Arguments:
  str        string to be converted
  endptr     where to put the end pointer (first character not consumed)

Returns:     the value as a non-negative int, at most INT_MAX
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (*str != 0 && isspace(*str)) str++;
while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* result*10 + digit fits in an int only if result <= (INT_MAX - digit)/10.
  Once clamped, the value stays at INT_MAX while remaining digits are eaten. */

  if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
  }
*endptr = str;
return(result);
}
728    
729    
730    
731 nigel 49
732     /*************************************************
733     * Convert UTF-8 string to value *
734     *************************************************/
735    
736     /* This function takes one or more bytes that represents a UTF-8 character,
737     and returns the value of the character.
738    
739     Argument:
740 nigel 91 utf8bytes a pointer to the byte vector
741     vptr a pointer to an int to receive the value
742 nigel 49
743 nigel 91 Returns: > 0 => the number of bytes consumed
744     -6 to 0 => malformed UTF-8 character at offset = (-return)
745 nigel 49 */
746    
747 nigel 79 #if !defined NOUTF8
748    
749 nigel 67 static int
750 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
751 nigel 49 {
752 nigel 91 int c = *utf8bytes++;
753 nigel 49 int d = c;
754     int i, j, s;
755    
756     for (i = -1; i < 6; i++) /* i is number of additional bytes */
757     {
758     if ((d & 0x80) == 0) break;
759     d <<= 1;
760     }
761    
762     if (i == -1) { *vptr = c; return 1; } /* ascii character */
763     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
764    
765     /* i now has a value in the range 1-5 */
766    
767 nigel 59 s = 6*i;
768 nigel 85 d = (c & utf8_table3[i]) << s;
769 nigel 49
770     for (j = 0; j < i; j++)
771     {
772 nigel 91 c = *utf8bytes++;
773 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
774 nigel 59 s -= 6;
775 nigel 49 d |= (c & 0x3f) << s;
776     }
777    
778     /* Check that encoding was the correct unique one */
779    
780 nigel 85 for (j = 0; j < utf8_table1_size; j++)
781     if (d <= utf8_table1[j]) break;
782 nigel 49 if (j != i) return -(i+1);
783    
784     /* Valid value */
785    
786     *vptr = d;
787     return i+1;
788     }
789    
790 nigel 79 #endif
791 nigel 49
792    
793 nigel 79
794 nigel 63 /*************************************************
795 nigel 85 * Convert character value to UTF-8 *
796     *************************************************/
797    
798     /* This function takes an integer value in the range 0 - 0x7fffffff
799     and encodes it as a UTF-8 character in 0 to 6 bytes.
800    
801     Arguments:
802     cvalue the character value
803 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
804 nigel 85
805     Returns: number of characters placed in the buffer
806     */
807    
808 nigel 93 #if !defined NOUTF8
809    
810 nigel 85 static int
811 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
812 nigel 85 {
813     register int i, j;
814     for (i = 0; i < utf8_table1_size; i++)
815     if (cvalue <= utf8_table1[i]) break;
816 nigel 91 utf8bytes += i;
817 nigel 85 for (j = i; j > 0; j--)
818     {
819 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
820 nigel 85 cvalue >>= 6;
821     }
822 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
823 nigel 85 return i + 1;
824     }
825    
826 nigel 93 #endif
827 nigel 85
828    
829 nigel 93
830 nigel 85 /*************************************************
831 nigel 63 * Print character string *
832     *************************************************/
833 nigel 49
834 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
835     mode. Yields number of characters printed. If handed a NULL file, just counts
836     chars without printing. */
837 nigel 49
838 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
839 nigel 3 {
840 nigel 85 int c = 0;
841 nigel 63 int yield = 0;
842 nigel 3
843 nigel 63 while (length-- > 0)
844 nigel 3 {
845 nigel 79 #if !defined NOUTF8
846 nigel 67 if (use_utf8)
847 nigel 63 {
848     int rc = utf82ord(p, &c);
849 nigel 3
850 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
851     {
852     length -= rc - 1;
853     p += rc;
854 nigel 93 if (PRINTHEX(c))
855 nigel 63 {
856     if (f != NULL) fprintf(f, "%c", c);
857     yield++;
858     }
859     else
860     {
861 nigel 93 int n = 4;
862     if (f != NULL) fprintf(f, "\\x{%02x}", c);
863     yield += (n <= 0x000000ff)? 2 :
864     (n <= 0x00000fff)? 3 :
865     (n <= 0x0000ffff)? 4 :
866     (n <= 0x000fffff)? 5 : 6;
867 nigel 63 }
868     continue;
869     }
870     }
871 nigel 79 #endif
872 nigel 3
873 nigel 63 /* Not UTF-8, or malformed UTF-8 */
874    
875 nigel 93 c = *p++;
876     if (PRINTHEX(c))
877 nigel 3 {
878 nigel 63 if (f != NULL) fprintf(f, "%c", c);
879     yield++;
880 nigel 3 }
881 nigel 63 else
882 nigel 3 {
883 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
884     yield += 4;
885     }
886     }
887 nigel 3
888 nigel 63 return yield;
889     }
890 nigel 23
891 nigel 3
892 nigel 23
893 nigel 63 /*************************************************
894     * Callout function *
895     *************************************************/
896 nigel 3
897 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
898     the match. Yield zero unless more callouts than the fail count, or the callout
899     data is not zero. */
900 nigel 3
901 nigel 63 static int callout(pcre_callout_block *cb)
902     {
903     FILE *f = (first_callout | callout_extra)? outfile : NULL;
904 nigel 75 int i, pre_start, post_start, subject_length;
905 nigel 3
906 nigel 63 if (callout_extra)
907     {
908     fprintf(f, "Callout %d: last capture = %d\n",
909     cb->callout_number, cb->capture_last);
910 nigel 3
911 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
912     {
913     if (cb->offset_vector[i] < 0)
914     fprintf(f, "%2d: <unset>\n", i/2);
915     else
916     {
917     fprintf(f, "%2d: ", i/2);
918     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
919     cb->offset_vector[i+1] - cb->offset_vector[i], f);
920     fprintf(f, "\n");
921     }
922     }
923     }
924 nigel 3
925 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
926     datails. On subsequent calls in the same match, we use pchars just to find the
927     printed lengths of the substrings. */
928 nigel 3
929 nigel 63 if (f != NULL) fprintf(f, "--->");
930 nigel 3
931 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
932     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
933     cb->current_position - cb->start_match, f);
934 nigel 3
935 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
936    
937 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
938     cb->subject_length - cb->current_position, f);
939 nigel 3
940 nigel 63 if (f != NULL) fprintf(f, "\n");
941 nigel 9
942 nigel 63 /* Always print appropriate indicators, with callout number if not already
943 nigel 75 shown. For automatic callouts, show the pattern offset. */
944 nigel 3
945 nigel 75 if (cb->callout_number == 255)
946     {
947     fprintf(outfile, "%+3d ", cb->pattern_position);
948     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
949     }
950     else
951     {
952     if (callout_extra) fprintf(outfile, " ");
953     else fprintf(outfile, "%3d ", cb->callout_number);
954     }
955 nigel 3
956 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
957     fprintf(outfile, "^");
958 nigel 3
959 nigel 63 if (post_start > 0)
960     {
961     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
962     fprintf(outfile, "^");
963 nigel 3 }
964    
965 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
966     fprintf(outfile, " ");
967    
968     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
969     pbuffer + cb->pattern_position);
970    
971 nigel 63 fprintf(outfile, "\n");
972     first_callout = 0;
973 nigel 3
974 ph10 654 if (cb->mark != last_callout_mark)
975 ph10 645 {
976 ph10 654 fprintf(outfile, "Latest Mark: %s\n",
977 ph10 645 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
978 ph10 654 last_callout_mark = cb->mark;
979     }
980 ph10 645
981 nigel 71 if (cb->callout_data != NULL)
982 nigel 49 {
983 nigel 71 int callout_data = *((int *)(cb->callout_data));
984     if (callout_data != 0)
985     {
986     fprintf(outfile, "Callout data = %d\n", callout_data);
987     return callout_data;
988     }
989 nigel 63 }
990 nigel 49
991 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
992     (++callout_count >= callout_fail_count)? 1 : 0;
993 nigel 3 }
994    
995    
996 nigel 63 /*************************************************
997 nigel 73 * Local malloc functions *
998 nigel 63 *************************************************/
999 nigel 3
1000 ph10 667 /* Alternative malloc function, to test functionality and save the size of a
1001     compiled re. The show_malloc variable is set only during matching. */
1002 nigel 3
1003     static void *new_malloc(size_t size)
1004     {
1005 nigel 73 void *block = malloc(size);
1006 nigel 43 gotten_store = size;
1007 nigel 73 if (show_malloc)
1008 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1009 nigel 73 return block;
1010 nigel 3 }
1011    
1012 nigel 73 static void new_free(void *block)
1013     {
1014     if (show_malloc)
1015     fprintf(outfile, "free %p\n", block);
1016     free(block);
1017     }
1018 nigel 3
1019 nigel 73 /* For recursion malloc/free, to test stacking calls */
1020    
1021     static void *stack_malloc(size_t size)
1022     {
1023     void *block = malloc(size);
1024     if (show_malloc)
1025 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1026 nigel 73 return block;
1027     }
1028    
1029     static void stack_free(void *block)
1030     {
1031     if (show_malloc)
1032     fprintf(outfile, "stack_free %p\n", block);
1033     free(block);
1034     }
1035    
1036    
1037 nigel 63 /*************************************************
1038     * Call pcre_fullinfo() *
1039     *************************************************/
1040 nigel 43
1041     /* Get one piece of information from the pcre_fullinfo() function */
1042    
1043     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1044     {
1045     int rc;
1046     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1047     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1048     }
1049    
1050    
1051    
1052 nigel 63 /*************************************************
1053 ph10 674 * Check for supported JIT architecture *
1054     *************************************************/
1055    
1056     /* If it won't JIT-compile a very simple regex, return FALSE. */
1057    
1058     static int check_jit_arch(void)
1059     {
1060     const char *error;
1061     int erroffset, rc;
1062     pcre *re = pcre_compile("abc", 0, &error, &erroffset, NULL);
1063     pcre_extra *extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);
1064     rc = extra != NULL && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
1065     extra->executable_jit != NULL;
1066     pcre_free_study(extra);
1067     free(re);
1068     return rc;
1069     }
1070    
1071    
1072     /*************************************************
1073 nigel 75 * Byte flipping function *
1074     *************************************************/
1075    
/* Reverse the byte order of a 2-byte or a 4-byte quantity.

Arguments:
  value   the value to be flipped
  n       2 selects a 2-byte swap; any other value selects a 4-byte swap

Returns:  the flipped value; bits above the selected width are discarded
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1085    
1086    
1087    
1088    
1089     /*************************************************
1090 nigel 87 * Check match or recursion limit *
1091     *************************************************/
1092    
1093     static int
1094     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1095     int start_offset, int options, int *use_offsets, int use_size_offsets,
1096     int flag, unsigned long int *limit, int errnumber, const char *msg)
1097     {
1098     int count;
1099     int min = 0;
1100     int mid = 64;
1101     int max = -1;
1102    
1103     extra->flags |= flag;
1104    
1105     for (;;)
1106     {
1107     *limit = mid;
1108    
1109     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1110     use_offsets, use_size_offsets);
1111    
1112     if (count == errnumber)
1113     {
1114     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1115     min = mid;
1116     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1117     }
1118    
1119     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1120     count == PCRE_ERROR_PARTIAL)
1121     {
1122     if (mid == min + 1)
1123     {
1124     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1125     break;
1126     }
1127     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1128     max = mid;
1129     mid = (min + mid)/2;
1130     }
1131     else break; /* Some other error */
1132     }
1133    
1134     extra->flags &= ~flag;
1135     return count;
1136     }
1137    
1138    
1139    
1140     /*************************************************
1141 ph10 227 * Case-independent strncmp() function *
1142     *************************************************/
1143    
1144     /*
1145     Arguments:
1146     s first string
1147     t second string
1148     n number of characters to compare
1149    
1150     Returns: < 0, = 0, or > 0, according to the comparison
1151     */
1152    
1153     static int
1154     strncmpic(uschar *s, uschar *t, int n)
1155     {
1156     while (n--)
1157     {
1158     int c = tolower(*s++) - tolower(*t++);
1159     if (c) return c;
1160     }
1161     return 0;
1162     }
1163    
1164    
1165    
1166     /*************************************************
1167 nigel 91 * Check newline indicator *
1168     *************************************************/
1169    
1170 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1171     a message and return 0 if there is no match.
1172 nigel 91
1173     Arguments:
1174     p points after the leading '<'
1175     f file for error message
1176    
1177     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1178     */
1179    
1180     static int
1181     check_newline(uschar *p, FILE *f)
1182     {
1183 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1184     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1185     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1186     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1187     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1188 ph10 231 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1189     if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1190 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
1191     return 0;
1192     }
1193    
1194    
1195    
1196     /*************************************************
1197 nigel 93 * Usage function *
1198     *************************************************/
1199    
/* Write the command-line help text to stdout. Some lines depend on how the
program was built (SUPPORT_LIBREADLINE, NODFA, NOPOSIX). */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n",
      stdout);

fputs(" -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n",
      stdout);
}
1234    
1235    
1236    
1237     /*************************************************
1238 nigel 63 * Main Program *
1239     *************************************************/
1240 nigel 43
1241 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
1242     consist of a regular expression, in delimiters and optionally followed by
1243     options, followed by a set of test data, terminated by an empty line. */
1244    
1245     int main(int argc, char **argv)
1246     {
1247     FILE *infile = stdin;
1248     int options = 0;
1249     int study_options = 0;
1250 ph10 386 int default_find_match_limit = FALSE;
1251 nigel 3 int op = 1;
1252     int timeit = 0;
1253 nigel 93 int timeitm = 0;
1254 nigel 3 int showinfo = 0;
1255 nigel 31 int showstore = 0;
1256 ph10 667 int force_study = -1;
1257     int force_study_options = 0;
1258 nigel 87 int quiet = 0;
1259 nigel 53 int size_offsets = 45;
1260     int size_offsets_max;
1261 nigel 77 int *offsets = NULL;
1262 nigel 53 #if !defined NOPOSIX
1263 nigel 3 int posix = 0;
1264 nigel 53 #endif
1265 nigel 3 int debug = 0;
1266 nigel 11 int done = 0;
1267 nigel 77 int all_use_dfa = 0;
1268     int yield = 0;
1269 nigel 91 int stack_size;
1270 nigel 3
1271 ph10 667 pcre_jit_stack *jit_stack = NULL;
1272    
1273    
1274 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
1275     that 1024 is plenty long enough for the few names we'll be testing. */
1276 nigel 69
1277 nigel 91 uschar copynames[1024];
1278     uschar getnames[1024];
1279    
1280     uschar *copynamesptr;
1281     uschar *getnamesptr;
1282    
1283 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
1284 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
1285 nigel 69
1286 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
1287     dbuffer = (unsigned char *)malloc(buffer_size);
1288     pbuffer = (unsigned char *)malloc(buffer_size);
1289 nigel 69
1290 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
1291 nigel 3
1292 nigel 93 outfile = stdout;
1293    
1294     /* The following _setmode() stuff is some Windows magic that tells its runtime
1295     library to translate CRLF into a single LF character. At least, that's what
1296     I've been told: never having used Windows I take this all on trust. Originally
1297     it set 0x8000, but then I was advised that _O_BINARY was better. */
1298    
1299 nigel 75 #if defined(_WIN32) || defined(WIN32)
1300 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
1301     #endif
1302 nigel 75
1303 nigel 3 /* Scan options */
1304    
1305     while (argc > 1 && argv[op][0] == '-')
1306     {
1307 nigel 63 unsigned char *endptr;
1308 nigel 53
1309 ph10 606 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1310 ph10 667 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1311     else if (strcmp(argv[op], "-s+") == 0)
1312     {
1313     force_study = 1;
1314     force_study_options = PCRE_STUDY_JIT_COMPILE;
1315     }
1316 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1317 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1318 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1319     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1320 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1321 nigel 79 #if !defined NODFA
1322 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1323 nigel 79 #endif
1324 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1325 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1326     *endptr == 0))
1327 nigel 53 {
1328     op++;
1329     argc--;
1330     }
1331 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1332     {
1333     int both = argv[op][2] == 0;
1334     int temp;
1335     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1336     *endptr == 0))
1337     {
1338     timeitm = temp;
1339     op++;
1340     argc--;
1341     }
1342     else timeitm = LOOPREPEAT;
1343     if (both) timeit = timeitm;
1344     }
1345 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1346     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1347     *endptr == 0))
1348     {
1349 ph10 641 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1350 nigel 91 printf("PCRE: -S not supported on this OS\n");
1351     exit(1);
1352     #else
1353     int rc;
1354     struct rlimit rlim;
1355     getrlimit(RLIMIT_STACK, &rlim);
1356     rlim.rlim_cur = stack_size * 1024 * 1024;
1357     rc = setrlimit(RLIMIT_STACK, &rlim);
1358     if (rc != 0)
1359     {
1360     printf("PCRE: setrlimit() failed with error %d\n", rc);
1361     exit(1);
1362     }
1363     op++;
1364     argc--;
1365     #endif
1366     }
1367 nigel 53 #if !defined NOPOSIX
1368 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1369 nigel 53 #endif
1370 nigel 63 else if (strcmp(argv[op], "-C") == 0)
1371     {
1372     int rc;
1373 ph10 392 unsigned long int lrc;
1374 nigel 63 printf("PCRE version %s\n", pcre_version());
1375     printf("Compiled with\n");
1376     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1377     printf(" %sUTF-8 support\n", rc? "" : "No ");
1378 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1379     printf(" %sUnicode properties support\n", rc? "" : "No ");
1380 ph10 667 (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1381 ph10 674 if (rc)
1382     printf(" Just-in-time compiler support%s\n", check_jit_arch()?
1383     "" : " (but this architecture is unsupported)");
1384     else
1385     printf(" No just-in-time compiler support\n");
1386 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1387 ph10 391 /* Note that these values are always the ASCII values, even
1388 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
1389 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1390     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1391 ph10 150 (rc == -2)? "ANYCRLF" :
1392 nigel 93 (rc == -1)? "ANY" : "???");
1393 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1394     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1395     "all Unicode newlines");
1396 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1397     printf(" Internal link size = %d\n", rc);
1398     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1399     printf(" POSIX malloc threshold = %d\n", rc);
1400 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1401     printf(" Default match limit = %ld\n", lrc);
1402     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1403     printf(" Default recursion depth limit = %ld\n", lrc);
1404 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1405     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1406 ph10 121 goto EXIT;
1407 nigel 63 }
1408 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
1409     strcmp(argv[op], "--help") == 0)
1410     {
1411     usage();
1412     goto EXIT;
1413     }
1414 nigel 3 else
1415     {
1416 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
1417 nigel 93 usage();
1418 nigel 77 yield = 1;
1419     goto EXIT;
1420 nigel 3 }
1421     op++;
1422     argc--;
1423     }
1424    
1425 nigel 53 /* Get the store for the offsets vector, and remember what it was */
1426    
1427     size_offsets_max = size_offsets;
1428 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1429 nigel 53 if (offsets == NULL)
1430     {
1431     printf("** Failed to get %d bytes of memory for offsets vector\n",
1432 ph10 151 (int)(size_offsets_max * sizeof(int)));
1433 nigel 77 yield = 1;
1434     goto EXIT;
1435 nigel 53 }
1436    
1437 nigel 3 /* Sort out the input and output files */
1438    
1439     if (argc > 1)
1440     {
1441 nigel 93 infile = fopen(argv[op], INPUT_MODE);
1442 nigel 3 if (infile == NULL)
1443     {
1444     printf("** Failed to open %s\n", argv[op]);
1445 nigel 77 yield = 1;
1446     goto EXIT;
1447 nigel 3 }
1448     }
1449    
1450     if (argc > 2)
1451     {
1452 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
1453 nigel 3 if (outfile == NULL)
1454     {
1455     printf("** Failed to open %s\n", argv[op+1]);
1456 nigel 77 yield = 1;
1457     goto EXIT;
1458 nigel 3 }
1459     }
1460    
1461     /* Set alternative malloc function */
1462    
1463     pcre_malloc = new_malloc;
1464 nigel 73 pcre_free = new_free;
1465     pcre_stack_malloc = stack_malloc;
1466     pcre_stack_free = stack_free;
1467 nigel 3
1468 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1469 nigel 3
1470 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1471 nigel 3
1472     /* Main loop */
1473    
1474 nigel 11 while (!done)
1475 nigel 3 {
1476     pcre *re = NULL;
1477     pcre_extra *extra = NULL;
1478 nigel 37
1479     #if !defined NOPOSIX /* There are still compilers that require no indent */
1480 nigel 3 regex_t preg;
1481 nigel 45 int do_posix = 0;
1482 nigel 37 #endif
1483    
1484 nigel 7 const char *error;
1485 ph10 512 unsigned char *markptr;
1486 nigel 25 unsigned char *p, *pp, *ppp;
1487 nigel 75 unsigned char *to_file = NULL;
1488 nigel 53 const unsigned char *tables = NULL;
1489 nigel 75 unsigned long int true_size, true_study_size = 0;
1490     size_t size, regex_gotten_store;
1491 ph10 654 int do_allcaps = 0;
1492 ph10 512 int do_mark = 0;
1493 nigel 3 int do_study = 0;
1494 ph10 654 int no_force_study = 0;
1495 nigel 25 int do_debug = debug;
1496 nigel 35 int do_G = 0;
1497     int do_g = 0;
1498 nigel 25 int do_showinfo = showinfo;
1499 nigel 35 int do_showrest = 0;
1500 ph10 616 int do_showcaprest = 0;
1501 nigel 75 int do_flip = 0;
1502 nigel 93 int erroroffset, len, delimiter, poffset;
1503 nigel 3
1504 nigel 67 use_utf8 = 0;
1505 ph10 211 debug_lengths = 1;
1506 nigel 63
1507 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1508 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1509 nigel 63 fflush(outfile);
1510 nigel 3
1511     p = buffer;
1512     while (isspace(*p)) p++;
1513     if (*p == 0) continue;
1514    
1515 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1516 nigel 3
1517 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1518     {
1519 nigel 91 unsigned long int magic, get_options;
1520 nigel 75 uschar sbuf[8];
1521     FILE *f;
1522    
1523     p++;
1524     pp = p + (int)strlen((char *)p);
1525     while (isspace(pp[-1])) pp--;
1526     *pp = 0;
1527    
1528     f = fopen((char *)p, "rb");
1529     if (f == NULL)
1530     {
1531     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1532     continue;
1533     }
1534    
1535     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1536    
1537     true_size =
1538     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1539     true_study_size =
1540     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1541    
1542     re = (real_pcre *)new_malloc(true_size);
1543     regex_gotten_store = gotten_store;
1544    
1545     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1546    
1547     magic = ((real_pcre *)re)->magic_number;
1548     if (magic != MAGIC_NUMBER)
1549     {
1550     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1551     {
1552     do_flip = 1;
1553     }
1554     else
1555     {
1556     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1557     fclose(f);
1558     continue;
1559     }
1560     }
1561    
1562 ph10 612 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1563 nigel 75 do_flip? " (byte-inverted)" : "", p);
1564    
1565     /* Need to know if UTF-8 for printing data strings */
1566    
1567 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1568     use_utf8 = (get_options & PCRE_UTF8) != 0;
1569 nigel 75
1570 ph10 612 /* Now see if there is any following study data. */
1571 nigel 75
1572     if (true_study_size != 0)
1573     {
1574     pcre_study_data *psd;
1575    
1576     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1577     extra->flags = PCRE_EXTRA_STUDY_DATA;
1578    
1579     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1580     extra->study_data = psd;
1581    
1582     if (fread(psd, 1, true_study_size, f) != true_study_size)
1583     {
1584     FAIL_READ:
1585     fprintf(outfile, "Failed to read data from %s\n", p);
1586 ph10 667 if (extra != NULL) pcre_free_study(extra);
1587 nigel 75 if (re != NULL) new_free(re);
1588     fclose(f);
1589     continue;
1590     }
1591     fprintf(outfile, "Study data loaded from %s\n", p);
1592     do_study = 1; /* To get the data output if requested */
1593     }
1594     else fprintf(outfile, "No study data\n");
1595    
1596     fclose(f);
1597     goto SHOW_INFO;
1598     }
1599    
1600     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1601     the pattern; if it isn't complete, read more. */
1602    
1603 nigel 3 delimiter = *p++;
1604    
1605 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1606 nigel 3 {
1607 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1608 nigel 3 goto SKIP_DATA;
1609     }
1610    
1611     pp = p;
1612 ph10 530 poffset = (int)(p - buffer);
1613 nigel 3
1614     for(;;)
1615     {
1616 nigel 29 while (*pp != 0)
1617     {
1618     if (*pp == '\\' && pp[1] != 0) pp++;
1619     else if (*pp == delimiter) break;
1620     pp++;
1621     }
1622 nigel 3 if (*pp != 0) break;
1623 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1624 nigel 3 {
1625     fprintf(outfile, "** Unexpected EOF\n");
1626 nigel 11 done = 1;
1627     goto CONTINUE;
1628 nigel 3 }
1629 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1630 nigel 3 }
1631    
1632 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1633     pointer to the correct relative point in the buffer. */
1634    
1635     p = buffer + poffset;
1636    
1637 nigel 29 /* If the first character after the delimiter is backslash, make
1638     the pattern end with backslash. This is purely to provide a way
1639     of testing for the error message when a pattern ends with backslash. */
1640    
1641     if (pp[1] == '\\') *pp++ = '\\';
1642    
1643 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1644     for callouts. */
1645 nigel 3
1646     *pp++ = 0;
1647 nigel 75 strcpy((char *)pbuffer, (char *)p);
1648 nigel 3
1649     /* Look for options after final delimiter */
1650    
1651     options = 0;
1652 nigel 31 log_store = showstore; /* default from command line */
1653    
1654 nigel 3 while (*pp != 0)
1655     {
1656     switch (*pp++)
1657     {
1658 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1659 nigel 35 case 'g': do_g = 1; break;
1660 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1661     case 'm': options |= PCRE_MULTILINE; break;
1662     case 's': options |= PCRE_DOTALL; break;
1663     case 'x': options |= PCRE_EXTENDED; break;
1664 nigel 25
1665 ph10 616 case '+':
1666 ph10 654 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1667 ph10 616 break;
1668 ph10 654
1669     case '=': do_allcaps = 1; break;
1670 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1671 nigel 93 case 'B': do_debug = 1; break;
1672 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1673 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1674 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1675 nigel 75 case 'F': do_flip = 1; break;
1676 nigel 35 case 'G': do_G = 1; break;
1677 nigel 25 case 'I': do_showinfo = 1; break;
1678 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1679 ph10 512 case 'K': do_mark = 1; break;
1680 nigel 31 case 'M': log_store = 1; break;
1681 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1682 nigel 37
1683     #if !defined NOPOSIX
1684 nigel 3 case 'P': do_posix = 1; break;
1685 nigel 37 #endif
1686    
1687 ph10 654 case 'S':
1688 ph10 667 if (do_study == 0)
1689 ph10 612 {
1690 ph10 667 do_study = 1;
1691     if (*pp == '+')
1692     {
1693     study_options |= PCRE_STUDY_JIT_COMPILE;
1694     pp++;
1695     }
1696     }
1697     else
1698     {
1699 ph10 612 do_study = 0;
1700     no_force_study = 1;
1701 ph10 654 }
1702 ph10 612 break;
1703    
1704 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1705 ph10 535 case 'W': options |= PCRE_UCP; break;
1706 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1707 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1708 ph10 126 case 'Z': debug_lengths = 0; break;
1709 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1710 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1711 ph10 545
1712 ph10 541 case 'T':
1713     switch (*pp++)
1714     {
1715     case '0': tables = tables0; break;
1716     case '1': tables = tables1; break;
1717 ph10 545
1718 ph10 541 case '\r':
1719     case '\n':
1720 ph10 545 case ' ':
1721     case 0:
1722 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
1723 ph10 545 goto SKIP_DATA;
1724    
1725     default:
1726 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1727 ph10 545 goto SKIP_DATA;
1728 ph10 541 }
1729 ph10 545 break;
1730 nigel 25
1731     case 'L':
1732     ppp = pp;
1733 nigel 93 /* The '\r' test here is so that it works on Windows. */
1734     /* The '0' test is just in case this is an unterminated line. */
1735     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1736 nigel 25 *ppp = 0;
1737     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1738     {
1739     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1740     goto SKIP_DATA;
1741     }
1742 nigel 93 locale_set = 1;
1743 nigel 25 tables = pcre_maketables();
1744     pp = ppp;
1745     break;
1746    
1747 nigel 75 case '>':
1748     to_file = pp;
1749     while (*pp != 0) pp++;
1750     while (isspace(pp[-1])) pp--;
1751     *pp = 0;
1752     break;
1753    
1754 nigel 91 case '<':
1755     {
1756 ph10 518 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1757 ph10 336 {
1758     options |= PCRE_JAVASCRIPT_COMPAT;
1759 ph10 345 pp += 3;
1760 ph10 336 }
1761     else
1762 ph10 345 {
1763 ph10 336 int x = check_newline(pp, outfile);
1764     if (x == 0) goto SKIP_DATA;
1765     options |= x;
1766     while (*pp++ != '>');
1767 ph10 345 }
1768 nigel 91 }
1769     break;
1770    
1771 nigel 77 case '\r': /* So that it works in Windows */
1772     case '\n':
1773     case ' ':
1774     break;
1775 nigel 75
1776 nigel 3 default:
1777     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1778     goto SKIP_DATA;
1779     }
1780     }
1781    
1782 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1783 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1784     local character tables. */
1785 nigel 3
1786 nigel 37 #if !defined NOPOSIX
1787 nigel 3 if (posix || do_posix)
1788     {
1789     int rc;
1790     int cflags = 0;
1791 nigel 75
1792 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1793     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1794 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1795 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1796     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1797 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1798 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1799 nigel 87
1800 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1801    
1802     /* Compilation failed; go back for another re, skipping to blank line
1803     if non-interactive. */
1804    
1805     if (rc != 0)
1806     {
1807 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1808 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1809     goto SKIP_DATA;
1810     }
1811     }
1812    
1813     /* Handle compiling via the native interface */
1814    
1815     else
1816 nigel 37 #endif /* !defined NOPOSIX */
1817    
1818 nigel 3 {
1819 ph10 412 unsigned long int get_options;
1820 ph10 416
1821 nigel 93 if (timeit > 0)
1822 nigel 3 {
1823     register int i;
1824     clock_t time_taken;
1825     clock_t start_time = clock();
1826 nigel 93 for (i = 0; i < timeit; i++)
1827 nigel 3 {
1828 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1829 nigel 3 if (re != NULL) free(re);
1830     }
1831     time_taken = clock() - start_time;
1832 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1833     (((double)time_taken * 1000.0) / (double)timeit) /
1834 nigel 63 (double)CLOCKS_PER_SEC);
1835 nigel 3 }
1836    
1837 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1838 nigel 3
1839     /* Compilation failed; go back for another re, skipping to blank line
1840     if non-interactive. */
1841    
1842     if (re == NULL)
1843     {
1844     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1845     SKIP_DATA:
1846     if (infile != stdin)
1847     {
1848     for (;;)
1849     {
1850 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
1851 nigel 11 {
1852     done = 1;
1853     goto CONTINUE;
1854     }
1855 nigel 3 len = (int)strlen((char *)buffer);
1856     while (len > 0 && isspace(buffer[len-1])) len--;
1857     if (len == 0) break;
1858     }
1859     fprintf(outfile, "\n");
1860     }
1861 nigel 25 goto CONTINUE;
1862 nigel 3 }
1863 ph10 416
1864     /* Compilation succeeded. It is now possible to set the UTF-8 option from
1865     within the regex; check for this so that we know how to process the data
1866 ph10 412 lines. */
1867 ph10 416
1868 ph10 412 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1869     if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1870 nigel 3
1871 ph10 412 /* Print information if required. There are now two info-returning
1872     functions. The old one has a limited interface and returns only limited
1873     data. Check that it agrees with the newer one. */
1874 nigel 3
1875 nigel 63 if (log_store)
1876     fprintf(outfile, "Memory allocation (code space): %d\n",
1877     (int)(gotten_store -
1878     sizeof(real_pcre) -
1879     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1880    
1881 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1882     and remember the store that was got. */
1883    
1884     true_size = ((real_pcre *)re)->size;
1885     regex_gotten_store = gotten_store;
1886    
1887 ph10 612 /* If -s or /S was present, study the regex to generate additional info to
1888 ph10 654 help with the matching, unless the pattern has the SS option, which
1889 ph10 612 suppresses the effect of -s (used for a few test patterns where studying is
1890     never sensible). */
1891 nigel 75
1892 ph10 667 if (do_study || (force_study >= 0 && !no_force_study))
1893 nigel 75 {
1894 nigel 93 if (timeit > 0)
1895 nigel 75 {
1896     register int i;
1897     clock_t time_taken;
1898     clock_t start_time = clock();
1899 nigel 93 for (i = 0; i < timeit; i++)
1900 ph10 667 extra = pcre_study(re, study_options | force_study_options, &error);
1901 nigel 75 time_taken = clock() - start_time;
1902 ph10 667 if (extra != NULL) pcre_free_study(extra);
1903 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1904     (((double)time_taken * 1000.0) / (double)timeit) /
1905 nigel 75 (double)CLOCKS_PER_SEC);
1906     }
1907 ph10 667 extra = pcre_study(re, study_options | force_study_options, &error);
1908 nigel 75 if (error != NULL)
1909     fprintf(outfile, "Failed to study: %s\n", error);
1910     else if (extra != NULL)
1911     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1912     }
1913 ph10 512
1914 ph10 510 /* If /K was present, we set up for handling MARK data. */
1915 ph10 512
1916 ph10 510 if (do_mark)
1917     {
1918     if (extra == NULL)
1919     {
1920     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1921     extra->flags = 0;
1922     }
1923 ph10 512 extra->mark = &markptr;
1924 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
1925 ph10 512 }
1926 nigel 75
1927     /* If the 'F' option was present, we flip the bytes of all the integer
1928     fields in the regex data block and the study block. This is to make it
1929     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1930     compiled on a different architecture. */
1931    
1932     if (do_flip)
1933     {
1934     real_pcre *rre = (real_pcre *)re;
1935 ph10 259 rre->magic_number =
1936 ph10 255 byteflip(rre->magic_number, sizeof(rre->magic_number));
1937 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
1938     rre->options = byteflip(rre->options, sizeof(rre->options));
1939 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1940 ph10 259 rre->top_bracket =
1941 ph10 255 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1942 ph10 259 rre->top_backref =
1943 ph10 255 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1944 ph10 259 rre->first_byte =
1945 ph10 255 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1946 ph10 259 rre->req_byte =
1947 ph10 255 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1948     rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1949 nigel 75 sizeof(rre->name_table_offset));
1950 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1951 nigel 75 sizeof(rre->name_entry_size));
1952 ph10 259 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1953 ph10 255 sizeof(rre->name_count));
1954 nigel 75
1955     if (extra != NULL)
1956     {
1957     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1958     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1959 ph10 455 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1960     rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1961 nigel 75 }
1962     }
1963    
1964     /* Extract information from the compiled data if required */
1965    
1966     SHOW_INFO:
1967    
1968 nigel 93 if (do_debug)
1969     {
1970     fprintf(outfile, "------------------------------------------------------------------\n");
1971 ph10 116 pcre_printint(re, outfile, debug_lengths);
1972 nigel 93 }
1973 ph10 416
1974 ph10 412 /* We already have the options in get_options (see above) */
1975 nigel 93
1976 nigel 25 if (do_showinfo)
1977 nigel 3 {
1978 ph10 412 unsigned long int all_options;
1979 nigel 79 #if !defined NOINFOCHECK
1980 nigel 43 int old_first_char, old_options, old_count;
1981 nigel 79 #endif
1982 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1983 ph10 227 hascrorlf;
1984 nigel 63 int nameentrysize, namecount;
1985     const uschar *nametable;
1986 nigel 3
1987 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1988     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1989     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1990 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1991 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1992 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1993     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1994 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1995 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1996     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1997 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1998 nigel 43
1999 nigel 79 #if !defined NOINFOCHECK
2000 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
2001 nigel 3 if (count < 0) fprintf(outfile,
2002 nigel 43 "Error %d from pcre_info()\n", count);
2003 nigel 3 else
2004     {
2005 nigel 43 if (old_count != count) fprintf(outfile,
2006     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2007     old_count);
2008 nigel 37
2009 nigel 43 if (old_first_char != first_char) fprintf(outfile,
2010     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2011     first_char, old_first_char);
2012 nigel 37
2013 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
2014     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2015     get_options, old_options);
2016 nigel 43 }
2017 nigel 79 #endif
2018 nigel 43
2019 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
2020 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2021 nigel 77 (int)size, (int)regex_gotten_store);
2022 nigel 43
2023     fprintf(outfile, "Capturing subpattern count = %d\n", count);
2024     if (backrefmax > 0)
2025     fprintf(outfile, "Max back reference = %d\n", backrefmax);
2026 nigel 63
2027     if (namecount > 0)
2028     {
2029     fprintf(outfile, "Named capturing subpatterns:\n");
2030     while (namecount-- > 0)
2031     {
2032     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
2033     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2034     GET2(nametable, 0));
2035     nametable += nameentrysize;
2036     }
2037     }
2038 ph10 172
2039 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2040 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2041 nigel 63
2042 nigel 75 all_options = ((real_pcre *)re)->options;
2043 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2044 nigel 75
2045 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
2046 ph10 576 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2047 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2048     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2049     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2050     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2051 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2052 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2053 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2054     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2055 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2056     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2057     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2058 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2059 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2060 ph10 518 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2061 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2062 ph10 576 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2063 nigel 91 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2064 ph10 172
2065 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2066 nigel 43
2067 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
2068 nigel 91 {
2069     case PCRE_NEWLINE_CR:
2070     fprintf(outfile, "Forced newline sequence: CR\n");
2071     break;
2072 nigel 43
2073 nigel 91 case PCRE_NEWLINE_LF:
2074     fprintf(outfile, "Forced newline sequence: LF\n");
2075     break;
2076    
2077     case PCRE_NEWLINE_CRLF:
2078     fprintf(outfile, "Forced newline sequence: CRLF\n");
2079     break;
2080    
2081 ph10 149 case PCRE_NEWLINE_ANYCRLF:
2082     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2083     break;
2084    
2085 nigel 93 case PCRE_NEWLINE_ANY:
2086     fprintf(outfile, "Forced newline sequence: ANY\n");
2087     break;
2088    
2089 nigel 91 default:
2090     break;
2091     }
2092    
2093 nigel 43 if (first_char == -1)
2094     {
2095 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
2096 nigel 43 }
2097     else if (first_char < 0)
2098     {
2099     fprintf(outfile, "No first char\n");
2100     }
2101     else
2102     {
2103 nigel 63 int ch = first_char & 255;
2104 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2105 nigel 63 "" : " (caseless)";
2106 nigel 93 if (PRINTHEX(ch))
2107 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2108 nigel 3 else
2109 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2110 nigel 43 }
2111 nigel 37
2112 nigel 43 if (need_char < 0)
2113     {
2114     fprintf(outfile, "No need char\n");
2115 nigel 3 }
2116 nigel 43 else
2117     {
2118 nigel 63 int ch = need_char & 255;
2119 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2120 nigel 63 "" : " (caseless)";
2121 nigel 93 if (PRINTHEX(ch))
2122 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2123 nigel 43 else
2124 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2125 nigel 43 }
2126 nigel 75
2127     /* Don't output study size; at present it is in any case a fixed
2128     value, but it varies, depending on the computer architecture, and
2129     so messes up the test suite. (And with the /F option, it might be
2130 ph10 654 flipped.) If study was forced by an external -s, don't show this
2131 ph10 612 information unless -i or -d was also present. This means that, except
2132     when auto-callouts are involved, the output from runs with and without
2133     -s should be identical. */
2134 nigel 75
2135 ph10 667 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2136 nigel 75 {
2137     if (extra == NULL)
2138     fprintf(outfile, "Study returned NULL\n");
2139     else
2140     {
2141     uschar *start_bits = NULL;
2142 ph10 455 int minlength;
2143 ph10 461
2144 ph10 455 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2145 ph10 461 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2146    
2147 nigel 75 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2148     if (start_bits == NULL)
2149 ph10 455 fprintf(outfile, "No set of starting bytes\n");
2150 nigel 75 else
2151     {
2152     int i;
2153     int c = 24;
2154     fprintf(outfile, "Starting byte set: ");
2155     for (i = 0; i < 256; i++)
2156     {
2157     if ((start_bits[i/8] & (1<<(i&7))) != 0)
2158     {
2159     if (c > 75)
2160     {
2161     fprintf(outfile, "\n ");
2162     c = 2;
2163     }
2164 nigel 93 if (PRINTHEX(i) && i != ' ')
2165 nigel 75 {
2166     fprintf(outfile, "%c ", i);
2167     c += 2;
2168     }
2169     else
2170     {
2171     fprintf(outfile, "\\x%02x ", i);
2172     c += 5;
2173     }
2174     }
2175     }
2176     fprintf(outfile, "\n");
2177     }
2178     }
2179 ph10 667
2180     /* Show this only if the JIT was set by /S, not by -s. */
2181    
2182     if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2183     {
2184     int jit;
2185     new_info(re, extra, PCRE_INFO_JIT, &jit);
2186     if (jit)
2187     fprintf(outfile, "JIT study was successful\n");
2188     else
2189     #ifdef SUPPORT_JIT
2190     fprintf(outfile, "JIT study was not successful\n");
2191     #else
2192     fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2193     #endif
2194     }
2195 nigel 75 }
2196 nigel 3 }
2197    
2198 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
2199     that is all. The first 8 bytes of the file are the regex length and then
2200     the study length, in big-endian order. */
2201 nigel 3
2202 nigel 75 if (to_file != NULL)
2203 nigel 3 {
2204 nigel 75 FILE *f = fopen((char *)to_file, "wb");
2205     if (f == NULL)
2206 nigel 3 {
2207 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2208 nigel 3 }
2209 nigel 75 else
2210     {
2211     uschar sbuf[8];
2212 ph10 255 sbuf[0] = (uschar)((true_size >> 24) & 255);
2213     sbuf[1] = (uschar)((true_size >> 16) & 255);
2214     sbuf[2] = (uschar)((true_size >> 8) & 255);
2215     sbuf[3] = (uschar)((true_size) & 255);
2216 ph10 259
2217 ph10 255 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2218     sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2219     sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2220     sbuf[7] = (uschar)((true_study_size) & 255);
2221 nigel 3
2222 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
2223     fwrite(re, 1, true_size, f) < true_size)
2224     {
2225     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2226     }
2227 nigel 3 else
2228     {
2229 ph10 612 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2230 ph10 654
2231 ph10 658 /* If there is study data, write it. */
2232 ph10 654
2233 nigel 75 if (extra != NULL)
2234 nigel 3 {
2235 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
2236     true_study_size)
2237 nigel 3 {
2238 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
2239     strerror(errno));
2240 nigel 3 }
2241 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
2242 nigel 3 }
2243     }
2244 nigel 75 fclose(f);
2245 nigel 3 }
2246 nigel 77
2247     new_free(re);
2248 ph10 667 if (extra != NULL) pcre_free_study(extra);
2249 ph10 545 if (locale_set)
2250 ph10 541 {
2251     new_free((void *)tables);
2252     setlocale(LC_CTYPE, "C");
2253 ph10 545 locale_set = 0;
2254     }
2255 nigel 75 continue; /* With next regex */
2256 nigel 3 }
2257 nigel 75 } /* End of non-POSIX compile */
2258 nigel 3
2259     /* Read data lines and test them */
2260    
2261     for (;;)
2262     {
2263 nigel 87 uschar *q;
2264 ph10 147 uschar *bptr;
2265 nigel 57 int *use_offsets = offsets;
2266 nigel 53 int use_size_offsets = size_offsets;
2267 nigel 63 int callout_data = 0;
2268     int callout_data_set = 0;
2269 nigel 3 int count, c;
2270 nigel 29 int copystrings = 0;
2271 ph10 386 int find_match_limit = default_find_match_limit;
2272 nigel 29 int getstrings = 0;
2273     int getlist = 0;
2274 nigel 39 int gmatched = 0;
2275 nigel 35 int start_offset = 0;
2276 ph10 579 int start_offset_sign = 1;
2277 nigel 41 int g_notempty = 0;
2278 nigel 77 int use_dfa = 0;
2279 nigel 3
2280     options = 0;
2281    
2282 nigel 91 *copynames = 0;
2283     *getnames = 0;
2284    
2285     copynamesptr = copynames;
2286     getnamesptr = getnames;
2287    
2288 nigel 63 pcre_callout = callout;
2289     first_callout = 1;
2290 ph10 654 last_callout_mark = NULL;
2291 nigel 63 callout_extra = 0;
2292     callout_count = 0;
2293     callout_fail_count = 999999;
2294     callout_fail_id = -1;
2295 nigel 73 show_malloc = 0;
2296 nigel 63
2297 nigel 91 if (extra != NULL) extra->flags &=
2298     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2299    
2300     len = 0;
2301     for (;;)
2302 nigel 11 {
2303 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2304 nigel 91 {
2305 ph10 537 if (len > 0) /* Reached EOF without hitting a newline */
2306     {
2307 ph10 545 fprintf(outfile, "\n");
2308 ph10 537 break;
2309 ph10 545 }
2310 nigel 91 done = 1;
2311     goto CONTINUE;
2312     }
2313     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2314     len = (int)strlen((char *)buffer);
2315     if (buffer[len-1] == '\n') break;
2316 nigel 11 }
2317 nigel 3
2318     while (len > 0 && isspace(buffer[len-1])) len--;
2319     buffer[len] = 0;
2320     if (len == 0) break;
2321    
2322     p = buffer;
2323     while (isspace(*p)) p++;
2324    
2325 ph10 147 bptr = q = dbuffer;
2326 nigel 3 while ((c = *p++) != 0)
2327     {
2328     int i = 0;
2329     int n = 0;
2330 nigel 63
2331 nigel 3 if (c == '\\') switch ((c = *p++))
2332     {
2333     case 'a': c = 7; break;
2334     case 'b': c = '\b'; break;
2335     case 'e': c = 27; break;
2336     case 'f': c = '\f'; break;
2337     case 'n': c = '\n'; break;
2338     case 'r': c = '\r'; break;
2339     case 't': c = '\t'; break;
2340     case 'v': c = '\v'; break;
2341    
2342     case '0': case '1': case '2': case '3':
2343     case '4': case '5': case '6': case '7':
2344     c -= '0';
2345     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2346     c = c * 8 + *p++ - '0';
2347 nigel 91
2348     #if !defined NOUTF8
2349     if (use_utf8 && c > 255)
2350     {
2351     unsigned char buff8[8];
2352     int ii, utn;
2353     utn = ord2utf8(c, buff8);
2354     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2355     c = buff8[ii]; /* Last byte */
2356     }
2357     #endif
2358 nigel 3 break;
2359    
2360     case 'x':
2361 nigel 49
2362     /* Handle \x{..} specially - new Perl thing for utf8 */
2363    
2364 nigel 79 #if !defined NOUTF8
2365 nigel 49 if (*p == '{')
2366     {
2367     unsigned char *pt = p;
2368     c = 0;
2369     while (isxdigit(*(++pt)))
2370     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2371     if (*pt == '}')
2372     {
2373 nigel 67 unsigned char buff8[8];
2374 nigel 49 int ii, utn;
2375 ph10 355 if (use_utf8)
2376 ph10 358 {
2377 ph10 355 utn = ord2utf8(c, buff8);
2378     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2379     c = buff8[ii]; /* Last byte */
2380     }
2381     else
2382     {
2383 ph10 358 if (c > 255)
2384 ph10 355 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2385     "UTF-8 mode is not enabled.\n"
2386     "** Truncation will probably give the wrong result.\n", c);
2387 ph10 358 }
2388 nigel 49 p = pt + 1;
2389     break;
2390     }
2391     /* Not correct form; fall through */
2392     }
2393 nigel 79 #endif
2394 nigel 49
2395     /* Ordinary \x */
2396    
2397 nigel 3 c = 0;
2398     while (i++ < 2 && isxdigit(*p))
2399     {
2400     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2401     p++;
2402     }
2403     break;
2404    
2405 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
2406 nigel 3 p--;
2407     continue;
2408    
2409 nigel 75 case '>':
2410 ph10 579 if (*p == '-')
2411 ph10 567 {
2412     start_offset_sign = -1;
2413     p++;
2414 ph10 579 }
2415 nigel 75 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2416 ph10 579 start_offset *= start_offset_sign;
2417 nigel 75 continue;
2418    
2419 nigel 3 case 'A': /* Option setting */
2420     options |= PCRE_ANCHORED;
2421     continue;
2422    
2423     case 'B':
2424     options |= PCRE_NOTBOL;
2425     continue;
2426    
2427 nigel 29 case 'C':
2428 nigel 63 if (isdigit(*p)) /* Set copy string */
2429     {
2430     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2431     copystrings |= 1 << n;
2432     }
2433     else if (isalnum(*p))
2434     {
2435 nigel 91 uschar *npp = copynamesptr;
2436 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2437 nigel 91 *npp++ = 0;
2438 nigel 67 *npp = 0;
2439 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2440 nigel 63 if (n < 0)
2441 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2442     copynamesptr = npp;
2443 nigel 63 }
2444     else if (*p == '+')
2445     {
2446     callout_extra = 1;
2447     p++;
2448     }
2449     else if (*p == '-')
2450     {
2451     pcre_callout = NULL;
2452     p++;
2453     }
2454     else if (*p == '!')
2455     {
2456     callout_fail_id = 0;
2457     p++;
2458     while(isdigit(*p))
2459     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2460     callout_fail_count = 0;
2461     if (*p == '!')
2462     {
2463     p++;
2464     while(isdigit(*p))
2465     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2466     }
2467     }
2468     else if (*p == '*')
2469     {
2470     int sign = 1;
2471     callout_data = 0;
2472     if (*(++p) == '-') { sign = -1; p++; }
2473     while(isdigit(*p))
2474     callout_data = callout_data * 10 + *p++ - '0';
2475     callout_data *= sign;
2476     callout_data_set = 1;
2477     }
2478 nigel 29 continue;
2479    
2480 nigel 79 #if !defined NODFA
2481 nigel 77 case 'D':
2482 nigel 79 #if !defined NOPOSIX
2483 nigel 77 if (posix || do_posix)
2484     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2485     else
2486 nigel 79 #endif
2487 nigel 77 use_dfa = 1;
2488     continue;
2489 ph10 553 #endif
2490 nigel 77
2491 ph10 553 #if !defined NODFA
2492 nigel 77 case 'F':
2493     options |= PCRE_DFA_SHORTEST;
2494     continue;
2495 nigel 79 #endif
2496 nigel 77
2497 nigel 29 case 'G':
2498 nigel 63 if (isdigit(*p))
2499     {
2500     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2501     getstrings |= 1 << n;
2502     }
2503     else if (isalnum(*p))
2504     {
2505 nigel 91 uschar *npp = getnamesptr;
2506 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2507 nigel 91 *npp++ = 0;
2508 nigel 67 *npp = 0;
2509 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2510 nigel 63 if (n < 0)
2511 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2512     getnamesptr = npp;
2513 nigel 63 }
2514 nigel 29 continue;
2515 ph10 667
2516     case 'J':
2517     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2518     if (extra != NULL
2519     && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2520     && extra->executable_jit != NULL)
2521     {
2522     if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2523     jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2524 ph10 675 pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2525 ph10 667 }
2526     continue;
2527 nigel 29
2528     case 'L':
2529     getlist = 1;
2530     continue;
2531    
2532 nigel 63 case 'M':
2533     find_match_limit = 1;
2534     continue;
2535    
2536 nigel 37 case 'N':
2537 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
2538     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2539 ph10 461 else
2540 ph10 442 options |= PCRE_NOTEMPTY;
2541 nigel 37 continue;
2542    
2543 nigel 3 case 'O':
2544     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2545 nigel 53 if (n > size_offsets_max)
2546     {
2547     size_offsets_max = n;
2548 nigel 57 free(offsets);
2549 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2550 nigel 53 if (offsets == NULL)
2551     {
2552     printf("** Failed to get %d bytes of memory for offsets vector\n",
2553 ph10 151 (int)(size_offsets_max * sizeof(int)));
2554 nigel 77 yield = 1;
2555     goto EXIT;
2556 nigel 53 }
2557     }
2558     use_size_offsets = n;
2559 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2560 nigel 3 continue;
2561    
2562 nigel 75 case 'P':
2563 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2564 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2565 nigel 75 continue;
2566    
2567 nigel 91 case 'Q':
2568     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2569     if (extra == NULL)
2570     {
2571     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2572     extra->flags = 0;
2573     }
2574     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2575     extra->match_limit_recursion = n;
2576     continue;
2577    
2578     case 'q':
2579     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2580     if (extra == NULL)
2581     {
2582     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2583     extra->flags = 0;
2584     }
2585     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2586     extra->match_limit = n;
2587     continue;
2588    
2589 nigel 79 #if !defined NODFA
2590 nigel 77 case 'R':
2591     options |= PCRE_DFA_RESTART;
2592     continue;
2593 nigel 79 #endif
2594 nigel 77
2595 nigel 73 case 'S':
2596     show_malloc = 1;
2597     continue;
2598 ph10 392
2599 ph10 389 case 'Y':
2600     options |= PCRE_NO_START_OPTIMIZE;
2601 ph10 392 continue;
2602 nigel 73
2603 nigel 3 case 'Z':
2604     options |= PCRE_NOTEOL;
2605     continue;
2606 nigel 71
2607     case '?':
2608     options |= PCRE_NO_UTF8_CHECK;
2609     continue;
2610 nigel 91
2611     case '<':
2612     {
2613     int x = check_newline(p, outfile);
2614     if (x == 0) goto NEXT_DATA;
2615     options |= x;
2616     while (*p++ != '>');
2617     }
2618     continue;
2619 nigel 3 }
2620 nigel 9 *q++ = c;
2621 nigel 3 }
2622 nigel 9 *q = 0;
2623 ph10 530 len = (int)(q - dbuffer);
2624 ph10 545
2625 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
2626 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2627 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
2628 ph10 371
2629 ph10 363 #if !defined NOPOSIX
2630     if (posix || do_posix)
2631     {
2632     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2633 ph10 371 bptr += buffer_size - len - 1;
2634 ph10 363 }
2635 ph10 371 else
2636     #endif
2637 ph10 363 {
2638     memmove(bptr + buffer_size - len, bptr, len);
2639 ph10 371 bptr += buffer_size - len;
2640     }
2641 nigel 3
2642 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
2643     {
2644     printf("**Match limit not relevant for DFA matching: ignored\n");
2645     find_match_limit = 0;
2646     }
2647    
2648 nigel 3 /* Handle matching via the POSIX interface, which does not
2649 nigel 63 support timing or playing with the match limit or callout data. */
2650 nigel 3
2651 nigel 37 #if !defined NOPOSIX
2652 nigel 3 if (posix || do_posix)
2653     {
2654     int rc;
2655     int eflags = 0;
2656 nigel 63 regmatch_t *pmatch = NULL;
2657     if (use_size_offsets > 0)
2658 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2659 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2660     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2661 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2662 nigel 3
2663 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2664 nigel 3
2665     if (rc != 0)
2666     {
2667 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2668 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2669     }
2670 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2671     != 0)
2672     {
2673     fprintf(outfile, "Matched with REG_NOSUB\n");
2674     }
2675 nigel 3 else
2676     {
2677 nigel 7 size_t i;
2678 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
2679 nigel 3 {
2680     if (pmatch[i].rm_so >= 0)
2681     {
2682 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2683 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2684     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2685 nigel 3 fprintf(outfile, "\n");
2686 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
2687 nigel 35 {
2688 ph10 616 fprintf(outfile, "%2d+ ", (int)i);
2689 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2690     outfile);
2691 nigel 35 fprintf(outfile, "\n");
2692     }
2693 nigel 3 }
2694     }
2695     }
2696 nigel 53 free(pmatch);
2697 nigel 3 }
2698    
2699 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2700 nigel 3
2701 nigel 37 else
2702     #endif /* !defined NOPOSIX */
2703    
2704 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2705 nigel 3 {
2706 ph10 512 markptr = NULL;
2707    
2708 nigel 93 if (timeitm > 0)
2709 nigel 3 {
2710     register int i;
2711     clock_t time_taken;
2712     clock_t start_time = clock();
2713 nigel 77
2714 nigel 79 #if !defined NODFA
2715 nigel 77 if (all_use_dfa || use_dfa)
2716     {
2717     int workspace[1000];
2718 nigel 93 for (i = 0; i < timeitm; i++)
2719 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2720 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2721     sizeof(workspace)/sizeof(int));
2722     }
2723     else
2724 nigel 79 #endif
2725 nigel 77
2726 nigel 93 for (i = 0; i < timeitm; i++)
2727 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2728 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2729 nigel 77
2730 nigel 3 time_taken = clock() - start_time;
2731 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2732     (((double)time_taken * 1000.0) / (double)timeitm) /
2733 nigel 63 (double)CLOCKS_PER_SEC);
2734 nigel 3 }
2735    
2736 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2737 nigel 87 varying limits in order to find the minimum value for the match limit and
2738 ph10 667 for the recursion limit. The match limits are relevant only to the normal
2739     running of pcre_exec(), so disable the JIT optimization. This makes it
2740     possible to run the same set of tests with and without JIT externally
2741     requested. */
2742 nigel 63
2743     if (find_match_limit)
2744     {
2745     if (extra == NULL)
2746     {
2747 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2748 nigel 63 extra->flags = 0;
2749     }
2750 ph10 667 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
2751    
2752 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2753 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2754     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2755     PCRE_ERROR_MATCHLIMIT, "match()");
2756 nigel 63
2757 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2758     options|g_notempty, use_offsets, use_size_offsets,
2759     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2760     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2761 nigel 63 }
2762    
2763     /* If callout_data is set, use the interface with additional data */
2764    
2765     else if (callout_data_set)
2766     {
2767     if (extra == NULL)
2768     {
2769 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2770 nigel 63 extra->flags = 0;
2771     }
2772     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2773 nigel 71 extra->callout_data = &callout_data;
2774 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2775     options | g_notempty, use_offsets, use_size_offsets);
2776     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2777     }
2778    
2779     /* The normal case is just to do the match once, with the default
2780     value of match_limit. */
2781    
2782 nigel 79 #if !defined NODFA
2783 nigel 77 else if (all_use_dfa || use_dfa)
2784     {
2785     int workspace[1000];
2786 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2787 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2788     sizeof(workspace)/sizeof(int));
2789     if (count == 0)
2790     {
2791     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2792     count = use_size_offsets/2;
2793     }
2794     }
2795 nigel 79 #endif
2796 nigel 77
2797 nigel 75 else
2798     {
2799     count = pcre_exec(re, extra, (char *)bptr, len,
2800     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2801 nigel 77 if (count == 0)
2802     {
2803     fprintf(outfile, "Matched, but too many substrings\n");
2804     count = use_size_offsets/3;
2805     }
2806 nigel 75 }
2807 nigel 3
2808 nigel 39 /* Matched */
2809    
2810 nigel 3 if (count >= 0)
2811     {
2812 nigel 93 int i, maxcount;
2813    
2814     #if !defined NODFA
2815     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2816     #endif
2817     maxcount = use_size_offsets/3;
2818    
2819     /* This is a check against a lunatic return value. */
2820    
2821     if (count > maxcount)
2822     {
2823     fprintf(outfile,
2824     "** PCRE error: returned count %d is too big for offset size %d\n",
2825     count, use_size_offsets);
2826     count = use_size_offsets/3;
2827     if (do_g || do_G)
2828     {
2829     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2830     do_g = do_G = FALSE; /* Break g/G loop */
2831     }
2832     }
2833 ph10 654
2834 ph10 626 /* do_allcaps requests showing of all captures in the pattern, to check
2835     unset ones at the end. */
2836 ph10 654
2837 ph10 626 if (do_allcaps)
2838     {
2839     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2840 ph10 654 count++; /* Allow for full match */
2841     if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2842     }
2843 nigel 93
2844 ph10 626 /* Output the captured substrings */
2845 ph10 654
2846 nigel 29 for (i = 0; i < count * 2; i += 2)
2847 nigel 3 {
2848 nigel 57 if (use_offsets[i] < 0)
2849 ph10 654 {
2850 ph10 626 if (use_offsets[i] != -1)
2851     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2852 ph10 654 use_offsets[i], i);
2853 ph10 626 if (use_offsets[i+1] != -1)
2854     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2855 ph10 654 use_offsets[i+1], i+1);
2856 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2857 ph10 654 }
2858 nigel 3 else
2859     {
2860     fprintf(outfile, "%2d: ", i/2);
2861 nigel 63 (void)pchars(bptr + use_offsets[i],
2862     use_offsets[i+1] - use_offsets[i], outfile);
2863 nigel 3 fprintf(outfile, "\n");
2864 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
2865 nigel 35 {
2866 ph10 616 fprintf(outfile, "%2d+ ", i/2);
2867     (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2868     outfile);
2869     fprintf(outfile, "\n");
2870 nigel 35 }
2871 nigel 3 }
2872     }
2873 ph10 512
2874 ph10 510 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2875 nigel 29
2876     for (i = 0; i < 32; i++)
2877     {
2878     if ((copystrings & (1 << i)) != 0)
2879     {
2880 nigel 91 char copybuffer[256];
2881 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2882 nigel 37 i, copybuffer, sizeof(copybuffer));
2883 nigel 29 if (rc < 0)
2884     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2885     else
2886 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2887 nigel 29 }
2888     }
2889    
2890 nigel 91 for (copynamesptr = copynames;
2891     *copynamesptr != 0;
2892     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2893     {
2894     char copybuffer[256];
2895     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2896     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2897     if (rc < 0)
2898     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2899     else
2900     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2901     }
2902    
2903 nigel 29 for (i = 0; i < 32; i++)
2904     {
2905     if ((getstrings & (1 << i)) != 0)
2906     {
2907     const char *substring;
2908 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2909 nigel 29 i, &substring);
2910     if (rc < 0)
2911     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2912     else
2913     {
2914     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2915 nigel 49 pcre_free_substring(substring);
2916 nigel 29 }
2917     }
2918     }
2919    
2920 nigel 91 for (getnamesptr = getnames;
2921     *getnamesptr != 0;
2922     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2923     {
2924     const char *substring;
2925     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2926     count, (char *)getnamesptr, &substring);
2927     if (rc < 0)
2928     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2929     else
2930     {
2931     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2932     pcre_free_substring(substring);
2933     }
2934     }
2935    
2936 nigel 29 if (getlist)
2937     {
2938     const char **stringlist;
2939 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2940 nigel 29 &stringlist);
2941     if (rc < 0)
2942     fprintf(outfile, "get substring list failed %d\n", rc);
2943     else
2944     {
2945     for (i = 0; i < count; i++)
2946     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2947     if (stringlist[i] != NULL)
2948     fprintf(outfile, "string list not terminated by NULL\n");
2949 nigel 49 pcre_free_substring_list(stringlist);
2950 nigel 29 }
2951     }
2952 nigel 39 }
2953 nigel 29
2954 nigel 75 /* There was a partial match */
2955    
2956     else if (count == PCRE_ERROR_PARTIAL)
2957     {
2958 ph10 510 if (markptr == NULL) fprintf(outfile, "Partial match");
2959     else fprintf(outfile, "Partial match, mark=%s", markptr);
2960 ph10 426 if (use_size_offsets > 1)
2961     {
2962     fprintf(outfile, ": ");
2963     pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2964 ph10 461 outfile);
2965     }
2966 nigel 77 fprintf(outfile, "\n");
2967 nigel 75 break; /* Out of the /g loop */
2968     }
2969    
2970 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2971 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2972     to advance the start offset, and continue. We won't be at the end of the
2973     string - that was checked before setting g_notempty.
2974 nigel 39
2975 ph10 566 Complication arises in the case when the newline convention is "any",
2976 ph10 579 "crlf", or "anycrlf". If the previous match was at the end of a line
2977     terminated by CRLF, an advance of one character just passes the \r,
2978 ph10 566 whereas we should prefer the longer newline sequence, as does the code in
2979 ph10 579 pcre_exec(). Fudge the offset value to achieve this. We check for a
2980     newline setting in the pattern; if none was set, use pcre_config() to
2981 ph10 566 find the default.
2982 ph10 144
2983 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2984     character, not one byte. */
2985    
2986 nigel 3 else
2987     {
2988 nigel 41 if (g_notempty != 0)
2989 nigel 35 {
2990 nigel 73 int onechar = 1;
2991 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2992 nigel 57 use_offsets[0] = start_offset;
2993 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2994     {
2995     int d;
2996     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2997 ph10 391 /* Note that these values are always the ASCII ones, even in
2998     EBCDIC environments. CR = 13, NL = 10. */
2999     obits = (d == 13)? PCRE_NEWLINE_CR :
3000     (d == 10)? PCRE_NEWLINE_LF :
3001     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3002 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
3003 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
3004     }
3005 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3006 ph10 566 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3007 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3008 ph10 149 &&
3009 ph10 143 start_offset < len - 1 &&
3010     bptr[start_offset] == '\r' &&
3011     bptr[start_offset+1] == '\n')
3012 ph10 144 onechar++;
3013 ph10 143 else if (use_utf8)
3014 nigel 73 {
3015     while (start_offset + onechar < len)
3016     {
3017 ph10 566 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3018 ph10 579 onechar++;
3019 nigel 73 }
3020     }
3021     use_offsets[1] = start_offset + onechar;
3022 nigel 35 }
3023 nigel 41 else
3024     {
3025 ph10 598 switch(count)
3026 ph10 654 {
3027 ph10 598 case PCRE_ERROR_NOMATCH:
3028 ph10 512 if (gmatched == 0)
3029 ph10 510 {
3030     if (markptr == NULL) fprintf(outfile, "No match\n");
3031     else fprintf(outfile, "No match, mark = %s\n", markptr);
3032 ph10 512 }
3033 ph10 598 break;
3034 ph10 654
3035 ph10 598 case PCRE_ERROR_BADUTF8:
3036     case PCRE_ERROR_SHORTUTF8:
3037     fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3038     (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3039     if (use_size_offsets >= 2)
3040 ph10 654 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3041 ph10 598 use_offsets[1]);
3042 ph10 654 fprintf(outfile, "\n");
3043     break;
3044    
3045 ph10 598 default:
3046 ph10 654 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3047 ph10 604 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3048 ph10 654 else
3049     fprintf(outfile, "Error %d (Unexpected value)\n", count);
3050 ph10 598 break;
3051 nigel 41 }
3052 ph10 654
3053 nigel 41 break; /* Out of the /g loop */
3054     }
3055 nigel 3 }
3056 nigel 35
3057 nigel 39 /* If not /g or /G we are done */
3058    
3059     if (!do_g && !do_G) break;
3060    
3061 nigel 41 /* If we have matched an empty string, first check to see if we are at
3062 ph10 442 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3063     Perl's /g option does. This turns out to be rather cunning. First we set
3064     PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3065 nigel 47 same point. If this fails (picked up above) we advance to the next
3066 ph10 143 character. */
3067 ph10 142
3068 nigel 41 g_notempty = 0;
3069 ph10 142
3070 nigel 57 if (use_offsets[0] == use_offsets[1])
3071 nigel 41 {
3072 nigel 57 if (use_offsets[0] == len) break;
3073 ph10 442 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3074 nigel 41 }
3075 nigel 39
3076     /* For /g, update the start offset, leaving the rest alone */
3077    
3078 ph10 143 if (do_g) start_offset = use_offsets[1];
3079 nigel 39
3080     /* For /G, update the pointer and length */
3081    
3082     else
3083 nigel 35 {
3084 ph10 143 bptr += use_offsets[1];
3085     len -= use_offsets[1];
3086 nigel 35 }
3087 nigel 39 } /* End of loop for /g and /G */
3088 nigel 91
3089     NEXT_DATA: continue;
3090 nigel 39 } /* End of loop for data lines */
3091 nigel 3
3092 nigel 11 CONTINUE:
3093 nigel 37
3094     #if !defined NOPOSIX
3095 nigel 3 if (posix || do_posix) regfree(&preg);
3096 nigel 37 #endif
3097    
3098 nigel 77 if (re != NULL) new_free(re);
3099 ph10 667 if (extra != NULL) pcre_free_study(extra);
3100 ph10 541 if (locale_set)
3101 nigel 25 {
3102 nigel 77 new_free((void *)tables);
3103 nigel 25 setlocale(LC_CTYPE, "C");
3104 nigel 93 locale_set = 0;
3105 nigel 25 }
3106 ph10 667 if (jit_stack != NULL)
3107     {
3108     pcre_jit_stack_free(jit_stack);
3109     jit_stack = NULL;
3110     }
3111 nigel 3 }
3112    
3113 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
3114 nigel 77
3115     EXIT:
3116    
3117     if (infile != NULL && infile != stdin) fclose(infile);
3118     if (outfile != NULL && outfile != stdout) fclose(outfile);
3119    
3120     free(buffer);
3121     free(dbuffer);
3122     free(pbuffer);
3123     free(offsets);
3124    
3125     return yield;
3126 nigel 3 }
3127    
3128 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12