/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 667 - (hide annotations) (download)
Mon Aug 22 14:57:32 2011 UTC (2 years, 8 months ago) by ph10
File MIME type: text/plain
File size: 93548 byte(s)
Commit all the changes for JIT support, but without any documentation yet.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50 nigel 3
51 ph10 287 #ifdef SUPPORT_LIBREADLINE
52 ph10 343 #ifdef HAVE_UNISTD_H
53 ph10 287 #include <unistd.h>
54 ph10 343 #endif
55 ph10 287 #include <readline/readline.h>
56     #include <readline/history.h>
57     #endif
58 nigel 93
59 ph10 287
60 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
61     input and output without "b"; then I was told that "b" was needed in some
62     environments, so it was added for release 5.0 to both the input and output. (It
63     makes no difference on Unix-like systems.) Later I was told that it is wrong
64     for the input on Windows. I've now abstracted the modes into two macros that
65     are set here, to make it easier to fiddle with them, and removed "b" from the
66     input mode under Windows. */
67    
68     #if defined(_WIN32) || defined(WIN32)
69     #include <io.h> /* For _setmode() */
70     #include <fcntl.h> /* For _O_BINARY */
71     #define INPUT_MODE "r"
72     #define OUTPUT_MODE "wb"
73    
74 ph10 411 #ifndef isatty
75     #define isatty _isatty /* This is what Windows calls them, I'm told, */
76     #endif /* though in some environments they seem to */
77     /* be already defined, hence the #ifndefs. */
78     #ifndef fileno
79 ph10 343 #define fileno _fileno
80 ph10 411 #endif
81 ph10 343
82 ph10 580 /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84     #ifdef __BORLANDC__
85     #define _setmode(handle, mode) setmode(handle, mode)
86     #endif
87    
88     /* Not Windows */
89    
90 nigel 93 #else
91     #include <sys/time.h> /* These two includes are needed */
92     #include <sys/resource.h> /* for setrlimit(). */
93     #define INPUT_MODE "rb"
94     #define OUTPUT_MODE "wb"
95 nigel 91 #endif
96    
97 nigel 93
98 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
99     displaying the results of pcre_study() and we also need to know about the
100     internal macros, structures, and other internal data values; pcretest has
101     "inside information" compared to a program that strictly follows the PCRE API.
102 nigel 37
103 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105     appropriately for an application, not for building PCRE. */
106 nigel 77
107 ph10 145 #include "pcre.h"
108 nigel 77 #include "pcre_internal.h"
109    
110 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
111     to keep two copies, we include the source file here, changing the names of the
112     external symbols to prevent clashes. */
113 nigel 77
114 ph10 351 #define _pcre_ucp_gentype ucp_gentype
115 ph10 667 #define _pcre_ucp_typerange ucp_typerange
116 nigel 85 #define _pcre_utf8_table1 utf8_table1
117     #define _pcre_utf8_table1_size utf8_table1_size
118     #define _pcre_utf8_table2 utf8_table2
119     #define _pcre_utf8_table3 utf8_table3
120     #define _pcre_utf8_table4 utf8_table4
121 ph10 667 #define _pcre_utf8_char_sizes utf8_char_sizes
122 nigel 85 #define _pcre_utt utt
123     #define _pcre_utt_size utt_size
124 ph10 240 #define _pcre_utt_names utt_names
125 nigel 85 #define _pcre_OP_lengths OP_lengths
126    
127     #include "pcre_tables.c"
128    
129     /* We also need the pcre_printint() function for printing out compiled
130     patterns. This function is in a separate file so that it can be included in
131 ph10 507 pcre_compile.c when that module is compiled with debugging enabled. It needs to
132 ph10 498 know which case is being compiled. */
133 nigel 85
134 ph10 498 #define COMPILING_PCRETEST
135     #include "pcre_printint.src"
136    
/* The definition of the macro PRINTABLE, which determines whether to print an
output character as-is or as a hex value when showing compiled patterns, is
contained in the printint.src file. We use it here also, in cases when the
locale has not been explicitly changed, so as to get consistent output from
systems that differ in their output from isprint() even in the "C" locale.

When a locale has been set, isprint() is consulted instead, but only for values
in the range 0-255. In UTF-8 mode the argument can be a complete code point,
and passing isprint() a value that is neither EOF nor representable as an
unsigned char is undefined behaviour, so anything outside that range is treated
as not printable. Note that the argument is evaluated more than once, so it
must not have side effects. */

#define PRINTHEX(c) \
  (locale_set? ((c) >= 0 && (c) <= 0xff && isprint(c)) : PRINTABLE(c))
144 nigel 85
145 nigel 37 /* It is possible to compile this test program without including support for
146     testing the POSIX interface, though this is not available via the standard
147     Makefile. */
148    
149     #if !defined NOPOSIX
150 nigel 3 #include "pcreposix.h"
151 nigel 37 #endif
152 nigel 3
153 ph10 107 /* It is also possible, for the benefit of the version currently imported into
154     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
155     interface to the DFA matcher (NODFA), and without the doublecheck of the old
156     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
157     UTF8 support if PCRE is built without it. */
158 nigel 79
159 ph10 107 #ifndef SUPPORT_UTF8
160     #ifndef NOUTF8
161     #define NOUTF8
162     #endif
163     #endif
164 nigel 79
165 ph10 107
166 nigel 85 /* Other parameters */
167    
168 nigel 3 #ifndef CLOCKS_PER_SEC
169     #ifdef CLK_TCK
170     #define CLOCKS_PER_SEC CLK_TCK
171     #else
172     #define CLOCKS_PER_SEC 100
173     #endif
174     #endif
175    
176 nigel 93 /* This is the default loop count for timing. */
177    
178 nigel 75 #define LOOPREPEAT 500000
179 nigel 3
180 nigel 85 /* Static variables. Only the uses that are visible near these declarations
are described; the callout_* counters and the other flags are used by code that
is not shown alongside them (NOTE(review): confirm their exact roles at the
points of use). */
181    
182 nigel 3 static FILE *outfile;
183     static int log_store = 0;
184 nigel 63 static int callout_count;
185     static int callout_extra;
186     static int callout_fail_count;
187     static int callout_fail_id;
188 ph10 210 static int debug_lengths;
189 nigel 63 static int first_callout;
190 nigel 93 static int locale_set = 0; /* Tested by PRINTHEX: non-zero => use isprint() */
191 nigel 73 static int show_malloc;
192 nigel 67 static int use_utf8; /* Non-zero => pchars() decodes its input as UTF-8 */
193 nigel 43 static size_t gotten_store;
194 ph10 645 static const unsigned char *last_callout_mark = NULL;
195 nigel 3
196 nigel 91 /* The buffers grow automatically if very long input lines are encountered.
extend_inputline() doubles buffer_size and replaces all three buffers together,
carrying over the contents of buffer and pbuffer but not of dbuffer. Input lines
are read into buffer; pbuffer holds a copy of the input for callouts. */
197    
198     static int buffer_size = 50000;
199     static uschar *buffer = NULL;
200     static uschar *dbuffer = NULL;
201 nigel 75 static uschar *pbuffer = NULL;
202 nigel 3
203 ph10 598 /* Textual explanations for runtime error codes. Entries that are NULL have
no fixed text: as the individual comments say, they are either handled
specially or never returned by the matching functions. NOTE(review): the
entries for 0 and NOMATCH suggest that entry n describes error code -n; confirm
against the error numbers in pcre.h before adding or reordering entries. */
204 nigel 75
205 ph10 598 static const char *errtexts[] = {
206     NULL, /* 0 is no error */
207     NULL, /* NOMATCH is handled specially */
208     "NULL argument passed",
209     "bad option value",
210     "magic number missing",
211     "unknown opcode - pattern overwritten?",
212     "no more memory",
213 ph10 654 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
214 ph10 598 "match limit exceeded",
215     "callout error code",
216     NULL, /* BADUTF8 is handled specially */
217     "bad UTF-8 offset",
218     NULL, /* PARTIAL is handled specially */
219     "not used - internal error",
220     "internal error - pattern overwritten?",
221     "bad count value",
222     "item unsupported for DFA matching",
223     "backreference condition or recursion test not supported for DFA matching",
224     "match limit not supported for DFA matching",
225     "workspace size exceeded in DFA matching",
226 ph10 654 "too much recursion for DFA matching",
227 ph10 598 "recursion limit exceeded",
228     "not used - internal error",
229     "invalid combination of newline options",
230     "bad offset value",
231 ph10 642 NULL, /* SHORTUTF8 is handled specially */
232     "nested recursion at the same subject position"
233 ph10 598 };
234    
235 ph10 654
236 ph10 541 /*************************************************
237     * Alternate character tables *
238     *************************************************/
239 nigel 49
240 ph10 545 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
241     using the default tables of the library. However, the T option can be used to
242     select alternate sets of tables, for different kinds of testing. Note also that
243 ph10 541 the L (locale) option also adjusts the tables. */
244    
245 ph10 545 /* This is the set of tables distributed as default with PCRE. It recognizes
246 ph10 541 only ASCII characters. */
247    
/* Layout of the vector, in order: a 256-byte lower casing table, a 256-byte
case flipping table, ten 32-byte character class bit maps (320 bytes), and a
256-byte table of per-character type flags. */

248     static const unsigned char tables0[] = {
249    
250     /* This table is a lower casing table. */
251    
252     0, 1, 2, 3, 4, 5, 6, 7,
253     8, 9, 10, 11, 12, 13, 14, 15,
254     16, 17, 18, 19, 20, 21, 22, 23,
255     24, 25, 26, 27, 28, 29, 30, 31,
256     32, 33, 34, 35, 36, 37, 38, 39,
257     40, 41, 42, 43, 44, 45, 46, 47,
258     48, 49, 50, 51, 52, 53, 54, 55,
259     56, 57, 58, 59, 60, 61, 62, 63,
260     64, 97, 98, 99,100,101,102,103,
261     104,105,106,107,108,109,110,111,
262     112,113,114,115,116,117,118,119,
263     120,121,122, 91, 92, 93, 94, 95,
264     96, 97, 98, 99,100,101,102,103,
265     104,105,106,107,108,109,110,111,
266     112,113,114,115,116,117,118,119,
267     120,121,122,123,124,125,126,127,
268     128,129,130,131,132,133,134,135,
269     136,137,138,139,140,141,142,143,
270     144,145,146,147,148,149,150,151,
271     152,153,154,155,156,157,158,159,
272     160,161,162,163,164,165,166,167,
273     168,169,170,171,172,173,174,175,
274     176,177,178,179,180,181,182,183,
275     184,185,186,187,188,189,190,191,
276     192,193,194,195,196,197,198,199,
277     200,201,202,203,204,205,206,207,
278     208,209,210,211,212,213,214,215,
279     216,217,218,219,220,221,222,223,
280     224,225,226,227,228,229,230,231,
281     232,233,234,235,236,237,238,239,
282     240,241,242,243,244,245,246,247,
283     248,249,250,251,252,253,254,255,
284    
285     /* This table is a case flipping table. */
286    
287     0, 1, 2, 3, 4, 5, 6, 7,
288     8, 9, 10, 11, 12, 13, 14, 15,
289     16, 17, 18, 19, 20, 21, 22, 23,
290     24, 25, 26, 27, 28, 29, 30, 31,
291     32, 33, 34, 35, 36, 37, 38, 39,
292     40, 41, 42, 43, 44, 45, 46, 47,
293     48, 49, 50, 51, 52, 53, 54, 55,
294     56, 57, 58, 59, 60, 61, 62, 63,
295     64, 97, 98, 99,100,101,102,103,
296     104,105,106,107,108,109,110,111,
297     112,113,114,115,116,117,118,119,
298     120,121,122, 91, 92, 93, 94, 95,
299     96, 65, 66, 67, 68, 69, 70, 71,
300     72, 73, 74, 75, 76, 77, 78, 79,
301     80, 81, 82, 83, 84, 85, 86, 87,
302     88, 89, 90,123,124,125,126,127,
303     128,129,130,131,132,133,134,135,
304     136,137,138,139,140,141,142,143,
305     144,145,146,147,148,149,150,151,
306     152,153,154,155,156,157,158,159,
307     160,161,162,163,164,165,166,167,
308     168,169,170,171,172,173,174,175,
309     176,177,178,179,180,181,182,183,
310     184,185,186,187,188,189,190,191,
311     192,193,194,195,196,197,198,199,
312     200,201,202,203,204,205,206,207,
313     208,209,210,211,212,213,214,215,
314     216,217,218,219,220,221,222,223,
315     224,225,226,227,228,229,230,231,
316     232,233,234,235,236,237,238,239,
317     240,241,242,243,244,245,246,247,
318     248,249,250,251,252,253,254,255,
319    
320     /* This table contains bit maps for various character classes. Each map is 32
321     bytes long and the bits run from the least significant end of each byte. The
322     classes that have their own maps are: space, xdigit, digit, upper, lower, word,
323     graph, print, punct, and cntrl. Other classes are built from combinations. */
324    
/* space */
325     0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
326     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
327     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
328     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329    
/* xdigit */
330     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
331     0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
332     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
333     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334    
/* digit */
335     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
336     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
337     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
338     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339    
/* upper */
340     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
341     0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
342     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
343     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
344    
/* lower */
345     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
346     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
347     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
348     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
349    
/* word (digits, letters and underscore) */
350     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
351     0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
352     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
353     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
354    
/* graph */
355     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
356     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
357     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
358     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
359    
/* print */
360     0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
361     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
362     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
363     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
364    
/* punct */
365     0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
366     0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
367     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
368     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
369    
/* cntrl */
370     0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
371     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
372     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
373     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
374    
375     /* This table identifies various classes of character by individual bits:
376     0x01 white space character
377     0x02 letter
378     0x04 decimal digit
379     0x08 hexadecimal digit
380     0x10 alphanumeric or '_'
381     0x80 regular expression metacharacter or binary zero
382     */
383    
384     0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
385     0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
386     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
387     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
388     0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
389     0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
390     0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
391     0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
392     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
393     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
394     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
395     0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
396     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
397     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
398     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
399     0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
400     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
401     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
402     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
403     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
404     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
405     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
406     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
407     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
408     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
409     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
410     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
411     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
412     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
413     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
414     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
415     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
416    
417 ph10 545 /* This is a set of tables that came originally from a Windows user. It seems to
418     be at least an approximation of ISO 8859. In particular, there are characters
419 ph10 541 greater than 128 that are marked as spaces, letters, etc. */
420    
/* The vector has the same size and section order as tables0: lower casing
table, case flipping table, ten 32-byte class bit maps, then the per-character
type flags. The section labels below follow that order. */

421     static const unsigned char tables1[] = {
/* Lower casing table (256 bytes) */
422     0,1,2,3,4,5,6,7,
423     8,9,10,11,12,13,14,15,
424     16,17,18,19,20,21,22,23,
425     24,25,26,27,28,29,30,31,
426     32,33,34,35,36,37,38,39,
427     40,41,42,43,44,45,46,47,
428     48,49,50,51,52,53,54,55,
429     56,57,58,59,60,61,62,63,
430     64,97,98,99,100,101,102,103,
431     104,105,106,107,108,109,110,111,
432     112,113,114,115,116,117,118,119,
433     120,121,122,91,92,93,94,95,
434     96,97,98,99,100,101,102,103,
435     104,105,106,107,108,109,110,111,
436     112,113,114,115,116,117,118,119,
437     120,121,122,123,124,125,126,127,
438     128,129,130,131,132,133,134,135,
439     136,137,138,139,140,141,142,143,
440     144,145,146,147,148,149,150,151,
441     152,153,154,155,156,157,158,159,
442     160,161,162,163,164,165,166,167,
443     168,169,170,171,172,173,174,175,
444     176,177,178,179,180,181,182,183,
445     184,185,186,187,188,189,190,191,
446     224,225,226,227,228,229,230,231,
447     232,233,234,235,236,237,238,239,
448     240,241,242,243,244,245,246,215,
449     248,249,250,251,252,253,254,223,
450     224,225,226,227,228,229,230,231,
451     232,233,234,235,236,237,238,239,
452     240,241,242,243,244,245,246,247,
453     248,249,250,251,252,253,254,255,
/* Case flipping table (256 bytes) */
454     0,1,2,3,4,5,6,7,
455     8,9,10,11,12,13,14,15,
456     16,17,18,19,20,21,22,23,
457     24,25,26,27,28,29,30,31,
458     32,33,34,35,36,37,38,39,
459     40,41,42,43,44,45,46,47,
460     48,49,50,51,52,53,54,55,
461     56,57,58,59,60,61,62,63,
462     64,97,98,99,100,101,102,103,
463     104,105,106,107,108,109,110,111,
464     112,113,114,115,116,117,118,119,
465     120,121,122,91,92,93,94,95,
466     96,65,66,67,68,69,70,71,
467     72,73,74,75,76,77,78,79,
468     80,81,82,83,84,85,86,87,
469     88,89,90,123,124,125,126,127,
470     128,129,130,131,132,133,134,135,
471     136,137,138,139,140,141,142,143,
472     144,145,146,147,148,149,150,151,
473     152,153,154,155,156,157,158,159,
474     160,161,162,163,164,165,166,167,
475     168,169,170,171,172,173,174,175,
476     176,177,178,179,180,181,182,183,
477     184,185,186,187,188,189,190,191,
478     224,225,226,227,228,229,230,231,
479     232,233,234,235,236,237,238,239,
480     240,241,242,243,244,245,246,215,
481     248,249,250,251,252,253,254,223,
482     192,193,194,195,196,197,198,199,
483     200,201,202,203,204,205,206,207,
484     208,209,210,211,212,213,214,247,
485     216,217,218,219,220,221,222,255,
/* Character class bit maps, 32 bytes each: space */
486     0,62,0,0,1,0,0,0,
487     0,0,0,0,0,0,0,0,
488     32,0,0,0,1,0,0,0,
489     0,0,0,0,0,0,0,0,
/* xdigit */
490     0,0,0,0,0,0,255,3,
491     126,0,0,0,126,0,0,0,
492     0,0,0,0,0,0,0,0,
493     0,0,0,0,0,0,0,0,
/* digit */
494     0,0,0,0,0,0,255,3,
495     0,0,0,0,0,0,0,0,
496     0,0,0,0,0,0,12,2,
497     0,0,0,0,0,0,0,0,
/* upper */
498     0,0,0,0,0,0,0,0,
499     254,255,255,7,0,0,0,0,
500     0,0,0,0,0,0,0,0,
501     255,255,127,127,0,0,0,0,
/* lower */
502     0,0,0,0,0,0,0,0,
503     0,0,0,0,254,255,255,7,
504     0,0,0,0,0,4,32,4,
505     0,0,0,128,255,255,127,255,
/* word */
506     0,0,0,0,0,0,255,3,
507     254,255,255,135,254,255,255,7,
508     0,0,0,0,0,4,44,6,
509     255,255,127,255,255,255,127,255,
/* graph */
510     0,0,0,0,254,255,255,255,
511     255,255,255,255,255,255,255,127,
512     0,0,0,0,254,255,255,255,
513     255,255,255,255,255,255,255,255,
/* print */
514     0,2,0,0,255,255,255,255,
515     255,255,255,255,255,255,255,127,
516     0,0,0,0,255,255,255,255,
517     255,255,255,255,255,255,255,255,
/* punct */
518     0,0,0,0,254,255,0,252,
519     1,0,0,248,1,0,0,120,
520     0,0,0,0,254,255,255,255,
521     0,0,128,0,0,0,128,0,
/* cntrl */
522     255,255,255,255,0,0,0,0,
523     0,0,0,0,0,0,0,128,
524     255,255,255,255,0,0,0,0,
525     0,0,0,0,0,0,0,0,
/* Per-character type flags (256 bytes), using the same bit values as the
corresponding table in tables0 */
526     128,0,0,0,0,0,0,0,
527     0,1,1,0,1,1,0,0,
528     0,0,0,0,0,0,0,0,
529     0,0,0,0,0,0,0,0,
530     1,0,0,0,128,0,0,0,
531     128,128,128,128,0,0,128,0,
532     28,28,28,28,28,28,28,28,
533     28,28,0,0,0,0,0,128,
534     0,26,26,26,26,26,26,18,
535     18,18,18,18,18,18,18,18,
536     18,18,18,18,18,18,18,18,
537     18,18,18,128,128,0,128,16,
538     0,26,26,26,26,26,26,18,
539     18,18,18,18,18,18,18,18,
540     18,18,18,18,18,18,18,18,
541     18,18,18,128,128,0,0,0,
542     0,0,0,0,0,1,0,0,
543     0,0,0,0,0,0,0,0,
544     0,0,0,0,0,0,0,0,
545     0,0,0,0,0,0,0,0,
546     1,0,0,0,0,0,0,0,
547     0,0,18,0,0,0,0,0,
548     0,0,20,20,0,18,0,0,
549     0,20,18,0,0,0,0,0,
550     18,18,18,18,18,18,18,18,
551     18,18,18,18,18,18,18,18,
552     18,18,18,18,18,18,18,0,
553     18,18,18,18,18,18,18,18,
554     18,18,18,18,18,18,18,18,
555     18,18,18,18,18,18,18,18,
556     18,18,18,18,18,18,18,0,
557     18,18,18,18,18,18,18,18
558     };
559    
560    
561    
562 ph10 558
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for old systems (SunOS4 is one example) whose C library has no
strerror() but does export the sys_nerr/sys_errlist pair. It is compiled only
when HAVE_STRERROR is not defined.

Argument:   n    the error number to describe
Returns:    sys_errlist[n] when 0 <= n < sys_nerr, otherwise a fixed
            "unknown error number" string
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
582    
583    
584 ph10 667 /*************************************************
585     * JIT memory callback *
586     *************************************************/
587 ph10 558
588 ph10 667 static pcre_jit_stack* jit_callback(void *arg)
589     {
590     return (pcre_jit_stack *)arg;
591     }
592 ph10 558
593 ph10 667
594 ph10 558 /*************************************************
595 nigel 91 * Read or extend an input line *
596     *************************************************/
597    
598     /* Input lines are read into buffer, but both patterns and data lines can be
599     continued over multiple input lines. In addition, if the buffer fills up, we
600     want to automatically expand it so as to be able to handle extremely large
601     lines that are needed for certain stress tests. When the input buffer is
602     expanded, the other two buffers must also be expanded likewise, and the
603     contents of pbuffer, which are a copy of the input for callouts, must be
604     preserved (for when expansion happens for a data line). This is not the most
605     optimal way of handling this, but hey, this is just a test program!
606    
607     Arguments:
608     f the file to read
609     start where in buffer to start (this *must* be within buffer)
610 ph10 287 prompt for stdin or readline()
611 nigel 91
612     Returns: pointer to the start of new data
613     could be a copy of start, or could be moved
614     NULL if no data read and EOF reached
615     */
616    
617     static uschar *
618 ph10 287 extend_inputline(FILE *f, uschar *start, const char *prompt)
619 nigel 91 {
620     uschar *here = start;
621    
622     for (;;)
623     {
624 ph10 530 int rlen = (int)(buffer_size - (here - buffer));
625 nigel 93
626 nigel 91 if (rlen > 1000)
627     {
628     int dlen;
629 ph10 289
630 ph10 287 /* If libreadline support is required, use readline() to read a line if the
631     input is a terminal. Note that readline() removes the trailing newline, so
632     we must put it back again, to be compatible with fgets(). */
633 ph10 289
634 ph10 287 #ifdef SUPPORT_LIBREADLINE
635     if (isatty(fileno(f)))
636     {
637 ph10 289 size_t len;
638 ph10 287 char *s = readline(prompt);
639     if (s == NULL) return (here == start)? NULL : start;
640     len = strlen(s);
641 ph10 289 if (len > 0) add_history(s);
642 ph10 287 if (len > rlen - 1) len = rlen - 1;
643     memcpy(here, s, len);
644     here[len] = '\n';
645 ph10 289 here[len+1] = 0;
646     free(s);
647 ph10 287 }
648 ph10 289 else
649     #endif
650    
651 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
652 ph10 289
653 ph10 287 {
654 ph10 516 if (f == stdin) printf("%s", prompt);
655 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
656     return (here == start)? NULL : start;
657 ph10 289 }
658    
659 nigel 91 dlen = (int)strlen((char *)here);
660     if (dlen > 0 && here[dlen - 1] == '\n') return start;
661     here += dlen;
662     }
663    
664     else
665     {
666     int new_buffer_size = 2*buffer_size;
667     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
668     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
669     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
670    
671     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
672     {
673     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
674     exit(1);
675     }
676    
677     memcpy(new_buffer, buffer, buffer_size);
678     memcpy(new_pbuffer, pbuffer, buffer_size);
679    
680     buffer_size = new_buffer_size;
681    
682     start = new_buffer + (start - buffer);
683     here = new_buffer + (here - buffer);
684    
685     free(buffer);
686     free(dbuffer);
687     free(pbuffer);
688    
689     buffer = new_buffer;
690     dbuffer = new_dbuffer;
691     pbuffer = new_pbuffer;
692     }
693     }
694    
695     return NULL; /* Control never gets here */
696     }
697    
698    
699    
700    
701    
702    
703    
/*************************************************
*        Read number from string                 *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped and then a run of decimal digits is converted.
There is no sign handling. If the digits describe a number that does not fit in
an int, the result sticks at INT_MAX instead of overflowing (signed overflow is
undefined behaviour); all the digits are still consumed.

Arguments:
  str          string to be converted
  endptr       where to put the end pointer; it is set to the first character
                 after the digits, or after the white space if there are none

Returns:       the value as an int, 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (*str != 0 && isspace(*str)) str++;
while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* result*10 + digit fits in an int only if result <= (INT_MAX - digit)/10.
  Once saturated, the test keeps failing, so the value stays at INT_MAX. */

  if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
  }
*endptr = str;
return(result);
}
728    
729    
730    
731 nigel 49
732     /*************************************************
733     * Convert UTF-8 string to value *
734     *************************************************/
735    
736     /* This function takes one or more bytes that represents a UTF-8 character,
737     and returns the value of the character.
738    
739     Argument:
740 nigel 91 utf8bytes a pointer to the byte vector
741     vptr a pointer to an int to receive the value
742 nigel 49
743 nigel 91 Returns: > 0 => the number of bytes consumed
744     -6 to 0 => malformed UTF-8 character at offset = (-return)
745 nigel 49 */
746    
747 nigel 79 #if !defined NOUTF8
748    
749 nigel 67 static int
750 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
751 nigel 49 {
752 nigel 91 int c = *utf8bytes++;
753 nigel 49 int d = c;
754     int i, j, s;
755    
756     for (i = -1; i < 6; i++) /* i is number of additional bytes */
757     {
758     if ((d & 0x80) == 0) break;
759     d <<= 1;
760     }
761    
762     if (i == -1) { *vptr = c; return 1; } /* ascii character */
763     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
764    
765     /* i now has a value in the range 1-5 */
766    
767 nigel 59 s = 6*i;
768 nigel 85 d = (c & utf8_table3[i]) << s;
769 nigel 49
770     for (j = 0; j < i; j++)
771     {
772 nigel 91 c = *utf8bytes++;
773 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
774 nigel 59 s -= 6;
775 nigel 49 d |= (c & 0x3f) << s;
776     }
777    
778     /* Check that encoding was the correct unique one */
779    
780 nigel 85 for (j = 0; j < utf8_table1_size; j++)
781     if (d <= utf8_table1[j]) break;
782 nigel 49 if (j != i) return -(i+1);
783    
784     /* Valid value */
785    
786     *vptr = d;
787     return i+1;
788     }
789    
790 nigel 79 #endif
791 nigel 49
792    
793 nigel 79
794 nigel 63 /*************************************************
795 nigel 85 * Convert character value to UTF-8 *
796     *************************************************/
797    
798     /* This function takes an integer value in the range 0 - 0x7fffffff
799     and encodes it as a UTF-8 character in 0 to 6 bytes.
800    
801     Arguments:
802     cvalue the character value
803 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
804 nigel 85
805     Returns: number of characters placed in the buffer
806     */
807    
808 nigel 93 #if !defined NOUTF8
809    
810 nigel 85 static int
811 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
812 nigel 85 {
813     register int i, j;
814     for (i = 0; i < utf8_table1_size; i++)
815     if (cvalue <= utf8_table1[i]) break;
816 nigel 91 utf8bytes += i;
817 nigel 85 for (j = i; j > 0; j--)
818     {
819 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
820 nigel 85 cvalue >>= 6;
821     }
822 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
823 nigel 85 return i + 1;
824     }
825    
826 nigel 93 #endif
827 nigel 85
828    
829 nigel 93
830 nigel 85 /*************************************************
831 nigel 63 * Print character string *
832     *************************************************/
833 nigel 49
834 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
835     mode. Yields number of characters printed. If handed a NULL file, just counts
836     chars without printing. */
837 nigel 49
838 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
839 nigel 3 {
840 nigel 85 int c = 0;
841 nigel 63 int yield = 0;
842 nigel 3
843 nigel 63 while (length-- > 0)
844 nigel 3 {
845 nigel 79 #if !defined NOUTF8
846 nigel 67 if (use_utf8)
847 nigel 63 {
848     int rc = utf82ord(p, &c);
849 nigel 3
850 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
851     {
852     length -= rc - 1;
853     p += rc;
854 nigel 93 if (PRINTHEX(c))
855 nigel 63 {
856     if (f != NULL) fprintf(f, "%c", c);
857     yield++;
858     }
859     else
860     {
861 nigel 93 int n = 4;
862     if (f != NULL) fprintf(f, "\\x{%02x}", c);
863     yield += (n <= 0x000000ff)? 2 :
864     (n <= 0x00000fff)? 3 :
865     (n <= 0x0000ffff)? 4 :
866     (n <= 0x000fffff)? 5 : 6;
867 nigel 63 }
868     continue;
869     }
870     }
871 nigel 79 #endif
872 nigel 3
873 nigel 63 /* Not UTF-8, or malformed UTF-8 */
874    
875 nigel 93 c = *p++;
876     if (PRINTHEX(c))
877 nigel 3 {
878 nigel 63 if (f != NULL) fprintf(f, "%c", c);
879     yield++;
880 nigel 3 }
881 nigel 63 else
882 nigel 3 {
883 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
884     yield += 4;
885     }
886     }
887 nigel 3
888 nigel 63 return yield;
889     }
890 nigel 23
891 nigel 3
892 nigel 23
893 nigel 63 /*************************************************
894     * Callout function *
895     *************************************************/
896 nigel 3
897 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
898     the match. Yield zero unless more callouts than the fail count, or the callout
899     data is not zero. */
900 nigel 3
901 nigel 63 static int callout(pcre_callout_block *cb)
902     {
903     FILE *f = (first_callout | callout_extra)? outfile : NULL;
904 nigel 75 int i, pre_start, post_start, subject_length;
905 nigel 3
906 nigel 63 if (callout_extra)
907     {
908     fprintf(f, "Callout %d: last capture = %d\n",
909     cb->callout_number, cb->capture_last);
910 nigel 3
911 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
912     {
913     if (cb->offset_vector[i] < 0)
914     fprintf(f, "%2d: <unset>\n", i/2);
915     else
916     {
917     fprintf(f, "%2d: ", i/2);
918     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
919     cb->offset_vector[i+1] - cb->offset_vector[i], f);
920     fprintf(f, "\n");
921     }
922     }
923     }
924 nigel 3
925 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
926     datails. On subsequent calls in the same match, we use pchars just to find the
927     printed lengths of the substrings. */
928 nigel 3
929 nigel 63 if (f != NULL) fprintf(f, "--->");
930 nigel 3
931 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
932     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
933     cb->current_position - cb->start_match, f);
934 nigel 3
935 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
936    
937 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
938     cb->subject_length - cb->current_position, f);
939 nigel 3
940 nigel 63 if (f != NULL) fprintf(f, "\n");
941 nigel 9
942 nigel 63 /* Always print appropriate indicators, with callout number if not already
943 nigel 75 shown. For automatic callouts, show the pattern offset. */
944 nigel 3
945 nigel 75 if (cb->callout_number == 255)
946     {
947     fprintf(outfile, "%+3d ", cb->pattern_position);
948     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
949     }
950     else
951     {
952     if (callout_extra) fprintf(outfile, " ");
953     else fprintf(outfile, "%3d ", cb->callout_number);
954     }
955 nigel 3
956 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
957     fprintf(outfile, "^");
958 nigel 3
959 nigel 63 if (post_start > 0)
960     {
961     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
962     fprintf(outfile, "^");
963 nigel 3 }
964    
965 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
966     fprintf(outfile, " ");
967    
968     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
969     pbuffer + cb->pattern_position);
970    
971 nigel 63 fprintf(outfile, "\n");
972     first_callout = 0;
973 nigel 3
974 ph10 654 if (cb->mark != last_callout_mark)
975 ph10 645 {
976 ph10 654 fprintf(outfile, "Latest Mark: %s\n",
977 ph10 645 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
978 ph10 654 last_callout_mark = cb->mark;
979     }
980 ph10 645
981 nigel 71 if (cb->callout_data != NULL)
982 nigel 49 {
983 nigel 71 int callout_data = *((int *)(cb->callout_data));
984     if (callout_data != 0)
985     {
986     fprintf(outfile, "Callout data = %d\n", callout_data);
987     return callout_data;
988     }
989 nigel 63 }
990 nigel 49
991 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
992     (++callout_count >= callout_fail_count)? 1 : 0;
993 nigel 3 }
994    
995    
996 nigel 63 /*************************************************
997 nigel 73 * Local malloc functions *
998 nigel 63 *************************************************/
999 nigel 3
1000 ph10 667 /* Alternative malloc function, to test functionality and save the size of a
1001     compiled re. The show_malloc variable is set only during matching. */
1002 nigel 3
1003     static void *new_malloc(size_t size)
1004     {
1005 nigel 73 void *block = malloc(size);
1006 nigel 43 gotten_store = size;
1007 nigel 73 if (show_malloc)
1008 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1009 nigel 73 return block;
1010 nigel 3 }
1011    
1012 nigel 73 static void new_free(void *block)
1013     {
1014     if (show_malloc)
1015     fprintf(outfile, "free %p\n", block);
1016     free(block);
1017     }
1018 nigel 3
1019 nigel 73 /* For recursion malloc/free, to test stacking calls */
1020    
1021     static void *stack_malloc(size_t size)
1022     {
1023     void *block = malloc(size);
1024     if (show_malloc)
1025 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1026 nigel 73 return block;
1027     }
1028    
1029     static void stack_free(void *block)
1030     {
1031     if (show_malloc)
1032     fprintf(outfile, "stack_free %p\n", block);
1033     free(block);
1034     }
1035    
1036    
1037 nigel 63 /*************************************************
1038     * Call pcre_fullinfo() *
1039     *************************************************/
1040 nigel 43
1041     /* Get one piece of information from the pcre_fullinfo() function */
1042    
1043     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1044     {
1045     int rc;
1046     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1047     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1048     }
1049    
1050    
1051    
1052 nigel 63 /*************************************************
1053 nigel 75 * Byte flipping function *
1054     *************************************************/
1055    
/* Reverse the byte order of the low-order bytes of value. When n is 2 the two
lowest bytes are swapped; for any other n the four lowest bytes are reversed.
Bits above those bytes do not appear in the result. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;            /* least significant byte */
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2 = (value >> 16) & 0xff;
unsigned long int b3 = (value >> 24) & 0xff;

if (n == 2) return (b0 << 8) | b1;

return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1065    
1066    
1067    
1068    
1069     /*************************************************
1070 nigel 87 * Check match or recursion limit *
1071     *************************************************/
1072    
1073     static int
1074     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1075     int start_offset, int options, int *use_offsets, int use_size_offsets,
1076     int flag, unsigned long int *limit, int errnumber, const char *msg)
1077     {
1078     int count;
1079     int min = 0;
1080     int mid = 64;
1081     int max = -1;
1082    
1083     extra->flags |= flag;
1084    
1085     for (;;)
1086     {
1087     *limit = mid;
1088    
1089     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1090     use_offsets, use_size_offsets);
1091    
1092     if (count == errnumber)
1093     {
1094     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1095     min = mid;
1096     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1097     }
1098    
1099     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1100     count == PCRE_ERROR_PARTIAL)
1101     {
1102     if (mid == min + 1)
1103     {
1104     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1105     break;
1106     }
1107     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1108     max = mid;
1109     mid = (min + mid)/2;
1110     }
1111     else break; /* Some other error */
1112     }
1113    
1114     extra->flags &= ~flag;
1115     return count;
1116     }
1117    
1118    
1119    
1120     /*************************************************
1121 ph10 227 * Case-independent strncmp() function *
1122     *************************************************/
1123    
1124     /*
1125     Arguments:
1126     s first string
1127     t second string
1128     n number of characters to compare
1129    
1130     Returns: < 0, = 0, or > 0, according to the comparison
1131     */
1132    
1133     static int
1134     strncmpic(uschar *s, uschar *t, int n)
1135     {
1136     while (n--)
1137     {
1138     int c = tolower(*s++) - tolower(*t++);
1139     if (c) return c;
1140     }
1141     return 0;
1142     }
1143    
1144    
1145    
1146     /*************************************************
1147 nigel 91 * Check newline indicator *
1148     *************************************************/
1149    
1150 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1151     a message and return 0 if there is no match.
1152 nigel 91
1153     Arguments:
1154     p points after the leading '<'
1155     f file for error message
1156    
1157     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1158     */
1159    
1160     static int
1161     check_newline(uschar *p, FILE *f)
1162     {
1163 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1164     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1165     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1166     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1167     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1168 ph10 231 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1169     if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1170 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
1171     return 0;
1172     }
1173    
1174    
1175    
1176     /*************************************************
1177 nigel 93 * Usage function *
1178     *************************************************/
1179    
/* Write the command-line help text to the standard output. Which lines appear
depends on SUPPORT_LIBREADLINE, NODFA, and NOPOSIX at compile time. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1214    
1215    
1216    
1217     /*************************************************
1218 nigel 63 * Main Program *
1219     *************************************************/
1220 nigel 43
1221 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
1222     consist of a regular expression, in delimiters and optionally followed by
1223     options, followed by a set of test data, terminated by an empty line. */
1224    
1225     int main(int argc, char **argv)
1226     {
1227     FILE *infile = stdin;
1228     int options = 0;
1229     int study_options = 0;
1230 ph10 386 int default_find_match_limit = FALSE;
1231 nigel 3 int op = 1;
1232     int timeit = 0;
1233 nigel 93 int timeitm = 0;
1234 nigel 3 int showinfo = 0;
1235 nigel 31 int showstore = 0;
1236 ph10 667 int force_study = -1;
1237     int force_study_options = 0;
1238 nigel 87 int quiet = 0;
1239 nigel 53 int size_offsets = 45;
1240     int size_offsets_max;
1241 nigel 77 int *offsets = NULL;
1242 nigel 53 #if !defined NOPOSIX
1243 nigel 3 int posix = 0;
1244 nigel 53 #endif
1245 nigel 3 int debug = 0;
1246 nigel 11 int done = 0;
1247 nigel 77 int all_use_dfa = 0;
1248     int yield = 0;
1249 nigel 91 int stack_size;
1250 nigel 3
1251 ph10 667 pcre_jit_stack *jit_stack = NULL;
1252    
1253    
1254 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
1255     that 1024 is plenty long enough for the few names we'll be testing. */
1256 nigel 69
1257 nigel 91 uschar copynames[1024];
1258     uschar getnames[1024];
1259    
1260     uschar *copynamesptr;
1261     uschar *getnamesptr;
1262    
1263 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
1264 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
1265 nigel 69
1266 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
1267     dbuffer = (unsigned char *)malloc(buffer_size);
1268     pbuffer = (unsigned char *)malloc(buffer_size);
1269 nigel 69
1270 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
1271 nigel 3
1272 nigel 93 outfile = stdout;
1273    
1274     /* The following _setmode() stuff is some Windows magic that tells its runtime
1275     library to translate CRLF into a single LF character. At least, that's what
1276     I've been told: never having used Windows I take this all on trust. Originally
1277     it set 0x8000, but then I was advised that _O_BINARY was better. */
1278    
1279 nigel 75 #if defined(_WIN32) || defined(WIN32)
1280 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
1281     #endif
1282 nigel 75
1283 nigel 3 /* Scan options */
1284    
1285     while (argc > 1 && argv[op][0] == '-')
1286     {
1287 nigel 63 unsigned char *endptr;
1288 nigel 53
1289 ph10 606 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1290 ph10 667 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1291     else if (strcmp(argv[op], "-s+") == 0)
1292     {
1293     force_study = 1;
1294     force_study_options = PCRE_STUDY_JIT_COMPILE;
1295     }
1296 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1297 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1298 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1299     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1300 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1301 nigel 79 #if !defined NODFA
1302 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1303 nigel 79 #endif
1304 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1305 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1306     *endptr == 0))
1307 nigel 53 {
1308     op++;
1309     argc--;
1310     }
1311 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1312     {
1313     int both = argv[op][2] == 0;
1314     int temp;
1315     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1316     *endptr == 0))
1317     {
1318     timeitm = temp;
1319     op++;
1320     argc--;
1321     }
1322     else timeitm = LOOPREPEAT;
1323     if (both) timeit = timeitm;
1324     }
1325 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1326     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1327     *endptr == 0))
1328     {
1329 ph10 641 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1330 nigel 91 printf("PCRE: -S not supported on this OS\n");
1331     exit(1);
1332     #else
1333     int rc;
1334     struct rlimit rlim;
1335     getrlimit(RLIMIT_STACK, &rlim);
1336     rlim.rlim_cur = stack_size * 1024 * 1024;
1337     rc = setrlimit(RLIMIT_STACK, &rlim);
1338     if (rc != 0)
1339     {
1340     printf("PCRE: setrlimit() failed with error %d\n", rc);
1341     exit(1);
1342     }
1343     op++;
1344     argc--;
1345     #endif
1346     }
1347 nigel 53 #if !defined NOPOSIX
1348 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1349 nigel 53 #endif
1350 nigel 63 else if (strcmp(argv[op], "-C") == 0)
1351     {
1352     int rc;
1353 ph10 392 unsigned long int lrc;
1354 nigel 63 printf("PCRE version %s\n", pcre_version());
1355     printf("Compiled with\n");
1356     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1357     printf(" %sUTF-8 support\n", rc? "" : "No ");
1358 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1359     printf(" %sUnicode properties support\n", rc? "" : "No ");
1360 ph10 667 (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1361     printf(" %sJust-in-time compiler support\n", rc? "" : "No ");
1362 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1363 ph10 391 /* Note that these values are always the ASCII values, even
1364 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
1365 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1366     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1367 ph10 150 (rc == -2)? "ANYCRLF" :
1368 nigel 93 (rc == -1)? "ANY" : "???");
1369 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1370     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1371     "all Unicode newlines");
1372 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1373     printf(" Internal link size = %d\n", rc);
1374     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1375     printf(" POSIX malloc threshold = %d\n", rc);
1376 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1377     printf(" Default match limit = %ld\n", lrc);
1378     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1379     printf(" Default recursion depth limit = %ld\n", lrc);
1380 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1381     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1382 ph10 121 goto EXIT;
1383 nigel 63 }
1384 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
1385     strcmp(argv[op], "--help") == 0)
1386     {
1387     usage();
1388     goto EXIT;
1389     }
1390 nigel 3 else
1391     {
1392 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
1393 nigel 93 usage();
1394 nigel 77 yield = 1;
1395     goto EXIT;
1396 nigel 3 }
1397     op++;
1398     argc--;
1399     }
1400    
1401 nigel 53 /* Get the store for the offsets vector, and remember what it was */
1402    
1403     size_offsets_max = size_offsets;
1404 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1405 nigel 53 if (offsets == NULL)
1406     {
1407     printf("** Failed to get %d bytes of memory for offsets vector\n",
1408 ph10 151 (int)(size_offsets_max * sizeof(int)));
1409 nigel 77 yield = 1;
1410     goto EXIT;
1411 nigel 53 }
1412    
1413 nigel 3 /* Sort out the input and output files */
1414    
1415     if (argc > 1)
1416     {
1417 nigel 93 infile = fopen(argv[op], INPUT_MODE);
1418 nigel 3 if (infile == NULL)
1419     {
1420     printf("** Failed to open %s\n", argv[op]);
1421 nigel 77 yield = 1;
1422     goto EXIT;
1423 nigel 3 }
1424     }
1425    
1426     if (argc > 2)
1427     {
1428 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
1429 nigel 3 if (outfile == NULL)
1430     {
1431     printf("** Failed to open %s\n", argv[op+1]);
1432 nigel 77 yield = 1;
1433     goto EXIT;
1434 nigel 3 }
1435     }
1436    
1437     /* Set alternative malloc function */
1438    
1439     pcre_malloc = new_malloc;
1440 nigel 73 pcre_free = new_free;
1441     pcre_stack_malloc = stack_malloc;
1442     pcre_stack_free = stack_free;
1443 nigel 3
1444 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1445 nigel 3
1446 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1447 nigel 3
1448     /* Main loop */
1449    
1450 nigel 11 while (!done)
1451 nigel 3 {
1452     pcre *re = NULL;
1453     pcre_extra *extra = NULL;
1454 nigel 37
1455     #if !defined NOPOSIX /* There are still compilers that require no indent */
1456 nigel 3 regex_t preg;
1457 nigel 45 int do_posix = 0;
1458 nigel 37 #endif
1459    
1460 nigel 7 const char *error;
1461 ph10 512 unsigned char *markptr;
1462 nigel 25 unsigned char *p, *pp, *ppp;
1463 nigel 75 unsigned char *to_file = NULL;
1464 nigel 53 const unsigned char *tables = NULL;
1465 nigel 75 unsigned long int true_size, true_study_size = 0;
1466     size_t size, regex_gotten_store;
1467 ph10 654 int do_allcaps = 0;
1468 ph10 512 int do_mark = 0;
1469 nigel 3 int do_study = 0;
1470 ph10 654 int no_force_study = 0;
1471 nigel 25 int do_debug = debug;
1472 nigel 35 int do_G = 0;
1473     int do_g = 0;
1474 nigel 25 int do_showinfo = showinfo;
1475 nigel 35 int do_showrest = 0;
1476 ph10 616 int do_showcaprest = 0;
1477 nigel 75 int do_flip = 0;
1478 nigel 93 int erroroffset, len, delimiter, poffset;
1479 nigel 3
1480 nigel 67 use_utf8 = 0;
1481 ph10 211 debug_lengths = 1;
1482 nigel 63
1483 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1484 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1485 nigel 63 fflush(outfile);
1486 nigel 3
1487     p = buffer;
1488     while (isspace(*p)) p++;
1489     if (*p == 0) continue;
1490    
1491 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1492 nigel 3
1493 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1494     {
1495 nigel 91 unsigned long int magic, get_options;
1496 nigel 75 uschar sbuf[8];
1497     FILE *f;
1498    
1499     p++;
1500     pp = p + (int)strlen((char *)p);
1501     while (isspace(pp[-1])) pp--;
1502     *pp = 0;
1503    
1504     f = fopen((char *)p, "rb");
1505     if (f == NULL)
1506     {
1507     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1508     continue;
1509     }
1510    
1511     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1512    
1513     true_size =
1514     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1515     true_study_size =
1516     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1517    
1518     re = (real_pcre *)new_malloc(true_size);
1519     regex_gotten_store = gotten_store;
1520    
1521     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1522    
1523     magic = ((real_pcre *)re)->magic_number;
1524     if (magic != MAGIC_NUMBER)
1525     {
1526     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1527     {
1528     do_flip = 1;
1529     }
1530     else
1531     {
1532     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1533     fclose(f);
1534     continue;
1535     }
1536     }
1537    
1538 ph10 612 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1539 nigel 75 do_flip? " (byte-inverted)" : "", p);
1540    
1541     /* Need to know if UTF-8 for printing data strings */
1542    
1543 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1544     use_utf8 = (get_options & PCRE_UTF8) != 0;
1545 nigel 75
1546 ph10 612 /* Now see if there is any following study data. */
1547 nigel 75
1548     if (true_study_size != 0)
1549     {
1550     pcre_study_data *psd;
1551    
1552     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1553     extra->flags = PCRE_EXTRA_STUDY_DATA;
1554    
1555     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1556     extra->study_data = psd;
1557    
1558     if (fread(psd, 1, true_study_size, f) != true_study_size)
1559     {
1560     FAIL_READ:
1561     fprintf(outfile, "Failed to read data from %s\n", p);
1562 ph10 667 if (extra != NULL) pcre_free_study(extra);
1563 nigel 75 if (re != NULL) new_free(re);
1564     fclose(f);
1565     continue;
1566     }
1567     fprintf(outfile, "Study data loaded from %s\n", p);
1568     do_study = 1; /* To get the data output if requested */
1569     }
1570     else fprintf(outfile, "No study data\n");
1571    
1572     fclose(f);
1573     goto SHOW_INFO;
1574     }
1575    
1576     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1577     the pattern; if is isn't complete, read more. */
1578    
1579 nigel 3 delimiter = *p++;
1580    
1581 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1582 nigel 3 {
1583 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1584 nigel 3 goto SKIP_DATA;
1585     }
1586    
1587     pp = p;
1588 ph10 530 poffset = (int)(p - buffer);
1589 nigel 3
1590     for(;;)
1591     {
1592 nigel 29 while (*pp != 0)
1593     {
1594     if (*pp == '\\' && pp[1] != 0) pp++;
1595     else if (*pp == delimiter) break;
1596     pp++;
1597     }
1598 nigel 3 if (*pp != 0) break;
1599 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1600 nigel 3 {
1601     fprintf(outfile, "** Unexpected EOF\n");
1602 nigel 11 done = 1;
1603     goto CONTINUE;
1604 nigel 3 }
1605 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1606 nigel 3 }
1607    
1608 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1609     pointer to the correct relative point in the buffer. */
1610    
1611     p = buffer + poffset;
1612    
1613 nigel 29 /* If the first character after the delimiter is backslash, make
1614     the pattern end with backslash. This is purely to provide a way
1615     of testing for the error message when a pattern ends with backslash. */
1616    
1617     if (pp[1] == '\\') *pp++ = '\\';
1618    
1619 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1620     for callouts. */
1621 nigel 3
1622     *pp++ = 0;
1623 nigel 75 strcpy((char *)pbuffer, (char *)p);
1624 nigel 3
1625     /* Look for options after final delimiter */
1626    
1627     options = 0;
1628 nigel 31 log_store = showstore; /* default from command line */
1629    
1630 nigel 3 while (*pp != 0)
1631     {
1632     switch (*pp++)
1633     {
1634 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1635 nigel 35 case 'g': do_g = 1; break;
1636 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1637     case 'm': options |= PCRE_MULTILINE; break;
1638     case 's': options |= PCRE_DOTALL; break;
1639     case 'x': options |= PCRE_EXTENDED; break;
1640 nigel 25
1641 ph10 616 case '+':
1642 ph10 654 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1643 ph10 616 break;
1644 ph10 654
1645     case '=': do_allcaps = 1; break;
1646 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1647 nigel 93 case 'B': do_debug = 1; break;
1648 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1649 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1650 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1651 nigel 75 case 'F': do_flip = 1; break;
1652 nigel 35 case 'G': do_G = 1; break;
1653 nigel 25 case 'I': do_showinfo = 1; break;
1654 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1655 ph10 512 case 'K': do_mark = 1; break;
1656 nigel 31 case 'M': log_store = 1; break;
1657 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1658 nigel 37
1659     #if !defined NOPOSIX
1660 nigel 3 case 'P': do_posix = 1; break;
1661 nigel 37 #endif
1662    
1663 ph10 654 case 'S':
1664 ph10 667 if (do_study == 0)
1665 ph10 612 {
1666 ph10 667 do_study = 1;
1667     if (*pp == '+')
1668     {
1669     study_options |= PCRE_STUDY_JIT_COMPILE;
1670     pp++;
1671     }
1672     }
1673     else
1674     {
1675 ph10 612 do_study = 0;
1676     no_force_study = 1;
1677 ph10 654 }
1678 ph10 612 break;
1679    
1680 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1681 ph10 535 case 'W': options |= PCRE_UCP; break;
1682 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1683 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1684 ph10 126 case 'Z': debug_lengths = 0; break;
1685 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1686 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1687 ph10 545
1688 ph10 541 case 'T':
1689     switch (*pp++)
1690     {
1691     case '0': tables = tables0; break;
1692     case '1': tables = tables1; break;
1693 ph10 545
1694 ph10 541 case '\r':
1695     case '\n':
1696 ph10 545 case ' ':
1697     case 0:
1698 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
1699 ph10 545 goto SKIP_DATA;
1700    
1701     default:
1702 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1703 ph10 545 goto SKIP_DATA;
1704 ph10 541 }
1705 ph10 545 break;
1706 nigel 25
1707     case 'L':
1708     ppp = pp;
1709 nigel 93 /* The '\r' test here is so that it works on Windows. */
1710     /* The '0' test is just in case this is an unterminated line. */
1711     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1712 nigel 25 *ppp = 0;
1713     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1714     {
1715     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1716     goto SKIP_DATA;
1717     }
1718 nigel 93 locale_set = 1;
1719 nigel 25 tables = pcre_maketables();
1720     pp = ppp;
1721     break;
1722    
1723 nigel 75 case '>':
1724     to_file = pp;
1725     while (*pp != 0) pp++;
1726     while (isspace(pp[-1])) pp--;
1727     *pp = 0;
1728     break;
1729    
1730 nigel 91 case '<':
1731     {
1732 ph10 518 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1733 ph10 336 {
1734     options |= PCRE_JAVASCRIPT_COMPAT;
1735 ph10 345 pp += 3;
1736 ph10 336 }
1737     else
1738 ph10 345 {
1739 ph10 336 int x = check_newline(pp, outfile);
1740     if (x == 0) goto SKIP_DATA;
1741     options |= x;
1742     while (*pp++ != '>');
1743 ph10 345 }
1744 nigel 91 }
1745     break;
1746    
1747 nigel 77 case '\r': /* So that it works in Windows */
1748     case '\n':
1749     case ' ':
1750     break;
1751 nigel 75
1752 nigel 3 default:
1753     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1754     goto SKIP_DATA;
1755     }
1756     }
1757    
1758 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1759 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1760     local character tables. */
1761 nigel 3
1762 nigel 37 #if !defined NOPOSIX
1763 nigel 3 if (posix || do_posix)
1764     {
1765     int rc;
1766     int cflags = 0;
1767 nigel 75
1768 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1769     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1770 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1771 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1772     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1773 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1774 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1775 nigel 87
1776 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1777    
1778     /* Compilation failed; go back for another re, skipping to blank line
1779     if non-interactive. */
1780    
1781     if (rc != 0)
1782     {
1783 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1784 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1785     goto SKIP_DATA;
1786     }
1787     }
1788    
1789     /* Handle compiling via the native interface */
1790    
1791     else
1792 nigel 37 #endif /* !defined NOPOSIX */
1793    
1794 nigel 3 {
1795 ph10 412 unsigned long int get_options;
1796 ph10 416
1797 nigel 93 if (timeit > 0)
1798 nigel 3 {
1799     register int i;
1800     clock_t time_taken;
1801     clock_t start_time = clock();
1802 nigel 93 for (i = 0; i < timeit; i++)
1803 nigel 3 {
1804 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1805 nigel 3 if (re != NULL) free(re);
1806     }
1807     time_taken = clock() - start_time;
1808 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1809     (((double)time_taken * 1000.0) / (double)timeit) /
1810 nigel 63 (double)CLOCKS_PER_SEC);
1811 nigel 3 }
1812    
1813 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1814 nigel 3
1815     /* Compilation failed; go back for another re, skipping to blank line
1816     if non-interactive. */
1817    
1818     if (re == NULL)
1819     {
1820     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1821     SKIP_DATA:
1822     if (infile != stdin)
1823     {
1824     for (;;)
1825     {
1826 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
1827 nigel 11 {
1828     done = 1;
1829     goto CONTINUE;
1830     }
1831 nigel 3 len = (int)strlen((char *)buffer);
1832     while (len > 0 && isspace(buffer[len-1])) len--;
1833     if (len == 0) break;
1834     }
1835     fprintf(outfile, "\n");
1836     }
1837 nigel 25 goto CONTINUE;
1838 nigel 3 }
1839 ph10 416
1840     /* Compilation succeeded. It is now possible to set the UTF-8 option from
1841     within the regex; check for this so that we know how to process the data
1842 ph10 412 lines. */
1843 ph10 416
1844 ph10 412 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1845     if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1846 nigel 3
1847 ph10 412 /* Print information if required. There are now two info-returning
1848     functions. The old one has a limited interface and returns only limited
1849     data. Check that it agrees with the newer one. */
1850 nigel 3
1851 nigel 63 if (log_store)
1852     fprintf(outfile, "Memory allocation (code space): %d\n",
1853     (int)(gotten_store -
1854     sizeof(real_pcre) -
1855     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1856    
1857 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1858     and remember the store that was got. */
1859    
1860     true_size = ((real_pcre *)re)->size;
1861     regex_gotten_store = gotten_store;
1862    
1863 ph10 612 /* If -s or /S was present, study the regex to generate additional info to
1864 ph10 654 help with the matching, unless the pattern has the SS option, which
1865 ph10 612 suppresses the effect of /S (used for a few test patterns where studying is
1866     never sensible). */
1867 nigel 75
1868 ph10 667 if (do_study || (force_study >= 0 && !no_force_study))
1869 nigel 75 {
1870 nigel 93 if (timeit > 0)
1871 nigel 75 {
1872     register int i;
1873     clock_t time_taken;
1874     clock_t start_time = clock();
1875 nigel 93 for (i = 0; i < timeit; i++)
1876 ph10 667 extra = pcre_study(re, study_options | force_study_options, &error);
1877 nigel 75 time_taken = clock() - start_time;
1878 ph10 667 if (extra != NULL) pcre_free_study(extra);
1879 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1880     (((double)time_taken * 1000.0) / (double)timeit) /
1881 nigel 75 (double)CLOCKS_PER_SEC);
1882     }
1883 ph10 667 extra = pcre_study(re, study_options | force_study_options, &error);
1884 nigel 75 if (error != NULL)
1885     fprintf(outfile, "Failed to study: %s\n", error);
1886     else if (extra != NULL)
1887     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1888     }
1889 ph10 512
1890 ph10 510 /* If /K was present, we set up for handling MARK data. */
1891 ph10 512
1892 ph10 510 if (do_mark)
1893     {
1894     if (extra == NULL)
1895     {
1896     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1897     extra->flags = 0;
1898     }
1899 ph10 512 extra->mark = &markptr;
1900 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
1901 ph10 512 }
1902 nigel 75
1903     /* If the 'F' option was present, we flip the bytes of all the integer
1904     fields in the regex data block and the study block. This is to make it
1905     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1906     compiled on a different architecture. */
1907    
1908     if (do_flip)
1909     {
1910     real_pcre *rre = (real_pcre *)re;
1911 ph10 259 rre->magic_number =
1912 ph10 255 byteflip(rre->magic_number, sizeof(rre->magic_number));
1913 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
1914     rre->options = byteflip(rre->options, sizeof(rre->options));
1915 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1916 ph10 259 rre->top_bracket =
1917 ph10 255 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1918 ph10 259 rre->top_backref =
1919 ph10 255 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1920 ph10 259 rre->first_byte =
1921 ph10 255 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1922 ph10 259 rre->req_byte =
1923 ph10 255 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1924     rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1925 nigel 75 sizeof(rre->name_table_offset));
1926 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1927 nigel 75 sizeof(rre->name_entry_size));
1928 ph10 259 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1929 ph10 255 sizeof(rre->name_count));
1930 nigel 75
1931     if (extra != NULL)
1932     {
1933     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1934     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1935 ph10 455 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1936     rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1937 nigel 75 }
1938     }
1939    
1940     /* Extract information from the compiled data if required */
1941    
1942     SHOW_INFO:
1943    
1944 nigel 93 if (do_debug)
1945     {
1946     fprintf(outfile, "------------------------------------------------------------------\n");
1947 ph10 116 pcre_printint(re, outfile, debug_lengths);
1948 nigel 93 }
1949 ph10 416
1950 ph10 412 /* We already have the options in get_options (see above) */
1951 nigel 93
1952 nigel 25 if (do_showinfo)
1953 nigel 3 {
1954 ph10 412 unsigned long int all_options;
1955 nigel 79 #if !defined NOINFOCHECK
1956 nigel 43 int old_first_char, old_options, old_count;
1957 nigel 79 #endif
1958 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1959 ph10 227 hascrorlf;
1960 nigel 63 int nameentrysize, namecount;
1961     const uschar *nametable;
1962 nigel 3
1963 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1964     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1965     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1966 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1967 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1968 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1969     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1970 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1971 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1972     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1973 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1974 nigel 43
1975 nigel 79 #if !defined NOINFOCHECK
1976 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1977 nigel 3 if (count < 0) fprintf(outfile,
1978 nigel 43 "Error %d from pcre_info()\n", count);
1979 nigel 3 else
1980     {
1981 nigel 43 if (old_count != count) fprintf(outfile,
1982     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1983     old_count);
1984 nigel 37
1985 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1986     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1987     first_char, old_first_char);
1988 nigel 37
1989 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1990     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1991     get_options, old_options);
1992 nigel 43 }
1993 nigel 79 #endif
1994 nigel 43
1995 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1996 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1997 nigel 77 (int)size, (int)regex_gotten_store);
1998 nigel 43
1999     fprintf(outfile, "Capturing subpattern count = %d\n", count);
2000     if (backrefmax > 0)
2001     fprintf(outfile, "Max back reference = %d\n", backrefmax);
2002 nigel 63
2003     if (namecount > 0)
2004     {
2005     fprintf(outfile, "Named capturing subpatterns:\n");
2006     while (namecount-- > 0)
2007     {
2008     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
2009     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2010     GET2(nametable, 0));
2011     nametable += nameentrysize;
2012     }
2013     }
2014 ph10 172
2015 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2016 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2017 nigel 63
2018 nigel 75 all_options = ((real_pcre *)re)->options;
2019 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2020 nigel 75
2021 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
2022 ph10 576 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2023 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2024     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2025     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2026     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2027 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2028 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2029 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2030     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2031 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2032     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2033     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2034 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2035 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2036 ph10 518 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2037 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2038 ph10 576 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2039 nigel 91 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2040 ph10 172
2041 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2042 nigel 43
2043 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
2044 nigel 91 {
2045     case PCRE_NEWLINE_CR:
2046     fprintf(outfile, "Forced newline sequence: CR\n");
2047     break;
2048 nigel 43
2049 nigel 91 case PCRE_NEWLINE_LF:
2050     fprintf(outfile, "Forced newline sequence: LF\n");
2051     break;
2052    
2053     case PCRE_NEWLINE_CRLF:
2054     fprintf(outfile, "Forced newline sequence: CRLF\n");
2055     break;
2056    
2057 ph10 149 case PCRE_NEWLINE_ANYCRLF:
2058     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2059     break;
2060    
2061 nigel 93 case PCRE_NEWLINE_ANY:
2062     fprintf(outfile, "Forced newline sequence: ANY\n");
2063     break;
2064    
2065 nigel 91 default:
2066     break;
2067     }
2068    
2069 nigel 43 if (first_char == -1)
2070     {
2071 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
2072 nigel 43 }
2073     else if (first_char < 0)
2074     {
2075     fprintf(outfile, "No first char\n");
2076     }
2077     else
2078     {
2079 nigel 63 int ch = first_char & 255;
2080 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2081 nigel 63 "" : " (caseless)";
2082 nigel 93 if (PRINTHEX(ch))
2083 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2084 nigel 3 else
2085 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2086 nigel 43 }
2087 nigel 37
2088 nigel 43 if (need_char < 0)
2089     {
2090     fprintf(outfile, "No need char\n");
2091 nigel 3 }
2092 nigel 43 else
2093     {
2094 nigel 63 int ch = need_char & 255;
2095 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2096 nigel 63 "" : " (caseless)";
2097 nigel 93 if (PRINTHEX(ch))
2098 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2099 nigel 43 else
2100 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2101 nigel 43 }
2102 nigel 75
2103     /* Don't output study size; at present it is in any case a fixed
2104     value, but it varies, depending on the computer architecture, and
2105     so messes up the test suite. (And with the /F option, it might be
2106 ph10 654 flipped.) If study was forced by an external -s, don't show this
2107 ph10 612 information unless -i or -d was also present. This means that, except
2108     when auto-callouts are involved, the output from runs with and without
2109     -s should be identical. */
2110 nigel 75
2111 ph10 667 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2112 nigel 75 {
2113     if (extra == NULL)
2114     fprintf(outfile, "Study returned NULL\n");
2115     else
2116     {
2117     uschar *start_bits = NULL;
2118 ph10 455 int minlength;
2119 ph10 461
2120 ph10 455 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2121 ph10 461 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2122    
2123 nigel 75 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2124     if (start_bits == NULL)
2125 ph10 455 fprintf(outfile, "No set of starting bytes\n");
2126 nigel 75 else
2127     {
2128     int i;
2129     int c = 24;
2130     fprintf(outfile, "Starting byte set: ");
2131     for (i = 0; i < 256; i++)
2132     {
2133     if ((start_bits[i/8] & (1<<(i&7))) != 0)
2134     {
2135     if (c > 75)
2136     {
2137     fprintf(outfile, "\n ");
2138     c = 2;
2139     }
2140 nigel 93 if (PRINTHEX(i) && i != ' ')
2141 nigel 75 {
2142     fprintf(outfile, "%c ", i);
2143     c += 2;
2144     }
2145     else
2146     {
2147     fprintf(outfile, "\\x%02x ", i);
2148     c += 5;
2149     }
2150     }
2151     }
2152     fprintf(outfile, "\n");
2153     }
2154     }
2155 ph10 667
2156     /* Show this only if the JIT was set by /S, not by -s. */
2157    
2158     if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2159     {
2160     int jit;
2161     new_info(re, extra, PCRE_INFO_JIT, &jit);
2162     if (jit)
2163     fprintf(outfile, "JIT study was successful\n");
2164     else
2165     #ifdef SUPPORT_JIT
2166     fprintf(outfile, "JIT study was not successful\n");
2167     #else
2168     fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2169     #endif
2170     }
2171 nigel 75 }
2172 nigel 3 }
2173    
2174 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
2175     that is all. The first 8 bytes of the file are the regex length and then
2176     the study length, in big-endian order. */
2177 nigel 3
2178 nigel 75 if (to_file != NULL)
2179 nigel 3 {
2180 nigel 75 FILE *f = fopen((char *)to_file, "wb");
2181     if (f == NULL)
2182 nigel 3 {
2183 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2184 nigel 3 }
2185 nigel 75 else
2186     {
2187     uschar sbuf[8];
2188 ph10 255 sbuf[0] = (uschar)((true_size >> 24) & 255);
2189     sbuf[1] = (uschar)((true_size >> 16) & 255);
2190     sbuf[2] = (uschar)((true_size >> 8) & 255);
2191     sbuf[3] = (uschar)((true_size) & 255);
2192 ph10 259
2193 ph10 255 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2194     sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2195     sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2196     sbuf[7] = (uschar)((true_study_size) & 255);
2197 nigel 3
2198 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
2199     fwrite(re, 1, true_size, f) < true_size)
2200     {
2201     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2202     }
2203 nigel 3 else
2204     {
2205 ph10 612 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2206 ph10 654
2207 ph10 658 /* If there is study data, write it. */
2208 ph10 654
2209 nigel 75 if (extra != NULL)
2210 nigel 3 {
2211 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
2212     true_study_size)
2213 nigel 3 {
2214 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
2215     strerror(errno));
2216 nigel 3 }
2217 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
2218 nigel 3 }
2219     }
2220 nigel 75 fclose(f);
2221 nigel 3 }
2222 nigel 77
2223     new_free(re);
2224 ph10 667 if (extra != NULL) pcre_free_study(extra);
2225 ph10 545 if (locale_set)
2226 ph10 541 {
2227     new_free((void *)tables);
2228     setlocale(LC_CTYPE, "C");
2229 ph10 545 locale_set = 0;
2230     }
2231 nigel 75 continue; /* With next regex */
2232 nigel 3 }
2233 nigel 75 } /* End of non-POSIX compile */
2234 nigel 3
2235     /* Read data lines and test them */
2236    
2237     for (;;)
2238     {
2239 nigel 87 uschar *q;
2240 ph10 147 uschar *bptr;
2241 nigel 57 int *use_offsets = offsets;
2242 nigel 53 int use_size_offsets = size_offsets;
2243 nigel 63 int callout_data = 0;
2244     int callout_data_set = 0;
2245 nigel 3 int count, c;
2246 nigel 29 int copystrings = 0;
2247 ph10 386 int find_match_limit = default_find_match_limit;
2248 nigel 29 int getstrings = 0;
2249     int getlist = 0;
2250 nigel 39 int gmatched = 0;
2251 nigel 35 int start_offset = 0;
2252 ph10 579 int start_offset_sign = 1;
2253 nigel 41 int g_notempty = 0;
2254 nigel 77 int use_dfa = 0;
2255 nigel 3
2256     options = 0;
2257    
2258 nigel 91 *copynames = 0;
2259     *getnames = 0;
2260    
2261     copynamesptr = copynames;
2262     getnamesptr = getnames;
2263    
2264 nigel 63 pcre_callout = callout;
2265     first_callout = 1;
2266 ph10 654 last_callout_mark = NULL;
2267 nigel 63 callout_extra = 0;
2268     callout_count = 0;
2269     callout_fail_count = 999999;
2270     callout_fail_id = -1;
2271 nigel 73 show_malloc = 0;
2272 nigel 63
2273 nigel 91 if (extra != NULL) extra->flags &=
2274     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2275    
2276     len = 0;
2277     for (;;)
2278 nigel 11 {
2279 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2280 nigel 91 {
2281 ph10 537 if (len > 0) /* Reached EOF without hitting a newline */
2282     {
2283 ph10 545 fprintf(outfile, "\n");
2284 ph10 537 break;
2285 ph10 545 }
2286 nigel 91 done = 1;
2287     goto CONTINUE;
2288     }
2289     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2290     len = (int)strlen((char *)buffer);
2291     if (buffer[len-1] == '\n') break;
2292 nigel 11 }
2293 nigel 3
2294     while (len > 0 && isspace(buffer[len-1])) len--;
2295     buffer[len] = 0;
2296     if (len == 0) break;
2297    
2298     p = buffer;
2299     while (isspace(*p)) p++;
2300    
2301 ph10 147 bptr = q = dbuffer;
2302 nigel 3 while ((c = *p++) != 0)
2303     {
2304     int i = 0;
2305     int n = 0;
2306 nigel 63
2307 nigel 3 if (c == '\\') switch ((c = *p++))
2308     {
2309     case 'a': c = 7; break;
2310     case 'b': c = '\b'; break;
2311     case 'e': c = 27; break;
2312     case 'f': c = '\f'; break;
2313     case 'n': c = '\n'; break;
2314     case 'r': c = '\r'; break;
2315     case 't': c = '\t'; break;
2316     case 'v': c = '\v'; break;
2317    
2318     case '0': case '1': case '2': case '3':
2319     case '4': case '5': case '6': case '7':
2320     c -= '0';
2321     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2322     c = c * 8 + *p++ - '0';
2323 nigel 91
2324     #if !defined NOUTF8
2325     if (use_utf8 && c > 255)
2326     {
2327     unsigned char buff8[8];
2328     int ii, utn;
2329     utn = ord2utf8(c, buff8);
2330     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2331     c = buff8[ii]; /* Last byte */
2332     }
2333     #endif
2334 nigel 3 break;
2335    
2336     case 'x':
2337 nigel 49
2338     /* Handle \x{..} specially - new Perl thing for utf8 */
2339    
2340 nigel 79 #if !defined NOUTF8
2341 nigel 49 if (*p == '{')
2342     {
2343     unsigned char *pt = p;
2344     c = 0;
2345     while (isxdigit(*(++pt)))
2346     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2347     if (*pt == '}')
2348     {
2349 nigel 67 unsigned char buff8[8];
2350 nigel 49 int ii, utn;
2351 ph10 355 if (use_utf8)
2352 ph10 358 {
2353 ph10 355 utn = ord2utf8(c, buff8);
2354     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2355     c = buff8[ii]; /* Last byte */
2356     }
2357     else
2358     {
2359 ph10 358 if (c > 255)
2360 ph10 355 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2361     "UTF-8 mode is not enabled.\n"
2362     "** Truncation will probably give the wrong result.\n", c);
2363 ph10 358 }
2364 nigel 49 p = pt + 1;
2365     break;
2366     }
2367     /* Not correct form; fall through */
2368     }
2369 nigel 79 #endif
2370 nigel 49
2371     /* Ordinary \x */
2372    
2373 nigel 3 c = 0;
2374     while (i++ < 2 && isxdigit(*p))
2375     {
2376     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2377     p++;
2378     }
2379     break;
2380    
2381 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
2382 nigel 3 p--;
2383     continue;
2384    
2385 nigel 75 case '>':
2386 ph10 579 if (*p == '-')
2387 ph10 567 {
2388     start_offset_sign = -1;
2389     p++;
2390 ph10 579 }
2391 nigel 75 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2392 ph10 579 start_offset *= start_offset_sign;
2393 nigel 75 continue;
2394    
2395 nigel 3 case 'A': /* Option setting */
2396     options |= PCRE_ANCHORED;
2397     continue;
2398    
2399     case 'B':
2400     options |= PCRE_NOTBOL;
2401     continue;
2402    
2403 nigel 29 case 'C':
2404 nigel 63 if (isdigit(*p)) /* Set copy string */
2405     {
2406     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2407     copystrings |= 1 << n;
2408     }
2409     else if (isalnum(*p))
2410     {
2411 nigel 91 uschar *npp = copynamesptr;
2412 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2413 nigel 91 *npp++ = 0;
2414 nigel 67 *npp = 0;
2415 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2416 nigel 63 if (n < 0)
2417 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2418     copynamesptr = npp;
2419 nigel 63 }
2420     else if (*p == '+')
2421     {
2422     callout_extra = 1;
2423     p++;
2424     }
2425     else if (*p == '-')
2426     {
2427     pcre_callout = NULL;
2428     p++;
2429     }
2430     else if (*p == '!')
2431     {
2432     callout_fail_id = 0;
2433     p++;
2434     while(isdigit(*p))
2435     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2436     callout_fail_count = 0;
2437     if (*p == '!')
2438     {
2439     p++;
2440     while(isdigit(*p))
2441     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2442     }
2443     }
2444     else if (*p == '*')
2445     {
2446     int sign = 1;
2447     callout_data = 0;
2448     if (*(++p) == '-') { sign = -1; p++; }
2449     while(isdigit(*p))
2450     callout_data = callout_data * 10 + *p++ - '0';
2451     callout_data *= sign;
2452     callout_data_set = 1;
2453     }
2454 nigel 29 continue;
2455    
2456 nigel 79 #if !defined NODFA
2457 nigel 77 case 'D':
2458 nigel 79 #if !defined NOPOSIX
2459 nigel 77 if (posix || do_posix)
2460     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2461     else
2462 nigel 79 #endif
2463 nigel 77 use_dfa = 1;
2464     continue;
2465 ph10 553 #endif
2466 nigel 77
2467 ph10 553 #if !defined NODFA
2468 nigel 77 case 'F':
2469     options |= PCRE_DFA_SHORTEST;
2470     continue;
2471 nigel 79 #endif
2472 nigel 77
2473 nigel 29 case 'G':
2474 nigel 63 if (isdigit(*p))
2475     {
2476     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2477     getstrings |= 1 << n;
2478     }
2479     else if (isalnum(*p))
2480     {
2481 nigel 91 uschar *npp = getnamesptr;
2482 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2483 nigel 91 *npp++ = 0;
2484 nigel 67 *npp = 0;
2485 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2486 nigel 63 if (n < 0)
2487 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2488     getnamesptr = npp;
2489 nigel 63 }
2490 nigel 29 continue;
2491 ph10 667
2492     case 'J':
2493     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2494     if (extra != NULL
2495     && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2496     && extra->executable_jit != NULL)
2497     {
2498     if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2499     jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2500     pcre_assign_jit_callback(extra, jit_callback, jit_stack);
2501     }
2502     continue;
2503 nigel 29
2504     case 'L':
2505     getlist = 1;
2506     continue;
2507    
2508 nigel 63 case 'M':
2509     find_match_limit = 1;
2510     continue;
2511    
2512 nigel 37 case 'N':
2513 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
2514     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2515 ph10 461 else
2516 ph10 442 options |= PCRE_NOTEMPTY;
2517 nigel 37 continue;
2518    
2519 nigel 3 case 'O':
2520     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2521 nigel 53 if (n > size_offsets_max)
2522     {
2523     size_offsets_max = n;
2524 nigel 57 free(offsets);
2525 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2526 nigel 53 if (offsets == NULL)
2527     {
2528     printf("** Failed to get %d bytes of memory for offsets vector\n",
2529 ph10 151 (int)(size_offsets_max * sizeof(int)));
2530 nigel 77 yield = 1;
2531     goto EXIT;
2532 nigel 53 }
2533     }
2534     use_size_offsets = n;
2535 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2536 nigel 3 continue;
2537    
2538 nigel 75 case 'P':
2539 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2540 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2541 nigel 75 continue;
2542    
2543 nigel 91 case 'Q':
2544     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2545     if (extra == NULL)
2546     {
2547     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2548     extra->flags = 0;
2549     }
2550     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2551     extra->match_limit_recursion = n;
2552     continue;
2553    
2554     case 'q':
2555     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2556     if (extra == NULL)
2557     {
2558     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2559     extra->flags = 0;
2560     }
2561     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2562     extra->match_limit = n;
2563     continue;
2564    
2565 nigel 79 #if !defined NODFA
2566 nigel 77 case 'R':
2567     options |= PCRE_DFA_RESTART;
2568     continue;
2569 nigel 79 #endif
2570 nigel 77
2571 nigel 73 case 'S':
2572     show_malloc = 1;
2573     continue;
2574 ph10 392
2575 ph10 389 case 'Y':
2576     options |= PCRE_NO_START_OPTIMIZE;
2577 ph10 392 continue;
2578 nigel 73
2579 nigel 3 case 'Z':
2580     options |= PCRE_NOTEOL;
2581     continue;
2582 nigel 71
2583     case '?':
2584     options |= PCRE_NO_UTF8_CHECK;
2585     continue;
2586 nigel 91
2587     case '<':
2588     {
2589     int x = check_newline(p, outfile);
2590     if (x == 0) goto NEXT_DATA;
2591     options |= x;
2592     while (*p++ != '>');
2593     }
2594     continue;
2595 nigel 3 }
2596 nigel 9 *q++ = c;
2597 nigel 3 }
2598 nigel 9 *q = 0;
2599 ph10 530 len = (int)(q - dbuffer);
2600 ph10 545
2601 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
2602 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2603 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
2604 ph10 371
2605 ph10 363 #if !defined NOPOSIX
2606     if (posix || do_posix)
2607     {
2608     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2609 ph10 371 bptr += buffer_size - len - 1;
2610 ph10 363 }
2611 ph10 371 else
2612     #endif
2613 ph10 363 {
2614     memmove(bptr + buffer_size - len, bptr, len);
2615 ph10 371 bptr += buffer_size - len;
2616     }
2617 nigel 3
2618 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
2619     {
2620     printf("**Match limit not relevant for DFA matching: ignored\n");
2621     find_match_limit = 0;
2622     }
2623    
2624 nigel 3 /* Handle matching via the POSIX interface, which does not
2625 nigel 63 support timing or playing with the match limit or callout data. */
2626 nigel 3
2627 nigel 37 #if !defined NOPOSIX
2628 nigel 3 if (posix || do_posix)
2629     {
2630     int rc;
2631     int eflags = 0;
2632 nigel 63 regmatch_t *pmatch = NULL;
2633     if (use_size_offsets > 0)
2634 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2635 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2636     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2637 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2638 nigel 3
2639 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2640 nigel 3
2641     if (rc != 0)
2642     {
2643 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2644 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2645     }
2646 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2647     != 0)
2648     {
2649     fprintf(outfile, "Matched with REG_NOSUB\n");
2650     }
2651 nigel 3 else
2652     {
2653 nigel 7 size_t i;
2654 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
2655 nigel 3 {
2656     if (pmatch[i].rm_so >= 0)
2657     {
2658 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2659 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2660     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2661 nigel 3 fprintf(outfile, "\n");
2662 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
2663 nigel 35 {
2664 ph10 616 fprintf(outfile, "%2d+ ", (int)i);
2665 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2666     outfile);
2667 nigel 35 fprintf(outfile, "\n");
2668     }
2669 nigel 3 }
2670     }
2671     }
2672 nigel 53 free(pmatch);
2673 nigel 3 }
2674    
2675 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2676 nigel 3
2677 nigel 37 else
2678     #endif /* !defined NOPOSIX */
2679    
2680 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2681 nigel 3 {
2682 ph10 512 markptr = NULL;
2683    
2684 nigel 93 if (timeitm > 0)
2685 nigel 3 {
2686     register int i;
2687     clock_t time_taken;
2688     clock_t start_time = clock();
2689 nigel 77
2690 nigel 79 #if !defined NODFA
2691 nigel 77 if (all_use_dfa || use_dfa)
2692     {
2693     int workspace[1000];
2694 nigel 93 for (i = 0; i < timeitm; i++)
2695 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2696 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2697     sizeof(workspace)/sizeof(int));
2698     }
2699     else
2700 nigel 79 #endif
2701 nigel 77
2702 nigel 93 for (i = 0; i < timeitm; i++)
2703 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2704 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2705 nigel 77
2706 nigel 3 time_taken = clock() - start_time;
2707 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2708     (((double)time_taken * 1000.0) / (double)timeitm) /
2709 nigel 63 (double)CLOCKS_PER_SEC);
2710 nigel 3 }
2711    
2712 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2713 nigel 87 varying limits in order to find the minimum value for the match limit and
2714 ph10 667 for the recursion limit. The match limits are relevant only to the normal
2715     running of pcre_exec(), so disable the JIT optimization. This makes it
2716     possible to run the same set of tests with and without JIT externally
2717     requested. */
2718 nigel 63
2719     if (find_match_limit)
2720     {
2721     if (extra == NULL)
2722     {
2723 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2724 nigel 63 extra->flags = 0;
2725     }
2726 ph10 667 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
2727    
2728 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2729 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2730     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2731     PCRE_ERROR_MATCHLIMIT, "match()");
2732 nigel 63
2733 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2734     options|g_notempty, use_offsets, use_size_offsets,
2735     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2736     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2737 nigel 63 }
2738    
2739     /* If callout_data is set, use the interface with additional data */
2740    
2741     else if (callout_data_set)
2742     {
2743     if (extra == NULL)
2744     {
2745 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2746 nigel 63 extra->flags = 0;
2747     }
2748     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2749 nigel 71 extra->callout_data = &callout_data;
2750 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2751     options | g_notempty, use_offsets, use_size_offsets);
2752     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2753     }
2754    
2755     /* The normal case is just to do the match once, with the default
2756     value of match_limit. */
2757    
2758 nigel 79 #if !defined NODFA
2759 nigel 77 else if (all_use_dfa || use_dfa)
2760     {
2761     int workspace[1000];
2762 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2763 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2764     sizeof(workspace)/sizeof(int));
2765     if (count == 0)
2766     {
2767     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2768     count = use_size_offsets/2;
2769     }
2770     }
2771 nigel 79 #endif
2772 nigel 77
2773 nigel 75 else
2774     {
2775     count = pcre_exec(re, extra, (char *)bptr, len,
2776     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2777 nigel 77 if (count == 0)
2778     {
2779     fprintf(outfile, "Matched, but too many substrings\n");
2780     count = use_size_offsets/3;
2781     }
2782 nigel 75 }
2783 nigel 3
2784 nigel 39 /* Matched */
2785    
2786 nigel 3 if (count >= 0)
2787     {
2788 nigel 93 int i, maxcount;
2789    
2790     #if !defined NODFA
2791     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2792     #endif
2793     maxcount = use_size_offsets/3;
2794    
2795     /* This is a check against a lunatic return value. */
2796    
2797     if (count > maxcount)
2798     {
2799     fprintf(outfile,
2800     "** PCRE error: returned count %d is too big for offset size %d\n",
2801     count, use_size_offsets);
2802     count = use_size_offsets/3;
2803     if (do_g || do_G)
2804     {
2805     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2806     do_g = do_G = FALSE; /* Break g/G loop */
2807     }
2808     }
2809 ph10 654
2810 ph10 626 /* do_allcaps requests showing of all captures in the pattern, to check
2811     unset ones at the end. */
2812 ph10 654
2813 ph10 626 if (do_allcaps)
2814     {
2815     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2816 ph10 654 count++; /* Allow for full match */
2817     if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2818     }
2819 nigel 93
2820 ph10 626 /* Output the captured substrings */
2821 ph10 654
2822 nigel 29 for (i = 0; i < count * 2; i += 2)
2823 nigel 3 {
2824 nigel 57 if (use_offsets[i] < 0)
2825 ph10 654 {
2826 ph10 626 if (use_offsets[i] != -1)
2827     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2828 ph10 654 use_offsets[i], i);
2829 ph10 626 if (use_offsets[i+1] != -1)
2830     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2831 ph10 654 use_offsets[i+1], i+1);
2832 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2833 ph10 654 }
2834 nigel 3 else
2835     {
2836     fprintf(outfile, "%2d: ", i/2);
2837 nigel 63 (void)pchars(bptr + use_offsets[i],
2838     use_offsets[i+1] - use_offsets[i], outfile);
2839 nigel 3 fprintf(outfile, "\n");
2840 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
2841 nigel 35 {
2842 ph10 616 fprintf(outfile, "%2d+ ", i/2);
2843     (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2844     outfile);
2845     fprintf(outfile, "\n");
2846 nigel 35 }
2847 nigel 3 }
2848     }
2849 ph10 512
2850 ph10 510 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2851 nigel 29
2852     for (i = 0; i < 32; i++)
2853     {
2854     if ((copystrings & (1 << i)) != 0)
2855     {
2856 nigel 91 char copybuffer[256];
2857 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2858 nigel 37 i, copybuffer, sizeof(copybuffer));
2859 nigel 29 if (rc < 0)
2860     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2861     else
2862 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2863 nigel 29 }
2864     }
2865    
2866 nigel 91 for (copynamesptr = copynames;
2867     *copynamesptr != 0;
2868     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2869     {
2870     char copybuffer[256];
2871     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2872     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2873     if (rc < 0)
2874     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2875     else
2876     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2877     }
2878    
2879 nigel 29 for (i = 0; i < 32; i++)
2880     {
2881     if ((getstrings & (1 << i)) != 0)
2882     {
2883     const char *substring;
2884 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2885 nigel 29 i, &substring);
2886     if (rc < 0)
2887     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2888     else
2889     {
2890     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2891 nigel 49 pcre_free_substring(substring);
2892 nigel 29 }
2893     }
2894     }
2895    
2896 nigel 91 for (getnamesptr = getnames;
2897     *getnamesptr != 0;
2898     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2899     {
2900     const char *substring;
2901     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2902     count, (char *)getnamesptr, &substring);
2903     if (rc < 0)
2904     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2905     else
2906     {
2907     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2908     pcre_free_substring(substring);
2909     }
2910     }
2911    
2912 nigel 29 if (getlist)
2913     {
2914     const char **stringlist;
2915 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2916 nigel 29 &stringlist);
2917     if (rc < 0)
2918     fprintf(outfile, "get substring list failed %d\n", rc);
2919     else
2920     {
2921     for (i = 0; i < count; i++)
2922     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2923     if (stringlist[i] != NULL)
2924     fprintf(outfile, "string list not terminated by NULL\n");
2925 nigel 49 pcre_free_substring_list(stringlist);
2926 nigel 29 }
2927     }
2928 nigel 39 }
2929 nigel 29
2930 nigel 75 /* There was a partial match */
2931    
2932     else if (count == PCRE_ERROR_PARTIAL)
2933     {
2934 ph10 510 if (markptr == NULL) fprintf(outfile, "Partial match");
2935     else fprintf(outfile, "Partial match, mark=%s", markptr);
2936 ph10 426 if (use_size_offsets > 1)
2937     {
2938     fprintf(outfile, ": ");
2939     pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2940 ph10 461 outfile);
2941     }
2942 nigel 77 fprintf(outfile, "\n");
2943 nigel 75 break; /* Out of the /g loop */
2944     }
2945    
2946 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2947 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2948     to advance the start offset, and continue. We won't be at the end of the
2949     string - that was checked before setting g_notempty.
2950 nigel 39
2951 ph10 566 Complication arises in the case when the newline convention is "any",
2952 ph10 579 "crlf", or "anycrlf". If the previous match was at the end of a line
2953     terminated by CRLF, an advance of one character just passes the \r,
2954 ph10 566 whereas we should prefer the longer newline sequence, as does the code in
2955 ph10 579 pcre_exec(). Fudge the offset value to achieve this. We check for a
2956     newline setting in the pattern; if none was set, use pcre_config() to
2957 ph10 566 find the default.
2958 ph10 144
2959 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2960     character, not one byte. */
2961    
2962 nigel 3 else
2963     {
2964 nigel 41 if (g_notempty != 0)
2965 nigel 35 {
2966 nigel 73 int onechar = 1;
2967 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2968 nigel 57 use_offsets[0] = start_offset;
2969 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2970     {
2971     int d;
2972     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2973 ph10 391 /* Note that these values are always the ASCII ones, even in
2974     EBCDIC environments. CR = 13, NL = 10. */
2975     obits = (d == 13)? PCRE_NEWLINE_CR :
2976     (d == 10)? PCRE_NEWLINE_LF :
2977     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2978 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2979 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2980     }
2981 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2982 ph10 566 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2983 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2984 ph10 149 &&
2985 ph10 143 start_offset < len - 1 &&
2986     bptr[start_offset] == '\r' &&
2987     bptr[start_offset+1] == '\n')
2988 ph10 144 onechar++;
2989 ph10 143 else if (use_utf8)
2990 nigel 73 {
2991     while (start_offset + onechar < len)
2992     {
2993 ph10 566 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2994 ph10 579 onechar++;
2995 nigel 73 }
2996     }
2997     use_offsets[1] = start_offset + onechar;
2998 nigel 35 }
2999 nigel 41 else
3000     {
3001 ph10 598 switch(count)
3002 ph10 654 {
3003 ph10 598 case PCRE_ERROR_NOMATCH:
3004 ph10 512 if (gmatched == 0)
3005 ph10 510 {
3006     if (markptr == NULL) fprintf(outfile, "No match\n");
3007     else fprintf(outfile, "No match, mark = %s\n", markptr);
3008 ph10 512 }
3009 ph10 598 break;
3010 ph10 654
3011 ph10 598 case PCRE_ERROR_BADUTF8:
3012     case PCRE_ERROR_SHORTUTF8:
3013     fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3014     (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3015     if (use_size_offsets >= 2)
3016 ph10 654 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3017 ph10 598 use_offsets[1]);
3018 ph10 654 fprintf(outfile, "\n");
3019     break;
3020    
3021 ph10 598 default:
3022 ph10 654 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3023 ph10 604 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3024 ph10 654 else
3025     fprintf(outfile, "Error %d (Unexpected value)\n", count);
3026 ph10 598 break;
3027 nigel 41 }
3028 ph10 654
3029 nigel 41 break; /* Out of the /g loop */
3030     }
3031 nigel 3 }
3032 nigel 35
3033 nigel 39 /* If not /g or /G we are done */
3034    
3035     if (!do_g && !do_G) break;
3036    
3037 nigel 41 /* If we have matched an empty string, first check to see if we are at
3038 ph10 442 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3039     Perl's /g option does. This turns out to be rather cunning. First we set
3040     PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3041 nigel 47 same point. If this fails (picked up above) we advance to the next
3042 ph10 143 character. */
3043 ph10 142
3044 nigel 41 g_notempty = 0;
3045 ph10 142
3046 nigel 57 if (use_offsets[0] == use_offsets[1])
3047 nigel 41 {
3048 nigel 57 if (use_offsets[0] == len) break;
3049 ph10 442 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3050 nigel 41 }
3051 nigel 39
3052     /* For /g, update the start offset, leaving the rest alone */
3053    
3054 ph10 143 if (do_g) start_offset = use_offsets[1];
3055 nigel 39
3056     /* For /G, update the pointer and length */
3057    
3058     else
3059 nigel 35 {
3060 ph10 143 bptr += use_offsets[1];
3061     len -= use_offsets[1];
3062 nigel 35 }
3063 nigel 39 } /* End of loop for /g and /G */
3064 nigel 91
3065     NEXT_DATA: continue;
3066 nigel 39 } /* End of loop for data lines */
3067 nigel 3
3068 nigel 11 CONTINUE:
3069 nigel 37
3070     #if !defined NOPOSIX
3071 nigel 3 if (posix || do_posix) regfree(&preg);
3072 nigel 37 #endif
3073    
3074 nigel 77 if (re != NULL) new_free(re);
3075 ph10 667 if (extra != NULL) pcre_free_study(extra);
3076 ph10 541 if (locale_set)
3077 nigel 25 {
3078 nigel 77 new_free((void *)tables);
3079 nigel 25 setlocale(LC_CTYPE, "C");
3080 nigel 93 locale_set = 0;
3081 nigel 25 }
3082 ph10 667 if (jit_stack != NULL)
3083     {
3084     pcre_jit_stack_free(jit_stack);
3085     jit_stack = NULL;
3086     }
3087 nigel 3 }
3088    
3089 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
3090 nigel 77
3091     EXIT:
3092    
3093     if (infile != NULL && infile != stdin) fclose(infile);
3094     if (outfile != NULL && outfile != stdout) fclose(outfile);
3095    
3096     free(buffer);
3097     free(dbuffer);
3098     free(pbuffer);
3099     free(offsets);
3100    
3101     return yield;
3102 nigel 3 }
3103    
3104 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12