/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 645 - (hide annotations) (download)
Sun Jul 31 17:02:18 2011 UTC (3 years, 4 months ago) by ph10
File MIME type: text/plain
File size: 91502 byte(s)
Pass *MARK name to callouts

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <locale.h>
#include <errno.h>
#include <limits.h>
50 nigel 3
51 ph10 287 #ifdef SUPPORT_LIBREADLINE
52 ph10 343 #ifdef HAVE_UNISTD_H
53 ph10 287 #include <unistd.h>
54 ph10 343 #endif
55 ph10 287 #include <readline/readline.h>
56     #include <readline/history.h>
57     #endif
58 nigel 93
59 ph10 287
60 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
61     input and output without "b"; then I was told that "b" was needed in some
62     environments, so it was added for release 5.0 to both the input and output. (It
63     makes no difference on Unix-like systems.) Later I was told that it is wrong
64     for the input on Windows. I've now abstracted the modes into two macros that
65     are set here, to make it easier to fiddle with them, and removed "b" from the
66     input mode under Windows. */
67    
68     #if defined(_WIN32) || defined(WIN32)
69     #include <io.h> /* For _setmode() */
70     #include <fcntl.h> /* For _O_BINARY */
71     #define INPUT_MODE "r"
72     #define OUTPUT_MODE "wb"
73    
74 ph10 411 #ifndef isatty
75     #define isatty _isatty /* This is what Windows calls them, I'm told, */
76     #endif /* though in some environments they seem to */
77     /* be already defined, hence the #ifndefs. */
78     #ifndef fileno
79 ph10 343 #define fileno _fileno
80 ph10 411 #endif
81 ph10 343
82 ph10 580 /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84     #ifdef __BORLANDC__
85     #define _setmode(handle, mode) setmode(handle, mode)
86     #endif
87    
88     /* Not Windows */
89    
90 nigel 93 #else
91     #include <sys/time.h> /* These two includes are needed */
92     #include <sys/resource.h> /* for setrlimit(). */
93     #define INPUT_MODE "rb"
94     #define OUTPUT_MODE "wb"
95 nigel 91 #endif
96    
97 nigel 93
98 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
99     displaying the results of pcre_study() and we also need to know about the
100     internal macros, structures, and other internal data values; pcretest has
101     "inside information" compared to a program that strictly follows the PCRE API.
102 nigel 37
103 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105     appropriately for an application, not for building PCRE. */
106 nigel 77
107 ph10 145 #include "pcre.h"
108 nigel 77 #include "pcre_internal.h"
109    
110 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
111     to keep two copies, we include the source file here, changing the names of the
112     external symbols to prevent clashes. */
113 nigel 77
114 ph10 351 #define _pcre_ucp_gentype ucp_gentype
115 nigel 85 #define _pcre_utf8_table1 utf8_table1
116     #define _pcre_utf8_table1_size utf8_table1_size
117     #define _pcre_utf8_table2 utf8_table2
118     #define _pcre_utf8_table3 utf8_table3
119     #define _pcre_utf8_table4 utf8_table4
120     #define _pcre_utt utt
121     #define _pcre_utt_size utt_size
122 ph10 240 #define _pcre_utt_names utt_names
123 nigel 85 #define _pcre_OP_lengths OP_lengths
124    
125     #include "pcre_tables.c"
126    
127     /* We also need the pcre_printint() function for printing out compiled
128     patterns. This function is in a separate file so that it can be included in
129 ph10 507 pcre_compile.c when that module is compiled with debugging enabled. It needs to
130 ph10 498 know which case is being compiled. */
131 nigel 85
132 ph10 498 #define COMPILING_PCRETEST
133     #include "pcre_printint.src"
134    
135     /* The definition of the macro PRINTABLE, which determines whether to print an
136 nigel 93 output character as-is or as a hex value when showing compiled patterns, is
137 ph10 498 contained in the printint.src file. We use it here also, in cases when the
138     locale has not been explicitly changed, so as to get consistent output from
139     systems that differ in their output from isprint() even in the "C" locale. */
140 nigel 93
141     #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
142 nigel 85
143 nigel 37 /* It is possible to compile this test program without including support for
144     testing the POSIX interface, though this is not available via the standard
145     Makefile. */
146    
147     #if !defined NOPOSIX
148 nigel 3 #include "pcreposix.h"
149 nigel 37 #endif
150 nigel 3
151 ph10 107 /* It is also possible, for the benefit of the version currently imported into
152     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
153     interface to the DFA matcher (NODFA), and without the doublecheck of the old
154     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
155     UTF8 support if PCRE is built without it. */
156 nigel 79
157 ph10 107 #ifndef SUPPORT_UTF8
158     #ifndef NOUTF8
159     #define NOUTF8
160     #endif
161     #endif
162 nigel 79
163 ph10 107
164 nigel 85 /* Other parameters */
165    
166 nigel 3 #ifndef CLOCKS_PER_SEC
167     #ifdef CLK_TCK
168     #define CLOCKS_PER_SEC CLK_TCK
169     #else
170     #define CLOCKS_PER_SEC 100
171     #endif
172     #endif
173    
174 nigel 93 /* This is the default loop count for timing. */
175    
176 nigel 75 #define LOOPREPEAT 500000
177 nigel 3
178 nigel 85 /* Static variables */
179    
180 nigel 3 static FILE *outfile;
181     static int log_store = 0;
182 nigel 63 static int callout_count;
183     static int callout_extra;
184     static int callout_fail_count;
185     static int callout_fail_id;
186 ph10 210 static int debug_lengths;
187 nigel 63 static int first_callout;
188 nigel 93 static int locale_set = 0;
189 nigel 73 static int show_malloc;
190 nigel 67 static int use_utf8;
191 nigel 43 static size_t gotten_store;
192 ph10 645 static const unsigned char *last_callout_mark = NULL;
193 nigel 3
194 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
195    
196     static int buffer_size = 50000;
197     static uschar *buffer = NULL;
198     static uschar *dbuffer = NULL;
199 nigel 75 static uschar *pbuffer = NULL;
200 nigel 3
201 ph10 598 /* Textual explanations for runtime error codes */
202 nigel 75
203 ph10 598 static const char *errtexts[] = {
204     NULL, /* 0 is no error */
205     NULL, /* NOMATCH is handled specially */
206     "NULL argument passed",
207     "bad option value",
208     "magic number missing",
209     "unknown opcode - pattern overwritten?",
210     "no more memory",
211     NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
212     "match limit exceeded",
213     "callout error code",
214     NULL, /* BADUTF8 is handled specially */
215     "bad UTF-8 offset",
216     NULL, /* PARTIAL is handled specially */
217     "not used - internal error",
218     "internal error - pattern overwritten?",
219     "bad count value",
220     "item unsupported for DFA matching",
221     "backreference condition or recursion test not supported for DFA matching",
222     "match limit not supported for DFA matching",
223     "workspace size exceeded in DFA matching",
224     "too much recursion for DFA matching",
225     "recursion limit exceeded",
226     "not used - internal error",
227     "invalid combination of newline options",
228     "bad offset value",
229 ph10 642 NULL, /* SHORTUTF8 is handled specially */
230     "nested recursion at the same subject position"
231 ph10 598 };
232    
233    
234 ph10 541 /*************************************************
235     * Alternate character tables *
236     *************************************************/
237 nigel 49
238 ph10 545 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
239     using the default tables of the library. However, the T option can be used to
240     select alternate sets of tables, for different kinds of testing. Note also that
241 ph10 541 the L (locale) option also adjusts the tables. */
242    
243 ph10 545 /* This is the set of tables distributed as default with PCRE. It recognizes
244 ph10 541 only ASCII characters. */
245    
246     static const unsigned char tables0[] = {
247    
248     /* This table is a lower casing table. */
249    
250     0, 1, 2, 3, 4, 5, 6, 7,
251     8, 9, 10, 11, 12, 13, 14, 15,
252     16, 17, 18, 19, 20, 21, 22, 23,
253     24, 25, 26, 27, 28, 29, 30, 31,
254     32, 33, 34, 35, 36, 37, 38, 39,
255     40, 41, 42, 43, 44, 45, 46, 47,
256     48, 49, 50, 51, 52, 53, 54, 55,
257     56, 57, 58, 59, 60, 61, 62, 63,
258     64, 97, 98, 99,100,101,102,103,
259     104,105,106,107,108,109,110,111,
260     112,113,114,115,116,117,118,119,
261     120,121,122, 91, 92, 93, 94, 95,
262     96, 97, 98, 99,100,101,102,103,
263     104,105,106,107,108,109,110,111,
264     112,113,114,115,116,117,118,119,
265     120,121,122,123,124,125,126,127,
266     128,129,130,131,132,133,134,135,
267     136,137,138,139,140,141,142,143,
268     144,145,146,147,148,149,150,151,
269     152,153,154,155,156,157,158,159,
270     160,161,162,163,164,165,166,167,
271     168,169,170,171,172,173,174,175,
272     176,177,178,179,180,181,182,183,
273     184,185,186,187,188,189,190,191,
274     192,193,194,195,196,197,198,199,
275     200,201,202,203,204,205,206,207,
276     208,209,210,211,212,213,214,215,
277     216,217,218,219,220,221,222,223,
278     224,225,226,227,228,229,230,231,
279     232,233,234,235,236,237,238,239,
280     240,241,242,243,244,245,246,247,
281     248,249,250,251,252,253,254,255,
282    
283     /* This table is a case flipping table. */
284    
285     0, 1, 2, 3, 4, 5, 6, 7,
286     8, 9, 10, 11, 12, 13, 14, 15,
287     16, 17, 18, 19, 20, 21, 22, 23,
288     24, 25, 26, 27, 28, 29, 30, 31,
289     32, 33, 34, 35, 36, 37, 38, 39,
290     40, 41, 42, 43, 44, 45, 46, 47,
291     48, 49, 50, 51, 52, 53, 54, 55,
292     56, 57, 58, 59, 60, 61, 62, 63,
293     64, 97, 98, 99,100,101,102,103,
294     104,105,106,107,108,109,110,111,
295     112,113,114,115,116,117,118,119,
296     120,121,122, 91, 92, 93, 94, 95,
297     96, 65, 66, 67, 68, 69, 70, 71,
298     72, 73, 74, 75, 76, 77, 78, 79,
299     80, 81, 82, 83, 84, 85, 86, 87,
300     88, 89, 90,123,124,125,126,127,
301     128,129,130,131,132,133,134,135,
302     136,137,138,139,140,141,142,143,
303     144,145,146,147,148,149,150,151,
304     152,153,154,155,156,157,158,159,
305     160,161,162,163,164,165,166,167,
306     168,169,170,171,172,173,174,175,
307     176,177,178,179,180,181,182,183,
308     184,185,186,187,188,189,190,191,
309     192,193,194,195,196,197,198,199,
310     200,201,202,203,204,205,206,207,
311     208,209,210,211,212,213,214,215,
312     216,217,218,219,220,221,222,223,
313     224,225,226,227,228,229,230,231,
314     232,233,234,235,236,237,238,239,
315     240,241,242,243,244,245,246,247,
316     248,249,250,251,252,253,254,255,
317    
318     /* This table contains bit maps for various character classes. Each map is 32
319     bytes long and the bits run from the least significant end of each byte. The
320     classes that have their own maps are: space, xdigit, digit, upper, lower, word,
321     graph, print, punct, and cntrl. Other classes are built from combinations. */
322    
323     0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
324     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
325     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
326     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
327    
328     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
329     0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
330     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
331     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
332    
333     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
334     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
335     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
336     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
337    
338     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339     0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
340     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
341     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
342    
343     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
344     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
345     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
346     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
347    
348     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
349     0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
350     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
351     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
352    
353     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
354     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
355     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
356     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
357    
358     0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
359     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
360     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
361     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
362    
363     0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
364     0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
365     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
366     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
367    
368     0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
369     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
370     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
371     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
372    
373     /* This table identifies various classes of character by individual bits:
374     0x01 white space character
375     0x02 letter
376     0x04 decimal digit
377     0x08 hexadecimal digit
378     0x10 alphanumeric or '_'
379     0x80 regular expression metacharacter or binary zero
380     */
381    
382     0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
383     0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
384     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
385     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
386     0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
387     0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
388     0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
389     0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
390     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
391     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
392     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
393     0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
394     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
395     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
396     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
397     0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
398     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
399     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
400     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
401     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
402     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
403     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
404     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
405     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
406     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
407     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
408     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
409     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
410     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
411     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
412     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
413     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
414    
415 ph10 545 /* This is a set of tables that came originally from a Windows user. It seems to
416     be at least an approximation of ISO 8859. In particular, there are characters
417 ph10 541 greater than 128 that are marked as spaces, letters, etc. */
418    
419     static const unsigned char tables1[] = {
420     0,1,2,3,4,5,6,7,
421     8,9,10,11,12,13,14,15,
422     16,17,18,19,20,21,22,23,
423     24,25,26,27,28,29,30,31,
424     32,33,34,35,36,37,38,39,
425     40,41,42,43,44,45,46,47,
426     48,49,50,51,52,53,54,55,
427     56,57,58,59,60,61,62,63,
428     64,97,98,99,100,101,102,103,
429     104,105,106,107,108,109,110,111,
430     112,113,114,115,116,117,118,119,
431     120,121,122,91,92,93,94,95,
432     96,97,98,99,100,101,102,103,
433     104,105,106,107,108,109,110,111,
434     112,113,114,115,116,117,118,119,
435     120,121,122,123,124,125,126,127,
436     128,129,130,131,132,133,134,135,
437     136,137,138,139,140,141,142,143,
438     144,145,146,147,148,149,150,151,
439     152,153,154,155,156,157,158,159,
440     160,161,162,163,164,165,166,167,
441     168,169,170,171,172,173,174,175,
442     176,177,178,179,180,181,182,183,
443     184,185,186,187,188,189,190,191,
444     224,225,226,227,228,229,230,231,
445     232,233,234,235,236,237,238,239,
446     240,241,242,243,244,245,246,215,
447     248,249,250,251,252,253,254,223,
448     224,225,226,227,228,229,230,231,
449     232,233,234,235,236,237,238,239,
450     240,241,242,243,244,245,246,247,
451     248,249,250,251,252,253,254,255,
452     0,1,2,3,4,5,6,7,
453     8,9,10,11,12,13,14,15,
454     16,17,18,19,20,21,22,23,
455     24,25,26,27,28,29,30,31,
456     32,33,34,35,36,37,38,39,
457     40,41,42,43,44,45,46,47,
458     48,49,50,51,52,53,54,55,
459     56,57,58,59,60,61,62,63,
460     64,97,98,99,100,101,102,103,
461     104,105,106,107,108,109,110,111,
462     112,113,114,115,116,117,118,119,
463     120,121,122,91,92,93,94,95,
464     96,65,66,67,68,69,70,71,
465     72,73,74,75,76,77,78,79,
466     80,81,82,83,84,85,86,87,
467     88,89,90,123,124,125,126,127,
468     128,129,130,131,132,133,134,135,
469     136,137,138,139,140,141,142,143,
470     144,145,146,147,148,149,150,151,
471     152,153,154,155,156,157,158,159,
472     160,161,162,163,164,165,166,167,
473     168,169,170,171,172,173,174,175,
474     176,177,178,179,180,181,182,183,
475     184,185,186,187,188,189,190,191,
476     224,225,226,227,228,229,230,231,
477     232,233,234,235,236,237,238,239,
478     240,241,242,243,244,245,246,215,
479     248,249,250,251,252,253,254,223,
480     192,193,194,195,196,197,198,199,
481     200,201,202,203,204,205,206,207,
482     208,209,210,211,212,213,214,247,
483     216,217,218,219,220,221,222,255,
484     0,62,0,0,1,0,0,0,
485     0,0,0,0,0,0,0,0,
486     32,0,0,0,1,0,0,0,
487     0,0,0,0,0,0,0,0,
488     0,0,0,0,0,0,255,3,
489     126,0,0,0,126,0,0,0,
490     0,0,0,0,0,0,0,0,
491     0,0,0,0,0,0,0,0,
492     0,0,0,0,0,0,255,3,
493     0,0,0,0,0,0,0,0,
494     0,0,0,0,0,0,12,2,
495     0,0,0,0,0,0,0,0,
496     0,0,0,0,0,0,0,0,
497     254,255,255,7,0,0,0,0,
498     0,0,0,0,0,0,0,0,
499     255,255,127,127,0,0,0,0,
500     0,0,0,0,0,0,0,0,
501     0,0,0,0,254,255,255,7,
502     0,0,0,0,0,4,32,4,
503     0,0,0,128,255,255,127,255,
504     0,0,0,0,0,0,255,3,
505     254,255,255,135,254,255,255,7,
506     0,0,0,0,0,4,44,6,
507     255,255,127,255,255,255,127,255,
508     0,0,0,0,254,255,255,255,
509     255,255,255,255,255,255,255,127,
510     0,0,0,0,254,255,255,255,
511     255,255,255,255,255,255,255,255,
512     0,2,0,0,255,255,255,255,
513     255,255,255,255,255,255,255,127,
514     0,0,0,0,255,255,255,255,
515     255,255,255,255,255,255,255,255,
516     0,0,0,0,254,255,0,252,
517     1,0,0,248,1,0,0,120,
518     0,0,0,0,254,255,255,255,
519     0,0,128,0,0,0,128,0,
520     255,255,255,255,0,0,0,0,
521     0,0,0,0,0,0,0,128,
522     255,255,255,255,0,0,0,0,
523     0,0,0,0,0,0,0,0,
524     128,0,0,0,0,0,0,0,
525     0,1,1,0,1,1,0,0,
526     0,0,0,0,0,0,0,0,
527     0,0,0,0,0,0,0,0,
528     1,0,0,0,128,0,0,0,
529     128,128,128,128,0,0,128,0,
530     28,28,28,28,28,28,28,28,
531     28,28,0,0,0,0,0,128,
532     0,26,26,26,26,26,26,18,
533     18,18,18,18,18,18,18,18,
534     18,18,18,18,18,18,18,18,
535     18,18,18,128,128,0,128,16,
536     0,26,26,26,26,26,26,18,
537     18,18,18,18,18,18,18,18,
538     18,18,18,18,18,18,18,18,
539     18,18,18,128,128,0,0,0,
540     0,0,0,0,0,1,0,0,
541     0,0,0,0,0,0,0,0,
542     0,0,0,0,0,0,0,0,
543     0,0,0,0,0,0,0,0,
544     1,0,0,0,0,0,0,0,
545     0,0,18,0,0,0,0,0,
546     0,0,20,20,0,18,0,0,
547     0,20,18,0,0,0,0,0,
548     18,18,18,18,18,18,18,18,
549     18,18,18,18,18,18,18,18,
550     18,18,18,18,18,18,18,0,
551     18,18,18,18,18,18,18,18,
552     18,18,18,18,18,18,18,18,
553     18,18,18,18,18,18,18,18,
554     18,18,18,18,18,18,18,0,
555     18,18,18,18,18,18,18,18
556     };
557    
558    
559    
560 ph10 558
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback used only when the build does not define HAVE_STRERROR (the
original note cites SunOS4 as a system whose library lacks strerror()). The
message is looked up in the external sys_errlist[] table; sys_nerr is used as
the exclusive upper bound on valid indices.

Argument:
  n         the error number to describe

Returns:    sys_errlist[n] when 0 <= n < sys_nerr,
            otherwise the fixed text "unknown error number"
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
#endif  /* HAVE_STRERROR */
580    
581    
582    
583    
584     /*************************************************
585 nigel 91 * Read or extend an input line *
586     *************************************************/
587    
588     /* Input lines are read into buffer, but both patterns and data lines can be
589     continued over multiple input lines. In addition, if the buffer fills up, we
590     want to automatically expand it so as to be able to handle extremely large
591     lines that are needed for certain stress tests. When the input buffer is
592     expanded, the other two buffers must also be expanded likewise, and the
593     contents of pbuffer, which are a copy of the input for callouts, must be
594     preserved (for when expansion happens for a data line). This is not the most
595     optimal way of handling this, but hey, this is just a test program!
596    
597     Arguments:
598     f the file to read
599     start where in buffer to start (this *must* be within buffer)
600 ph10 287 prompt for stdin or readline()
601 nigel 91
602     Returns: pointer to the start of new data
603     could be a copy of start, or could be moved
604     NULL if no data read and EOF reached
605     */
606    
607     static uschar *
608 ph10 287 extend_inputline(FILE *f, uschar *start, const char *prompt)
609 nigel 91 {
610     uschar *here = start;
611    
612     for (;;)
613     {
614 ph10 530 int rlen = (int)(buffer_size - (here - buffer));
615 nigel 93
616 nigel 91 if (rlen > 1000)
617     {
618     int dlen;
619 ph10 289
620 ph10 287 /* If libreadline support is required, use readline() to read a line if the
621     input is a terminal. Note that readline() removes the trailing newline, so
622     we must put it back again, to be compatible with fgets(). */
623 ph10 289
624 ph10 287 #ifdef SUPPORT_LIBREADLINE
625     if (isatty(fileno(f)))
626     {
627 ph10 289 size_t len;
628 ph10 287 char *s = readline(prompt);
629     if (s == NULL) return (here == start)? NULL : start;
630     len = strlen(s);
631 ph10 289 if (len > 0) add_history(s);
632 ph10 287 if (len > rlen - 1) len = rlen - 1;
633     memcpy(here, s, len);
634     here[len] = '\n';
635 ph10 289 here[len+1] = 0;
636     free(s);
637 ph10 287 }
638 ph10 289 else
639     #endif
640    
641 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
642 ph10 289
643 ph10 287 {
644 ph10 516 if (f == stdin) printf("%s", prompt);
645 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
646     return (here == start)? NULL : start;
647 ph10 289 }
648    
649 nigel 91 dlen = (int)strlen((char *)here);
650     if (dlen > 0 && here[dlen - 1] == '\n') return start;
651     here += dlen;
652     }
653    
654     else
655     {
656     int new_buffer_size = 2*buffer_size;
657     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
658     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
659     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
660    
661     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
662     {
663     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
664     exit(1);
665     }
666    
667     memcpy(new_buffer, buffer, buffer_size);
668     memcpy(new_pbuffer, pbuffer, buffer_size);
669    
670     buffer_size = new_buffer_size;
671    
672     start = new_buffer + (start - buffer);
673     here = new_buffer + (here - buffer);
674    
675     free(buffer);
676     free(dbuffer);
677     free(pbuffer);
678    
679     buffer = new_buffer;
680     dbuffer = new_dbuffer;
681     pbuffer = new_pbuffer;
682     }
683     }
684    
685     return NULL; /* Control never gets here */
686     }
687    
688    
689    
690    
691    
692    
693    
/*************************************************
*        Read number from string                 *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then a run of decimal digits is converted. If
there are no digits the result is zero. A value too large for an int does not
overflow (which would be undefined behaviour); it sticks at INT_MAX, and the
remaining digits are still consumed.

Arguments:
  str         string to be converted
  endptr      where to put the end pointer (the first character after the
                digits, or after the white space if there are no digits)

Returns:      the value as a non-negative int, at most INT_MAX
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;

while (isspace(*str)) str++;

while (isdigit(*str))
  {
  int digit = *str++ - '0';

  /* result * 10 + digit fits in an int exactly when this test fails. Once the
  value has stuck at INT_MAX the test keeps succeeding for later digits. */

  if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
  }

*endptr = str;
return result;
}
718    
719    
720    
721 nigel 49
722     /*************************************************
723     * Convert UTF-8 string to value *
724     *************************************************/
725    
726     /* This function takes one or more bytes that represents a UTF-8 character,
727     and returns the value of the character.
728    
729     Argument:
730 nigel 91 utf8bytes a pointer to the byte vector
731     vptr a pointer to an int to receive the value
732 nigel 49
733 nigel 91 Returns: > 0 => the number of bytes consumed
734     -6 to 0 => malformed UTF-8 character at offset = (-return)
735 nigel 49 */
736    
737 nigel 79 #if !defined NOUTF8
738    
739 nigel 67 static int
740 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
741 nigel 49 {
742 nigel 91 int c = *utf8bytes++;
743 nigel 49 int d = c;
744     int i, j, s;
745    
746     for (i = -1; i < 6; i++) /* i is number of additional bytes */
747     {
748     if ((d & 0x80) == 0) break;
749     d <<= 1;
750     }
751    
752     if (i == -1) { *vptr = c; return 1; } /* ascii character */
753     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
754    
755     /* i now has a value in the range 1-5 */
756    
757 nigel 59 s = 6*i;
758 nigel 85 d = (c & utf8_table3[i]) << s;
759 nigel 49
760     for (j = 0; j < i; j++)
761     {
762 nigel 91 c = *utf8bytes++;
763 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
764 nigel 59 s -= 6;
765 nigel 49 d |= (c & 0x3f) << s;
766     }
767    
768     /* Check that encoding was the correct unique one */
769    
770 nigel 85 for (j = 0; j < utf8_table1_size; j++)
771     if (d <= utf8_table1[j]) break;
772 nigel 49 if (j != i) return -(i+1);
773    
774     /* Valid value */
775    
776     *vptr = d;
777     return i+1;
778     }
779    
780 nigel 79 #endif
781 nigel 49
782    
783 nigel 79
784 nigel 63 /*************************************************
785 nigel 85 * Convert character value to UTF-8 *
786     *************************************************/
787    
788     /* This function takes an integer value in the range 0 - 0x7fffffff
789     and encodes it as a UTF-8 character in 0 to 6 bytes.
790    
791     Arguments:
792     cvalue the character value
793 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
794 nigel 85
795     Returns: number of characters placed in the buffer
796     */
797    
798 nigel 93 #if !defined NOUTF8
799    
800 nigel 85 static int
801 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
802 nigel 85 {
803     register int i, j;
804     for (i = 0; i < utf8_table1_size; i++)
805     if (cvalue <= utf8_table1[i]) break;
806 nigel 91 utf8bytes += i;
807 nigel 85 for (j = i; j > 0; j--)
808     {
809 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
810 nigel 85 cvalue >>= 6;
811     }
812 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
813 nigel 85 return i + 1;
814     }
815    
816 nigel 93 #endif
817 nigel 85
818    
819 nigel 93
820 nigel 85 /*************************************************
821 nigel 63 * Print character string *
822     *************************************************/
823 nigel 49
824 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
825     mode. Yields number of characters printed. If handed a NULL file, just counts
826     chars without printing. */
827 nigel 49
828 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
829 nigel 3 {
830 nigel 85 int c = 0;
831 nigel 63 int yield = 0;
832 nigel 3
833 nigel 63 while (length-- > 0)
834 nigel 3 {
835 nigel 79 #if !defined NOUTF8
836 nigel 67 if (use_utf8)
837 nigel 63 {
838     int rc = utf82ord(p, &c);
839 nigel 3
840 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
841     {
842     length -= rc - 1;
843     p += rc;
844 nigel 93 if (PRINTHEX(c))
845 nigel 63 {
846     if (f != NULL) fprintf(f, "%c", c);
847     yield++;
848     }
849     else
850     {
851 nigel 93 int n = 4;
852     if (f != NULL) fprintf(f, "\\x{%02x}", c);
853     yield += (n <= 0x000000ff)? 2 :
854     (n <= 0x00000fff)? 3 :
855     (n <= 0x0000ffff)? 4 :
856     (n <= 0x000fffff)? 5 : 6;
857 nigel 63 }
858     continue;
859     }
860     }
861 nigel 79 #endif
862 nigel 3
863 nigel 63 /* Not UTF-8, or malformed UTF-8 */
864    
865 nigel 93 c = *p++;
866     if (PRINTHEX(c))
867 nigel 3 {
868 nigel 63 if (f != NULL) fprintf(f, "%c", c);
869     yield++;
870 nigel 3 }
871 nigel 63 else
872 nigel 3 {
873 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
874     yield += 4;
875     }
876     }
877 nigel 3
878 nigel 63 return yield;
879     }
880 nigel 23
881 nigel 3
882 nigel 23
883 nigel 63 /*************************************************
884     * Callout function *
885     *************************************************/
886 nigel 3
887 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
888     the match. Yield zero unless more callouts than the fail count, or the callout
889     data is not zero. */
890 nigel 3
891 nigel 63 static int callout(pcre_callout_block *cb)
892     {
893     FILE *f = (first_callout | callout_extra)? outfile : NULL;
894 nigel 75 int i, pre_start, post_start, subject_length;
895 nigel 3
896 nigel 63 if (callout_extra)
897     {
898     fprintf(f, "Callout %d: last capture = %d\n",
899     cb->callout_number, cb->capture_last);
900 nigel 3
901 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
902     {
903     if (cb->offset_vector[i] < 0)
904     fprintf(f, "%2d: <unset>\n", i/2);
905     else
906     {
907     fprintf(f, "%2d: ", i/2);
908     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
909     cb->offset_vector[i+1] - cb->offset_vector[i], f);
910     fprintf(f, "\n");
911     }
912     }
913     }
914 nigel 3
915 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
916     datails. On subsequent calls in the same match, we use pchars just to find the
917     printed lengths of the substrings. */
918 nigel 3
919 nigel 63 if (f != NULL) fprintf(f, "--->");
920 nigel 3
921 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
922     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
923     cb->current_position - cb->start_match, f);
924 nigel 3
925 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
926    
927 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
928     cb->subject_length - cb->current_position, f);
929 nigel 3
930 nigel 63 if (f != NULL) fprintf(f, "\n");
931 nigel 9
932 nigel 63 /* Always print appropriate indicators, with callout number if not already
933 nigel 75 shown. For automatic callouts, show the pattern offset. */
934 nigel 3
935 nigel 75 if (cb->callout_number == 255)
936     {
937     fprintf(outfile, "%+3d ", cb->pattern_position);
938     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
939     }
940     else
941     {
942     if (callout_extra) fprintf(outfile, " ");
943     else fprintf(outfile, "%3d ", cb->callout_number);
944     }
945 nigel 3
946 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
947     fprintf(outfile, "^");
948 nigel 3
949 nigel 63 if (post_start > 0)
950     {
951     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
952     fprintf(outfile, "^");
953 nigel 3 }
954    
955 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
956     fprintf(outfile, " ");
957    
958     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
959     pbuffer + cb->pattern_position);
960    
961 nigel 63 fprintf(outfile, "\n");
962     first_callout = 0;
963 nigel 3
964 ph10 645 if (cb->mark != last_callout_mark)
965     {
966     fprintf(outfile, "Latest Mark: %s\n",
967     (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
968     last_callout_mark = cb->mark;
969     }
970    
971 nigel 71 if (cb->callout_data != NULL)
972 nigel 49 {
973 nigel 71 int callout_data = *((int *)(cb->callout_data));
974     if (callout_data != 0)
975     {
976     fprintf(outfile, "Callout data = %d\n", callout_data);
977     return callout_data;
978     }
979 nigel 63 }
980 nigel 49
981 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
982     (++callout_count >= callout_fail_count)? 1 : 0;
983 nigel 3 }
984    
985    
986 nigel 63 /*************************************************
987 nigel 73 * Local malloc functions *
988 nigel 63 *************************************************/
989 nigel 3
990     /* Alternative malloc function, to test functionality and show the size of the
991     compiled re. */
992    
993     static void *new_malloc(size_t size)
994     {
995 nigel 73 void *block = malloc(size);
996 nigel 43 gotten_store = size;
997 nigel 73 if (show_malloc)
998 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
999 nigel 73 return block;
1000 nigel 3 }
1001    
1002 nigel 73 static void new_free(void *block)
1003     {
1004     if (show_malloc)
1005     fprintf(outfile, "free %p\n", block);
1006     free(block);
1007     }
1008 nigel 3
1009    
1010 nigel 73 /* For recursion malloc/free, to test stacking calls */
1011    
1012     static void *stack_malloc(size_t size)
1013     {
1014     void *block = malloc(size);
1015     if (show_malloc)
1016 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1017 nigel 73 return block;
1018     }
1019    
1020     static void stack_free(void *block)
1021     {
1022     if (show_malloc)
1023     fprintf(outfile, "stack_free %p\n", block);
1024     free(block);
1025     }
1026    
1027    
1028 nigel 63 /*************************************************
1029     * Call pcre_fullinfo() *
1030     *************************************************/
1031 nigel 43
1032     /* Get one piece of information from the pcre_fullinfo() function */
1033    
1034     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1035     {
1036     int rc;
1037     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1038     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1039     }
1040    
1041    
1042    
1043 nigel 63 /*************************************************
1044 nigel 75 * Byte flipping function *
1045     *************************************************/
1046    
/* Reverse the byte order of the low-order bytes of value. When n is 2 the
low two bytes are swapped; for any other n the low four bytes are reversed.
Bits above the bytes being swapped are discarded. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1056    
1057    
1058    
1059    
1060     /*************************************************
1061 nigel 87 * Check match or recursion limit *
1062     *************************************************/
1063    
1064     static int
1065     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1066     int start_offset, int options, int *use_offsets, int use_size_offsets,
1067     int flag, unsigned long int *limit, int errnumber, const char *msg)
1068     {
1069     int count;
1070     int min = 0;
1071     int mid = 64;
1072     int max = -1;
1073    
1074     extra->flags |= flag;
1075    
1076     for (;;)
1077     {
1078     *limit = mid;
1079    
1080     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1081     use_offsets, use_size_offsets);
1082    
1083     if (count == errnumber)
1084     {
1085     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1086     min = mid;
1087     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1088     }
1089    
1090     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1091     count == PCRE_ERROR_PARTIAL)
1092     {
1093     if (mid == min + 1)
1094     {
1095     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1096     break;
1097     }
1098     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1099     max = mid;
1100     mid = (min + mid)/2;
1101     }
1102     else break; /* Some other error */
1103     }
1104    
1105     extra->flags &= ~flag;
1106     return count;
1107     }
1108    
1109    
1110    
1111     /*************************************************
1112 ph10 227 * Case-independent strncmp() function *
1113     *************************************************/
1114    
1115     /*
1116     Arguments:
1117     s first string
1118     t second string
1119     n number of characters to compare
1120    
1121     Returns: < 0, = 0, or > 0, according to the comparison
1122     */
1123    
1124     static int
1125     strncmpic(uschar *s, uschar *t, int n)
1126     {
1127     while (n--)
1128     {
1129     int c = tolower(*s++) - tolower(*t++);
1130     if (c) return c;
1131     }
1132     return 0;
1133     }
1134    
1135    
1136    
1137     /*************************************************
1138 nigel 91 * Check newline indicator *
1139     *************************************************/
1140    
1141 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1142     a message and return 0 if there is no match.
1143 nigel 91
1144     Arguments:
1145     p points after the leading '<'
1146     f file for error message
1147    
1148     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1149     */
1150    
1151     static int
1152     check_newline(uschar *p, FILE *f)
1153     {
1154 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1155     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1156     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1157     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1158     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1159 ph10 231 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1160     if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1161 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
1162     return 0;
1163     }
1164    
1165    
1166    
1167     /*************************************************
1168 nigel 93 * Usage function *
1169     *************************************************/
1170    
/* Write the command-line help text to stdout. Lines for readline, DFA and
POSIX support depend on the corresponding build-time macros. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1204    
1205    
1206    
1207     /*************************************************
1208 nigel 63 * Main Program *
1209     *************************************************/
1210 nigel 43
1211 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
1212     consist of a regular expression, in delimiters and optionally followed by
1213     options, followed by a set of test data, terminated by an empty line. */
1214    
1215     int main(int argc, char **argv)
1216     {
1217     FILE *infile = stdin;
1218     int options = 0;
1219     int study_options = 0;
1220 ph10 386 int default_find_match_limit = FALSE;
1221 nigel 3 int op = 1;
1222     int timeit = 0;
1223 nigel 93 int timeitm = 0;
1224 nigel 3 int showinfo = 0;
1225 nigel 31 int showstore = 0;
1226 ph10 606 int force_study = 0;
1227 nigel 87 int quiet = 0;
1228 nigel 53 int size_offsets = 45;
1229     int size_offsets_max;
1230 nigel 77 int *offsets = NULL;
1231 nigel 53 #if !defined NOPOSIX
1232 nigel 3 int posix = 0;
1233 nigel 53 #endif
1234 nigel 3 int debug = 0;
1235 nigel 11 int done = 0;
1236 nigel 77 int all_use_dfa = 0;
1237     int yield = 0;
1238 nigel 91 int stack_size;
1239 nigel 3
1240 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
1241     that 1024 is plenty long enough for the few names we'll be testing. */
1242 nigel 69
1243 nigel 91 uschar copynames[1024];
1244     uschar getnames[1024];
1245    
1246     uschar *copynamesptr;
1247     uschar *getnamesptr;
1248    
1249 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
1250 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
1251 nigel 69
1252 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
1253     dbuffer = (unsigned char *)malloc(buffer_size);
1254     pbuffer = (unsigned char *)malloc(buffer_size);
1255 nigel 69
1256 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
1257 nigel 3
1258 nigel 93 outfile = stdout;
1259    
1260     /* The following _setmode() stuff is some Windows magic that tells its runtime
1261     library to translate CRLF into a single LF character. At least, that's what
1262     I've been told: never having used Windows I take this all on trust. Originally
1263     it set 0x8000, but then I was advised that _O_BINARY was better. */
1264    
1265 nigel 75 #if defined(_WIN32) || defined(WIN32)
1266 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
1267     #endif
1268 nigel 75
1269 nigel 3 /* Scan options */
1270    
1271     while (argc > 1 && argv[op][0] == '-')
1272     {
1273 nigel 63 unsigned char *endptr;
1274 nigel 53
1275 ph10 606 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1276     else if (strcmp(argv[op], "-s") == 0) force_study = 1;
1277 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1278 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1279 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1280     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1281 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1282 nigel 79 #if !defined NODFA
1283 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1284 nigel 79 #endif
1285 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1286 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1287     *endptr == 0))
1288 nigel 53 {
1289     op++;
1290     argc--;
1291     }
1292 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1293     {
1294     int both = argv[op][2] == 0;
1295     int temp;
1296     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1297     *endptr == 0))
1298     {
1299     timeitm = temp;
1300     op++;
1301     argc--;
1302     }
1303     else timeitm = LOOPREPEAT;
1304     if (both) timeit = timeitm;
1305     }
1306 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1307     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1308     *endptr == 0))
1309     {
1310 ph10 641 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1311 nigel 91 printf("PCRE: -S not supported on this OS\n");
1312     exit(1);
1313     #else
1314     int rc;
1315     struct rlimit rlim;
1316     getrlimit(RLIMIT_STACK, &rlim);
1317     rlim.rlim_cur = stack_size * 1024 * 1024;
1318     rc = setrlimit(RLIMIT_STACK, &rlim);
1319     if (rc != 0)
1320     {
1321     printf("PCRE: setrlimit() failed with error %d\n", rc);
1322     exit(1);
1323     }
1324     op++;
1325     argc--;
1326     #endif
1327     }
1328 nigel 53 #if !defined NOPOSIX
1329 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1330 nigel 53 #endif
1331 nigel 63 else if (strcmp(argv[op], "-C") == 0)
1332     {
1333     int rc;
1334 ph10 392 unsigned long int lrc;
1335 nigel 63 printf("PCRE version %s\n", pcre_version());
1336     printf("Compiled with\n");
1337     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1338     printf(" %sUTF-8 support\n", rc? "" : "No ");
1339 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1340     printf(" %sUnicode properties support\n", rc? "" : "No ");
1341 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1342 ph10 391 /* Note that these values are always the ASCII values, even
1343 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
1344 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1345     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1346 ph10 150 (rc == -2)? "ANYCRLF" :
1347 nigel 93 (rc == -1)? "ANY" : "???");
1348 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1349     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1350     "all Unicode newlines");
1351 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1352     printf(" Internal link size = %d\n", rc);
1353     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1354     printf(" POSIX malloc threshold = %d\n", rc);
1355 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1356     printf(" Default match limit = %ld\n", lrc);
1357     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1358     printf(" Default recursion depth limit = %ld\n", lrc);
1359 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1360     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1361 ph10 121 goto EXIT;
1362 nigel 63 }
1363 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
1364     strcmp(argv[op], "--help") == 0)
1365     {
1366     usage();
1367     goto EXIT;
1368     }
1369 nigel 3 else
1370     {
1371 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
1372 nigel 93 usage();
1373 nigel 77 yield = 1;
1374     goto EXIT;
1375 nigel 3 }
1376     op++;
1377     argc--;
1378     }
1379    
1380 nigel 53 /* Get the store for the offsets vector, and remember what it was */
1381    
1382     size_offsets_max = size_offsets;
1383 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1384 nigel 53 if (offsets == NULL)
1385     {
1386     printf("** Failed to get %d bytes of memory for offsets vector\n",
1387 ph10 151 (int)(size_offsets_max * sizeof(int)));
1388 nigel 77 yield = 1;
1389     goto EXIT;
1390 nigel 53 }
1391    
1392 nigel 3 /* Sort out the input and output files */
1393    
1394     if (argc > 1)
1395     {
1396 nigel 93 infile = fopen(argv[op], INPUT_MODE);
1397 nigel 3 if (infile == NULL)
1398     {
1399     printf("** Failed to open %s\n", argv[op]);
1400 nigel 77 yield = 1;
1401     goto EXIT;
1402 nigel 3 }
1403     }
1404    
1405     if (argc > 2)
1406     {
1407 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
1408 nigel 3 if (outfile == NULL)
1409     {
1410     printf("** Failed to open %s\n", argv[op+1]);
1411 nigel 77 yield = 1;
1412     goto EXIT;
1413 nigel 3 }
1414     }
1415    
1416     /* Set alternative malloc function */
1417    
1418     pcre_malloc = new_malloc;
1419 nigel 73 pcre_free = new_free;
1420     pcre_stack_malloc = stack_malloc;
1421     pcre_stack_free = stack_free;
1422 nigel 3
1423 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1424 nigel 3
1425 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1426 nigel 3
1427     /* Main loop */
1428    
1429 nigel 11 while (!done)
1430 nigel 3 {
1431     pcre *re = NULL;
1432     pcre_extra *extra = NULL;
1433 nigel 37
1434     #if !defined NOPOSIX /* There are still compilers that require no indent */
1435 nigel 3 regex_t preg;
1436 nigel 45 int do_posix = 0;
1437 nigel 37 #endif
1438    
1439 nigel 7 const char *error;
1440 ph10 512 unsigned char *markptr;
1441 nigel 25 unsigned char *p, *pp, *ppp;
1442 nigel 75 unsigned char *to_file = NULL;
1443 nigel 53 const unsigned char *tables = NULL;
1444 nigel 75 unsigned long int true_size, true_study_size = 0;
1445     size_t size, regex_gotten_store;
1446 ph10 626 int do_allcaps = 0;
1447 ph10 512 int do_mark = 0;
1448 nigel 3 int do_study = 0;
1449 ph10 612 int no_force_study = 0;
1450 nigel 25 int do_debug = debug;
1451 nigel 35 int do_G = 0;
1452     int do_g = 0;
1453 nigel 25 int do_showinfo = showinfo;
1454 nigel 35 int do_showrest = 0;
1455 ph10 616 int do_showcaprest = 0;
1456 nigel 75 int do_flip = 0;
1457 nigel 93 int erroroffset, len, delimiter, poffset;
1458 nigel 3
1459 nigel 67 use_utf8 = 0;
1460 ph10 211 debug_lengths = 1;
1461 nigel 63
1462 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1463 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1464 nigel 63 fflush(outfile);
1465 nigel 3
1466     p = buffer;
1467     while (isspace(*p)) p++;
1468     if (*p == 0) continue;
1469    
1470 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1471 nigel 3
1472 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1473     {
1474 nigel 91 unsigned long int magic, get_options;
1475 nigel 75 uschar sbuf[8];
1476     FILE *f;
1477    
1478     p++;
1479     pp = p + (int)strlen((char *)p);
1480     while (isspace(pp[-1])) pp--;
1481     *pp = 0;
1482    
1483     f = fopen((char *)p, "rb");
1484     if (f == NULL)
1485     {
1486     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1487     continue;
1488     }
1489    
1490     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1491    
1492     true_size =
1493     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1494     true_study_size =
1495     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1496    
1497     re = (real_pcre *)new_malloc(true_size);
1498     regex_gotten_store = gotten_store;
1499    
1500     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1501    
1502     magic = ((real_pcre *)re)->magic_number;
1503     if (magic != MAGIC_NUMBER)
1504     {
1505     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1506     {
1507     do_flip = 1;
1508     }
1509     else
1510     {
1511     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1512     fclose(f);
1513     continue;
1514     }
1515     }
1516    
1517 ph10 612 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1518 nigel 75 do_flip? " (byte-inverted)" : "", p);
1519    
1520     /* Need to know if UTF-8 for printing data strings */
1521    
1522 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1523     use_utf8 = (get_options & PCRE_UTF8) != 0;
1524 nigel 75
1525 ph10 612 /* Now see if there is any following study data. */
1526 nigel 75
1527     if (true_study_size != 0)
1528     {
1529     pcre_study_data *psd;
1530    
1531     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1532     extra->flags = PCRE_EXTRA_STUDY_DATA;
1533    
1534     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1535     extra->study_data = psd;
1536    
1537     if (fread(psd, 1, true_study_size, f) != true_study_size)
1538     {
1539     FAIL_READ:
1540     fprintf(outfile, "Failed to read data from %s\n", p);
1541     if (extra != NULL) new_free(extra);
1542     if (re != NULL) new_free(re);
1543     fclose(f);
1544     continue;
1545     }
1546     fprintf(outfile, "Study data loaded from %s\n", p);
1547     do_study = 1; /* To get the data output if requested */
1548     }
1549     else fprintf(outfile, "No study data\n");
1550    
1551     fclose(f);
1552     goto SHOW_INFO;
1553     }
1554    
1555     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1556     the pattern; if it isn't complete, read more. */
1557    
1558 nigel 3 delimiter = *p++;
1559    
1560 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1561 nigel 3 {
1562 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1563 nigel 3 goto SKIP_DATA;
1564     }
1565    
1566     pp = p;
1567 ph10 530 poffset = (int)(p - buffer);
1568 nigel 3
1569     for(;;)
1570     {
1571 nigel 29 while (*pp != 0)
1572     {
1573     if (*pp == '\\' && pp[1] != 0) pp++;
1574     else if (*pp == delimiter) break;
1575     pp++;
1576     }
1577 nigel 3 if (*pp != 0) break;
1578 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1579 nigel 3 {
1580     fprintf(outfile, "** Unexpected EOF\n");
1581 nigel 11 done = 1;
1582     goto CONTINUE;
1583 nigel 3 }
1584 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1585 nigel 3 }
1586    
1587 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1588     pointer to the correct relative point in the buffer. */
1589    
1590     p = buffer + poffset;
1591    
1592 nigel 29 /* If the first character after the delimiter is backslash, make
1593     the pattern end with backslash. This is purely to provide a way
1594     of testing for the error message when a pattern ends with backslash. */
1595    
1596     if (pp[1] == '\\') *pp++ = '\\';
1597    
1598 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1599     for callouts. */
1600 nigel 3
1601     *pp++ = 0;
1602 nigel 75 strcpy((char *)pbuffer, (char *)p);
1603 nigel 3
1604     /* Look for options after final delimiter */
1605    
1606     options = 0;
1607     study_options = 0;
1608 nigel 31 log_store = showstore; /* default from command line */
1609    
1610 nigel 3 while (*pp != 0)
1611     {
1612     switch (*pp++)
1613     {
1614 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1615 nigel 35 case 'g': do_g = 1; break;
1616 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1617     case 'm': options |= PCRE_MULTILINE; break;
1618     case 's': options |= PCRE_DOTALL; break;
1619     case 'x': options |= PCRE_EXTENDED; break;
1620 nigel 25
1621 ph10 616 case '+':
1622     if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1623     break;
1624 ph10 626
1625     case '=': do_allcaps = 1; break;
1626 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1627 nigel 93 case 'B': do_debug = 1; break;
1628 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1629 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1630 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1631 nigel 75 case 'F': do_flip = 1; break;
1632 nigel 35 case 'G': do_G = 1; break;
1633 nigel 25 case 'I': do_showinfo = 1; break;
1634 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1635 ph10 512 case 'K': do_mark = 1; break;
1636 nigel 31 case 'M': log_store = 1; break;
1637 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1638 nigel 37
1639     #if !defined NOPOSIX
1640 nigel 3 case 'P': do_posix = 1; break;
1641 nigel 37 #endif
1642    
1643 ph10 612 case 'S':
1644     if (do_study == 0) do_study = 1; else
1645     {
1646     do_study = 0;
1647     no_force_study = 1;
1648     }
1649     break;
1650    
1651 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1652 ph10 535 case 'W': options |= PCRE_UCP; break;
1653 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1654 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1655 ph10 126 case 'Z': debug_lengths = 0; break;
1656 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1657 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1658 ph10 545
1659 ph10 541 case 'T':
1660     switch (*pp++)
1661     {
1662     case '0': tables = tables0; break;
1663     case '1': tables = tables1; break;
1664 ph10 545
1665 ph10 541 case '\r':
1666     case '\n':
1667 ph10 545 case ' ':
1668     case 0:
1669 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
1670 ph10 545 goto SKIP_DATA;
1671    
1672     default:
1673 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1674 ph10 545 goto SKIP_DATA;
1675 ph10 541 }
1676 ph10 545 break;
1677 nigel 25
1678     case 'L':
1679     ppp = pp;
1680 nigel 93 /* The '\r' test here is so that it works on Windows. */
1681     /* The '0' test is just in case this is an unterminated line. */
1682     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1683 nigel 25 *ppp = 0;
1684     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1685     {
1686     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1687     goto SKIP_DATA;
1688     }
1689 nigel 93 locale_set = 1;
1690 nigel 25 tables = pcre_maketables();
1691     pp = ppp;
1692     break;
1693    
1694 nigel 75 case '>':
1695     to_file = pp;
1696     while (*pp != 0) pp++;
1697     while (isspace(pp[-1])) pp--;
1698     *pp = 0;
1699     break;
1700    
1701 nigel 91 case '<':
1702     {
1703 ph10 518 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1704 ph10 336 {
1705     options |= PCRE_JAVASCRIPT_COMPAT;
1706 ph10 345 pp += 3;
1707 ph10 336 }
1708     else
1709 ph10 345 {
1710 ph10 336 int x = check_newline(pp, outfile);
1711     if (x == 0) goto SKIP_DATA;
1712     options |= x;
1713     while (*pp++ != '>');
1714 ph10 345 }
1715 nigel 91 }
1716     break;
1717    
1718 nigel 77 case '\r': /* So that it works in Windows */
1719     case '\n':
1720     case ' ':
1721     break;
1722 nigel 75
1723 nigel 3 default:
1724     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1725     goto SKIP_DATA;
1726     }
1727     }
1728    
1729 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1730 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1731     local character tables. */
1732 nigel 3
1733 nigel 37 #if !defined NOPOSIX
1734 nigel 3 if (posix || do_posix)
1735     {
1736     int rc;
1737     int cflags = 0;
1738 nigel 75
1739 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1740     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1741 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1742 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1743     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1744 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1745 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1746 nigel 87
1747 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1748    
1749     /* Compilation failed; go back for another re, skipping to blank line
1750     if non-interactive. */
1751    
1752     if (rc != 0)
1753     {
1754 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1755 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1756     goto SKIP_DATA;
1757     }
1758     }
1759    
1760     /* Handle compiling via the native interface */
1761    
1762     else
1763 nigel 37 #endif /* !defined NOPOSIX */
1764    
1765 nigel 3 {
1766 ph10 412 unsigned long int get_options;
1767 ph10 416
1768 nigel 93 if (timeit > 0)
1769 nigel 3 {
1770     register int i;
1771     clock_t time_taken;
1772     clock_t start_time = clock();
1773 nigel 93 for (i = 0; i < timeit; i++)
1774 nigel 3 {
1775 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1776 nigel 3 if (re != NULL) free(re);
1777     }
1778     time_taken = clock() - start_time;
1779 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1780     (((double)time_taken * 1000.0) / (double)timeit) /
1781 nigel 63 (double)CLOCKS_PER_SEC);
1782 nigel 3 }
1783    
1784 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1785 nigel 3
1786     /* Compilation failed; go back for another re, skipping to blank line
1787     if non-interactive. */
1788    
1789     if (re == NULL)
1790     {
1791     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1792     SKIP_DATA:
1793     if (infile != stdin)
1794     {
1795     for (;;)
1796     {
1797 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
1798 nigel 11 {
1799     done = 1;
1800     goto CONTINUE;
1801     }
1802 nigel 3 len = (int)strlen((char *)buffer);
1803     while (len > 0 && isspace(buffer[len-1])) len--;
1804     if (len == 0) break;
1805     }
1806     fprintf(outfile, "\n");
1807     }
1808 nigel 25 goto CONTINUE;
1809 nigel 3 }
1810 ph10 416
1811     /* Compilation succeeded. It is now possible to set the UTF-8 option from
1812     within the regex; check for this so that we know how to process the data
1813 ph10 412 lines. */
1814 ph10 416
1815 ph10 412 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1816     if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1817 nigel 3
1818 ph10 412 /* Print information if required. There are now two info-returning
1819     functions. The old one has a limited interface and returns only limited
1820     data. Check that it agrees with the newer one. */
1821 nigel 3
1822 nigel 63 if (log_store)
1823     fprintf(outfile, "Memory allocation (code space): %d\n",
1824     (int)(gotten_store -
1825     sizeof(real_pcre) -
1826     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1827    
1828 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1829     and remember the store that was got. */
1830    
1831     true_size = ((real_pcre *)re)->size;
1832     regex_gotten_store = gotten_store;
1833    
1834 ph10 612 /* If -s or /S was present, study the regex to generate additional info to
1835     help with the matching, unless the pattern has the SS option, which
1836     suppresses the effect of /S (used for a few test patterns where studying is
1837     never sensible). */
1838 nigel 75
1839 ph10 612 if (do_study || (force_study && !no_force_study))
1840 nigel 75 {
1841 nigel 93 if (timeit > 0)
1842 nigel 75 {
1843     register int i;
1844     clock_t time_taken;
1845     clock_t start_time = clock();
1846 nigel 93 for (i = 0; i < timeit; i++)
1847 nigel 75 extra = pcre_study(re, study_options, &error);
1848     time_taken = clock() - start_time;
1849     if (extra != NULL) free(extra);
1850 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1851     (((double)time_taken * 1000.0) / (double)timeit) /
1852 nigel 75 (double)CLOCKS_PER_SEC);
1853     }
1854     extra = pcre_study(re, study_options, &error);
1855     if (error != NULL)
1856     fprintf(outfile, "Failed to study: %s\n", error);
1857     else if (extra != NULL)
1858     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1859     }
1860 ph10 512
1861 ph10 510 /* If /K was present, we set up for handling MARK data. */
1862 ph10 512
1863 ph10 510 if (do_mark)
1864     {
1865     if (extra == NULL)
1866     {
1867     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1868     extra->flags = 0;
1869     }
1870 ph10 512 extra->mark = &markptr;
1871 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
1872 ph10 512 }
1873 nigel 75
1874     /* If the 'F' option was present, we flip the bytes of all the integer
1875     fields in the regex data block and the study block. This is to make it
1876     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1877     compiled on a different architecture. */
1878    
1879     if (do_flip)
1880     {
1881     real_pcre *rre = (real_pcre *)re;
1882 ph10 259 rre->magic_number =
1883 ph10 255 byteflip(rre->magic_number, sizeof(rre->magic_number));
1884 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
1885     rre->options = byteflip(rre->options, sizeof(rre->options));
1886 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1887 ph10 259 rre->top_bracket =
1888 ph10 255 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1889 ph10 259 rre->top_backref =
1890 ph10 255 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1891 ph10 259 rre->first_byte =
1892 ph10 255 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1893 ph10 259 rre->req_byte =
1894 ph10 255 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1895     rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1896 nigel 75 sizeof(rre->name_table_offset));
1897 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1898 nigel 75 sizeof(rre->name_entry_size));
1899 ph10 259 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1900 ph10 255 sizeof(rre->name_count));
1901 nigel 75
1902     if (extra != NULL)
1903     {
1904     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1905     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1906 ph10 455 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1907     rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1908 nigel 75 }
1909     }
1910    
1911     /* Extract information from the compiled data if required */
1912    
1913     SHOW_INFO:
1914    
1915 nigel 93 if (do_debug)
1916     {
1917     fprintf(outfile, "------------------------------------------------------------------\n");
1918 ph10 116 pcre_printint(re, outfile, debug_lengths);
1919 nigel 93 }
1920 ph10 416
1921 ph10 412 /* We already have the options in get_options (see above) */
1922 nigel 93
1923 nigel 25 if (do_showinfo)
1924 nigel 3 {
1925 ph10 412 unsigned long int all_options;
1926 nigel 79 #if !defined NOINFOCHECK
1927 nigel 43 int old_first_char, old_options, old_count;
1928 nigel 79 #endif
1929 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1930 ph10 227 hascrorlf;
1931 nigel 63 int nameentrysize, namecount;
1932     const uschar *nametable;
1933 nigel 3
1934 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1935     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1936     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1937 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1938 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1939 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1940     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1941 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1942 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1943     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1944 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1945 nigel 43
1946 nigel 79 #if !defined NOINFOCHECK
1947 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1948 nigel 3 if (count < 0) fprintf(outfile,
1949 nigel 43 "Error %d from pcre_info()\n", count);
1950 nigel 3 else
1951     {
1952 nigel 43 if (old_count != count) fprintf(outfile,
1953     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1954     old_count);
1955 nigel 37
1956 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1957     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1958     first_char, old_first_char);
1959 nigel 37
1960 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1961     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1962     get_options, old_options);
1963 nigel 43 }
1964 nigel 79 #endif
1965 nigel 43
1966 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1967 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1968 nigel 77 (int)size, (int)regex_gotten_store);
1969 nigel 43
1970     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1971     if (backrefmax > 0)
1972     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1973 nigel 63
1974     if (namecount > 0)
1975     {
1976     fprintf(outfile, "Named capturing subpatterns:\n");
1977     while (namecount-- > 0)
1978     {
1979     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1980     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1981     GET2(nametable, 0));
1982     nametable += nameentrysize;
1983     }
1984     }
1985 ph10 172
1986 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1987 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1988 nigel 63
1989 nigel 75 all_options = ((real_pcre *)re)->options;
1990 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1991 nigel 75
1992 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1993 ph10 576 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1994 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1995     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1996     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1997     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1998 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1999 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2000 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2001     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2002 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2003     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2004     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2005 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2006 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2007 ph10 518 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2008 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2009 ph10 576 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2010 nigel 91 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2011 ph10 172
2012 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2013 nigel 43
2014 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
2015 nigel 91 {
2016     case PCRE_NEWLINE_CR:
2017     fprintf(outfile, "Forced newline sequence: CR\n");
2018     break;
2019 nigel 43
2020 nigel 91 case PCRE_NEWLINE_LF:
2021     fprintf(outfile, "Forced newline sequence: LF\n");
2022     break;
2023    
2024     case PCRE_NEWLINE_CRLF:
2025     fprintf(outfile, "Forced newline sequence: CRLF\n");
2026     break;
2027    
2028 ph10 149 case PCRE_NEWLINE_ANYCRLF:
2029     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2030     break;
2031    
2032 nigel 93 case PCRE_NEWLINE_ANY:
2033     fprintf(outfile, "Forced newline sequence: ANY\n");
2034     break;
2035    
2036 nigel 91 default:
2037     break;
2038     }
2039    
2040 nigel 43 if (first_char == -1)
2041     {
2042 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
2043 nigel 43 }
2044     else if (first_char < 0)
2045     {
2046     fprintf(outfile, "No first char\n");
2047     }
2048     else
2049     {
2050 nigel 63 int ch = first_char & 255;
2051 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2052 nigel 63 "" : " (caseless)";
2053 nigel 93 if (PRINTHEX(ch))
2054 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2055 nigel 3 else
2056 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2057 nigel 43 }
2058 nigel 37
2059 nigel 43 if (need_char < 0)
2060     {
2061     fprintf(outfile, "No need char\n");
2062 nigel 3 }
2063 nigel 43 else
2064     {
2065 nigel 63 int ch = need_char & 255;
2066 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2067 nigel 63 "" : " (caseless)";
2068 nigel 93 if (PRINTHEX(ch))
2069 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2070 nigel 43 else
2071 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2072 nigel 43 }
2073 nigel 75
2074     /* Don't output study size; at present it is in any case a fixed
2075     value, but it varies, depending on the computer architecture, and
2076     so messes up the test suite. (And with the /F option, it might be
2077 ph10 612 flipped.) If study was forced by an external -s, don't show this
2078     information unless -i or -d was also present. This means that, except
2079     when auto-callouts are involved, the output from runs with and without
2080     -s should be identical. */
2081 nigel 75
2082 ph10 612 if (do_study || (force_study && showinfo && !no_force_study))
2083 nigel 75 {
2084     if (extra == NULL)
2085     fprintf(outfile, "Study returned NULL\n");
2086     else
2087     {
2088     uschar *start_bits = NULL;
2089 ph10 455 int minlength;
2090 ph10 461
2091 ph10 455 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2092 ph10 461 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2093    
2094 nigel 75 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2095     if (start_bits == NULL)
2096 ph10 455 fprintf(outfile, "No set of starting bytes\n");
2097 nigel 75 else
2098     {
2099     int i;
2100     int c = 24;
2101     fprintf(outfile, "Starting byte set: ");
2102     for (i = 0; i < 256; i++)
2103     {
2104     if ((start_bits[i/8] & (1<<(i&7))) != 0)
2105     {
2106     if (c > 75)
2107     {
2108     fprintf(outfile, "\n ");
2109     c = 2;
2110     }
2111 nigel 93 if (PRINTHEX(i) && i != ' ')
2112 nigel 75 {
2113     fprintf(outfile, "%c ", i);
2114     c += 2;
2115     }
2116     else
2117     {
2118     fprintf(outfile, "\\x%02x ", i);
2119     c += 5;
2120     }
2121     }
2122     }
2123     fprintf(outfile, "\n");
2124     }
2125     }
2126     }
2127 nigel 3 }
2128    
2129 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
2130     that is all. The first 8 bytes of the file are the regex length and then
2131     the study length, in big-endian order. */
2132 nigel 3
2133 nigel 75 if (to_file != NULL)
2134 nigel 3 {
2135 nigel 75 FILE *f = fopen((char *)to_file, "wb");
2136     if (f == NULL)
2137 nigel 3 {
2138 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2139 nigel 3 }
2140 nigel 75 else
2141     {
2142     uschar sbuf[8];
2143 ph10 255 sbuf[0] = (uschar)((true_size >> 24) & 255);
2144     sbuf[1] = (uschar)((true_size >> 16) & 255);
2145     sbuf[2] = (uschar)((true_size >> 8) & 255);
2146     sbuf[3] = (uschar)((true_size) & 255);
2147 ph10 259
2148 ph10 255 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2149     sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2150     sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2151     sbuf[7] = (uschar)((true_study_size) & 255);
2152 nigel 3
2153 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
2154     fwrite(re, 1, true_size, f) < true_size)
2155     {
2156     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2157     }
2158 nigel 3 else
2159     {
2160 ph10 612 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2161    
2162     /* If there is study data, write it, but verify the writing only
2163     if the studying was requested by /S, not just by -s. */
2164    
2165 nigel 75 if (extra != NULL)
2166 nigel 3 {
2167 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
2168     true_study_size)
2169 nigel 3 {
2170 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
2171     strerror(errno));
2172 nigel 3 }
2173 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
2174 nigel 3 }
2175     }
2176 nigel 75 fclose(f);
2177 nigel 3 }
2178 nigel 77
2179     new_free(re);
2180     if (extra != NULL) new_free(extra);
2181 ph10 545 if (locale_set)
2182 ph10 541 {
2183     new_free((void *)tables);
2184     setlocale(LC_CTYPE, "C");
2185 ph10 545 locale_set = 0;
2186     }
2187 nigel 75 continue; /* With next regex */
2188 nigel 3 }
2189 nigel 75 } /* End of non-POSIX compile */
2190 nigel 3
2191     /* Read data lines and test them */
2192    
2193     for (;;)
2194     {
2195 nigel 87 uschar *q;
2196 ph10 147 uschar *bptr;
2197 nigel 57 int *use_offsets = offsets;
2198 nigel 53 int use_size_offsets = size_offsets;
2199 nigel 63 int callout_data = 0;
2200     int callout_data_set = 0;
2201 nigel 3 int count, c;
2202 nigel 29 int copystrings = 0;
2203 ph10 386 int find_match_limit = default_find_match_limit;
2204 nigel 29 int getstrings = 0;
2205     int getlist = 0;
2206 nigel 39 int gmatched = 0;
2207 nigel 35 int start_offset = 0;
2208 ph10 579 int start_offset_sign = 1;
2209 nigel 41 int g_notempty = 0;
2210 nigel 77 int use_dfa = 0;
2211 nigel 3
2212     options = 0;
2213    
2214 nigel 91 *copynames = 0;
2215     *getnames = 0;
2216    
2217     copynamesptr = copynames;
2218     getnamesptr = getnames;
2219    
2220 nigel 63 pcre_callout = callout;
2221     first_callout = 1;
2222 ph10 645 last_callout_mark = NULL;
2223 nigel 63 callout_extra = 0;
2224     callout_count = 0;
2225     callout_fail_count = 999999;
2226     callout_fail_id = -1;
2227 nigel 73 show_malloc = 0;
2228 nigel 63
2229 nigel 91 if (extra != NULL) extra->flags &=
2230     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2231    
2232     len = 0;
2233     for (;;)
2234 nigel 11 {
2235 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2236 nigel 91 {
2237 ph10 537 if (len > 0) /* Reached EOF without hitting a newline */
2238     {
2239 ph10 545 fprintf(outfile, "\n");
2240 ph10 537 break;
2241 ph10 545 }
2242 nigel 91 done = 1;
2243     goto CONTINUE;
2244     }
2245     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2246     len = (int)strlen((char *)buffer);
2247     if (buffer[len-1] == '\n') break;
2248 nigel 11 }
2249 nigel 3
2250     while (len > 0 && isspace(buffer[len-1])) len--;
2251     buffer[len] = 0;
2252     if (len == 0) break;
2253    
2254     p = buffer;
2255     while (isspace(*p)) p++;
2256    
2257 ph10 147 bptr = q = dbuffer;
2258 nigel 3 while ((c = *p++) != 0)
2259     {
2260     int i = 0;
2261     int n = 0;
2262 nigel 63
2263 nigel 3 if (c == '\\') switch ((c = *p++))
2264     {
2265     case 'a': c = 7; break;
2266     case 'b': c = '\b'; break;
2267     case 'e': c = 27; break;
2268     case 'f': c = '\f'; break;
2269     case 'n': c = '\n'; break;
2270     case 'r': c = '\r'; break;
2271     case 't': c = '\t'; break;
2272     case 'v': c = '\v'; break;
2273    
2274     case '0': case '1': case '2': case '3':
2275     case '4': case '5': case '6': case '7':
2276     c -= '0';
2277     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2278     c = c * 8 + *p++ - '0';
2279 nigel 91
2280     #if !defined NOUTF8
2281     if (use_utf8 && c > 255)
2282     {
2283     unsigned char buff8[8];
2284     int ii, utn;
2285     utn = ord2utf8(c, buff8);
2286     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2287     c = buff8[ii]; /* Last byte */
2288     }
2289     #endif
2290 nigel 3 break;
2291    
2292     case 'x':
2293 nigel 49
2294     /* Handle \x{..} specially - new Perl thing for utf8 */
2295    
2296 nigel 79 #if !defined NOUTF8
2297 nigel 49 if (*p == '{')
2298     {
2299     unsigned char *pt = p;
2300     c = 0;
2301     while (isxdigit(*(++pt)))
2302     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2303     if (*pt == '}')
2304     {
2305 nigel 67 unsigned char buff8[8];
2306 nigel 49 int ii, utn;
2307 ph10 355 if (use_utf8)
2308 ph10 358 {
2309 ph10 355 utn = ord2utf8(c, buff8);
2310     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2311     c = buff8[ii]; /* Last byte */
2312     }
2313     else
2314     {
2315 ph10 358 if (c > 255)
2316 ph10 355 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2317     "UTF-8 mode is not enabled.\n"
2318     "** Truncation will probably give the wrong result.\n", c);
2319 ph10 358 }
2320 nigel 49 p = pt + 1;
2321     break;
2322     }
2323     /* Not correct form; fall through */
2324     }
2325 nigel 79 #endif
2326 nigel 49
2327     /* Ordinary \x */
2328    
2329 nigel 3 c = 0;
2330     while (i++ < 2 && isxdigit(*p))
2331     {
2332     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2333     p++;
2334     }
2335     break;
2336    
2337 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
2338 nigel 3 p--;
2339     continue;
2340    
2341 nigel 75 case '>':
2342 ph10 579 if (*p == '-')
2343 ph10 567 {
2344     start_offset_sign = -1;
2345     p++;
2346 ph10 579 }
2347 nigel 75 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2348 ph10 579 start_offset *= start_offset_sign;
2349 nigel 75 continue;
2350    
2351 nigel 3 case 'A': /* Option setting */
2352     options |= PCRE_ANCHORED;
2353     continue;
2354    
2355     case 'B':
2356     options |= PCRE_NOTBOL;
2357     continue;
2358    
2359 nigel 29 case 'C':
2360 nigel 63 if (isdigit(*p)) /* Set copy string */
2361     {
2362     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2363     copystrings |= 1 << n;
2364     }
2365     else if (isalnum(*p))
2366     {
2367 nigel 91 uschar *npp = copynamesptr;
2368 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2369 nigel 91 *npp++ = 0;
2370 nigel 67 *npp = 0;
2371 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2372 nigel 63 if (n < 0)
2373 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2374     copynamesptr = npp;
2375 nigel 63 }
2376     else if (*p == '+')
2377     {
2378     callout_extra = 1;
2379     p++;
2380     }
2381     else if (*p == '-')
2382     {
2383     pcre_callout = NULL;
2384     p++;
2385     }
2386     else if (*p == '!')
2387     {
2388     callout_fail_id = 0;
2389     p++;
2390     while(isdigit(*p))
2391     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2392     callout_fail_count = 0;
2393     if (*p == '!')
2394     {
2395     p++;
2396     while(isdigit(*p))
2397     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2398     }
2399     }
2400     else if (*p == '*')
2401     {
2402     int sign = 1;
2403     callout_data = 0;
2404     if (*(++p) == '-') { sign = -1; p++; }
2405     while(isdigit(*p))
2406     callout_data = callout_data * 10 + *p++ - '0';
2407     callout_data *= sign;
2408     callout_data_set = 1;
2409     }
2410 nigel 29 continue;
2411    
2412 nigel 79 #if !defined NODFA
2413 nigel 77 case 'D':
2414 nigel 79 #if !defined NOPOSIX
2415 nigel 77 if (posix || do_posix)
2416     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2417     else
2418 nigel 79 #endif
2419 nigel 77 use_dfa = 1;
2420     continue;
2421 ph10 553 #endif
2422 nigel 77
2423 ph10 553 #if !defined NODFA
2424 nigel 77 case 'F':
2425     options |= PCRE_DFA_SHORTEST;
2426     continue;
2427 nigel 79 #endif
2428 nigel 77
2429 nigel 29 case 'G':
2430 nigel 63 if (isdigit(*p))
2431     {
2432     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2433     getstrings |= 1 << n;
2434     }
2435     else if (isalnum(*p))
2436     {
2437 nigel 91 uschar *npp = getnamesptr;
2438 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2439 nigel 91 *npp++ = 0;
2440 nigel 67 *npp = 0;
2441 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2442 nigel 63 if (n < 0)
2443 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2444     getnamesptr = npp;
2445 nigel 63 }
2446 nigel 29 continue;
2447    
2448     case 'L':
2449     getlist = 1;
2450     continue;
2451    
2452 nigel 63 case 'M':
2453     find_match_limit = 1;
2454     continue;
2455    
2456 nigel 37 case 'N':
2457 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
2458     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2459 ph10 461 else
2460 ph10 442 options |= PCRE_NOTEMPTY;
2461 nigel 37 continue;
2462    
2463 nigel 3 case 'O':
2464     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2465 nigel 53 if (n > size_offsets_max)
2466     {
2467     size_offsets_max = n;
2468 nigel 57 free(offsets);
2469 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2470 nigel 53 if (offsets == NULL)
2471     {
2472     printf("** Failed to get %d bytes of memory for offsets vector\n",
2473 ph10 151 (int)(size_offsets_max * sizeof(int)));
2474 nigel 77 yield = 1;
2475     goto EXIT;
2476 nigel 53 }
2477     }
2478     use_size_offsets = n;
2479 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2480 nigel 3 continue;
2481    
2482 nigel 75 case 'P':
2483 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2484 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2485 nigel 75 continue;
2486    
2487 nigel 91 case 'Q':
2488     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2489     if (extra == NULL)
2490     {
2491     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2492     extra->flags = 0;
2493     }
2494     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2495     extra->match_limit_recursion = n;
2496     continue;
2497    
2498     case 'q':
2499     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2500     if (extra == NULL)
2501     {
2502     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2503     extra->flags = 0;
2504     }
2505     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2506     extra->match_limit = n;
2507     continue;
2508    
2509 nigel 79 #if !defined NODFA
2510 nigel 77 case 'R':
2511     options |= PCRE_DFA_RESTART;
2512     continue;
2513 nigel 79 #endif
2514 nigel 77
2515 nigel 73 case 'S':
2516     show_malloc = 1;
2517     continue;
2518 ph10 392
2519 ph10 389 case 'Y':
2520     options |= PCRE_NO_START_OPTIMIZE;
2521 ph10 392 continue;
2522 nigel 73
2523 nigel 3 case 'Z':
2524     options |= PCRE_NOTEOL;
2525     continue;
2526 nigel 71
2527     case '?':
2528     options |= PCRE_NO_UTF8_CHECK;
2529     continue;
2530 nigel 91
2531     case '<':
2532     {
2533     int x = check_newline(p, outfile);
2534     if (x == 0) goto NEXT_DATA;
2535     options |= x;
2536     while (*p++ != '>');
2537     }
2538     continue;
2539 nigel 3 }
2540 nigel 9 *q++ = c;
2541 nigel 3 }
2542 nigel 9 *q = 0;
2543 ph10 530 len = (int)(q - dbuffer);
2544 ph10 545
2545 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
2546 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2547 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
2548 ph10 371
2549 ph10 363 #if !defined NOPOSIX
2550     if (posix || do_posix)
2551     {
2552     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2553 ph10 371 bptr += buffer_size - len - 1;
2554 ph10 363 }
2555 ph10 371 else
2556     #endif
2557 ph10 363 {
2558     memmove(bptr + buffer_size - len, bptr, len);
2559 ph10 371 bptr += buffer_size - len;
2560     }
2561 nigel 3
2562 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
2563     {
2564     printf("**Match limit not relevant for DFA matching: ignored\n");
2565     find_match_limit = 0;
2566     }
2567    
2568 nigel 3 /* Handle matching via the POSIX interface, which does not
2569 nigel 63 support timing or playing with the match limit or callout data. */
2570 nigel 3
2571 nigel 37 #if !defined NOPOSIX
2572 nigel 3 if (posix || do_posix)
2573     {
2574     int rc;
2575     int eflags = 0;
2576 nigel 63 regmatch_t *pmatch = NULL;
2577     if (use_size_offsets > 0)
2578 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2579 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2580     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2581 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2582 nigel 3
2583 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2584 nigel 3
2585     if (rc != 0)
2586     {
2587 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2588 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2589     }
2590 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2591     != 0)
2592     {
2593     fprintf(outfile, "Matched with REG_NOSUB\n");
2594     }
2595 nigel 3 else
2596     {
2597 nigel 7 size_t i;
2598 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
2599 nigel 3 {
2600     if (pmatch[i].rm_so >= 0)
2601     {
2602 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2603 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2604     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2605 nigel 3 fprintf(outfile, "\n");
2606 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
2607 nigel 35 {
2608 ph10 616 fprintf(outfile, "%2d+ ", (int)i);
2609 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2610     outfile);
2611 nigel 35 fprintf(outfile, "\n");
2612     }
2613 nigel 3 }
2614     }
2615     }
2616 nigel 53 free(pmatch);
2617 nigel 3 }
2618    
2619 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2620 nigel 3
2621 nigel 37 else
2622     #endif /* !defined NOPOSIX */
2623    
2624 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2625 nigel 3 {
2626 ph10 512 markptr = NULL;
2627    
2628 nigel 93 if (timeitm > 0)
2629 nigel 3 {
2630     register int i;
2631     clock_t time_taken;
2632     clock_t start_time = clock();
2633 nigel 77
2634 nigel 79 #if !defined NODFA
2635 nigel 77 if (all_use_dfa || use_dfa)
2636     {
2637     int workspace[1000];
2638 nigel 93 for (i = 0; i < timeitm; i++)
2639 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2640 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2641     sizeof(workspace)/sizeof(int));
2642     }
2643     else
2644 nigel 79 #endif
2645 nigel 77
2646 nigel 93 for (i = 0; i < timeitm; i++)
2647 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2648 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2649 nigel 77
2650 nigel 3 time_taken = clock() - start_time;
2651 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2652     (((double)time_taken * 1000.0) / (double)timeitm) /
2653 nigel 63 (double)CLOCKS_PER_SEC);
2654 nigel 3 }
2655    
2656 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2657 nigel 87 varying limits in order to find the minimum value for the match limit and
2658     for the recursion limit. */
2659 nigel 63
2660     if (find_match_limit)
2661     {
2662     if (extra == NULL)
2663     {
2664 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2665 nigel 63 extra->flags = 0;
2666     }
2667    
2668 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2669 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2670     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2671     PCRE_ERROR_MATCHLIMIT, "match()");
2672 nigel 63
2673 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2674     options|g_notempty, use_offsets, use_size_offsets,
2675     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2676     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2677 nigel 63 }
2678    
2679     /* If callout_data is set, use the interface with additional data */
2680    
2681     else if (callout_data_set)
2682     {
2683     if (extra == NULL)
2684     {
2685 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2686 nigel 63 extra->flags = 0;
2687     }
2688     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2689 nigel 71 extra->callout_data = &callout_data;
2690 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2691     options | g_notempty, use_offsets, use_size_offsets);
2692     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2693     }
2694    
2695     /* The normal case is just to do the match once, with the default
2696     value of match_limit. */
2697    
2698 nigel 79 #if !defined NODFA
2699 nigel 77 else if (all_use_dfa || use_dfa)
2700     {
2701     int workspace[1000];
2702 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2703 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2704     sizeof(workspace)/sizeof(int));
2705     if (count == 0)
2706     {
2707     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2708     count = use_size_offsets/2;
2709     }
2710     }
2711 nigel 79 #endif
2712 nigel 77
2713 nigel 75 else
2714     {
2715     count = pcre_exec(re, extra, (char *)bptr, len,
2716     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2717 nigel 77 if (count == 0)
2718     {
2719     fprintf(outfile, "Matched, but too many substrings\n");
2720     count = use_size_offsets/3;
2721     }
2722 nigel 75 }
2723 nigel 3
2724 nigel 39 /* Matched */
2725    
2726 nigel 3 if (count >= 0)
2727     {
2728 nigel 93 int i, maxcount;
2729    
2730     #if !defined NODFA
2731     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2732     #endif
2733     maxcount = use_size_offsets/3;
2734    
2735     /* This is a check against a lunatic return value. */
2736    
2737     if (count > maxcount)
2738     {
2739     fprintf(outfile,
2740     "** PCRE error: returned count %d is too big for offset size %d\n",
2741     count, use_size_offsets);
2742     count = use_size_offsets/3;
2743     if (do_g || do_G)
2744     {
2745     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2746     do_g = do_G = FALSE; /* Break g/G loop */
2747     }
2748     }
2749 ph10 626
2750     /* do_allcaps requests showing of all captures in the pattern, to check
2751     unset ones at the end. */
2752    
2753     if (do_allcaps)
2754     {
2755     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2756     count++; /* Allow for full match */
2757     if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2758     }
2759 nigel 93
2760 ph10 626 /* Output the captured substrings */
2761    
2762 nigel 29 for (i = 0; i < count * 2; i += 2)
2763 nigel 3 {
2764 nigel 57 if (use_offsets[i] < 0)
2765 ph10 626 {
2766     if (use_offsets[i] != -1)
2767     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2768     use_offsets[i], i);
2769     if (use_offsets[i+1] != -1)
2770     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2771     use_offsets[i+1], i+1);
2772 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2773 ph10 626 }
2774 nigel 3 else
2775     {
2776     fprintf(outfile, "%2d: ", i/2);
2777 nigel 63 (void)pchars(bptr + use_offsets[i],
2778     use_offsets[i+1] - use_offsets[i], outfile);
2779 nigel 3 fprintf(outfile, "\n");
2780 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
2781 nigel 35 {
2782 ph10 616 fprintf(outfile, "%2d+ ", i/2);
2783     (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2784     outfile);
2785     fprintf(outfile, "\n");
2786 nigel 35 }
2787 nigel 3 }
2788     }
2789 ph10 512
2790 ph10 510 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2791 nigel 29
2792     for (i = 0; i < 32; i++)
2793     {
2794     if ((copystrings & (1 << i)) != 0)
2795     {
2796 nigel 91 char copybuffer[256];
2797 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2798 nigel 37 i, copybuffer, sizeof(copybuffer));
2799 nigel 29 if (rc < 0)
2800     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2801     else
2802 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2803 nigel 29 }
2804     }
2805    
2806 nigel 91 for (copynamesptr = copynames;
2807     *copynamesptr != 0;
2808     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2809     {
2810     char copybuffer[256];
2811     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2812     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2813     if (rc < 0)
2814     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2815     else
2816     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2817     }
2818    
2819 nigel 29 for (i = 0; i < 32; i++)
2820     {
2821     if ((getstrings & (1 << i)) != 0)
2822     {
2823     const char *substring;
2824 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2825 nigel 29 i, &substring);
2826     if (rc < 0)
2827     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2828     else
2829     {
2830     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2831 nigel 49 pcre_free_substring(substring);
2832 nigel 29 }
2833     }
2834     }
2835    
2836 nigel 91 for (getnamesptr = getnames;
2837     *getnamesptr != 0;
2838     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2839     {
2840     const char *substring;
2841     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2842     count, (char *)getnamesptr, &substring);
2843     if (rc < 0)
2844     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2845     else
2846     {
2847     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2848     pcre_free_substring(substring);
2849     }
2850     }
2851    
2852 nigel 29 if (getlist)
2853     {
2854     const char **stringlist;
2855 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2856 nigel 29 &stringlist);
2857     if (rc < 0)
2858     fprintf(outfile, "get substring list failed %d\n", rc);
2859     else
2860     {
2861     for (i = 0; i < count; i++)
2862     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2863     if (stringlist[i] != NULL)
2864     fprintf(outfile, "string list not terminated by NULL\n");
2865 nigel 49 /* free((void *)stringlist); */
2866     pcre_free_substring_list(stringlist);
2867 nigel 29 }
2868     }
2869 nigel 39 }
2870 nigel 29
2871 nigel 75 /* There was a partial match */
2872    
2873     else if (count == PCRE_ERROR_PARTIAL)
2874     {
2875 ph10 510 if (markptr == NULL) fprintf(outfile, "Partial match");
2876     else fprintf(outfile, "Partial match, mark=%s", markptr);
2877 ph10 426 if (use_size_offsets > 1)
2878     {
2879     fprintf(outfile, ": ");
2880     pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2881 ph10 461 outfile);
2882     }
2883 nigel 77 fprintf(outfile, "\n");
2884 nigel 75 break; /* Out of the /g loop */
2885     }
2886    
2887 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2888 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2889     to advance the start offset, and continue. We won't be at the end of the
2890     string - that was checked before setting g_notempty.
2891 nigel 39
2892 ph10 566 Complication arises in the case when the newline convention is "any",
2893 ph10 579 "crlf", or "anycrlf". If the previous match was at the end of a line
2894     terminated by CRLF, an advance of one character just passes the \r,
2895 ph10 566 whereas we should prefer the longer newline sequence, as does the code in
2896 ph10 579 pcre_exec(). Fudge the offset value to achieve this. We check for a
2897     newline setting in the pattern; if none was set, use pcre_config() to
2898 ph10 566 find the default.
2899 ph10 144
2900 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2901     character, not one byte. */
2902    
2903 nigel 3 else
2904     {
2905 nigel 41 if (g_notempty != 0)
2906 nigel 35 {
2907 nigel 73 int onechar = 1;
2908 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2909 nigel 57 use_offsets[0] = start_offset;
2910 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2911     {
2912     int d;
2913     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2914 ph10 391 /* Note that these values are always the ASCII ones, even in
2915     EBCDIC environments. CR = 13, NL = 10. */
2916     obits = (d == 13)? PCRE_NEWLINE_CR :
2917     (d == 10)? PCRE_NEWLINE_LF :
2918     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2919 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2920 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2921     }
2922 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2923 ph10 566 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2924 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2925 ph10 149 &&
2926 ph10 143 start_offset < len - 1 &&
2927     bptr[start_offset] == '\r' &&
2928     bptr[start_offset+1] == '\n')
2929 ph10 144 onechar++;
2930 ph10 143 else if (use_utf8)
2931 nigel 73 {
2932     while (start_offset + onechar < len)
2933     {
2934 ph10 566 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2935 ph10 579 onechar++;
2936 nigel 73 }
2937     }
2938     use_offsets[1] = start_offset + onechar;
2939 nigel 35 }
2940 nigel 41 else
2941     {
2942 ph10 598 switch(count)
2943     {
2944     case PCRE_ERROR_NOMATCH:
2945 ph10 512 if (gmatched == 0)
2946 ph10 510 {
2947     if (markptr == NULL) fprintf(outfile, "No match\n");
2948     else fprintf(outfile, "No match, mark = %s\n", markptr);
2949 ph10 512 }
2950 ph10 598 break;
2951    
2952     case PCRE_ERROR_BADUTF8:
2953     case PCRE_ERROR_SHORTUTF8:
2954     fprintf(outfile, "Error %d (%s UTF-8 string)", count,
2955     (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
2956     if (use_size_offsets >= 2)
2957     fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
2958     use_offsets[1]);
2959     fprintf(outfile, "\n");
2960     break;
2961    
2962     default:
2963 ph10 604 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
2964     fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
2965     else
2966     fprintf(outfile, "Error %d (Unexpected value)\n", count);
2967 ph10 598 break;
2968 nigel 41 }
2969 ph10 598
2970 nigel 41 break; /* Out of the /g loop */
2971     }
2972 nigel 3 }
2973 nigel 35
2974 nigel 39 /* If not /g or /G we are done */
2975    
2976     if (!do_g && !do_G) break;
2977    
2978 nigel 41 /* If we have matched an empty string, first check to see if we are at
2979 ph10 442 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2980     Perl's /g option does. This turns out to be rather cunning. First we set
2981     PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2982 nigel 47 same point. If this fails (picked up above) we advance to the next
2983 ph10 143 character. */
2984 ph10 142
2985 nigel 41 g_notempty = 0;
2986 ph10 142
2987 nigel 57 if (use_offsets[0] == use_offsets[1])
2988 nigel 41 {
2989 nigel 57 if (use_offsets[0] == len) break;
2990 ph10 442 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2991 nigel 41 }
2992 nigel 39
2993     /* For /g, update the start offset, leaving the rest alone */
2994    
2995 ph10 143 if (do_g) start_offset = use_offsets[1];
2996 nigel 39
2997     /* For /G, update the pointer and length */
2998    
2999     else
3000 nigel 35 {
3001 ph10 143 bptr += use_offsets[1];
3002     len -= use_offsets[1];
3003 nigel 35 }
3004 nigel 39 } /* End of loop for /g and /G */
3005 nigel 91
3006     NEXT_DATA: continue;
3007 nigel 39 } /* End of loop for data lines */
3008 nigel 3
3009 nigel 11 CONTINUE:
3010 nigel 37
3011     #if !defined NOPOSIX
3012 nigel 3 if (posix || do_posix) regfree(&preg);
3013 nigel 37 #endif
3014    
3015 nigel 77 if (re != NULL) new_free(re);
3016     if (extra != NULL) new_free(extra);
3017 ph10 541 if (locale_set)
3018 nigel 25 {
3019 nigel 77 new_free((void *)tables);
3020 nigel 25 setlocale(LC_CTYPE, "C");
3021 nigel 93 locale_set = 0;
3022 nigel 25 }
3023 nigel 3 }
3024    
3025 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
3026 nigel 77
3027     EXIT:
3028    
3029     if (infile != NULL && infile != stdin) fclose(infile);
3030     if (outfile != NULL && outfile != stdout) fclose(outfile);
3031    
3032     free(buffer);
3033     free(dbuffer);
3034     free(pbuffer);
3035     free(offsets);
3036    
3037     return yield;
3038 nigel 3 }
3039    
3040 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12