/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 642 - (hide annotations) (download)
Thu Jul 28 18:59:40 2011 UTC (3 years, 4 months ago) by ph10
File MIME type: text/plain
File size: 91237 byte(s)
Avoid false positive for infinite recursion by not checking conditionals at 
compile time, but add tests at runtime that also catch infinite mutual 
recursion.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50 nigel 3
51 ph10 287 #ifdef SUPPORT_LIBREADLINE
52 ph10 343 #ifdef HAVE_UNISTD_H
53 ph10 287 #include <unistd.h>
54 ph10 343 #endif
55 ph10 287 #include <readline/readline.h>
56     #include <readline/history.h>
57     #endif
58 nigel 93
59 ph10 287
60 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
61     input and output without "b"; then I was told that "b" was needed in some
62     environments, so it was added for release 5.0 to both the input and output. (It
63     makes no difference on Unix-like systems.) Later I was told that it is wrong
64     for the input on Windows. I've now abstracted the modes into two macros that
65     are set here, to make it easier to fiddle with them, and removed "b" from the
66     input mode under Windows. */
67    
68     #if defined(_WIN32) || defined(WIN32)
69     #include <io.h> /* For _setmode() */
70     #include <fcntl.h> /* For _O_BINARY */
71     #define INPUT_MODE "r"
72     #define OUTPUT_MODE "wb"
73    
74 ph10 411 #ifndef isatty
75     #define isatty _isatty /* This is what Windows calls them, I'm told, */
76     #endif /* though in some environments they seem to */
77     /* be already defined, hence the #ifndefs. */
78     #ifndef fileno
79 ph10 343 #define fileno _fileno
80 ph10 411 #endif
81 ph10 343
82 ph10 580 /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84     #ifdef __BORLANDC__
85     #define _setmode(handle, mode) setmode(handle, mode)
86     #endif
87    
88     /* Not Windows */
89    
90 nigel 93 #else
91     #include <sys/time.h> /* These two includes are needed */
92     #include <sys/resource.h> /* for setrlimit(). */
93     #define INPUT_MODE "rb"
94     #define OUTPUT_MODE "wb"
95 nigel 91 #endif
96    
97 nigel 93
98 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
99     displaying the results of pcre_study() and we also need to know about the
100     internal macros, structures, and other internal data values; pcretest has
101     "inside information" compared to a program that strictly follows the PCRE API.
102 nigel 37
103 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105     appropriately for an application, not for building PCRE. */
106 nigel 77
107 ph10 145 #include "pcre.h"
108 nigel 77 #include "pcre_internal.h"
109    
110 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
111     to keep two copies, we include the source file here, changing the names of the
112     external symbols to prevent clashes. */
113 nigel 77
114 ph10 351 #define _pcre_ucp_gentype ucp_gentype
115 nigel 85 #define _pcre_utf8_table1 utf8_table1
116     #define _pcre_utf8_table1_size utf8_table1_size
117     #define _pcre_utf8_table2 utf8_table2
118     #define _pcre_utf8_table3 utf8_table3
119     #define _pcre_utf8_table4 utf8_table4
120     #define _pcre_utt utt
121     #define _pcre_utt_size utt_size
122 ph10 240 #define _pcre_utt_names utt_names
123 nigel 85 #define _pcre_OP_lengths OP_lengths
124    
125     #include "pcre_tables.c"
126    
127     /* We also need the pcre_printint() function for printing out compiled
128     patterns. This function is in a separate file so that it can be included in
129 ph10 507 pcre_compile.c when that module is compiled with debugging enabled. It needs to
130 ph10 498 know which case is being compiled. */
131 nigel 85
132 ph10 498 #define COMPILING_PCRETEST
133     #include "pcre_printint.src"
134    
135     /* The definition of the macro PRINTABLE, which determines whether to print an
136 nigel 93 output character as-is or as a hex value when showing compiled patterns, is
137 ph10 498 contained in the printint.src file. We use it here also, in cases when the
138     locale has not been explicitly changed, so as to get consistent output from
139     systems that differ in their output from isprint() even in the "C" locale. */
140 nigel 93
141     #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
142 nigel 85
143 nigel 37 /* It is possible to compile this test program without including support for
144     testing the POSIX interface, though this is not available via the standard
145     Makefile. */
146    
147     #if !defined NOPOSIX
148 nigel 3 #include "pcreposix.h"
149 nigel 37 #endif
150 nigel 3
151 ph10 107 /* It is also possible, for the benefit of the version currently imported into
152     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
153     interface to the DFA matcher (NODFA), and without the doublecheck of the old
154     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
155     UTF8 support if PCRE is built without it. */
156 nigel 79
157 ph10 107 #ifndef SUPPORT_UTF8
158     #ifndef NOUTF8
159     #define NOUTF8
160     #endif
161     #endif
162 nigel 79
163 ph10 107
164 nigel 85 /* Other parameters */
165    
166 nigel 3 #ifndef CLOCKS_PER_SEC
167     #ifdef CLK_TCK
168     #define CLOCKS_PER_SEC CLK_TCK
169     #else
170     #define CLOCKS_PER_SEC 100
171     #endif
172     #endif
173    
174 nigel 93 /* This is the default loop count for timing. */
175    
176 nigel 75 #define LOOPREPEAT 500000
177 nigel 3
178 nigel 85 /* Static variables */
179    
180 nigel 3 static FILE *outfile;
181     static int log_store = 0;
182 nigel 63 static int callout_count;
183     static int callout_extra;
184     static int callout_fail_count;
185     static int callout_fail_id;
186 ph10 210 static int debug_lengths;
187 nigel 63 static int first_callout;
188 nigel 93 static int locale_set = 0;
189 nigel 73 static int show_malloc;
190 nigel 67 static int use_utf8;
191 nigel 43 static size_t gotten_store;
192 nigel 3
193 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
194    
195     static int buffer_size = 50000;
196     static uschar *buffer = NULL;
197     static uschar *dbuffer = NULL;
198 nigel 75 static uschar *pbuffer = NULL;
199 nigel 3
/* Explanatory text for run-time error codes, one slot per code. The slot
number is shown on each line; presumably it is the negated PCRE error number
(the code that indexes this table is not in view - confirm there). A NULL slot
is a code that has no text here, either because it cannot occur or because it
is reported by dedicated code. */

static const char *errtexts[] = {
  /*  0 */ NULL,  /* 0 is no error */
  /*  1 */ NULL,  /* NOMATCH is handled specially */
  /*  2 */ "NULL argument passed",
  /*  3 */ "bad option value",
  /*  4 */ "magic number missing",
  /*  5 */ "unknown opcode - pattern overwritten?",
  /*  6 */ "no more memory",
  /*  7 */ NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  /*  8 */ "match limit exceeded",
  /*  9 */ "callout error code",
  /* 10 */ NULL,  /* BADUTF8 is handled specially */
  /* 11 */ "bad UTF-8 offset",
  /* 12 */ NULL,  /* PARTIAL is handled specially */
  /* 13 */ "not used - internal error",
  /* 14 */ "internal error - pattern overwritten?",
  /* 15 */ "bad count value",
  /* 16 */ "item unsupported for DFA matching",
  /* 17 */ "backreference condition or recursion test not supported for DFA matching",
  /* 18 */ "match limit not supported for DFA matching",
  /* 19 */ "workspace size exceeded in DFA matching",
  /* 20 */ "too much recursion for DFA matching",
  /* 21 */ "recursion limit exceeded",
  /* 22 */ "not used - internal error",
  /* 23 */ "invalid combination of newline options",
  /* 24 */ "bad offset value",
  /* 25 */ NULL,  /* SHORTUTF8 is handled specially */
  /* 26 */ "nested recursion at the same subject position"
};
231    
232    
233 ph10 541 /*************************************************
234     * Alternate character tables *
235     *************************************************/
236 nigel 49
237 ph10 545 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
238     using the default tables of the library. However, the T option can be used to
239     select alternate sets of tables, for different kinds of testing. Note also that
240 ph10 541 the L (locale) option also adjusts the tables. */
241    
/* The default table set distributed with PCRE; it knows about ASCII
characters only. Four tables are laid end to end: 256 bytes of lower-casing,
256 bytes of case-flipping, ten 32-byte class bit maps, and 256 bytes of
character-type flags (1088 bytes in all). */

static const unsigned char tables0[] = {

/* Lower-casing table: entry c is c converted to lower case. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* Case-flipping table: entry c is c with its case swapped. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 65, 66, 67, 68, 69, 70, 71,
   72, 73, 74, 75, 76, 77, 78, 79,
   80, 81, 82, 83, 84, 85, 86, 87,
   88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* Class bit maps. Each map occupies 32 bytes, with character c represented by
bit (c & 7), counting from the least significant end, of byte (c >> 3). Other
classes are built from combinations of these ten. */

  /* space */
  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* xdigit */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* digit */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* upper */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* lower */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* word */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* graph */
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* print */
  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* punct */
  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
  0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* cntrl */
  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* Character-type flags, one byte per character, built from these bits:
  0x01   white space character
  0x02   letter
  0x04   decimal digit
  0x08   hexadecimal digit
  0x10   alphanumeric or '_'
  0x80   regular expression metacharacter or binary zero
*/

  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
  0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
  0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
413    
/* This is a set of tables that came originally from a Windows user. It seems
to be at least an approximation of ISO 8859. In particular, there are
characters greater than 128 that are marked as spaces, letters, etc. The four
sections have the same sizes and order as those of tables0 (256 + 256 + 320 +
256 bytes); the labels on the class bit maps assume the same map order as
tables0, which the ASCII part of each map bears out. */

static const unsigned char tables1[] = {

/* Lower-casing table */

  0,1,2,3,4,5,6,7,
  8,9,10,11,12,13,14,15,
  16,17,18,19,20,21,22,23,
  24,25,26,27,28,29,30,31,
  32,33,34,35,36,37,38,39,
  40,41,42,43,44,45,46,47,
  48,49,50,51,52,53,54,55,
  56,57,58,59,60,61,62,63,
  64,97,98,99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,91,92,93,94,95,
  96,97,98,99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,215,
  248,249,250,251,252,253,254,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* Case-flipping table */

  0,1,2,3,4,5,6,7,
  8,9,10,11,12,13,14,15,
  16,17,18,19,20,21,22,23,
  24,25,26,27,28,29,30,31,
  32,33,34,35,36,37,38,39,
  40,41,42,43,44,45,46,47,
  48,49,50,51,52,53,54,55,
  56,57,58,59,60,61,62,63,
  64,97,98,99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,91,92,93,94,95,
  96,65,66,67,68,69,70,71,
  72,73,74,75,76,77,78,79,
  80,81,82,83,84,85,86,87,
  88,89,90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,215,
  248,249,250,251,252,253,254,223,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,247,
  216,217,218,219,220,221,222,255,

/* Class bit maps, 32 bytes each */

  /* space */
  0,62,0,0,1,0,0,0,
  0,0,0,0,0,0,0,0,
  32,0,0,0,1,0,0,0,
  0,0,0,0,0,0,0,0,

  /* xdigit */
  0,0,0,0,0,0,255,3,
  126,0,0,0,126,0,0,0,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,

  /* digit */
  0,0,0,0,0,0,255,3,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,12,2,
  0,0,0,0,0,0,0,0,

  /* upper */
  0,0,0,0,0,0,0,0,
  254,255,255,7,0,0,0,0,
  0,0,0,0,0,0,0,0,
  255,255,127,127,0,0,0,0,

  /* lower */
  0,0,0,0,0,0,0,0,
  0,0,0,0,254,255,255,7,
  0,0,0,0,0,4,32,4,
  0,0,0,128,255,255,127,255,

  /* word */
  0,0,0,0,0,0,255,3,
  254,255,255,135,254,255,255,7,
  0,0,0,0,0,4,44,6,
  255,255,127,255,255,255,127,255,

  /* graph */
  0,0,0,0,254,255,255,255,
  255,255,255,255,255,255,255,127,
  0,0,0,0,254,255,255,255,
  255,255,255,255,255,255,255,255,

  /* print */
  0,2,0,0,255,255,255,255,
  255,255,255,255,255,255,255,127,
  0,0,0,0,255,255,255,255,
  255,255,255,255,255,255,255,255,

  /* punct */
  0,0,0,0,254,255,0,252,
  1,0,0,248,1,0,0,120,
  0,0,0,0,254,255,255,255,
  0,0,128,0,0,0,128,0,

  /* cntrl */
  255,255,255,255,0,0,0,0,
  0,0,0,0,0,0,0,128,
  255,255,255,255,0,0,0,0,
  0,0,0,0,0,0,0,0,

/* Character-type flags, one byte per character (bit meanings as documented
for tables0) */

  128,0,0,0,0,0,0,0,
  0,1,1,0,1,1,0,0,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,
  1,0,0,0,128,0,0,0,
  128,128,128,128,0,0,128,0,
  28,28,28,28,28,28,28,28,
  28,28,0,0,0,0,0,128,
  0,26,26,26,26,26,26,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,128,128,0,128,16,
  0,26,26,26,26,26,26,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,128,128,0,0,0,
  0,0,0,0,0,1,0,0,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,
  1,0,0,0,0,0,0,0,
  0,0,18,0,0,0,0,0,
  0,0,20,20,0,18,0,0,
  0,20,18,0,0,0,0,0,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,0,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,0,
  18,18,18,18,18,18,18,18
};
556    
557    
558    
559 ph10 558
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* A few elderly systems (SunOS4 is one) ship a C library without strerror().
They do export the message table itself, so a lookup in that table is all
that is needed to stand in for the missing function.

Argument:   n    an errno value
Returns:    the message text from sys_errlist[] when n is a valid index into
            it, otherwise a fixed "unknown error number" string
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];
  return "unknown error number";
}
#endif /* HAVE_STRERROR */
579    
580    
581    
582    
583     /*************************************************
584 nigel 91 * Read or extend an input line *
585     *************************************************/
586    
587     /* Input lines are read into buffer, but both patterns and data lines can be
588     continued over multiple input lines. In addition, if the buffer fills up, we
589     want to automatically expand it so as to be able to handle extremely large
590     lines that are needed for certain stress tests. When the input buffer is
591     expanded, the other two buffers must also be expanded likewise, and the
592     contents of pbuffer, which are a copy of the input for callouts, must be
593     preserved (for when expansion happens for a data line). This is not the most
594     optimal way of handling this, but hey, this is just a test program!
595    
596     Arguments:
597     f the file to read
598     start where in buffer to start (this *must* be within buffer)
599 ph10 287 prompt for stdin or readline()
600 nigel 91
601     Returns: pointer to the start of new data
602     could be a copy of start, or could be moved
603     NULL if no data read and EOF reached
604     */
605    
606     static uschar *
607 ph10 287 extend_inputline(FILE *f, uschar *start, const char *prompt)
608 nigel 91 {
609     uschar *here = start;
610    
611     for (;;)
612     {
613 ph10 530 int rlen = (int)(buffer_size - (here - buffer));
614 nigel 93
615 nigel 91 if (rlen > 1000)
616     {
617     int dlen;
618 ph10 289
619 ph10 287 /* If libreadline support is required, use readline() to read a line if the
620     input is a terminal. Note that readline() removes the trailing newline, so
621     we must put it back again, to be compatible with fgets(). */
622 ph10 289
623 ph10 287 #ifdef SUPPORT_LIBREADLINE
624     if (isatty(fileno(f)))
625     {
626 ph10 289 size_t len;
627 ph10 287 char *s = readline(prompt);
628     if (s == NULL) return (here == start)? NULL : start;
629     len = strlen(s);
630 ph10 289 if (len > 0) add_history(s);
631 ph10 287 if (len > rlen - 1) len = rlen - 1;
632     memcpy(here, s, len);
633     here[len] = '\n';
634 ph10 289 here[len+1] = 0;
635     free(s);
636 ph10 287 }
637 ph10 289 else
638     #endif
639    
640 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
641 ph10 289
642 ph10 287 {
643 ph10 516 if (f == stdin) printf("%s", prompt);
644 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
645     return (here == start)? NULL : start;
646 ph10 289 }
647    
648 nigel 91 dlen = (int)strlen((char *)here);
649     if (dlen > 0 && here[dlen - 1] == '\n') return start;
650     here += dlen;
651     }
652    
653     else
654     {
655     int new_buffer_size = 2*buffer_size;
656     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
657     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
658     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
659    
660     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
661     {
662     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
663     exit(1);
664     }
665    
666     memcpy(new_buffer, buffer, buffer_size);
667     memcpy(new_pbuffer, pbuffer, buffer_size);
668    
669     buffer_size = new_buffer_size;
670    
671     start = new_buffer + (start - buffer);
672     here = new_buffer + (here - buffer);
673    
674     free(buffer);
675     free(dbuffer);
676     free(pbuffer);
677    
678     buffer = new_buffer;
679     dbuffer = new_dbuffer;
680     pbuffer = new_pbuffer;
681     }
682     }
683    
684     return NULL; /* Control never gets here */
685     }
686    
687    
688    
689    
690    
691    
692    
/*************************************************
*          Read number from string               *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, and then as many decimal digits as are present
are converted. A number too large for an int yields INT_MAX instead of
overflowing; all of its digits are still consumed.

Arguments:
  str           string to be converted
  endptr        where to put the end pointer (the first character that is
                  not part of the number)

Returns:        the value as a non-negative int; zero if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int result = 0;

  while (isspace(*str)) str++;

  while (isdigit(*str))
  {
    int digit = (int)(*str++ - '0');

    /* result*10 + digit would exceed INT_MAX: stick at the maximum. Once
    there, this test stays true for every further digit. */

    if (result > (INT_MAX - digit) / 10)
      result = INT_MAX;
    else
      result = result * 10 + digit;
  }

  *endptr = str;
  return result;
}
717    
718    
719    
720 nigel 49
721     /*************************************************
722     * Convert UTF-8 string to value *
723     *************************************************/
724    
725     /* This function takes one or more bytes that represents a UTF-8 character,
726     and returns the value of the character.
727    
728     Argument:
729 nigel 91 utf8bytes a pointer to the byte vector
730     vptr a pointer to an int to receive the value
731 nigel 49
732 nigel 91 Returns: > 0 => the number of bytes consumed
733     -6 to 0 => malformed UTF-8 character at offset = (-return)
734 nigel 49 */
735    
736 nigel 79 #if !defined NOUTF8
737    
738 nigel 67 static int
739 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
740 nigel 49 {
741 nigel 91 int c = *utf8bytes++;
742 nigel 49 int d = c;
743     int i, j, s;
744    
745     for (i = -1; i < 6; i++) /* i is number of additional bytes */
746     {
747     if ((d & 0x80) == 0) break;
748     d <<= 1;
749     }
750    
751     if (i == -1) { *vptr = c; return 1; } /* ascii character */
752     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
753    
754     /* i now has a value in the range 1-5 */
755    
756 nigel 59 s = 6*i;
757 nigel 85 d = (c & utf8_table3[i]) << s;
758 nigel 49
759     for (j = 0; j < i; j++)
760     {
761 nigel 91 c = *utf8bytes++;
762 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
763 nigel 59 s -= 6;
764 nigel 49 d |= (c & 0x3f) << s;
765     }
766    
767     /* Check that encoding was the correct unique one */
768    
769 nigel 85 for (j = 0; j < utf8_table1_size; j++)
770     if (d <= utf8_table1[j]) break;
771 nigel 49 if (j != i) return -(i+1);
772    
773     /* Valid value */
774    
775     *vptr = d;
776     return i+1;
777     }
778    
779 nigel 79 #endif
780 nigel 49
781    
782 nigel 79
783 nigel 63 /*************************************************
784 nigel 85 * Convert character value to UTF-8 *
785     *************************************************/
786    
787     /* This function takes an integer value in the range 0 - 0x7fffffff
788     and encodes it as a UTF-8 character in 0 to 6 bytes.
789    
790     Arguments:
791     cvalue the character value
792 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
793 nigel 85
794     Returns: number of characters placed in the buffer
795     */
796    
797 nigel 93 #if !defined NOUTF8
798    
799 nigel 85 static int
800 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
801 nigel 85 {
802     register int i, j;
803     for (i = 0; i < utf8_table1_size; i++)
804     if (cvalue <= utf8_table1[i]) break;
805 nigel 91 utf8bytes += i;
806 nigel 85 for (j = i; j > 0; j--)
807     {
808 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
809 nigel 85 cvalue >>= 6;
810     }
811 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
812 nigel 85 return i + 1;
813     }
814    
815 nigel 93 #endif
816 nigel 85
817    
818 nigel 93
819 nigel 85 /*************************************************
820 nigel 63 * Print character string *
821     *************************************************/
822 nigel 49
823 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
824     mode. Yields number of characters printed. If handed a NULL file, just counts
825     chars without printing. */
826 nigel 49
827 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
828 nigel 3 {
829 nigel 85 int c = 0;
830 nigel 63 int yield = 0;
831 nigel 3
832 nigel 63 while (length-- > 0)
833 nigel 3 {
834 nigel 79 #if !defined NOUTF8
835 nigel 67 if (use_utf8)
836 nigel 63 {
837     int rc = utf82ord(p, &c);
838 nigel 3
839 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
840     {
841     length -= rc - 1;
842     p += rc;
843 nigel 93 if (PRINTHEX(c))
844 nigel 63 {
845     if (f != NULL) fprintf(f, "%c", c);
846     yield++;
847     }
848     else
849     {
850 nigel 93 int n = 4;
851     if (f != NULL) fprintf(f, "\\x{%02x}", c);
852     yield += (n <= 0x000000ff)? 2 :
853     (n <= 0x00000fff)? 3 :
854     (n <= 0x0000ffff)? 4 :
855     (n <= 0x000fffff)? 5 : 6;
856 nigel 63 }
857     continue;
858     }
859     }
860 nigel 79 #endif
861 nigel 3
862 nigel 63 /* Not UTF-8, or malformed UTF-8 */
863    
864 nigel 93 c = *p++;
865     if (PRINTHEX(c))
866 nigel 3 {
867 nigel 63 if (f != NULL) fprintf(f, "%c", c);
868     yield++;
869 nigel 3 }
870 nigel 63 else
871 nigel 3 {
872 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
873     yield += 4;
874     }
875     }
876 nigel 3
877 nigel 63 return yield;
878     }
879 nigel 23
880 nigel 3
881 nigel 23
882 nigel 63 /*************************************************
883     * Callout function *
884     *************************************************/
885 nigel 3
886 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
887     the match. Yield zero unless more callouts than the fail count, or the callout
888     data is not zero. */
889 nigel 3
890 nigel 63 static int callout(pcre_callout_block *cb)
891     {
892     FILE *f = (first_callout | callout_extra)? outfile : NULL;
893 nigel 75 int i, pre_start, post_start, subject_length;
894 nigel 3
895 nigel 63 if (callout_extra)
896     {
897     fprintf(f, "Callout %d: last capture = %d\n",
898     cb->callout_number, cb->capture_last);
899 nigel 3
900 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
901     {
902     if (cb->offset_vector[i] < 0)
903     fprintf(f, "%2d: <unset>\n", i/2);
904     else
905     {
906     fprintf(f, "%2d: ", i/2);
907     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
908     cb->offset_vector[i+1] - cb->offset_vector[i], f);
909     fprintf(f, "\n");
910     }
911     }
912     }
913 nigel 3
914 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
915     datails. On subsequent calls in the same match, we use pchars just to find the
916     printed lengths of the substrings. */
917 nigel 3
918 nigel 63 if (f != NULL) fprintf(f, "--->");
919 nigel 3
920 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
921     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
922     cb->current_position - cb->start_match, f);
923 nigel 3
924 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
925    
926 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
927     cb->subject_length - cb->current_position, f);
928 nigel 3
929 nigel 63 if (f != NULL) fprintf(f, "\n");
930 nigel 9
931 nigel 63 /* Always print appropriate indicators, with callout number if not already
932 nigel 75 shown. For automatic callouts, show the pattern offset. */
933 nigel 3
934 nigel 75 if (cb->callout_number == 255)
935     {
936     fprintf(outfile, "%+3d ", cb->pattern_position);
937     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
938     }
939     else
940     {
941     if (callout_extra) fprintf(outfile, " ");
942     else fprintf(outfile, "%3d ", cb->callout_number);
943     }
944 nigel 3
945 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
946     fprintf(outfile, "^");
947 nigel 3
948 nigel 63 if (post_start > 0)
949     {
950     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
951     fprintf(outfile, "^");
952 nigel 3 }
953    
954 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
955     fprintf(outfile, " ");
956    
957     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
958     pbuffer + cb->pattern_position);
959    
960 nigel 63 fprintf(outfile, "\n");
961     first_callout = 0;
962 nigel 3
963 nigel 71 if (cb->callout_data != NULL)
964 nigel 49 {
965 nigel 71 int callout_data = *((int *)(cb->callout_data));
966     if (callout_data != 0)
967     {
968     fprintf(outfile, "Callout data = %d\n", callout_data);
969     return callout_data;
970     }
971 nigel 63 }
972 nigel 49
973 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
974     (++callout_count >= callout_fail_count)? 1 : 0;
975 nigel 3 }
976    
977    
978 nigel 63 /*************************************************
979 nigel 73 * Local malloc functions *
980 nigel 63 *************************************************/
981 nigel 3
982     /* Alternative malloc function, to test functionality and show the size of the
983     compiled re. */
984    
985     static void *new_malloc(size_t size)
986     {
987 nigel 73 void *block = malloc(size);
988 nigel 43 gotten_store = size;
989 nigel 73 if (show_malloc)
990 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
991 nigel 73 return block;
992 nigel 3 }
993    
994 nigel 73 static void new_free(void *block)
995     {
996     if (show_malloc)
997     fprintf(outfile, "free %p\n", block);
998     free(block);
999     }
1000 nigel 3
1001    
1002 nigel 73 /* For recursion malloc/free, to test stacking calls */
1003    
1004     static void *stack_malloc(size_t size)
1005     {
1006     void *block = malloc(size);
1007     if (show_malloc)
1008 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1009 nigel 73 return block;
1010     }
1011    
1012     static void stack_free(void *block)
1013     {
1014     if (show_malloc)
1015     fprintf(outfile, "stack_free %p\n", block);
1016     free(block);
1017     }
1018    
1019    
1020 nigel 63 /*************************************************
1021     * Call pcre_fullinfo() *
1022     *************************************************/
1023 nigel 43
1024     /* Get one piece of information from the pcre_fullinfo() function */
1025    
1026     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1027     {
1028     int rc;
1029     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1030     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1031     }
1032    
1033    
1034    
1035 nigel 63 /*************************************************
1036 nigel 75 * Byte flipping function *
1037     *************************************************/
1038    
static unsigned long int
byteflip(unsigned long int value, int n)
{
/* Pick out the four low-order bytes; anything above them is discarded. */

unsigned long int b0 = value & 0xffUL;
unsigned long int b1 = (value >> 8) & 0xffUL;
unsigned long int b2 = (value >> 16) & 0xffUL;
unsigned long int b3 = (value >> 24) & 0xffUL;

/* A two-byte quantity just has its two bytes exchanged. */

if (n == 2) return (b0 << 8) | b1;

/* Any other n is handled as a four-byte quantity with its bytes reversed. */

return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1048    
1049    
1050    
1051    
1052     /*************************************************
1053 nigel 87 * Check match or recursion limit *
1054     *************************************************/
1055    
1056     static int
1057     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1058     int start_offset, int options, int *use_offsets, int use_size_offsets,
1059     int flag, unsigned long int *limit, int errnumber, const char *msg)
1060     {
1061     int count;
1062     int min = 0;
1063     int mid = 64;
1064     int max = -1;
1065    
1066     extra->flags |= flag;
1067    
1068     for (;;)
1069     {
1070     *limit = mid;
1071    
1072     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1073     use_offsets, use_size_offsets);
1074    
1075     if (count == errnumber)
1076     {
1077     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1078     min = mid;
1079     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1080     }
1081    
1082     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1083     count == PCRE_ERROR_PARTIAL)
1084     {
1085     if (mid == min + 1)
1086     {
1087     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1088     break;
1089     }
1090     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1091     max = mid;
1092     mid = (min + mid)/2;
1093     }
1094     else break; /* Some other error */
1095     }
1096    
1097     extra->flags &= ~flag;
1098     return count;
1099     }
1100    
1101    
1102    
1103     /*************************************************
1104 ph10 227 * Case-independent strncmp() function *
1105     *************************************************/
1106    
1107     /*
1108     Arguments:
1109     s first string
1110     t second string
1111     n number of characters to compare
1112    
1113     Returns: < 0, = 0, or > 0, according to the comparison
1114     */
1115    
1116     static int
1117     strncmpic(uschar *s, uschar *t, int n)
1118     {
1119     while (n--)
1120     {
1121     int c = tolower(*s++) - tolower(*t++);
1122     if (c) return c;
1123     }
1124     return 0;
1125     }
1126    
1127    
1128    
1129     /*************************************************
1130 nigel 91 * Check newline indicator *
1131     *************************************************/
1132    
1133 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1134     a message and return 0 if there is no match.
1135 nigel 91
1136     Arguments:
1137     p points after the leading '<'
1138     f file for error message
1139    
1140     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1141     */
1142    
1143     static int
1144     check_newline(uschar *p, FILE *f)
1145     {
1146 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1147     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1148     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1149     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1150     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1151 ph10 231 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1152     if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1153 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
1154     return 0;
1155     }
1156    
1157    
1158    
1159     /*************************************************
1160 nigel 93 * Usage function *
1161     *************************************************/
1162    
/* Writes the command-line summary to the standard output. The lines about
readline(), -dfa and -p depend on how the program was built. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1196    
1197    
1198    
1199     /*************************************************
1200 nigel 63 * Main Program *
1201     *************************************************/
1202 nigel 43
1203 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
1204     consist of a regular expression, in delimiters and optionally followed by
1205     options, followed by a set of test data, terminated by an empty line. */
1206    
1207     int main(int argc, char **argv)
1208     {
1209     FILE *infile = stdin;
1210     int options = 0;
1211     int study_options = 0;
1212 ph10 386 int default_find_match_limit = FALSE;
1213 nigel 3 int op = 1;
1214     int timeit = 0;
1215 nigel 93 int timeitm = 0;
1216 nigel 3 int showinfo = 0;
1217 nigel 31 int showstore = 0;
1218 ph10 606 int force_study = 0;
1219 nigel 87 int quiet = 0;
1220 nigel 53 int size_offsets = 45;
1221     int size_offsets_max;
1222 nigel 77 int *offsets = NULL;
1223 nigel 53 #if !defined NOPOSIX
1224 nigel 3 int posix = 0;
1225 nigel 53 #endif
1226 nigel 3 int debug = 0;
1227 nigel 11 int done = 0;
1228 nigel 77 int all_use_dfa = 0;
1229     int yield = 0;
1230 nigel 91 int stack_size;
1231 nigel 3
1232 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
1233     that 1024 is plenty long enough for the few names we'll be testing. */
1234 nigel 69
1235 nigel 91 uschar copynames[1024];
1236     uschar getnames[1024];
1237    
1238     uschar *copynamesptr;
1239     uschar *getnamesptr;
1240    
1241 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
1242 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
1243 nigel 69
1244 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
1245     dbuffer = (unsigned char *)malloc(buffer_size);
1246     pbuffer = (unsigned char *)malloc(buffer_size);
1247 nigel 69
1248 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
1249 nigel 3
1250 nigel 93 outfile = stdout;
1251    
1252     /* The following _setmode() stuff is some Windows magic that tells its runtime
1253     library to translate CRLF into a single LF character. At least, that's what
1254     I've been told: never having used Windows I take this all on trust. Originally
1255     it set 0x8000, but then I was advised that _O_BINARY was better. */
1256    
1257 nigel 75 #if defined(_WIN32) || defined(WIN32)
1258 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
1259     #endif
1260 nigel 75
1261 nigel 3 /* Scan options */
1262    
1263     while (argc > 1 && argv[op][0] == '-')
1264     {
1265 nigel 63 unsigned char *endptr;
1266 nigel 53
1267 ph10 606 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1268     else if (strcmp(argv[op], "-s") == 0) force_study = 1;
1269 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1270 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1271 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1272     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1273 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1274 nigel 79 #if !defined NODFA
1275 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1276 nigel 79 #endif
1277 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1278 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1279     *endptr == 0))
1280 nigel 53 {
1281     op++;
1282     argc--;
1283     }
1284 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1285     {
1286     int both = argv[op][2] == 0;
1287     int temp;
1288     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1289     *endptr == 0))
1290     {
1291     timeitm = temp;
1292     op++;
1293     argc--;
1294     }
1295     else timeitm = LOOPREPEAT;
1296     if (both) timeit = timeitm;
1297     }
1298 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1299     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1300     *endptr == 0))
1301     {
1302 ph10 641 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1303 nigel 91 printf("PCRE: -S not supported on this OS\n");
1304     exit(1);
1305     #else
1306     int rc;
1307     struct rlimit rlim;
1308     getrlimit(RLIMIT_STACK, &rlim);
1309     rlim.rlim_cur = stack_size * 1024 * 1024;
1310     rc = setrlimit(RLIMIT_STACK, &rlim);
1311     if (rc != 0)
1312     {
1313     printf("PCRE: setrlimit() failed with error %d\n", rc);
1314     exit(1);
1315     }
1316     op++;
1317     argc--;
1318     #endif
1319     }
1320 nigel 53 #if !defined NOPOSIX
1321 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1322 nigel 53 #endif
1323 nigel 63 else if (strcmp(argv[op], "-C") == 0)
1324     {
1325     int rc;
1326 ph10 392 unsigned long int lrc;
1327 nigel 63 printf("PCRE version %s\n", pcre_version());
1328     printf("Compiled with\n");
1329     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1330     printf(" %sUTF-8 support\n", rc? "" : "No ");
1331 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1332     printf(" %sUnicode properties support\n", rc? "" : "No ");
1333 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1334 ph10 391 /* Note that these values are always the ASCII values, even
1335 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
1336 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1337     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1338 ph10 150 (rc == -2)? "ANYCRLF" :
1339 nigel 93 (rc == -1)? "ANY" : "???");
1340 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1341     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1342     "all Unicode newlines");
1343 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1344     printf(" Internal link size = %d\n", rc);
1345     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1346     printf(" POSIX malloc threshold = %d\n", rc);
1347 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1348     printf(" Default match limit = %ld\n", lrc);
1349     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1350     printf(" Default recursion depth limit = %ld\n", lrc);
1351 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1352     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1353 ph10 121 goto EXIT;
1354 nigel 63 }
1355 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
1356     strcmp(argv[op], "--help") == 0)
1357     {
1358     usage();
1359     goto EXIT;
1360     }
1361 nigel 3 else
1362     {
1363 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
1364 nigel 93 usage();
1365 nigel 77 yield = 1;
1366     goto EXIT;
1367 nigel 3 }
1368     op++;
1369     argc--;
1370     }
1371    
1372 nigel 53 /* Get the store for the offsets vector, and remember what it was */
1373    
1374     size_offsets_max = size_offsets;
1375 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1376 nigel 53 if (offsets == NULL)
1377     {
1378     printf("** Failed to get %d bytes of memory for offsets vector\n",
1379 ph10 151 (int)(size_offsets_max * sizeof(int)));
1380 nigel 77 yield = 1;
1381     goto EXIT;
1382 nigel 53 }
1383    
1384 nigel 3 /* Sort out the input and output files */
1385    
1386     if (argc > 1)
1387     {
1388 nigel 93 infile = fopen(argv[op], INPUT_MODE);
1389 nigel 3 if (infile == NULL)
1390     {
1391     printf("** Failed to open %s\n", argv[op]);
1392 nigel 77 yield = 1;
1393     goto EXIT;
1394 nigel 3 }
1395     }
1396    
1397     if (argc > 2)
1398     {
1399 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
1400 nigel 3 if (outfile == NULL)
1401     {
1402     printf("** Failed to open %s\n", argv[op+1]);
1403 nigel 77 yield = 1;
1404     goto EXIT;
1405 nigel 3 }
1406     }
1407    
1408     /* Set alternative malloc function */
1409    
1410     pcre_malloc = new_malloc;
1411 nigel 73 pcre_free = new_free;
1412     pcre_stack_malloc = stack_malloc;
1413     pcre_stack_free = stack_free;
1414 nigel 3
1415 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1416 nigel 3
1417 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1418 nigel 3
1419     /* Main loop */
1420    
1421 nigel 11 while (!done)
1422 nigel 3 {
1423     pcre *re = NULL;
1424     pcre_extra *extra = NULL;
1425 nigel 37
1426     #if !defined NOPOSIX /* There are still compilers that require no indent */
1427 nigel 3 regex_t preg;
1428 nigel 45 int do_posix = 0;
1429 nigel 37 #endif
1430    
1431 nigel 7 const char *error;
1432 ph10 512 unsigned char *markptr;
1433 nigel 25 unsigned char *p, *pp, *ppp;
1434 nigel 75 unsigned char *to_file = NULL;
1435 nigel 53 const unsigned char *tables = NULL;
1436 nigel 75 unsigned long int true_size, true_study_size = 0;
1437     size_t size, regex_gotten_store;
1438 ph10 626 int do_allcaps = 0;
1439 ph10 512 int do_mark = 0;
1440 nigel 3 int do_study = 0;
1441 ph10 612 int no_force_study = 0;
1442 nigel 25 int do_debug = debug;
1443 nigel 35 int do_G = 0;
1444     int do_g = 0;
1445 nigel 25 int do_showinfo = showinfo;
1446 nigel 35 int do_showrest = 0;
1447 ph10 616 int do_showcaprest = 0;
1448 nigel 75 int do_flip = 0;
1449 nigel 93 int erroroffset, len, delimiter, poffset;
1450 nigel 3
1451 nigel 67 use_utf8 = 0;
1452 ph10 211 debug_lengths = 1;
1453 nigel 63
1454 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1455 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1456 nigel 63 fflush(outfile);
1457 nigel 3
1458     p = buffer;
1459     while (isspace(*p)) p++;
1460     if (*p == 0) continue;
1461    
1462 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1463 nigel 3
1464 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1465     {
1466 nigel 91 unsigned long int magic, get_options;
1467 nigel 75 uschar sbuf[8];
1468     FILE *f;
1469    
1470     p++;
1471     pp = p + (int)strlen((char *)p);
1472     while (isspace(pp[-1])) pp--;
1473     *pp = 0;
1474    
1475     f = fopen((char *)p, "rb");
1476     if (f == NULL)
1477     {
1478     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1479     continue;
1480     }
1481    
1482     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1483    
1484     true_size =
1485     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1486     true_study_size =
1487     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1488    
1489     re = (real_pcre *)new_malloc(true_size);
1490     regex_gotten_store = gotten_store;
1491    
1492     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1493    
1494     magic = ((real_pcre *)re)->magic_number;
1495     if (magic != MAGIC_NUMBER)
1496     {
1497     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1498     {
1499     do_flip = 1;
1500     }
1501     else
1502     {
1503     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1504     fclose(f);
1505     continue;
1506     }
1507     }
1508    
1509 ph10 612 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1510 nigel 75 do_flip? " (byte-inverted)" : "", p);
1511    
1512     /* Need to know if UTF-8 for printing data strings */
1513    
1514 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1515     use_utf8 = (get_options & PCRE_UTF8) != 0;
1516 nigel 75
1517 ph10 612 /* Now see if there is any following study data. */
1518 nigel 75
1519     if (true_study_size != 0)
1520     {
1521     pcre_study_data *psd;
1522    
1523     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1524     extra->flags = PCRE_EXTRA_STUDY_DATA;
1525    
1526     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1527     extra->study_data = psd;
1528    
1529     if (fread(psd, 1, true_study_size, f) != true_study_size)
1530     {
1531     FAIL_READ:
1532     fprintf(outfile, "Failed to read data from %s\n", p);
1533     if (extra != NULL) new_free(extra);
1534     if (re != NULL) new_free(re);
1535     fclose(f);
1536     continue;
1537     }
1538     fprintf(outfile, "Study data loaded from %s\n", p);
1539     do_study = 1; /* To get the data output if requested */
1540     }
1541     else fprintf(outfile, "No study data\n");
1542    
1543     fclose(f);
1544     goto SHOW_INFO;
1545     }
1546    
1547     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1548     the pattern; if it isn't complete, read more. */
1549    
1550 nigel 3 delimiter = *p++;
1551    
1552 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1553 nigel 3 {
1554 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1555 nigel 3 goto SKIP_DATA;
1556     }
1557    
1558     pp = p;
1559 ph10 530 poffset = (int)(p - buffer);
1560 nigel 3
1561     for(;;)
1562     {
1563 nigel 29 while (*pp != 0)
1564     {
1565     if (*pp == '\\' && pp[1] != 0) pp++;
1566     else if (*pp == delimiter) break;
1567     pp++;
1568     }
1569 nigel 3 if (*pp != 0) break;
1570 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1571 nigel 3 {
1572     fprintf(outfile, "** Unexpected EOF\n");
1573 nigel 11 done = 1;
1574     goto CONTINUE;
1575 nigel 3 }
1576 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1577 nigel 3 }
1578    
1579 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1580     pointer to the correct relative point in the buffer. */
1581    
1582     p = buffer + poffset;
1583    
1584 nigel 29 /* If the first character after the delimiter is backslash, make
1585     the pattern end with backslash. This is purely to provide a way
1586     of testing for the error message when a pattern ends with backslash. */
1587    
1588     if (pp[1] == '\\') *pp++ = '\\';
1589    
1590 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1591     for callouts. */
1592 nigel 3
1593     *pp++ = 0;
1594 nigel 75 strcpy((char *)pbuffer, (char *)p);
1595 nigel 3
1596     /* Look for options after final delimiter */
1597    
1598     options = 0;
1599     study_options = 0;
1600 nigel 31 log_store = showstore; /* default from command line */
1601    
1602 nigel 3 while (*pp != 0)
1603     {
1604     switch (*pp++)
1605     {
1606 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1607 nigel 35 case 'g': do_g = 1; break;
1608 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1609     case 'm': options |= PCRE_MULTILINE; break;
1610     case 's': options |= PCRE_DOTALL; break;
1611     case 'x': options |= PCRE_EXTENDED; break;
1612 nigel 25
1613 ph10 616 case '+':
1614     if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1615     break;
1616 ph10 626
1617     case '=': do_allcaps = 1; break;
1618 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1619 nigel 93 case 'B': do_debug = 1; break;
1620 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1621 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1622 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1623 nigel 75 case 'F': do_flip = 1; break;
1624 nigel 35 case 'G': do_G = 1; break;
1625 nigel 25 case 'I': do_showinfo = 1; break;
1626 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1627 ph10 512 case 'K': do_mark = 1; break;
1628 nigel 31 case 'M': log_store = 1; break;
1629 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1630 nigel 37
1631     #if !defined NOPOSIX
1632 nigel 3 case 'P': do_posix = 1; break;
1633 nigel 37 #endif
1634    
1635 ph10 612 case 'S':
1636     if (do_study == 0) do_study = 1; else
1637     {
1638     do_study = 0;
1639     no_force_study = 1;
1640     }
1641     break;
1642    
1643 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1644 ph10 535 case 'W': options |= PCRE_UCP; break;
1645 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1646 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1647 ph10 126 case 'Z': debug_lengths = 0; break;
1648 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1649 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1650 ph10 545
1651 ph10 541 case 'T':
1652     switch (*pp++)
1653     {
1654     case '0': tables = tables0; break;
1655     case '1': tables = tables1; break;
1656 ph10 545
1657 ph10 541 case '\r':
1658     case '\n':
1659 ph10 545 case ' ':
1660     case 0:
1661 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
1662 ph10 545 goto SKIP_DATA;
1663    
1664     default:
1665 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1666 ph10 545 goto SKIP_DATA;
1667 ph10 541 }
1668 ph10 545 break;
1669 nigel 25
1670     case 'L':
1671     ppp = pp;
1672 nigel 93 /* The '\r' test here is so that it works on Windows. */
1673     /* The '0' test is just in case this is an unterminated line. */
1674     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1675 nigel 25 *ppp = 0;
1676     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1677     {
1678     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1679     goto SKIP_DATA;
1680     }
1681 nigel 93 locale_set = 1;
1682 nigel 25 tables = pcre_maketables();
1683     pp = ppp;
1684     break;
1685    
1686 nigel 75 case '>':
1687     to_file = pp;
1688     while (*pp != 0) pp++;
1689     while (isspace(pp[-1])) pp--;
1690     *pp = 0;
1691     break;
1692    
1693 nigel 91 case '<':
1694     {
1695 ph10 518 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1696 ph10 336 {
1697     options |= PCRE_JAVASCRIPT_COMPAT;
1698 ph10 345 pp += 3;
1699 ph10 336 }
1700     else
1701 ph10 345 {
1702 ph10 336 int x = check_newline(pp, outfile);
1703     if (x == 0) goto SKIP_DATA;
1704     options |= x;
1705     while (*pp++ != '>');
1706 ph10 345 }
1707 nigel 91 }
1708     break;
1709    
1710 nigel 77 case '\r': /* So that it works in Windows */
1711     case '\n':
1712     case ' ':
1713     break;
1714 nigel 75
1715 nigel 3 default:
1716     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1717     goto SKIP_DATA;
1718     }
1719     }
1720    
1721 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1722 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1723     local character tables. */
1724 nigel 3
1725 nigel 37 #if !defined NOPOSIX
1726 nigel 3 if (posix || do_posix)
1727     {
1728     int rc;
1729     int cflags = 0;
1730 nigel 75
1731 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1732     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1733 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1734 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1735     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1736 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1737 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1738 nigel 87
1739 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1740    
1741     /* Compilation failed; go back for another re, skipping to blank line
1742     if non-interactive. */
1743    
1744     if (rc != 0)
1745     {
1746 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1747 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1748     goto SKIP_DATA;
1749     }
1750     }
1751    
1752     /* Handle compiling via the native interface */
1753    
1754     else
1755 nigel 37 #endif /* !defined NOPOSIX */
1756    
1757 nigel 3 {
1758 ph10 412 unsigned long int get_options;
1759 ph10 416
1760 nigel 93 if (timeit > 0)
1761 nigel 3 {
1762     register int i;
1763     clock_t time_taken;
1764     clock_t start_time = clock();
1765 nigel 93 for (i = 0; i < timeit; i++)
1766 nigel 3 {
1767 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1768 nigel 3 if (re != NULL) free(re);
1769     }
1770     time_taken = clock() - start_time;
1771 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1772     (((double)time_taken * 1000.0) / (double)timeit) /
1773 nigel 63 (double)CLOCKS_PER_SEC);
1774 nigel 3 }
1775    
1776 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1777 nigel 3
1778     /* Compilation failed; go back for another re, skipping to blank line
1779     if non-interactive. */
1780    
1781     if (re == NULL)
1782     {
1783     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1784     SKIP_DATA:
1785     if (infile != stdin)
1786     {
1787     for (;;)
1788     {
1789 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
1790 nigel 11 {
1791     done = 1;
1792     goto CONTINUE;
1793     }
1794 nigel 3 len = (int)strlen((char *)buffer);
1795     while (len > 0 && isspace(buffer[len-1])) len--;
1796     if (len == 0) break;
1797     }
1798     fprintf(outfile, "\n");
1799     }
1800 nigel 25 goto CONTINUE;
1801 nigel 3 }
1802 ph10 416
1803     /* Compilation succeeded. It is now possible to set the UTF-8 option from
1804     within the regex; check for this so that we know how to process the data
1805 ph10 412 lines. */
1806 ph10 416
1807 ph10 412 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1808     if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1809 nigel 3
1810 ph10 412 /* Print information if required. There are now two info-returning
1811     functions. The old one has a limited interface and returns only limited
1812     data. Check that it agrees with the newer one. */
1813 nigel 3
1814 nigel 63 if (log_store)
1815     fprintf(outfile, "Memory allocation (code space): %d\n",
1816     (int)(gotten_store -
1817     sizeof(real_pcre) -
1818     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1819    
1820 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1821     and remember the store that was got. */
1822    
1823     true_size = ((real_pcre *)re)->size;
1824     regex_gotten_store = gotten_store;
1825    
1826 ph10 612 /* If -s or /S was present, study the regex to generate additional info to
1827     help with the matching, unless the pattern has the SS option, which
1828     suppresses the effect of /S (used for a few test patterns where studying is
1829     never sensible). */
1830 nigel 75
1831 ph10 612 if (do_study || (force_study && !no_force_study))
1832 nigel 75 {
1833 nigel 93 if (timeit > 0)
1834 nigel 75 {
1835     register int i;
1836     clock_t time_taken;
1837     clock_t start_time = clock();
1838 nigel 93 for (i = 0; i < timeit; i++)
1839 nigel 75 extra = pcre_study(re, study_options, &error);
1840     time_taken = clock() - start_time;
1841     if (extra != NULL) free(extra);
1842 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1843     (((double)time_taken * 1000.0) / (double)timeit) /
1844 nigel 75 (double)CLOCKS_PER_SEC);
1845     }
1846     extra = pcre_study(re, study_options, &error);
1847     if (error != NULL)
1848     fprintf(outfile, "Failed to study: %s\n", error);
1849     else if (extra != NULL)
1850     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1851     }
1852 ph10 512
1853 ph10 510 /* If /K was present, we set up for handling MARK data. */
1854 ph10 512
1855 ph10 510 if (do_mark)
1856     {
1857     if (extra == NULL)
1858     {
1859     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1860     extra->flags = 0;
1861     }
1862 ph10 512 extra->mark = &markptr;
1863 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
1864 ph10 512 }
1865 nigel 75
1866     /* If the 'F' option was present, we flip the bytes of all the integer
1867     fields in the regex data block and the study block. This is to make it
1868     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1869     compiled on a different architecture. */
1870    
1871     if (do_flip)
1872     {
1873     real_pcre *rre = (real_pcre *)re;
1874 ph10 259 rre->magic_number =
1875 ph10 255 byteflip(rre->magic_number, sizeof(rre->magic_number));
1876 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
1877     rre->options = byteflip(rre->options, sizeof(rre->options));
1878 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1879 ph10 259 rre->top_bracket =
1880 ph10 255 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1881 ph10 259 rre->top_backref =
1882 ph10 255 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1883 ph10 259 rre->first_byte =
1884 ph10 255 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1885 ph10 259 rre->req_byte =
1886 ph10 255 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1887     rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1888 nigel 75 sizeof(rre->name_table_offset));
1889 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1890 nigel 75 sizeof(rre->name_entry_size));
1891 ph10 259 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1892 ph10 255 sizeof(rre->name_count));
1893 nigel 75
1894     if (extra != NULL)
1895     {
1896     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1897     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1898 ph10 455 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1899     rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1900 nigel 75 }
1901     }
1902    
1903     /* Extract information from the compiled data if required */
1904    
1905     SHOW_INFO:
1906    
1907 nigel 93 if (do_debug)
1908     {
1909     fprintf(outfile, "------------------------------------------------------------------\n");
1910 ph10 116 pcre_printint(re, outfile, debug_lengths);
1911 nigel 93 }
1912 ph10 416
1913 ph10 412 /* We already have the options in get_options (see above) */
1914 nigel 93
1915 nigel 25 if (do_showinfo)
1916 nigel 3 {
1917 ph10 412 unsigned long int all_options;
1918 nigel 79 #if !defined NOINFOCHECK
1919 nigel 43 int old_first_char, old_options, old_count;
1920 nigel 79 #endif
1921 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1922 ph10 227 hascrorlf;
1923 nigel 63 int nameentrysize, namecount;
1924     const uschar *nametable;
1925 nigel 3
1926 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1927     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1928     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1929 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1930 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1931 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1932     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1933 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1934 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1935     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1936 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1937 nigel 43
1938 nigel 79 #if !defined NOINFOCHECK
1939 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1940 nigel 3 if (count < 0) fprintf(outfile,
1941 nigel 43 "Error %d from pcre_info()\n", count);
1942 nigel 3 else
1943     {
1944 nigel 43 if (old_count != count) fprintf(outfile,
1945     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1946     old_count);
1947 nigel 37
1948 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1949     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1950     first_char, old_first_char);
1951 nigel 37
1952 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1953     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1954     get_options, old_options);
1955 nigel 43 }
1956 nigel 79 #endif
1957 nigel 43
1958 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1959 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1960 nigel 77 (int)size, (int)regex_gotten_store);
1961 nigel 43
1962     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1963     if (backrefmax > 0)
1964     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1965 nigel 63
1966     if (namecount > 0)
1967     {
1968     fprintf(outfile, "Named capturing subpatterns:\n");
1969     while (namecount-- > 0)
1970     {
1971     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1972     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1973     GET2(nametable, 0));
1974     nametable += nameentrysize;
1975     }
1976     }
1977 ph10 172
1978 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1979 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1980 nigel 63
1981 nigel 75 all_options = ((real_pcre *)re)->options;
1982 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1983 nigel 75
1984 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1985 ph10 576 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1986 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1987     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1988     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1989     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1990 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1991 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1992 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1993     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1994 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1995     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1996     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1997 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1998 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1999 ph10 518 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2000 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2001 ph10 576 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2002 nigel 91 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2003 ph10 172
2004 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2005 nigel 43
2006 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
2007 nigel 91 {
2008     case PCRE_NEWLINE_CR:
2009     fprintf(outfile, "Forced newline sequence: CR\n");
2010     break;
2011 nigel 43
2012 nigel 91 case PCRE_NEWLINE_LF:
2013     fprintf(outfile, "Forced newline sequence: LF\n");
2014     break;
2015    
2016     case PCRE_NEWLINE_CRLF:
2017     fprintf(outfile, "Forced newline sequence: CRLF\n");
2018     break;
2019    
2020 ph10 149 case PCRE_NEWLINE_ANYCRLF:
2021     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2022     break;
2023    
2024 nigel 93 case PCRE_NEWLINE_ANY:
2025     fprintf(outfile, "Forced newline sequence: ANY\n");
2026     break;
2027    
2028 nigel 91 default:
2029     break;
2030     }
2031    
2032 nigel 43 if (first_char == -1)
2033     {
2034 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
2035 nigel 43 }
2036     else if (first_char < 0)
2037     {
2038     fprintf(outfile, "No first char\n");
2039     }
2040     else
2041     {
2042 nigel 63 int ch = first_char & 255;
2043 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2044 nigel 63 "" : " (caseless)";
2045 nigel 93 if (PRINTHEX(ch))
2046 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2047 nigel 3 else
2048 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2049 nigel 43 }
2050 nigel 37
2051 nigel 43 if (need_char < 0)
2052     {
2053     fprintf(outfile, "No need char\n");
2054 nigel 3 }
2055 nigel 43 else
2056     {
2057 nigel 63 int ch = need_char & 255;
2058 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2059 nigel 63 "" : " (caseless)";
2060 nigel 93 if (PRINTHEX(ch))
2061 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2062 nigel 43 else
2063 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2064 nigel 43 }
2065 nigel 75
2066     /* Don't output study size; at present it is in any case a fixed
2067     value, but it varies, depending on the computer architecture, and
2068     so messes up the test suite. (And with the /F option, it might be
2069 ph10 612 flipped.) If study was forced by an external -s, don't show this
2070     information unless -i or -d was also present. This means that, except
2071     when auto-callouts are involved, the output from runs with and without
2072     -s should be identical. */
2073 nigel 75
2074 ph10 612 if (do_study || (force_study && showinfo && !no_force_study))
2075 nigel 75 {
2076     if (extra == NULL)
2077     fprintf(outfile, "Study returned NULL\n");
2078     else
2079     {
2080     uschar *start_bits = NULL;
2081 ph10 455 int minlength;
2082 ph10 461
2083 ph10 455 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2084 ph10 461 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2085    
2086 nigel 75 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2087     if (start_bits == NULL)
2088 ph10 455 fprintf(outfile, "No set of starting bytes\n");
2089 nigel 75 else
2090     {
2091     int i;
2092     int c = 24;
2093     fprintf(outfile, "Starting byte set: ");
2094     for (i = 0; i < 256; i++)
2095     {
2096     if ((start_bits[i/8] & (1<<(i&7))) != 0)
2097     {
2098     if (c > 75)
2099     {
2100     fprintf(outfile, "\n ");
2101     c = 2;
2102     }
2103 nigel 93 if (PRINTHEX(i) && i != ' ')
2104 nigel 75 {
2105     fprintf(outfile, "%c ", i);
2106     c += 2;
2107     }
2108     else
2109     {
2110     fprintf(outfile, "\\x%02x ", i);
2111     c += 5;
2112     }
2113     }
2114     }
2115     fprintf(outfile, "\n");
2116     }
2117     }
2118     }
2119 nigel 3 }
2120    
2121 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
2122     that is all. The first 8 bytes of the file are the regex length and then
2123     the study length, in big-endian order. */
2124 nigel 3
2125 nigel 75 if (to_file != NULL)
2126 nigel 3 {
2127 nigel 75 FILE *f = fopen((char *)to_file, "wb");
2128     if (f == NULL)
2129 nigel 3 {
2130 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2131 nigel 3 }
2132 nigel 75 else
2133     {
2134     uschar sbuf[8];
2135 ph10 255 sbuf[0] = (uschar)((true_size >> 24) & 255);
2136     sbuf[1] = (uschar)((true_size >> 16) & 255);
2137     sbuf[2] = (uschar)((true_size >> 8) & 255);
2138     sbuf[3] = (uschar)((true_size) & 255);
2139 ph10 259
2140 ph10 255 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2141     sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2142     sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2143     sbuf[7] = (uschar)((true_study_size) & 255);
2144 nigel 3
2145 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
2146     fwrite(re, 1, true_size, f) < true_size)
2147     {
2148     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2149     }
2150 nigel 3 else
2151     {
2152 ph10 612 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2153    
2154     /* If there is study data, write it, but verify the writing only
2155     if the studying was requested by /S, not just by -s. */
2156    
2157 nigel 75 if (extra != NULL)
2158 nigel 3 {
2159 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
2160     true_study_size)
2161 nigel 3 {
2162 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
2163     strerror(errno));
2164 nigel 3 }
2165 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
2166 nigel 3 }
2167     }
2168 nigel 75 fclose(f);
2169 nigel 3 }
2170 nigel 77
2171     new_free(re);
2172     if (extra != NULL) new_free(extra);
2173 ph10 545 if (locale_set)
2174 ph10 541 {
2175     new_free((void *)tables);
2176     setlocale(LC_CTYPE, "C");
2177 ph10 545 locale_set = 0;
2178     }
2179 nigel 75 continue; /* With next regex */
2180 nigel 3 }
2181 nigel 75 } /* End of non-POSIX compile */
2182 nigel 3
2183     /* Read data lines and test them */
2184    
2185     for (;;)
2186     {
2187 nigel 87 uschar *q;
2188 ph10 147 uschar *bptr;
2189 nigel 57 int *use_offsets = offsets;
2190 nigel 53 int use_size_offsets = size_offsets;
2191 nigel 63 int callout_data = 0;
2192     int callout_data_set = 0;
2193 nigel 3 int count, c;
2194 nigel 29 int copystrings = 0;
2195 ph10 386 int find_match_limit = default_find_match_limit;
2196 nigel 29 int getstrings = 0;
2197     int getlist = 0;
2198 nigel 39 int gmatched = 0;
2199 nigel 35 int start_offset = 0;
2200 ph10 579 int start_offset_sign = 1;
2201 nigel 41 int g_notempty = 0;
2202 nigel 77 int use_dfa = 0;
2203 nigel 3
2204     options = 0;
2205    
2206 nigel 91 *copynames = 0;
2207     *getnames = 0;
2208    
2209     copynamesptr = copynames;
2210     getnamesptr = getnames;
2211    
2212 nigel 63 pcre_callout = callout;
2213     first_callout = 1;
2214     callout_extra = 0;
2215     callout_count = 0;
2216     callout_fail_count = 999999;
2217     callout_fail_id = -1;
2218 nigel 73 show_malloc = 0;
2219 nigel 63
2220 nigel 91 if (extra != NULL) extra->flags &=
2221     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2222    
2223     len = 0;
2224     for (;;)
2225 nigel 11 {
2226 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2227 nigel 91 {
2228 ph10 537 if (len > 0) /* Reached EOF without hitting a newline */
2229     {
2230 ph10 545 fprintf(outfile, "\n");
2231 ph10 537 break;
2232 ph10 545 }
2233 nigel 91 done = 1;
2234     goto CONTINUE;
2235     }
2236     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2237     len = (int)strlen((char *)buffer);
2238     if (buffer[len-1] == '\n') break;
2239 nigel 11 }
2240 nigel 3
2241     while (len > 0 && isspace(buffer[len-1])) len--;
2242     buffer[len] = 0;
2243     if (len == 0) break;
2244    
2245     p = buffer;
2246     while (isspace(*p)) p++;
2247    
2248 ph10 147 bptr = q = dbuffer;
2249 nigel 3 while ((c = *p++) != 0)
2250     {
2251     int i = 0;
2252     int n = 0;
2253 nigel 63
2254 nigel 3 if (c == '\\') switch ((c = *p++))
2255     {
2256     case 'a': c = 7; break;
2257     case 'b': c = '\b'; break;
2258     case 'e': c = 27; break;
2259     case 'f': c = '\f'; break;
2260     case 'n': c = '\n'; break;
2261     case 'r': c = '\r'; break;
2262     case 't': c = '\t'; break;
2263     case 'v': c = '\v'; break;
2264    
2265     case '0': case '1': case '2': case '3':
2266     case '4': case '5': case '6': case '7':
2267     c -= '0';
2268     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2269     c = c * 8 + *p++ - '0';
2270 nigel 91
2271     #if !defined NOUTF8
2272     if (use_utf8 && c > 255)
2273     {
2274     unsigned char buff8[8];
2275     int ii, utn;
2276     utn = ord2utf8(c, buff8);
2277     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2278     c = buff8[ii]; /* Last byte */
2279     }
2280     #endif
2281 nigel 3 break;
2282    
2283     case 'x':
2284 nigel 49
2285     /* Handle \x{..} specially - new Perl thing for utf8 */
2286    
2287 nigel 79 #if !defined NOUTF8
2288 nigel 49 if (*p == '{')
2289     {
2290     unsigned char *pt = p;
2291     c = 0;
2292     while (isxdigit(*(++pt)))
2293     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2294     if (*pt == '}')
2295     {
2296 nigel 67 unsigned char buff8[8];
2297 nigel 49 int ii, utn;
2298 ph10 355 if (use_utf8)
2299 ph10 358 {
2300 ph10 355 utn = ord2utf8(c, buff8);
2301     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2302     c = buff8[ii]; /* Last byte */
2303     }
2304     else
2305     {
2306 ph10 358 if (c > 255)
2307 ph10 355 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2308     "UTF-8 mode is not enabled.\n"
2309     "** Truncation will probably give the wrong result.\n", c);
2310 ph10 358 }
2311 nigel 49 p = pt + 1;
2312     break;
2313     }
2314     /* Not correct form; fall through */
2315     }
2316 nigel 79 #endif
2317 nigel 49
2318     /* Ordinary \x */
2319    
2320 nigel 3 c = 0;
2321     while (i++ < 2 && isxdigit(*p))
2322     {
2323     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2324     p++;
2325     }
2326     break;
2327    
2328 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
2329 nigel 3 p--;
2330     continue;
2331    
2332 nigel 75 case '>':
2333 ph10 579 if (*p == '-')
2334 ph10 567 {
2335     start_offset_sign = -1;
2336     p++;
2337 ph10 579 }
2338 nigel 75 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2339 ph10 579 start_offset *= start_offset_sign;
2340 nigel 75 continue;
2341    
2342 nigel 3 case 'A': /* Option setting */
2343     options |= PCRE_ANCHORED;
2344     continue;
2345    
2346     case 'B':
2347     options |= PCRE_NOTBOL;
2348     continue;
2349    
2350 nigel 29 case 'C':
2351 nigel 63 if (isdigit(*p)) /* Set copy string */
2352     {
2353     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2354     copystrings |= 1 << n;
2355     }
2356     else if (isalnum(*p))
2357     {
2358 nigel 91 uschar *npp = copynamesptr;
2359 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2360 nigel 91 *npp++ = 0;
2361 nigel 67 *npp = 0;
2362 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2363 nigel 63 if (n < 0)
2364 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2365     copynamesptr = npp;
2366 nigel 63 }
2367     else if (*p == '+')
2368     {
2369     callout_extra = 1;
2370     p++;
2371     }
2372     else if (*p == '-')
2373     {
2374     pcre_callout = NULL;
2375     p++;
2376     }
2377     else if (*p == '!')
2378     {
2379     callout_fail_id = 0;
2380     p++;
2381     while(isdigit(*p))
2382     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2383     callout_fail_count = 0;
2384     if (*p == '!')
2385     {
2386     p++;
2387     while(isdigit(*p))
2388     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2389     }
2390     }
2391     else if (*p == '*')
2392     {
2393     int sign = 1;
2394     callout_data = 0;
2395     if (*(++p) == '-') { sign = -1; p++; }
2396     while(isdigit(*p))
2397     callout_data = callout_data * 10 + *p++ - '0';
2398     callout_data *= sign;
2399     callout_data_set = 1;
2400     }
2401 nigel 29 continue;
2402    
2403 nigel 79 #if !defined NODFA
2404 nigel 77 case 'D':
2405 nigel 79 #if !defined NOPOSIX
2406 nigel 77 if (posix || do_posix)
2407     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2408     else
2409 nigel 79 #endif
2410 nigel 77 use_dfa = 1;
2411     continue;
2412 ph10 553 #endif
2413 nigel 77
2414 ph10 553 #if !defined NODFA
2415 nigel 77 case 'F':
2416     options |= PCRE_DFA_SHORTEST;
2417     continue;
2418 nigel 79 #endif
2419 nigel 77
2420 nigel 29 case 'G':
2421 nigel 63 if (isdigit(*p))
2422     {
2423     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2424     getstrings |= 1 << n;
2425     }
2426     else if (isalnum(*p))
2427     {
2428 nigel 91 uschar *npp = getnamesptr;
2429 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2430 nigel 91 *npp++ = 0;
2431 nigel 67 *npp = 0;
2432 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2433 nigel 63 if (n < 0)
2434 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2435     getnamesptr = npp;
2436 nigel 63 }
2437 nigel 29 continue;
2438    
2439     case 'L':
2440     getlist = 1;
2441     continue;
2442    
2443 nigel 63 case 'M':
2444     find_match_limit = 1;
2445     continue;
2446    
2447 nigel 37 case 'N':
2448 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
2449     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2450 ph10 461 else
2451 ph10 442 options |= PCRE_NOTEMPTY;
2452 nigel 37 continue;
2453    
2454 nigel 3 case 'O':
2455     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2456 nigel 53 if (n > size_offsets_max)
2457     {
2458     size_offsets_max = n;
2459 nigel 57 free(offsets);
2460 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2461 nigel 53 if (offsets == NULL)
2462     {
2463     printf("** Failed to get %d bytes of memory for offsets vector\n",
2464 ph10 151 (int)(size_offsets_max * sizeof(int)));
2465 nigel 77 yield = 1;
2466     goto EXIT;
2467 nigel 53 }
2468     }
2469     use_size_offsets = n;
2470 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2471 nigel 3 continue;
2472    
2473 nigel 75 case 'P':
2474 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2475 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2476 nigel 75 continue;
2477    
2478 nigel 91 case 'Q':
2479     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2480     if (extra == NULL)
2481     {
2482     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2483     extra->flags = 0;
2484     }
2485     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2486     extra->match_limit_recursion = n;
2487     continue;
2488    
2489     case 'q':
2490     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2491     if (extra == NULL)
2492     {
2493     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2494     extra->flags = 0;
2495     }
2496     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2497     extra->match_limit = n;
2498     continue;
2499    
2500 nigel 79 #if !defined NODFA
2501 nigel 77 case 'R':
2502     options |= PCRE_DFA_RESTART;
2503     continue;
2504 nigel 79 #endif
2505 nigel 77
2506 nigel 73 case 'S':
2507     show_malloc = 1;
2508     continue;
2509 ph10 392
2510 ph10 389 case 'Y':
2511     options |= PCRE_NO_START_OPTIMIZE;
2512 ph10 392 continue;
2513 nigel 73
2514 nigel 3 case 'Z':
2515     options |= PCRE_NOTEOL;
2516     continue;
2517 nigel 71
2518     case '?':
2519     options |= PCRE_NO_UTF8_CHECK;
2520     continue;
2521 nigel 91
2522     case '<':
2523     {
2524     int x = check_newline(p, outfile);
2525     if (x == 0) goto NEXT_DATA;
2526     options |= x;
2527     while (*p++ != '>');
2528     }
2529     continue;
2530 nigel 3 }
2531 nigel 9 *q++ = c;
2532 nigel 3 }
2533 nigel 9 *q = 0;
2534 ph10 530 len = (int)(q - dbuffer);
2535 ph10 545
2536 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
2537 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2538 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
2539 ph10 371
2540 ph10 363 #if !defined NOPOSIX
2541     if (posix || do_posix)
2542     {
2543     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2544 ph10 371 bptr += buffer_size - len - 1;
2545 ph10 363 }
2546 ph10 371 else
2547     #endif
2548 ph10 363 {
2549     memmove(bptr + buffer_size - len, bptr, len);
2550 ph10 371 bptr += buffer_size - len;
2551     }
2552 nigel 3
2553 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
2554     {
2555     printf("**Match limit not relevant for DFA matching: ignored\n");
2556     find_match_limit = 0;
2557     }
2558    
2559 nigel 3 /* Handle matching via the POSIX interface, which does not
2560 nigel 63 support timing or playing with the match limit or callout data. */
2561 nigel 3
2562 nigel 37 #if !defined NOPOSIX
2563 nigel 3 if (posix || do_posix)
2564     {
2565     int rc;
2566     int eflags = 0;
2567 nigel 63 regmatch_t *pmatch = NULL;
2568     if (use_size_offsets > 0)
2569 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2570 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2571     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2572 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2573 nigel 3
2574 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2575 nigel 3
2576     if (rc != 0)
2577     {
2578 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2579 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2580     }
2581 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2582     != 0)
2583     {
2584     fprintf(outfile, "Matched with REG_NOSUB\n");
2585     }
2586 nigel 3 else
2587     {
2588 nigel 7 size_t i;
2589 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
2590 nigel 3 {
2591     if (pmatch[i].rm_so >= 0)
2592     {
2593 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2594 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2595     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2596 nigel 3 fprintf(outfile, "\n");
2597 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
2598 nigel 35 {
2599 ph10 616 fprintf(outfile, "%2d+ ", (int)i);
2600 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2601     outfile);
2602 nigel 35 fprintf(outfile, "\n");
2603     }
2604 nigel 3 }
2605     }
2606     }
2607 nigel 53 free(pmatch);
2608 nigel 3 }
2609    
2610 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2611 nigel 3
2612 nigel 37 else
2613     #endif /* !defined NOPOSIX */
2614    
2615 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2616 nigel 3 {
2617 ph10 512 markptr = NULL;
2618    
2619 nigel 93 if (timeitm > 0)
2620 nigel 3 {
2621     register int i;
2622     clock_t time_taken;
2623     clock_t start_time = clock();
2624 nigel 77
2625 nigel 79 #if !defined NODFA
2626 nigel 77 if (all_use_dfa || use_dfa)
2627     {
2628     int workspace[1000];
2629 nigel 93 for (i = 0; i < timeitm; i++)
2630 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2631 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2632     sizeof(workspace)/sizeof(int));
2633     }
2634     else
2635 nigel 79 #endif
2636 nigel 77
2637 nigel 93 for (i = 0; i < timeitm; i++)
2638 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2639 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2640 nigel 77
2641 nigel 3 time_taken = clock() - start_time;
2642 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2643     (((double)time_taken * 1000.0) / (double)timeitm) /
2644 nigel 63 (double)CLOCKS_PER_SEC);
2645 nigel 3 }
2646    
2647 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2648 nigel 87 varying limits in order to find the minimum value for the match limit and
2649     for the recursion limit. */
2650 nigel 63
2651     if (find_match_limit)
2652     {
2653     if (extra == NULL)
2654     {
2655 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2656 nigel 63 extra->flags = 0;
2657     }
2658    
2659 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2660 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2661     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2662     PCRE_ERROR_MATCHLIMIT, "match()");
2663 nigel 63
2664 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2665     options|g_notempty, use_offsets, use_size_offsets,
2666     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2667     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2668 nigel 63 }
2669    
2670     /* If callout_data is set, use the interface with additional data */
2671    
2672     else if (callout_data_set)
2673     {
2674     if (extra == NULL)
2675     {
2676 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2677 nigel 63 extra->flags = 0;
2678     }
2679     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2680 nigel 71 extra->callout_data = &callout_data;
2681 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2682     options | g_notempty, use_offsets, use_size_offsets);
2683     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2684     }
2685    
2686     /* The normal case is just to do the match once, with the default
2687     value of match_limit. */
2688    
2689 nigel 79 #if !defined NODFA
2690 nigel 77 else if (all_use_dfa || use_dfa)
2691     {
2692     int workspace[1000];
2693 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2694 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2695     sizeof(workspace)/sizeof(int));
2696     if (count == 0)
2697     {
2698     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2699     count = use_size_offsets/2;
2700     }
2701     }
2702 nigel 79 #endif
2703 nigel 77
2704 nigel 75 else
2705     {
2706     count = pcre_exec(re, extra, (char *)bptr, len,
2707     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2708 nigel 77 if (count == 0)
2709     {
2710     fprintf(outfile, "Matched, but too many substrings\n");
2711     count = use_size_offsets/3;
2712     }
2713 nigel 75 }
2714 nigel 3
2715 nigel 39 /* Matched */
2716    
2717 nigel 3 if (count >= 0)
2718     {
2719 nigel 93 int i, maxcount;
2720    
2721     #if !defined NODFA
2722     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2723     #endif
2724     maxcount = use_size_offsets/3;
2725    
2726     /* This is a check against a lunatic return value. */
2727    
2728     if (count > maxcount)
2729     {
2730     fprintf(outfile,
2731     "** PCRE error: returned count %d is too big for offset size %d\n",
2732     count, use_size_offsets);
2733     count = use_size_offsets/3;
2734     if (do_g || do_G)
2735     {
2736     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2737     do_g = do_G = FALSE; /* Break g/G loop */
2738     }
2739     }
2740 ph10 626
2741     /* do_allcaps requests showing of all captures in the pattern, to check
2742     unset ones at the end. */
2743    
2744     if (do_allcaps)
2745     {
2746     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2747     count++; /* Allow for full match */
2748     if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2749     }
2750 nigel 93
2751 ph10 626 /* Output the captured substrings */
2752    
2753 nigel 29 for (i = 0; i < count * 2; i += 2)
2754 nigel 3 {
2755 nigel 57 if (use_offsets[i] < 0)
2756 ph10 626 {
2757     if (use_offsets[i] != -1)
2758     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2759     use_offsets[i], i);
2760     if (use_offsets[i+1] != -1)
2761     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2762     use_offsets[i+1], i+1);
2763 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2764 ph10 626 }
2765 nigel 3 else
2766     {
2767     fprintf(outfile, "%2d: ", i/2);
2768 nigel 63 (void)pchars(bptr + use_offsets[i],
2769     use_offsets[i+1] - use_offsets[i], outfile);
2770 nigel 3 fprintf(outfile, "\n");
2771 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
2772 nigel 35 {
2773 ph10 616 fprintf(outfile, "%2d+ ", i/2);
2774     (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2775     outfile);
2776     fprintf(outfile, "\n");
2777 nigel 35 }
2778 nigel 3 }
2779     }
2780 ph10 512
2781 ph10 510 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2782 nigel 29
2783     for (i = 0; i < 32; i++)
2784     {
2785     if ((copystrings & (1 << i)) != 0)
2786     {
2787 nigel 91 char copybuffer[256];
2788 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2789 nigel 37 i, copybuffer, sizeof(copybuffer));
2790 nigel 29 if (rc < 0)
2791     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2792     else
2793 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2794 nigel 29 }
2795     }
2796    
2797 nigel 91 for (copynamesptr = copynames;
2798     *copynamesptr != 0;
2799     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2800     {
2801     char copybuffer[256];
2802     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2803     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2804     if (rc < 0)
2805     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2806     else
2807     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2808     }
2809    
2810 nigel 29 for (i = 0; i < 32; i++)
2811     {
2812     if ((getstrings & (1 << i)) != 0)
2813     {
2814     const char *substring;
2815 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2816 nigel 29 i, &substring);
2817     if (rc < 0)
2818     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2819     else
2820     {
2821     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2822 nigel 49 pcre_free_substring(substring);
2823 nigel 29 }
2824     }
2825     }
2826    
2827 nigel 91 for (getnamesptr = getnames;
2828     *getnamesptr != 0;
2829     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2830     {
2831     const char *substring;
2832     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2833     count, (char *)getnamesptr, &substring);
2834     if (rc < 0)
2835     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2836     else
2837     {
2838     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2839     pcre_free_substring(substring);
2840     }
2841     }
2842    
2843 nigel 29 if (getlist)
2844     {
2845     const char **stringlist;
2846 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2847 nigel 29 &stringlist);
2848     if (rc < 0)
2849     fprintf(outfile, "get substring list failed %d\n", rc);
2850     else
2851     {
2852     for (i = 0; i < count; i++)
2853     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2854     if (stringlist[i] != NULL)
2855     fprintf(outfile, "string list not terminated by NULL\n");
2856 nigel 49 /* free((void *)stringlist); */
2857     pcre_free_substring_list(stringlist);
2858 nigel 29 }
2859     }
2860 nigel 39 }
2861 nigel 29
2862 nigel 75 /* There was a partial match */
2863    
2864     else if (count == PCRE_ERROR_PARTIAL)
2865     {
2866 ph10 510 if (markptr == NULL) fprintf(outfile, "Partial match");
2867     else fprintf(outfile, "Partial match, mark=%s", markptr);
2868 ph10 426 if (use_size_offsets > 1)
2869     {
2870     fprintf(outfile, ": ");
2871     pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2872 ph10 461 outfile);
2873     }
2874 nigel 77 fprintf(outfile, "\n");
2875 nigel 75 break; /* Out of the /g loop */
2876     }
2877    
2878 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2879 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2880     to advance the start offset, and continue. We won't be at the end of the
2881     string - that was checked before setting g_notempty.
2882 nigel 39
2883 ph10 566 Complication arises in the case when the newline convention is "any",
2884 ph10 579 "crlf", or "anycrlf". If the previous match was at the end of a line
2885     terminated by CRLF, an advance of one character just passes the \r,
2886 ph10 566 whereas we should prefer the longer newline sequence, as does the code in
2887 ph10 579 pcre_exec(). Fudge the offset value to achieve this. We check for a
2888     newline setting in the pattern; if none was set, use pcre_config() to
2889 ph10 566 find the default.
2890 ph10 144
2891 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2892     character, not one byte. */
2893    
2894 nigel 3 else
2895     {
2896 nigel 41 if (g_notempty != 0)
2897 nigel 35 {
2898 nigel 73 int onechar = 1;
2899 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2900 nigel 57 use_offsets[0] = start_offset;
2901 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2902     {
2903     int d;
2904     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2905 ph10 391 /* Note that these values are always the ASCII ones, even in
2906     EBCDIC environments. CR = 13, NL = 10. */
2907     obits = (d == 13)? PCRE_NEWLINE_CR :
2908     (d == 10)? PCRE_NEWLINE_LF :
2909     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2910 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2911 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2912     }
2913 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2914 ph10 566 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2915 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2916 ph10 149 &&
2917 ph10 143 start_offset < len - 1 &&
2918     bptr[start_offset] == '\r' &&
2919     bptr[start_offset+1] == '\n')
2920 ph10 144 onechar++;
2921 ph10 143 else if (use_utf8)
2922 nigel 73 {
2923     while (start_offset + onechar < len)
2924     {
2925 ph10 566 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2926 ph10 579 onechar++;
2927 nigel 73 }
2928     }
2929     use_offsets[1] = start_offset + onechar;
2930 nigel 35 }
2931 nigel 41 else
2932     {
2933 ph10 598 switch(count)
2934     {
2935     case PCRE_ERROR_NOMATCH:
2936 ph10 512 if (gmatched == 0)
2937 ph10 510 {
2938     if (markptr == NULL) fprintf(outfile, "No match\n");
2939     else fprintf(outfile, "No match, mark = %s\n", markptr);
2940 ph10 512 }
2941 ph10 598 break;
2942    
2943     case PCRE_ERROR_BADUTF8:
2944     case PCRE_ERROR_SHORTUTF8:
2945     fprintf(outfile, "Error %d (%s UTF-8 string)", count,
2946     (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
2947     if (use_size_offsets >= 2)
2948     fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
2949     use_offsets[1]);
2950     fprintf(outfile, "\n");
2951     break;
2952    
2953     default:
2954 ph10 604 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
2955     fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
2956     else
2957     fprintf(outfile, "Error %d (Unexpected value)\n", count);
2958 ph10 598 break;
2959 nigel 41 }
2960 ph10 598
2961 nigel 41 break; /* Out of the /g loop */
2962     }
2963 nigel 3 }
2964 nigel 35
2965 nigel 39 /* If not /g or /G we are done */
2966    
2967     if (!do_g && !do_G) break;
2968    
2969 nigel 41 /* If we have matched an empty string, first check to see if we are at
2970 ph10 442 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2971     Perl's /g option does. This turns out to be rather cunning. First we set
2972     PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2973 nigel 47 same point. If this fails (picked up above) we advance to the next
2974 ph10 143 character. */
2975 ph10 142
2976 nigel 41 g_notempty = 0;
2977 ph10 142
2978 nigel 57 if (use_offsets[0] == use_offsets[1])
2979 nigel 41 {
2980 nigel 57 if (use_offsets[0] == len) break;
2981 ph10 442 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2982 nigel 41 }
2983 nigel 39
2984     /* For /g, update the start offset, leaving the rest alone */
2985    
2986 ph10 143 if (do_g) start_offset = use_offsets[1];
2987 nigel 39
2988     /* For /G, update the pointer and length */
2989    
2990     else
2991 nigel 35 {
2992 ph10 143 bptr += use_offsets[1];
2993     len -= use_offsets[1];
2994 nigel 35 }
2995 nigel 39 } /* End of loop for /g and /G */
2996 nigel 91
2997     NEXT_DATA: continue;
2998 nigel 39 } /* End of loop for data lines */
2999 nigel 3
3000 nigel 11 CONTINUE:
3001 nigel 37
3002     #if !defined NOPOSIX
3003 nigel 3 if (posix || do_posix) regfree(&preg);
3004 nigel 37 #endif
3005    
3006 nigel 77 if (re != NULL) new_free(re);
3007     if (extra != NULL) new_free(extra);
3008 ph10 541 if (locale_set)
3009 nigel 25 {
3010 nigel 77 new_free((void *)tables);
3011 nigel 25 setlocale(LC_CTYPE, "C");
3012 nigel 93 locale_set = 0;
3013 nigel 25 }
3014 nigel 3 }
3015    
3016 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
3017 nigel 77
3018     EXIT:
3019    
3020     if (infile != NULL && infile != stdin) fclose(infile);
3021     if (outfile != NULL && outfile != stdout) fclose(outfile);
3022    
3023     free(buffer);
3024     free(dbuffer);
3025     free(pbuffer);
3026     free(offsets);
3027    
3028     return yield;
3029 nigel 3 }
3030    
3031 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12