/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 616 - (hide annotations) (download)
Mon Jul 11 15:55:25 2011 UTC (22 months, 1 week ago) by ph10
File MIME type: text/plain
File size: 90332 byte(s)
Add the ++ feature to pcretest.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <locale.h>
#include <errno.h>
#include <limits.h>
50 nigel 3
51 ph10 287 #ifdef SUPPORT_LIBREADLINE
52 ph10 343 #ifdef HAVE_UNISTD_H
53 ph10 287 #include <unistd.h>
54 ph10 343 #endif
55 ph10 287 #include <readline/readline.h>
56     #include <readline/history.h>
57     #endif
58 nigel 93
59 ph10 287
60 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
61     input and output without "b"; then I was told that "b" was needed in some
62     environments, so it was added for release 5.0 to both the input and output. (It
63     makes no difference on Unix-like systems.) Later I was told that it is wrong
64     for the input on Windows. I've now abstracted the modes into two macros that
65     are set here, to make it easier to fiddle with them, and removed "b" from the
66     input mode under Windows. */
67    
/* Select the fopen() modes and a few compatibility names per platform. The
only difference between the two branches as far as the modes go is that input
is opened without "b" under Windows. */

#if defined(_WIN32) || defined(WIN32)
#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

#ifndef isatty
#define isatty _isatty         /* This is what Windows calls them, I'm told, */
#endif                         /* though in some environments they seem to   */
                               /* be already defined, hence the #ifndefs.    */
#ifndef fileno
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#ifdef __BORLANDC__
#define _setmode(handle, mode) setmode(handle, mode)
#endif

/* Not Windows */

#else
#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */
#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"
#endif
96    
97 nigel 93
98 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
99     displaying the results of pcre_study() and we also need to know about the
100     internal macros, structures, and other internal data values; pcretest has
101     "inside information" compared to a program that strictly follows the PCRE API.
102 nigel 37
103 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105     appropriately for an application, not for building PCRE. */
106 nigel 77
107 ph10 145 #include "pcre.h"
108 nigel 77 #include "pcre_internal.h"
109    
110 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
111     to keep two copies, we include the source file here, changing the names of the
112     external symbols to prevent clashes. */
113 nigel 77
114 ph10 351 #define _pcre_ucp_gentype ucp_gentype
115 nigel 85 #define _pcre_utf8_table1 utf8_table1
116     #define _pcre_utf8_table1_size utf8_table1_size
117     #define _pcre_utf8_table2 utf8_table2
118     #define _pcre_utf8_table3 utf8_table3
119     #define _pcre_utf8_table4 utf8_table4
120     #define _pcre_utt utt
121     #define _pcre_utt_size utt_size
122 ph10 240 #define _pcre_utt_names utt_names
123 nigel 85 #define _pcre_OP_lengths OP_lengths
124    
125     #include "pcre_tables.c"
126    
127     /* We also need the pcre_printint() function for printing out compiled
128     patterns. This function is in a separate file so that it can be included in
129 ph10 507 pcre_compile.c when that module is compiled with debugging enabled. It needs to
130 ph10 498 know which case is being compiled. */
131 nigel 85
132 ph10 498 #define COMPILING_PCRETEST
133     #include "pcre_printint.src"
134    
/* The definition of the macro PRINTABLE, which determines whether to print an
output character as-is or as a hex value when showing compiled patterns, is
contained in the printint.src file. We use it here also, in cases when the
locale has not been explicitly changed, so as to get consistent output from
systems that differ in their output from isprint() even in the "C" locale.

Despite its name, PRINTHEX(c) is non-zero when c can be output as-is and zero
when it has to be shown as a hex escape. When locale_set is non-zero the
argument is handed to isprint(), so it must then be representable as an
unsigned char. */

#define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
142 nigel 85
143 nigel 37 /* It is possible to compile this test program without including support for
144     testing the POSIX interface, though this is not available via the standard
145     Makefile. */
146    
147     #if !defined NOPOSIX
148 nigel 3 #include "pcreposix.h"
149 nigel 37 #endif
150 nigel 3
/* It is also possible, for the benefit of the version currently imported into
Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
interface to the DFA matcher (NODFA), and without the doublecheck of the old
"info" function (define NOINFOCHECK). In fact, we automatically cut out the
UTF8 support if PCRE is built without it. */

#ifndef SUPPORT_UTF8
#ifndef NOUTF8
#define NOUTF8
#endif
#endif


/* Other parameters */

/* Fall back to CLK_TCK, or failing that to 100, when the C library does not
supply CLOCKS_PER_SEC. */

#ifndef CLOCKS_PER_SEC
#ifdef CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif

/* This is the default loop count for timing. */

#define LOOPREPEAT 500000
177 nigel 3
178 nigel 85 /* Static variables */
179    
180 nigel 3 static FILE *outfile;
181     static int log_store = 0;
182 nigel 63 static int callout_count;
183     static int callout_extra;
184     static int callout_fail_count;
185     static int callout_fail_id;
186 ph10 210 static int debug_lengths;
187 nigel 63 static int first_callout;
188 nigel 93 static int locale_set = 0;
189 nigel 73 static int show_malloc;
190 nigel 67 static int use_utf8;
191 nigel 43 static size_t gotten_store;
192 nigel 3
193 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
194    
195     static int buffer_size = 50000;
196     static uschar *buffer = NULL;
197     static uschar *dbuffer = NULL;
198 nigel 75 static uschar *pbuffer = NULL;
199 nigel 3
/* Textual explanations for runtime error codes. The inline notes (entry 0 is
"no error", entry 1 is NOMATCH) suggest that the table is indexed by the
negated PCRE error code. NOTE(review): confirm against the code that does the
lookup, which is not in this part of the file. A NULL entry marks a code that
has no generic text, either because it is reported by dedicated code or
because it is not returned by the matching functions. */

static const char *errtexts[] = {
  NULL,                  /* 0 is no error */
  NULL,                  /* NOMATCH is handled specially */
  "NULL argument passed",
  "bad option value",
  "magic number missing",
  "unknown opcode - pattern overwritten?",
  "no more memory",
  NULL,                  /* never returned by pcre_exec() or pcre_dfa_exec() */
  "match limit exceeded",
  "callout error code",
  NULL,                  /* BADUTF8 is handled specially */
  "bad UTF-8 offset",
  NULL,                  /* PARTIAL is handled specially */
  "not used - internal error",
  "internal error - pattern overwritten?",
  "bad count value",
  "item unsupported for DFA matching",
  "backreference condition or recursion test not supported for DFA matching",
  "match limit not supported for DFA matching",
  "workspace size exceeded in DFA matching",
  "too much recursion for DFA matching",
  "recursion limit exceeded",
  "not used - internal error",
  "invalid combination of newline options",
  "bad offset value",
  NULL                   /* SHORTUTF8 is handled specially */
};
230    
231    
232 ph10 541 /*************************************************
233     * Alternate character tables *
234     *************************************************/
235 nigel 49
/* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
using the default tables of the library. However, the T option can be used to
select alternate sets of tables, for different kinds of testing. Note also that
the L (locale) option also adjusts the tables. */

/* This is the set of tables distributed as default with PCRE. It recognizes
only ASCII characters. The vector holds four consecutive sub-tables: 256 bytes
of lower casing, 256 bytes of case flipping, ten 32-byte class bit maps, and
256 bytes of character type flags (1088 bytes in all). */

static const unsigned char tables0[] = {

/* This table is a lower casing table. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table is a case flipping table. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 65, 66, 67, 68, 69, 70, 71,
   72, 73, 74, 75, 76, 77, 78, 79,
   80, 81, 82, 83, 84, 85, 86, 87,
   88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. */

  /* space */
  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* xdigit */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* digit */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* upper */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* lower */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* word */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* graph */
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* print */
  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* punct */
  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
  0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* cntrl */
  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* This table identifies various classes of character by individual bits:
  0x01   white space character
  0x02   letter
  0x04   decimal digit
  0x08   hexadecimal digit
  0x10   alphanumeric or '_'
  0x80   regular expression metacharacter or binary zero
*/

  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
  0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
  0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
412    
/* This is a set of tables that came originally from a Windows user. It seems
to be at least an approximation of ISO 8859. In particular, there are
characters greater than 128 that are marked as spaces, letters, etc.

The vector has the same size and arrangement as tables0: 256 bytes of lower
casing, 256 bytes of case flipping, ten 32-byte class bit maps, and 256 bytes
of character type flags. */

static const unsigned char tables1[] = {

/* Lower casing table */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* Case flipping table */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Character class bit maps, 32 bytes each, in the same order as in tables0:
space, xdigit, digit, upper, lower, word, graph, print, punct, cntrl. */

/* space */
0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,

/* xdigit */
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,

/* digit */
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,

/* upper */
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,

/* lower */
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,

/* word */
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,

/* graph */
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,

/* print */
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,

/* punct */
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,

/* cntrl */
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Character type flags, one byte per character, using the same bit values as
the corresponding table in tables0 (1 space, 2 letter, 4 decimal digit,
8 hexadecimal digit, 16 alphanumeric or '_', 128 metacharacter or zero). */

128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
555    
556    
557    
558 ph10 558
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
in their libraries, but can provide the same facility by this simple
alternative function. It is compiled only when HAVE_STRERROR is not defined.

Argument:   n   an error number
Returns:        the entry for n in sys_errlist[], or a fixed "unknown" text
                when n is negative or not less than sys_nerr
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
if (n < 0 || n >= sys_nerr) return "unknown error number";
return sys_errlist[n];
}
#endif /* HAVE_STRERROR */
578    
579    
580    
581    
582     /*************************************************
583 nigel 91 * Read or extend an input line *
584     *************************************************/
585    
586     /* Input lines are read into buffer, but both patterns and data lines can be
587     continued over multiple input lines. In addition, if the buffer fills up, we
588     want to automatically expand it so as to be able to handle extremely large
589     lines that are needed for certain stress tests. When the input buffer is
590     expanded, the other two buffers must also be expanded likewise, and the
591     contents of pbuffer, which are a copy of the input for callouts, must be
592     preserved (for when expansion happens for a data line). This is not the most
593     optimal way of handling this, but hey, this is just a test program!
594    
595     Arguments:
596     f the file to read
597     start where in buffer to start (this *must* be within buffer)
598 ph10 287 prompt for stdin or readline()
599 nigel 91
600     Returns: pointer to the start of new data
601     could be a copy of start, or could be moved
602     NULL if no data read and EOF reached
603     */
604    
605     static uschar *
606 ph10 287 extend_inputline(FILE *f, uschar *start, const char *prompt)
607 nigel 91 {
608     uschar *here = start;
609    
610     for (;;)
611     {
612 ph10 530 int rlen = (int)(buffer_size - (here - buffer));
613 nigel 93
614 nigel 91 if (rlen > 1000)
615     {
616     int dlen;
617 ph10 289
618 ph10 287 /* If libreadline support is required, use readline() to read a line if the
619     input is a terminal. Note that readline() removes the trailing newline, so
620     we must put it back again, to be compatible with fgets(). */
621 ph10 289
622 ph10 287 #ifdef SUPPORT_LIBREADLINE
623     if (isatty(fileno(f)))
624     {
625 ph10 289 size_t len;
626 ph10 287 char *s = readline(prompt);
627     if (s == NULL) return (here == start)? NULL : start;
628     len = strlen(s);
629 ph10 289 if (len > 0) add_history(s);
630 ph10 287 if (len > rlen - 1) len = rlen - 1;
631     memcpy(here, s, len);
632     here[len] = '\n';
633 ph10 289 here[len+1] = 0;
634     free(s);
635 ph10 287 }
636 ph10 289 else
637     #endif
638    
639 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
640 ph10 289
641 ph10 287 {
642 ph10 516 if (f == stdin) printf("%s", prompt);
643 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
644     return (here == start)? NULL : start;
645 ph10 289 }
646    
647 nigel 91 dlen = (int)strlen((char *)here);
648     if (dlen > 0 && here[dlen - 1] == '\n') return start;
649     here += dlen;
650     }
651    
652     else
653     {
654     int new_buffer_size = 2*buffer_size;
655     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
656     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
657     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
658    
659     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
660     {
661     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
662     exit(1);
663     }
664    
665     memcpy(new_buffer, buffer, buffer_size);
666     memcpy(new_pbuffer, pbuffer, buffer_size);
667    
668     buffer_size = new_buffer_size;
669    
670     start = new_buffer + (start - buffer);
671     here = new_buffer + (here - buffer);
672    
673     free(buffer);
674     free(dbuffer);
675     free(pbuffer);
676    
677     buffer = new_buffer;
678     dbuffer = new_dbuffer;
679     pbuffer = new_pbuffer;
680     }
681     }
682    
683     return NULL; /* Control never gets here */
684     }
685    
686    
687    
688    
689    
690    
691    
/*************************************************
*          Read number from string               *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then a run of decimal digits is converted. If
there are no digits the result is zero and the end pointer is left at the
first non-space character. A value that does not fit in an int is reported as
INT_MAX instead of overflowing, and all of its digits are still consumed.

Arguments:
  str          string to be converted
  endptr       where to put the end pointer

Returns:       the value, as a non-negative int
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (*str != 0 && isspace(*str)) str++;
while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* result*10 + digit fits exactly when result <= (INT_MAX - digit)/10 */

  if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
  }
*endptr = str;
return(result);
}
716    
717    
718    
719 nigel 49
720     /*************************************************
721     * Convert UTF-8 string to value *
722     *************************************************/
723    
724     /* This function takes one or more bytes that represents a UTF-8 character,
725     and returns the value of the character.
726    
727     Argument:
728 nigel 91 utf8bytes a pointer to the byte vector
729     vptr a pointer to an int to receive the value
730 nigel 49
731 nigel 91 Returns: > 0 => the number of bytes consumed
732     -6 to 0 => malformed UTF-8 character at offset = (-return)
733 nigel 49 */
734    
735 nigel 79 #if !defined NOUTF8
736    
737 nigel 67 static int
738 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
739 nigel 49 {
740 nigel 91 int c = *utf8bytes++;
741 nigel 49 int d = c;
742     int i, j, s;
743    
744     for (i = -1; i < 6; i++) /* i is number of additional bytes */
745     {
746     if ((d & 0x80) == 0) break;
747     d <<= 1;
748     }
749    
750     if (i == -1) { *vptr = c; return 1; } /* ascii character */
751     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
752    
753     /* i now has a value in the range 1-5 */
754    
755 nigel 59 s = 6*i;
756 nigel 85 d = (c & utf8_table3[i]) << s;
757 nigel 49
758     for (j = 0; j < i; j++)
759     {
760 nigel 91 c = *utf8bytes++;
761 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
762 nigel 59 s -= 6;
763 nigel 49 d |= (c & 0x3f) << s;
764     }
765    
766     /* Check that encoding was the correct unique one */
767    
768 nigel 85 for (j = 0; j < utf8_table1_size; j++)
769     if (d <= utf8_table1[j]) break;
770 nigel 49 if (j != i) return -(i+1);
771    
772     /* Valid value */
773    
774     *vptr = d;
775     return i+1;
776     }
777    
778 nigel 79 #endif
779 nigel 49
780    
781 nigel 79
782 nigel 63 /*************************************************
783 nigel 85 * Convert character value to UTF-8 *
784     *************************************************/
785    
786     /* This function takes an integer value in the range 0 - 0x7fffffff
787     and encodes it as a UTF-8 character in 0 to 6 bytes.
788    
789     Arguments:
790     cvalue the character value
791 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
792 nigel 85
793     Returns: number of characters placed in the buffer
794     */
795    
796 nigel 93 #if !defined NOUTF8
797    
798 nigel 85 static int
799 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
800 nigel 85 {
801     register int i, j;
802     for (i = 0; i < utf8_table1_size; i++)
803     if (cvalue <= utf8_table1[i]) break;
804 nigel 91 utf8bytes += i;
805 nigel 85 for (j = i; j > 0; j--)
806     {
807 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
808 nigel 85 cvalue >>= 6;
809     }
810 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
811 nigel 85 return i + 1;
812     }
813    
814 nigel 93 #endif
815 nigel 85
816    
817 nigel 93
818 nigel 85 /*************************************************
819 nigel 63 * Print character string *
820     *************************************************/
821 nigel 49
822 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
823     mode. Yields number of characters printed. If handed a NULL file, just counts
824     chars without printing. */
825 nigel 49
826 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
827 nigel 3 {
828 nigel 85 int c = 0;
829 nigel 63 int yield = 0;
830 nigel 3
831 nigel 63 while (length-- > 0)
832 nigel 3 {
833 nigel 79 #if !defined NOUTF8
834 nigel 67 if (use_utf8)
835 nigel 63 {
836     int rc = utf82ord(p, &c);
837 nigel 3
838 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
839     {
840     length -= rc - 1;
841     p += rc;
842 nigel 93 if (PRINTHEX(c))
843 nigel 63 {
844     if (f != NULL) fprintf(f, "%c", c);
845     yield++;
846     }
847     else
848     {
849 nigel 93 int n = 4;
850     if (f != NULL) fprintf(f, "\\x{%02x}", c);
851     yield += (n <= 0x000000ff)? 2 :
852     (n <= 0x00000fff)? 3 :
853     (n <= 0x0000ffff)? 4 :
854     (n <= 0x000fffff)? 5 : 6;
855 nigel 63 }
856     continue;
857     }
858     }
859 nigel 79 #endif
860 nigel 3
861 nigel 63 /* Not UTF-8, or malformed UTF-8 */
862    
863 nigel 93 c = *p++;
864     if (PRINTHEX(c))
865 nigel 3 {
866 nigel 63 if (f != NULL) fprintf(f, "%c", c);
867     yield++;
868 nigel 3 }
869 nigel 63 else
870 nigel 3 {
871 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
872     yield += 4;
873     }
874     }
875 nigel 3
876 nigel 63 return yield;
877     }
878 nigel 23
879 nigel 3
880 nigel 23
881 nigel 63 /*************************************************
882     * Callout function *
883     *************************************************/
884 nigel 3
885 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
886     the match. Yield zero unless more callouts than the fail count, or the callout
887     data is not zero. */
888 nigel 3
889 nigel 63 static int callout(pcre_callout_block *cb)
890     {
891     FILE *f = (first_callout | callout_extra)? outfile : NULL;
892 nigel 75 int i, pre_start, post_start, subject_length;
893 nigel 3
894 nigel 63 if (callout_extra)
895     {
896     fprintf(f, "Callout %d: last capture = %d\n",
897     cb->callout_number, cb->capture_last);
898 nigel 3
899 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
900     {
901     if (cb->offset_vector[i] < 0)
902     fprintf(f, "%2d: <unset>\n", i/2);
903     else
904     {
905     fprintf(f, "%2d: ", i/2);
906     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
907     cb->offset_vector[i+1] - cb->offset_vector[i], f);
908     fprintf(f, "\n");
909     }
910     }
911     }
912 nigel 3
913 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
914     datails. On subsequent calls in the same match, we use pchars just to find the
915     printed lengths of the substrings. */
916 nigel 3
917 nigel 63 if (f != NULL) fprintf(f, "--->");
918 nigel 3
919 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
920     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
921     cb->current_position - cb->start_match, f);
922 nigel 3
923 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
924    
925 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
926     cb->subject_length - cb->current_position, f);
927 nigel 3
928 nigel 63 if (f != NULL) fprintf(f, "\n");
929 nigel 9
930 nigel 63 /* Always print appropriate indicators, with callout number if not already
931 nigel 75 shown. For automatic callouts, show the pattern offset. */
932 nigel 3
933 nigel 75 if (cb->callout_number == 255)
934     {
935     fprintf(outfile, "%+3d ", cb->pattern_position);
936     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
937     }
938     else
939     {
940     if (callout_extra) fprintf(outfile, " ");
941     else fprintf(outfile, "%3d ", cb->callout_number);
942     }
943 nigel 3
944 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
945     fprintf(outfile, "^");
946 nigel 3
947 nigel 63 if (post_start > 0)
948     {
949     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
950     fprintf(outfile, "^");
951 nigel 3 }
952    
953 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
954     fprintf(outfile, " ");
955    
956     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
957     pbuffer + cb->pattern_position);
958    
959 nigel 63 fprintf(outfile, "\n");
960     first_callout = 0;
961 nigel 3
962 nigel 71 if (cb->callout_data != NULL)
963 nigel 49 {
964 nigel 71 int callout_data = *((int *)(cb->callout_data));
965     if (callout_data != 0)
966     {
967     fprintf(outfile, "Callout data = %d\n", callout_data);
968     return callout_data;
969     }
970 nigel 63 }
971 nigel 49
972 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
973     (++callout_count >= callout_fail_count)? 1 : 0;
974 nigel 3 }
975    
976    
977 nigel 63 /*************************************************
978 nigel 73 * Local malloc functions *
979 nigel 63 *************************************************/
980 nigel 3
981     /* Alternative malloc function, to test functionality and show the size of the
982     compiled re. */
983    
984     static void *new_malloc(size_t size)
985     {
986 nigel 73 void *block = malloc(size);
987 nigel 43 gotten_store = size;
988 nigel 73 if (show_malloc)
989 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
990 nigel 73 return block;
991 nigel 3 }
992    
993 nigel 73 static void new_free(void *block)
994     {
995     if (show_malloc)
996     fprintf(outfile, "free %p\n", block);
997     free(block);
998     }
999 nigel 3
1000    
1001 nigel 73 /* For recursion malloc/free, to test stacking calls */
1002    
1003     static void *stack_malloc(size_t size)
1004     {
1005     void *block = malloc(size);
1006     if (show_malloc)
1007 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1008 nigel 73 return block;
1009     }
1010    
1011     static void stack_free(void *block)
1012     {
1013     if (show_malloc)
1014     fprintf(outfile, "stack_free %p\n", block);
1015     free(block);
1016     }
1017    
1018    
1019 nigel 63 /*************************************************
1020     * Call pcre_fullinfo() *
1021     *************************************************/
1022 nigel 43
1023     /* Get one piece of information from the pcre_fullinfo() function */
1024    
1025     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1026     {
1027     int rc;
1028     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1029     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1030     }
1031    
1032    
1033    
1034 nigel 63 /*************************************************
1035 nigel 75 * Byte flipping function *
1036     *************************************************/
1037    
/* Reverse the byte order of the low-order bytes of value.

Arguments:
  value    the value to flip
  n        2 to swap the two low-order bytes; any other value reverses the
           four low-order bytes

Returns:   the flipped value; bits above the bytes that take part are zero
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xffUL;
unsigned long int b1 = (value >> 8) & 0xffUL;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xffUL;
b3 = (value >> 24) & 0xffUL;

return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1047    
1048    
1049    
1050    
1051     /*************************************************
1052 nigel 87 * Check match or recursion limit *
1053     *************************************************/
1054    
1055     static int
1056     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1057     int start_offset, int options, int *use_offsets, int use_size_offsets,
1058     int flag, unsigned long int *limit, int errnumber, const char *msg)
1059     {
1060     int count;
1061     int min = 0;
1062     int mid = 64;
1063     int max = -1;
1064    
1065     extra->flags |= flag;
1066    
1067     for (;;)
1068     {
1069     *limit = mid;
1070    
1071     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1072     use_offsets, use_size_offsets);
1073    
1074     if (count == errnumber)
1075     {
1076     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1077     min = mid;
1078     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1079     }
1080    
1081     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1082     count == PCRE_ERROR_PARTIAL)
1083     {
1084     if (mid == min + 1)
1085     {
1086     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1087     break;
1088     }
1089     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1090     max = mid;
1091     mid = (min + mid)/2;
1092     }
1093     else break; /* Some other error */
1094     }
1095    
1096     extra->flags &= ~flag;
1097     return count;
1098     }
1099    
1100    
1101    
1102     /*************************************************
1103 ph10 227 * Case-independent strncmp() function *
1104     *************************************************/
1105    
1106     /*
1107     Arguments:
1108     s first string
1109     t second string
1110     n number of characters to compare
1111    
1112     Returns: < 0, = 0, or > 0, according to the comparison
1113     */
1114    
1115     static int
1116     strncmpic(uschar *s, uschar *t, int n)
1117     {
1118     while (n--)
1119     {
1120     int c = tolower(*s++) - tolower(*t++);
1121     if (c) return c;
1122     }
1123     return 0;
1124     }
1125    
1126    
1127    
1128     /*************************************************
1129 nigel 91 * Check newline indicator *
1130     *************************************************/
1131    
1132 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1133     a message and return 0 if there is no match.
1134 nigel 91
1135     Arguments:
1136     p points after the leading '<'
1137     f file for error message
1138    
1139     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1140     */
1141    
1142     static int
1143     check_newline(uschar *p, FILE *f)
1144     {
1145 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1146     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1147     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1148     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1149     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1150 ph10 231 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1151     if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1152 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
1153     return 0;
1154     }
1155    
1156    
1157    
1158     /*************************************************
1159 nigel 93 * Usage function *
1160     *************************************************/
1161    
/* Write the command line summary to stdout. The lines about readline(), -dfa,
and -p depend on SUPPORT_LIBREADLINE, NODFA, and NOPOSIX respectively. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1195    
1196    
1197    
1198     /*************************************************
1199 nigel 63 * Main Program *
1200     *************************************************/
1201 nigel 43
1202 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
1203     consist of a regular expression, in delimiters and optionally followed by
1204     options, followed by a set of test data, terminated by an empty line. */
1205    
1206     int main(int argc, char **argv)
1207     {
1208     FILE *infile = stdin;
1209     int options = 0;
1210     int study_options = 0;
1211 ph10 386 int default_find_match_limit = FALSE;
1212 nigel 3 int op = 1;
1213     int timeit = 0;
1214 nigel 93 int timeitm = 0;
1215 nigel 3 int showinfo = 0;
1216 nigel 31 int showstore = 0;
1217 ph10 606 int force_study = 0;
1218 nigel 87 int quiet = 0;
1219 nigel 53 int size_offsets = 45;
1220     int size_offsets_max;
1221 nigel 77 int *offsets = NULL;
1222 nigel 53 #if !defined NOPOSIX
1223 nigel 3 int posix = 0;
1224 nigel 53 #endif
1225 nigel 3 int debug = 0;
1226 nigel 11 int done = 0;
1227 nigel 77 int all_use_dfa = 0;
1228     int yield = 0;
1229 nigel 91 int stack_size;
1230 nigel 3
1231 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
1232     that 1024 is plenty long enough for the few names we'll be testing. */
1233 nigel 69
1234 nigel 91 uschar copynames[1024];
1235     uschar getnames[1024];
1236    
1237     uschar *copynamesptr;
1238     uschar *getnamesptr;
1239    
1240 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
1241 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
1242 nigel 69
1243 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
1244     dbuffer = (unsigned char *)malloc(buffer_size);
1245     pbuffer = (unsigned char *)malloc(buffer_size);
1246 nigel 69
1247 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
1248 nigel 3
1249 nigel 93 outfile = stdout;
1250    
1251     /* The following _setmode() stuff is some Windows magic that tells its runtime
1252     library to translate CRLF into a single LF character. At least, that's what
1253     I've been told: never having used Windows I take this all on trust. Originally
1254     it set 0x8000, but then I was advised that _O_BINARY was better. */
1255    
1256 nigel 75 #if defined(_WIN32) || defined(WIN32)
1257 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
1258     #endif
1259 nigel 75
1260 nigel 3 /* Scan options */
1261    
1262     while (argc > 1 && argv[op][0] == '-')
1263     {
1264 nigel 63 unsigned char *endptr;
1265 nigel 53
1266 ph10 606 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1267     else if (strcmp(argv[op], "-s") == 0) force_study = 1;
1268 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1269 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1270 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1271     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1272 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1273 nigel 79 #if !defined NODFA
1274 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1275 nigel 79 #endif
1276 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1277 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1278     *endptr == 0))
1279 nigel 53 {
1280     op++;
1281     argc--;
1282     }
1283 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1284     {
1285     int both = argv[op][2] == 0;
1286     int temp;
1287     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1288     *endptr == 0))
1289     {
1290     timeitm = temp;
1291     op++;
1292     argc--;
1293     }
1294     else timeitm = LOOPREPEAT;
1295     if (both) timeit = timeitm;
1296     }
1297 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1298     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1299     *endptr == 0))
1300     {
1301 nigel 93 #if defined(_WIN32) || defined(WIN32)
1302 nigel 91 printf("PCRE: -S not supported on this OS\n");
1303     exit(1);
1304     #else
1305     int rc;
1306     struct rlimit rlim;
1307     getrlimit(RLIMIT_STACK, &rlim);
1308     rlim.rlim_cur = stack_size * 1024 * 1024;
1309     rc = setrlimit(RLIMIT_STACK, &rlim);
1310     if (rc != 0)
1311     {
1312     printf("PCRE: setrlimit() failed with error %d\n", rc);
1313     exit(1);
1314     }
1315     op++;
1316     argc--;
1317     #endif
1318     }
1319 nigel 53 #if !defined NOPOSIX
1320 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1321 nigel 53 #endif
1322 nigel 63 else if (strcmp(argv[op], "-C") == 0)
1323     {
1324     int rc;
1325 ph10 392 unsigned long int lrc;
1326 nigel 63 printf("PCRE version %s\n", pcre_version());
1327     printf("Compiled with\n");
1328     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1329     printf(" %sUTF-8 support\n", rc? "" : "No ");
1330 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1331     printf(" %sUnicode properties support\n", rc? "" : "No ");
1332 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1333 ph10 391 /* Note that these values are always the ASCII values, even
1334 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
1335 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1336     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1337 ph10 150 (rc == -2)? "ANYCRLF" :
1338 nigel 93 (rc == -1)? "ANY" : "???");
1339 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1340     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1341     "all Unicode newlines");
1342 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1343     printf(" Internal link size = %d\n", rc);
1344     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1345     printf(" POSIX malloc threshold = %d\n", rc);
1346 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1347     printf(" Default match limit = %ld\n", lrc);
1348     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1349     printf(" Default recursion depth limit = %ld\n", lrc);
1350 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1351     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1352 ph10 121 goto EXIT;
1353 nigel 63 }
1354 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
1355     strcmp(argv[op], "--help") == 0)
1356     {
1357     usage();
1358     goto EXIT;
1359     }
1360 nigel 3 else
1361     {
1362 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
1363 nigel 93 usage();
1364 nigel 77 yield = 1;
1365     goto EXIT;
1366 nigel 3 }
1367     op++;
1368     argc--;
1369     }
1370    
1371 nigel 53 /* Get the store for the offsets vector, and remember what it was */
1372    
1373     size_offsets_max = size_offsets;
1374 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1375 nigel 53 if (offsets == NULL)
1376     {
1377     printf("** Failed to get %d bytes of memory for offsets vector\n",
1378 ph10 151 (int)(size_offsets_max * sizeof(int)));
1379 nigel 77 yield = 1;
1380     goto EXIT;
1381 nigel 53 }
1382    
1383 nigel 3 /* Sort out the input and output files */
1384    
1385     if (argc > 1)
1386     {
1387 nigel 93 infile = fopen(argv[op], INPUT_MODE);
1388 nigel 3 if (infile == NULL)
1389     {
1390     printf("** Failed to open %s\n", argv[op]);
1391 nigel 77 yield = 1;
1392     goto EXIT;
1393 nigel 3 }
1394     }
1395    
1396     if (argc > 2)
1397     {
1398 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
1399 nigel 3 if (outfile == NULL)
1400     {
1401     printf("** Failed to open %s\n", argv[op+1]);
1402 nigel 77 yield = 1;
1403     goto EXIT;
1404 nigel 3 }
1405     }
1406    
1407     /* Set alternative malloc function */
1408    
1409     pcre_malloc = new_malloc;
1410 nigel 73 pcre_free = new_free;
1411     pcre_stack_malloc = stack_malloc;
1412     pcre_stack_free = stack_free;
1413 nigel 3
1414 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1415 nigel 3
1416 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1417 nigel 3
1418     /* Main loop */
1419    
1420 nigel 11 while (!done)
1421 nigel 3 {
1422     pcre *re = NULL;
1423     pcre_extra *extra = NULL;
1424 nigel 37
1425     #if !defined NOPOSIX /* There are still compilers that require no indent */
1426 nigel 3 regex_t preg;
1427 nigel 45 int do_posix = 0;
1428 nigel 37 #endif
1429    
1430 nigel 7 const char *error;
1431 ph10 512 unsigned char *markptr;
1432 nigel 25 unsigned char *p, *pp, *ppp;
1433 nigel 75 unsigned char *to_file = NULL;
1434 nigel 53 const unsigned char *tables = NULL;
1435 nigel 75 unsigned long int true_size, true_study_size = 0;
1436     size_t size, regex_gotten_store;
1437 ph10 512 int do_mark = 0;
1438 nigel 3 int do_study = 0;
1439 ph10 612 int no_force_study = 0;
1440 nigel 25 int do_debug = debug;
1441 nigel 35 int do_G = 0;
1442     int do_g = 0;
1443 nigel 25 int do_showinfo = showinfo;
1444 nigel 35 int do_showrest = 0;
1445 ph10 616 int do_showcaprest = 0;
1446 nigel 75 int do_flip = 0;
1447 nigel 93 int erroroffset, len, delimiter, poffset;
1448 nigel 3
1449 nigel 67 use_utf8 = 0;
1450 ph10 211 debug_lengths = 1;
1451 nigel 63
1452 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1453 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1454 nigel 63 fflush(outfile);
1455 nigel 3
1456     p = buffer;
1457     while (isspace(*p)) p++;
1458     if (*p == 0) continue;
1459    
1460 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1461 nigel 3
1462 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1463     {
1464 nigel 91 unsigned long int magic, get_options;
1465 nigel 75 uschar sbuf[8];
1466     FILE *f;
1467    
1468     p++;
1469     pp = p + (int)strlen((char *)p);
1470     while (isspace(pp[-1])) pp--;
1471     *pp = 0;
1472    
1473     f = fopen((char *)p, "rb");
1474     if (f == NULL)
1475     {
1476     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1477     continue;
1478     }
1479    
1480     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1481    
1482     true_size =
1483     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1484     true_study_size =
1485     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1486    
1487     re = (real_pcre *)new_malloc(true_size);
1488     regex_gotten_store = gotten_store;
1489    
1490     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1491    
1492     magic = ((real_pcre *)re)->magic_number;
1493     if (magic != MAGIC_NUMBER)
1494     {
1495     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1496     {
1497     do_flip = 1;
1498     }
1499     else
1500     {
1501     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1502     fclose(f);
1503     continue;
1504     }
1505     }
1506    
1507 ph10 612 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1508 nigel 75 do_flip? " (byte-inverted)" : "", p);
1509    
1510     /* Need to know if UTF-8 for printing data strings */
1511    
1512 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1513     use_utf8 = (get_options & PCRE_UTF8) != 0;
1514 nigel 75
1515 ph10 612 /* Now see if there is any following study data. */
1516 nigel 75
1517     if (true_study_size != 0)
1518     {
1519     pcre_study_data *psd;
1520    
1521     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1522     extra->flags = PCRE_EXTRA_STUDY_DATA;
1523    
1524     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1525     extra->study_data = psd;
1526    
1527     if (fread(psd, 1, true_study_size, f) != true_study_size)
1528     {
1529     FAIL_READ:
1530     fprintf(outfile, "Failed to read data from %s\n", p);
1531     if (extra != NULL) new_free(extra);
1532     if (re != NULL) new_free(re);
1533     fclose(f);
1534     continue;
1535     }
1536     fprintf(outfile, "Study data loaded from %s\n", p);
1537     do_study = 1; /* To get the data output if requested */
1538     }
1539     else fprintf(outfile, "No study data\n");
1540    
1541     fclose(f);
1542     goto SHOW_INFO;
1543     }
1544    
1545     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1546     the pattern; if it isn't complete, read more. */
1547    
1548 nigel 3 delimiter = *p++;
1549    
1550 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1551 nigel 3 {
1552 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1553 nigel 3 goto SKIP_DATA;
1554     }
1555    
1556     pp = p;
1557 ph10 530 poffset = (int)(p - buffer);
1558 nigel 3
1559     for(;;)
1560     {
1561 nigel 29 while (*pp != 0)
1562     {
1563     if (*pp == '\\' && pp[1] != 0) pp++;
1564     else if (*pp == delimiter) break;
1565     pp++;
1566     }
1567 nigel 3 if (*pp != 0) break;
1568 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1569 nigel 3 {
1570     fprintf(outfile, "** Unexpected EOF\n");
1571 nigel 11 done = 1;
1572     goto CONTINUE;
1573 nigel 3 }
1574 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1575 nigel 3 }
1576    
1577 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1578     pointer to the correct relative point in the buffer. */
1579    
1580     p = buffer + poffset;
1581    
1582 nigel 29 /* If the first character after the delimiter is backslash, make
1583     the pattern end with backslash. This is purely to provide a way
1584     of testing for the error message when a pattern ends with backslash. */
1585    
1586     if (pp[1] == '\\') *pp++ = '\\';
1587    
1588 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1589     for callouts. */
1590 nigel 3
1591     *pp++ = 0;
1592 nigel 75 strcpy((char *)pbuffer, (char *)p);
1593 nigel 3
1594     /* Look for options after final delimiter */
1595    
1596     options = 0;
1597     study_options = 0;
1598 nigel 31 log_store = showstore; /* default from command line */
1599    
1600 nigel 3 while (*pp != 0)
1601     {
1602     switch (*pp++)
1603     {
1604 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1605 nigel 35 case 'g': do_g = 1; break;
1606 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1607     case 'm': options |= PCRE_MULTILINE; break;
1608     case 's': options |= PCRE_DOTALL; break;
1609     case 'x': options |= PCRE_EXTENDED; break;
1610 nigel 25
1611 ph10 616 case '+':
1612     if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1613     break;
1614    
1615 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1616 nigel 93 case 'B': do_debug = 1; break;
1617 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1618 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1619 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1620 nigel 75 case 'F': do_flip = 1; break;
1621 nigel 35 case 'G': do_G = 1; break;
1622 nigel 25 case 'I': do_showinfo = 1; break;
1623 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1624 ph10 512 case 'K': do_mark = 1; break;
1625 nigel 31 case 'M': log_store = 1; break;
1626 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1627 nigel 37
1628     #if !defined NOPOSIX
1629 nigel 3 case 'P': do_posix = 1; break;
1630 nigel 37 #endif
1631    
1632 ph10 612 case 'S':
1633     if (do_study == 0) do_study = 1; else
1634     {
1635     do_study = 0;
1636     no_force_study = 1;
1637     }
1638     break;
1639    
1640 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1641 ph10 535 case 'W': options |= PCRE_UCP; break;
1642 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1643 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1644 ph10 126 case 'Z': debug_lengths = 0; break;
1645 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1646 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1647 ph10 545
1648 ph10 541 case 'T':
1649     switch (*pp++)
1650     {
1651     case '0': tables = tables0; break;
1652     case '1': tables = tables1; break;
1653 ph10 545
1654 ph10 541 case '\r':
1655     case '\n':
1656 ph10 545 case ' ':
1657     case 0:
1658 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
1659 ph10 545 goto SKIP_DATA;
1660    
1661     default:
1662 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1663 ph10 545 goto SKIP_DATA;
1664 ph10 541 }
1665 ph10 545 break;
1666 nigel 25
1667     case 'L':
1668     ppp = pp;
1669 nigel 93 /* The '\r' test here is so that it works on Windows. */
1670     /* The '0' test is just in case this is an unterminated line. */
1671     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1672 nigel 25 *ppp = 0;
1673     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1674     {
1675     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1676     goto SKIP_DATA;
1677     }
1678 nigel 93 locale_set = 1;
1679 nigel 25 tables = pcre_maketables();
1680     pp = ppp;
1681     break;
1682    
1683 nigel 75 case '>':
1684     to_file = pp;
1685     while (*pp != 0) pp++;
1686     while (isspace(pp[-1])) pp--;
1687     *pp = 0;
1688     break;
1689    
1690 nigel 91 case '<':
1691     {
1692 ph10 518 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1693 ph10 336 {
1694     options |= PCRE_JAVASCRIPT_COMPAT;
1695 ph10 345 pp += 3;
1696 ph10 336 }
1697     else
1698 ph10 345 {
1699 ph10 336 int x = check_newline(pp, outfile);
1700     if (x == 0) goto SKIP_DATA;
1701     options |= x;
1702     while (*pp++ != '>');
1703 ph10 345 }
1704 nigel 91 }
1705     break;
1706    
1707 nigel 77 case '\r': /* So that it works in Windows */
1708     case '\n':
1709     case ' ':
1710     break;
1711 nigel 75
1712 nigel 3 default:
1713     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1714     goto SKIP_DATA;
1715     }
1716     }
1717    
1718 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1719 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1720     local character tables. */
1721 nigel 3
1722 nigel 37 #if !defined NOPOSIX
1723 nigel 3 if (posix || do_posix)
1724     {
1725     int rc;
1726     int cflags = 0;
1727 nigel 75
1728 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1729     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1730 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1731 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1732     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1733 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1734 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1735 nigel 87
1736 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1737    
1738     /* Compilation failed; go back for another re, skipping to blank line
1739     if non-interactive. */
1740    
1741     if (rc != 0)
1742     {
1743 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1744 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1745     goto SKIP_DATA;
1746     }
1747     }
1748    
1749     /* Handle compiling via the native interface */
1750    
1751     else
1752 nigel 37 #endif /* !defined NOPOSIX */
1753    
1754 nigel 3 {
1755 ph10 412 unsigned long int get_options;
1756 ph10 416
1757 nigel 93 if (timeit > 0)
1758 nigel 3 {
1759     register int i;
1760     clock_t time_taken;
1761     clock_t start_time = clock();
1762 nigel 93 for (i = 0; i < timeit; i++)
1763 nigel 3 {
1764 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1765 nigel 3 if (re != NULL) free(re);
1766     }
1767     time_taken = clock() - start_time;
1768 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1769     (((double)time_taken * 1000.0) / (double)timeit) /
1770 nigel 63 (double)CLOCKS_PER_SEC);
1771 nigel 3 }
1772    
1773 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1774 nigel 3
1775     /* Compilation failed; go back for another re, skipping to blank line
1776     if non-interactive. */
1777    
1778     if (re == NULL)
1779     {
1780     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1781     SKIP_DATA:
1782     if (infile != stdin)
1783     {
1784     for (;;)
1785     {
1786 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
1787 nigel 11 {
1788     done = 1;
1789     goto CONTINUE;
1790     }
1791 nigel 3 len = (int)strlen((char *)buffer);
1792     while (len > 0 && isspace(buffer[len-1])) len--;
1793     if (len == 0) break;
1794     }
1795     fprintf(outfile, "\n");
1796     }
1797 nigel 25 goto CONTINUE;
1798 nigel 3 }
1799 ph10 416
1800     /* Compilation succeeded. It is now possible to set the UTF-8 option from
1801     within the regex; check for this so that we know how to process the data
1802 ph10 412 lines. */
1803 ph10 416
1804 ph10 412 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1805     if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1806 nigel 3
1807 ph10 412 /* Print information if required. There are now two info-returning
1808     functions. The old one has a limited interface and returns only limited
1809     data. Check that it agrees with the newer one. */
1810 nigel 3
1811 nigel 63 if (log_store)
1812     fprintf(outfile, "Memory allocation (code space): %d\n",
1813     (int)(gotten_store -
1814     sizeof(real_pcre) -
1815     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1816    
1817 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1818     and remember the store that was got. */
1819    
1820     true_size = ((real_pcre *)re)->size;
1821     regex_gotten_store = gotten_store;
1822    
1823 ph10 612 /* If -s or /S was present, study the regex to generate additional info to
1824     help with the matching, unless the pattern has the SS option, which
1825     suppresses the effect of /S (used for a few test patterns where studying is
1826     never sensible). */
1827 nigel 75
1828 ph10 612 if (do_study || (force_study && !no_force_study))
1829 nigel 75 {
1830 nigel 93 if (timeit > 0)
1831 nigel 75 {
1832     register int i;
1833     clock_t time_taken;
1834     clock_t start_time = clock();
1835 nigel 93 for (i = 0; i < timeit; i++)
1836 nigel 75 extra = pcre_study(re, study_options, &error);
1837     time_taken = clock() - start_time;
1838     if (extra != NULL) free(extra);
1839 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1840     (((double)time_taken * 1000.0) / (double)timeit) /
1841 nigel 75 (double)CLOCKS_PER_SEC);
1842     }
1843     extra = pcre_study(re, study_options, &error);
1844     if (error != NULL)
1845     fprintf(outfile, "Failed to study: %s\n", error);
1846     else if (extra != NULL)
1847     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1848     }
1849 ph10 512
1850 ph10 510 /* If /K was present, we set up for handling MARK data. */
1851 ph10 512
1852 ph10 510 if (do_mark)
1853     {
1854     if (extra == NULL)
1855     {
1856     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1857     extra->flags = 0;
1858     }
1859 ph10 512 extra->mark = &markptr;
1860 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
1861 ph10 512 }
1862 nigel 75
1863     /* If the 'F' option was present, we flip the bytes of all the integer
1864     fields in the regex data block and the study block. This is to make it
1865     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1866     compiled on a different architecture. */
1867    
1868     if (do_flip)
1869     {
1870     real_pcre *rre = (real_pcre *)re;
1871 ph10 259 rre->magic_number =
1872 ph10 255 byteflip(rre->magic_number, sizeof(rre->magic_number));
1873 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
1874     rre->options = byteflip(rre->options, sizeof(rre->options));
1875 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1876 ph10 259 rre->top_bracket =
1877 ph10 255 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1878 ph10 259 rre->top_backref =
1879 ph10 255 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1880 ph10 259 rre->first_byte =
1881 ph10 255 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1882 ph10 259 rre->req_byte =
1883 ph10 255 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1884     rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1885 nigel 75 sizeof(rre->name_table_offset));
1886 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1887 nigel 75 sizeof(rre->name_entry_size));
1888 ph10 259 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1889 ph10 255 sizeof(rre->name_count));
1890 nigel 75
1891     if (extra != NULL)
1892     {
1893     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1894     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1895 ph10 455 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1896     rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1897 nigel 75 }
1898     }
1899    
1900     /* Extract information from the compiled data if required */
1901    
1902     SHOW_INFO:
1903    
1904 nigel 93 if (do_debug)
1905     {
1906     fprintf(outfile, "------------------------------------------------------------------\n");
1907 ph10 116 pcre_printint(re, outfile, debug_lengths);
1908 nigel 93 }
1909 ph10 416
1910 ph10 412 /* We already have the options in get_options (see above) */
1911 nigel 93
1912 nigel 25 if (do_showinfo)
1913 nigel 3 {
1914 ph10 412 unsigned long int all_options;
1915 nigel 79 #if !defined NOINFOCHECK
1916 nigel 43 int old_first_char, old_options, old_count;
1917 nigel 79 #endif
1918 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1919 ph10 227 hascrorlf;
1920 nigel 63 int nameentrysize, namecount;
1921     const uschar *nametable;
1922 nigel 3
1923 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1924     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1925     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1926 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1927 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1928 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1929     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1930 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1931 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1932     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1933 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1934 nigel 43
1935 nigel 79 #if !defined NOINFOCHECK
1936 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1937 nigel 3 if (count < 0) fprintf(outfile,
1938 nigel 43 "Error %d from pcre_info()\n", count);
1939 nigel 3 else
1940     {
1941 nigel 43 if (old_count != count) fprintf(outfile,
1942     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1943     old_count);
1944 nigel 37
1945 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1946     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1947     first_char, old_first_char);
1948 nigel 37
1949 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1950     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1951     get_options, old_options);
1952 nigel 43 }
1953 nigel 79 #endif
1954 nigel 43
1955 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1956 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1957 nigel 77 (int)size, (int)regex_gotten_store);
1958 nigel 43
1959     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1960     if (backrefmax > 0)
1961     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1962 nigel 63
1963     if (namecount > 0)
1964     {
1965     fprintf(outfile, "Named capturing subpatterns:\n");
1966     while (namecount-- > 0)
1967     {
1968     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1969     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1970     GET2(nametable, 0));
1971     nametable += nameentrysize;
1972     }
1973     }
1974 ph10 172
1975 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1976 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1977 nigel 63
1978 nigel 75 all_options = ((real_pcre *)re)->options;
1979 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1980 nigel 75
1981 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1982 ph10 576 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1983 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1984     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1985     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1986     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1987 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1988 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1989 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1990     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1991 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1992     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1993     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1994 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1995 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1996 ph10 518 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1997 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1998 ph10 576 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
1999 nigel 91 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2000 ph10 172
2001 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2002 nigel 43
2003 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
2004 nigel 91 {
2005     case PCRE_NEWLINE_CR:
2006     fprintf(outfile, "Forced newline sequence: CR\n");
2007     break;
2008 nigel 43
2009 nigel 91 case PCRE_NEWLINE_LF:
2010     fprintf(outfile, "Forced newline sequence: LF\n");
2011     break;
2012    
2013     case PCRE_NEWLINE_CRLF:
2014     fprintf(outfile, "Forced newline sequence: CRLF\n");
2015     break;
2016    
2017 ph10 149 case PCRE_NEWLINE_ANYCRLF:
2018     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2019     break;
2020    
2021 nigel 93 case PCRE_NEWLINE_ANY:
2022     fprintf(outfile, "Forced newline sequence: ANY\n");
2023     break;
2024    
2025 nigel 91 default:
2026     break;
2027     }
2028    
2029 nigel 43 if (first_char == -1)
2030     {
2031 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
2032 nigel 43 }
2033     else if (first_char < 0)
2034     {
2035     fprintf(outfile, "No first char\n");
2036     }
2037     else
2038     {
2039 nigel 63 int ch = first_char & 255;
2040 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2041 nigel 63 "" : " (caseless)";
2042 nigel 93 if (PRINTHEX(ch))
2043 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2044 nigel 3 else
2045 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2046 nigel 43 }
2047 nigel 37
2048 nigel 43 if (need_char < 0)
2049     {
2050     fprintf(outfile, "No need char\n");
2051 nigel 3 }
2052 nigel 43 else
2053     {
2054 nigel 63 int ch = need_char & 255;
2055 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2056 nigel 63 "" : " (caseless)";
2057 nigel 93 if (PRINTHEX(ch))
2058 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2059 nigel 43 else
2060 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2061 nigel 43 }
2062 nigel 75
2063     /* Don't output study size; at present it is in any case a fixed
2064     value, but it varies, depending on the computer architecture, and
2065     so messes up the test suite. (And with the /F option, it might be
2066 ph10 612 flipped.) If study was forced by an external -s, don't show this
2067     information unless -i or -d was also present. This means that, except
2068     when auto-callouts are involved, the output from runs with and without
2069     -s should be identical. */
2070 nigel 75
2071 ph10 612 if (do_study || (force_study && showinfo && !no_force_study))
2072 nigel 75 {
2073     if (extra == NULL)
2074     fprintf(outfile, "Study returned NULL\n");
2075     else
2076     {
2077     uschar *start_bits = NULL;
2078 ph10 455 int minlength;
2079 ph10 461
2080 ph10 455 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2081 ph10 461 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2082    
2083 nigel 75 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2084     if (start_bits == NULL)
2085 ph10 455 fprintf(outfile, "No set of starting bytes\n");
2086 nigel 75 else
2087     {
2088     int i;
2089     int c = 24;
2090     fprintf(outfile, "Starting byte set: ");
2091     for (i = 0; i < 256; i++)
2092     {
2093     if ((start_bits[i/8] & (1<<(i&7))) != 0)
2094     {
2095     if (c > 75)
2096     {
2097     fprintf(outfile, "\n ");
2098     c = 2;
2099     }
2100 nigel 93 if (PRINTHEX(i) && i != ' ')
2101 nigel 75 {
2102     fprintf(outfile, "%c ", i);
2103     c += 2;
2104     }
2105     else
2106     {
2107     fprintf(outfile, "\\x%02x ", i);
2108     c += 5;
2109     }
2110     }
2111     }
2112     fprintf(outfile, "\n");
2113     }
2114     }
2115     }
2116 nigel 3 }
2117    
2118 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
2119     that is all. The first 8 bytes of the file are the regex length and then
2120     the study length, in big-endian order. */
2121 nigel 3
2122 nigel 75 if (to_file != NULL)
2123 nigel 3 {
2124 nigel 75 FILE *f = fopen((char *)to_file, "wb");
2125     if (f == NULL)
2126 nigel 3 {
2127 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2128 nigel 3 }
2129 nigel 75 else
2130     {
2131     uschar sbuf[8];
2132 ph10 255 sbuf[0] = (uschar)((true_size >> 24) & 255);
2133     sbuf[1] = (uschar)((true_size >> 16) & 255);
2134     sbuf[2] = (uschar)((true_size >> 8) & 255);
2135     sbuf[3] = (uschar)((true_size) & 255);
2136 ph10 259
2137 ph10 255 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2138     sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2139     sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2140     sbuf[7] = (uschar)((true_study_size) & 255);
2141 nigel 3
2142 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
2143     fwrite(re, 1, true_size, f) < true_size)
2144     {
2145     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2146     }
2147 nigel 3 else
2148     {
2149 ph10 612 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2150    
2151     /* If there is study data (extra is not NULL), write it and report
2152     whether the write succeeded. */
2153    
2154 nigel 75 if (extra != NULL)
2155 nigel 3 {
2156 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
2157     true_study_size)
2158 nigel 3 {
2159 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
2160     strerror(errno));
2161 nigel 3 }
2162 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
2163 nigel 3 }
2164     }
2165 nigel 75 fclose(f);
2166 nigel 3 }
2167 nigel 77
2168     new_free(re);
2169     if (extra != NULL) new_free(extra);
2170 ph10 545 if (locale_set)
2171 ph10 541 {
2172     new_free((void *)tables);
2173     setlocale(LC_CTYPE, "C");
2174 ph10 545 locale_set = 0;
2175     }
2176 nigel 75 continue; /* With next regex */
2177 nigel 3 }
2178 nigel 75 } /* End of non-POSIX compile */
2179 nigel 3
2180     /* Read data lines and test them */
2181    
2182     for (;;)
2183     {
2184 nigel 87 uschar *q;
2185 ph10 147 uschar *bptr;
2186 nigel 57 int *use_offsets = offsets;
2187 nigel 53 int use_size_offsets = size_offsets;
2188 nigel 63 int callout_data = 0;
2189     int callout_data_set = 0;
2190 nigel 3 int count, c;
2191 nigel 29 int copystrings = 0;
2192 ph10 386 int find_match_limit = default_find_match_limit;
2193 nigel 29 int getstrings = 0;
2194     int getlist = 0;
2195 nigel 39 int gmatched = 0;
2196 nigel 35 int start_offset = 0;
2197 ph10 579 int start_offset_sign = 1;
2198 nigel 41 int g_notempty = 0;
2199 nigel 77 int use_dfa = 0;
2200 nigel 3
2201     options = 0;
2202    
2203 nigel 91 *copynames = 0;
2204     *getnames = 0;
2205    
2206     copynamesptr = copynames;
2207     getnamesptr = getnames;
2208    
2209 nigel 63 pcre_callout = callout;
2210     first_callout = 1;
2211     callout_extra = 0;
2212     callout_count = 0;
2213     callout_fail_count = 999999;
2214     callout_fail_id = -1;
2215 nigel 73 show_malloc = 0;
2216 nigel 63
2217 nigel 91 if (extra != NULL) extra->flags &=
2218     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2219    
2220     len = 0;
2221     for (;;)
2222 nigel 11 {
2223 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2224 nigel 91 {
2225 ph10 537 if (len > 0) /* Reached EOF without hitting a newline */
2226     {
2227 ph10 545 fprintf(outfile, "\n");
2228 ph10 537 break;
2229 ph10 545 }
2230 nigel 91 done = 1;
2231     goto CONTINUE;
2232     }
2233     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2234     len = (int)strlen((char *)buffer);
2235     if (buffer[len-1] == '\n') break;
2236 nigel 11 }
2237 nigel 3
2238     while (len > 0 && isspace(buffer[len-1])) len--;
2239     buffer[len] = 0;
2240     if (len == 0) break;
2241    
2242     p = buffer;
2243     while (isspace(*p)) p++;
2244    
2245 ph10 147 bptr = q = dbuffer;
2246 nigel 3 while ((c = *p++) != 0)
2247     {
2248     int i = 0;
2249     int n = 0;
2250 nigel 63
2251 nigel 3 if (c == '\\') switch ((c = *p++))
2252     {
2253     case 'a': c = 7; break;
2254     case 'b': c = '\b'; break;
2255     case 'e': c = 27; break;
2256     case 'f': c = '\f'; break;
2257     case 'n': c = '\n'; break;
2258     case 'r': c = '\r'; break;
2259     case 't': c = '\t'; break;
2260     case 'v': c = '\v'; break;
2261    
2262     case '0': case '1': case '2': case '3':
2263     case '4': case '5': case '6': case '7':
2264     c -= '0';
2265     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2266     c = c * 8 + *p++ - '0';
2267 nigel 91
2268     #if !defined NOUTF8
2269     if (use_utf8 && c > 255)
2270     {
2271     unsigned char buff8[8];
2272     int ii, utn;
2273     utn = ord2utf8(c, buff8);
2274     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2275     c = buff8[ii]; /* Last byte */
2276     }
2277     #endif
2278 nigel 3 break;
2279    
2280     case 'x':
2281 nigel 49
2282     /* Handle \x{..} specially - new Perl thing for utf8 */
2283    
2284 nigel 79 #if !defined NOUTF8
2285 nigel 49 if (*p == '{')
2286     {
2287     unsigned char *pt = p;
2288     c = 0;
2289     while (isxdigit(*(++pt)))
2290     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2291     if (*pt == '}')
2292     {
2293 nigel 67 unsigned char buff8[8];
2294 nigel 49 int ii, utn;
2295 ph10 355 if (use_utf8)
2296 ph10 358 {
2297 ph10 355 utn = ord2utf8(c, buff8);
2298     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2299     c = buff8[ii]; /* Last byte */
2300     }
2301     else
2302     {
2303 ph10 358 if (c > 255)
2304 ph10 355 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2305     "UTF-8 mode is not enabled.\n"
2306     "** Truncation will probably give the wrong result.\n", c);
2307 ph10 358 }
2308 nigel 49 p = pt + 1;
2309     break;
2310     }
2311     /* Not correct form; fall through */
2312     }
2313 nigel 79 #endif
2314 nigel 49
2315     /* Ordinary \x */
2316    
2317 nigel 3 c = 0;
2318     while (i++ < 2 && isxdigit(*p))
2319     {
2320     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2321     p++;
2322     }
2323     break;
2324    
2325 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
2326 nigel 3 p--;
2327     continue;
2328    
2329 nigel 75 case '>':
2330 ph10 579 if (*p == '-')
2331 ph10 567 {
2332     start_offset_sign = -1;
2333     p++;
2334 ph10 579 }
2335 nigel 75 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2336 ph10 579 start_offset *= start_offset_sign;
2337 nigel 75 continue;
2338    
2339 nigel 3 case 'A': /* Option setting */
2340     options |= PCRE_ANCHORED;
2341     continue;
2342    
2343     case 'B':
2344     options |= PCRE_NOTBOL;
2345     continue;
2346    
2347 nigel 29 case 'C':
2348 nigel 63 if (isdigit(*p)) /* Set copy string */
2349     {
2350     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2351     copystrings |= 1 << n;
2352     }
2353     else if (isalnum(*p))
2354     {
2355 nigel 91 uschar *npp = copynamesptr;
2356 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2357 nigel 91 *npp++ = 0;
2358 nigel 67 *npp = 0;
2359 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2360 nigel 63 if (n < 0)
2361 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2362     copynamesptr = npp;
2363 nigel 63 }
2364     else if (*p == '+')
2365     {
2366     callout_extra = 1;
2367     p++;
2368     }
2369     else if (*p == '-')
2370     {
2371     pcre_callout = NULL;
2372     p++;
2373     }
2374     else if (*p == '!')
2375     {
2376     callout_fail_id = 0;
2377     p++;
2378     while(isdigit(*p))
2379     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2380     callout_fail_count = 0;
2381     if (*p == '!')
2382     {
2383     p++;
2384     while(isdigit(*p))
2385     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2386     }
2387     }
2388     else if (*p == '*')
2389     {
2390     int sign = 1;
2391     callout_data = 0;
2392     if (*(++p) == '-') { sign = -1; p++; }
2393     while(isdigit(*p))
2394     callout_data = callout_data * 10 + *p++ - '0';
2395     callout_data *= sign;
2396     callout_data_set = 1;
2397     }
2398 nigel 29 continue;
2399    
2400 nigel 79 #if !defined NODFA
2401 nigel 77 case 'D':
2402 nigel 79 #if !defined NOPOSIX
2403 nigel 77 if (posix || do_posix)
2404     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2405     else
2406 nigel 79 #endif
2407 nigel 77 use_dfa = 1;
2408     continue;
2409 ph10 553 #endif
2410 nigel 77
2411 ph10 553 #if !defined NODFA
2412 nigel 77 case 'F':
2413     options |= PCRE_DFA_SHORTEST;
2414     continue;
2415 nigel 79 #endif
2416 nigel 77
2417 nigel 29 case 'G':
2418 nigel 63 if (isdigit(*p))
2419     {
2420     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2421     getstrings |= 1 << n;
2422     }
2423     else if (isalnum(*p))
2424     {
2425 nigel 91 uschar *npp = getnamesptr;
2426 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2427 nigel 91 *npp++ = 0;
2428 nigel 67 *npp = 0;
2429 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2430 nigel 63 if (n < 0)
2431 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2432     getnamesptr = npp;
2433 nigel 63 }
2434 nigel 29 continue;
2435    
2436     case 'L':
2437     getlist = 1;
2438     continue;
2439    
2440 nigel 63 case 'M':
2441     find_match_limit = 1;
2442     continue;
2443    
2444 nigel 37 case 'N':
2445 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
2446     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2447 ph10 461 else
2448 ph10 442 options |= PCRE_NOTEMPTY;
2449 nigel 37 continue;
2450    
2451 nigel 3 case 'O':
2452     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2453 nigel 53 if (n > size_offsets_max)
2454     {
2455     size_offsets_max = n;
2456 nigel 57 free(offsets);
2457 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2458 nigel 53 if (offsets == NULL)
2459     {
2460     printf("** Failed to get %d bytes of memory for offsets vector\n",
2461 ph10 151 (int)(size_offsets_max * sizeof(int)));
2462 nigel 77 yield = 1;
2463     goto EXIT;
2464 nigel 53 }
2465     }
2466     use_size_offsets = n;
2467 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2468 nigel 3 continue;
2469    
2470 nigel 75 case 'P':
2471 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2472 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2473 nigel 75 continue;
2474    
2475 nigel 91 case 'Q':
2476     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2477     if (extra == NULL)
2478     {
2479     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2480     extra->flags = 0;
2481     }
2482     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2483     extra->match_limit_recursion = n;
2484     continue;
2485    
2486     case 'q':
2487     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2488     if (extra == NULL)
2489     {
2490     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2491     extra->flags = 0;
2492     }
2493     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2494     extra->match_limit = n;
2495     continue;
2496    
2497 nigel 79 #if !defined NODFA
2498 nigel 77 case 'R':
2499     options |= PCRE_DFA_RESTART;
2500     continue;
2501 nigel 79 #endif
2502 nigel 77
2503 nigel 73 case 'S':
2504     show_malloc = 1;
2505     continue;
2506 ph10 392
2507 ph10 389 case 'Y':
2508     options |= PCRE_NO_START_OPTIMIZE;
2509 ph10 392 continue;
2510 nigel 73
2511 nigel 3 case 'Z':
2512     options |= PCRE_NOTEOL;
2513     continue;
2514 nigel 71
2515     case '?':
2516     options |= PCRE_NO_UTF8_CHECK;
2517     continue;
2518 nigel 91
2519     case '<':
2520     {
2521     int x = check_newline(p, outfile);
2522     if (x == 0) goto NEXT_DATA;
2523     options |= x;
2524     while (*p++ != '>');
2525     }
2526     continue;
2527 nigel 3 }
2528 nigel 9 *q++ = c;
2529 nigel 3 }
2530 nigel 9 *q = 0;
2531 ph10 530 len = (int)(q - dbuffer);
2532 ph10 545
2533 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
2534 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2535 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
2536 ph10 371
2537 ph10 363 #if !defined NOPOSIX
2538     if (posix || do_posix)
2539     {
2540     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2541 ph10 371 bptr += buffer_size - len - 1;
2542 ph10 363 }
2543 ph10 371 else
2544     #endif
2545 ph10 363 {
2546     memmove(bptr + buffer_size - len, bptr, len);
2547 ph10 371 bptr += buffer_size - len;
2548     }
2549 nigel 3
2550 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
2551     {
2552     printf("**Match limit not relevant for DFA matching: ignored\n");
2553     find_match_limit = 0;
2554     }
2555    
2556 nigel 3 /* Handle matching via the POSIX interface, which does not
2557 nigel 63 support timing or playing with the match limit or callout data. */
2558 nigel 3
2559 nigel 37 #if !defined NOPOSIX
2560 nigel 3 if (posix || do_posix)
2561     {
2562     int rc;
2563     int eflags = 0;
2564 nigel 63 regmatch_t *pmatch = NULL;
2565     if (use_size_offsets > 0)
2566 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2567 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2568     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2569 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2570 nigel 3
2571 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2572 nigel 3
2573     if (rc != 0)
2574     {
2575 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2576 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2577     }
2578 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2579     != 0)
2580     {
2581     fprintf(outfile, "Matched with REG_NOSUB\n");
2582     }
2583 nigel 3 else
2584     {
2585 nigel 7 size_t i;
2586 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
2587 nigel 3 {
2588     if (pmatch[i].rm_so >= 0)
2589     {
2590 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2591 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2592     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2593 nigel 3 fprintf(outfile, "\n");
2594 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
2595 nigel 35 {
2596 ph10 616 fprintf(outfile, "%2d+ ", (int)i);
2597 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2598     outfile);
2599 nigel 35 fprintf(outfile, "\n");
2600     }
2601 nigel 3 }
2602     }
2603     }
2604 nigel 53 free(pmatch);
2605 nigel 3 }
2606    
2607 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2608 nigel 3
2609 nigel 37 else
2610     #endif /* !defined NOPOSIX */
2611    
2612 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2613 nigel 3 {
2614 ph10 512 markptr = NULL;
2615    
2616 nigel 93 if (timeitm > 0)
2617 nigel 3 {
2618     register int i;
2619     clock_t time_taken;
2620     clock_t start_time = clock();
2621 nigel 77
2622 nigel 79 #if !defined NODFA
2623 nigel 77 if (all_use_dfa || use_dfa)
2624     {
2625     int workspace[1000];
2626 nigel 93 for (i = 0; i < timeitm; i++)
2627 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2628 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2629     sizeof(workspace)/sizeof(int));
2630     }
2631     else
2632 nigel 79 #endif
2633 nigel 77
2634 nigel 93 for (i = 0; i < timeitm; i++)
2635 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2636 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2637 nigel 77
2638 nigel 3 time_taken = clock() - start_time;
2639 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2640     (((double)time_taken * 1000.0) / (double)timeitm) /
2641 nigel 63 (double)CLOCKS_PER_SEC);
2642 nigel 3 }
2643    
2644 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2645 nigel 87 varying limits in order to find the minimum value for the match limit and
2646     for the recursion limit. */
2647 nigel 63
2648     if (find_match_limit)
2649     {
2650     if (extra == NULL)
2651     {
2652 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2653 nigel 63 extra->flags = 0;
2654     }
2655    
2656 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2657 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2658     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2659     PCRE_ERROR_MATCHLIMIT, "match()");
2660 nigel 63
2661 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2662     options|g_notempty, use_offsets, use_size_offsets,
2663     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2664     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2665 nigel 63 }
2666    
2667     /* If callout_data is set, use the interface with additional data */
2668    
2669     else if (callout_data_set)
2670     {
2671     if (extra == NULL)
2672     {
2673 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2674 nigel 63 extra->flags = 0;
2675     }
2676     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2677 nigel 71 extra->callout_data = &callout_data;
2678 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2679     options | g_notempty, use_offsets, use_size_offsets);
2680     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2681     }
2682    
2683     /* The normal case is just to do the match once, with the default
2684     value of match_limit. */
2685    
2686 nigel 79 #if !defined NODFA
2687 nigel 77 else if (all_use_dfa || use_dfa)
2688     {
2689     int workspace[1000];
2690 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2691 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2692     sizeof(workspace)/sizeof(int));
2693     if (count == 0)
2694     {
2695     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2696     count = use_size_offsets/2;
2697     }
2698     }
2699 nigel 79 #endif
2700 nigel 77
2701 nigel 75 else
2702     {
2703     count = pcre_exec(re, extra, (char *)bptr, len,
2704     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2705 nigel 77 if (count == 0)
2706     {
2707     fprintf(outfile, "Matched, but too many substrings\n");
2708     count = use_size_offsets/3;
2709     }
2710 nigel 75 }
2711 nigel 3
2712 nigel 39 /* Matched */
2713    
2714 nigel 3 if (count >= 0)
2715     {
2716 nigel 93 int i, maxcount;
2717    
2718     #if !defined NODFA
2719     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2720     #endif
2721     maxcount = use_size_offsets/3;
2722    
2723     /* This is a check against a lunatic return value. */
2724    
2725     if (count > maxcount)
2726     {
2727     fprintf(outfile,
2728     "** PCRE error: returned count %d is too big for offset size %d\n",
2729     count, use_size_offsets);
2730     count = use_size_offsets/3;
2731     if (do_g || do_G)
2732     {
2733     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2734     do_g = do_G = FALSE; /* Break g/G loop */
2735     }
2736     }
2737    
2738 nigel 29 for (i = 0; i < count * 2; i += 2)
2739 nigel 3 {
2740 nigel 57 if (use_offsets[i] < 0)
2741 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2742     else
2743     {
2744     fprintf(outfile, "%2d: ", i/2);
2745 nigel 63 (void)pchars(bptr + use_offsets[i],
2746     use_offsets[i+1] - use_offsets[i], outfile);
2747 nigel 3 fprintf(outfile, "\n");
2748 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
2749 nigel 35 {
2750 ph10 616 fprintf(outfile, "%2d+ ", i/2);
2751     (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2752     outfile);
2753     fprintf(outfile, "\n");
2754 nigel 35 }
2755 nigel 3 }
2756     }
2757 ph10 512
2758 ph10 510 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2759 nigel 29
2760     for (i = 0; i < 32; i++)
2761     {
2762     if ((copystrings & (1 << i)) != 0)
2763     {
2764 nigel 91 char copybuffer[256];
2765 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2766 nigel 37 i, copybuffer, sizeof(copybuffer));
2767 nigel 29 if (rc < 0)
2768     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2769     else
2770 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2771 nigel 29 }
2772     }
2773    
2774 nigel 91 for (copynamesptr = copynames;
2775     *copynamesptr != 0;
2776     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2777     {
2778     char copybuffer[256];
2779     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2780     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2781     if (rc < 0)
2782     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2783     else
2784     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2785     }
2786    
2787 nigel 29 for (i = 0; i < 32; i++)
2788     {
2789     if ((getstrings & (1 << i)) != 0)
2790     {
2791     const char *substring;
2792 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2793 nigel 29 i, &substring);
2794     if (rc < 0)
2795     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2796     else
2797     {
2798     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2799 nigel 49 pcre_free_substring(substring);
2800 nigel 29 }
2801     }
2802     }
2803    
2804 nigel 91 for (getnamesptr = getnames;
2805     *getnamesptr != 0;
2806     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2807     {
2808     const char *substring;
2809     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2810     count, (char *)getnamesptr, &substring);
2811     if (rc < 0)
2812     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2813     else
2814     {
2815     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2816     pcre_free_substring(substring);
2817     }
2818     }
2819    
2820 nigel 29 if (getlist)
2821     {
2822     const char **stringlist;
2823 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2824 nigel 29 &stringlist);
2825     if (rc < 0)
2826     fprintf(outfile, "get substring list failed %d\n", rc);
2827     else
2828     {
2829     for (i = 0; i < count; i++)
2830     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2831     if (stringlist[i] != NULL)
2832     fprintf(outfile, "string list not terminated by NULL\n");
2833 nigel 49 /* free((void *)stringlist); */
2834     pcre_free_substring_list(stringlist);
2835 nigel 29 }
2836     }
2837 nigel 39 }
2838 nigel 29
2839 nigel 75 /* There was a partial match */
2840    
2841     else if (count == PCRE_ERROR_PARTIAL)
2842     {
2843 ph10 510 if (markptr == NULL) fprintf(outfile, "Partial match");
2844     else fprintf(outfile, "Partial match, mark=%s", markptr);
2845 ph10 426 if (use_size_offsets > 1)
2846     {
2847     fprintf(outfile, ": ");
2848     pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2849 ph10 461 outfile);
2850     }
2851 nigel 77 fprintf(outfile, "\n");
2852 nigel 75 break; /* Out of the /g loop */
2853     }
2854    
2855 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2856 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2857     to advance the start offset, and continue. We won't be at the end of the
2858     string - that was checked before setting g_notempty.
2859 nigel 39
2860 ph10 566 Complication arises in the case when the newline convention is "any",
2861 ph10 579 "crlf", or "anycrlf". If the previous match was at the end of a line
2862     terminated by CRLF, an advance of one character just passes the \r,
2863 ph10 566 whereas we should prefer the longer newline sequence, as does the code in
2864 ph10 579 pcre_exec(). Fudge the offset value to achieve this. We check for a
2865     newline setting in the pattern; if none was set, use pcre_config() to
2866 ph10 566 find the default.
2867 ph10 144
2868 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2869     character, not one byte. */
2870    
2871 nigel 3 else
2872     {
2873 nigel 41 if (g_notempty != 0)
2874 nigel 35 {
2875 nigel 73 int onechar = 1;
2876 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2877 nigel 57 use_offsets[0] = start_offset;
2878 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2879     {
2880     int d;
2881     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2882 ph10 391 /* Note that these values are always the ASCII ones, even in
2883     EBCDIC environments. CR = 13, NL = 10. */
2884     obits = (d == 13)? PCRE_NEWLINE_CR :
2885     (d == 10)? PCRE_NEWLINE_LF :
2886     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2887 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2888 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2889     }
2890 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2891 ph10 566 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2892 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2893 ph10 149 &&
2894 ph10 143 start_offset < len - 1 &&
2895     bptr[start_offset] == '\r' &&
2896     bptr[start_offset+1] == '\n')
2897 ph10 144 onechar++;
2898 ph10 143 else if (use_utf8)
2899 nigel 73 {
2900     while (start_offset + onechar < len)
2901     {
2902 ph10 566 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2903 ph10 579 onechar++;
2904 nigel 73 }
2905     }
2906     use_offsets[1] = start_offset + onechar;
2907 nigel 35 }
2908 nigel 41 else
2909     {
2910 ph10 598 switch(count)
2911     {
2912     case PCRE_ERROR_NOMATCH:
2913 ph10 512 if (gmatched == 0)
2914 ph10 510 {
2915     if (markptr == NULL) fprintf(outfile, "No match\n");
2916     else fprintf(outfile, "No match, mark = %s\n", markptr);
2917 ph10 512 }
2918 ph10 598 break;
2919    
2920     case PCRE_ERROR_BADUTF8:
2921     case PCRE_ERROR_SHORTUTF8:
2922     fprintf(outfile, "Error %d (%s UTF-8 string)", count,
2923     (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
2924     if (use_size_offsets >= 2)
2925     fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
2926     use_offsets[1]);
2927     fprintf(outfile, "\n");
2928     break;
2929    
2930     default:
2931 ph10 604 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
2932     fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
2933     else
2934     fprintf(outfile, "Error %d (Unexpected value)\n", count);
2935 ph10 598 break;
2936 nigel 41 }
2937 ph10 598
2938 nigel 41 break; /* Out of the /g loop */
2939     }
2940 nigel 3 }
2941 nigel 35
2942 nigel 39 /* If not /g or /G we are done */
2943    
2944     if (!do_g && !do_G) break;
2945    
2946 nigel 41 /* If we have matched an empty string, first check to see if we are at
2947 ph10 442 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2948     Perl's /g option does. This turns out to be rather cunning. First we set
2949     PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2950 nigel 47 same point. If this fails (picked up above) we advance to the next
2951 ph10 143 character. */
2952 ph10 142
2953 nigel 41 g_notempty = 0;
2954 ph10 142
2955 nigel 57 if (use_offsets[0] == use_offsets[1])
2956 nigel 41 {
2957 nigel 57 if (use_offsets[0] == len) break;
2958 ph10 442 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2959 nigel 41 }
2960 nigel 39
2961     /* For /g, update the start offset, leaving the rest alone */
2962    
2963 ph10 143 if (do_g) start_offset = use_offsets[1];
2964 nigel 39
2965     /* For /G, update the pointer and length */
2966    
2967     else
2968 nigel 35 {
2969 ph10 143 bptr += use_offsets[1];
2970     len -= use_offsets[1];
2971 nigel 35 }
2972 nigel 39 } /* End of loop for /g and /G */
2973 nigel 91
2974     NEXT_DATA: continue;
2975 nigel 39 } /* End of loop for data lines */
2976 nigel 3
2977 nigel 11 CONTINUE:
2978 nigel 37
2979     #if !defined NOPOSIX
2980 nigel 3 if (posix || do_posix) regfree(&preg);
2981 nigel 37 #endif
2982    
2983 nigel 77 if (re != NULL) new_free(re);
2984     if (extra != NULL) new_free(extra);
2985 ph10 541 if (locale_set)
2986 nigel 25 {
2987 nigel 77 new_free((void *)tables);
2988 nigel 25 setlocale(LC_CTYPE, "C");
2989 nigel 93 locale_set = 0;
2990 nigel 25 }
2991 nigel 3 }
2992    
2993 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2994 nigel 77
2995     EXIT:
2996    
2997     if (infile != NULL && infile != stdin) fclose(infile);
2998     if (outfile != NULL && outfile != stdout) fclose(outfile);
2999    
3000     free(buffer);
3001     free(dbuffer);
3002     free(pbuffer);
3003     free(offsets);
3004    
3005     return yield;
3006 nigel 3 }
3007    
3008 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12