/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 598 - (hide annotations) (download)
Sat May 7 15:37:31 2011 UTC (3 years, 5 months ago) by ph10
File MIME type: text/plain
File size: 89264 byte(s)
Pass back detailed info when UTF-8 check fails at runtime.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
43 nigel 3 #include <ctype.h>
44     #include <stdio.h>
45     #include <string.h>
46     #include <stdlib.h>
47     #include <time.h>
48 nigel 25 #include <locale.h>
49 nigel 75 #include <errno.h>
50 nigel 3
51 ph10 287 #ifdef SUPPORT_LIBREADLINE
52 ph10 343 #ifdef HAVE_UNISTD_H
53 ph10 287 #include <unistd.h>
54 ph10 343 #endif
55 ph10 287 #include <readline/readline.h>
56     #include <readline/history.h>
57     #endif
58 nigel 93
59 ph10 287
60 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
61     input and output without "b"; then I was told that "b" was needed in some
62     environments, so it was added for release 5.0 to both the input and output. (It
63     makes no difference on Unix-like systems.) Later I was told that it is wrong
64     for the input on Windows. I've now abstracted the modes into two macros that
65     are set here, to make it easier to fiddle with them, and removed "b" from the
66     input mode under Windows. */
67    
68     #if defined(_WIN32) || defined(WIN32)
69     #include <io.h> /* For _setmode() */
70     #include <fcntl.h> /* For _O_BINARY */
71     #define INPUT_MODE "r"
72     #define OUTPUT_MODE "wb"
73    
74 ph10 411 #ifndef isatty
75     #define isatty _isatty /* This is what Windows calls them, I'm told, */
76     #endif /* though in some environments they seem to */
77     /* be already defined, hence the #ifndefs. */
78     #ifndef fileno
79 ph10 343 #define fileno _fileno
80 ph10 411 #endif
81 ph10 343
82 ph10 580 /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84     #ifdef __BORLANDC__
85     #define _setmode(handle, mode) setmode(handle, mode)
86     #endif
87    
88     /* Not Windows */
89    
90 nigel 93 #else
91     #include <sys/time.h> /* These two includes are needed */
92     #include <sys/resource.h> /* for setrlimit(). */
93     #define INPUT_MODE "rb"
94     #define OUTPUT_MODE "wb"
95 nigel 91 #endif
96    
97 nigel 93
98 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
99     displaying the results of pcre_study() and we also need to know about the
100     internal macros, structures, and other internal data values; pcretest has
101     "inside information" compared to a program that strictly follows the PCRE API.
102 nigel 37
103 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105     appropriately for an application, not for building PCRE. */
106 nigel 77
107 ph10 145 #include "pcre.h"
108 nigel 77 #include "pcre_internal.h"
109    
110 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
111     to keep two copies, we include the source file here, changing the names of the
112     external symbols to prevent clashes. */
113 nigel 77
114 ph10 351 #define _pcre_ucp_gentype ucp_gentype
115 nigel 85 #define _pcre_utf8_table1 utf8_table1
116     #define _pcre_utf8_table1_size utf8_table1_size
117     #define _pcre_utf8_table2 utf8_table2
118     #define _pcre_utf8_table3 utf8_table3
119     #define _pcre_utf8_table4 utf8_table4
120     #define _pcre_utt utt
121     #define _pcre_utt_size utt_size
122 ph10 240 #define _pcre_utt_names utt_names
123 nigel 85 #define _pcre_OP_lengths OP_lengths
124    
125     #include "pcre_tables.c"
126    
127     /* We also need the pcre_printint() function for printing out compiled
128     patterns. This function is in a separate file so that it can be included in
129 ph10 507 pcre_compile.c when that module is compiled with debugging enabled. It needs to
130 ph10 498 know which case is being compiled. */
131 nigel 85
132 ph10 498 #define COMPILING_PCRETEST
133     #include "pcre_printint.src"
134    
135     /* The definition of the macro PRINTABLE, which determines whether to print an
136 nigel 93 output character as-is or as a hex value when showing compiled patterns, is
137 ph10 498 contained in the printint.src file. We use it here also, in cases when the
138     locale has not been explicitly changed, so as to get consistent output from
139     systems that differ in their output from isprint() even in the "C" locale. */
140 nigel 93
141     #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
142 nigel 85
143 nigel 37 /* It is possible to compile this test program without including support for
144     testing the POSIX interface, though this is not available via the standard
145     Makefile. */
146    
147     #if !defined NOPOSIX
148 nigel 3 #include "pcreposix.h"
149 nigel 37 #endif
150 nigel 3
151 ph10 107 /* It is also possible, for the benefit of the version currently imported into
152     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
153     interface to the DFA matcher (NODFA), and without the doublecheck of the old
154     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
155     UTF8 support if PCRE is built without it. */
156 nigel 79
157 ph10 107 #ifndef SUPPORT_UTF8
158     #ifndef NOUTF8
159     #define NOUTF8
160     #endif
161     #endif
162 nigel 79
163 ph10 107
164 nigel 85 /* Other parameters */
165    
166 nigel 3 #ifndef CLOCKS_PER_SEC
167     #ifdef CLK_TCK
168     #define CLOCKS_PER_SEC CLK_TCK
169     #else
170     #define CLOCKS_PER_SEC 100
171     #endif
172     #endif
173    
174 nigel 93 /* This is the default loop count for timing. */
175    
176 nigel 75 #define LOOPREPEAT 500000
177 nigel 3
178 nigel 85 /* Static variables */
179    
180 nigel 3 static FILE *outfile;
181     static int log_store = 0;
182 nigel 63 static int callout_count;
183     static int callout_extra;
184     static int callout_fail_count;
185     static int callout_fail_id;
186 ph10 210 static int debug_lengths;
187 nigel 63 static int first_callout;
188 nigel 93 static int locale_set = 0;
189 nigel 73 static int show_malloc;
190 nigel 67 static int use_utf8;
191 nigel 43 static size_t gotten_store;
192 nigel 3
193 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
194    
195     static int buffer_size = 50000;
196     static uschar *buffer = NULL;
197     static uschar *dbuffer = NULL;
198 nigel 75 static uschar *pbuffer = NULL;
199 nigel 3
200 ph10 598 /* Textual explanations for runtime error codes */
201 nigel 75
/* Table of human-readable messages, one slot per runtime error number. A NULL
slot marks a code that is either never returned or is reported by dedicated
code elsewhere, as the per-entry comments say.
NOTE(review): the slots appear to be indexed by the negated PCRE error code
(slot 1 is NOMATCH) - confirm at the point of use, which is outside this part
of the file. */
202 ph10 598 static const char *errtexts[] = {
203     NULL, /* 0 is no error */
204     NULL, /* NOMATCH is handled specially */
205     "NULL argument passed",
206     "bad option value",
207     "magic number missing",
208     "unknown opcode - pattern overwritten?",
209     "no more memory",
210     NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
211     "match limit exceeded",
212     "callout error code",
213     NULL, /* BADUTF8 is handled specially */
214     "bad UTF-8 offset",
215     NULL, /* PARTIAL is handled specially */
216     "not used - internal error",
217     "internal error - pattern overwritten?",
218     "bad count value",
219     "item unsupported for DFA matching",
220     "backreference condition or recursion test not supported for DFA matching",
221     "match limit not supported for DFA matching",
222     "workspace size exceeded in DFA matching",
223     "too much recursion for DFA matching",
224     "recursion limit exceeded",
225     "not used - internal error",
226     "invalid combination of newline options",
227     "bad offset value",
228     NULL /* SHORTUTF8 is handled specially */
229     };
230    
231    
232 ph10 541 /*************************************************
233     * Alternate character tables *
234     *************************************************/
235 nigel 49
236 ph10 545 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
237     using the default tables of the library. However, the T option can be used to
238     select alternate sets of tables, for different kinds of testing. Note also that
239 ph10 541 the L (locale) option also adjusts the tables. */
240    
241 ph10 545 /* This is the set of tables distributed as default with PCRE. It recognizes
242 ph10 541 only ASCII characters. */
243    
/* Layout of this array, in order: a 256-byte lower-casing table, a 256-byte
case-flipping table, ten 32-byte class bit maps (320 bytes), and a 256-byte
character-type table. */
244     static const unsigned char tables0[] = {
245    
246     /* This table is a lower casing table. */
247    
248     0, 1, 2, 3, 4, 5, 6, 7,
249     8, 9, 10, 11, 12, 13, 14, 15,
250     16, 17, 18, 19, 20, 21, 22, 23,
251     24, 25, 26, 27, 28, 29, 30, 31,
252     32, 33, 34, 35, 36, 37, 38, 39,
253     40, 41, 42, 43, 44, 45, 46, 47,
254     48, 49, 50, 51, 52, 53, 54, 55,
255     56, 57, 58, 59, 60, 61, 62, 63,
256     64, 97, 98, 99,100,101,102,103,
257     104,105,106,107,108,109,110,111,
258     112,113,114,115,116,117,118,119,
259     120,121,122, 91, 92, 93, 94, 95,
260     96, 97, 98, 99,100,101,102,103,
261     104,105,106,107,108,109,110,111,
262     112,113,114,115,116,117,118,119,
263     120,121,122,123,124,125,126,127,
264     128,129,130,131,132,133,134,135,
265     136,137,138,139,140,141,142,143,
266     144,145,146,147,148,149,150,151,
267     152,153,154,155,156,157,158,159,
268     160,161,162,163,164,165,166,167,
269     168,169,170,171,172,173,174,175,
270     176,177,178,179,180,181,182,183,
271     184,185,186,187,188,189,190,191,
272     192,193,194,195,196,197,198,199,
273     200,201,202,203,204,205,206,207,
274     208,209,210,211,212,213,214,215,
275     216,217,218,219,220,221,222,223,
276     224,225,226,227,228,229,230,231,
277     232,233,234,235,236,237,238,239,
278     240,241,242,243,244,245,246,247,
279     248,249,250,251,252,253,254,255,
280    
281     /* This table is a case flipping table. */
282    
283     0, 1, 2, 3, 4, 5, 6, 7,
284     8, 9, 10, 11, 12, 13, 14, 15,
285     16, 17, 18, 19, 20, 21, 22, 23,
286     24, 25, 26, 27, 28, 29, 30, 31,
287     32, 33, 34, 35, 36, 37, 38, 39,
288     40, 41, 42, 43, 44, 45, 46, 47,
289     48, 49, 50, 51, 52, 53, 54, 55,
290     56, 57, 58, 59, 60, 61, 62, 63,
291     64, 97, 98, 99,100,101,102,103,
292     104,105,106,107,108,109,110,111,
293     112,113,114,115,116,117,118,119,
294     120,121,122, 91, 92, 93, 94, 95,
295     96, 65, 66, 67, 68, 69, 70, 71,
296     72, 73, 74, 75, 76, 77, 78, 79,
297     80, 81, 82, 83, 84, 85, 86, 87,
298     88, 89, 90,123,124,125,126,127,
299     128,129,130,131,132,133,134,135,
300     136,137,138,139,140,141,142,143,
301     144,145,146,147,148,149,150,151,
302     152,153,154,155,156,157,158,159,
303     160,161,162,163,164,165,166,167,
304     168,169,170,171,172,173,174,175,
305     176,177,178,179,180,181,182,183,
306     184,185,186,187,188,189,190,191,
307     192,193,194,195,196,197,198,199,
308     200,201,202,203,204,205,206,207,
309     208,209,210,211,212,213,214,215,
310     216,217,218,219,220,221,222,223,
311     224,225,226,227,228,229,230,231,
312     232,233,234,235,236,237,238,239,
313     240,241,242,243,244,245,246,247,
314     248,249,250,251,252,253,254,255,
315    
316     /* This table contains bit maps for various character classes. Each map is 32
317     bytes long and the bits run from the least significant end of each byte. The
318     classes that have their own maps are: space, xdigit, digit, upper, lower, word,
319     graph, print, punct, and cntrl. Other classes are built from combinations. */
320    
/* space: characters 9-13 and 32 */
321     0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
322     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
323     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
324     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
325    
/* xdigit: 0-9, A-F, a-f */
326     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
327     0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
328     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330    
/* digit: 0-9 */
331     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
332     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
333     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
335    
/* upper: A-Z */
336     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
337     0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
338     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
340    
/* lower: a-z */
341     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
342     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
343     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
344     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
345    
/* word: 0-9, A-Z, '_', a-z */
346     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
347     0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
348     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
349     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
350    
/* graph: characters 33-126 */
351     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
352     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
353     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
354     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
355    
/* print: characters 32-126 */
356     0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
357     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
358     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
359     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360    
/* punct */
361     0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
362     0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
363     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
364     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365    
/* cntrl: characters 0-31 and 127 */
366     0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
367     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
368     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
369     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
370    
371     /* This table identifies various classes of character by individual bits:
372     0x01 white space character
373     0x02 letter
374     0x04 decimal digit
375     0x08 hexadecimal digit
376     0x10 alphanumeric or '_'
377     0x80 regular expression metacharacter or binary zero
378     */
379    
380     0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
381     0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
382     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
383     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
384     0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
385     0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
386     0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
387     0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
388     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
389     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
390     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
391     0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
392     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
393     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
394     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
395     0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
396     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
397     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
398     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
399     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
400     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
401     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
402     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
403     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
404     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
405     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
406     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
407     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
408     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
409     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
410     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
411     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
412    
413 ph10 545 /* This is a set of tables that came originally from a Windows user. It seems to
414     be at least an approximation of ISO 8859. In particular, there are characters
415 ph10 541 greater than 128 that are marked as spaces, letters, etc. */
416    
/* The array holds 256 + 256 + 320 + 256 bytes. The section comments below are
inferred from the row counts and from the rows that match tables0; they assume
the same four-part layout as tables0. */
417     static const unsigned char tables1[] = {
/* Section 1 (256 bytes): lower-casing table. Unlike tables0, characters
192-222 (except 215) are also mapped, to 224-254. */
418     0,1,2,3,4,5,6,7,
419     8,9,10,11,12,13,14,15,
420     16,17,18,19,20,21,22,23,
421     24,25,26,27,28,29,30,31,
422     32,33,34,35,36,37,38,39,
423     40,41,42,43,44,45,46,47,
424     48,49,50,51,52,53,54,55,
425     56,57,58,59,60,61,62,63,
426     64,97,98,99,100,101,102,103,
427     104,105,106,107,108,109,110,111,
428     112,113,114,115,116,117,118,119,
429     120,121,122,91,92,93,94,95,
430     96,97,98,99,100,101,102,103,
431     104,105,106,107,108,109,110,111,
432     112,113,114,115,116,117,118,119,
433     120,121,122,123,124,125,126,127,
434     128,129,130,131,132,133,134,135,
435     136,137,138,139,140,141,142,143,
436     144,145,146,147,148,149,150,151,
437     152,153,154,155,156,157,158,159,
438     160,161,162,163,164,165,166,167,
439     168,169,170,171,172,173,174,175,
440     176,177,178,179,180,181,182,183,
441     184,185,186,187,188,189,190,191,
442     224,225,226,227,228,229,230,231,
443     232,233,234,235,236,237,238,239,
444     240,241,242,243,244,245,246,215,
445     248,249,250,251,252,253,254,223,
446     224,225,226,227,228,229,230,231,
447     232,233,234,235,236,237,238,239,
448     240,241,242,243,244,245,246,247,
449     248,249,250,251,252,253,254,255,
/* Section 2 (256 bytes): case-flipping table. */
450     0,1,2,3,4,5,6,7,
451     8,9,10,11,12,13,14,15,
452     16,17,18,19,20,21,22,23,
453     24,25,26,27,28,29,30,31,
454     32,33,34,35,36,37,38,39,
455     40,41,42,43,44,45,46,47,
456     48,49,50,51,52,53,54,55,
457     56,57,58,59,60,61,62,63,
458     64,97,98,99,100,101,102,103,
459     104,105,106,107,108,109,110,111,
460     112,113,114,115,116,117,118,119,
461     120,121,122,91,92,93,94,95,
462     96,65,66,67,68,69,70,71,
463     72,73,74,75,76,77,78,79,
464     80,81,82,83,84,85,86,87,
465     88,89,90,123,124,125,126,127,
466     128,129,130,131,132,133,134,135,
467     136,137,138,139,140,141,142,143,
468     144,145,146,147,148,149,150,151,
469     152,153,154,155,156,157,158,159,
470     160,161,162,163,164,165,166,167,
471     168,169,170,171,172,173,174,175,
472     176,177,178,179,180,181,182,183,
473     184,185,186,187,188,189,190,191,
474     224,225,226,227,228,229,230,231,
475     232,233,234,235,236,237,238,239,
476     240,241,242,243,244,245,246,215,
477     248,249,250,251,252,253,254,223,
478     192,193,194,195,196,197,198,199,
479     200,201,202,203,204,205,206,207,
480     208,209,210,211,212,213,214,247,
481     216,217,218,219,220,221,222,255,
/* Section 3 (320 bytes): ten 32-byte class bit maps, four rows each.
Presumably in the same order as tables0 (space, xdigit, digit, upper, lower,
word, graph, print, punct, cntrl) - the first map sets the same bits as the
tables0 space map plus characters 133 and 160. */
482     0,62,0,0,1,0,0,0,
483     0,0,0,0,0,0,0,0,
484     32,0,0,0,1,0,0,0,
485     0,0,0,0,0,0,0,0,
486     0,0,0,0,0,0,255,3,
487     126,0,0,0,126,0,0,0,
488     0,0,0,0,0,0,0,0,
489     0,0,0,0,0,0,0,0,
490     0,0,0,0,0,0,255,3,
491     0,0,0,0,0,0,0,0,
492     0,0,0,0,0,0,12,2,
493     0,0,0,0,0,0,0,0,
494     0,0,0,0,0,0,0,0,
495     254,255,255,7,0,0,0,0,
496     0,0,0,0,0,0,0,0,
497     255,255,127,127,0,0,0,0,
498     0,0,0,0,0,0,0,0,
499     0,0,0,0,254,255,255,7,
500     0,0,0,0,0,4,32,4,
501     0,0,0,128,255,255,127,255,
502     0,0,0,0,0,0,255,3,
503     254,255,255,135,254,255,255,7,
504     0,0,0,0,0,4,44,6,
505     255,255,127,255,255,255,127,255,
506     0,0,0,0,254,255,255,255,
507     255,255,255,255,255,255,255,127,
508     0,0,0,0,254,255,255,255,
509     255,255,255,255,255,255,255,255,
510     0,2,0,0,255,255,255,255,
511     255,255,255,255,255,255,255,127,
512     0,0,0,0,255,255,255,255,
513     255,255,255,255,255,255,255,255,
514     0,0,0,0,254,255,0,252,
515     1,0,0,248,1,0,0,120,
516     0,0,0,0,254,255,255,255,
517     0,0,128,0,0,0,128,0,
518     255,255,255,255,0,0,0,0,
519     0,0,0,0,0,0,0,128,
520     255,255,255,255,0,0,0,0,
521     0,0,0,0,0,0,0,0,
/* Section 4 (256 bytes): per-character type flags. The first 16 rows hold the
same values as the tables0 type table (written in decimal here); several
characters from 128 upwards carry non-zero flags as well. */
522     128,0,0,0,0,0,0,0,
523     0,1,1,0,1,1,0,0,
524     0,0,0,0,0,0,0,0,
525     0,0,0,0,0,0,0,0,
526     1,0,0,0,128,0,0,0,
527     128,128,128,128,0,0,128,0,
528     28,28,28,28,28,28,28,28,
529     28,28,0,0,0,0,0,128,
530     0,26,26,26,26,26,26,18,
531     18,18,18,18,18,18,18,18,
532     18,18,18,18,18,18,18,18,
533     18,18,18,128,128,0,128,16,
534     0,26,26,26,26,26,26,18,
535     18,18,18,18,18,18,18,18,
536     18,18,18,18,18,18,18,18,
537     18,18,18,128,128,0,0,0,
538     0,0,0,0,0,1,0,0,
539     0,0,0,0,0,0,0,0,
540     0,0,0,0,0,0,0,0,
541     0,0,0,0,0,0,0,0,
542     1,0,0,0,0,0,0,0,
543     0,0,18,0,0,0,0,0,
544     0,0,20,20,0,18,0,0,
545     0,20,18,0,0,0,0,0,
546     18,18,18,18,18,18,18,18,
547     18,18,18,18,18,18,18,18,
548     18,18,18,18,18,18,18,0,
549     18,18,18,18,18,18,18,18,
550     18,18,18,18,18,18,18,18,
551     18,18,18,18,18,18,18,18,
552     18,18,18,18,18,18,18,0,
553     18,18,18,18,18,18,18,18
554     };
555    
556    
557    
558 ph10 558
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback used only when HAVE_STRERROR is not defined. It looks the message
up in the sys_errlist[] table that such libraries export, and hands back a
fixed text for numbers outside the range 0 .. sys_nerr-1.

Argument:
  n         the error number

Returns:    pointer to the message text (not to be modified or freed)
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
578    
579    
580    
581    
582     /*************************************************
583 nigel 91 * Read or extend an input line *
584     *************************************************/
585    
586     /* Input lines are read into buffer, but both patterns and data lines can be
587     continued over multiple input lines. In addition, if the buffer fills up, we
588     want to automatically expand it so as to be able to handle extremely large
589     lines that are needed for certain stress tests. When the input buffer is
590     expanded, the other two buffers must also be expanded likewise, and the
591     contents of pbuffer, which are a copy of the input for callouts, must be
592     preserved (for when expansion happens for a data line). This is not the most
593     optimal way of handling this, but hey, this is just a test program!
594    
595     Arguments:
596     f the file to read
597     start where in buffer to start (this *must* be within buffer)
598 ph10 287 prompt for stdin or readline()
599 nigel 91
600     Returns: pointer to the start of new data
601     could be a copy of start, or could be moved
602     NULL if no data read and EOF reached
603     */
604    
605     static uschar *
606 ph10 287 extend_inputline(FILE *f, uschar *start, const char *prompt)
607 nigel 91 {
608     uschar *here = start;
609    
610     for (;;)
611     {
612 ph10 530 int rlen = (int)(buffer_size - (here - buffer));
613 nigel 93
614 nigel 91 if (rlen > 1000)
615     {
616     int dlen;
617 ph10 289
618 ph10 287 /* If libreadline support is required, use readline() to read a line if the
619     input is a terminal. Note that readline() removes the trailing newline, so
620     we must put it back again, to be compatible with fgets(). */
621 ph10 289
622 ph10 287 #ifdef SUPPORT_LIBREADLINE
623     if (isatty(fileno(f)))
624     {
625 ph10 289 size_t len;
626 ph10 287 char *s = readline(prompt);
627     if (s == NULL) return (here == start)? NULL : start;
628     len = strlen(s);
629 ph10 289 if (len > 0) add_history(s);
630 ph10 287 if (len > rlen - 1) len = rlen - 1;
631     memcpy(here, s, len);
632     here[len] = '\n';
633 ph10 289 here[len+1] = 0;
634     free(s);
635 ph10 287 }
636 ph10 289 else
637     #endif
638    
639 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
640 ph10 289
641 ph10 287 {
642 ph10 516 if (f == stdin) printf("%s", prompt);
643 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
644     return (here == start)? NULL : start;
645 ph10 289 }
646    
647 nigel 91 dlen = (int)strlen((char *)here);
648     if (dlen > 0 && here[dlen - 1] == '\n') return start;
649     here += dlen;
650     }
651    
652     else
653     {
654     int new_buffer_size = 2*buffer_size;
655     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
656     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
657     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
658    
659     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
660     {
661     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
662     exit(1);
663     }
664    
665     memcpy(new_buffer, buffer, buffer_size);
666     memcpy(new_pbuffer, pbuffer, buffer_size);
667    
668     buffer_size = new_buffer_size;
669    
670     start = new_buffer + (start - buffer);
671     here = new_buffer + (here - buffer);
672    
673     free(buffer);
674     free(dbuffer);
675     free(pbuffer);
676    
677     buffer = new_buffer;
678     dbuffer = new_dbuffer;
679     pbuffer = new_pbuffer;
680     }
681     }
682    
683     return NULL; /* Control never gets here */
684     }
685    
686    
687    
688    
689    
690    
691    
/*************************************************
*         Read number from string                *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, the job is done by hand. Leading white
space is skipped, then a run of decimal digits is accumulated. No sign is
recognized and no overflow check is made. If there are no digits, the result
is zero.

Arguments:
  str        string to be converted
  endptr     where to put the end pointer (the first character not consumed)

Returns:     the value, as an int
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  unsigned char *scan = str;
  int value = 0;

  /* Step over leading white space. */
  for (; *scan != 0 && isspace(*scan); scan++) {}

  /* Accumulate the decimal digits. */
  for (; isdigit(*scan); scan++)
    value = value * 10 + (int)(*scan - '0');

  *endptr = scan;
  return value;
}
716    
717    
718    
719 nigel 49
720     /*************************************************
721     * Convert UTF-8 string to value *
722     *************************************************/
723    
724     /* This function takes one or more bytes that represents a UTF-8 character,
725     and returns the value of the character.
726    
727     Argument:
728 nigel 91 utf8bytes a pointer to the byte vector
729     vptr a pointer to an int to receive the value
730 nigel 49
731 nigel 91 Returns: > 0 => the number of bytes consumed
732     -6 to 0 => malformed UTF-8 character at offset = (-return)
733 nigel 49 */
734    
735 nigel 79 #if !defined NOUTF8
736    
737 nigel 67 static int
738 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
739 nigel 49 {
740 nigel 91 int c = *utf8bytes++;
741 nigel 49 int d = c;
742     int i, j, s;
743    
744     for (i = -1; i < 6; i++) /* i is number of additional bytes */
745     {
746     if ((d & 0x80) == 0) break;
747     d <<= 1;
748     }
749    
750     if (i == -1) { *vptr = c; return 1; } /* ascii character */
751     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
752    
753     /* i now has a value in the range 1-5 */
754    
755 nigel 59 s = 6*i;
756 nigel 85 d = (c & utf8_table3[i]) << s;
757 nigel 49
758     for (j = 0; j < i; j++)
759     {
760 nigel 91 c = *utf8bytes++;
761 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
762 nigel 59 s -= 6;
763 nigel 49 d |= (c & 0x3f) << s;
764     }
765    
766     /* Check that encoding was the correct unique one */
767    
768 nigel 85 for (j = 0; j < utf8_table1_size; j++)
769     if (d <= utf8_table1[j]) break;
770 nigel 49 if (j != i) return -(i+1);
771    
772     /* Valid value */
773    
774     *vptr = d;
775     return i+1;
776     }
777    
778 nigel 79 #endif
779 nigel 49
780    
781 nigel 79
782 nigel 63 /*************************************************
783 nigel 85 * Convert character value to UTF-8 *
784     *************************************************/
785    
786     /* This function takes an integer value in the range 0 - 0x7fffffff
787     and encodes it as a UTF-8 character in 0 to 6 bytes.
788    
789     Arguments:
790     cvalue the character value
791 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
792 nigel 85
793     Returns: number of characters placed in the buffer
794     */
795    
796 nigel 93 #if !defined NOUTF8
797    
798 nigel 85 static int
799 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
800 nigel 85 {
801     register int i, j;
802     for (i = 0; i < utf8_table1_size; i++)
803     if (cvalue <= utf8_table1[i]) break;
804 nigel 91 utf8bytes += i;
805 nigel 85 for (j = i; j > 0; j--)
806     {
807 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
808 nigel 85 cvalue >>= 6;
809     }
810 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
811 nigel 85 return i + 1;
812     }
813    
814 nigel 93 #endif
815 nigel 85
816    
817 nigel 93
818 nigel 85 /*************************************************
819 nigel 63 * Print character string *
820     *************************************************/
821 nigel 49
822 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
823     mode. Yields number of characters printed. If handed a NULL file, just counts
824     chars without printing. */
825 nigel 49
826 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
827 nigel 3 {
828 nigel 85 int c = 0;
829 nigel 63 int yield = 0;
830 nigel 3
831 nigel 63 while (length-- > 0)
832 nigel 3 {
833 nigel 79 #if !defined NOUTF8
834 nigel 67 if (use_utf8)
835 nigel 63 {
836     int rc = utf82ord(p, &c);
837 nigel 3
838 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
839     {
840     length -= rc - 1;
841     p += rc;
842 nigel 93 if (PRINTHEX(c))
843 nigel 63 {
844     if (f != NULL) fprintf(f, "%c", c);
845     yield++;
846     }
847     else
848     {
849 nigel 93 int n = 4;
850     if (f != NULL) fprintf(f, "\\x{%02x}", c);
851     yield += (n <= 0x000000ff)? 2 :
852     (n <= 0x00000fff)? 3 :
853     (n <= 0x0000ffff)? 4 :
854     (n <= 0x000fffff)? 5 : 6;
855 nigel 63 }
856     continue;
857     }
858     }
859 nigel 79 #endif
860 nigel 3
861 nigel 63 /* Not UTF-8, or malformed UTF-8 */
862    
863 nigel 93 c = *p++;
864     if (PRINTHEX(c))
865 nigel 3 {
866 nigel 63 if (f != NULL) fprintf(f, "%c", c);
867     yield++;
868 nigel 3 }
869 nigel 63 else
870 nigel 3 {
871 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
872     yield += 4;
873     }
874     }
875 nigel 3
876 nigel 63 return yield;
877     }
878 nigel 23
879 nigel 3
880 nigel 23
881 nigel 63 /*************************************************
882     * Callout function *
883     *************************************************/
884 nigel 3
885 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
886     the match. Yield zero unless more callouts than the fail count, or the callout
887     data is not zero. */
888 nigel 3
889 nigel 63 static int callout(pcre_callout_block *cb)
890     {
891     FILE *f = (first_callout | callout_extra)? outfile : NULL;
892 nigel 75 int i, pre_start, post_start, subject_length;
893 nigel 3
894 nigel 63 if (callout_extra)
895     {
896     fprintf(f, "Callout %d: last capture = %d\n",
897     cb->callout_number, cb->capture_last);
898 nigel 3
899 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
900     {
901     if (cb->offset_vector[i] < 0)
902     fprintf(f, "%2d: <unset>\n", i/2);
903     else
904     {
905     fprintf(f, "%2d: ", i/2);
906     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
907     cb->offset_vector[i+1] - cb->offset_vector[i], f);
908     fprintf(f, "\n");
909     }
910     }
911     }
912 nigel 3
913 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
914     datails. On subsequent calls in the same match, we use pchars just to find the
915     printed lengths of the substrings. */
916 nigel 3
917 nigel 63 if (f != NULL) fprintf(f, "--->");
918 nigel 3
919 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
920     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
921     cb->current_position - cb->start_match, f);
922 nigel 3
923 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
924    
925 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
926     cb->subject_length - cb->current_position, f);
927 nigel 3
928 nigel 63 if (f != NULL) fprintf(f, "\n");
929 nigel 9
930 nigel 63 /* Always print appropriate indicators, with callout number if not already
931 nigel 75 shown. For automatic callouts, show the pattern offset. */
932 nigel 3
933 nigel 75 if (cb->callout_number == 255)
934     {
935     fprintf(outfile, "%+3d ", cb->pattern_position);
936     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
937     }
938     else
939     {
940     if (callout_extra) fprintf(outfile, " ");
941     else fprintf(outfile, "%3d ", cb->callout_number);
942     }
943 nigel 3
944 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
945     fprintf(outfile, "^");
946 nigel 3
947 nigel 63 if (post_start > 0)
948     {
949     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
950     fprintf(outfile, "^");
951 nigel 3 }
952    
953 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
954     fprintf(outfile, " ");
955    
956     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
957     pbuffer + cb->pattern_position);
958    
959 nigel 63 fprintf(outfile, "\n");
960     first_callout = 0;
961 nigel 3
962 nigel 71 if (cb->callout_data != NULL)
963 nigel 49 {
964 nigel 71 int callout_data = *((int *)(cb->callout_data));
965     if (callout_data != 0)
966     {
967     fprintf(outfile, "Callout data = %d\n", callout_data);
968     return callout_data;
969     }
970 nigel 63 }
971 nigel 49
972 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
973     (++callout_count >= callout_fail_count)? 1 : 0;
974 nigel 3 }
975    
976    
977 nigel 63 /*************************************************
978 nigel 73 * Local malloc functions *
979 nigel 63 *************************************************/
980 nigel 3
981     /* Alternative malloc function, to test functionality and show the size of the
982     compiled re. */
983    
984     static void *new_malloc(size_t size)
985     {
986 nigel 73 void *block = malloc(size);
987 nigel 43 gotten_store = size;
988 nigel 73 if (show_malloc)
989 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
990 nigel 73 return block;
991 nigel 3 }
992    
993 nigel 73 static void new_free(void *block)
994     {
995     if (show_malloc)
996     fprintf(outfile, "free %p\n", block);
997     free(block);
998     }
999 nigel 3
1000    
1001 nigel 73 /* For recursion malloc/free, to test stacking calls */
1002    
1003     static void *stack_malloc(size_t size)
1004     {
1005     void *block = malloc(size);
1006     if (show_malloc)
1007 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1008 nigel 73 return block;
1009     }
1010    
1011     static void stack_free(void *block)
1012     {
1013     if (show_malloc)
1014     fprintf(outfile, "stack_free %p\n", block);
1015     free(block);
1016     }
1017    
1018    
1019 nigel 63 /*************************************************
1020     * Call pcre_fullinfo() *
1021     *************************************************/
1022 nigel 43
1023     /* Get one piece of information from the pcre_fullinfo() function */
1024    
1025     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1026     {
1027     int rc;
1028     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1029     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1030     }
1031    
1032    
1033    
1034 nigel 63 /*************************************************
1035 nigel 75 * Byte flipping function *
1036     *************************************************/
1037    
static unsigned long int
byteflip(unsigned long int value, int n)
{
  /* Reverse the byte order of the low-order bytes of value.

  Arguments:
    value   the value to flip
    n       2 to swap the two lowest bytes; any other value reverses the
            four lowest bytes

  Returns:  the flipped value; bits above those taken part in the flip
            are zero
  */

  unsigned long int byte0 = value & 0xff;
  unsigned long int byte1 = (value >> 8) & 0xff;
  unsigned long int byte2;
  unsigned long int byte3;

  if (n == 2)
  {
    return (byte0 << 8) | byte1;
  }

  byte2 = (value >> 16) & 0xff;
  byte3 = (value >> 24) & 0xff;

  return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
1047    
1048    
1049    
1050    
1051     /*************************************************
1052 nigel 87 * Check match or recursion limit *
1053     *************************************************/
1054    
1055     static int
1056     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1057     int start_offset, int options, int *use_offsets, int use_size_offsets,
1058     int flag, unsigned long int *limit, int errnumber, const char *msg)
1059     {
1060     int count;
1061     int min = 0;
1062     int mid = 64;
1063     int max = -1;
1064    
1065     extra->flags |= flag;
1066    
1067     for (;;)
1068     {
1069     *limit = mid;
1070    
1071     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1072     use_offsets, use_size_offsets);
1073    
1074     if (count == errnumber)
1075     {
1076     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1077     min = mid;
1078     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1079     }
1080    
1081     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1082     count == PCRE_ERROR_PARTIAL)
1083     {
1084     if (mid == min + 1)
1085     {
1086     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1087     break;
1088     }
1089     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1090     max = mid;
1091     mid = (min + mid)/2;
1092     }
1093     else break; /* Some other error */
1094     }
1095    
1096     extra->flags &= ~flag;
1097     return count;
1098     }
1099    
1100    
1101    
1102     /*************************************************
1103 ph10 227 * Case-independent strncmp() function *
1104     *************************************************/
1105    
1106     /*
1107     Arguments:
1108     s first string
1109     t second string
1110     n number of characters to compare
1111    
1112     Returns: < 0, = 0, or > 0, according to the comparison
1113     */
1114    
1115     static int
1116     strncmpic(uschar *s, uschar *t, int n)
1117     {
1118     while (n--)
1119     {
1120     int c = tolower(*s++) - tolower(*t++);
1121     if (c) return c;
1122     }
1123     return 0;
1124     }
1125    
1126    
1127    
1128     /*************************************************
1129 nigel 91 * Check newline indicator *
1130     *************************************************/
1131    
1132 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1133     a message and return 0 if there is no match.
1134 nigel 91
1135     Arguments:
1136     p points after the leading '<'
1137     f file for error message
1138    
1139     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1140     */
1141    
1142     static int
1143     check_newline(uschar *p, FILE *f)
1144     {
1145 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1146     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1147     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1148     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1149     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1150 ph10 231 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1151     if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1152 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
1153     return 0;
1154     }
1155    
1156    
1157    
1158     /*************************************************
1159 nigel 93 * Usage function *
1160     *************************************************/
1161    
static void
usage(void)
{
  /* Write the command-line help text to stdout. Which lines appear depends
  on SUPPORT_LIBREADLINE, NODFA and NOPOSIX at compile time. */

  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
  fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

  fputs("\nOptions:\n"
        " -b show compiled code (bytecode)\n"
        " -C show PCRE compile-time options and exit\n"
        " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -M find MATCH_LIMIT minimum for each subject\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif

  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s output store (memory) used information\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1195    
1196    
1197    
1198     /*************************************************
1199 nigel 63 * Main Program *
1200     *************************************************/
1201 nigel 43
1202 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
1203     consist of a regular expression, in delimiters and optionally followed by
1204     options, followed by a set of test data, terminated by an empty line. */
1205    
1206     int main(int argc, char **argv)
1207     {
1208     FILE *infile = stdin;
1209     int options = 0;
1210     int study_options = 0;
1211 ph10 386 int default_find_match_limit = FALSE;
1212 nigel 3 int op = 1;
1213     int timeit = 0;
1214 nigel 93 int timeitm = 0;
1215 nigel 3 int showinfo = 0;
1216 nigel 31 int showstore = 0;
1217 nigel 87 int quiet = 0;
1218 nigel 53 int size_offsets = 45;
1219     int size_offsets_max;
1220 nigel 77 int *offsets = NULL;
1221 nigel 53 #if !defined NOPOSIX
1222 nigel 3 int posix = 0;
1223 nigel 53 #endif
1224 nigel 3 int debug = 0;
1225 nigel 11 int done = 0;
1226 nigel 77 int all_use_dfa = 0;
1227     int yield = 0;
1228 nigel 91 int stack_size;
1229 nigel 3
1230 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
1231     that 1024 is plenty long enough for the few names we'll be testing. */
1232 nigel 69
1233 nigel 91 uschar copynames[1024];
1234     uschar getnames[1024];
1235    
1236     uschar *copynamesptr;
1237     uschar *getnamesptr;
1238    
1239 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
1240 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
1241 nigel 69
1242 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
1243     dbuffer = (unsigned char *)malloc(buffer_size);
1244     pbuffer = (unsigned char *)malloc(buffer_size);
1245 nigel 69
1246 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
1247 nigel 3
1248 nigel 93 outfile = stdout;
1249    
1250     /* The following _setmode() stuff is some Windows magic that tells its runtime
1251     library to translate CRLF into a single LF character. At least, that's what
1252     I've been told: never having used Windows I take this all on trust. Originally
1253     it set 0x8000, but then I was advised that _O_BINARY was better. */
1254    
1255 nigel 75 #if defined(_WIN32) || defined(WIN32)
1256 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
1257     #endif
1258 nigel 75
1259 nigel 3 /* Scan options */
1260    
1261     while (argc > 1 && argv[op][0] == '-')
1262     {
1263 nigel 63 unsigned char *endptr;
1264 nigel 53
1265 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
1266     showstore = 1;
1267 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1268 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1269 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1270     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1271 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1272 nigel 79 #if !defined NODFA
1273 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1274 nigel 79 #endif
1275 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1276 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1277     *endptr == 0))
1278 nigel 53 {
1279     op++;
1280     argc--;
1281     }
1282 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1283     {
1284     int both = argv[op][2] == 0;
1285     int temp;
1286     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1287     *endptr == 0))
1288     {
1289     timeitm = temp;
1290     op++;
1291     argc--;
1292     }
1293     else timeitm = LOOPREPEAT;
1294     if (both) timeit = timeitm;
1295     }
1296 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1297     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1298     *endptr == 0))
1299     {
1300 nigel 93 #if defined(_WIN32) || defined(WIN32)
1301 nigel 91 printf("PCRE: -S not supported on this OS\n");
1302     exit(1);
1303     #else
1304     int rc;
1305     struct rlimit rlim;
1306     getrlimit(RLIMIT_STACK, &rlim);
1307     rlim.rlim_cur = stack_size * 1024 * 1024;
1308     rc = setrlimit(RLIMIT_STACK, &rlim);
1309     if (rc != 0)
1310     {
1311     printf("PCRE: setrlimit() failed with error %d\n", rc);
1312     exit(1);
1313     }
1314     op++;
1315     argc--;
1316     #endif
1317     }
1318 nigel 53 #if !defined NOPOSIX
1319 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1320 nigel 53 #endif
1321 nigel 63 else if (strcmp(argv[op], "-C") == 0)
1322     {
1323     int rc;
1324 ph10 392 unsigned long int lrc;
1325 nigel 63 printf("PCRE version %s\n", pcre_version());
1326     printf("Compiled with\n");
1327     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1328     printf(" %sUTF-8 support\n", rc? "" : "No ");
1329 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1330     printf(" %sUnicode properties support\n", rc? "" : "No ");
1331 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1332 ph10 391 /* Note that these values are always the ASCII values, even
1333 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
1334 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1335     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1336 ph10 150 (rc == -2)? "ANYCRLF" :
1337 nigel 93 (rc == -1)? "ANY" : "???");
1338 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1339     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1340     "all Unicode newlines");
1341 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1342     printf(" Internal link size = %d\n", rc);
1343     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1344     printf(" POSIX malloc threshold = %d\n", rc);
1345 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1346     printf(" Default match limit = %ld\n", lrc);
1347     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1348     printf(" Default recursion depth limit = %ld\n", lrc);
1349 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1350     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1351 ph10 121 goto EXIT;
1352 nigel 63 }
1353 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
1354     strcmp(argv[op], "--help") == 0)
1355     {
1356     usage();
1357     goto EXIT;
1358     }
1359 nigel 3 else
1360     {
1361 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
1362 nigel 93 usage();
1363 nigel 77 yield = 1;
1364     goto EXIT;
1365 nigel 3 }
1366     op++;
1367     argc--;
1368     }
1369    
1370 nigel 53 /* Get the store for the offsets vector, and remember what it was */
1371    
1372     size_offsets_max = size_offsets;
1373 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1374 nigel 53 if (offsets == NULL)
1375     {
1376     printf("** Failed to get %d bytes of memory for offsets vector\n",
1377 ph10 151 (int)(size_offsets_max * sizeof(int)));
1378 nigel 77 yield = 1;
1379     goto EXIT;
1380 nigel 53 }
1381    
1382 nigel 3 /* Sort out the input and output files */
1383    
1384     if (argc > 1)
1385     {
1386 nigel 93 infile = fopen(argv[op], INPUT_MODE);
1387 nigel 3 if (infile == NULL)
1388     {
1389     printf("** Failed to open %s\n", argv[op]);
1390 nigel 77 yield = 1;
1391     goto EXIT;
1392 nigel 3 }
1393     }
1394    
1395     if (argc > 2)
1396     {
1397 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
1398 nigel 3 if (outfile == NULL)
1399     {
1400     printf("** Failed to open %s\n", argv[op+1]);
1401 nigel 77 yield = 1;
1402     goto EXIT;
1403 nigel 3 }
1404     }
1405    
1406     /* Set alternative malloc function */
1407    
1408     pcre_malloc = new_malloc;
1409 nigel 73 pcre_free = new_free;
1410     pcre_stack_malloc = stack_malloc;
1411     pcre_stack_free = stack_free;
1412 nigel 3
1413 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1414 nigel 3
1415 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1416 nigel 3
1417     /* Main loop */
1418    
1419 nigel 11 while (!done)
1420 nigel 3 {
1421     pcre *re = NULL;
1422     pcre_extra *extra = NULL;
1423 nigel 37
1424     #if !defined NOPOSIX /* There are still compilers that require no indent */
1425 nigel 3 regex_t preg;
1426 nigel 45 int do_posix = 0;
1427 nigel 37 #endif
1428    
1429 nigel 7 const char *error;
1430 ph10 512 unsigned char *markptr;
1431 nigel 25 unsigned char *p, *pp, *ppp;
1432 nigel 75 unsigned char *to_file = NULL;
1433 nigel 53 const unsigned char *tables = NULL;
1434 nigel 75 unsigned long int true_size, true_study_size = 0;
1435     size_t size, regex_gotten_store;
1436 ph10 512 int do_mark = 0;
1437 nigel 3 int do_study = 0;
1438 nigel 25 int do_debug = debug;
1439 nigel 35 int do_G = 0;
1440     int do_g = 0;
1441 nigel 25 int do_showinfo = showinfo;
1442 nigel 35 int do_showrest = 0;
1443 nigel 75 int do_flip = 0;
1444 nigel 93 int erroroffset, len, delimiter, poffset;
1445 nigel 3
1446 nigel 67 use_utf8 = 0;
1447 ph10 211 debug_lengths = 1;
1448 nigel 63
1449 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1450 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1451 nigel 63 fflush(outfile);
1452 nigel 3
1453     p = buffer;
1454     while (isspace(*p)) p++;
1455     if (*p == 0) continue;
1456    
1457 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1458 nigel 3
1459 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1460     {
1461 nigel 91 unsigned long int magic, get_options;
1462 nigel 75 uschar sbuf[8];
1463     FILE *f;
1464    
1465     p++;
1466     pp = p + (int)strlen((char *)p);
1467     while (isspace(pp[-1])) pp--;
1468     *pp = 0;
1469    
1470     f = fopen((char *)p, "rb");
1471     if (f == NULL)
1472     {
1473     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1474     continue;
1475     }
1476    
1477     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1478    
1479     true_size =
1480     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1481     true_study_size =
1482     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1483    
1484     re = (real_pcre *)new_malloc(true_size);
1485     regex_gotten_store = gotten_store;
1486    
1487     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1488    
1489     magic = ((real_pcre *)re)->magic_number;
1490     if (magic != MAGIC_NUMBER)
1491     {
1492     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1493     {
1494     do_flip = 1;
1495     }
1496     else
1497     {
1498     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1499     fclose(f);
1500     continue;
1501     }
1502     }
1503    
1504     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1505     do_flip? " (byte-inverted)" : "", p);
1506    
1507     /* Need to know if UTF-8 for printing data strings */
1508    
1509 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1510     use_utf8 = (get_options & PCRE_UTF8) != 0;
1511 nigel 75
1512     /* Now see if there is any following study data */
1513    
1514     if (true_study_size != 0)
1515     {
1516     pcre_study_data *psd;
1517    
1518     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1519     extra->flags = PCRE_EXTRA_STUDY_DATA;
1520    
1521     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1522     extra->study_data = psd;
1523    
1524     if (fread(psd, 1, true_study_size, f) != true_study_size)
1525     {
1526     FAIL_READ:
1527     fprintf(outfile, "Failed to read data from %s\n", p);
1528     if (extra != NULL) new_free(extra);
1529     if (re != NULL) new_free(re);
1530     fclose(f);
1531     continue;
1532     }
1533     fprintf(outfile, "Study data loaded from %s\n", p);
1534     do_study = 1; /* To get the data output if requested */
1535     }
1536     else fprintf(outfile, "No study data\n");
1537    
1538     fclose(f);
1539     goto SHOW_INFO;
1540     }
1541    
1542     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1543     the pattern; if it isn't complete, read more. */
1544    
1545 nigel 3 delimiter = *p++;
1546    
1547 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1548 nigel 3 {
1549 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1550 nigel 3 goto SKIP_DATA;
1551     }
1552    
1553     pp = p;
1554 ph10 530 poffset = (int)(p - buffer);
1555 nigel 3
1556     for(;;)
1557     {
1558 nigel 29 while (*pp != 0)
1559     {
1560     if (*pp == '\\' && pp[1] != 0) pp++;
1561     else if (*pp == delimiter) break;
1562     pp++;
1563     }
1564 nigel 3 if (*pp != 0) break;
1565 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1566 nigel 3 {
1567     fprintf(outfile, "** Unexpected EOF\n");
1568 nigel 11 done = 1;
1569     goto CONTINUE;
1570 nigel 3 }
1571 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1572 nigel 3 }
1573    
1574 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1575     pointer to the correct relative point in the buffer. */
1576    
1577     p = buffer + poffset;
1578    
1579 nigel 29 /* If the first character after the delimiter is backslash, make
1580     the pattern end with backslash. This is purely to provide a way
1581     of testing for the error message when a pattern ends with backslash. */
1582    
1583     if (pp[1] == '\\') *pp++ = '\\';
1584    
1585 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1586     for callouts. */
1587 nigel 3
1588     *pp++ = 0;
1589 nigel 75 strcpy((char *)pbuffer, (char *)p);
1590 nigel 3
1591     /* Look for options after final delimiter */
1592    
1593     options = 0;
1594     study_options = 0;
1595 nigel 31 log_store = showstore; /* default from command line */
1596    
1597 nigel 3 while (*pp != 0)
1598     {
1599     switch (*pp++)
1600     {
1601 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1602 nigel 35 case 'g': do_g = 1; break;
1603 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1604     case 'm': options |= PCRE_MULTILINE; break;
1605     case 's': options |= PCRE_DOTALL; break;
1606     case 'x': options |= PCRE_EXTENDED; break;
1607 nigel 25
1608 nigel 35 case '+': do_showrest = 1; break;
1609 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1610 nigel 93 case 'B': do_debug = 1; break;
1611 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1612 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1613 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1614 nigel 75 case 'F': do_flip = 1; break;
1615 nigel 35 case 'G': do_G = 1; break;
1616 nigel 25 case 'I': do_showinfo = 1; break;
1617 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1618 ph10 512 case 'K': do_mark = 1; break;
1619 nigel 31 case 'M': log_store = 1; break;
1620 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1621 nigel 37
1622     #if !defined NOPOSIX
1623 nigel 3 case 'P': do_posix = 1; break;
1624 nigel 37 #endif
1625    
1626 nigel 3 case 'S': do_study = 1; break;
1627 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1628 ph10 535 case 'W': options |= PCRE_UCP; break;
1629 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1630 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1631 ph10 126 case 'Z': debug_lengths = 0; break;
1632 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1633 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1634 ph10 545
1635 ph10 541 case 'T':
1636     switch (*pp++)
1637     {
1638     case '0': tables = tables0; break;
1639     case '1': tables = tables1; break;
1640 ph10 545
1641 ph10 541 case '\r':
1642     case '\n':
1643 ph10 545 case ' ':
1644     case 0:
1645 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
1646 ph10 545 goto SKIP_DATA;
1647    
1648     default:
1649 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1650 ph10 545 goto SKIP_DATA;
1651 ph10 541 }
1652 ph10 545 break;
1653 nigel 25
1654     case 'L':
1655     ppp = pp;
1656 nigel 93 /* The '\r' test here is so that it works on Windows. */
1657     /* The '0' test is just in case this is an unterminated line. */
1658     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1659 nigel 25 *ppp = 0;
1660     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1661     {
1662     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1663     goto SKIP_DATA;
1664     }
1665 nigel 93 locale_set = 1;
1666 nigel 25 tables = pcre_maketables();
1667     pp = ppp;
1668     break;
1669    
1670 nigel 75 case '>':
1671     to_file = pp;
1672     while (*pp != 0) pp++;
1673     while (isspace(pp[-1])) pp--;
1674     *pp = 0;
1675     break;
1676    
1677 nigel 91 case '<':
1678     {
1679 ph10 518 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1680 ph10 336 {
1681     options |= PCRE_JAVASCRIPT_COMPAT;
1682 ph10 345 pp += 3;
1683 ph10 336 }
1684     else
1685 ph10 345 {
1686 ph10 336 int x = check_newline(pp, outfile);
1687     if (x == 0) goto SKIP_DATA;
1688     options |= x;
1689     while (*pp++ != '>');
1690 ph10 345 }
1691 nigel 91 }
1692     break;
1693    
1694 nigel 77 case '\r': /* So that it works in Windows */
1695     case '\n':
1696     case ' ':
1697     break;
1698 nigel 75
1699 nigel 3 default:
1700     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1701     goto SKIP_DATA;
1702     }
1703     }
1704    
1705 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1706 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1707     local character tables. */
1708 nigel 3
1709 nigel 37 #if !defined NOPOSIX
1710 nigel 3 if (posix || do_posix)
1711     {
1712     int rc;
1713     int cflags = 0;
1714 nigel 75
1715 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1716     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1717 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1718 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1719     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1720 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1721 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1722 nigel 87
1723 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1724    
1725     /* Compilation failed; go back for another re, skipping to blank line
1726     if non-interactive. */
1727    
1728     if (rc != 0)
1729     {
1730 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1731 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1732     goto SKIP_DATA;
1733     }
1734     }
1735    
1736     /* Handle compiling via the native interface */
1737    
1738     else
1739 nigel 37 #endif /* !defined NOPOSIX */
1740    
1741 nigel 3 {
1742 ph10 412 unsigned long int get_options;
1743 ph10 416
1744 nigel 93 if (timeit > 0)
1745 nigel 3 {
1746     register int i;
1747     clock_t time_taken;
1748     clock_t start_time = clock();
1749 nigel 93 for (i = 0; i < timeit; i++)
1750 nigel 3 {
1751 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1752 nigel 3 if (re != NULL) free(re);
1753     }
1754     time_taken = clock() - start_time;
1755 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1756     (((double)time_taken * 1000.0) / (double)timeit) /
1757 nigel 63 (double)CLOCKS_PER_SEC);
1758 nigel 3 }
1759    
1760 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1761 nigel 3
1762     /* Compilation failed; go back for another re, skipping to blank line
1763     if non-interactive. */
1764    
1765     if (re == NULL)
1766     {
1767     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1768     SKIP_DATA:
1769     if (infile != stdin)
1770     {
1771     for (;;)
1772     {
1773 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
1774 nigel 11 {
1775     done = 1;
1776     goto CONTINUE;
1777     }
1778 nigel 3 len = (int)strlen((char *)buffer);
1779     while (len > 0 && isspace(buffer[len-1])) len--;
1780     if (len == 0) break;
1781     }
1782     fprintf(outfile, "\n");
1783     }
1784 nigel 25 goto CONTINUE;
1785 nigel 3 }
1786 ph10 416
1787     /* Compilation succeeded. It is now possible to set the UTF-8 option from
1788     within the regex; check for this so that we know how to process the data
1789 ph10 412 lines. */
1790 ph10 416
1791 ph10 412 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1792     if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1793 nigel 3
1794 ph10 412 /* Print information if required. There are now two info-returning
1795     functions. The old one has a limited interface and returns only limited
1796     data. Check that it agrees with the newer one. */
1797 nigel 3
1798 nigel 63 if (log_store)
1799     fprintf(outfile, "Memory allocation (code space): %d\n",
1800     (int)(gotten_store -
1801     sizeof(real_pcre) -
1802     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1803    
1804 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1805     and remember the store that was got. */
1806    
1807     true_size = ((real_pcre *)re)->size;
1808     regex_gotten_store = gotten_store;
1809    
1810     /* If /S was present, study the regexp to generate additional info to
1811     help with the matching. */
1812    
1813     if (do_study)
1814     {
1815 nigel 93 if (timeit > 0)
1816 nigel 75 {
1817     register int i;
1818     clock_t time_taken;
1819     clock_t start_time = clock();
1820 nigel 93 for (i = 0; i < timeit; i++)
1821 nigel 75 extra = pcre_study(re, study_options, &error);
1822     time_taken = clock() - start_time;
1823     if (extra != NULL) free(extra);
1824 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1825     (((double)time_taken * 1000.0) / (double)timeit) /
1826 nigel 75 (double)CLOCKS_PER_SEC);
1827     }
1828     extra = pcre_study(re, study_options, &error);
1829     if (error != NULL)
1830     fprintf(outfile, "Failed to study: %s\n", error);
1831     else if (extra != NULL)
1832     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1833     }
1834 ph10 512
1835 ph10 510 /* If /K was present, we set up for handling MARK data. */
1836 ph10 512
1837 ph10 510 if (do_mark)
1838     {
1839     if (extra == NULL)
1840     {
1841     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1842     extra->flags = 0;
1843     }
1844 ph10 512 extra->mark = &markptr;
1845 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
1846 ph10 512 }
1847 nigel 75
1848     /* If the 'F' option was present, we flip the bytes of all the integer
1849     fields in the regex data block and the study block. This is to make it
1850     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1851     compiled on a different architecture. */
1852    
1853     if (do_flip)
1854     {
1855     real_pcre *rre = (real_pcre *)re;
1856 ph10 259 rre->magic_number =
1857 ph10 255 byteflip(rre->magic_number, sizeof(rre->magic_number));
1858 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
1859     rre->options = byteflip(rre->options, sizeof(rre->options));
1860 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1861 ph10 259 rre->top_bracket =
1862 ph10 255 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1863 ph10 259 rre->top_backref =
1864 ph10 255 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1865 ph10 259 rre->first_byte =
1866 ph10 255 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1867 ph10 259 rre->req_byte =
1868 ph10 255 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1869     rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1870 nigel 75 sizeof(rre->name_table_offset));
1871 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1872 nigel 75 sizeof(rre->name_entry_size));
1873 ph10 259 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1874 ph10 255 sizeof(rre->name_count));
1875 nigel 75
1876     if (extra != NULL)
1877     {
1878     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1879     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1880 ph10 455 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1881     rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1882 nigel 75 }
1883     }
1884    
1885     /* Extract information from the compiled data if required */
1886    
1887     SHOW_INFO:
1888    
1889 nigel 93 if (do_debug)
1890     {
1891     fprintf(outfile, "------------------------------------------------------------------\n");
1892 ph10 116 pcre_printint(re, outfile, debug_lengths);
1893 nigel 93 }
1894 ph10 416
1895 ph10 412 /* We already have the options in get_options (see above) */
1896 nigel 93
1897 nigel 25 if (do_showinfo)
1898 nigel 3 {
1899 ph10 412 unsigned long int all_options;
1900 nigel 79 #if !defined NOINFOCHECK
1901 nigel 43 int old_first_char, old_options, old_count;
1902 nigel 79 #endif
1903 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1904 ph10 227 hascrorlf;
1905 nigel 63 int nameentrysize, namecount;
1906     const uschar *nametable;
1907 nigel 3
1908 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1909     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1910     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1911 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1912 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1913 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1914     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1915 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1916 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1917     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1918 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1919 nigel 43
1920 nigel 79 #if !defined NOINFOCHECK
1921 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1922 nigel 3 if (count < 0) fprintf(outfile,
1923 nigel 43 "Error %d from pcre_info()\n", count);
1924 nigel 3 else
1925     {
1926 nigel 43 if (old_count != count) fprintf(outfile,
1927     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1928     old_count);
1929 nigel 37
1930 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1931     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1932     first_char, old_first_char);
1933 nigel 37
1934 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1935     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1936     get_options, old_options);
1937 nigel 43 }
1938 nigel 79 #endif
1939 nigel 43
1940 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1941 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1942 nigel 77 (int)size, (int)regex_gotten_store);
1943 nigel 43
1944     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1945     if (backrefmax > 0)
1946     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1947 nigel 63
1948     if (namecount > 0)
1949     {
1950     fprintf(outfile, "Named capturing subpatterns:\n");
1951     while (namecount-- > 0)
1952     {
1953     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1954     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1955     GET2(nametable, 0));
1956     nametable += nameentrysize;
1957     }
1958     }
1959 ph10 172
1960 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1961 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1962 nigel 63
1963 nigel 75 all_options = ((real_pcre *)re)->options;
1964 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1965 nigel 75
1966 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1967 ph10 576 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1968 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1969     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1970     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1971     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1972 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1973 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1974 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1975     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1976 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1977     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1978     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1979 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1980 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1981 ph10 518 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1982 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1983 ph10 576 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
1984 nigel 91 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1985 ph10 172
1986 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1987 nigel 43
1988 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1989 nigel 91 {
1990     case PCRE_NEWLINE_CR:
1991     fprintf(outfile, "Forced newline sequence: CR\n");
1992     break;
1993 nigel 43
1994 nigel 91 case PCRE_NEWLINE_LF:
1995     fprintf(outfile, "Forced newline sequence: LF\n");
1996     break;
1997    
1998     case PCRE_NEWLINE_CRLF:
1999     fprintf(outfile, "Forced newline sequence: CRLF\n");
2000     break;
2001    
2002 ph10 149 case PCRE_NEWLINE_ANYCRLF:
2003     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2004     break;
2005    
2006 nigel 93 case PCRE_NEWLINE_ANY:
2007     fprintf(outfile, "Forced newline sequence: ANY\n");
2008     break;
2009    
2010 nigel 91 default:
2011     break;
2012     }
2013    
2014 nigel 43 if (first_char == -1)
2015     {
2016 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
2017 nigel 43 }
2018     else if (first_char < 0)
2019     {
2020     fprintf(outfile, "No first char\n");
2021     }
2022     else
2023     {
2024 nigel 63 int ch = first_char & 255;
2025 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2026 nigel 63 "" : " (caseless)";
2027 nigel 93 if (PRINTHEX(ch))
2028 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2029 nigel 3 else
2030 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2031 nigel 43 }
2032 nigel 37
2033 nigel 43 if (need_char < 0)
2034     {
2035     fprintf(outfile, "No need char\n");
2036 nigel 3 }
2037 nigel 43 else
2038     {
2039 nigel 63 int ch = need_char & 255;
2040 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2041 nigel 63 "" : " (caseless)";
2042 nigel 93 if (PRINTHEX(ch))
2043 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2044 nigel 43 else
2045 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2046 nigel 43 }
2047 nigel 75
2048     /* Don't output study size; at present it is in any case a fixed
2049     value, but it varies, depending on the computer architecture, and
2050     so messes up the test suite. (And with the /F option, it might be
2051     flipped.) */
2052    
2053     if (do_study)
2054     {
2055     if (extra == NULL)
2056     fprintf(outfile, "Study returned NULL\n");
2057     else
2058     {
2059     uschar *start_bits = NULL;
2060 ph10 455 int minlength;
2061 ph10 461
2062 ph10 455 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2063 ph10 461 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2064    
2065 nigel 75 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2066     if (start_bits == NULL)
2067 ph10 455 fprintf(outfile, "No set of starting bytes\n");
2068 nigel 75 else
2069     {
2070     int i;
2071     int c = 24;
2072     fprintf(outfile, "Starting byte set: ");
2073     for (i = 0; i < 256; i++)
2074     {
2075     if ((start_bits[i/8] & (1<<(i&7))) != 0)
2076     {
2077     if (c > 75)
2078     {
2079     fprintf(outfile, "\n ");
2080     c = 2;
2081     }
2082 nigel 93 if (PRINTHEX(i) && i != ' ')
2083 nigel 75 {
2084     fprintf(outfile, "%c ", i);
2085     c += 2;
2086     }
2087     else
2088     {
2089     fprintf(outfile, "\\x%02x ", i);
2090     c += 5;
2091     }
2092     }
2093     }
2094     fprintf(outfile, "\n");
2095     }
2096     }
2097     }
2098 nigel 3 }
2099    
2100 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
2101     that is all. The first 8 bytes of the file are the regex length and then
2102     the study length, in big-endian order. */
2103 nigel 3
2104 nigel 75 if (to_file != NULL)
2105 nigel 3 {
2106 nigel 75 FILE *f = fopen((char *)to_file, "wb");
2107     if (f == NULL)
2108 nigel 3 {
2109 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2110 nigel 3 }
2111 nigel 75 else
2112     {
2113     uschar sbuf[8];
2114 ph10 255 sbuf[0] = (uschar)((true_size >> 24) & 255);
2115     sbuf[1] = (uschar)((true_size >> 16) & 255);
2116     sbuf[2] = (uschar)((true_size >> 8) & 255);
2117     sbuf[3] = (uschar)((true_size) & 255);
2118 ph10 259
2119 ph10 255 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2120     sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2121     sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2122     sbuf[7] = (uschar)((true_study_size) & 255);
2123 nigel 3
2124 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
2125     fwrite(re, 1, true_size, f) < true_size)
2126     {
2127     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2128     }
2129 nigel 3 else
2130     {
2131 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
2132     if (extra != NULL)
2133 nigel 3 {
2134 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
2135     true_study_size)
2136 nigel 3 {
2137 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
2138     strerror(errno));
2139 nigel 3 }
2140 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
2141 nigel 93
2142 nigel 3 }
2143     }
2144 nigel 75 fclose(f);
2145 nigel 3 }
2146 nigel 77
2147     new_free(re);
2148     if (extra != NULL) new_free(extra);
2149 ph10 545 if (locale_set)
2150 ph10 541 {
2151     new_free((void *)tables);
2152     setlocale(LC_CTYPE, "C");
2153 ph10 545 locale_set = 0;
2154     }
2155 nigel 75 continue; /* With next regex */
2156 nigel 3 }
2157 nigel 75 } /* End of non-POSIX compile */
2158 nigel 3
2159     /* Read data lines and test them */
2160    
2161     for (;;)
2162     {
2163 nigel 87 uschar *q;
2164 ph10 147 uschar *bptr;
2165 nigel 57 int *use_offsets = offsets;
2166 nigel 53 int use_size_offsets = size_offsets;
2167 nigel 63 int callout_data = 0;
2168     int callout_data_set = 0;
2169 nigel 3 int count, c;
2170 nigel 29 int copystrings = 0;
2171 ph10 386 int find_match_limit = default_find_match_limit;
2172 nigel 29 int getstrings = 0;
2173     int getlist = 0;
2174 nigel 39 int gmatched = 0;
2175 nigel 35 int start_offset = 0;
2176 ph10 579 int start_offset_sign = 1;
2177 nigel 41 int g_notempty = 0;
2178 nigel 77 int use_dfa = 0;
2179 nigel 3
2180     options = 0;
2181    
2182 nigel 91 *copynames = 0;
2183     *getnames = 0;
2184    
2185     copynamesptr = copynames;
2186     getnamesptr = getnames;
2187    
2188 nigel 63 pcre_callout = callout;
2189     first_callout = 1;
2190     callout_extra = 0;
2191     callout_count = 0;
2192     callout_fail_count = 999999;
2193     callout_fail_id = -1;
2194 nigel 73 show_malloc = 0;
2195 nigel 63
2196 nigel 91 if (extra != NULL) extra->flags &=
2197     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2198    
2199     len = 0;
2200     for (;;)
2201 nigel 11 {
2202 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2203 nigel 91 {
2204 ph10 537 if (len > 0) /* Reached EOF without hitting a newline */
2205     {
2206 ph10 545 fprintf(outfile, "\n");
2207 ph10 537 break;
2208 ph10 545 }
2209 nigel 91 done = 1;
2210     goto CONTINUE;
2211     }
2212     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2213     len = (int)strlen((char *)buffer);
2214     if (buffer[len-1] == '\n') break;
2215 nigel 11 }
2216 nigel 3
2217     while (len > 0 && isspace(buffer[len-1])) len--;
2218     buffer[len] = 0;
2219     if (len == 0) break;
2220    
2221     p = buffer;
2222     while (isspace(*p)) p++;
2223    
2224 ph10 147 bptr = q = dbuffer;
2225 nigel 3 while ((c = *p++) != 0)
2226     {
2227     int i = 0;
2228     int n = 0;
2229 nigel 63
2230 nigel 3 if (c == '\\') switch ((c = *p++))
2231     {
2232     case 'a': c = 7; break;
2233     case 'b': c = '\b'; break;
2234     case 'e': c = 27; break;
2235     case 'f': c = '\f'; break;
2236     case 'n': c = '\n'; break;
2237     case 'r': c = '\r'; break;
2238     case 't': c = '\t'; break;
2239     case 'v': c = '\v'; break;
2240    
2241     case '0': case '1': case '2': case '3':
2242     case '4': case '5': case '6': case '7':
2243     c -= '0';
2244     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2245     c = c * 8 + *p++ - '0';
2246 nigel 91
2247     #if !defined NOUTF8
2248     if (use_utf8 && c > 255)
2249     {
2250     unsigned char buff8[8];
2251     int ii, utn;
2252     utn = ord2utf8(c, buff8);
2253     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2254     c = buff8[ii]; /* Last byte */
2255     }
2256     #endif
2257 nigel 3 break;
2258    
2259     case 'x':
2260 nigel 49
2261     /* Handle \x{..} specially - new Perl thing for utf8 */
2262    
2263 nigel 79 #if !defined NOUTF8
2264 nigel 49 if (*p == '{')
2265     {
2266     unsigned char *pt = p;
2267     c = 0;
2268     while (isxdigit(*(++pt)))
2269     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2270     if (*pt == '}')
2271     {
2272 nigel 67 unsigned char buff8[8];
2273 nigel 49 int ii, utn;
2274 ph10 355 if (use_utf8)
2275 ph10 358 {
2276 ph10 355 utn = ord2utf8(c, buff8);
2277     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2278     c = buff8[ii]; /* Last byte */
2279     }
2280     else
2281     {
2282 ph10 358 if (c > 255)
2283 ph10 355 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2284     "UTF-8 mode is not enabled.\n"
2285     "** Truncation will probably give the wrong result.\n", c);
2286 ph10 358 }
2287 nigel 49 p = pt + 1;
2288     break;
2289     }
2290     /* Not correct form; fall through */
2291     }
2292 nigel 79 #endif
2293 nigel 49
2294     /* Ordinary \x */
2295    
2296 nigel 3 c = 0;
2297     while (i++ < 2 && isxdigit(*p))
2298     {
2299     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2300     p++;
2301     }
2302     break;
2303    
2304 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
2305 nigel 3 p--;
2306     continue;
2307    
2308 nigel 75 case '>':
2309 ph10 579 if (*p == '-')
2310 ph10 567 {
2311     start_offset_sign = -1;
2312     p++;
2313 ph10 579 }
2314 nigel 75 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2315 ph10 579 start_offset *= start_offset_sign;
2316 nigel 75 continue;
2317    
2318 nigel 3 case 'A': /* Option setting */
2319     options |= PCRE_ANCHORED;
2320     continue;
2321    
2322     case 'B':
2323     options |= PCRE_NOTBOL;
2324     continue;
2325    
2326 nigel 29 case 'C':
2327 nigel 63 if (isdigit(*p)) /* Set copy string */
2328     {
2329     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2330     copystrings |= 1 << n;
2331     }
2332     else if (isalnum(*p))
2333     {
2334 nigel 91 uschar *npp = copynamesptr;
2335 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2336 nigel 91 *npp++ = 0;
2337 nigel 67 *npp = 0;
2338 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2339 nigel 63 if (n < 0)
2340 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2341     copynamesptr = npp;
2342 nigel 63 }
2343     else if (*p == '+')
2344     {
2345     callout_extra = 1;
2346     p++;
2347     }
2348     else if (*p == '-')
2349     {
2350     pcre_callout = NULL;
2351     p++;
2352     }
2353     else if (*p == '!')
2354     {
2355     callout_fail_id = 0;
2356     p++;
2357     while(isdigit(*p))
2358     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2359     callout_fail_count = 0;
2360     if (*p == '!')
2361     {
2362     p++;
2363     while(isdigit(*p))
2364     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2365     }
2366     }
2367     else if (*p == '*')
2368     {
2369     int sign = 1;
2370     callout_data = 0;
2371     if (*(++p) == '-') { sign = -1; p++; }
2372     while(isdigit(*p))
2373     callout_data = callout_data * 10 + *p++ - '0';
2374     callout_data *= sign;
2375     callout_data_set = 1;
2376     }
2377 nigel 29 continue;
2378    
2379 nigel 79 #if !defined NODFA
2380 nigel 77 case 'D':
2381 nigel 79 #if !defined NOPOSIX
2382 nigel 77 if (posix || do_posix)
2383     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2384     else
2385 nigel 79 #endif
2386 nigel 77 use_dfa = 1;
2387     continue;
2388 ph10 553 #endif
2389 nigel 77
2390 ph10 553 #if !defined NODFA
2391 nigel 77 case 'F':
2392     options |= PCRE_DFA_SHORTEST;
2393     continue;
2394 nigel 79 #endif
2395 nigel 77
2396 nigel 29 case 'G':
2397 nigel 63 if (isdigit(*p))
2398     {
2399     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2400     getstrings |= 1 << n;
2401     }
2402     else if (isalnum(*p))
2403     {
2404 nigel 91 uschar *npp = getnamesptr;
2405 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2406 nigel 91 *npp++ = 0;
2407 nigel 67 *npp = 0;
2408 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2409 nigel 63 if (n < 0)
2410 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2411     getnamesptr = npp;
2412 nigel 63 }
2413 nigel 29 continue;
2414    
2415     case 'L':
2416     getlist = 1;
2417     continue;
2418    
2419 nigel 63 case 'M':
2420     find_match_limit = 1;
2421     continue;
2422    
2423 nigel 37 case 'N':
2424 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
2425     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2426 ph10 461 else
2427 ph10 442 options |= PCRE_NOTEMPTY;
2428 nigel 37 continue;
2429    
2430 nigel 3 case 'O':
2431     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2432 nigel 53 if (n > size_offsets_max)
2433     {
2434     size_offsets_max = n;
2435 nigel 57 free(offsets);
2436 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2437 nigel 53 if (offsets == NULL)
2438     {
2439     printf("** Failed to get %d bytes of memory for offsets vector\n",
2440 ph10 151 (int)(size_offsets_max * sizeof(int)));
2441 nigel 77 yield = 1;
2442     goto EXIT;
2443 nigel 53 }
2444     }
2445     use_size_offsets = n;
2446 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2447 nigel 3 continue;
2448    
2449 nigel 75 case 'P':
2450 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2451 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2452 nigel 75 continue;
2453    
2454 nigel 91 case 'Q':
2455     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2456     if (extra == NULL)
2457     {
2458     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2459     extra->flags = 0;
2460     }
2461     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2462     extra->match_limit_recursion = n;
2463     continue;
2464    
2465     case 'q':
2466     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2467     if (extra == NULL)
2468     {
2469     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2470     extra->flags = 0;
2471     }
2472     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2473     extra->match_limit = n;
2474     continue;
2475    
2476 nigel 79 #if !defined NODFA
2477 nigel 77 case 'R':
2478     options |= PCRE_DFA_RESTART;
2479     continue;
2480 nigel 79 #endif
2481 nigel 77
2482 nigel 73 case 'S':
2483     show_malloc = 1;
2484     continue;
2485 ph10 392
2486 ph10 389 case 'Y':
2487     options |= PCRE_NO_START_OPTIMIZE;
2488 ph10 392 continue;
2489 nigel 73
2490 nigel 3 case 'Z':
2491     options |= PCRE_NOTEOL;
2492     continue;
2493 nigel 71
2494     case '?':
2495     options |= PCRE_NO_UTF8_CHECK;
2496     continue;
2497 nigel 91
2498     case '<':
2499     {
2500     int x = check_newline(p, outfile);
2501     if (x == 0) goto NEXT_DATA;
2502     options |= x;
2503     while (*p++ != '>');
2504     }
2505     continue;
2506 nigel 3 }
2507 nigel 9 *q++ = c;
2508 nigel 3 }
2509 nigel 9 *q = 0;
2510 ph10 530 len = (int)(q - dbuffer);
2511 ph10 545
2512 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
2513 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2514 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
2515 ph10 371
2516 ph10 363 #if !defined NOPOSIX
2517     if (posix || do_posix)
2518     {
2519     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2520 ph10 371 bptr += buffer_size - len - 1;
2521 ph10 363 }
2522 ph10 371 else
2523     #endif
2524 ph10 363 {
2525     memmove(bptr + buffer_size - len, bptr, len);
2526 ph10 371 bptr += buffer_size - len;
2527     }
2528 nigel 3
2529 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
2530     {
2531     printf("**Match limit not relevant for DFA matching: ignored\n");
2532     find_match_limit = 0;
2533     }
2534    
2535 nigel 3 /* Handle matching via the POSIX interface, which does not
2536 nigel 63 support timing or playing with the match limit or callout data. */
2537 nigel 3
2538 nigel 37 #if !defined NOPOSIX
2539 nigel 3 if (posix || do_posix)
2540     {
2541     int rc;
2542     int eflags = 0;
2543 nigel 63 regmatch_t *pmatch = NULL;
2544     if (use_size_offsets > 0)
2545 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2546 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2547     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2548 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2549 nigel 3
2550 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2551 nigel 3
2552     if (rc != 0)
2553     {
2554 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2555 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2556     }
2557 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2558     != 0)
2559     {
2560     fprintf(outfile, "Matched with REG_NOSUB\n");
2561     }
2562 nigel 3 else
2563     {
2564 nigel 7 size_t i;
2565 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
2566 nigel 3 {
2567     if (pmatch[i].rm_so >= 0)
2568     {
2569 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2570 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2571     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2572 nigel 3 fprintf(outfile, "\n");
2573 nigel 35 if (i == 0 && do_showrest)
2574     {
2575     fprintf(outfile, " 0+ ");
2576 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2577     outfile);
2578 nigel 35 fprintf(outfile, "\n");
2579     }
2580 nigel 3 }
2581     }
2582     }
2583 nigel 53 free(pmatch);
2584 nigel 3 }
2585    
2586 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2587 nigel 3
2588 nigel 37 else
2589     #endif /* !defined NOPOSIX */
2590    
2591 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2592 nigel 3 {
2593 ph10 512 markptr = NULL;
2594    
2595 nigel 93 if (timeitm > 0)
2596 nigel 3 {
2597     register int i;
2598     clock_t time_taken;
2599     clock_t start_time = clock();
2600 nigel 77
2601 nigel 79 #if !defined NODFA
2602 nigel 77 if (all_use_dfa || use_dfa)
2603     {
2604     int workspace[1000];
2605 nigel 93 for (i = 0; i < timeitm; i++)
2606 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2607 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2608     sizeof(workspace)/sizeof(int));
2609     }
2610     else
2611 nigel 79 #endif
2612 nigel 77
2613 nigel 93 for (i = 0; i < timeitm; i++)
2614 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2615 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2616 nigel 77
2617 nigel 3 time_taken = clock() - start_time;
2618 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2619     (((double)time_taken * 1000.0) / (double)timeitm) /
2620 nigel 63 (double)CLOCKS_PER_SEC);
2621 nigel 3 }
2622    
2623 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2624 nigel 87 varying limits in order to find the minimum value for the match limit and
2625     for the recursion limit. */
2626 nigel 63
2627     if (find_match_limit)
2628     {
2629     if (extra == NULL)
2630     {
2631 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2632 nigel 63 extra->flags = 0;
2633     }
2634    
2635 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2636 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2637     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2638     PCRE_ERROR_MATCHLIMIT, "match()");
2639 nigel 63
2640 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2641     options|g_notempty, use_offsets, use_size_offsets,
2642     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2643     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2644 nigel 63 }
2645    
2646     /* If callout_data is set, use the interface with additional data */
2647    
2648     else if (callout_data_set)
2649     {
2650     if (extra == NULL)
2651     {
2652 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2653 nigel 63 extra->flags = 0;
2654     }
2655     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2656 nigel 71 extra->callout_data = &callout_data;
2657 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2658     options | g_notempty, use_offsets, use_size_offsets);
2659     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2660     }
2661    
2662     /* The normal case is just to do the match once, with the default
2663     value of match_limit. */
2664    
2665 nigel 79 #if !defined NODFA
2666 nigel 77 else if (all_use_dfa || use_dfa)
2667     {
2668     int workspace[1000];
2669 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2670 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2671     sizeof(workspace)/sizeof(int));
2672     if (count == 0)
2673     {
2674     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2675     count = use_size_offsets/2;
2676     }
2677     }
2678 nigel 79 #endif
2679 nigel 77
2680 nigel 75 else
2681     {
2682     count = pcre_exec(re, extra, (char *)bptr, len,
2683     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2684 nigel 77 if (count == 0)
2685     {
2686     fprintf(outfile, "Matched, but too many substrings\n");
2687     count = use_size_offsets/3;
2688     }
2689 nigel 75 }
2690 nigel 3
2691 nigel 39 /* Matched */
2692    
2693 nigel 3 if (count >= 0)
2694     {
2695 nigel 93 int i, maxcount;
2696    
2697     #if !defined NODFA
2698     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2699     #endif
2700     maxcount = use_size_offsets/3;
2701    
2702     /* This is a check against a lunatic return value. */
2703    
2704     if (count > maxcount)
2705     {
2706     fprintf(outfile,
2707     "** PCRE error: returned count %d is too big for offset size %d\n",
2708     count, use_size_offsets);
2709     count = use_size_offsets/3;
2710     if (do_g || do_G)
2711     {
2712     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2713     do_g = do_G = FALSE; /* Break g/G loop */
2714     }
2715     }
2716    
2717 nigel 29 for (i = 0; i < count * 2; i += 2)
2718 nigel 3 {
2719 nigel 57 if (use_offsets[i] < 0)
2720 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2721     else
2722     {
2723     fprintf(outfile, "%2d: ", i/2);
2724 nigel 63 (void)pchars(bptr + use_offsets[i],
2725     use_offsets[i+1] - use_offsets[i], outfile);
2726 nigel 3 fprintf(outfile, "\n");
2727 nigel 35 if (i == 0)
2728     {
2729     if (do_showrest)
2730     {
2731     fprintf(outfile, " 0+ ");
2732 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2733     outfile);
2734 nigel 35 fprintf(outfile, "\n");
2735     }
2736     }
2737 nigel 3 }
2738     }
2739 ph10 512
2740 ph10 510 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2741 nigel 29
2742     for (i = 0; i < 32; i++)
2743     {
2744     if ((copystrings & (1 << i)) != 0)
2745     {
2746 nigel 91 char copybuffer[256];
2747 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2748 nigel 37 i, copybuffer, sizeof(copybuffer));
2749 nigel 29 if (rc < 0)
2750     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2751     else
2752 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2753 nigel 29 }
2754     }
2755    
2756 nigel 91 for (copynamesptr = copynames;
2757     *copynamesptr != 0;
2758     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2759     {
2760     char copybuffer[256];
2761     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2762     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2763     if (rc < 0)
2764     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2765     else
2766     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2767     }
2768    
2769 nigel 29 for (i = 0; i < 32; i++)
2770     {
2771     if ((getstrings & (1 << i)) != 0)
2772     {
2773     const char *substring;
2774 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2775 nigel 29 i, &substring);
2776     if (rc < 0)
2777     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2778     else
2779     {
2780     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2781 nigel 49 pcre_free_substring(substring);
2782 nigel 29 }
2783     }
2784     }
2785    
2786 nigel 91 for (getnamesptr = getnames;
2787     *getnamesptr != 0;
2788     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2789     {
2790     const char *substring;
2791     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2792     count, (char *)getnamesptr, &substring);
2793     if (rc < 0)
2794     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2795     else
2796     {
2797     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2798     pcre_free_substring(substring);
2799     }
2800     }
2801    
2802 nigel 29 if (getlist)
2803     {
2804     const char **stringlist;
2805 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2806 nigel 29 &stringlist);
2807     if (rc < 0)
2808     fprintf(outfile, "get substring list failed %d\n", rc);
2809     else
2810     {
2811     for (i = 0; i < count; i++)
2812     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2813     if (stringlist[i] != NULL)
2814     fprintf(outfile, "string list not terminated by NULL\n");
2815 nigel 49 /* free((void *)stringlist); */
2816     pcre_free_substring_list(stringlist);
2817 nigel 29 }
2818     }
2819 nigel 39 }
2820 nigel 29
2821 nigel 75 /* There was a partial match */
2822    
2823     else if (count == PCRE_ERROR_PARTIAL)
2824     {
2825 ph10 510 if (markptr == NULL) fprintf(outfile, "Partial match");
2826     else fprintf(outfile, "Partial match, mark=%s", markptr);
2827 ph10 426 if (use_size_offsets > 1)
2828     {
2829     fprintf(outfile, ": ");
2830     pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2831 ph10 461 outfile);
2832     }
2833 nigel 77 fprintf(outfile, "\n");
2834 nigel 75 break; /* Out of the /g loop */
2835     }
2836    
2837 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2838 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2839     to advance the start offset, and continue. We won't be at the end of the
2840     string - that was checked before setting g_notempty.
2841 nigel 39
2842 ph10 566 Complication arises in the case when the newline convention is "any",
2843 ph10 579 "crlf", or "anycrlf". If the previous match was at the end of a line
2844     terminated by CRLF, an advance of one character just passes the \r,
2845 ph10 566 whereas we should prefer the longer newline sequence, as does the code in
2846 ph10 579 pcre_exec(). Fudge the offset value to achieve this. We check for a
2847     newline setting in the pattern; if none was set, use pcre_config() to
2848 ph10 566 find the default.
2849 ph10 144
2850 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2851     character, not one byte. */
2852    
2853 nigel 3 else
2854     {
2855 nigel 41 if (g_notempty != 0)
2856 nigel 35 {
2857 nigel 73 int onechar = 1;
2858 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2859 nigel 57 use_offsets[0] = start_offset;
2860 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2861     {
2862     int d;
2863     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2864 ph10 391 /* Note that these values are always the ASCII ones, even in
2865     EBCDIC environments. CR = 13, NL = 10. */
2866     obits = (d == 13)? PCRE_NEWLINE_CR :
2867     (d == 10)? PCRE_NEWLINE_LF :
2868     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2869 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2870 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2871     }
2872 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2873 ph10 566 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2874 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2875 ph10 149 &&
2876 ph10 143 start_offset < len - 1 &&
2877     bptr[start_offset] == '\r' &&
2878     bptr[start_offset+1] == '\n')
2879 ph10 144 onechar++;
2880 ph10 143 else if (use_utf8)
2881 nigel 73 {
2882     while (start_offset + onechar < len)
2883     {
2884 ph10 566 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2885 ph10 579 onechar++;
2886 nigel 73 }
2887     }
2888     use_offsets[1] = start_offset + onechar;
2889 nigel 35 }
2890 nigel 41 else
2891     {
2892 ph10 598 switch(count)
2893     {
2894     case PCRE_ERROR_NOMATCH:
2895 ph10 512 if (gmatched == 0)
2896 ph10 510 {
2897     if (markptr == NULL) fprintf(outfile, "No match\n");
2898     else fprintf(outfile, "No match, mark = %s\n", markptr);
2899 ph10 512 }
2900 ph10 598 break;
2901    
2902     case PCRE_ERROR_BADUTF8:
2903     case PCRE_ERROR_SHORTUTF8:
2904     fprintf(outfile, "Error %d (%s UTF-8 string)", count,
2905     (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
2906     if (use_size_offsets >= 2)
2907     fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
2908     use_offsets[1]);
2909     fprintf(outfile, "\n");
2910     break;
2911    
2912     default:
2913     fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
2914     break;
2915 nigel 41 }
2916 ph10 598
2917 nigel 41 break; /* Out of the /g loop */
2918     }
2919 nigel 3 }
2920 nigel 35
2921 nigel 39 /* If not /g or /G we are done */
2922    
2923     if (!do_g && !do_G) break;
2924    
2925 nigel 41 /* If we have matched an empty string, first check to see if we are at
2926 ph10 442 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2927     Perl's /g option does. This turns out to be rather cunning. First we set
2928     PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2929 nigel 47 same point. If this fails (picked up above) we advance to the next
2930 ph10 143 character. */
2931 ph10 142
2932 nigel 41 g_notempty = 0;
2933 ph10 142
2934 nigel 57 if (use_offsets[0] == use_offsets[1])
2935 nigel 41 {
2936 nigel 57 if (use_offsets[0] == len) break;
2937 ph10 442 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2938 nigel 41 }
2939 nigel 39
2940     /* For /g, update the start offset, leaving the rest alone */
2941    
2942 ph10 143 if (do_g) start_offset = use_offsets[1];
2943 nigel 39
2944     /* For /G, update the pointer and length */
2945    
2946     else
2947 nigel 35 {
2948 ph10 143 bptr += use_offsets[1];
2949     len -= use_offsets[1];
2950 nigel 35 }
2951 nigel 39 } /* End of loop for /g and /G */
2952 nigel 91
2953     NEXT_DATA: continue;
2954 nigel 39 } /* End of loop for data lines */
2955 nigel 3
2956 nigel 11 CONTINUE:
2957 nigel 37
2958     #if !defined NOPOSIX
2959 nigel 3 if (posix || do_posix) regfree(&preg);
2960 nigel 37 #endif
2961    
2962 nigel 77 if (re != NULL) new_free(re);
2963     if (extra != NULL) new_free(extra);
2964 ph10 541 if (locale_set)
2965 nigel 25 {
2966 nigel 77 new_free((void *)tables);
2967 nigel 25 setlocale(LC_CTYPE, "C");
2968 nigel 93 locale_set = 0;
2969 nigel 25 }
2970 nigel 3 }
2971    
2972 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2973 nigel 77
2974     EXIT:
2975    
2976     if (infile != NULL && infile != stdin) fclose(infile);
2977     if (outfile != NULL && outfile != stdout) fclose(outfile);
2978    
2979     free(buffer);
2980     free(dbuffer);
2981     free(pbuffer);
2982     free(offsets);
2983    
2984     return yield;
2985 nigel 3 }
2986    
2987 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12