/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 606 - (hide annotations) (download)
Mon Jun 6 17:46:22 2011 UTC (2 years, 10 months ago) by ph10
File MIME type: text/plain
File size: 89513 byte(s)
Tidy the API for _pcre_valid_utf8() to a more suitable form for a future public 
release. Also make -s in pcretest force a study for every regex.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
43 nigel 3 #include <ctype.h>
44     #include <stdio.h>
45     #include <string.h>
46     #include <stdlib.h>
47     #include <time.h>
48 nigel 25 #include <locale.h>
49 nigel 75 #include <errno.h>
#include <limits.h>
50 nigel 3
51 ph10 287 #ifdef SUPPORT_LIBREADLINE
52 ph10 343 #ifdef HAVE_UNISTD_H
53 ph10 287 #include <unistd.h>
54 ph10 343 #endif
55 ph10 287 #include <readline/readline.h>
56     #include <readline/history.h>
57     #endif
58 nigel 93
59 ph10 287
60 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
61     input and output without "b"; then I was told that "b" was needed in some
62     environments, so it was added for release 5.0 to both the input and output. (It
63     makes no difference on Unix-like systems.) Later I was told that it is wrong
64     for the input on Windows. I've now abstracted the modes into two macros that
65     are set here, to make it easier to fiddle with them, and removed "b" from the
66     input mode under Windows. */
67    
68     #if defined(_WIN32) || defined(WIN32)
69     #include <io.h> /* For _setmode() */
70     #include <fcntl.h> /* For _O_BINARY */
71     #define INPUT_MODE "r"
72     #define OUTPUT_MODE "wb"
73    
74 ph10 411 #ifndef isatty
75     #define isatty _isatty /* This is what Windows calls them, I'm told, */
76     #endif /* though in some environments they seem to */
77     /* be already defined, hence the #ifndefs. */
78     #ifndef fileno
79 ph10 343 #define fileno _fileno
80 ph10 411 #endif
81 ph10 343
82 ph10 580 /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84     #ifdef __BORLANDC__
85     #define _setmode(handle, mode) setmode(handle, mode)
86     #endif
87    
88     /* Not Windows */
89    
90 nigel 93 #else
91     #include <sys/time.h> /* These two includes are needed */
92     #include <sys/resource.h> /* for setrlimit(). */
93     #define INPUT_MODE "rb"
94     #define OUTPUT_MODE "wb"
95 nigel 91 #endif
96    
97 nigel 93
98 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
99     displaying the results of pcre_study() and we also need to know about the
100     internal macros, structures, and other internal data values; pcretest has
101     "inside information" compared to a program that strictly follows the PCRE API.
102 nigel 37
103 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105     appropriately for an application, not for building PCRE. */
106 nigel 77
107 ph10 145 #include "pcre.h"
108 nigel 77 #include "pcre_internal.h"
109    
110 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
111     to keep two copies, we include the source file here, changing the names of the
112     external symbols to prevent clashes. */
113 nigel 77
114 ph10 351 #define _pcre_ucp_gentype ucp_gentype
115 nigel 85 #define _pcre_utf8_table1 utf8_table1
116     #define _pcre_utf8_table1_size utf8_table1_size
117     #define _pcre_utf8_table2 utf8_table2
118     #define _pcre_utf8_table3 utf8_table3
119     #define _pcre_utf8_table4 utf8_table4
120     #define _pcre_utt utt
121     #define _pcre_utt_size utt_size
122 ph10 240 #define _pcre_utt_names utt_names
123 nigel 85 #define _pcre_OP_lengths OP_lengths
124    
125     #include "pcre_tables.c"
126    
127     /* We also need the pcre_printint() function for printing out compiled
128     patterns. This function is in a separate file so that it can be included in
129 ph10 507 pcre_compile.c when that module is compiled with debugging enabled. It needs to
130 ph10 498 know which case is being compiled. */
131 nigel 85
132 ph10 498 #define COMPILING_PCRETEST
133     #include "pcre_printint.src"
134    
135     /* The definition of the macro PRINTABLE, which determines whether to print an
136 nigel 93 output character as-is or as a hex value when showing compiled patterns, is
137 ph10 498 contained in the printint.src file. We use it here also, in cases when the
138     locale has not been explicitly changed, so as to get consistent output from
139     systems that differ in their output from isprint() even in the "C" locale. */
140 nigel 93
141     #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
142 nigel 85
143 nigel 37 /* It is possible to compile this test program without including support for
144     testing the POSIX interface, though this is not available via the standard
145     Makefile. */
146    
147     #if !defined NOPOSIX
148 nigel 3 #include "pcreposix.h"
149 nigel 37 #endif
150 nigel 3
151 ph10 107 /* It is also possible, for the benefit of the version currently imported into
152     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
153     interface to the DFA matcher (NODFA), and without the doublecheck of the old
154     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
155     UTF8 support if PCRE is built without it. */
156 nigel 79
157 ph10 107 #ifndef SUPPORT_UTF8
158     #ifndef NOUTF8
159     #define NOUTF8
160     #endif
161     #endif
162 nigel 79
163 ph10 107
164 nigel 85 /* Other parameters */
165    
166 nigel 3 #ifndef CLOCKS_PER_SEC
167     #ifdef CLK_TCK
168     #define CLOCKS_PER_SEC CLK_TCK
169     #else
170     #define CLOCKS_PER_SEC 100
171     #endif
172     #endif
173    
174 nigel 93 /* This is the default loop count for timing. */
175    
176 nigel 75 #define LOOPREPEAT 500000
177 nigel 3
178 nigel 85 /* Static variables */
179    
/* File-scope state shared by the functions in this program. Only the uses
that can be seen in this part of the file are noted against each item. */

/* Stream handed to callout() as its output destination. */
180 nigel 3 static FILE *outfile;
181     static int log_store = 0;
182 nigel 63 static int callout_count;
/* Non-zero makes callout() print its extra "last capture" line. */
183     static int callout_extra;
184     static int callout_fail_count;
185     static int callout_fail_id;
186 ph10 210 static int debug_lengths;
187 nigel 63 static int first_callout;
/* Tested by PRINTHEX(): non-zero selects isprint(), zero selects PRINTABLE(). */
188 nigel 93 static int locale_set = 0;
189 nigel 73 static int show_malloc;
/* Non-zero makes pchars() decode its input as UTF-8 before printing. */
190 nigel 67 static int use_utf8;
191 nigel 43 static size_t gotten_store;
192 nigel 3
193 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
194    
/* extend_inputline() replaces all three buffers together with ones of twice
the size; the contents of buffer and pbuffer are copied across, those of
dbuffer are not. */
195     static int buffer_size = 50000;
196     static uschar *buffer = NULL;
197     static uschar *dbuffer = NULL;
198 nigel 75 static uschar *pbuffer = NULL;
199 nigel 3
200 ph10 598 /* Textual explanations for runtime error codes */
201 nigel 75
/* Table of message texts, one entry per runtime error code. A NULL entry
marks a code that, according to its own comment, is either handled specially
or never returned, so it has no generic text here.
NOTE(review): the entries look as though they are indexed by the negated
error code (entry 1 is NOMATCH) - confirm against the PCRE_ERROR_xxx values
in pcre.h before adding or reordering entries. */
202 ph10 598 static const char *errtexts[] = {
203     NULL, /* 0 is no error */
204     NULL, /* NOMATCH is handled specially */
205     "NULL argument passed",
206     "bad option value",
207     "magic number missing",
208     "unknown opcode - pattern overwritten?",
209     "no more memory",
210     NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
211     "match limit exceeded",
212     "callout error code",
213     NULL, /* BADUTF8 is handled specially */
214     "bad UTF-8 offset",
215     NULL, /* PARTIAL is handled specially */
216     "not used - internal error",
217     "internal error - pattern overwritten?",
218     "bad count value",
219     "item unsupported for DFA matching",
220     "backreference condition or recursion test not supported for DFA matching",
221     "match limit not supported for DFA matching",
222     "workspace size exceeded in DFA matching",
223     "too much recursion for DFA matching",
224     "recursion limit exceeded",
225     "not used - internal error",
226     "invalid combination of newline options",
227     "bad offset value",
228     NULL /* SHORTUTF8 is handled specially */
229     };
230    
231    
232 ph10 541 /*************************************************
233     * Alternate character tables *
234     *************************************************/
235 nigel 49
236 ph10 545 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
237     using the default tables of the library. However, the T option can be used to
238     select alternate sets of tables, for different kinds of testing. Note also that
239 ph10 541 the L (locale) option also adjusts the tables. */
240    
241 ph10 545 /* This is the set of tables distributed as default with PCRE. It recognizes
242 ph10 541 only ASCII characters. */
243    
/* Layout of this vector, in order: a 256-byte lower casing table, a 256-byte
case flipping table, ten 32-byte character class bit maps (320 bytes), and a
256-byte table of character type flag bits. */
244     static const unsigned char tables0[] = {
245    
246     /* This table is a lower casing table. */
247    
248     0, 1, 2, 3, 4, 5, 6, 7,
249     8, 9, 10, 11, 12, 13, 14, 15,
250     16, 17, 18, 19, 20, 21, 22, 23,
251     24, 25, 26, 27, 28, 29, 30, 31,
252     32, 33, 34, 35, 36, 37, 38, 39,
253     40, 41, 42, 43, 44, 45, 46, 47,
254     48, 49, 50, 51, 52, 53, 54, 55,
255     56, 57, 58, 59, 60, 61, 62, 63,
256     64, 97, 98, 99,100,101,102,103,
257     104,105,106,107,108,109,110,111,
258     112,113,114,115,116,117,118,119,
259     120,121,122, 91, 92, 93, 94, 95,
260     96, 97, 98, 99,100,101,102,103,
261     104,105,106,107,108,109,110,111,
262     112,113,114,115,116,117,118,119,
263     120,121,122,123,124,125,126,127,
264     128,129,130,131,132,133,134,135,
265     136,137,138,139,140,141,142,143,
266     144,145,146,147,148,149,150,151,
267     152,153,154,155,156,157,158,159,
268     160,161,162,163,164,165,166,167,
269     168,169,170,171,172,173,174,175,
270     176,177,178,179,180,181,182,183,
271     184,185,186,187,188,189,190,191,
272     192,193,194,195,196,197,198,199,
273     200,201,202,203,204,205,206,207,
274     208,209,210,211,212,213,214,215,
275     216,217,218,219,220,221,222,223,
276     224,225,226,227,228,229,230,231,
277     232,233,234,235,236,237,238,239,
278     240,241,242,243,244,245,246,247,
279     248,249,250,251,252,253,254,255,
280    
281     /* This table is a case flipping table. */
282    
283     0, 1, 2, 3, 4, 5, 6, 7,
284     8, 9, 10, 11, 12, 13, 14, 15,
285     16, 17, 18, 19, 20, 21, 22, 23,
286     24, 25, 26, 27, 28, 29, 30, 31,
287     32, 33, 34, 35, 36, 37, 38, 39,
288     40, 41, 42, 43, 44, 45, 46, 47,
289     48, 49, 50, 51, 52, 53, 54, 55,
290     56, 57, 58, 59, 60, 61, 62, 63,
291     64, 97, 98, 99,100,101,102,103,
292     104,105,106,107,108,109,110,111,
293     112,113,114,115,116,117,118,119,
294     120,121,122, 91, 92, 93, 94, 95,
295     96, 65, 66, 67, 68, 69, 70, 71,
296     72, 73, 74, 75, 76, 77, 78, 79,
297     80, 81, 82, 83, 84, 85, 86, 87,
298     88, 89, 90,123,124,125,126,127,
299     128,129,130,131,132,133,134,135,
300     136,137,138,139,140,141,142,143,
301     144,145,146,147,148,149,150,151,
302     152,153,154,155,156,157,158,159,
303     160,161,162,163,164,165,166,167,
304     168,169,170,171,172,173,174,175,
305     176,177,178,179,180,181,182,183,
306     184,185,186,187,188,189,190,191,
307     192,193,194,195,196,197,198,199,
308     200,201,202,203,204,205,206,207,
309     208,209,210,211,212,213,214,215,
310     216,217,218,219,220,221,222,223,
311     224,225,226,227,228,229,230,231,
312     232,233,234,235,236,237,238,239,
313     240,241,242,243,244,245,246,247,
314     248,249,250,251,252,253,254,255,
315    
316     /* This table contains bit maps for various character classes. Each map is 32
317     bytes long and the bits run from the least significant end of each byte. The
318     classes that have their own maps are: space, xdigit, digit, upper, lower, word,
319     graph, print, punct, and cntrl. Other classes are built from combinations. */
320    
/* space: bits set for characters 9-13 and 32 */
321     0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
322     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
323     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
324     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
325    
/* xdigit: 0-9, A-F, a-f */
326     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
327     0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
328     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330    
/* digit: 0-9 */
331     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
332     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
333     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
335    
/* upper: A-Z */
336     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
337     0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
338     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
340    
/* lower: a-z */
341     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
342     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
343     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
344     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
345    
/* word: 0-9, A-Z, underscore, a-z */
346     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
347     0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
348     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
349     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
350    
/* graph: characters 33-126 */
351     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
352     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
353     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
354     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
355    
/* print: characters 32-126 */
356     0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
357     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
358     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
359     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360    
/* punct: characters 33-47, 58-64, 91-96, 123-126 */
361     0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
362     0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
363     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
364     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365    
/* cntrl: characters 0-31 and 127 */
366     0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
367     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
368     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
369     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
370    
371     /* This table identifies various classes of character by individual bits:
372     0x01 white space character
373     0x02 letter
374     0x04 decimal digit
375     0x08 hexadecimal digit
376     0x10 alphanumeric or '_'
377     0x80 regular expression metacharacter or binary zero
378     */
379    
380     0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
381     0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
382     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
383     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
384     0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
385     0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
386     0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
387     0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
388     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
389     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
390     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
391     0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
392     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
393     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
394     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
395     0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
396     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
397     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
398     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
399     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
400     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
401     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
402     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
403     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
404     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
405     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
406     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
407     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
408     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
409     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
410     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
411     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
412    
413 ph10 545 /* This is a set of tables that came originally from a Windows user. It seems to
414     be at least an approximation of ISO 8859. In particular, there are characters
415 ph10 541 greater than 128 that are marked as spaces, letters, etc. */
416    
/* The vector holds 1088 bytes written as decimal values, eight per row. It
has the same four-part layout as tables0: 256 bytes of lower casing, 256 bytes
of case flipping, ten 32-byte class bit maps, and 256 bytes of character type
flag bits. The section markers below follow that layout. */
417     static const unsigned char tables1[] = {
/* Lower casing table (256 bytes). Values 192-222, apart from 215, map to the
value 32 higher. */
418     0,1,2,3,4,5,6,7,
419     8,9,10,11,12,13,14,15,
420     16,17,18,19,20,21,22,23,
421     24,25,26,27,28,29,30,31,
422     32,33,34,35,36,37,38,39,
423     40,41,42,43,44,45,46,47,
424     48,49,50,51,52,53,54,55,
425     56,57,58,59,60,61,62,63,
426     64,97,98,99,100,101,102,103,
427     104,105,106,107,108,109,110,111,
428     112,113,114,115,116,117,118,119,
429     120,121,122,91,92,93,94,95,
430     96,97,98,99,100,101,102,103,
431     104,105,106,107,108,109,110,111,
432     112,113,114,115,116,117,118,119,
433     120,121,122,123,124,125,126,127,
434     128,129,130,131,132,133,134,135,
435     136,137,138,139,140,141,142,143,
436     144,145,146,147,148,149,150,151,
437     152,153,154,155,156,157,158,159,
438     160,161,162,163,164,165,166,167,
439     168,169,170,171,172,173,174,175,
440     176,177,178,179,180,181,182,183,
441     184,185,186,187,188,189,190,191,
442     224,225,226,227,228,229,230,231,
443     232,233,234,235,236,237,238,239,
444     240,241,242,243,244,245,246,215,
445     248,249,250,251,252,253,254,223,
446     224,225,226,227,228,229,230,231,
447     232,233,234,235,236,237,238,239,
448     240,241,242,243,244,245,246,247,
449     248,249,250,251,252,253,254,255,
/* Case flipping table (256 bytes). */
450     0,1,2,3,4,5,6,7,
451     8,9,10,11,12,13,14,15,
452     16,17,18,19,20,21,22,23,
453     24,25,26,27,28,29,30,31,
454     32,33,34,35,36,37,38,39,
455     40,41,42,43,44,45,46,47,
456     48,49,50,51,52,53,54,55,
457     56,57,58,59,60,61,62,63,
458     64,97,98,99,100,101,102,103,
459     104,105,106,107,108,109,110,111,
460     112,113,114,115,116,117,118,119,
461     120,121,122,91,92,93,94,95,
462     96,65,66,67,68,69,70,71,
463     72,73,74,75,76,77,78,79,
464     80,81,82,83,84,85,86,87,
465     88,89,90,123,124,125,126,127,
466     128,129,130,131,132,133,134,135,
467     136,137,138,139,140,141,142,143,
468     144,145,146,147,148,149,150,151,
469     152,153,154,155,156,157,158,159,
470     160,161,162,163,164,165,166,167,
471     168,169,170,171,172,173,174,175,
472     176,177,178,179,180,181,182,183,
473     184,185,186,187,188,189,190,191,
474     224,225,226,227,228,229,230,231,
475     232,233,234,235,236,237,238,239,
476     240,241,242,243,244,245,246,215,
477     248,249,250,251,252,253,254,223,
478     192,193,194,195,196,197,198,199,
479     200,201,202,203,204,205,206,207,
480     208,209,210,211,212,213,214,247,
481     216,217,218,219,220,221,222,255,
/* Character class bit maps: ten maps of 32 bytes (four rows) each, in the
order space, xdigit, digit, upper, lower, word, graph, print, punct, cntrl. */
/* space */
482     0,62,0,0,1,0,0,0,
483     0,0,0,0,0,0,0,0,
484     32,0,0,0,1,0,0,0,
485     0,0,0,0,0,0,0,0,
/* xdigit */
486     0,0,0,0,0,0,255,3,
487     126,0,0,0,126,0,0,0,
488     0,0,0,0,0,0,0,0,
489     0,0,0,0,0,0,0,0,
/* digit */
490     0,0,0,0,0,0,255,3,
491     0,0,0,0,0,0,0,0,
492     0,0,0,0,0,0,12,2,
493     0,0,0,0,0,0,0,0,
/* upper */
494     0,0,0,0,0,0,0,0,
495     254,255,255,7,0,0,0,0,
496     0,0,0,0,0,0,0,0,
497     255,255,127,127,0,0,0,0,
/* lower */
498     0,0,0,0,0,0,0,0,
499     0,0,0,0,254,255,255,7,
500     0,0,0,0,0,4,32,4,
501     0,0,0,128,255,255,127,255,
/* word */
502     0,0,0,0,0,0,255,3,
503     254,255,255,135,254,255,255,7,
504     0,0,0,0,0,4,44,6,
505     255,255,127,255,255,255,127,255,
/* graph */
506     0,0,0,0,254,255,255,255,
507     255,255,255,255,255,255,255,127,
508     0,0,0,0,254,255,255,255,
509     255,255,255,255,255,255,255,255,
/* print */
510     0,2,0,0,255,255,255,255,
511     255,255,255,255,255,255,255,127,
512     0,0,0,0,255,255,255,255,
513     255,255,255,255,255,255,255,255,
/* punct */
514     0,0,0,0,254,255,0,252,
515     1,0,0,248,1,0,0,120,
516     0,0,0,0,254,255,255,255,
517     0,0,128,0,0,0,128,0,
/* cntrl */
518     255,255,255,255,0,0,0,0,
519     0,0,0,0,0,0,0,128,
520     255,255,255,255,0,0,0,0,
521     0,0,0,0,0,0,0,0,
/* Character type flag bits (256 bytes), one byte per character value. The
values are the decimal forms of the flag bits described in tables0
(1 = white space, 2 = letter, 4 = decimal digit, 8 = hexadecimal digit,
16 = alphanumeric or '_', 128 = metacharacter or binary zero). */
522     128,0,0,0,0,0,0,0,
523     0,1,1,0,1,1,0,0,
524     0,0,0,0,0,0,0,0,
525     0,0,0,0,0,0,0,0,
526     1,0,0,0,128,0,0,0,
527     128,128,128,128,0,0,128,0,
528     28,28,28,28,28,28,28,28,
529     28,28,0,0,0,0,0,128,
530     0,26,26,26,26,26,26,18,
531     18,18,18,18,18,18,18,18,
532     18,18,18,18,18,18,18,18,
533     18,18,18,128,128,0,128,16,
534     0,26,26,26,26,26,26,18,
535     18,18,18,18,18,18,18,18,
536     18,18,18,18,18,18,18,18,
537     18,18,18,128,128,0,0,0,
538     0,0,0,0,0,1,0,0,
539     0,0,0,0,0,0,0,0,
540     0,0,0,0,0,0,0,0,
541     0,0,0,0,0,0,0,0,
542     1,0,0,0,0,0,0,0,
543     0,0,18,0,0,0,0,0,
544     0,0,20,20,0,18,0,0,
545     0,20,18,0,0,0,0,0,
546     18,18,18,18,18,18,18,18,
547     18,18,18,18,18,18,18,18,
548     18,18,18,18,18,18,18,0,
549     18,18,18,18,18,18,18,18,
550     18,18,18,18,18,18,18,18,
551     18,18,18,18,18,18,18,18,
552     18,18,18,18,18,18,18,0,
553     18,18,18,18,18,18,18,18
554     };
555    
556    
557    
558 ph10 558
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for C libraries that have no strerror() of their own (SunOS4 is
one such system). The message is looked up in the traditional sys_errlist[]
table; a number outside the range 0 to sys_nerr-1 yields a fixed "unknown"
text instead of an out-of-range table access.

Argument:   n    the error number
Returns:    pointer to the message text
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif  /* HAVE_STRERROR */
578    
579    
580    
581    
582     /*************************************************
583 nigel 91 * Read or extend an input line *
584     *************************************************/
585    
586     /* Input lines are read into buffer, but both patterns and data lines can be
587     continued over multiple input lines. In addition, if the buffer fills up, we
588     want to automatically expand it so as to be able to handle extremely large
589     lines that are needed for certain stress tests. When the input buffer is
590     expanded, the other two buffers must also be expanded likewise, and the
591     contents of pbuffer, which are a copy of the input for callouts, must be
592     preserved (for when expansion happens for a data line). This is not the most
593     optimal way of handling this, but hey, this is just a test program!
594    
595     Arguments:
596     f the file to read
597     start where in buffer to start (this *must* be within buffer)
598 ph10 287 prompt for stdin or readline()
599 nigel 91
600     Returns: pointer to the start of new data
601     could be a copy of start, or could be moved
602     NULL if no data read and EOF reached
603     */
604    
605     static uschar *
606 ph10 287 extend_inputline(FILE *f, uschar *start, const char *prompt)
607 nigel 91 {
608     uschar *here = start;
609    
610     for (;;)
611     {
612 ph10 530 int rlen = (int)(buffer_size - (here - buffer));
613 nigel 93
614 nigel 91 if (rlen > 1000)
615     {
616     int dlen;
617 ph10 289
618 ph10 287 /* If libreadline support is required, use readline() to read a line if the
619     input is a terminal. Note that readline() removes the trailing newline, so
620     we must put it back again, to be compatible with fgets(). */
621 ph10 289
622 ph10 287 #ifdef SUPPORT_LIBREADLINE
623     if (isatty(fileno(f)))
624     {
625 ph10 289 size_t len;
626 ph10 287 char *s = readline(prompt);
627     if (s == NULL) return (here == start)? NULL : start;
628     len = strlen(s);
629 ph10 289 if (len > 0) add_history(s);
630 ph10 287 if (len > rlen - 1) len = rlen - 1;
631     memcpy(here, s, len);
632     here[len] = '\n';
633 ph10 289 here[len+1] = 0;
634     free(s);
635 ph10 287 }
636 ph10 289 else
637     #endif
638    
639 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
640 ph10 289
641 ph10 287 {
642 ph10 516 if (f == stdin) printf("%s", prompt);
643 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
644     return (here == start)? NULL : start;
645 ph10 289 }
646    
647 nigel 91 dlen = (int)strlen((char *)here);
648     if (dlen > 0 && here[dlen - 1] == '\n') return start;
649     here += dlen;
650     }
651    
652     else
653     {
654     int new_buffer_size = 2*buffer_size;
655     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
656     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
657     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
658    
659     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
660     {
661     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
662     exit(1);
663     }
664    
665     memcpy(new_buffer, buffer, buffer_size);
666     memcpy(new_pbuffer, pbuffer, buffer_size);
667    
668     buffer_size = new_buffer_size;
669    
670     start = new_buffer + (start - buffer);
671     here = new_buffer + (here - buffer);
672    
673     free(buffer);
674     free(dbuffer);
675     free(pbuffer);
676    
677     buffer = new_buffer;
678     dbuffer = new_dbuffer;
679     pbuffer = new_pbuffer;
680     }
681     }
682    
683     return NULL; /* Control never gets here */
684     }
685    
686    
687    
688    
689    
690    
691    
/*************************************************
*          Read number from string               *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped and then a run of decimal digits is converted.
If there are no digits the result is zero and the end pointer is left at the
first non-space character. A number too large for an int is clamped to INT_MAX
rather than being allowed to overflow, which would be undefined behaviour; the
whole run of digits is still consumed in that case.

Arguments:
  str          string to be converted
  endptr       where to put the end pointer (the first character not used)

Returns:       the value, as a non-negative int
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (*str != 0 && isspace(*str)) str++;
while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* result*10 + digit fits in an int exactly when result is no greater than
  (INT_MAX - digit)/10. Once clamped, the value stays at INT_MAX. */

  if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
  }
*endptr = str;
return result;
}
716    
717    
718    
719 nigel 49
720     /*************************************************
721     * Convert UTF-8 string to value *
722     *************************************************/
723    
724     /* This function takes one or more bytes that represents a UTF-8 character,
725     and returns the value of the character.
726    
727     Argument:
728 nigel 91 utf8bytes a pointer to the byte vector
729     vptr a pointer to an int to receive the value
730 nigel 49
731 nigel 91 Returns: > 0 => the number of bytes consumed
732     -6 to 0 => malformed UTF-8 character at offset = (-return)
733 nigel 49 */
734    
735 nigel 79 #if !defined NOUTF8
736    
737 nigel 67 static int
738 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
739 nigel 49 {
740 nigel 91 int c = *utf8bytes++;
741 nigel 49 int d = c;
742     int i, j, s;
743    
744     for (i = -1; i < 6; i++) /* i is number of additional bytes */
745     {
746     if ((d & 0x80) == 0) break;
747     d <<= 1;
748     }
749    
750     if (i == -1) { *vptr = c; return 1; } /* ascii character */
751     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
752    
753     /* i now has a value in the range 1-5 */
754    
755 nigel 59 s = 6*i;
756 nigel 85 d = (c & utf8_table3[i]) << s;
757 nigel 49
758     for (j = 0; j < i; j++)
759     {
760 nigel 91 c = *utf8bytes++;
761 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
762 nigel 59 s -= 6;
763 nigel 49 d |= (c & 0x3f) << s;
764     }
765    
766     /* Check that encoding was the correct unique one */
767    
768 nigel 85 for (j = 0; j < utf8_table1_size; j++)
769     if (d <= utf8_table1[j]) break;
770 nigel 49 if (j != i) return -(i+1);
771    
772     /* Valid value */
773    
774     *vptr = d;
775     return i+1;
776     }
777    
778 nigel 79 #endif
779 nigel 49
780    
781 nigel 79
782 nigel 63 /*************************************************
783 nigel 85 * Convert character value to UTF-8 *
784     *************************************************/
785    
786     /* This function takes an integer value in the range 0 - 0x7fffffff
787     and encodes it as a UTF-8 character in 0 to 6 bytes.
788    
789     Arguments:
790     cvalue the character value
791 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
792 nigel 85
793     Returns: number of characters placed in the buffer
794     */
795    
796 nigel 93 #if !defined NOUTF8
797    
798 nigel 85 static int
799 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
800 nigel 85 {
801     register int i, j;
802     for (i = 0; i < utf8_table1_size; i++)
803     if (cvalue <= utf8_table1[i]) break;
804 nigel 91 utf8bytes += i;
805 nigel 85 for (j = i; j > 0; j--)
806     {
807 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
808 nigel 85 cvalue >>= 6;
809     }
810 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
811 nigel 85 return i + 1;
812     }
813    
814 nigel 93 #endif
815 nigel 85
816    
817 nigel 93
818 nigel 85 /*************************************************
819 nigel 63 * Print character string *
820     *************************************************/
821 nigel 49
822 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
823     mode. Yields number of characters printed. If handed a NULL file, just counts
824     chars without printing. */
825 nigel 49
826 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
827 nigel 3 {
828 nigel 85 int c = 0;
829 nigel 63 int yield = 0;
830 nigel 3
831 nigel 63 while (length-- > 0)
832 nigel 3 {
833 nigel 79 #if !defined NOUTF8
834 nigel 67 if (use_utf8)
835 nigel 63 {
836     int rc = utf82ord(p, &c);
837 nigel 3
838 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
839     {
840     length -= rc - 1;
841     p += rc;
842 nigel 93 if (PRINTHEX(c))
843 nigel 63 {
844     if (f != NULL) fprintf(f, "%c", c);
845     yield++;
846     }
847     else
848     {
849 nigel 93 int n = 4;
850     if (f != NULL) fprintf(f, "\\x{%02x}", c);
851     yield += (n <= 0x000000ff)? 2 :
852     (n <= 0x00000fff)? 3 :
853     (n <= 0x0000ffff)? 4 :
854     (n <= 0x000fffff)? 5 : 6;
855 nigel 63 }
856     continue;
857     }
858     }
859 nigel 79 #endif
860 nigel 3
861 nigel 63 /* Not UTF-8, or malformed UTF-8 */
862    
863 nigel 93 c = *p++;
864     if (PRINTHEX(c))
865 nigel 3 {
866 nigel 63 if (f != NULL) fprintf(f, "%c", c);
867     yield++;
868 nigel 3 }
869 nigel 63 else
870 nigel 3 {
871 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
872     yield += 4;
873     }
874     }
875 nigel 3
876 nigel 63 return yield;
877     }
878 nigel 23
879 nigel 3
880 nigel 23
881 nigel 63 /*************************************************
882     * Callout function *
883     *************************************************/
884 nigel 3
885 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
886     the match. Yield zero unless more callouts than the fail count, or the callout
887     data is not zero. */
888 nigel 3
889 nigel 63 static int callout(pcre_callout_block *cb)
890     {
891     FILE *f = (first_callout | callout_extra)? outfile : NULL;
892 nigel 75 int i, pre_start, post_start, subject_length;
893 nigel 3
894 nigel 63 if (callout_extra)
895     {
896     fprintf(f, "Callout %d: last capture = %d\n",
897     cb->callout_number, cb->capture_last);
898 nigel 3
899 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
900     {
901     if (cb->offset_vector[i] < 0)
902     fprintf(f, "%2d: <unset>\n", i/2);
903     else
904     {
905     fprintf(f, "%2d: ", i/2);
906     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
907     cb->offset_vector[i+1] - cb->offset_vector[i], f);
908     fprintf(f, "\n");
909     }
910     }
911     }
912 nigel 3
913 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
914     datails. On subsequent calls in the same match, we use pchars just to find the
915     printed lengths of the substrings. */
916 nigel 3
917 nigel 63 if (f != NULL) fprintf(f, "--->");
918 nigel 3
919 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
920     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
921     cb->current_position - cb->start_match, f);
922 nigel 3
923 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
924    
925 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
926     cb->subject_length - cb->current_position, f);
927 nigel 3
928 nigel 63 if (f != NULL) fprintf(f, "\n");
929 nigel 9
930 nigel 63 /* Always print appropriate indicators, with callout number if not already
931 nigel 75 shown. For automatic callouts, show the pattern offset. */
932 nigel 3
933 nigel 75 if (cb->callout_number == 255)
934     {
935     fprintf(outfile, "%+3d ", cb->pattern_position);
936     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
937     }
938     else
939     {
940     if (callout_extra) fprintf(outfile, " ");
941     else fprintf(outfile, "%3d ", cb->callout_number);
942     }
943 nigel 3
944 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
945     fprintf(outfile, "^");
946 nigel 3
947 nigel 63 if (post_start > 0)
948     {
949     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
950     fprintf(outfile, "^");
951 nigel 3 }
952    
953 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
954     fprintf(outfile, " ");
955    
956     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
957     pbuffer + cb->pattern_position);
958    
959 nigel 63 fprintf(outfile, "\n");
960     first_callout = 0;
961 nigel 3
962 nigel 71 if (cb->callout_data != NULL)
963 nigel 49 {
964 nigel 71 int callout_data = *((int *)(cb->callout_data));
965     if (callout_data != 0)
966     {
967     fprintf(outfile, "Callout data = %d\n", callout_data);
968     return callout_data;
969     }
970 nigel 63 }
971 nigel 49
972 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
973     (++callout_count >= callout_fail_count)? 1 : 0;
974 nigel 3 }
975    
976    
977 nigel 63 /*************************************************
978 nigel 73 * Local malloc functions *
979 nigel 63 *************************************************/
980 nigel 3
981     /* Alternative malloc function, to test functionality and show the size of the
982     compiled re. */
983    
984     static void *new_malloc(size_t size)
985     {
986 nigel 73 void *block = malloc(size);
987 nigel 43 gotten_store = size;
988 nigel 73 if (show_malloc)
989 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
990 nigel 73 return block;
991 nigel 3 }
992    
993 nigel 73 static void new_free(void *block)
994     {
995     if (show_malloc)
996     fprintf(outfile, "free %p\n", block);
997     free(block);
998     }
999 nigel 3
1000    
1001 nigel 73 /* For recursion malloc/free, to test stacking calls */
1002    
1003     static void *stack_malloc(size_t size)
1004     {
1005     void *block = malloc(size);
1006     if (show_malloc)
1007 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1008 nigel 73 return block;
1009     }
1010    
1011     static void stack_free(void *block)
1012     {
1013     if (show_malloc)
1014     fprintf(outfile, "stack_free %p\n", block);
1015     free(block);
1016     }
1017    
1018    
1019 nigel 63 /*************************************************
1020     * Call pcre_fullinfo() *
1021     *************************************************/
1022 nigel 43
1023     /* Get one piece of information from the pcre_fullinfo() function */
1024    
1025     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1026     {
1027     int rc;
1028     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1029     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1030     }
1031    
1032    
1033    
1034 nigel 63 /*************************************************
1035 nigel 75 * Byte flipping function *
1036     *************************************************/
1037    
/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value    the value to flip; only the low-order 16 or 32 bits are used
  n        2 to swap the two low-order bytes; any other value reverses the
           four low-order bytes

Returns:   the flipped value, with all higher-order bits zero
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1047    
1048    
1049    
1050    
1051     /*************************************************
1052 nigel 87 * Check match or recursion limit *
1053     *************************************************/
1054    
1055     static int
1056     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1057     int start_offset, int options, int *use_offsets, int use_size_offsets,
1058     int flag, unsigned long int *limit, int errnumber, const char *msg)
1059     {
1060     int count;
1061     int min = 0;
1062     int mid = 64;
1063     int max = -1;
1064    
1065     extra->flags |= flag;
1066    
1067     for (;;)
1068     {
1069     *limit = mid;
1070    
1071     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1072     use_offsets, use_size_offsets);
1073    
1074     if (count == errnumber)
1075     {
1076     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1077     min = mid;
1078     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1079     }
1080    
1081     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1082     count == PCRE_ERROR_PARTIAL)
1083     {
1084     if (mid == min + 1)
1085     {
1086     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1087     break;
1088     }
1089     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1090     max = mid;
1091     mid = (min + mid)/2;
1092     }
1093     else break; /* Some other error */
1094     }
1095    
1096     extra->flags &= ~flag;
1097     return count;
1098     }
1099    
1100    
1101    
1102     /*************************************************
1103 ph10 227 * Case-independent strncmp() function *
1104     *************************************************/
1105    
1106     /*
1107     Arguments:
1108     s first string
1109     t second string
1110     n number of characters to compare
1111    
1112     Returns: < 0, = 0, or > 0, according to the comparison
1113     */
1114    
1115     static int
1116     strncmpic(uschar *s, uschar *t, int n)
1117     {
1118     while (n--)
1119     {
1120     int c = tolower(*s++) - tolower(*t++);
1121     if (c) return c;
1122     }
1123     return 0;
1124     }
1125    
1126    
1127    
1128     /*************************************************
1129 nigel 91 * Check newline indicator *
1130     *************************************************/
1131    
1132 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1133     a message and return 0 if there is no match.
1134 nigel 91
1135     Arguments:
1136     p points after the leading '<'
1137     f file for error message
1138    
1139     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1140     */
1141    
1142     static int
1143     check_newline(uschar *p, FILE *f)
1144     {
1145 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1146     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1147     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1148     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1149     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1150 ph10 231 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1151     if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1152 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
1153     return 0;
1154     }
1155    
1156    
1157    
1158     /*************************************************
1159 nigel 93 * Usage function *
1160     *************************************************/
1161    
/* Write the command-line summary to stdout. The lines shown depend on which
optional features (readline, DFA matching, POSIX wrapper) are compiled in. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1195    
1196    
1197    
1198     /*************************************************
1199 nigel 63 * Main Program *
1200     *************************************************/
1201 nigel 43
1202 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
1203     consist of a regular expression, in delimiters and optionally followed by
1204     options, followed by a set of test data, terminated by an empty line. */
1205    
1206     int main(int argc, char **argv)
1207     {
1208     FILE *infile = stdin;
1209     int options = 0;
1210     int study_options = 0;
1211 ph10 386 int default_find_match_limit = FALSE;
1212 nigel 3 int op = 1;
1213     int timeit = 0;
1214 nigel 93 int timeitm = 0;
1215 nigel 3 int showinfo = 0;
1216 nigel 31 int showstore = 0;
1217 ph10 606 int force_study = 0;
1218 nigel 87 int quiet = 0;
1219 nigel 53 int size_offsets = 45;
1220     int size_offsets_max;
1221 nigel 77 int *offsets = NULL;
1222 nigel 53 #if !defined NOPOSIX
1223 nigel 3 int posix = 0;
1224 nigel 53 #endif
1225 nigel 3 int debug = 0;
1226 nigel 11 int done = 0;
1227 nigel 77 int all_use_dfa = 0;
1228     int yield = 0;
1229 nigel 91 int stack_size;
1230 nigel 3
1231 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
1232     that 1024 is plenty long enough for the few names we'll be testing. */
1233 nigel 69
1234 nigel 91 uschar copynames[1024];
1235     uschar getnames[1024];
1236    
1237     uschar *copynamesptr;
1238     uschar *getnamesptr;
1239    
1240 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
1241 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
1242 nigel 69
1243 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
1244     dbuffer = (unsigned char *)malloc(buffer_size);
1245     pbuffer = (unsigned char *)malloc(buffer_size);
1246 nigel 69
1247 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
1248 nigel 3
1249 nigel 93 outfile = stdout;
1250    
1251     /* The following _setmode() stuff is some Windows magic that tells its runtime
1252     library to translate CRLF into a single LF character. At least, that's what
1253     I've been told: never having used Windows I take this all on trust. Originally
1254     it set 0x8000, but then I was advised that _O_BINARY was better. */
1255    
1256 nigel 75 #if defined(_WIN32) || defined(WIN32)
1257 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
1258     #endif
1259 nigel 75
1260 nigel 3 /* Scan options */
1261    
1262     while (argc > 1 && argv[op][0] == '-')
1263     {
1264 nigel 63 unsigned char *endptr;
1265 nigel 53
1266 ph10 606 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1267     else if (strcmp(argv[op], "-s") == 0) force_study = 1;
1268 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1269 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1270 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1271     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1272 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1273 nigel 79 #if !defined NODFA
1274 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1275 nigel 79 #endif
1276 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1277 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1278     *endptr == 0))
1279 nigel 53 {
1280     op++;
1281     argc--;
1282     }
1283 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1284     {
1285     int both = argv[op][2] == 0;
1286     int temp;
1287     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1288     *endptr == 0))
1289     {
1290     timeitm = temp;
1291     op++;
1292     argc--;
1293     }
1294     else timeitm = LOOPREPEAT;
1295     if (both) timeit = timeitm;
1296     }
1297 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1298     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1299     *endptr == 0))
1300     {
1301 nigel 93 #if defined(_WIN32) || defined(WIN32)
1302 nigel 91 printf("PCRE: -S not supported on this OS\n");
1303     exit(1);
1304     #else
1305     int rc;
1306     struct rlimit rlim;
1307     getrlimit(RLIMIT_STACK, &rlim);
1308     rlim.rlim_cur = stack_size * 1024 * 1024;
1309     rc = setrlimit(RLIMIT_STACK, &rlim);
1310     if (rc != 0)
1311     {
1312     printf("PCRE: setrlimit() failed with error %d\n", rc);
1313     exit(1);
1314     }
1315     op++;
1316     argc--;
1317     #endif
1318     }
1319 nigel 53 #if !defined NOPOSIX
1320 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1321 nigel 53 #endif
1322 nigel 63 else if (strcmp(argv[op], "-C") == 0)
1323     {
1324     int rc;
1325 ph10 392 unsigned long int lrc;
1326 nigel 63 printf("PCRE version %s\n", pcre_version());
1327     printf("Compiled with\n");
1328     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1329     printf(" %sUTF-8 support\n", rc? "" : "No ");
1330 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1331     printf(" %sUnicode properties support\n", rc? "" : "No ");
1332 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1333 ph10 391 /* Note that these values are always the ASCII values, even
1334 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
1335 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1336     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1337 ph10 150 (rc == -2)? "ANYCRLF" :
1338 nigel 93 (rc == -1)? "ANY" : "???");
1339 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1340     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1341     "all Unicode newlines");
1342 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1343     printf(" Internal link size = %d\n", rc);
1344     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1345     printf(" POSIX malloc threshold = %d\n", rc);
1346 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1347     printf(" Default match limit = %ld\n", lrc);
1348     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1349     printf(" Default recursion depth limit = %ld\n", lrc);
1350 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1351     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1352 ph10 121 goto EXIT;
1353 nigel 63 }
1354 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
1355     strcmp(argv[op], "--help") == 0)
1356     {
1357     usage();
1358     goto EXIT;
1359     }
1360 nigel 3 else
1361     {
1362 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
1363 nigel 93 usage();
1364 nigel 77 yield = 1;
1365     goto EXIT;
1366 nigel 3 }
1367     op++;
1368     argc--;
1369     }
1370    
1371 nigel 53 /* Get the store for the offsets vector, and remember what it was */
1372    
1373     size_offsets_max = size_offsets;
1374 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1375 nigel 53 if (offsets == NULL)
1376     {
1377     printf("** Failed to get %d bytes of memory for offsets vector\n",
1378 ph10 151 (int)(size_offsets_max * sizeof(int)));
1379 nigel 77 yield = 1;
1380     goto EXIT;
1381 nigel 53 }
1382    
1383 nigel 3 /* Sort out the input and output files */
1384    
1385     if (argc > 1)
1386     {
1387 nigel 93 infile = fopen(argv[op], INPUT_MODE);
1388 nigel 3 if (infile == NULL)
1389     {
1390     printf("** Failed to open %s\n", argv[op]);
1391 nigel 77 yield = 1;
1392     goto EXIT;
1393 nigel 3 }
1394     }
1395    
1396     if (argc > 2)
1397     {
1398 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
1399 nigel 3 if (outfile == NULL)
1400     {
1401     printf("** Failed to open %s\n", argv[op+1]);
1402 nigel 77 yield = 1;
1403     goto EXIT;
1404 nigel 3 }
1405     }
1406    
1407     /* Set alternative malloc function */
1408    
1409     pcre_malloc = new_malloc;
1410 nigel 73 pcre_free = new_free;
1411     pcre_stack_malloc = stack_malloc;
1412     pcre_stack_free = stack_free;
1413 nigel 3
1414 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1415 nigel 3
1416 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1417 nigel 3
1418     /* Main loop */
1419    
1420 nigel 11 while (!done)
1421 nigel 3 {
1422     pcre *re = NULL;
1423     pcre_extra *extra = NULL;
1424 nigel 37
1425     #if !defined NOPOSIX /* There are still compilers that require no indent */
1426 nigel 3 regex_t preg;
1427 nigel 45 int do_posix = 0;
1428 nigel 37 #endif
1429    
1430 nigel 7 const char *error;
1431 ph10 512 unsigned char *markptr;
1432 nigel 25 unsigned char *p, *pp, *ppp;
1433 nigel 75 unsigned char *to_file = NULL;
1434 nigel 53 const unsigned char *tables = NULL;
1435 nigel 75 unsigned long int true_size, true_study_size = 0;
1436     size_t size, regex_gotten_store;
1437 ph10 512 int do_mark = 0;
1438 nigel 3 int do_study = 0;
1439 nigel 25 int do_debug = debug;
1440 nigel 35 int do_G = 0;
1441     int do_g = 0;
1442 nigel 25 int do_showinfo = showinfo;
1443 nigel 35 int do_showrest = 0;
1444 nigel 75 int do_flip = 0;
1445 nigel 93 int erroroffset, len, delimiter, poffset;
1446 nigel 3
1447 nigel 67 use_utf8 = 0;
1448 ph10 211 debug_lengths = 1;
1449 nigel 63
1450 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1451 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1452 nigel 63 fflush(outfile);
1453 nigel 3
1454     p = buffer;
1455     while (isspace(*p)) p++;
1456     if (*p == 0) continue;
1457    
1458 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1459 nigel 3
1460 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1461     {
1462 nigel 91 unsigned long int magic, get_options;
1463 nigel 75 uschar sbuf[8];
1464     FILE *f;
1465    
1466     p++;
1467     pp = p + (int)strlen((char *)p);
1468     while (isspace(pp[-1])) pp--;
1469     *pp = 0;
1470    
1471     f = fopen((char *)p, "rb");
1472     if (f == NULL)
1473     {
1474     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1475     continue;
1476     }
1477    
1478     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1479    
1480     true_size =
1481     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1482     true_study_size =
1483     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1484    
1485     re = (real_pcre *)new_malloc(true_size);
1486     regex_gotten_store = gotten_store;
1487    
1488     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1489    
1490     magic = ((real_pcre *)re)->magic_number;
1491     if (magic != MAGIC_NUMBER)
1492     {
1493     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1494     {
1495     do_flip = 1;
1496     }
1497     else
1498     {
1499     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1500     fclose(f);
1501     continue;
1502     }
1503     }
1504    
1505     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1506     do_flip? " (byte-inverted)" : "", p);
1507    
1508     /* Need to know if UTF-8 for printing data strings */
1509    
1510 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1511     use_utf8 = (get_options & PCRE_UTF8) != 0;
1512 nigel 75
1513     /* Now see if there is any following study data */
1514    
1515     if (true_study_size != 0)
1516     {
1517     pcre_study_data *psd;
1518    
1519     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1520     extra->flags = PCRE_EXTRA_STUDY_DATA;
1521    
1522     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1523     extra->study_data = psd;
1524    
1525     if (fread(psd, 1, true_study_size, f) != true_study_size)
1526     {
1527     FAIL_READ:
1528     fprintf(outfile, "Failed to read data from %s\n", p);
1529     if (extra != NULL) new_free(extra);
1530     if (re != NULL) new_free(re);
1531     fclose(f);
1532     continue;
1533     }
1534     fprintf(outfile, "Study data loaded from %s\n", p);
1535     do_study = 1; /* To get the data output if requested */
1536     }
1537     else fprintf(outfile, "No study data\n");
1538    
1539     fclose(f);
1540     goto SHOW_INFO;
1541     }
1542    
1543     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1544     the pattern; if it isn't complete, read more. */
1545    
1546 nigel 3 delimiter = *p++;
1547    
1548 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1549 nigel 3 {
1550 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1551 nigel 3 goto SKIP_DATA;
1552     }
1553    
1554     pp = p;
1555 ph10 530 poffset = (int)(p - buffer);
1556 nigel 3
1557     for(;;)
1558     {
1559 nigel 29 while (*pp != 0)
1560     {
1561     if (*pp == '\\' && pp[1] != 0) pp++;
1562     else if (*pp == delimiter) break;
1563     pp++;
1564     }
1565 nigel 3 if (*pp != 0) break;
1566 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1567 nigel 3 {
1568     fprintf(outfile, "** Unexpected EOF\n");
1569 nigel 11 done = 1;
1570     goto CONTINUE;
1571 nigel 3 }
1572 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1573 nigel 3 }
1574    
1575 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1576     pointer to the correct relative point in the buffer. */
1577    
1578     p = buffer + poffset;
1579    
1580 nigel 29 /* If the first character after the delimiter is backslash, make
1581     the pattern end with backslash. This is purely to provide a way
1582     of testing for the error message when a pattern ends with backslash. */
1583    
1584     if (pp[1] == '\\') *pp++ = '\\';
1585    
1586 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1587     for callouts. */
1588 nigel 3
1589     *pp++ = 0;
1590 nigel 75 strcpy((char *)pbuffer, (char *)p);
1591 nigel 3
1592     /* Look for options after final delimiter */
1593    
1594     options = 0;
1595     study_options = 0;
1596 nigel 31 log_store = showstore; /* default from command line */
1597    
1598 nigel 3 while (*pp != 0)
1599     {
1600     switch (*pp++)
1601     {
1602 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1603 nigel 35 case 'g': do_g = 1; break;
1604 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1605     case 'm': options |= PCRE_MULTILINE; break;
1606     case 's': options |= PCRE_DOTALL; break;
1607     case 'x': options |= PCRE_EXTENDED; break;
1608 nigel 25
1609 nigel 35 case '+': do_showrest = 1; break;
1610 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1611 nigel 93 case 'B': do_debug = 1; break;
1612 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1613 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1614 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1615 nigel 75 case 'F': do_flip = 1; break;
1616 nigel 35 case 'G': do_G = 1; break;
1617 nigel 25 case 'I': do_showinfo = 1; break;
1618 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1619 ph10 512 case 'K': do_mark = 1; break;
1620 nigel 31 case 'M': log_store = 1; break;
1621 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1622 nigel 37
1623     #if !defined NOPOSIX
1624 nigel 3 case 'P': do_posix = 1; break;
1625 nigel 37 #endif
1626    
1627 nigel 3 case 'S': do_study = 1; break;
1628 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1629 ph10 535 case 'W': options |= PCRE_UCP; break;
1630 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1631 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1632 ph10 126 case 'Z': debug_lengths = 0; break;
1633 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1634 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1635 ph10 545
1636 ph10 541 case 'T':
1637     switch (*pp++)
1638     {
1639     case '0': tables = tables0; break;
1640     case '1': tables = tables1; break;
1641 ph10 545
1642 ph10 541 case '\r':
1643     case '\n':
1644 ph10 545 case ' ':
1645     case 0:
1646 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
1647 ph10 545 goto SKIP_DATA;
1648    
1649     default:
1650 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1651 ph10 545 goto SKIP_DATA;
1652 ph10 541 }
1653 ph10 545 break;
1654 nigel 25
1655     case 'L':
1656     ppp = pp;
1657 nigel 93 /* The '\r' test here is so that it works on Windows. */
1658     /* The '0' test is just in case this is an unterminated line. */
1659     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1660 nigel 25 *ppp = 0;
1661     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1662     {
1663     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1664     goto SKIP_DATA;
1665     }
1666 nigel 93 locale_set = 1;
1667 nigel 25 tables = pcre_maketables();
1668     pp = ppp;
1669     break;
1670    
1671 nigel 75 case '>':
1672     to_file = pp;
1673     while (*pp != 0) pp++;
1674     while (isspace(pp[-1])) pp--;
1675     *pp = 0;
1676     break;
1677    
1678 nigel 91 case '<':
1679     {
1680 ph10 518 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1681 ph10 336 {
1682     options |= PCRE_JAVASCRIPT_COMPAT;
1683 ph10 345 pp += 3;
1684 ph10 336 }
1685     else
1686 ph10 345 {
1687 ph10 336 int x = check_newline(pp, outfile);
1688     if (x == 0) goto SKIP_DATA;
1689     options |= x;
1690     while (*pp++ != '>');
1691 ph10 345 }
1692 nigel 91 }
1693     break;
1694    
1695 nigel 77 case '\r': /* So that it works in Windows */
1696     case '\n':
1697     case ' ':
1698     break;
1699 nigel 75
1700 nigel 3 default:
1701     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1702     goto SKIP_DATA;
1703     }
1704     }
1705    
1706 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1707 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1708     local character tables. */
1709 nigel 3
1710 nigel 37 #if !defined NOPOSIX
1711 nigel 3 if (posix || do_posix)
1712     {
1713     int rc;
1714     int cflags = 0;
1715 nigel 75
1716 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1717     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1718 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1719 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1720     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1721 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1722 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1723 nigel 87
1724 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1725    
1726     /* Compilation failed; go back for another re, skipping to blank line
1727     if non-interactive. */
1728    
1729     if (rc != 0)
1730     {
1731 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1732 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1733     goto SKIP_DATA;
1734     }
1735     }
1736    
1737     /* Handle compiling via the native interface */
1738    
1739     else
1740 nigel 37 #endif /* !defined NOPOSIX */
1741    
1742 nigel 3 {
1743 ph10 412 unsigned long int get_options;
1744 ph10 416
1745 nigel 93 if (timeit > 0)
1746 nigel 3 {
1747     register int i;
1748     clock_t time_taken;
1749     clock_t start_time = clock();
1750 nigel 93 for (i = 0; i < timeit; i++)
1751 nigel 3 {
1752 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1753 nigel 3 if (re != NULL) free(re);
1754     }
1755     time_taken = clock() - start_time;
1756 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1757     (((double)time_taken * 1000.0) / (double)timeit) /
1758 nigel 63 (double)CLOCKS_PER_SEC);
1759 nigel 3 }
1760    
1761 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1762 nigel 3
1763     /* Compilation failed; go back for another re, skipping to blank line
1764     if non-interactive. */
1765    
1766     if (re == NULL)
1767     {
1768     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1769     SKIP_DATA:
1770     if (infile != stdin)
1771     {
1772     for (;;)
1773     {
1774 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
1775 nigel 11 {
1776     done = 1;
1777     goto CONTINUE;
1778     }
1779 nigel 3 len = (int)strlen((char *)buffer);
1780     while (len > 0 && isspace(buffer[len-1])) len--;
1781     if (len == 0) break;
1782     }
1783     fprintf(outfile, "\n");
1784     }
1785 nigel 25 goto CONTINUE;
1786 nigel 3 }
1787 ph10 416
1788     /* Compilation succeeded. It is now possible to set the UTF-8 option from
1789     within the regex; check for this so that we know how to process the data
1790 ph10 412 lines. */
1791 ph10 416
1792 ph10 412 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1793     if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1794 nigel 3
1795 ph10 412 /* Print information if required. There are now two info-returning
1796     functions. The old one has a limited interface and returns only limited
1797     data. Check that it agrees with the newer one. */
1798 nigel 3
1799 nigel 63 if (log_store)
1800     fprintf(outfile, "Memory allocation (code space): %d\n",
1801     (int)(gotten_store -
1802     sizeof(real_pcre) -
1803     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1804    
1805 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1806     and remember the store that was got. */
1807    
1808     true_size = ((real_pcre *)re)->size;
1809     regex_gotten_store = gotten_store;
1810    
1811 ph10 606 /* If -s or /S was present, study the regexp to generate additional info to
1812 nigel 75 help with the matching. */
1813    
1814 ph10 606 if (do_study || force_study)
1815 nigel 75 {
1816 nigel 93 if (timeit > 0)
1817 nigel 75 {
1818     register int i;
1819     clock_t time_taken;
1820     clock_t start_time = clock();
1821 nigel 93 for (i = 0; i < timeit; i++)
1822 nigel 75 extra = pcre_study(re, study_options, &error);
1823     time_taken = clock() - start_time;
1824     if (extra != NULL) free(extra);
1825 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1826     (((double)time_taken * 1000.0) / (double)timeit) /
1827 nigel 75 (double)CLOCKS_PER_SEC);
1828     }
1829     extra = pcre_study(re, study_options, &error);
1830     if (error != NULL)
1831     fprintf(outfile, "Failed to study: %s\n", error);
1832     else if (extra != NULL)
1833     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1834     }
1835 ph10 512
1836 ph10 510 /* If /K was present, we set up for handling MARK data. */
1837 ph10 512
1838 ph10 510 if (do_mark)
1839     {
1840     if (extra == NULL)
1841     {
1842     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1843     extra->flags = 0;
1844     }
1845 ph10 512 extra->mark = &markptr;
1846 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
1847 ph10 512 }
1848 nigel 75
1849     /* If the 'F' option was present, we flip the bytes of all the integer
1850     fields in the regex data block and the study block. This is to make it
1851     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1852     compiled on a different architecture. */
1853    
1854     if (do_flip)
1855     {
1856     real_pcre *rre = (real_pcre *)re;
1857 ph10 259 rre->magic_number =
1858 ph10 255 byteflip(rre->magic_number, sizeof(rre->magic_number));
1859 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
1860     rre->options = byteflip(rre->options, sizeof(rre->options));
1861 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1862 ph10 259 rre->top_bracket =
1863 ph10 255 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1864 ph10 259 rre->top_backref =
1865 ph10 255 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1866 ph10 259 rre->first_byte =
1867 ph10 255 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1868 ph10 259 rre->req_byte =
1869 ph10 255 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1870     rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1871 nigel 75 sizeof(rre->name_table_offset));
1872 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1873 nigel 75 sizeof(rre->name_entry_size));
1874 ph10 259 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1875 ph10 255 sizeof(rre->name_count));
1876 nigel 75
1877     if (extra != NULL)
1878     {
1879     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1880     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1881 ph10 455 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1882     rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1883 nigel 75 }
1884     }
1885    
1886     /* Extract information from the compiled data if required */
1887    
1888     SHOW_INFO:
1889    
1890 nigel 93 if (do_debug)
1891     {
1892     fprintf(outfile, "------------------------------------------------------------------\n");
1893 ph10 116 pcre_printint(re, outfile, debug_lengths);
1894 nigel 93 }
1895 ph10 416
1896 ph10 412 /* We already have the options in get_options (see above) */
1897 nigel 93
1898 nigel 25 if (do_showinfo)
1899 nigel 3 {
1900 ph10 412 unsigned long int all_options;
1901 nigel 79 #if !defined NOINFOCHECK
1902 nigel 43 int old_first_char, old_options, old_count;
1903 nigel 79 #endif
1904 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1905 ph10 227 hascrorlf;
1906 nigel 63 int nameentrysize, namecount;
1907     const uschar *nametable;
1908 nigel 3
1909 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1910     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1911     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1912 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1913 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1914 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1915     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1916 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1917 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1918     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1919 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1920 nigel 43
1921 nigel 79 #if !defined NOINFOCHECK
1922 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1923 nigel 3 if (count < 0) fprintf(outfile,
1924 nigel 43 "Error %d from pcre_info()\n", count);
1925 nigel 3 else
1926     {
1927 nigel 43 if (old_count != count) fprintf(outfile,
1928     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1929     old_count);
1930 nigel 37
1931 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1932     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1933     first_char, old_first_char);
1934 nigel 37
1935 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1936     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1937     get_options, old_options);
1938 nigel 43 }
1939 nigel 79 #endif
1940 nigel 43
1941 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1942 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1943 nigel 77 (int)size, (int)regex_gotten_store);
1944 nigel 43
1945     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1946     if (backrefmax > 0)
1947     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1948 nigel 63
1949     if (namecount > 0)
1950     {
1951     fprintf(outfile, "Named capturing subpatterns:\n");
1952     while (namecount-- > 0)
1953     {
1954     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1955     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1956     GET2(nametable, 0));
1957     nametable += nameentrysize;
1958     }
1959     }
1960 ph10 172
1961 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1962 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1963 nigel 63
1964 nigel 75 all_options = ((real_pcre *)re)->options;
1965 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1966 nigel 75
1967 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1968 ph10 576 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1969 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1970     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1971     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1972     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1973 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1974 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1975 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1976     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1977 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1978     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1979     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1980 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1981 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1982 ph10 518 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1983 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1984 ph10 576 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
1985 nigel 91 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1986 ph10 172
1987 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1988 nigel 43
1989 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1990 nigel 91 {
1991     case PCRE_NEWLINE_CR:
1992     fprintf(outfile, "Forced newline sequence: CR\n");
1993     break;
1994 nigel 43
1995 nigel 91 case PCRE_NEWLINE_LF:
1996     fprintf(outfile, "Forced newline sequence: LF\n");
1997     break;
1998    
1999     case PCRE_NEWLINE_CRLF:
2000     fprintf(outfile, "Forced newline sequence: CRLF\n");
2001     break;
2002    
2003 ph10 149 case PCRE_NEWLINE_ANYCRLF:
2004     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2005     break;
2006    
2007 nigel 93 case PCRE_NEWLINE_ANY:
2008     fprintf(outfile, "Forced newline sequence: ANY\n");
2009     break;
2010    
2011 nigel 91 default:
2012     break;
2013     }
2014    
2015 nigel 43 if (first_char == -1)
2016     {
2017 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
2018 nigel 43 }
2019     else if (first_char < 0)
2020     {
2021     fprintf(outfile, "No first char\n");
2022     }
2023     else
2024     {
2025 nigel 63 int ch = first_char & 255;
2026 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2027 nigel 63 "" : " (caseless)";
2028 nigel 93 if (PRINTHEX(ch))
2029 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2030 nigel 3 else
2031 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2032 nigel 43 }
2033 nigel 37
2034 nigel 43 if (need_char < 0)
2035     {
2036     fprintf(outfile, "No need char\n");
2037 nigel 3 }
2038 nigel 43 else
2039     {
2040 nigel 63 int ch = need_char & 255;
2041 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2042 nigel 63 "" : " (caseless)";
2043 nigel 93 if (PRINTHEX(ch))
2044 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2045 nigel 43 else
2046 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2047 nigel 43 }
2048 nigel 75
2049     /* Don't output study size; at present it is in any case a fixed
2050     value, but it varies, depending on the computer architecture, and
2051     so messes up the test suite. (And with the /F option, it might be
2052     flipped.) */
2053    
2054 ph10 606 if (do_study || force_study)
2055 nigel 75 {
2056     if (extra == NULL)
2057     fprintf(outfile, "Study returned NULL\n");
2058     else
2059     {
2060     uschar *start_bits = NULL;
2061 ph10 455 int minlength;
2062 ph10 461
2063 ph10 455 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2064 ph10 461 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2065    
2066 nigel 75 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2067     if (start_bits == NULL)
2068 ph10 455 fprintf(outfile, "No set of starting bytes\n");
2069 nigel 75 else
2070     {
2071     int i;
2072     int c = 24;
2073     fprintf(outfile, "Starting byte set: ");
2074     for (i = 0; i < 256; i++)
2075     {
2076     if ((start_bits[i/8] & (1<<(i&7))) != 0)
2077     {
2078     if (c > 75)
2079     {
2080     fprintf(outfile, "\n ");
2081     c = 2;
2082     }
2083 nigel 93 if (PRINTHEX(i) && i != ' ')
2084 nigel 75 {
2085     fprintf(outfile, "%c ", i);
2086     c += 2;
2087     }
2088     else
2089     {
2090     fprintf(outfile, "\\x%02x ", i);
2091     c += 5;
2092     }
2093     }
2094     }
2095     fprintf(outfile, "\n");
2096     }
2097     }
2098     }
2099 nigel 3 }
2100    
2101 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
2102     that is all. The first 8 bytes of the file are the regex length and then
2103     the study length, in big-endian order. */
2104 nigel 3
2105 nigel 75 if (to_file != NULL)
2106 nigel 3 {
2107 nigel 75 FILE *f = fopen((char *)to_file, "wb");
2108     if (f == NULL)
2109 nigel 3 {
2110 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2111 nigel 3 }
2112 nigel 75 else
2113     {
2114     uschar sbuf[8];
2115 ph10 255 sbuf[0] = (uschar)((true_size >> 24) & 255);
2116     sbuf[1] = (uschar)((true_size >> 16) & 255);
2117     sbuf[2] = (uschar)((true_size >> 8) & 255);
2118     sbuf[3] = (uschar)((true_size) & 255);
2119 ph10 259
2120 ph10 255 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2121     sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2122     sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2123     sbuf[7] = (uschar)((true_study_size) & 255);
2124 nigel 3
2125 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
2126     fwrite(re, 1, true_size, f) < true_size)
2127     {
2128     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2129     }
2130 nigel 3 else
2131     {
2132 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
2133     if (extra != NULL)
2134 nigel 3 {
2135 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
2136     true_study_size)
2137 nigel 3 {
2138 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
2139     strerror(errno));
2140 nigel 3 }
2141 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
2142 nigel 93
2143 nigel 3 }
2144     }
2145 nigel 75 fclose(f);
2146 nigel 3 }
2147 nigel 77
2148     new_free(re);
2149     if (extra != NULL) new_free(extra);
2150 ph10 545 if (locale_set)
2151 ph10 541 {
2152     new_free((void *)tables);
2153     setlocale(LC_CTYPE, "C");
2154 ph10 545 locale_set = 0;
2155     }
2156 nigel 75 continue; /* With next regex */
2157 nigel 3 }
2158 nigel 75 } /* End of non-POSIX compile */
2159 nigel 3
2160     /* Read data lines and test them */
2161    
2162     for (;;)
2163     {
2164 nigel 87 uschar *q;
2165 ph10 147 uschar *bptr;
2166 nigel 57 int *use_offsets = offsets;
2167 nigel 53 int use_size_offsets = size_offsets;
2168 nigel 63 int callout_data = 0;
2169     int callout_data_set = 0;
2170 nigel 3 int count, c;
2171 nigel 29 int copystrings = 0;
2172 ph10 386 int find_match_limit = default_find_match_limit;
2173 nigel 29 int getstrings = 0;
2174     int getlist = 0;
2175 nigel 39 int gmatched = 0;
2176 nigel 35 int start_offset = 0;
2177 ph10 579 int start_offset_sign = 1;
2178 nigel 41 int g_notempty = 0;
2179 nigel 77 int use_dfa = 0;
2180 nigel 3
2181     options = 0;
2182    
2183 nigel 91 *copynames = 0;
2184     *getnames = 0;
2185    
2186     copynamesptr = copynames;
2187     getnamesptr = getnames;
2188    
2189 nigel 63 pcre_callout = callout;
2190     first_callout = 1;
2191     callout_extra = 0;
2192     callout_count = 0;
2193     callout_fail_count = 999999;
2194     callout_fail_id = -1;
2195 nigel 73 show_malloc = 0;
2196 nigel 63
2197 nigel 91 if (extra != NULL) extra->flags &=
2198     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2199    
2200     len = 0;
2201     for (;;)
2202 nigel 11 {
2203 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2204 nigel 91 {
2205 ph10 537 if (len > 0) /* Reached EOF without hitting a newline */
2206     {
2207 ph10 545 fprintf(outfile, "\n");
2208 ph10 537 break;
2209 ph10 545 }
2210 nigel 91 done = 1;
2211     goto CONTINUE;
2212     }
2213     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2214     len = (int)strlen((char *)buffer);
2215     if (buffer[len-1] == '\n') break;
2216 nigel 11 }
2217 nigel 3
2218     while (len > 0 && isspace(buffer[len-1])) len--;
2219     buffer[len] = 0;
2220     if (len == 0) break;
2221    
2222     p = buffer;
2223     while (isspace(*p)) p++;
2224    
2225 ph10 147 bptr = q = dbuffer;
2226 nigel 3 while ((c = *p++) != 0)
2227     {
2228     int i = 0;
2229     int n = 0;
2230 nigel 63
2231 nigel 3 if (c == '\\') switch ((c = *p++))
2232     {
2233     case 'a': c = 7; break;
2234     case 'b': c = '\b'; break;
2235     case 'e': c = 27; break;
2236     case 'f': c = '\f'; break;
2237     case 'n': c = '\n'; break;
2238     case 'r': c = '\r'; break;
2239     case 't': c = '\t'; break;
2240     case 'v': c = '\v'; break;
2241    
2242     case '0': case '1': case '2': case '3':
2243     case '4': case '5': case '6': case '7':
2244     c -= '0';
2245     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2246     c = c * 8 + *p++ - '0';
2247 nigel 91
2248     #if !defined NOUTF8
2249     if (use_utf8 && c > 255)
2250     {
2251     unsigned char buff8[8];
2252     int ii, utn;
2253     utn = ord2utf8(c, buff8);
2254     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2255     c = buff8[ii]; /* Last byte */
2256     }
2257     #endif
2258 nigel 3 break;
2259    
2260     case 'x':
2261 nigel 49
2262     /* Handle \x{..} specially - new Perl thing for utf8 */
2263    
2264 nigel 79 #if !defined NOUTF8
2265 nigel 49 if (*p == '{')
2266     {
2267     unsigned char *pt = p;
2268     c = 0;
2269     while (isxdigit(*(++pt)))
2270     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2271     if (*pt == '}')
2272     {
2273 nigel 67 unsigned char buff8[8];
2274 nigel 49 int ii, utn;
2275 ph10 355 if (use_utf8)
2276 ph10 358 {
2277 ph10 355 utn = ord2utf8(c, buff8);
2278     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2279     c = buff8[ii]; /* Last byte */
2280     }
2281     else
2282     {
2283 ph10 358 if (c > 255)
2284 ph10 355 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2285     "UTF-8 mode is not enabled.\n"
2286     "** Truncation will probably give the wrong result.\n", c);
2287 ph10 358 }
2288 nigel 49 p = pt + 1;
2289     break;
2290     }
2291     /* Not correct form; fall through */
2292     }
2293 nigel 79 #endif
2294 nigel 49
2295     /* Ordinary \x */
2296    
2297 nigel 3 c = 0;
2298     while (i++ < 2 && isxdigit(*p))
2299     {
2300     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2301     p++;
2302     }
2303     break;
2304    
2305 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
2306 nigel 3 p--;
2307     continue;
2308    
2309 nigel 75 case '>':
2310 ph10 579 if (*p == '-')
2311 ph10 567 {
2312     start_offset_sign = -1;
2313     p++;
2314 ph10 579 }
2315 nigel 75 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2316 ph10 579 start_offset *= start_offset_sign;
2317 nigel 75 continue;
2318    
2319 nigel 3 case 'A': /* Option setting */
2320     options |= PCRE_ANCHORED;
2321     continue;
2322    
2323     case 'B':
2324     options |= PCRE_NOTBOL;
2325     continue;
2326    
2327 nigel 29 case 'C':
2328 nigel 63 if (isdigit(*p)) /* Set copy string */
2329     {
2330     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2331     copystrings |= 1 << n;
2332     }
2333     else if (isalnum(*p))
2334     {
2335 nigel 91 uschar *npp = copynamesptr;
2336 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2337 nigel 91 *npp++ = 0;
2338 nigel 67 *npp = 0;
2339 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2340 nigel 63 if (n < 0)
2341 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2342     copynamesptr = npp;
2343 nigel 63 }
2344     else if (*p == '+')
2345     {
2346     callout_extra = 1;
2347     p++;
2348     }
2349     else if (*p == '-')
2350     {
2351     pcre_callout = NULL;
2352     p++;
2353     }
2354     else if (*p == '!')
2355     {
2356     callout_fail_id = 0;
2357     p++;
2358     while(isdigit(*p))
2359     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2360     callout_fail_count = 0;
2361     if (*p == '!')
2362     {
2363     p++;
2364     while(isdigit(*p))
2365     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2366     }
2367     }
2368     else if (*p == '*')
2369     {
2370     int sign = 1;
2371     callout_data = 0;
2372     if (*(++p) == '-') { sign = -1; p++; }
2373     while(isdigit(*p))
2374     callout_data = callout_data * 10 + *p++ - '0';
2375     callout_data *= sign;
2376     callout_data_set = 1;
2377     }
2378 nigel 29 continue;
2379    
2380 nigel 79 #if !defined NODFA
2381 nigel 77 case 'D':
2382 nigel 79 #if !defined NOPOSIX
2383 nigel 77 if (posix || do_posix)
2384     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2385     else
2386 nigel 79 #endif
2387 nigel 77 use_dfa = 1;
2388     continue;
2389 ph10 553 #endif
2390 nigel 77
2391 ph10 553 #if !defined NODFA
2392 nigel 77 case 'F':
2393     options |= PCRE_DFA_SHORTEST;
2394     continue;
2395 nigel 79 #endif
2396 nigel 77
2397 nigel 29 case 'G':
2398 nigel 63 if (isdigit(*p))
2399     {
2400     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2401     getstrings |= 1 << n;
2402     }
2403     else if (isalnum(*p))
2404     {
2405 nigel 91 uschar *npp = getnamesptr;
2406 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2407 nigel 91 *npp++ = 0;
2408 nigel 67 *npp = 0;
2409 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2410 nigel 63 if (n < 0)
2411 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2412     getnamesptr = npp;
2413 nigel 63 }
2414 nigel 29 continue;
2415    
2416     case 'L':
2417     getlist = 1;
2418     continue;
2419    
2420 nigel 63 case 'M':
2421     find_match_limit = 1;
2422     continue;
2423    
2424 nigel 37 case 'N':
2425 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
2426     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2427 ph10 461 else
2428 ph10 442 options |= PCRE_NOTEMPTY;
2429 nigel 37 continue;
2430    
2431 nigel 3 case 'O':
2432     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2433 nigel 53 if (n > size_offsets_max)
2434     {
2435     size_offsets_max = n;
2436 nigel 57 free(offsets);
2437 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2438 nigel 53 if (offsets == NULL)
2439     {
2440     printf("** Failed to get %d bytes of memory for offsets vector\n",
2441 ph10 151 (int)(size_offsets_max * sizeof(int)));
2442 nigel 77 yield = 1;
2443     goto EXIT;
2444 nigel 53 }
2445     }
2446     use_size_offsets = n;
2447 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2448 nigel 3 continue;
2449    
2450 nigel 75 case 'P':
2451 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2452 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2453 nigel 75 continue;
2454    
2455 nigel 91 case 'Q':
2456     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2457     if (extra == NULL)
2458     {
2459     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2460     extra->flags = 0;
2461     }
2462     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2463     extra->match_limit_recursion = n;
2464     continue;
2465    
2466     case 'q':
2467     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2468     if (extra == NULL)
2469     {
2470     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2471     extra->flags = 0;
2472     }
2473     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2474     extra->match_limit = n;
2475     continue;
2476    
2477 nigel 79 #if !defined NODFA
2478 nigel 77 case 'R':
2479     options |= PCRE_DFA_RESTART;
2480     continue;
2481 nigel 79 #endif
2482 nigel 77
2483 nigel 73 case 'S':
2484     show_malloc = 1;
2485     continue;
2486 ph10 392
2487 ph10 389 case 'Y':
2488     options |= PCRE_NO_START_OPTIMIZE;
2489 ph10 392 continue;
2490 nigel 73
2491 nigel 3 case 'Z':
2492     options |= PCRE_NOTEOL;
2493     continue;
2494 nigel 71
2495     case '?':
2496     options |= PCRE_NO_UTF8_CHECK;
2497     continue;
2498 nigel 91
2499     case '<':
2500     {
2501     int x = check_newline(p, outfile);
2502     if (x == 0) goto NEXT_DATA;
2503     options |= x;
2504     while (*p++ != '>');
2505     }
2506     continue;
2507 nigel 3 }
2508 nigel 9 *q++ = c;
2509 nigel 3 }
2510 nigel 9 *q = 0;
2511 ph10 530 len = (int)(q - dbuffer);
2512 ph10 545
2513 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
2514 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2515 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
2516 ph10 371
2517 ph10 363 #if !defined NOPOSIX
2518     if (posix || do_posix)
2519     {
2520     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2521 ph10 371 bptr += buffer_size - len - 1;
2522 ph10 363 }
2523 ph10 371 else
2524     #endif
2525 ph10 363 {
2526     memmove(bptr + buffer_size - len, bptr, len);
2527 ph10 371 bptr += buffer_size - len;
2528     }
2529 nigel 3
2530 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
2531     {
2532     printf("**Match limit not relevant for DFA matching: ignored\n");
2533     find_match_limit = 0;
2534     }
2535    
2536 nigel 3 /* Handle matching via the POSIX interface, which does not
2537 nigel 63 support timing or playing with the match limit or callout data. */
2538 nigel 3
2539 nigel 37 #if !defined NOPOSIX
2540 nigel 3 if (posix || do_posix)
2541     {
2542     int rc;
2543     int eflags = 0;
2544 nigel 63 regmatch_t *pmatch = NULL;
2545     if (use_size_offsets > 0)
2546 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2547 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2548     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2549 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2550 nigel 3
2551 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2552 nigel 3
2553     if (rc != 0)
2554     {
2555 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2556 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2557     }
2558 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2559     != 0)
2560     {
2561     fprintf(outfile, "Matched with REG_NOSUB\n");
2562     }
2563 nigel 3 else
2564     {
2565 nigel 7 size_t i;
2566 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
2567 nigel 3 {
2568     if (pmatch[i].rm_so >= 0)
2569     {
2570 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2571 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2572     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2573 nigel 3 fprintf(outfile, "\n");
2574 nigel 35 if (i == 0 && do_showrest)
2575     {
2576     fprintf(outfile, " 0+ ");
2577 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2578     outfile);
2579 nigel 35 fprintf(outfile, "\n");
2580     }
2581 nigel 3 }
2582     }
2583     }
2584 nigel 53 free(pmatch);
2585 nigel 3 }
2586    
2587 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2588 nigel 3
2589 nigel 37 else
2590     #endif /* !defined NOPOSIX */
2591    
2592 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2593 nigel 3 {
2594 ph10 512 markptr = NULL;
2595    
2596 nigel 93 if (timeitm > 0)
2597 nigel 3 {
2598     register int i;
2599     clock_t time_taken;
2600     clock_t start_time = clock();
2601 nigel 77
2602 nigel 79 #if !defined NODFA
2603 nigel 77 if (all_use_dfa || use_dfa)
2604     {
2605     int workspace[1000];
2606 nigel 93 for (i = 0; i < timeitm; i++)
2607 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2608 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2609     sizeof(workspace)/sizeof(int));
2610     }
2611     else
2612 nigel 79 #endif
2613 nigel 77
2614 nigel 93 for (i = 0; i < timeitm; i++)
2615 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2616 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2617 nigel 77
2618 nigel 3 time_taken = clock() - start_time;
2619 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2620     (((double)time_taken * 1000.0) / (double)timeitm) /
2621 nigel 63 (double)CLOCKS_PER_SEC);
2622 nigel 3 }
2623    
2624 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2625 nigel 87 varying limits in order to find the minimum value for the match limit and
2626     for the recursion limit. */
2627 nigel 63
2628     if (find_match_limit)
2629     {
2630     if (extra == NULL)
2631     {
2632 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2633 nigel 63 extra->flags = 0;
2634     }
2635    
2636 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2637 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2638     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2639     PCRE_ERROR_MATCHLIMIT, "match()");
2640 nigel 63
2641 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2642     options|g_notempty, use_offsets, use_size_offsets,
2643     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2644     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2645 nigel 63 }
2646    
2647     /* If callout_data is set, use the interface with additional data */
2648    
2649     else if (callout_data_set)
2650     {
2651     if (extra == NULL)
2652     {
2653 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2654 nigel 63 extra->flags = 0;
2655     }
2656     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2657 nigel 71 extra->callout_data = &callout_data;
2658 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2659     options | g_notempty, use_offsets, use_size_offsets);
2660     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2661     }
2662    
2663     /* The normal case is just to do the match once, with the default
2664     value of match_limit. */
2665    
2666 nigel 79 #if !defined NODFA
2667 nigel 77 else if (all_use_dfa || use_dfa)
2668     {
2669     int workspace[1000];
2670 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2671 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2672     sizeof(workspace)/sizeof(int));
2673     if (count == 0)
2674     {
2675     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2676     count = use_size_offsets/2;
2677     }
2678     }
2679 nigel 79 #endif
2680 nigel 77
2681 nigel 75 else
2682     {
2683     count = pcre_exec(re, extra, (char *)bptr, len,
2684     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2685 nigel 77 if (count == 0)
2686     {
2687     fprintf(outfile, "Matched, but too many substrings\n");
2688     count = use_size_offsets/3;
2689     }
2690 nigel 75 }
2691 nigel 3
2692 nigel 39 /* Matched */
2693    
2694 nigel 3 if (count >= 0)
2695     {
2696 nigel 93 int i, maxcount;
2697    
2698     #if !defined NODFA
2699     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2700     #endif
2701     maxcount = use_size_offsets/3;
2702    
2703     /* This is a check against a lunatic return value. */
2704    
2705     if (count > maxcount)
2706     {
2707     fprintf(outfile,
2708     "** PCRE error: returned count %d is too big for offset size %d\n",
2709     count, use_size_offsets);
2710     count = use_size_offsets/3;
2711     if (do_g || do_G)
2712     {
2713     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2714     do_g = do_G = FALSE; /* Break g/G loop */
2715     }
2716     }
2717    
2718 nigel 29 for (i = 0; i < count * 2; i += 2)
2719 nigel 3 {
2720 nigel 57 if (use_offsets[i] < 0)
2721 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2722     else
2723     {
2724     fprintf(outfile, "%2d: ", i/2);
2725 nigel 63 (void)pchars(bptr + use_offsets[i],
2726     use_offsets[i+1] - use_offsets[i], outfile);
2727 nigel 3 fprintf(outfile, "\n");
2728 nigel 35 if (i == 0)
2729     {
2730     if (do_showrest)
2731     {
2732     fprintf(outfile, " 0+ ");
2733 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2734     outfile);
2735 nigel 35 fprintf(outfile, "\n");
2736     }
2737     }
2738 nigel 3 }
2739     }
2740 ph10 512
2741 ph10 510 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2742 nigel 29
2743     for (i = 0; i < 32; i++)
2744     {
2745     if ((copystrings & (1 << i)) != 0)
2746     {
2747 nigel 91 char copybuffer[256];
2748 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2749 nigel 37 i, copybuffer, sizeof(copybuffer));
2750 nigel 29 if (rc < 0)
2751     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2752     else
2753 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2754 nigel 29 }
2755     }
2756    
2757 nigel 91 for (copynamesptr = copynames;
2758     *copynamesptr != 0;
2759     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2760     {
2761     char copybuffer[256];
2762     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2763     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2764     if (rc < 0)
2765     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2766     else
2767     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2768     }
2769    
2770 nigel 29 for (i = 0; i < 32; i++)
2771     {
2772     if ((getstrings & (1 << i)) != 0)
2773     {
2774     const char *substring;
2775 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2776 nigel 29 i, &substring);
2777     if (rc < 0)
2778     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2779     else
2780     {
2781     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2782 nigel 49 pcre_free_substring(substring);
2783 nigel 29 }
2784     }
2785     }
2786    
2787 nigel 91 for (getnamesptr = getnames;
2788     *getnamesptr != 0;
2789     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2790     {
2791     const char *substring;
2792     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2793     count, (char *)getnamesptr, &substring);
2794     if (rc < 0)
2795     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2796     else
2797     {
2798     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2799     pcre_free_substring(substring);
2800     }
2801     }
2802    
2803 nigel 29 if (getlist)
2804     {
2805     const char **stringlist;
2806 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2807 nigel 29 &stringlist);
2808     if (rc < 0)
2809     fprintf(outfile, "get substring list failed %d\n", rc);
2810     else
2811     {
2812     for (i = 0; i < count; i++)
2813     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2814     if (stringlist[i] != NULL)
2815     fprintf(outfile, "string list not terminated by NULL\n");
2816 nigel 49 /* free((void *)stringlist); */
2817     pcre_free_substring_list(stringlist);
2818 nigel 29 }
2819     }
2820 nigel 39 }
2821 nigel 29
2822 nigel 75 /* There was a partial match */
2823    
2824     else if (count == PCRE_ERROR_PARTIAL)
2825     {
2826 ph10 510 if (markptr == NULL) fprintf(outfile, "Partial match");
2827     else fprintf(outfile, "Partial match, mark=%s", markptr);
2828 ph10 426 if (use_size_offsets > 1)
2829     {
2830     fprintf(outfile, ": ");
2831     pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2832 ph10 461 outfile);
2833     }
2834 nigel 77 fprintf(outfile, "\n");
2835 nigel 75 break; /* Out of the /g loop */
2836     }
2837    
2838 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2839 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2840     to advance the start offset, and continue. We won't be at the end of the
2841     string - that was checked before setting g_notempty.
2842 nigel 39
2843 ph10 566 Complication arises in the case when the newline convention is "any",
2844 ph10 579 "crlf", or "anycrlf". If the previous match was at the end of a line
2845     terminated by CRLF, an advance of one character just passes the \r,
2846 ph10 566 whereas we should prefer the longer newline sequence, as does the code in
2847 ph10 579 pcre_exec(). Fudge the offset value to achieve this. We check for a
2848     newline setting in the pattern; if none was set, use pcre_config() to
2849 ph10 566 find the default.
2850 ph10 144
2851 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2852     character, not one byte. */
2853    
2854 nigel 3 else
2855     {
2856 nigel 41 if (g_notempty != 0)
2857 nigel 35 {
2858 nigel 73 int onechar = 1;
2859 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2860 nigel 57 use_offsets[0] = start_offset;
2861 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2862     {
2863     int d;
2864     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2865 ph10 391 /* Note that these values are always the ASCII ones, even in
2866     EBCDIC environments. CR = 13, NL = 10. */
2867     obits = (d == 13)? PCRE_NEWLINE_CR :
2868     (d == 10)? PCRE_NEWLINE_LF :
2869     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2870 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2871 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2872     }
2873 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2874 ph10 566 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2875 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2876 ph10 149 &&
2877 ph10 143 start_offset < len - 1 &&
2878     bptr[start_offset] == '\r' &&
2879     bptr[start_offset+1] == '\n')
2880 ph10 144 onechar++;
2881 ph10 143 else if (use_utf8)
2882 nigel 73 {
2883     while (start_offset + onechar < len)
2884     {
2885 ph10 566 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2886 ph10 579 onechar++;
2887 nigel 73 }
2888     }
2889     use_offsets[1] = start_offset + onechar;
2890 nigel 35 }
2891 nigel 41 else
2892     {
2893 ph10 598 switch(count)
2894     {
2895     case PCRE_ERROR_NOMATCH:
2896 ph10 512 if (gmatched == 0)
2897 ph10 510 {
2898     if (markptr == NULL) fprintf(outfile, "No match\n");
2899     else fprintf(outfile, "No match, mark = %s\n", markptr);
2900 ph10 512 }
2901 ph10 598 break;
2902    
2903     case PCRE_ERROR_BADUTF8:
2904     case PCRE_ERROR_SHORTUTF8:
2905     fprintf(outfile, "Error %d (%s UTF-8 string)", count,
2906     (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
2907     if (use_size_offsets >= 2)
2908     fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
2909     use_offsets[1]);
2910     fprintf(outfile, "\n");
2911     break;
2912    
2913     default:
2914 ph10 604 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
2915     fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
2916     else
2917     fprintf(outfile, "Error %d (Unexpected value)\n", count);
2918 ph10 598 break;
2919 nigel 41 }
2920 ph10 598
2921 nigel 41 break; /* Out of the /g loop */
2922     }
2923 nigel 3 }
2924 nigel 35
2925 nigel 39 /* If not /g or /G we are done */
2926    
2927     if (!do_g && !do_G) break;
2928    
2929 nigel 41 /* If we have matched an empty string, first check to see if we are at
2930 ph10 442 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2931     Perl's /g option does. This turns out to be rather cunning. First we set
2932     PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2933 nigel 47 same point. If this fails (picked up above) we advance to the next
2934 ph10 143 character. */
2935 ph10 142
2936 nigel 41 g_notempty = 0;
2937 ph10 142
2938 nigel 57 if (use_offsets[0] == use_offsets[1])
2939 nigel 41 {
2940 nigel 57 if (use_offsets[0] == len) break;
2941 ph10 442 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2942 nigel 41 }
2943 nigel 39
2944     /* For /g, update the start offset, leaving the rest alone */
2945    
2946 ph10 143 if (do_g) start_offset = use_offsets[1];
2947 nigel 39
2948     /* For /G, update the pointer and length */
2949    
2950     else
2951 nigel 35 {
2952 ph10 143 bptr += use_offsets[1];
2953     len -= use_offsets[1];
2954 nigel 35 }
2955 nigel 39 } /* End of loop for /g and /G */
2956 nigel 91
2957     NEXT_DATA: continue;
2958 nigel 39 } /* End of loop for data lines */
2959 nigel 3
2960 nigel 11 CONTINUE:
2961 nigel 37
2962     #if !defined NOPOSIX
2963 nigel 3 if (posix || do_posix) regfree(&preg);
2964 nigel 37 #endif
2965    
2966 nigel 77 if (re != NULL) new_free(re);
2967     if (extra != NULL) new_free(extra);
2968 ph10 541 if (locale_set)
2969 nigel 25 {
2970 nigel 77 new_free((void *)tables);
2971 nigel 25 setlocale(LC_CTYPE, "C");
2972 nigel 93 locale_set = 0;
2973 nigel 25 }
2974 nigel 3 }
2975    
2976 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2977 nigel 77
2978     EXIT:
2979    
2980     if (infile != NULL && infile != stdin) fclose(infile);
2981     if (outfile != NULL && outfile != stdout) fclose(outfile);
2982    
2983     free(buffer);
2984     free(dbuffer);
2985     free(pbuffer);
2986     free(offsets);
2987    
2988     return yield;
2989 nigel 3 }
2990    
2991 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12