/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 626 - (hide annotations) (download)
Wed Jul 20 17:51:54 2011 UTC (3 years ago) by ph10
File MIME type: text/plain
File size: 91166 byte(s)
Add the /= modifier to pcretest so as to be able to check unset capturing 
parentheses at the ends of patterns.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
43 nigel 3 #include <ctype.h>
44     #include <stdio.h>
45     #include <string.h>
46     #include <stdlib.h>
47     #include <time.h>
48 nigel 25 #include <locale.h>
49 nigel 75 #include <errno.h>
#include <limits.h>
50 nigel 3
51 ph10 287 #ifdef SUPPORT_LIBREADLINE
52 ph10 343 #ifdef HAVE_UNISTD_H
53 ph10 287 #include <unistd.h>
54 ph10 343 #endif
55 ph10 287 #include <readline/readline.h>
56     #include <readline/history.h>
57     #endif
58 nigel 93
59 ph10 287
60 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
61     input and output without "b"; then I was told that "b" was needed in some
62     environments, so it was added for release 5.0 to both the input and output. (It
63     makes no difference on Unix-like systems.) Later I was told that it is wrong
64     for the input on Windows. I've now abstracted the modes into two macros that
65     are set here, to make it easier to fiddle with them, and removed "b" from the
66     input mode under Windows. */
67    
68     #if defined(_WIN32) || defined(WIN32)
69     #include <io.h> /* For _setmode() */
70     #include <fcntl.h> /* For _O_BINARY */
71     #define INPUT_MODE "r"
72     #define OUTPUT_MODE "wb"
73    
74 ph10 411 #ifndef isatty
75     #define isatty _isatty /* This is what Windows calls them, I'm told, */
76     #endif /* though in some environments they seem to */
77     /* be already defined, hence the #ifndefs. */
78     #ifndef fileno
79 ph10 343 #define fileno _fileno
80 ph10 411 #endif
81 ph10 343
82 ph10 580 /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84     #ifdef __BORLANDC__
85     #define _setmode(handle, mode) setmode(handle, mode)
86     #endif
87    
88     /* Not Windows */
89    
90 nigel 93 #else
91     #include <sys/time.h> /* These two includes are needed */
92     #include <sys/resource.h> /* for setrlimit(). */
93     #define INPUT_MODE "rb"
94     #define OUTPUT_MODE "wb"
95 nigel 91 #endif
96    
97 nigel 93
98 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
99     displaying the results of pcre_study() and we also need to know about the
100     internal macros, structures, and other internal data values; pcretest has
101     "inside information" compared to a program that strictly follows the PCRE API.
102 nigel 37
103 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105     appropriately for an application, not for building PCRE. */
106 nigel 77
107 ph10 145 #include "pcre.h"
108 nigel 77 #include "pcre_internal.h"
109    
110 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
111     to keep two copies, we include the source file here, changing the names of the
112     external symbols to prevent clashes. */
113 nigel 77
114 ph10 351 #define _pcre_ucp_gentype ucp_gentype
115 nigel 85 #define _pcre_utf8_table1 utf8_table1
116     #define _pcre_utf8_table1_size utf8_table1_size
117     #define _pcre_utf8_table2 utf8_table2
118     #define _pcre_utf8_table3 utf8_table3
119     #define _pcre_utf8_table4 utf8_table4
120     #define _pcre_utt utt
121     #define _pcre_utt_size utt_size
122 ph10 240 #define _pcre_utt_names utt_names
123 nigel 85 #define _pcre_OP_lengths OP_lengths
124    
125     #include "pcre_tables.c"
126    
127     /* We also need the pcre_printint() function for printing out compiled
128     patterns. This function is in a separate file so that it can be included in
129 ph10 507 pcre_compile.c when that module is compiled with debugging enabled. It needs to
130 ph10 498 know which case is being compiled. */
131 nigel 85
132 ph10 498 #define COMPILING_PCRETEST
133     #include "pcre_printint.src"
134    
135     /* The definition of the macro PRINTABLE, which determines whether to print an
136 nigel 93 output character as-is or as a hex value when showing compiled patterns, is
137 ph10 498 contained in the printint.src file. We use it here also, in cases when the
138     locale has not been explicitly changed, so as to get consistent output from
139     systems that differ in their output from isprint() even in the "C" locale. */
140 nigel 93
141     #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
142 nigel 85
143 nigel 37 /* It is possible to compile this test program without including support for
144     testing the POSIX interface, though this is not available via the standard
145     Makefile. */
146    
147     #if !defined NOPOSIX
148 nigel 3 #include "pcreposix.h"
149 nigel 37 #endif
150 nigel 3
151 ph10 107 /* It is also possible, for the benefit of the version currently imported into
152     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
153     interface to the DFA matcher (NODFA), and without the doublecheck of the old
154     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
155     UTF8 support if PCRE is built without it. */
156 nigel 79
157 ph10 107 #ifndef SUPPORT_UTF8
158     #ifndef NOUTF8
159     #define NOUTF8
160     #endif
161     #endif
162 nigel 79
163 ph10 107
164 nigel 85 /* Other parameters */
165    
166 nigel 3 #ifndef CLOCKS_PER_SEC
167     #ifdef CLK_TCK
168     #define CLOCKS_PER_SEC CLK_TCK
169     #else
170     #define CLOCKS_PER_SEC 100
171     #endif
172     #endif
173    
174 nigel 93 /* This is the default loop count for timing. */
175    
176 nigel 75 #define LOOPREPEAT 500000
177 nigel 3
178 nigel 85 /* Static variables */
179    
/* File-scope state shared by the functions in this file. What the nearby code
shows about them: callout() writes to outfile, and does so only when
first_callout or callout_extra is non-zero; pchars() decodes its input as UTF-8
when use_utf8 is non-zero; the PRINTHEX macro uses isprint() when locale_set is
non-zero and PRINTABLE() otherwise. The remaining variables are used further
down the file. */

180 nigel 3 static FILE *outfile;
181     static int log_store = 0;
182 nigel 63 static int callout_count;
183     static int callout_extra;
184     static int callout_fail_count;
185     static int callout_fail_id;
186 ph10 210 static int debug_lengths;
187 nigel 63 static int first_callout;
188 nigel 93 static int locale_set = 0;
189 nigel 73 static int show_malloc;
190 nigel 67 static int use_utf8;
191 nigel 43 static size_t gotten_store;
192 nigel 3
193 nigel 91 /* The buffers grow automatically if very long input lines are encountered. extend_inputline() replaces all three at once with allocations of twice buffer_size; the contents of buffer and pbuffer are copied across, those of dbuffer are not. */
194    
195     static int buffer_size = 50000;
196     static uschar *buffer = NULL;
197     static uschar *dbuffer = NULL;
198 nigel 75 static uschar *pbuffer = NULL;
199 nigel 3
200 ph10 598 /* Textual explanations for runtime error codes */
201 nigel 75
/* One message per runtime error. A NULL entry means there is no table text for
that position: according to the note beside each one it is either not an error,
is never returned by the matching functions, or is reported by dedicated code.
NOTE(review): presumably indexed by the negated PCRE_ERROR_xxx return value -
confirm at the place where the table is used. */

202 ph10 598 static const char *errtexts[] = {
203     NULL, /* 0 is no error */
204     NULL, /* NOMATCH is handled specially */
205     "NULL argument passed",
206     "bad option value",
207     "magic number missing",
208     "unknown opcode - pattern overwritten?",
209     "no more memory",
210     NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
211     "match limit exceeded",
212     "callout error code",
213     NULL, /* BADUTF8 is handled specially */
214     "bad UTF-8 offset",
215     NULL, /* PARTIAL is handled specially */
216     "not used - internal error",
217     "internal error - pattern overwritten?",
218     "bad count value",
219     "item unsupported for DFA matching",
220     "backreference condition or recursion test not supported for DFA matching",
221     "match limit not supported for DFA matching",
222     "workspace size exceeded in DFA matching",
223     "too much recursion for DFA matching",
224     "recursion limit exceeded",
225     "not used - internal error",
226     "invalid combination of newline options",
227     "bad offset value",
228     NULL /* SHORTUTF8 is handled specially */
229     };
230    
231    
232 ph10 541 /*************************************************
233     * Alternate character tables *
234     *************************************************/
235 nigel 49
236 ph10 545 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
237     using the default tables of the library. However, the T option can be used to
238     select alternate sets of tables, for different kinds of testing. Note also that
239 ph10 541 the L (locale) option also adjusts the tables. */
240    
241 ph10 545 /* This is the set of tables distributed as default with PCRE. It recognizes
242 ph10 541 only ASCII characters. */
243    
/* Layout of this array: four sub-tables stored back to back -
     256 bytes  lower-casing table
     256 bytes  case-flipping table
     320 bytes  character-class bit maps (ten maps of 32 bytes each)
     256 bytes  per-character type flags
1088 bytes in total. Only the ASCII range carries any information; every
entry for 128-255 is either the identity mapping or zero. */

244     static const unsigned char tables0[] = {
245    
246     /* This table is a lower casing table. */
247    
248     0, 1, 2, 3, 4, 5, 6, 7,
249     8, 9, 10, 11, 12, 13, 14, 15,
250     16, 17, 18, 19, 20, 21, 22, 23,
251     24, 25, 26, 27, 28, 29, 30, 31,
252     32, 33, 34, 35, 36, 37, 38, 39,
253     40, 41, 42, 43, 44, 45, 46, 47,
254     48, 49, 50, 51, 52, 53, 54, 55,
255     56, 57, 58, 59, 60, 61, 62, 63,
256     64, 97, 98, 99,100,101,102,103,
257     104,105,106,107,108,109,110,111,
258     112,113,114,115,116,117,118,119,
259     120,121,122, 91, 92, 93, 94, 95,
260     96, 97, 98, 99,100,101,102,103,
261     104,105,106,107,108,109,110,111,
262     112,113,114,115,116,117,118,119,
263     120,121,122,123,124,125,126,127,
264     128,129,130,131,132,133,134,135,
265     136,137,138,139,140,141,142,143,
266     144,145,146,147,148,149,150,151,
267     152,153,154,155,156,157,158,159,
268     160,161,162,163,164,165,166,167,
269     168,169,170,171,172,173,174,175,
270     176,177,178,179,180,181,182,183,
271     184,185,186,187,188,189,190,191,
272     192,193,194,195,196,197,198,199,
273     200,201,202,203,204,205,206,207,
274     208,209,210,211,212,213,214,215,
275     216,217,218,219,220,221,222,223,
276     224,225,226,227,228,229,230,231,
277     232,233,234,235,236,237,238,239,
278     240,241,242,243,244,245,246,247,
279     248,249,250,251,252,253,254,255,
280    
281     /* This table is a case flipping table. */
282    
283     0, 1, 2, 3, 4, 5, 6, 7,
284     8, 9, 10, 11, 12, 13, 14, 15,
285     16, 17, 18, 19, 20, 21, 22, 23,
286     24, 25, 26, 27, 28, 29, 30, 31,
287     32, 33, 34, 35, 36, 37, 38, 39,
288     40, 41, 42, 43, 44, 45, 46, 47,
289     48, 49, 50, 51, 52, 53, 54, 55,
290     56, 57, 58, 59, 60, 61, 62, 63,
291     64, 97, 98, 99,100,101,102,103,
292     104,105,106,107,108,109,110,111,
293     112,113,114,115,116,117,118,119,
294     120,121,122, 91, 92, 93, 94, 95,
295     96, 65, 66, 67, 68, 69, 70, 71,
296     72, 73, 74, 75, 76, 77, 78, 79,
297     80, 81, 82, 83, 84, 85, 86, 87,
298     88, 89, 90,123,124,125,126,127,
299     128,129,130,131,132,133,134,135,
300     136,137,138,139,140,141,142,143,
301     144,145,146,147,148,149,150,151,
302     152,153,154,155,156,157,158,159,
303     160,161,162,163,164,165,166,167,
304     168,169,170,171,172,173,174,175,
305     176,177,178,179,180,181,182,183,
306     184,185,186,187,188,189,190,191,
307     192,193,194,195,196,197,198,199,
308     200,201,202,203,204,205,206,207,
309     208,209,210,211,212,213,214,215,
310     216,217,218,219,220,221,222,223,
311     224,225,226,227,228,229,230,231,
312     232,233,234,235,236,237,238,239,
313     240,241,242,243,244,245,246,247,
314     248,249,250,251,252,253,254,255,
315    
316     /* This table contains bit maps for various character classes. Each map is 32
317     bytes long and the bits run from the least significant end of each byte. The
318     classes that have their own maps are: space, xdigit, digit, upper, lower, word,
319     graph, print, punct, and cntrl. Other classes are built from combinations. */
320    
/* space: characters 9-13 and 32 */
321     0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
322     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
323     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
324     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
325    
/* xdigit: 0-9, A-F, a-f */
326     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
327     0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
328     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330    
/* digit: 0-9 */
331     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
332     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
333     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
335    
/* upper: A-Z */
336     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
337     0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
338     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
340    
/* lower: a-z */
341     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
342     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
343     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
344     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
345    
/* word: 0-9, A-Z, '_', a-z */
346     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
347     0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
348     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
349     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
350    
/* graph: characters 33-126 */
351     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
352     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
353     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
354     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
355    
/* print: characters 32-126 */
356     0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
357     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
358     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
359     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360    
/* punct: the graph characters that are neither letters nor digits */
361     0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
362     0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
363     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
364     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365    
/* cntrl: characters 0-31 and 127 */
366     0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
367     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
368     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
369     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
370    
371     /* This table identifies various classes of character by individual bits:
372     0x01 white space character
373     0x02 letter
374     0x04 decimal digit
375     0x08 hexadecimal digit
376     0x10 alphanumeric or '_'
377     0x80 regular expression metacharacter or binary zero
378     */
379    
380     0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
381     0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
382     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
383     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
384     0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
385     0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
386     0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
387     0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
388     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
389     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
390     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
391     0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
392     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
393     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
394     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
395     0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
396     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
397     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
398     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
399     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
400     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
401     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
402     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
403     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
404     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
405     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
406     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
407     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
408     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
409     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
410     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
411     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
412    
413 ph10 545 /* This is a set of tables that came originally from a Windows user. It seems to
414     be at least an approximation of ISO 8859. In particular, there are characters
415 ph10 541 greater than 128 that are marked as spaces, letters, etc. */
416    
/* The values are written in decimal and the array carries no section markers
of its own, so they are added here. The entry counts (256, 256, 320, 256) are
those of a lower-casing table, a case-flipping table, ten 32-byte class bit
maps and a table of per-character type flags; the flag values in the last part
use 1 for space, 18 and 26 for letters, 20 and 28 for digits, 128 for
metacharacters. Unlike an ASCII-only set, several characters above 127 are
mapped or flagged. */

417     static const unsigned char tables1[] = {
/* Part 1 (256 entries): lower casing. 192-222 map to 224-254, except 215. */
418     0,1,2,3,4,5,6,7,
419     8,9,10,11,12,13,14,15,
420     16,17,18,19,20,21,22,23,
421     24,25,26,27,28,29,30,31,
422     32,33,34,35,36,37,38,39,
423     40,41,42,43,44,45,46,47,
424     48,49,50,51,52,53,54,55,
425     56,57,58,59,60,61,62,63,
426     64,97,98,99,100,101,102,103,
427     104,105,106,107,108,109,110,111,
428     112,113,114,115,116,117,118,119,
429     120,121,122,91,92,93,94,95,
430     96,97,98,99,100,101,102,103,
431     104,105,106,107,108,109,110,111,
432     112,113,114,115,116,117,118,119,
433     120,121,122,123,124,125,126,127,
434     128,129,130,131,132,133,134,135,
435     136,137,138,139,140,141,142,143,
436     144,145,146,147,148,149,150,151,
437     152,153,154,155,156,157,158,159,
438     160,161,162,163,164,165,166,167,
439     168,169,170,171,172,173,174,175,
440     176,177,178,179,180,181,182,183,
441     184,185,186,187,188,189,190,191,
442     224,225,226,227,228,229,230,231,
443     232,233,234,235,236,237,238,239,
444     240,241,242,243,244,245,246,215,
445     248,249,250,251,252,253,254,223,
446     224,225,226,227,228,229,230,231,
447     232,233,234,235,236,237,238,239,
448     240,241,242,243,244,245,246,247,
449     248,249,250,251,252,253,254,255,
/* Part 2 (256 entries): case flipping. 215, 223, 247 and 255 map to themselves. */
450     0,1,2,3,4,5,6,7,
451     8,9,10,11,12,13,14,15,
452     16,17,18,19,20,21,22,23,
453     24,25,26,27,28,29,30,31,
454     32,33,34,35,36,37,38,39,
455     40,41,42,43,44,45,46,47,
456     48,49,50,51,52,53,54,55,
457     56,57,58,59,60,61,62,63,
458     64,97,98,99,100,101,102,103,
459     104,105,106,107,108,109,110,111,
460     112,113,114,115,116,117,118,119,
461     120,121,122,91,92,93,94,95,
462     96,65,66,67,68,69,70,71,
463     72,73,74,75,76,77,78,79,
464     80,81,82,83,84,85,86,87,
465     88,89,90,123,124,125,126,127,
466     128,129,130,131,132,133,134,135,
467     136,137,138,139,140,141,142,143,
468     144,145,146,147,148,149,150,151,
469     152,153,154,155,156,157,158,159,
470     160,161,162,163,164,165,166,167,
471     168,169,170,171,172,173,174,175,
472     176,177,178,179,180,181,182,183,
473     184,185,186,187,188,189,190,191,
474     224,225,226,227,228,229,230,231,
475     232,233,234,235,236,237,238,239,
476     240,241,242,243,244,245,246,215,
477     248,249,250,251,252,253,254,223,
478     192,193,194,195,196,197,198,199,
479     200,201,202,203,204,205,206,207,
480     208,209,210,211,212,213,214,247,
481     216,217,218,219,220,221,222,255,
/* Part 3 (320 entries): ten class bit maps of 32 bytes, four rows each. */
/* space (133 and 160 are set as well as the ASCII ones) */
482     0,62,0,0,1,0,0,0,
483     0,0,0,0,0,0,0,0,
484     32,0,0,0,1,0,0,0,
485     0,0,0,0,0,0,0,0,
/* xdigit */
486     0,0,0,0,0,0,255,3,
487     126,0,0,0,126,0,0,0,
488     0,0,0,0,0,0,0,0,
489     0,0,0,0,0,0,0,0,
/* digit (178, 179 and 185 are set as well as 0-9) */
490     0,0,0,0,0,0,255,3,
491     0,0,0,0,0,0,0,0,
492     0,0,0,0,0,0,12,2,
493     0,0,0,0,0,0,0,0,
/* upper */
494     0,0,0,0,0,0,0,0,
495     254,255,255,7,0,0,0,0,
496     0,0,0,0,0,0,0,0,
497     255,255,127,127,0,0,0,0,
/* lower */
498     0,0,0,0,0,0,0,0,
499     0,0,0,0,254,255,255,7,
500     0,0,0,0,0,4,32,4,
501     0,0,0,128,255,255,127,255,
/* word */
502     0,0,0,0,0,0,255,3,
503     254,255,255,135,254,255,255,7,
504     0,0,0,0,0,4,44,6,
505     255,255,127,255,255,255,127,255,
/* graph */
506     0,0,0,0,254,255,255,255,
507     255,255,255,255,255,255,255,127,
508     0,0,0,0,254,255,255,255,
509     255,255,255,255,255,255,255,255,
/* print */
510     0,2,0,0,255,255,255,255,
511     255,255,255,255,255,255,255,127,
512     0,0,0,0,255,255,255,255,
513     255,255,255,255,255,255,255,255,
/* punct */
514     0,0,0,0,254,255,0,252,
515     1,0,0,248,1,0,0,120,
516     0,0,0,0,254,255,255,255,
517     0,0,128,0,0,0,128,0,
/* cntrl */
518     255,255,255,255,0,0,0,0,
519     0,0,0,0,0,0,0,128,
520     255,255,255,255,0,0,0,0,
521     0,0,0,0,0,0,0,0,
/* Part 4 (256 entries): per-character type flags, eight characters per row. */
522     128,0,0,0,0,0,0,0,
523     0,1,1,0,1,1,0,0,
524     0,0,0,0,0,0,0,0,
525     0,0,0,0,0,0,0,0,
526     1,0,0,0,128,0,0,0,
527     128,128,128,128,0,0,128,0,
528     28,28,28,28,28,28,28,28,
529     28,28,0,0,0,0,0,128,
530     0,26,26,26,26,26,26,18,
531     18,18,18,18,18,18,18,18,
532     18,18,18,18,18,18,18,18,
533     18,18,18,128,128,0,128,16,
534     0,26,26,26,26,26,26,18,
535     18,18,18,18,18,18,18,18,
536     18,18,18,18,18,18,18,18,
537     18,18,18,128,128,0,0,0,
538     0,0,0,0,0,1,0,0,
539     0,0,0,0,0,0,0,0,
540     0,0,0,0,0,0,0,0,
541     0,0,0,0,0,0,0,0,
542     1,0,0,0,0,0,0,0,
543     0,0,18,0,0,0,0,0,
544     0,0,20,20,0,18,0,0,
545     0,20,18,0,0,0,0,0,
546     18,18,18,18,18,18,18,18,
547     18,18,18,18,18,18,18,18,
548     18,18,18,18,18,18,18,0,
549     18,18,18,18,18,18,18,18,
550     18,18,18,18,18,18,18,18,
551     18,18,18,18,18,18,18,18,
552     18,18,18,18,18,18,18,0,
553     18,18,18,18,18,18,18,18
554     };
555    
556    
557    
558 ph10 558
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* A stand-in for C libraries that ship without strerror() (SunOS4 is the
example the original author gives). The text is taken from the library's
sys_errlist[] table after the error number has been checked against sys_nerr.

Argument:
  n          the error number to describe

Returns:     the table entry for n, or the fixed string "unknown error number"
             when n is negative or not below sys_nerr
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif  /* HAVE_STRERROR */
578    
579    
580    
581    
582     /*************************************************
583 nigel 91 * Read or extend an input line *
584     *************************************************/
585    
586     /* Input lines are read into buffer, but both patterns and data lines can be
587     continued over multiple input lines. In addition, if the buffer fills up, we
588     want to automatically expand it so as to be able to handle extremely large
589     lines that are needed for certain stress tests. When the input buffer is
590     expanded, the other two buffers must also be expanded likewise, and the
591     contents of pbuffer, which are a copy of the input for callouts, must be
592     preserved (for when expansion happens for a data line). This is not the most
593     optimal way of handling this, but hey, this is just a test program!
594    
595     Arguments:
596     f the file to read
597     start where in buffer to start (this *must* be within buffer)
598 ph10 287 prompt for stdin or readline()
599 nigel 91
600     Returns: pointer to the start of new data
601     could be a copy of start, or could be moved
602     NULL if no data read and EOF reached
603     */
604    
605     static uschar *
606 ph10 287 extend_inputline(FILE *f, uschar *start, const char *prompt)
607 nigel 91 {
608     uschar *here = start;
609    
610     for (;;)
611     {
612 ph10 530 int rlen = (int)(buffer_size - (here - buffer));
613 nigel 93
614 nigel 91 if (rlen > 1000)
615     {
616     int dlen;
617 ph10 289
618 ph10 287 /* If libreadline support is required, use readline() to read a line if the
619     input is a terminal. Note that readline() removes the trailing newline, so
620     we must put it back again, to be compatible with fgets(). */
621 ph10 289
622 ph10 287 #ifdef SUPPORT_LIBREADLINE
623     if (isatty(fileno(f)))
624     {
625 ph10 289 size_t len;
626 ph10 287 char *s = readline(prompt);
627     if (s == NULL) return (here == start)? NULL : start;
628     len = strlen(s);
629 ph10 289 if (len > 0) add_history(s);
630 ph10 287 if (len > rlen - 1) len = rlen - 1;
631     memcpy(here, s, len);
632     here[len] = '\n';
633 ph10 289 here[len+1] = 0;
634     free(s);
635 ph10 287 }
636 ph10 289 else
637     #endif
638    
639 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
640 ph10 289
641 ph10 287 {
642 ph10 516 if (f == stdin) printf("%s", prompt);
643 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
644     return (here == start)? NULL : start;
645 ph10 289 }
646    
647 nigel 91 dlen = (int)strlen((char *)here);
648     if (dlen > 0 && here[dlen - 1] == '\n') return start;
649     here += dlen;
650     }
651    
652     else
653     {
654     int new_buffer_size = 2*buffer_size;
655     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
656     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
657     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
658    
659     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
660     {
661     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
662     exit(1);
663     }
664    
665     memcpy(new_buffer, buffer, buffer_size);
666     memcpy(new_pbuffer, pbuffer, buffer_size);
667    
668     buffer_size = new_buffer_size;
669    
670     start = new_buffer + (start - buffer);
671     here = new_buffer + (here - buffer);
672    
673     free(buffer);
674     free(dbuffer);
675     free(pbuffer);
676    
677     buffer = new_buffer;
678     dbuffer = new_dbuffer;
679     pbuffer = new_pbuffer;
680     }
681     }
682    
683     return NULL; /* Control never gets here */
684     }
685    
686    
687    
688    
689    
690    
691    
/*************************************************
*        Read number from string                 *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple: leading white space is
skipped, then decimal digits are accumulated. There is no sign handling. If
no digit is found the result is zero and *endptr is left at the first
character after the white space.

A digit string too large for an int yields INT_MAX instead of overflowing
(signed overflow is undefined behaviour); all of its digits are still
consumed.

Arguments:
  str         string to be converted
  endptr      where to put the end pointer

Returns:      the value, as a non-negative int
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  for (; isdigit(*str); str++)
  {
    int digit = *str - '0';

    /* result*10 + digit fits exactly when result <= (INT_MAX - digit)/10.
    Once saturated, the test keeps failing, so the value stays at INT_MAX. */

    if (result > (INT_MAX - digit) / 10)
      result = INT_MAX;
    else
      result = result * 10 + digit;
  }

  *endptr = str;
  return result;
}
716    
717    
718    
719 nigel 49
720     /*************************************************
721     * Convert UTF-8 string to value *
722     *************************************************/
723    
724     /* This function takes one or more bytes that represents a UTF-8 character,
725     and returns the value of the character.
726    
727     Argument:
728 nigel 91 utf8bytes a pointer to the byte vector
729     vptr a pointer to an int to receive the value
730 nigel 49
731 nigel 91 Returns: > 0 => the number of bytes consumed
732     -6 to 0 => malformed UTF-8 character at offset = (-return)
733 nigel 49 */
734    
735 nigel 79 #if !defined NOUTF8
736    
737 nigel 67 static int
738 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
739 nigel 49 {
740 nigel 91 int c = *utf8bytes++;
741 nigel 49 int d = c;
742     int i, j, s;
743    
744     for (i = -1; i < 6; i++) /* i is number of additional bytes */
745     {
746     if ((d & 0x80) == 0) break;
747     d <<= 1;
748     }
749    
750     if (i == -1) { *vptr = c; return 1; } /* ascii character */
751     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
752    
753     /* i now has a value in the range 1-5 */
754    
755 nigel 59 s = 6*i;
756 nigel 85 d = (c & utf8_table3[i]) << s;
757 nigel 49
758     for (j = 0; j < i; j++)
759     {
760 nigel 91 c = *utf8bytes++;
761 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
762 nigel 59 s -= 6;
763 nigel 49 d |= (c & 0x3f) << s;
764     }
765    
766     /* Check that encoding was the correct unique one */
767    
768 nigel 85 for (j = 0; j < utf8_table1_size; j++)
769     if (d <= utf8_table1[j]) break;
770 nigel 49 if (j != i) return -(i+1);
771    
772     /* Valid value */
773    
774     *vptr = d;
775     return i+1;
776     }
777    
778 nigel 79 #endif
779 nigel 49
780    
781 nigel 79
782 nigel 63 /*************************************************
783 nigel 85 * Convert character value to UTF-8 *
784     *************************************************/
785    
786     /* This function takes an integer value in the range 0 - 0x7fffffff
787     and encodes it as a UTF-8 character in 0 to 6 bytes.
788    
789     Arguments:
790     cvalue the character value
791 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
792 nigel 85
793     Returns: number of characters placed in the buffer
794     */
795    
796 nigel 93 #if !defined NOUTF8
797    
798 nigel 85 static int
799 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
800 nigel 85 {
801     register int i, j;
802     for (i = 0; i < utf8_table1_size; i++)
803     if (cvalue <= utf8_table1[i]) break;
804 nigel 91 utf8bytes += i;
805 nigel 85 for (j = i; j > 0; j--)
806     {
807 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
808 nigel 85 cvalue >>= 6;
809     }
810 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
811 nigel 85 return i + 1;
812     }
813    
814 nigel 93 #endif
815 nigel 85
816    
817 nigel 93
818 nigel 85 /*************************************************
819 nigel 63 * Print character string *
820     *************************************************/
821 nigel 49
822 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
823     mode. Yields number of characters printed. If handed a NULL file, just counts
824     chars without printing. */
825 nigel 49
826 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
827 nigel 3 {
828 nigel 85 int c = 0;
829 nigel 63 int yield = 0;
830 nigel 3
831 nigel 63 while (length-- > 0)
832 nigel 3 {
833 nigel 79 #if !defined NOUTF8
834 nigel 67 if (use_utf8)
835 nigel 63 {
836     int rc = utf82ord(p, &c);
837 nigel 3
838 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
839     {
840     length -= rc - 1;
841     p += rc;
842 nigel 93 if (PRINTHEX(c))
843 nigel 63 {
844     if (f != NULL) fprintf(f, "%c", c);
845     yield++;
846     }
847     else
848     {
849 nigel 93 int n = 4;
850     if (f != NULL) fprintf(f, "\\x{%02x}", c);
851     yield += (n <= 0x000000ff)? 2 :
852     (n <= 0x00000fff)? 3 :
853     (n <= 0x0000ffff)? 4 :
854     (n <= 0x000fffff)? 5 : 6;
855 nigel 63 }
856     continue;
857     }
858     }
859 nigel 79 #endif
860 nigel 3
861 nigel 63 /* Not UTF-8, or malformed UTF-8 */
862    
863 nigel 93 c = *p++;
864     if (PRINTHEX(c))
865 nigel 3 {
866 nigel 63 if (f != NULL) fprintf(f, "%c", c);
867     yield++;
868 nigel 3 }
869 nigel 63 else
870 nigel 3 {
871 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
872     yield += 4;
873     }
874     }
875 nigel 3
876 nigel 63 return yield;
877     }
878 nigel 23
879 nigel 3
880 nigel 23
881 nigel 63 /*************************************************
882     * Callout function *
883     *************************************************/
884 nigel 3
885 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
886     the match. Yield zero unless more callouts than the fail count, or the callout
887     data is not zero. */
888 nigel 3
889 nigel 63 static int callout(pcre_callout_block *cb)
890     {
891     FILE *f = (first_callout | callout_extra)? outfile : NULL;
892 nigel 75 int i, pre_start, post_start, subject_length;
893 nigel 3
894 nigel 63 if (callout_extra)
895     {
896     fprintf(f, "Callout %d: last capture = %d\n",
897     cb->callout_number, cb->capture_last);
898 nigel 3
899 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
900     {
901     if (cb->offset_vector[i] < 0)
902     fprintf(f, "%2d: <unset>\n", i/2);
903     else
904     {
905     fprintf(f, "%2d: ", i/2);
906     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
907     cb->offset_vector[i+1] - cb->offset_vector[i], f);
908     fprintf(f, "\n");
909     }
910     }
911     }
912 nigel 3
913 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
914     datails. On subsequent calls in the same match, we use pchars just to find the
915     printed lengths of the substrings. */
916 nigel 3
917 nigel 63 if (f != NULL) fprintf(f, "--->");
918 nigel 3
919 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
920     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
921     cb->current_position - cb->start_match, f);
922 nigel 3
923 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
924    
925 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
926     cb->subject_length - cb->current_position, f);
927 nigel 3
928 nigel 63 if (f != NULL) fprintf(f, "\n");
929 nigel 9
930 nigel 63 /* Always print appropriate indicators, with callout number if not already
931 nigel 75 shown. For automatic callouts, show the pattern offset. */
932 nigel 3
933 nigel 75 if (cb->callout_number == 255)
934     {
935     fprintf(outfile, "%+3d ", cb->pattern_position);
936     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
937     }
938     else
939     {
940     if (callout_extra) fprintf(outfile, " ");
941     else fprintf(outfile, "%3d ", cb->callout_number);
942     }
943 nigel 3
944 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
945     fprintf(outfile, "^");
946 nigel 3
947 nigel 63 if (post_start > 0)
948     {
949     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
950     fprintf(outfile, "^");
951 nigel 3 }
952    
953 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
954     fprintf(outfile, " ");
955    
956     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
957     pbuffer + cb->pattern_position);
958    
959 nigel 63 fprintf(outfile, "\n");
960     first_callout = 0;
961 nigel 3
962 nigel 71 if (cb->callout_data != NULL)
963 nigel 49 {
964 nigel 71 int callout_data = *((int *)(cb->callout_data));
965     if (callout_data != 0)
966     {
967     fprintf(outfile, "Callout data = %d\n", callout_data);
968     return callout_data;
969     }
970 nigel 63 }
971 nigel 49
972 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
973     (++callout_count >= callout_fail_count)? 1 : 0;
974 nigel 3 }
975    
976    
977 nigel 63 /*************************************************
978 nigel 73 * Local malloc functions *
979 nigel 63 *************************************************/
980 nigel 3
981     /* Alternative malloc function, to test functionality and show the size of the
982     compiled re. */
983    
984     static void *new_malloc(size_t size)
985     {
986 nigel 73 void *block = malloc(size);
987 nigel 43 gotten_store = size;
988 nigel 73 if (show_malloc)
989 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
990 nigel 73 return block;
991 nigel 3 }
992    
993 nigel 73 static void new_free(void *block)
994     {
995     if (show_malloc)
996     fprintf(outfile, "free %p\n", block);
997     free(block);
998     }
999 nigel 3
1000    
1001 nigel 73 /* For recursion malloc/free, to test stacking calls */
1002    
1003     static void *stack_malloc(size_t size)
1004     {
1005     void *block = malloc(size);
1006     if (show_malloc)
1007 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1008 nigel 73 return block;
1009     }
1010    
1011     static void stack_free(void *block)
1012     {
1013     if (show_malloc)
1014     fprintf(outfile, "stack_free %p\n", block);
1015     free(block);
1016     }
1017    
1018    
1019 nigel 63 /*************************************************
1020     * Call pcre_fullinfo() *
1021     *************************************************/
1022 nigel 43
1023     /* Get one piece of information from the pcre_fullinfo() function */
1024    
1025     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1026     {
1027     int rc;
1028     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1029     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1030     }
1031    
1032    
1033    
1034 nigel 63 /*************************************************
1035 nigel 75 * Byte flipping function *
1036     *************************************************/
1037    
/* Reverse the byte order of the low-order bytes of a value.

Arguments:
  value    the value to flip
  n        2 to swap the two low-order bytes; any other value reverses the
           four low-order bytes

Returns:   the flipped value; bits above the flipped bytes are discarded
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1047    
1048    
1049    
1050    
1051     /*************************************************
1052 nigel 87 * Check match or recursion limit *
1053     *************************************************/
1054    
1055     static int
1056     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1057     int start_offset, int options, int *use_offsets, int use_size_offsets,
1058     int flag, unsigned long int *limit, int errnumber, const char *msg)
1059     {
1060     int count;
1061     int min = 0;
1062     int mid = 64;
1063     int max = -1;
1064    
1065     extra->flags |= flag;
1066    
1067     for (;;)
1068     {
1069     *limit = mid;
1070    
1071     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1072     use_offsets, use_size_offsets);
1073    
1074     if (count == errnumber)
1075     {
1076     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1077     min = mid;
1078     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1079     }
1080    
1081     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1082     count == PCRE_ERROR_PARTIAL)
1083     {
1084     if (mid == min + 1)
1085     {
1086     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1087     break;
1088     }
1089     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1090     max = mid;
1091     mid = (min + mid)/2;
1092     }
1093     else break; /* Some other error */
1094     }
1095    
1096     extra->flags &= ~flag;
1097     return count;
1098     }
1099    
1100    
1101    
1102     /*************************************************
1103 ph10 227 * Case-independent strncmp() function *
1104     *************************************************/
1105    
1106     /*
1107     Arguments:
1108     s first string
1109     t second string
1110     n number of characters to compare
1111    
1112     Returns: < 0, = 0, or > 0, according to the comparison
1113     */
1114    
1115     static int
1116     strncmpic(uschar *s, uschar *t, int n)
1117     {
1118     while (n--)
1119     {
1120     int c = tolower(*s++) - tolower(*t++);
1121     if (c) return c;
1122     }
1123     return 0;
1124     }
1125    
1126    
1127    
1128     /*************************************************
1129 nigel 91 * Check newline indicator *
1130     *************************************************/
1131    
1132 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1133     a message and return 0 if there is no match.
1134 nigel 91
1135     Arguments:
1136     p points after the leading '<'
1137     f file for error message
1138    
1139     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1140     */
1141    
1142     static int
1143     check_newline(uschar *p, FILE *f)
1144     {
1145 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1146     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1147     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1148     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1149     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1150 ph10 231 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1151     if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1152 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
1153     return 0;
1154     }
1155    
1156    
1157    
1158     /*************************************************
1159 nigel 93 * Usage function *
1160     *************************************************/
1161    
/* Write the command-line help text to stdout. Lines for readline, DFA and
POSIX support depend on the corresponding build-time macros. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);
#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
fputs("\nOptions:\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1195    
1196    
1197    
1198     /*************************************************
1199 nigel 63 * Main Program *
1200     *************************************************/
1201 nigel 43
1202 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
1203     consist of a regular expression, in delimiters and optionally followed by
1204     options, followed by a set of test data, terminated by an empty line. */
1205    
1206     int main(int argc, char **argv)
1207     {
1208     FILE *infile = stdin;
1209     int options = 0;
1210     int study_options = 0;
1211 ph10 386 int default_find_match_limit = FALSE;
1212 nigel 3 int op = 1;
1213     int timeit = 0;
1214 nigel 93 int timeitm = 0;
1215 nigel 3 int showinfo = 0;
1216 nigel 31 int showstore = 0;
1217 ph10 606 int force_study = 0;
1218 nigel 87 int quiet = 0;
1219 nigel 53 int size_offsets = 45;
1220     int size_offsets_max;
1221 nigel 77 int *offsets = NULL;
1222 nigel 53 #if !defined NOPOSIX
1223 nigel 3 int posix = 0;
1224 nigel 53 #endif
1225 nigel 3 int debug = 0;
1226 nigel 11 int done = 0;
1227 nigel 77 int all_use_dfa = 0;
1228     int yield = 0;
1229 nigel 91 int stack_size;
1230 nigel 3
1231 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
1232     that 1024 is plenty long enough for the few names we'll be testing. */
1233 nigel 69
1234 nigel 91 uschar copynames[1024];
1235     uschar getnames[1024];
1236    
1237     uschar *copynamesptr;
1238     uschar *getnamesptr;
1239    
1240 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
1241 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
1242 nigel 69
1243 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
1244     dbuffer = (unsigned char *)malloc(buffer_size);
1245     pbuffer = (unsigned char *)malloc(buffer_size);
1246 nigel 69
1247 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
1248 nigel 3
1249 nigel 93 outfile = stdout;
1250    
1251     /* The following _setmode() stuff is some Windows magic that tells its runtime
1252     library to translate CRLF into a single LF character. At least, that's what
1253     I've been told: never having used Windows I take this all on trust. Originally
1254     it set 0x8000, but then I was advised that _O_BINARY was better. */
1255    
1256 nigel 75 #if defined(_WIN32) || defined(WIN32)
1257 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
1258     #endif
1259 nigel 75
1260 nigel 3 /* Scan options */
1261    
1262     while (argc > 1 && argv[op][0] == '-')
1263     {
1264 nigel 63 unsigned char *endptr;
1265 nigel 53
1266 ph10 606 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1267     else if (strcmp(argv[op], "-s") == 0) force_study = 1;
1268 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1269 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1270 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1271     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1272 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1273 nigel 79 #if !defined NODFA
1274 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1275 nigel 79 #endif
1276 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1277 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1278     *endptr == 0))
1279 nigel 53 {
1280     op++;
1281     argc--;
1282     }
1283 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1284     {
1285     int both = argv[op][2] == 0;
1286     int temp;
1287     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1288     *endptr == 0))
1289     {
1290     timeitm = temp;
1291     op++;
1292     argc--;
1293     }
1294     else timeitm = LOOPREPEAT;
1295     if (both) timeit = timeitm;
1296     }
1297 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1298     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1299     *endptr == 0))
1300     {
1301 nigel 93 #if defined(_WIN32) || defined(WIN32)
1302 nigel 91 printf("PCRE: -S not supported on this OS\n");
1303     exit(1);
1304     #else
1305     int rc;
1306     struct rlimit rlim;
1307     getrlimit(RLIMIT_STACK, &rlim);
1308     rlim.rlim_cur = stack_size * 1024 * 1024;
1309     rc = setrlimit(RLIMIT_STACK, &rlim);
1310     if (rc != 0)
1311     {
1312     printf("PCRE: setrlimit() failed with error %d\n", rc);
1313     exit(1);
1314     }
1315     op++;
1316     argc--;
1317     #endif
1318     }
1319 nigel 53 #if !defined NOPOSIX
1320 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1321 nigel 53 #endif
1322 nigel 63 else if (strcmp(argv[op], "-C") == 0)
1323     {
1324     int rc;
1325 ph10 392 unsigned long int lrc;
1326 nigel 63 printf("PCRE version %s\n", pcre_version());
1327     printf("Compiled with\n");
1328     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1329     printf(" %sUTF-8 support\n", rc? "" : "No ");
1330 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1331     printf(" %sUnicode properties support\n", rc? "" : "No ");
1332 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1333 ph10 391 /* Note that these values are always the ASCII values, even
1334 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
1335 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1336     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1337 ph10 150 (rc == -2)? "ANYCRLF" :
1338 nigel 93 (rc == -1)? "ANY" : "???");
1339 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1340     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1341     "all Unicode newlines");
1342 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1343     printf(" Internal link size = %d\n", rc);
1344     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1345     printf(" POSIX malloc threshold = %d\n", rc);
1346 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1347     printf(" Default match limit = %ld\n", lrc);
1348     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1349     printf(" Default recursion depth limit = %ld\n", lrc);
1350 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1351     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1352 ph10 121 goto EXIT;
1353 nigel 63 }
1354 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
1355     strcmp(argv[op], "--help") == 0)
1356     {
1357     usage();
1358     goto EXIT;
1359     }
1360 nigel 3 else
1361     {
1362 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
1363 nigel 93 usage();
1364 nigel 77 yield = 1;
1365     goto EXIT;
1366 nigel 3 }
1367     op++;
1368     argc--;
1369     }
1370    
1371 nigel 53 /* Get the store for the offsets vector, and remember what it was */
1372    
1373     size_offsets_max = size_offsets;
1374 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1375 nigel 53 if (offsets == NULL)
1376     {
1377     printf("** Failed to get %d bytes of memory for offsets vector\n",
1378 ph10 151 (int)(size_offsets_max * sizeof(int)));
1379 nigel 77 yield = 1;
1380     goto EXIT;
1381 nigel 53 }
1382    
1383 nigel 3 /* Sort out the input and output files */
1384    
1385     if (argc > 1)
1386     {
1387 nigel 93 infile = fopen(argv[op], INPUT_MODE);
1388 nigel 3 if (infile == NULL)
1389     {
1390     printf("** Failed to open %s\n", argv[op]);
1391 nigel 77 yield = 1;
1392     goto EXIT;
1393 nigel 3 }
1394     }
1395    
1396     if (argc > 2)
1397     {
1398 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
1399 nigel 3 if (outfile == NULL)
1400     {
1401     printf("** Failed to open %s\n", argv[op+1]);
1402 nigel 77 yield = 1;
1403     goto EXIT;
1404 nigel 3 }
1405     }
1406    
1407     /* Set alternative malloc function */
1408    
1409     pcre_malloc = new_malloc;
1410 nigel 73 pcre_free = new_free;
1411     pcre_stack_malloc = stack_malloc;
1412     pcre_stack_free = stack_free;
1413 nigel 3
1414 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1415 nigel 3
1416 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1417 nigel 3
1418     /* Main loop */
1419    
1420 nigel 11 while (!done)
1421 nigel 3 {
1422     pcre *re = NULL;
1423     pcre_extra *extra = NULL;
1424 nigel 37
1425     #if !defined NOPOSIX /* There are still compilers that require no indent */
1426 nigel 3 regex_t preg;
1427 nigel 45 int do_posix = 0;
1428 nigel 37 #endif
1429    
1430 nigel 7 const char *error;
1431 ph10 512 unsigned char *markptr;
1432 nigel 25 unsigned char *p, *pp, *ppp;
1433 nigel 75 unsigned char *to_file = NULL;
1434 nigel 53 const unsigned char *tables = NULL;
1435 nigel 75 unsigned long int true_size, true_study_size = 0;
1436     size_t size, regex_gotten_store;
1437 ph10 626 int do_allcaps = 0;
1438 ph10 512 int do_mark = 0;
1439 nigel 3 int do_study = 0;
1440 ph10 612 int no_force_study = 0;
1441 nigel 25 int do_debug = debug;
1442 nigel 35 int do_G = 0;
1443     int do_g = 0;
1444 nigel 25 int do_showinfo = showinfo;
1445 nigel 35 int do_showrest = 0;
1446 ph10 616 int do_showcaprest = 0;
1447 nigel 75 int do_flip = 0;
1448 nigel 93 int erroroffset, len, delimiter, poffset;
1449 nigel 3
1450 nigel 67 use_utf8 = 0;
1451 ph10 211 debug_lengths = 1;
1452 nigel 63
1453 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1454 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1455 nigel 63 fflush(outfile);
1456 nigel 3
1457     p = buffer;
1458     while (isspace(*p)) p++;
1459     if (*p == 0) continue;
1460    
1461 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1462 nigel 3
1463 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1464     {
1465 nigel 91 unsigned long int magic, get_options;
1466 nigel 75 uschar sbuf[8];
1467     FILE *f;
1468    
1469     p++;
1470     pp = p + (int)strlen((char *)p);
1471     while (isspace(pp[-1])) pp--;
1472     *pp = 0;
1473    
1474     f = fopen((char *)p, "rb");
1475     if (f == NULL)
1476     {
1477     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1478     continue;
1479     }
1480    
1481     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1482    
1483     true_size =
1484     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1485     true_study_size =
1486     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1487    
1488     re = (real_pcre *)new_malloc(true_size);
1489     regex_gotten_store = gotten_store;
1490    
1491     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1492    
1493     magic = ((real_pcre *)re)->magic_number;
1494     if (magic != MAGIC_NUMBER)
1495     {
1496     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1497     {
1498     do_flip = 1;
1499     }
1500     else
1501     {
1502     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1503     fclose(f);
1504     continue;
1505     }
1506     }
1507    
1508 ph10 612 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1509 nigel 75 do_flip? " (byte-inverted)" : "", p);
1510    
1511     /* Need to know if UTF-8 for printing data strings */
1512    
1513 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1514     use_utf8 = (get_options & PCRE_UTF8) != 0;
1515 nigel 75
1516 ph10 612 /* Now see if there is any following study data. */
1517 nigel 75
1518     if (true_study_size != 0)
1519     {
1520     pcre_study_data *psd;
1521    
1522     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1523     extra->flags = PCRE_EXTRA_STUDY_DATA;
1524    
1525     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1526     extra->study_data = psd;
1527    
1528     if (fread(psd, 1, true_study_size, f) != true_study_size)
1529     {
1530     FAIL_READ:
1531     fprintf(outfile, "Failed to read data from %s\n", p);
1532     if (extra != NULL) new_free(extra);
1533     if (re != NULL) new_free(re);
1534     fclose(f);
1535     continue;
1536     }
1537     fprintf(outfile, "Study data loaded from %s\n", p);
1538     do_study = 1; /* To get the data output if requested */
1539     }
1540     else fprintf(outfile, "No study data\n");
1541    
1542     fclose(f);
1543     goto SHOW_INFO;
1544     }
1545    
1546     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1547     the pattern; if it isn't complete, read more. */
1548    
1549 nigel 3 delimiter = *p++;
1550    
1551 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1552 nigel 3 {
1553 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1554 nigel 3 goto SKIP_DATA;
1555     }
1556    
1557     pp = p;
1558 ph10 530 poffset = (int)(p - buffer);
1559 nigel 3
1560     for(;;)
1561     {
1562 nigel 29 while (*pp != 0)
1563     {
1564     if (*pp == '\\' && pp[1] != 0) pp++;
1565     else if (*pp == delimiter) break;
1566     pp++;
1567     }
1568 nigel 3 if (*pp != 0) break;
1569 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1570 nigel 3 {
1571     fprintf(outfile, "** Unexpected EOF\n");
1572 nigel 11 done = 1;
1573     goto CONTINUE;
1574 nigel 3 }
1575 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1576 nigel 3 }
1577    
1578 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1579     pointer to the correct relative point in the buffer. */
1580    
1581     p = buffer + poffset;
1582    
1583 nigel 29 /* If the first character after the delimiter is backslash, make
1584     the pattern end with backslash. This is purely to provide a way
1585     of testing for the error message when a pattern ends with backslash. */
1586    
1587     if (pp[1] == '\\') *pp++ = '\\';
1588    
1589 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1590     for callouts. */
1591 nigel 3
1592     *pp++ = 0;
1593 nigel 75 strcpy((char *)pbuffer, (char *)p);
1594 nigel 3
1595     /* Look for options after final delimiter */
1596    
1597     options = 0;
1598     study_options = 0;
1599 nigel 31 log_store = showstore; /* default from command line */
1600    
1601 nigel 3 while (*pp != 0)
1602     {
1603     switch (*pp++)
1604     {
1605 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1606 nigel 35 case 'g': do_g = 1; break;
1607 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1608     case 'm': options |= PCRE_MULTILINE; break;
1609     case 's': options |= PCRE_DOTALL; break;
1610     case 'x': options |= PCRE_EXTENDED; break;
1611 nigel 25
1612 ph10 616 case '+':
1613     if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1614     break;
1615 ph10 626
1616     case '=': do_allcaps = 1; break;
1617 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1618 nigel 93 case 'B': do_debug = 1; break;
1619 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1620 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1621 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1622 nigel 75 case 'F': do_flip = 1; break;
1623 nigel 35 case 'G': do_G = 1; break;
1624 nigel 25 case 'I': do_showinfo = 1; break;
1625 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1626 ph10 512 case 'K': do_mark = 1; break;
1627 nigel 31 case 'M': log_store = 1; break;
1628 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1629 nigel 37
1630     #if !defined NOPOSIX
1631 nigel 3 case 'P': do_posix = 1; break;
1632 nigel 37 #endif
1633    
1634 ph10 612 case 'S':
1635     if (do_study == 0) do_study = 1; else
1636     {
1637     do_study = 0;
1638     no_force_study = 1;
1639     }
1640     break;
1641    
1642 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1643 ph10 535 case 'W': options |= PCRE_UCP; break;
1644 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1645 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1646 ph10 126 case 'Z': debug_lengths = 0; break;
1647 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1648 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1649 ph10 545
1650 ph10 541 case 'T':
1651     switch (*pp++)
1652     {
1653     case '0': tables = tables0; break;
1654     case '1': tables = tables1; break;
1655 ph10 545
1656 ph10 541 case '\r':
1657     case '\n':
1658 ph10 545 case ' ':
1659     case 0:
1660 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
1661 ph10 545 goto SKIP_DATA;
1662    
1663     default:
1664 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1665 ph10 545 goto SKIP_DATA;
1666 ph10 541 }
1667 ph10 545 break;
1668 nigel 25
1669     case 'L':
1670     ppp = pp;
1671 nigel 93 /* The '\r' test here is so that it works on Windows. */
1672     /* The '0' test is just in case this is an unterminated line. */
1673     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1674 nigel 25 *ppp = 0;
1675     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1676     {
1677     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1678     goto SKIP_DATA;
1679     }
1680 nigel 93 locale_set = 1;
1681 nigel 25 tables = pcre_maketables();
1682     pp = ppp;
1683     break;
1684    
1685 nigel 75 case '>':
1686     to_file = pp;
1687     while (*pp != 0) pp++;
1688     while (isspace(pp[-1])) pp--;
1689     *pp = 0;
1690     break;
1691    
1692 nigel 91 case '<':
1693     {
1694 ph10 518 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1695 ph10 336 {
1696     options |= PCRE_JAVASCRIPT_COMPAT;
1697 ph10 345 pp += 3;
1698 ph10 336 }
1699     else
1700 ph10 345 {
1701 ph10 336 int x = check_newline(pp, outfile);
1702     if (x == 0) goto SKIP_DATA;
1703     options |= x;
1704     while (*pp++ != '>');
1705 ph10 345 }
1706 nigel 91 }
1707     break;
1708    
1709 nigel 77 case '\r': /* So that it works in Windows */
1710     case '\n':
1711     case ' ':
1712     break;
1713 nigel 75
1714 nigel 3 default:
1715     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1716     goto SKIP_DATA;
1717     }
1718     }
1719    
1720 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1721 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1722     local character tables. */
1723 nigel 3
1724 nigel 37 #if !defined NOPOSIX
1725 nigel 3 if (posix || do_posix)
1726     {
1727     int rc;
1728     int cflags = 0;
1729 nigel 75
1730 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1731     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1732 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1733 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1734     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1735 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1736 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1737 nigel 87
1738 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1739    
1740     /* Compilation failed; go back for another re, skipping to blank line
1741     if non-interactive. */
1742    
1743     if (rc != 0)
1744     {
1745 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1746 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1747     goto SKIP_DATA;
1748     }
1749     }
1750    
1751     /* Handle compiling via the native interface */
1752    
1753     else
1754 nigel 37 #endif /* !defined NOPOSIX */
1755    
1756 nigel 3 {
1757 ph10 412 unsigned long int get_options;
1758 ph10 416
1759 nigel 93 if (timeit > 0)
1760 nigel 3 {
1761     register int i;
1762     clock_t time_taken;
1763     clock_t start_time = clock();
1764 nigel 93 for (i = 0; i < timeit; i++)
1765 nigel 3 {
1766 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1767 nigel 3 if (re != NULL) free(re);
1768     }
1769     time_taken = clock() - start_time;
1770 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1771     (((double)time_taken * 1000.0) / (double)timeit) /
1772 nigel 63 (double)CLOCKS_PER_SEC);
1773 nigel 3 }
1774    
1775 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1776 nigel 3
1777     /* Compilation failed; go back for another re, skipping to blank line
1778     if non-interactive. */
1779    
1780     if (re == NULL)
1781     {
1782     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1783     SKIP_DATA:
1784     if (infile != stdin)
1785     {
1786     for (;;)
1787     {
1788 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
1789 nigel 11 {
1790     done = 1;
1791     goto CONTINUE;
1792     }
1793 nigel 3 len = (int)strlen((char *)buffer);
1794     while (len > 0 && isspace(buffer[len-1])) len--;
1795     if (len == 0) break;
1796     }
1797     fprintf(outfile, "\n");
1798     }
1799 nigel 25 goto CONTINUE;
1800 nigel 3 }
1801 ph10 416
1802     /* Compilation succeeded. It is now possible to set the UTF-8 option from
1803     within the regex; check for this so that we know how to process the data
1804 ph10 412 lines. */
1805 ph10 416
1806 ph10 412 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1807     if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1808 nigel 3
1809 ph10 412 /* Print information if required. There are now two info-returning
1810     functions. The old one has a limited interface and returns only limited
1811     data. Check that it agrees with the newer one. */
1812 nigel 3
1813 nigel 63 if (log_store)
1814     fprintf(outfile, "Memory allocation (code space): %d\n",
1815     (int)(gotten_store -
1816     sizeof(real_pcre) -
1817     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1818    
1819 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1820     and remember the store that was got. */
1821    
1822     true_size = ((real_pcre *)re)->size;
1823     regex_gotten_store = gotten_store;
1824    
1825 ph10 612 /* If -s or /S was present, study the regex to generate additional info to
1826     help with the matching, unless the pattern has the SS option, which
1827     suppresses the effect of /S (used for a few test patterns where studying is
1828     never sensible). */
1829 nigel 75
1830 ph10 612 if (do_study || (force_study && !no_force_study))
1831 nigel 75 {
1832 nigel 93 if (timeit > 0)
1833 nigel 75 {
1834     register int i;
1835     clock_t time_taken;
1836     clock_t start_time = clock();
1837 nigel 93 for (i = 0; i < timeit; i++)
1838 nigel 75 extra = pcre_study(re, study_options, &error);
1839     time_taken = clock() - start_time;
1840     if (extra != NULL) free(extra);
1841 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1842     (((double)time_taken * 1000.0) / (double)timeit) /
1843 nigel 75 (double)CLOCKS_PER_SEC);
1844     }
1845     extra = pcre_study(re, study_options, &error);
1846     if (error != NULL)
1847     fprintf(outfile, "Failed to study: %s\n", error);
1848     else if (extra != NULL)
1849     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1850     }
1851 ph10 512
1852 ph10 510 /* If /K was present, we set up for handling MARK data. */
1853 ph10 512
1854 ph10 510 if (do_mark)
1855     {
1856     if (extra == NULL)
1857     {
1858     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1859     extra->flags = 0;
1860     }
1861 ph10 512 extra->mark = &markptr;
1862 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
1863 ph10 512 }
1864 nigel 75
1865     /* If the 'F' option was present, we flip the bytes of all the integer
1866     fields in the regex data block and the study block. This is to make it
1867     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1868     compiled on a different architecture. */
1869    
1870     if (do_flip)
1871     {
1872     real_pcre *rre = (real_pcre *)re;
1873 ph10 259 rre->magic_number =
1874 ph10 255 byteflip(rre->magic_number, sizeof(rre->magic_number));
1875 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
1876     rre->options = byteflip(rre->options, sizeof(rre->options));
1877 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1878 ph10 259 rre->top_bracket =
1879 ph10 255 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1880 ph10 259 rre->top_backref =
1881 ph10 255 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1882 ph10 259 rre->first_byte =
1883 ph10 255 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1884 ph10 259 rre->req_byte =
1885 ph10 255 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1886     rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1887 nigel 75 sizeof(rre->name_table_offset));
1888 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1889 nigel 75 sizeof(rre->name_entry_size));
1890 ph10 259 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1891 ph10 255 sizeof(rre->name_count));
1892 nigel 75
1893     if (extra != NULL)
1894     {
1895     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1896     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1897 ph10 455 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1898     rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1899 nigel 75 }
1900     }
1901    
1902     /* Extract information from the compiled data if required */
1903    
1904     SHOW_INFO:
1905    
1906 nigel 93 if (do_debug)
1907     {
1908     fprintf(outfile, "------------------------------------------------------------------\n");
1909 ph10 116 pcre_printint(re, outfile, debug_lengths);
1910 nigel 93 }
1911 ph10 416
1912 ph10 412 /* We already have the options in get_options (see above) */
1913 nigel 93
1914 nigel 25 if (do_showinfo)
1915 nigel 3 {
1916 ph10 412 unsigned long int all_options;
1917 nigel 79 #if !defined NOINFOCHECK
1918 nigel 43 int old_first_char, old_options, old_count;
1919 nigel 79 #endif
1920 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1921 ph10 227 hascrorlf;
1922 nigel 63 int nameentrysize, namecount;
1923     const uschar *nametable;
1924 nigel 3
1925 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1926     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1927     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1928 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1929 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1930 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1931     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1932 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1933 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1934     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1935 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1936 nigel 43
1937 nigel 79 #if !defined NOINFOCHECK
1938 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1939 nigel 3 if (count < 0) fprintf(outfile,
1940 nigel 43 "Error %d from pcre_info()\n", count);
1941 nigel 3 else
1942     {
1943 nigel 43 if (old_count != count) fprintf(outfile,
1944     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1945     old_count);
1946 nigel 37
1947 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1948     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1949     first_char, old_first_char);
1950 nigel 37
1951 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1952     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1953     get_options, old_options);
1954 nigel 43 }
1955 nigel 79 #endif
1956 nigel 43
1957 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1958 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1959 nigel 77 (int)size, (int)regex_gotten_store);
1960 nigel 43
1961     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1962     if (backrefmax > 0)
1963     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1964 nigel 63
1965     if (namecount > 0)
1966     {
1967     fprintf(outfile, "Named capturing subpatterns:\n");
1968     while (namecount-- > 0)
1969     {
1970     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1971     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1972     GET2(nametable, 0));
1973     nametable += nameentrysize;
1974     }
1975     }
1976 ph10 172
1977 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1978 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1979 nigel 63
1980 nigel 75 all_options = ((real_pcre *)re)->options;
1981 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1982 nigel 75
1983 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1984 ph10 576 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1985 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1986     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1987     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1988     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1989 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1990 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1991 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1992     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1993 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1994     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1995     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1996 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1997 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1998 ph10 518 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1999 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2000 ph10 576 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2001 nigel 91 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2002 ph10 172
2003 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2004 nigel 43
2005 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
2006 nigel 91 {
2007     case PCRE_NEWLINE_CR:
2008     fprintf(outfile, "Forced newline sequence: CR\n");
2009     break;
2010 nigel 43
2011 nigel 91 case PCRE_NEWLINE_LF:
2012     fprintf(outfile, "Forced newline sequence: LF\n");
2013     break;
2014    
2015     case PCRE_NEWLINE_CRLF:
2016     fprintf(outfile, "Forced newline sequence: CRLF\n");
2017     break;
2018    
2019 ph10 149 case PCRE_NEWLINE_ANYCRLF:
2020     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2021     break;
2022    
2023 nigel 93 case PCRE_NEWLINE_ANY:
2024     fprintf(outfile, "Forced newline sequence: ANY\n");
2025     break;
2026    
2027 nigel 91 default:
2028     break;
2029     }
2030    
2031 nigel 43 if (first_char == -1)
2032     {
2033 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
2034 nigel 43 }
2035     else if (first_char < 0)
2036     {
2037     fprintf(outfile, "No first char\n");
2038     }
2039     else
2040     {
2041 nigel 63 int ch = first_char & 255;
2042 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2043 nigel 63 "" : " (caseless)";
2044 nigel 93 if (PRINTHEX(ch))
2045 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2046 nigel 3 else
2047 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2048 nigel 43 }
2049 nigel 37
2050 nigel 43 if (need_char < 0)
2051     {
2052     fprintf(outfile, "No need char\n");
2053 nigel 3 }
2054 nigel 43 else
2055     {
2056 nigel 63 int ch = need_char & 255;
2057 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2058 nigel 63 "" : " (caseless)";
2059 nigel 93 if (PRINTHEX(ch))
2060 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2061 nigel 43 else
2062 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2063 nigel 43 }
2064 nigel 75
2065     /* Don't output study size; at present it is in any case a fixed
2066     value, but it varies, depending on the computer architecture, and
2067     so messes up the test suite. (And with the /F option, it might be
2068 ph10 612 flipped.) If study was forced by an external -s, don't show this
2069     information unless -i or -d was also present. This means that, except
2070     when auto-callouts are involved, the output from runs with and without
2071     -s should be identical. */
2072 nigel 75
2073 ph10 612 if (do_study || (force_study && showinfo && !no_force_study))
2074 nigel 75 {
2075     if (extra == NULL)
2076     fprintf(outfile, "Study returned NULL\n");
2077     else
2078     {
2079     uschar *start_bits = NULL;
2080 ph10 455 int minlength;
2081 ph10 461
2082 ph10 455 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2083 ph10 461 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2084    
2085 nigel 75 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2086     if (start_bits == NULL)
2087 ph10 455 fprintf(outfile, "No set of starting bytes\n");
2088 nigel 75 else
2089     {
2090     int i;
2091     int c = 24;
2092     fprintf(outfile, "Starting byte set: ");
2093     for (i = 0; i < 256; i++)
2094     {
2095     if ((start_bits[i/8] & (1<<(i&7))) != 0)
2096     {
2097     if (c > 75)
2098     {
2099     fprintf(outfile, "\n ");
2100     c = 2;
2101     }
2102 nigel 93 if (PRINTHEX(i) && i != ' ')
2103 nigel 75 {
2104     fprintf(outfile, "%c ", i);
2105     c += 2;
2106     }
2107     else
2108     {
2109     fprintf(outfile, "\\x%02x ", i);
2110     c += 5;
2111     }
2112     }
2113     }
2114     fprintf(outfile, "\n");
2115     }
2116     }
2117     }
2118 nigel 3 }
2119    
2120 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
2121     that is all. The first 8 bytes of the file are the regex length and then
2122     the study length, in big-endian order. */
2123 nigel 3
2124 nigel 75 if (to_file != NULL)
2125 nigel 3 {
2126 nigel 75 FILE *f = fopen((char *)to_file, "wb");
2127     if (f == NULL)
2128 nigel 3 {
2129 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2130 nigel 3 }
2131 nigel 75 else
2132     {
2133     uschar sbuf[8];
2134 ph10 255 sbuf[0] = (uschar)((true_size >> 24) & 255);
2135     sbuf[1] = (uschar)((true_size >> 16) & 255);
2136     sbuf[2] = (uschar)((true_size >> 8) & 255);
2137     sbuf[3] = (uschar)((true_size) & 255);
2138 ph10 259
2139 ph10 255 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2140     sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2141     sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2142     sbuf[7] = (uschar)((true_study_size) & 255);
2143 nigel 3
2144 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
2145     fwrite(re, 1, true_size, f) < true_size)
2146     {
2147     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2148     }
2149 nigel 3 else
2150     {
2151 ph10 612 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2152    
2153     /* If there is study data, write it as well, and report whether the
2154     write succeeded or failed. */
2155    
2156 nigel 75 if (extra != NULL)
2157 nigel 3 {
2158 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
2159     true_study_size)
2160 nigel 3 {
2161 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
2162     strerror(errno));
2163 nigel 3 }
2164 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
2165 nigel 3 }
2166     }
2167 nigel 75 fclose(f);
2168 nigel 3 }
2169 nigel 77
2170     new_free(re);
2171     if (extra != NULL) new_free(extra);
2172 ph10 545 if (locale_set)
2173 ph10 541 {
2174     new_free((void *)tables);
2175     setlocale(LC_CTYPE, "C");
2176 ph10 545 locale_set = 0;
2177     }
2178 nigel 75 continue; /* With next regex */
2179 nigel 3 }
2180 nigel 75 } /* End of non-POSIX compile */
2181 nigel 3
2182     /* Read data lines and test them */
2183    
2184     for (;;)
2185     {
2186 nigel 87 uschar *q;
2187 ph10 147 uschar *bptr;
2188 nigel 57 int *use_offsets = offsets;
2189 nigel 53 int use_size_offsets = size_offsets;
2190 nigel 63 int callout_data = 0;
2191     int callout_data_set = 0;
2192 nigel 3 int count, c;
2193 nigel 29 int copystrings = 0;
2194 ph10 386 int find_match_limit = default_find_match_limit;
2195 nigel 29 int getstrings = 0;
2196     int getlist = 0;
2197 nigel 39 int gmatched = 0;
2198 nigel 35 int start_offset = 0;
2199 ph10 579 int start_offset_sign = 1;
2200 nigel 41 int g_notempty = 0;
2201 nigel 77 int use_dfa = 0;
2202 nigel 3
2203     options = 0;
2204    
2205 nigel 91 *copynames = 0;
2206     *getnames = 0;
2207    
2208     copynamesptr = copynames;
2209     getnamesptr = getnames;
2210    
2211 nigel 63 pcre_callout = callout;
2212     first_callout = 1;
2213     callout_extra = 0;
2214     callout_count = 0;
2215     callout_fail_count = 999999;
2216     callout_fail_id = -1;
2217 nigel 73 show_malloc = 0;
2218 nigel 63
2219 nigel 91 if (extra != NULL) extra->flags &=
2220     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2221    
2222     len = 0;
2223     for (;;)
2224 nigel 11 {
2225 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2226 nigel 91 {
2227 ph10 537 if (len > 0) /* Reached EOF without hitting a newline */
2228     {
2229 ph10 545 fprintf(outfile, "\n");
2230 ph10 537 break;
2231 ph10 545 }
2232 nigel 91 done = 1;
2233     goto CONTINUE;
2234     }
2235     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2236     len = (int)strlen((char *)buffer);
2237     if (buffer[len-1] == '\n') break;
2238 nigel 11 }
2239 nigel 3
2240     while (len > 0 && isspace(buffer[len-1])) len--;
2241     buffer[len] = 0;
2242     if (len == 0) break;
2243    
2244     p = buffer;
2245     while (isspace(*p)) p++;
2246    
2247 ph10 147 bptr = q = dbuffer;
2248 nigel 3 while ((c = *p++) != 0)
2249     {
2250     int i = 0;
2251     int n = 0;
2252 nigel 63
2253 nigel 3 if (c == '\\') switch ((c = *p++))
2254     {
2255     case 'a': c = 7; break;
2256     case 'b': c = '\b'; break;
2257     case 'e': c = 27; break;
2258     case 'f': c = '\f'; break;
2259     case 'n': c = '\n'; break;
2260     case 'r': c = '\r'; break;
2261     case 't': c = '\t'; break;
2262     case 'v': c = '\v'; break;
2263    
2264     case '0': case '1': case '2': case '3':
2265     case '4': case '5': case '6': case '7':
2266     c -= '0';
2267     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2268     c = c * 8 + *p++ - '0';
2269 nigel 91
2270     #if !defined NOUTF8
2271     if (use_utf8 && c > 255)
2272     {
2273     unsigned char buff8[8];
2274     int ii, utn;
2275     utn = ord2utf8(c, buff8);
2276     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2277     c = buff8[ii]; /* Last byte */
2278     }
2279     #endif
2280 nigel 3 break;
2281    
2282     case 'x':
2283 nigel 49
2284     /* Handle \x{..} specially - new Perl thing for utf8 */
2285    
2286 nigel 79 #if !defined NOUTF8
2287 nigel 49 if (*p == '{')
2288     {
2289     unsigned char *pt = p;
2290     c = 0;
2291     while (isxdigit(*(++pt)))
2292     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2293     if (*pt == '}')
2294     {
2295 nigel 67 unsigned char buff8[8];
2296 nigel 49 int ii, utn;
2297 ph10 355 if (use_utf8)
2298 ph10 358 {
2299 ph10 355 utn = ord2utf8(c, buff8);
2300     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2301     c = buff8[ii]; /* Last byte */
2302     }
2303     else
2304     {
2305 ph10 358 if (c > 255)
2306 ph10 355 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2307     "UTF-8 mode is not enabled.\n"
2308     "** Truncation will probably give the wrong result.\n", c);
2309 ph10 358 }
2310 nigel 49 p = pt + 1;
2311     break;
2312     }
2313     /* Not correct form; fall through */
2314     }
2315 nigel 79 #endif
2316 nigel 49
2317     /* Ordinary \x */
2318    
2319 nigel 3 c = 0;
2320     while (i++ < 2 && isxdigit(*p))
2321     {
2322     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2323     p++;
2324     }
2325     break;
2326    
2327 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
2328 nigel 3 p--;
2329     continue;
2330    
2331 nigel 75 case '>':
2332 ph10 579 if (*p == '-')
2333 ph10 567 {
2334     start_offset_sign = -1;
2335     p++;
2336 ph10 579 }
2337 nigel 75 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2338 ph10 579 start_offset *= start_offset_sign;
2339 nigel 75 continue;
2340    
2341 nigel 3 case 'A': /* Option setting */
2342     options |= PCRE_ANCHORED;
2343     continue;
2344    
2345     case 'B':
2346     options |= PCRE_NOTBOL;
2347     continue;
2348    
2349 nigel 29 case 'C':
2350 nigel 63 if (isdigit(*p)) /* Set copy string */
2351     {
2352     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2353     copystrings |= 1 << n;
2354     }
2355     else if (isalnum(*p))
2356     {
2357 nigel 91 uschar *npp = copynamesptr;
2358 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2359 nigel 91 *npp++ = 0;
2360 nigel 67 *npp = 0;
2361 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2362 nigel 63 if (n < 0)
2363 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2364     copynamesptr = npp;
2365 nigel 63 }
2366     else if (*p == '+')
2367     {
2368     callout_extra = 1;
2369     p++;
2370     }
2371     else if (*p == '-')
2372     {
2373     pcre_callout = NULL;
2374     p++;
2375     }
2376     else if (*p == '!')
2377     {
2378     callout_fail_id = 0;
2379     p++;
2380     while(isdigit(*p))
2381     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2382     callout_fail_count = 0;
2383     if (*p == '!')
2384     {
2385     p++;
2386     while(isdigit(*p))
2387     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2388     }
2389     }
2390     else if (*p == '*')
2391     {
2392     int sign = 1;
2393     callout_data = 0;
2394     if (*(++p) == '-') { sign = -1; p++; }
2395     while(isdigit(*p))
2396     callout_data = callout_data * 10 + *p++ - '0';
2397     callout_data *= sign;
2398     callout_data_set = 1;
2399     }
2400 nigel 29 continue;
2401    
2402 nigel 79 #if !defined NODFA
2403 nigel 77 case 'D':
2404 nigel 79 #if !defined NOPOSIX
2405 nigel 77 if (posix || do_posix)
2406     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2407     else
2408 nigel 79 #endif
2409 nigel 77 use_dfa = 1;
2410     continue;
2411 ph10 553 #endif
2412 nigel 77
2413 ph10 553 #if !defined NODFA
2414 nigel 77 case 'F':
2415     options |= PCRE_DFA_SHORTEST;
2416     continue;
2417 nigel 79 #endif
2418 nigel 77
2419 nigel 29 case 'G':
2420 nigel 63 if (isdigit(*p))
2421     {
2422     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2423     getstrings |= 1 << n;
2424     }
2425     else if (isalnum(*p))
2426     {
2427 nigel 91 uschar *npp = getnamesptr;
2428 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2429 nigel 91 *npp++ = 0;
2430 nigel 67 *npp = 0;
2431 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2432 nigel 63 if (n < 0)
2433 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2434     getnamesptr = npp;
2435 nigel 63 }
2436 nigel 29 continue;
2437    
2438     case 'L':
2439     getlist = 1;
2440     continue;
2441    
2442 nigel 63 case 'M':
2443     find_match_limit = 1;
2444     continue;
2445    
2446 nigel 37 case 'N':
2447 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
2448     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2449 ph10 461 else
2450 ph10 442 options |= PCRE_NOTEMPTY;
2451 nigel 37 continue;
2452    
2453 nigel 3 case 'O':
2454     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2455 nigel 53 if (n > size_offsets_max)
2456     {
2457     size_offsets_max = n;
2458 nigel 57 free(offsets);
2459 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2460 nigel 53 if (offsets == NULL)
2461     {
2462     printf("** Failed to get %d bytes of memory for offsets vector\n",
2463 ph10 151 (int)(size_offsets_max * sizeof(int)));
2464 nigel 77 yield = 1;
2465     goto EXIT;
2466 nigel 53 }
2467     }
2468     use_size_offsets = n;
2469 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2470 nigel 3 continue;
2471    
2472 nigel 75 case 'P':
2473 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2474 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2475 nigel 75 continue;
2476    
2477 nigel 91 case 'Q':
2478     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2479     if (extra == NULL)
2480     {
2481     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2482     extra->flags = 0;
2483     }
2484     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2485     extra->match_limit_recursion = n;
2486     continue;
2487    
2488     case 'q':
2489     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2490     if (extra == NULL)
2491     {
2492     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2493     extra->flags = 0;
2494     }
2495     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2496     extra->match_limit = n;
2497     continue;
2498    
2499 nigel 79 #if !defined NODFA
2500 nigel 77 case 'R':
2501     options |= PCRE_DFA_RESTART;
2502     continue;
2503 nigel 79 #endif
2504 nigel 77
2505 nigel 73 case 'S':
2506     show_malloc = 1;
2507     continue;
2508 ph10 392
2509 ph10 389 case 'Y':
2510     options |= PCRE_NO_START_OPTIMIZE;
2511 ph10 392 continue;
2512 nigel 73
2513 nigel 3 case 'Z':
2514     options |= PCRE_NOTEOL;
2515     continue;
2516 nigel 71
2517     case '?':
2518     options |= PCRE_NO_UTF8_CHECK;
2519     continue;
2520 nigel 91
2521     case '<':
2522     {
2523     int x = check_newline(p, outfile);
2524     if (x == 0) goto NEXT_DATA;
2525     options |= x;
2526     while (*p++ != '>');
2527     }
2528     continue;
2529 nigel 3 }
2530 nigel 9 *q++ = c;
2531 nigel 3 }
2532 nigel 9 *q = 0;
2533 ph10 530 len = (int)(q - dbuffer);
2534 ph10 545
2535 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
2536 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2537 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
2538 ph10 371
2539 ph10 363 #if !defined NOPOSIX
2540     if (posix || do_posix)
2541     {
2542     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2543 ph10 371 bptr += buffer_size - len - 1;
2544 ph10 363 }
2545 ph10 371 else
2546     #endif
2547 ph10 363 {
2548     memmove(bptr + buffer_size - len, bptr, len);
2549 ph10 371 bptr += buffer_size - len;
2550     }
2551 nigel 3
2552 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
2553     {
2554     printf("**Match limit not relevant for DFA matching: ignored\n");
2555     find_match_limit = 0;
2556     }
2557    
2558 nigel 3 /* Handle matching via the POSIX interface, which does not
2559 nigel 63 support timing or playing with the match limit or callout data. */
2560 nigel 3
2561 nigel 37 #if !defined NOPOSIX
2562 nigel 3 if (posix || do_posix)
2563     {
2564     int rc;
2565     int eflags = 0;
2566 nigel 63 regmatch_t *pmatch = NULL;
2567     if (use_size_offsets > 0)
2568 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2569 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2570     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2571 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2572 nigel 3
2573 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2574 nigel 3
2575     if (rc != 0)
2576     {
2577 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2578 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2579     }
2580 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2581     != 0)
2582     {
2583     fprintf(outfile, "Matched with REG_NOSUB\n");
2584     }
2585 nigel 3 else
2586     {
2587 nigel 7 size_t i;
2588 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
2589 nigel 3 {
2590     if (pmatch[i].rm_so >= 0)
2591     {
2592 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2593 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2594     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2595 nigel 3 fprintf(outfile, "\n");
2596 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
2597 nigel 35 {
2598 ph10 616 fprintf(outfile, "%2d+ ", (int)i);
2599 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2600     outfile);
2601 nigel 35 fprintf(outfile, "\n");
2602     }
2603 nigel 3 }
2604     }
2605     }
2606 nigel 53 free(pmatch);
2607 nigel 3 }
2608    
2609 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2610 nigel 3
2611 nigel 37 else
2612     #endif /* !defined NOPOSIX */
2613    
2614 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2615 nigel 3 {
2616 ph10 512 markptr = NULL;
2617    
2618 nigel 93 if (timeitm > 0)
2619 nigel 3 {
2620     register int i;
2621     clock_t time_taken;
2622     clock_t start_time = clock();
2623 nigel 77
2624 nigel 79 #if !defined NODFA
2625 nigel 77 if (all_use_dfa || use_dfa)
2626     {
2627     int workspace[1000];
2628 nigel 93 for (i = 0; i < timeitm; i++)
2629 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2630 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2631     sizeof(workspace)/sizeof(int));
2632     }
2633     else
2634 nigel 79 #endif
2635 nigel 77
2636 nigel 93 for (i = 0; i < timeitm; i++)
2637 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2638 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2639 nigel 77
2640 nigel 3 time_taken = clock() - start_time;
2641 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2642     (((double)time_taken * 1000.0) / (double)timeitm) /
2643 nigel 63 (double)CLOCKS_PER_SEC);
2644 nigel 3 }
2645    
2646 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2647 nigel 87 varying limits in order to find the minimum value for the match limit and
2648     for the recursion limit. */
2649 nigel 63
2650     if (find_match_limit)
2651     {
2652     if (extra == NULL)
2653     {
2654 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2655 nigel 63 extra->flags = 0;
2656     }
2657    
2658 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2659 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2660     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2661     PCRE_ERROR_MATCHLIMIT, "match()");
2662 nigel 63
2663 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2664     options|g_notempty, use_offsets, use_size_offsets,
2665     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2666     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2667 nigel 63 }
2668    
2669     /* If callout_data is set, use the interface with additional data */
2670    
2671     else if (callout_data_set)
2672     {
2673     if (extra == NULL)
2674     {
2675 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2676 nigel 63 extra->flags = 0;
2677     }
2678     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2679 nigel 71 extra->callout_data = &callout_data;
2680 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2681     options | g_notempty, use_offsets, use_size_offsets);
2682     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2683     }
2684    
2685     /* The normal case is just to do the match once, with the default
2686     value of match_limit. */
2687    
2688 nigel 79 #if !defined NODFA
2689 nigel 77 else if (all_use_dfa || use_dfa)
2690     {
2691     int workspace[1000];
2692 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2693 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2694     sizeof(workspace)/sizeof(int));
2695     if (count == 0)
2696     {
2697     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2698     count = use_size_offsets/2;
2699     }
2700     }
2701 nigel 79 #endif
2702 nigel 77
2703 nigel 75 else
2704     {
2705     count = pcre_exec(re, extra, (char *)bptr, len,
2706     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2707 nigel 77 if (count == 0)
2708     {
2709     fprintf(outfile, "Matched, but too many substrings\n");
2710     count = use_size_offsets/3;
2711     }
2712 nigel 75 }
2713 nigel 3
2714 nigel 39 /* Matched */
2715    
2716 nigel 3 if (count >= 0)
2717     {
2718 nigel 93 int i, maxcount;
2719    
2720     #if !defined NODFA
2721     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2722     #endif
2723     maxcount = use_size_offsets/3;
2724    
2725     /* This is a check against a lunatic return value. */
2726    
2727     if (count > maxcount)
2728     {
2729     fprintf(outfile,
2730     "** PCRE error: returned count %d is too big for offset size %d\n",
2731     count, use_size_offsets);
2732     count = use_size_offsets/3;
2733     if (do_g || do_G)
2734     {
2735     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2736     do_g = do_G = FALSE; /* Break g/G loop */
2737     }
2738     }
2739 ph10 626
2740     /* do_allcaps requests showing of all captures in the pattern, to check
2741     unset ones at the end. */
2742    
2743     if (do_allcaps)
2744     {
2745     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2746     count++; /* Allow for full match */
2747     if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2748     }
2749 nigel 93
2750 ph10 626 /* Output the captured substrings */
2751    
2752 nigel 29 for (i = 0; i < count * 2; i += 2)
2753 nigel 3 {
2754 nigel 57 if (use_offsets[i] < 0)
2755 ph10 626 {
2756     if (use_offsets[i] != -1)
2757     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2758     use_offsets[i], i);
2759     if (use_offsets[i+1] != -1)
2760     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2761     use_offsets[i+1], i+1);
2762 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2763 ph10 626 }
2764 nigel 3 else
2765     {
2766     fprintf(outfile, "%2d: ", i/2);
2767 nigel 63 (void)pchars(bptr + use_offsets[i],
2768     use_offsets[i+1] - use_offsets[i], outfile);
2769 nigel 3 fprintf(outfile, "\n");
2770 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
2771 nigel 35 {
2772 ph10 616 fprintf(outfile, "%2d+ ", i/2);
2773     (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2774     outfile);
2775     fprintf(outfile, "\n");
2776 nigel 35 }
2777 nigel 3 }
2778     }
2779 ph10 512
2780 ph10 510 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2781 nigel 29
2782     for (i = 0; i < 32; i++)
2783     {
2784     if ((copystrings & (1 << i)) != 0)
2785     {
2786 nigel 91 char copybuffer[256];
2787 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2788 nigel 37 i, copybuffer, sizeof(copybuffer));
2789 nigel 29 if (rc < 0)
2790     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2791     else
2792 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2793 nigel 29 }
2794     }
2795    
2796 nigel 91 for (copynamesptr = copynames;
2797     *copynamesptr != 0;
2798     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2799     {
2800     char copybuffer[256];
2801     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2802     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2803     if (rc < 0)
2804     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2805     else
2806     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2807     }
2808    
2809 nigel 29 for (i = 0; i < 32; i++)
2810     {
2811     if ((getstrings & (1 << i)) != 0)
2812     {
2813     const char *substring;
2814 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2815 nigel 29 i, &substring);
2816     if (rc < 0)
2817     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2818     else
2819     {
2820     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2821 nigel 49 pcre_free_substring(substring);
2822 nigel 29 }
2823     }
2824     }
2825    
2826 nigel 91 for (getnamesptr = getnames;
2827     *getnamesptr != 0;
2828     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2829     {
2830     const char *substring;
2831     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2832     count, (char *)getnamesptr, &substring);
2833     if (rc < 0)
2834     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2835     else
2836     {
2837     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2838     pcre_free_substring(substring);
2839     }
2840     }
2841    
2842 nigel 29 if (getlist)
2843     {
2844     const char **stringlist;
2845 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2846 nigel 29 &stringlist);
2847     if (rc < 0)
2848     fprintf(outfile, "get substring list failed %d\n", rc);
2849     else
2850     {
2851     for (i = 0; i < count; i++)
2852     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2853     if (stringlist[i] != NULL)
2854     fprintf(outfile, "string list not terminated by NULL\n");
2855 nigel 49 /* free((void *)stringlist); */
2856     pcre_free_substring_list(stringlist);
2857 nigel 29 }
2858     }
2859 nigel 39 }
2860 nigel 29
2861 nigel 75 /* There was a partial match */
2862    
2863     else if (count == PCRE_ERROR_PARTIAL)
2864     {
2865 ph10 510 if (markptr == NULL) fprintf(outfile, "Partial match");
2866     else fprintf(outfile, "Partial match, mark=%s", markptr);
2867 ph10 426 if (use_size_offsets > 1)
2868     {
2869     fprintf(outfile, ": ");
2870     pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2871 ph10 461 outfile);
2872     }
2873 nigel 77 fprintf(outfile, "\n");
2874 nigel 75 break; /* Out of the /g loop */
2875     }
2876    
2877 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2878 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2879     to advance the start offset, and continue. We won't be at the end of the
2880     string - that was checked before setting g_notempty.
2881 nigel 39
2882 ph10 566 Complication arises in the case when the newline convention is "any",
2883 ph10 579 "crlf", or "anycrlf". If the previous match was at the end of a line
2884     terminated by CRLF, an advance of one character just passes the \r,
2885 ph10 566 whereas we should prefer the longer newline sequence, as does the code in
2886 ph10 579 pcre_exec(). Fudge the offset value to achieve this. We check for a
2887     newline setting in the pattern; if none was set, use pcre_config() to
2888 ph10 566 find the default.
2889 ph10 144
2890 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2891     character, not one byte. */
2892    
2893 nigel 3 else
2894     {
2895 nigel 41 if (g_notempty != 0)
2896 nigel 35 {
2897 nigel 73 int onechar = 1;
2898 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2899 nigel 57 use_offsets[0] = start_offset;
2900 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2901     {
2902     int d;
2903     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2904 ph10 391 /* Note that these values are always the ASCII ones, even in
2905     EBCDIC environments. CR = 13, NL = 10. */
2906     obits = (d == 13)? PCRE_NEWLINE_CR :
2907     (d == 10)? PCRE_NEWLINE_LF :
2908     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2909 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2910 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2911     }
2912 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2913 ph10 566 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2914 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2915 ph10 149 &&
2916 ph10 143 start_offset < len - 1 &&
2917     bptr[start_offset] == '\r' &&
2918     bptr[start_offset+1] == '\n')
2919 ph10 144 onechar++;
2920 ph10 143 else if (use_utf8)
2921 nigel 73 {
2922     while (start_offset + onechar < len)
2923     {
2924 ph10 566 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2925 ph10 579 onechar++;
2926 nigel 73 }
2927     }
2928     use_offsets[1] = start_offset + onechar;
2929 nigel 35 }
2930 nigel 41 else
2931     {
2932 ph10 598 switch(count)
2933     {
2934     case PCRE_ERROR_NOMATCH:
2935 ph10 512 if (gmatched == 0)
2936 ph10 510 {
2937     if (markptr == NULL) fprintf(outfile, "No match\n");
2938     else fprintf(outfile, "No match, mark = %s\n", markptr);
2939 ph10 512 }
2940 ph10 598 break;
2941    
2942     case PCRE_ERROR_BADUTF8:
2943     case PCRE_ERROR_SHORTUTF8:
2944     fprintf(outfile, "Error %d (%s UTF-8 string)", count,
2945     (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
2946     if (use_size_offsets >= 2)
2947     fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
2948     use_offsets[1]);
2949     fprintf(outfile, "\n");
2950     break;
2951    
2952     default:
2953 ph10 604 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
2954     fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
2955     else
2956     fprintf(outfile, "Error %d (Unexpected value)\n", count);
2957 ph10 598 break;
2958 nigel 41 }
2959 ph10 598
2960 nigel 41 break; /* Out of the /g loop */
2961     }
2962 nigel 3 }
2963 nigel 35
2964 nigel 39 /* If not /g or /G we are done */
2965    
2966     if (!do_g && !do_G) break;
2967    
2968 nigel 41 /* If we have matched an empty string, first check to see if we are at
2969 ph10 442 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2970     Perl's /g option does. This turns out to be rather cunning. First we set
2971     PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2972 nigel 47 same point. If this fails (picked up above) we advance to the next
2973 ph10 143 character. */
2974 ph10 142
2975 nigel 41 g_notempty = 0;
2976 ph10 142
2977 nigel 57 if (use_offsets[0] == use_offsets[1])
2978 nigel 41 {
2979 nigel 57 if (use_offsets[0] == len) break;
2980 ph10 442 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2981 nigel 41 }
2982 nigel 39
2983     /* For /g, update the start offset, leaving the rest alone */
2984    
2985 ph10 143 if (do_g) start_offset = use_offsets[1];
2986 nigel 39
2987     /* For /G, update the pointer and length */
2988    
2989     else
2990 nigel 35 {
2991 ph10 143 bptr += use_offsets[1];
2992     len -= use_offsets[1];
2993 nigel 35 }
2994 nigel 39 } /* End of loop for /g and /G */
2995 nigel 91
2996     NEXT_DATA: continue;
2997 nigel 39 } /* End of loop for data lines */
2998 nigel 3
2999 nigel 11 CONTINUE:
3000 nigel 37
3001     #if !defined NOPOSIX
3002 nigel 3 if (posix || do_posix) regfree(&preg);
3003 nigel 37 #endif
3004    
3005 nigel 77 if (re != NULL) new_free(re);
3006     if (extra != NULL) new_free(extra);
3007 ph10 541 if (locale_set)
3008 nigel 25 {
3009 nigel 77 new_free((void *)tables);
3010 nigel 25 setlocale(LC_CTYPE, "C");
3011 nigel 93 locale_set = 0;
3012 nigel 25 }
3013 nigel 3 }
3014    
3015 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
3016 nigel 77
3017     EXIT:
3018    
3019     if (infile != NULL && infile != stdin) fclose(infile);
3020     if (outfile != NULL && outfile != stdout) fclose(outfile);
3021    
3022     free(buffer);
3023     free(dbuffer);
3024     free(pbuffer);
3025     free(offsets);
3026    
3027     return yield;
3028 nigel 3 }
3029    
3030 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12