/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 642 - (show annotations) (download)
Thu Jul 28 18:59:40 2011 UTC (3 years, 4 months ago) by ph10
File MIME type: text/plain
File size: 91237 byte(s)
Avoid false positive for infinite recursion by not checking conditionals at 
compile time, but add tests at runtime that also catch infinite mutual 
recursion.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <locale.h>
49 #include <errno.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #ifndef isatty
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
78 #ifndef fileno
79 #define fileno _fileno
80 #endif
81
82 /* A user sent this fix for Borland Builder 5 under Windows. */
83
84 #ifdef __BORLANDC__
85 #define _setmode(handle, mode) setmode(handle, mode)
86 #endif
87
88 /* Not Windows */
89
90 #else
91 #include <sys/time.h> /* These two includes are needed */
92 #include <sys/resource.h> /* for setrlimit(). */
93 #define INPUT_MODE "rb"
94 #define OUTPUT_MODE "wb"
95 #endif
96
97
98 /* We have to include pcre_internal.h because we need the internal info for
99 displaying the results of pcre_study() and we also need to know about the
100 internal macros, structures, and other internal data values; pcretest has
101 "inside information" compared to a program that strictly follows the PCRE API.
102
103 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105 appropriately for an application, not for building PCRE. */
106
107 #include "pcre.h"
108 #include "pcre_internal.h"
109
110 /* We need access to some of the data tables that PCRE uses. So as not to have
111 to keep two copies, we include the source file here, changing the names of the
112 external symbols to prevent clashes. */
113
114 #define _pcre_ucp_gentype ucp_gentype
115 #define _pcre_utf8_table1 utf8_table1
116 #define _pcre_utf8_table1_size utf8_table1_size
117 #define _pcre_utf8_table2 utf8_table2
118 #define _pcre_utf8_table3 utf8_table3
119 #define _pcre_utf8_table4 utf8_table4
120 #define _pcre_utt utt
121 #define _pcre_utt_size utt_size
122 #define _pcre_utt_names utt_names
123 #define _pcre_OP_lengths OP_lengths
124
125 #include "pcre_tables.c"
126
127 /* We also need the pcre_printint() function for printing out compiled
128 patterns. This function is in a separate file so that it can be included in
129 pcre_compile.c when that module is compiled with debugging enabled. It needs to
130 know which case is being compiled. */
131
132 #define COMPILING_PCRETEST
133 #include "pcre_printint.src"
134
135 /* The definition of the macro PRINTABLE, which determines whether to print an
136 output character as-is or as a hex value when showing compiled patterns, is
137 contained in the printint.src file. We use it here also, in cases when the
138 locale has not been explicitly changed, so as to get consistent output from
139 systems that differ in their output from isprint() even in the "C" locale. */
140
141 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
142
143 /* It is possible to compile this test program without including support for
144 testing the POSIX interface, though this is not available via the standard
145 Makefile. */
146
147 #if !defined NOPOSIX
148 #include "pcreposix.h"
149 #endif
150
151 /* It is also possible, for the benefit of the version currently imported into
152 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
153 interface to the DFA matcher (NODFA), and without the doublecheck of the old
154 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
155 UTF8 support if PCRE is built without it. */
156
157 #ifndef SUPPORT_UTF8
158 #ifndef NOUTF8
159 #define NOUTF8
160 #endif
161 #endif
162
163
164 /* Other parameters */
165
166 #ifndef CLOCKS_PER_SEC
167 #ifdef CLK_TCK
168 #define CLOCKS_PER_SEC CLK_TCK
169 #else
170 #define CLOCKS_PER_SEC 100
171 #endif
172 #endif
173
174 /* This is the default loop count for timing. */
175
176 #define LOOPREPEAT 500000
177
178 /* Static variables */
179
180 static FILE *outfile; /* Stream that callout() and the malloc wrappers write to */
181 static int log_store = 0;
182 static int callout_count; /* Callouts numbered callout_fail_id seen so far; incremented in callout() */
183 static int callout_extra; /* Non-zero: callout() also lists the captured substrings */
184 static int callout_fail_count; /* callout() yields 1 once callout_count reaches this value */
185 static int callout_fail_id; /* Callout number whose occurrences are counted */
186 static int debug_lengths;
187 static int first_callout; /* Non-zero: callout() re-prints the subject; cleared at the end of each callout() call */
188 static int locale_set = 0; /* Non-zero: PRINTHEX uses isprint() instead of PRINTABLE */
189 static int show_malloc; /* Non-zero: the malloc/free wrappers log every call to outfile */
190 static int use_utf8; /* Non-zero: pchars() decodes its input as UTF-8 */
191 static size_t gotten_store; /* Size passed to the most recent new_malloc() call */
192
193 /* The buffers grow automatically if very long input lines are encountered. */
/* All three buffers are buffer_size bytes long; extend_inputline() doubles them
together, preserving the contents of buffer and pbuffer but not of dbuffer. */
194
195 static int buffer_size = 50000;
196 static uschar *buffer = NULL; /* Input line buffer filled by extend_inputline() */
197 static uschar *dbuffer = NULL;
198 static uschar *pbuffer = NULL; /* Copy of the input that callout() indexes by pattern position */
199
200 /* Textual explanations for runtime error codes */
201
/* One entry per runtime error; entry i presumably describes error code -i
(the in-line notes name NOMATCH at 1, BADUTF8 at 10, PARTIAL at 12 and
SHORTUTF8 at 25) - TODO confirm against pcre.h. A NULL entry marks a code that
has no generic text here because it is reported separately or never returned,
so users of this table must check for NULL before printing. */
202 static const char *errtexts[] = {
203 NULL, /* 0 is no error */
204 NULL, /* NOMATCH is handled specially */
205 "NULL argument passed",
206 "bad option value",
207 "magic number missing",
208 "unknown opcode - pattern overwritten?",
209 "no more memory",
210 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
211 "match limit exceeded",
212 "callout error code",
213 NULL, /* BADUTF8 is handled specially */
214 "bad UTF-8 offset",
215 NULL, /* PARTIAL is handled specially */
216 "not used - internal error",
217 "internal error - pattern overwritten?",
218 "bad count value",
219 "item unsupported for DFA matching",
220 "backreference condition or recursion test not supported for DFA matching",
221 "match limit not supported for DFA matching",
222 "workspace size exceeded in DFA matching",
223 "too much recursion for DFA matching",
224 "recursion limit exceeded",
225 "not used - internal error",
226 "invalid combination of newline options",
227 "bad offset value",
228 NULL, /* SHORTUTF8 is handled specially */
229 "nested recursion at the same subject position"
230 };
231
232
233 /*************************************************
234 * Alternate character tables *
235 *************************************************/
236
237 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
238 using the default tables of the library. However, the T option can be used to
239 select alternate sets of tables, for different kinds of testing. Note also that
240 the L (locale) option also adjusts the tables. */
241
242 /* This is the set of tables distributed as default with PCRE. It recognizes
243 only ASCII characters. */
244
/* Layout: 256-byte lower casing table, 256-byte case flipping table, ten
32-byte class bit maps (320 bytes), then a 256-byte character type table. */
245 static const unsigned char tables0[] = {
246
247 /* This table is a lower casing table. */
/* Indexed by byte value: 65-90 ('A'-'Z') map to 97-122 ('a'-'z'); every other
value maps to itself. */
248
249 0, 1, 2, 3, 4, 5, 6, 7,
250 8, 9, 10, 11, 12, 13, 14, 15,
251 16, 17, 18, 19, 20, 21, 22, 23,
252 24, 25, 26, 27, 28, 29, 30, 31,
253 32, 33, 34, 35, 36, 37, 38, 39,
254 40, 41, 42, 43, 44, 45, 46, 47,
255 48, 49, 50, 51, 52, 53, 54, 55,
256 56, 57, 58, 59, 60, 61, 62, 63,
257 64, 97, 98, 99,100,101,102,103,
258 104,105,106,107,108,109,110,111,
259 112,113,114,115,116,117,118,119,
260 120,121,122, 91, 92, 93, 94, 95,
261 96, 97, 98, 99,100,101,102,103,
262 104,105,106,107,108,109,110,111,
263 112,113,114,115,116,117,118,119,
264 120,121,122,123,124,125,126,127,
265 128,129,130,131,132,133,134,135,
266 136,137,138,139,140,141,142,143,
267 144,145,146,147,148,149,150,151,
268 152,153,154,155,156,157,158,159,
269 160,161,162,163,164,165,166,167,
270 168,169,170,171,172,173,174,175,
271 176,177,178,179,180,181,182,183,
272 184,185,186,187,188,189,190,191,
273 192,193,194,195,196,197,198,199,
274 200,201,202,203,204,205,206,207,
275 208,209,210,211,212,213,214,215,
276 216,217,218,219,220,221,222,223,
277 224,225,226,227,228,229,230,231,
278 232,233,234,235,236,237,238,239,
279 240,241,242,243,244,245,246,247,
280 248,249,250,251,252,253,254,255,
281
282 /* This table is a case flipping table. */
/* Indexed by byte value: 65-90 and 97-122 are exchanged; every other value
maps to itself. */
283
284 0, 1, 2, 3, 4, 5, 6, 7,
285 8, 9, 10, 11, 12, 13, 14, 15,
286 16, 17, 18, 19, 20, 21, 22, 23,
287 24, 25, 26, 27, 28, 29, 30, 31,
288 32, 33, 34, 35, 36, 37, 38, 39,
289 40, 41, 42, 43, 44, 45, 46, 47,
290 48, 49, 50, 51, 52, 53, 54, 55,
291 56, 57, 58, 59, 60, 61, 62, 63,
292 64, 97, 98, 99,100,101,102,103,
293 104,105,106,107,108,109,110,111,
294 112,113,114,115,116,117,118,119,
295 120,121,122, 91, 92, 93, 94, 95,
296 96, 65, 66, 67, 68, 69, 70, 71,
297 72, 73, 74, 75, 76, 77, 78, 79,
298 80, 81, 82, 83, 84, 85, 86, 87,
299 88, 89, 90,123,124,125,126,127,
300 128,129,130,131,132,133,134,135,
301 136,137,138,139,140,141,142,143,
302 144,145,146,147,148,149,150,151,
303 152,153,154,155,156,157,158,159,
304 160,161,162,163,164,165,166,167,
305 168,169,170,171,172,173,174,175,
306 176,177,178,179,180,181,182,183,
307 184,185,186,187,188,189,190,191,
308 192,193,194,195,196,197,198,199,
309 200,201,202,203,204,205,206,207,
310 208,209,210,211,212,213,214,215,
311 216,217,218,219,220,221,222,223,
312 224,225,226,227,228,229,230,231,
313 232,233,234,235,236,237,238,239,
314 240,241,242,243,244,245,246,247,
315 248,249,250,251,252,253,254,255,
316
317 /* This table contains bit maps for various character classes. Each map is 32
318 bytes long and the bits run from the least significant end of each byte. The
319 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
320 graph, print, punct, and cntrl. Other classes are built from combinations. */
/* Character c belongs to a class when bit (c & 7) of byte (c >> 3) of that
class's map is set. The maps appear in the order listed above. */
321
/* space: 9-13 and 32 */
322 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
323 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
324 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
325 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
326
/* xdigit: 0-9, A-F, a-f */
327 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
328 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
329 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
331
/* digit: 0-9 */
332 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
333 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
335 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
336
/* upper: A-Z */
337 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
338 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
339 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
340 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
341
/* lower: a-z */
342 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
343 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
344 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
345 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
346
/* word: 0-9, A-Z, '_', a-z */
347 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
348 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
349 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
350 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
351
/* graph: 33-126 */
352 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
353 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
354 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
355 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
356
/* print: 32-126 */
357 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
358 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
359 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
361
/* punct */
362 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
363 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
364 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
366
/* cntrl: 0-31 and 127 */
367 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
368 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
369 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
370 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
371
372 /* This table identifies various classes of character by individual bits:
373 0x01 white space character
374 0x02 letter
375 0x04 decimal digit
376 0x08 hexadecimal digit
377 0x10 alphanumeric or '_'
378 0x80 regular expression metacharacter or binary zero
379 */
380
381 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
382 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
383 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
384 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
385 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
386 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
387 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
388 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
389 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
390 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
391 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
392 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
393 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
394 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
395 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
396 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
397 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
398 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
399 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
400 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
401 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
402 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
403 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
404 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
405 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
406 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
407 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
408 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
409 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
410 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
411 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
412 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
413
414 /* This is a set of tables that came originally from a Windows user. It seems to
415 be at least an approximation of ISO 8859. In particular, there are characters
416 greater than 128 that are marked as spaces, letters, etc. */
417
/* The data is written eight values per line and has the same overall size as
tables0: 256 + 256 + 10*32 + 256 bytes. The section markers below assume the
same section order as tables0 - TODO confirm against the table generator. */
418 static const unsigned char tables1[] = {
/* Lower casing table (256 entries). Besides 65-90 -> 97-122, values 192-222
map to 224-254, except 215 which maps to itself. */
419 0,1,2,3,4,5,6,7,
420 8,9,10,11,12,13,14,15,
421 16,17,18,19,20,21,22,23,
422 24,25,26,27,28,29,30,31,
423 32,33,34,35,36,37,38,39,
424 40,41,42,43,44,45,46,47,
425 48,49,50,51,52,53,54,55,
426 56,57,58,59,60,61,62,63,
427 64,97,98,99,100,101,102,103,
428 104,105,106,107,108,109,110,111,
429 112,113,114,115,116,117,118,119,
430 120,121,122,91,92,93,94,95,
431 96,97,98,99,100,101,102,103,
432 104,105,106,107,108,109,110,111,
433 112,113,114,115,116,117,118,119,
434 120,121,122,123,124,125,126,127,
435 128,129,130,131,132,133,134,135,
436 136,137,138,139,140,141,142,143,
437 144,145,146,147,148,149,150,151,
438 152,153,154,155,156,157,158,159,
439 160,161,162,163,164,165,166,167,
440 168,169,170,171,172,173,174,175,
441 176,177,178,179,180,181,182,183,
442 184,185,186,187,188,189,190,191,
443 224,225,226,227,228,229,230,231,
444 232,233,234,235,236,237,238,239,
445 240,241,242,243,244,245,246,215,
446 248,249,250,251,252,253,254,223,
447 224,225,226,227,228,229,230,231,
448 232,233,234,235,236,237,238,239,
449 240,241,242,243,244,245,246,247,
450 248,249,250,251,252,253,254,255,
/* Case flipping table (256 entries). 65-90 and 97-122 are exchanged, and
192-222 are exchanged with 224-254; 215, 223, 247 and 255 map to themselves. */
451 0,1,2,3,4,5,6,7,
452 8,9,10,11,12,13,14,15,
453 16,17,18,19,20,21,22,23,
454 24,25,26,27,28,29,30,31,
455 32,33,34,35,36,37,38,39,
456 40,41,42,43,44,45,46,47,
457 48,49,50,51,52,53,54,55,
458 56,57,58,59,60,61,62,63,
459 64,97,98,99,100,101,102,103,
460 104,105,106,107,108,109,110,111,
461 112,113,114,115,116,117,118,119,
462 120,121,122,91,92,93,94,95,
463 96,65,66,67,68,69,70,71,
464 72,73,74,75,76,77,78,79,
465 80,81,82,83,84,85,86,87,
466 88,89,90,123,124,125,126,127,
467 128,129,130,131,132,133,134,135,
468 136,137,138,139,140,141,142,143,
469 144,145,146,147,148,149,150,151,
470 152,153,154,155,156,157,158,159,
471 160,161,162,163,164,165,166,167,
472 168,169,170,171,172,173,174,175,
473 176,177,178,179,180,181,182,183,
474 184,185,186,187,188,189,190,191,
475 224,225,226,227,228,229,230,231,
476 232,233,234,235,236,237,238,239,
477 240,241,242,243,244,245,246,215,
478 248,249,250,251,252,253,254,223,
479 192,193,194,195,196,197,198,199,
480 200,201,202,203,204,205,206,207,
481 208,209,210,211,212,213,214,247,
482 216,217,218,219,220,221,222,255,
/* Character class bit maps: ten maps of 32 bytes (four lines) each. */
/* space */
483 0,62,0,0,1,0,0,0,
484 0,0,0,0,0,0,0,0,
485 32,0,0,0,1,0,0,0,
486 0,0,0,0,0,0,0,0,
/* xdigit */
487 0,0,0,0,0,0,255,3,
488 126,0,0,0,126,0,0,0,
489 0,0,0,0,0,0,0,0,
490 0,0,0,0,0,0,0,0,
/* digit */
491 0,0,0,0,0,0,255,3,
492 0,0,0,0,0,0,0,0,
493 0,0,0,0,0,0,12,2,
494 0,0,0,0,0,0,0,0,
/* upper */
495 0,0,0,0,0,0,0,0,
496 254,255,255,7,0,0,0,0,
497 0,0,0,0,0,0,0,0,
498 255,255,127,127,0,0,0,0,
/* lower */
499 0,0,0,0,0,0,0,0,
500 0,0,0,0,254,255,255,7,
501 0,0,0,0,0,4,32,4,
502 0,0,0,128,255,255,127,255,
/* word */
503 0,0,0,0,0,0,255,3,
504 254,255,255,135,254,255,255,7,
505 0,0,0,0,0,4,44,6,
506 255,255,127,255,255,255,127,255,
/* graph */
507 0,0,0,0,254,255,255,255,
508 255,255,255,255,255,255,255,127,
509 0,0,0,0,254,255,255,255,
510 255,255,255,255,255,255,255,255,
/* print */
511 0,2,0,0,255,255,255,255,
512 255,255,255,255,255,255,255,127,
513 0,0,0,0,255,255,255,255,
514 255,255,255,255,255,255,255,255,
/* punct */
515 0,0,0,0,254,255,0,252,
516 1,0,0,248,1,0,0,120,
517 0,0,0,0,254,255,255,255,
518 0,0,128,0,0,0,128,0,
/* cntrl */
519 255,255,255,255,0,0,0,0,
520 0,0,0,0,0,0,0,128,
521 255,255,255,255,0,0,0,0,
522 0,0,0,0,0,0,0,0,
/* Character type table (256 entries), using the flag bits described for
tables0: 1 white space, 2 letter, 4 decimal digit, 8 hexadecimal digit,
16 alphanumeric or '_', 128 metacharacter or binary zero. Unlike tables0,
some values above 127 carry non-zero flags. */
523 128,0,0,0,0,0,0,0,
524 0,1,1,0,1,1,0,0,
525 0,0,0,0,0,0,0,0,
526 0,0,0,0,0,0,0,0,
527 1,0,0,0,128,0,0,0,
528 128,128,128,128,0,0,128,0,
529 28,28,28,28,28,28,28,28,
530 28,28,0,0,0,0,0,128,
531 0,26,26,26,26,26,26,18,
532 18,18,18,18,18,18,18,18,
533 18,18,18,18,18,18,18,18,
534 18,18,18,128,128,0,128,16,
535 0,26,26,26,26,26,26,18,
536 18,18,18,18,18,18,18,18,
537 18,18,18,18,18,18,18,18,
538 18,18,18,128,128,0,0,0,
539 0,0,0,0,0,1,0,0,
540 0,0,0,0,0,0,0,0,
541 0,0,0,0,0,0,0,0,
542 0,0,0,0,0,0,0,0,
543 1,0,0,0,0,0,0,0,
544 0,0,18,0,0,0,0,0,
545 0,0,20,20,0,18,0,0,
546 0,20,18,0,0,0,0,0,
547 18,18,18,18,18,18,18,18,
548 18,18,18,18,18,18,18,18,
549 18,18,18,18,18,18,18,0,
550 18,18,18,18,18,18,18,18,
551 18,18,18,18,18,18,18,18,
552 18,18,18,18,18,18,18,18,
553 18,18,18,18,18,18,18,0,
554 18,18,18,18,18,18,18,18
555 };
556
557
558
559
560 #ifndef HAVE_STRERROR
561 /*************************************************
562 * Provide strerror() for non-ANSI libraries *
563 *************************************************/
564
565 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
566 in their libraries, but can provide the same facility by this simple
567 alternative function. */
568
extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror(): looks the error number up in the system's
sys_errlist[] table, which holds sys_nerr entries. Numbers outside the table
yield a fixed "unknown" message. */

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];
  return "unknown error number";
}
578 #endif /* HAVE_STRERROR */
579
580
581
582
583 /*************************************************
584 * Read or extend an input line *
585 *************************************************/
586
587 /* Input lines are read into buffer, but both patterns and data lines can be
588 continued over multiple input lines. In addition, if the buffer fills up, we
589 want to automatically expand it so as to be able to handle extremely large
590 lines that are needed for certain stress tests. When the input buffer is
591 expanded, the other two buffers must also be expanded likewise, and the
592 contents of pbuffer, which are a copy of the input for callouts, must be
593 preserved (for when expansion happens for a data line). This is not the most
594 optimal way of handling this, but hey, this is just a test program!
595
596 Arguments:
597 f the file to read
598 start where in buffer to start (this *must* be within buffer)
599 prompt for stdin or readline()
600
601 Returns: pointer to the start of new data
602 could be a copy of start, or could be moved
603 NULL if no data read and EOF reached
604 */
605
606 static uschar *
607 extend_inputline(FILE *f, uschar *start, const char *prompt)
608 {
609 uschar *here = start;
610
611 for (;;)
612 {
613 int rlen = (int)(buffer_size - (here - buffer));
614
615 if (rlen > 1000)
616 {
617 int dlen;
618
619 /* If libreadline support is required, use readline() to read a line if the
620 input is a terminal. Note that readline() removes the trailing newline, so
621 we must put it back again, to be compatible with fgets(). */
622
623 #ifdef SUPPORT_LIBREADLINE
624 if (isatty(fileno(f)))
625 {
626 size_t len;
627 char *s = readline(prompt);
628 if (s == NULL) return (here == start)? NULL : start;
629 len = strlen(s);
630 if (len > 0) add_history(s);
631 if (len > rlen - 1) len = rlen - 1;
632 memcpy(here, s, len);
633 here[len] = '\n';
634 here[len+1] = 0;
635 free(s);
636 }
637 else
638 #endif
639
640 /* Read the next line by normal means, prompting if the file is stdin. */
641
642 {
643 if (f == stdin) printf("%s", prompt);
644 if (fgets((char *)here, rlen, f) == NULL)
645 return (here == start)? NULL : start;
646 }
647
648 dlen = (int)strlen((char *)here);
649 if (dlen > 0 && here[dlen - 1] == '\n') return start;
650 here += dlen;
651 }
652
653 else
654 {
655 int new_buffer_size = 2*buffer_size;
656 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
657 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
658 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
659
660 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
661 {
662 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
663 exit(1);
664 }
665
666 memcpy(new_buffer, buffer, buffer_size);
667 memcpy(new_pbuffer, pbuffer, buffer_size);
668
669 buffer_size = new_buffer_size;
670
671 start = new_buffer + (start - buffer);
672 here = new_buffer + (here - buffer);
673
674 free(buffer);
675 free(dbuffer);
676 free(pbuffer);
677
678 buffer = new_buffer;
679 dbuffer = new_dbuffer;
680 pbuffer = new_pbuffer;
681 }
682 }
683
684 return NULL; /* Control never gets here */
685 }
686
687
688
689
690
691
692
693 /*************************************************
694 * Read number from string *
695 *************************************************/
696
697 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
698 around with conditional compilation, just do the job by hand. It is only used
699 for unpicking arguments, so just keep it simple.
700
701 Arguments:
702 str string to be converted
703 endptr where to put the end pointer
704
705 Returns: the value, as an int (overflow is not checked)
706 */
707
/* Convert an optional run of white space followed by decimal digits into a
number.

Arguments:
  str      string to be converted
  endptr   receives a pointer to the first character after the digits (or
             after the white space when no digits are present)

Returns:   the value of the digits; 0 if there are none */

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int value = 0;

  for (; *str != 0 && isspace(*str); str++)
    ;                                     /* skip leading white space */

  for (; isdigit(*str); str++)
    value = value * 10 + (int)(*str - '0');

  *endptr = str;
  return value;
}
717
718
719
720
721 /*************************************************
722 * Convert UTF-8 string to value *
723 *************************************************/
724
725 /* This function takes one or more bytes that represents a UTF-8 character,
726 and returns the value of the character.
727
728 Argument:
729 utf8bytes a pointer to the byte vector
730 vptr a pointer to an int to receive the value
731
732 Returns: > 0 => the number of bytes consumed
733 -6 to 0 => malformed UTF-8 character at offset = (-return)
734 */
735
736 #if !defined NOUTF8
737
738 static int
739 utf82ord(unsigned char *utf8bytes, int *vptr)
740 {
741 int c = *utf8bytes++;
742 int d = c;
743 int i, j, s;
744
745 for (i = -1; i < 6; i++) /* i is number of additional bytes */
746 {
747 if ((d & 0x80) == 0) break;
748 d <<= 1;
749 }
750
751 if (i == -1) { *vptr = c; return 1; } /* ascii character */
752 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
753
754 /* i now has a value in the range 1-5 */
755
756 s = 6*i;
757 d = (c & utf8_table3[i]) << s;
758
759 for (j = 0; j < i; j++)
760 {
761 c = *utf8bytes++;
762 if ((c & 0xc0) != 0x80) return -(j+1);
763 s -= 6;
764 d |= (c & 0x3f) << s;
765 }
766
767 /* Check that encoding was the correct unique one */
768
769 for (j = 0; j < utf8_table1_size; j++)
770 if (d <= utf8_table1[j]) break;
771 if (j != i) return -(i+1);
772
773 /* Valid value */
774
775 *vptr = d;
776 return i+1;
777 }
778
779 #endif
780
781
782
783 /*************************************************
784 * Convert character value to UTF-8 *
785 *************************************************/
786
787 /* This function takes an integer value in the range 0 - 0x7fffffff
788 and encodes it as a UTF-8 character in 1 to 6 bytes.
789
790 Arguments:
791 cvalue the character value
792 utf8bytes pointer to buffer for result - at least 6 bytes long
793
794 Returns: number of bytes placed in the buffer
795 */
796
797 #if !defined NOUTF8
798
799 static int
800 ord2utf8(int cvalue, uschar *utf8bytes)
801 {
802 register int i, j;
803 for (i = 0; i < utf8_table1_size; i++)
804 if (cvalue <= utf8_table1[i]) break;
805 utf8bytes += i;
806 for (j = i; j > 0; j--)
807 {
808 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
809 cvalue >>= 6;
810 }
811 *utf8bytes = utf8_table2[i] | cvalue;
812 return i + 1;
813 }
814
815 #endif
816
817
818
819 /*************************************************
820 * Print character string *
821 *************************************************/
822
823 /* Character string printing function. Must handle UTF-8 strings in utf8
824 mode. Yields number of characters printed. If handed a NULL file, just counts
825 chars without printing. */
826
827 static int pchars(unsigned char *p, int length, FILE *f)
828 {
829 int c = 0;
830 int yield = 0;
831
832 while (length-- > 0)
833 {
834 #if !defined NOUTF8
835 if (use_utf8)
836 {
837 int rc = utf82ord(p, &c);
838
839 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
840 {
841 length -= rc - 1;
842 p += rc;
843 if (PRINTHEX(c))
844 {
845 if (f != NULL) fprintf(f, "%c", c);
846 yield++;
847 }
848 else
849 {
850 int n = 4;
851 if (f != NULL) fprintf(f, "\\x{%02x}", c);
852 yield += (n <= 0x000000ff)? 2 :
853 (n <= 0x00000fff)? 3 :
854 (n <= 0x0000ffff)? 4 :
855 (n <= 0x000fffff)? 5 : 6;
856 }
857 continue;
858 }
859 }
860 #endif
861
862 /* Not UTF-8, or malformed UTF-8 */
863
864 c = *p++;
865 if (PRINTHEX(c))
866 {
867 if (f != NULL) fprintf(f, "%c", c);
868 yield++;
869 }
870 else
871 {
872 if (f != NULL) fprintf(f, "\\x%02x", c);
873 yield += 4;
874 }
875 }
876
877 return yield;
878 }
879
880
881
882 /*************************************************
883 * Callout function *
884 *************************************************/
885
886 /* Called from PCRE as a result of the (?C) item. We print out where we are in
887 the match. Yield zero unless more callouts than the fail count, or the callout
888 data is not zero. */
889
890 static int callout(pcre_callout_block *cb)
891 {
892 FILE *f = (first_callout | callout_extra)? outfile : NULL;
893 int i, pre_start, post_start, subject_length;
894
895 if (callout_extra)
896 {
897 fprintf(f, "Callout %d: last capture = %d\n",
898 cb->callout_number, cb->capture_last);
899
900 for (i = 0; i < cb->capture_top * 2; i += 2)
901 {
902 if (cb->offset_vector[i] < 0)
903 fprintf(f, "%2d: <unset>\n", i/2);
904 else
905 {
906 fprintf(f, "%2d: ", i/2);
907 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
908 cb->offset_vector[i+1] - cb->offset_vector[i], f);
909 fprintf(f, "\n");
910 }
911 }
912 }
913
914 /* Re-print the subject in canonical form, the first time or if giving full
915 datails. On subsequent calls in the same match, we use pchars just to find the
916 printed lengths of the substrings. */
917
918 if (f != NULL) fprintf(f, "--->");
919
920 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
921 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
922 cb->current_position - cb->start_match, f);
923
924 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
925
926 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
927 cb->subject_length - cb->current_position, f);
928
929 if (f != NULL) fprintf(f, "\n");
930
931 /* Always print appropriate indicators, with callout number if not already
932 shown. For automatic callouts, show the pattern offset. */
933
934 if (cb->callout_number == 255)
935 {
936 fprintf(outfile, "%+3d ", cb->pattern_position);
937 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
938 }
939 else
940 {
941 if (callout_extra) fprintf(outfile, " ");
942 else fprintf(outfile, "%3d ", cb->callout_number);
943 }
944
945 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
946 fprintf(outfile, "^");
947
948 if (post_start > 0)
949 {
950 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
951 fprintf(outfile, "^");
952 }
953
954 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
955 fprintf(outfile, " ");
956
957 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
958 pbuffer + cb->pattern_position);
959
960 fprintf(outfile, "\n");
961 first_callout = 0;
962
963 if (cb->callout_data != NULL)
964 {
965 int callout_data = *((int *)(cb->callout_data));
966 if (callout_data != 0)
967 {
968 fprintf(outfile, "Callout data = %d\n", callout_data);
969 return callout_data;
970 }
971 }
972
973 return (cb->callout_number != callout_fail_id)? 0 :
974 (++callout_count >= callout_fail_count)? 1 : 0;
975 }
976
977
978 /*************************************************
979 * Local malloc functions *
980 *************************************************/
981
982 /* Alternative malloc function, to test functionality and show the size of the
983 compiled re. */
984
985 static void *new_malloc(size_t size)
986 {
987 void *block = malloc(size);
988 gotten_store = size;
989 if (show_malloc)
990 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
991 return block;
992 }
993
994 static void new_free(void *block)
995 {
996 if (show_malloc)
997 fprintf(outfile, "free %p\n", block);
998 free(block);
999 }
1000
1001
1002 /* For recursion malloc/free, to test stacking calls */
1003
1004 static void *stack_malloc(size_t size)
1005 {
1006 void *block = malloc(size);
1007 if (show_malloc)
1008 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1009 return block;
1010 }
1011
1012 static void stack_free(void *block)
1013 {
1014 if (show_malloc)
1015 fprintf(outfile, "stack_free %p\n", block);
1016 free(block);
1017 }
1018
1019
1020 /*************************************************
1021 * Call pcre_fullinfo() *
1022 *************************************************/
1023
1024 /* Get one piece of information from the pcre_fullinfo() function */
1025
1026 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1027 {
1028 int rc;
1029 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1030 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1031 }
1032
1033
1034
1035 /*************************************************
1036 * Byte flipping function *
1037 *************************************************/
1038
/* Reverse the byte order of an integer field.

Arguments:
  value       the value to flip
  n           the width of the field in bytes: 2 swaps the two low-order
                bytes; any other value reverses the four low-order bytes

Returns:      the flipped value; everything above the flipped bytes is zero
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int byte0 = value & 0xff;
  unsigned long int byte1 = (value >> 8) & 0xff;
  unsigned long int byte2;
  unsigned long int byte3;

  if (n == 2)
  {
    return (byte0 << 8) | byte1;
  }

  byte2 = (value >> 16) & 0xff;
  byte3 = (value >> 24) & 0xff;
  return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
1048
1049
1050
1051
1052 /*************************************************
1053 * Check match or recursion limit *
1054 *************************************************/
1055
1056 static int
1057 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1058 int start_offset, int options, int *use_offsets, int use_size_offsets,
1059 int flag, unsigned long int *limit, int errnumber, const char *msg)
1060 {
1061 int count;
1062 int min = 0;
1063 int mid = 64;
1064 int max = -1;
1065
1066 extra->flags |= flag;
1067
1068 for (;;)
1069 {
1070 *limit = mid;
1071
1072 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1073 use_offsets, use_size_offsets);
1074
1075 if (count == errnumber)
1076 {
1077 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1078 min = mid;
1079 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1080 }
1081
1082 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1083 count == PCRE_ERROR_PARTIAL)
1084 {
1085 if (mid == min + 1)
1086 {
1087 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1088 break;
1089 }
1090 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1091 max = mid;
1092 mid = (min + mid)/2;
1093 }
1094 else break; /* Some other error */
1095 }
1096
1097 extra->flags &= ~flag;
1098 return count;
1099 }
1100
1101
1102
1103 /*************************************************
1104 * Case-independent strncmp() function *
1105 *************************************************/
1106
/* Compare at most n characters of two strings, ignoring case. The comparison
also stops at the end of the strings: if a terminating zero is reached while
the strings are still equal, the result is 0 and nothing beyond the
terminator is read.

Arguments:
  s          first string
  t          second string
  n          maximum number of characters to compare

Returns:     < 0, = 0, or > 0, according to the comparison
*/

static int
strncmpic(uschar *s, uschar *t, int n)
{
  while (n--)
  {
    int sc = *s++;
    int tc = *t++;
    int c = tolower(sc) - tolower(tc);
    if (c) return c;

    /* Equal so far and a string has ended: going on would read past its
    terminating zero. */

    if (sc == 0 || tc == 0) break;
  }
  return 0;
}
1126
1127
1128
1129 /*************************************************
1130 * Check newline indicator *
1131 *************************************************/
1132
1133 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1134 a message and return 0 if there is no match.
1135
1136 Arguments:
1137 p points after the leading '<'
1138 f file for error message
1139
1140 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1141 */
1142
1143 static int
1144 check_newline(uschar *p, FILE *f)
1145 {
1146 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1147 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1148 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1149 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1150 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1151 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1152 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1153 fprintf(f, "Unknown newline type at: <%s\n", p);
1154 return 0;
1155 }
1156
1157
1158
1159 /*************************************************
1160 * Usage function *
1161 *************************************************/
1162
/* Write the command line summary to stdout. Which lines appear depends on
SUPPORT_LIBREADLINE, NODFA, and NOPOSIX. The text contains no conversion
specifications, so it is written with fputs(), one call for each run of lines
between the conditional sections. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);
#ifdef SUPPORT_LIBREADLINE
  fputs("If input is a terminal, readline() is used to read from it.\n",
        stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
  fputs("\nOptions:\n"
        " -b show compiled code (bytecode)\n"
        " -C show PCRE compile-time options and exit\n"
        " -d debug: show compiled code and information (-b and -i)\n",
        stdout);
#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -M find MATCH_LIMIT minimum for each subject\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif
  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s force each pattern to be studied\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n",
        stdout);
}
1196
1197
1198
1199 /*************************************************
1200 * Main Program *
1201 *************************************************/
1202
1203 /* Read lines from named file or stdin and write to named file or stdout; lines
1204 consist of a regular expression, in delimiters and optionally followed by
1205 options, followed by a set of test data, terminated by an empty line. */
1206
1207 int main(int argc, char **argv)
1208 {
1209 FILE *infile = stdin;
1210 int options = 0;
1211 int study_options = 0;
1212 int default_find_match_limit = FALSE;
1213 int op = 1;
1214 int timeit = 0;
1215 int timeitm = 0;
1216 int showinfo = 0;
1217 int showstore = 0;
1218 int force_study = 0;
1219 int quiet = 0;
1220 int size_offsets = 45;
1221 int size_offsets_max;
1222 int *offsets = NULL;
1223 #if !defined NOPOSIX
1224 int posix = 0;
1225 #endif
1226 int debug = 0;
1227 int done = 0;
1228 int all_use_dfa = 0;
1229 int yield = 0;
1230 int stack_size;
1231
1232 /* These vectors store, end-to-end, a list of captured substring names. Assume
1233 that 1024 is plenty long enough for the few names we'll be testing. */
1234
1235 uschar copynames[1024];
1236 uschar getnames[1024];
1237
1238 uschar *copynamesptr;
1239 uschar *getnamesptr;
1240
1241 /* Get buffers from malloc() so that Electric Fence will check their misuse
1242 when I am debugging. They grow automatically when very long lines are read. */
1243
1244 buffer = (unsigned char *)malloc(buffer_size);
1245 dbuffer = (unsigned char *)malloc(buffer_size);
1246 pbuffer = (unsigned char *)malloc(buffer_size);
1247
1248 /* The outfile variable is static so that new_malloc can use it. */
1249
1250 outfile = stdout;
1251
1252 /* The following _setmode() stuff is some Windows magic that tells its runtime
1253 library to translate CRLF into a single LF character. At least, that's what
1254 I've been told: never having used Windows I take this all on trust. Originally
1255 it set 0x8000, but then I was advised that _O_BINARY was better. */
1256
1257 #if defined(_WIN32) || defined(WIN32)
1258 _setmode( _fileno( stdout ), _O_BINARY );
1259 #endif
1260
1261 /* Scan options */
1262
1263 while (argc > 1 && argv[op][0] == '-')
1264 {
1265 unsigned char *endptr;
1266
1267 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1268 else if (strcmp(argv[op], "-s") == 0) force_study = 1;
1269 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1270 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1271 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1272 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1273 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1274 #if !defined NODFA
1275 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1276 #endif
1277 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1278 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1279 *endptr == 0))
1280 {
1281 op++;
1282 argc--;
1283 }
1284 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1285 {
1286 int both = argv[op][2] == 0;
1287 int temp;
1288 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1289 *endptr == 0))
1290 {
1291 timeitm = temp;
1292 op++;
1293 argc--;
1294 }
1295 else timeitm = LOOPREPEAT;
1296 if (both) timeit = timeitm;
1297 }
1298 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1299 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1300 *endptr == 0))
1301 {
1302 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1303 printf("PCRE: -S not supported on this OS\n");
1304 exit(1);
1305 #else
1306 int rc;
1307 struct rlimit rlim;
1308 getrlimit(RLIMIT_STACK, &rlim);
1309 rlim.rlim_cur = stack_size * 1024 * 1024;
1310 rc = setrlimit(RLIMIT_STACK, &rlim);
1311 if (rc != 0)
1312 {
1313 printf("PCRE: setrlimit() failed with error %d\n", rc);
1314 exit(1);
1315 }
1316 op++;
1317 argc--;
1318 #endif
1319 }
1320 #if !defined NOPOSIX
1321 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1322 #endif
1323 else if (strcmp(argv[op], "-C") == 0)
1324 {
1325 int rc;
1326 unsigned long int lrc;
1327 printf("PCRE version %s\n", pcre_version());
1328 printf("Compiled with\n");
1329 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1330 printf(" %sUTF-8 support\n", rc? "" : "No ");
1331 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1332 printf(" %sUnicode properties support\n", rc? "" : "No ");
1333 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1334 /* Note that these values are always the ASCII values, even
1335 in EBCDIC environments. CR is 13 and NL is 10. */
1336 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1337 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1338 (rc == -2)? "ANYCRLF" :
1339 (rc == -1)? "ANY" : "???");
1340 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1341 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1342 "all Unicode newlines");
1343 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1344 printf(" Internal link size = %d\n", rc);
1345 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1346 printf(" POSIX malloc threshold = %d\n", rc);
1347 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1348 printf(" Default match limit = %ld\n", lrc);
1349 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1350 printf(" Default recursion depth limit = %ld\n", lrc);
1351 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1352 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1353 goto EXIT;
1354 }
1355 else if (strcmp(argv[op], "-help") == 0 ||
1356 strcmp(argv[op], "--help") == 0)
1357 {
1358 usage();
1359 goto EXIT;
1360 }
1361 else
1362 {
1363 printf("** Unknown or malformed option %s\n", argv[op]);
1364 usage();
1365 yield = 1;
1366 goto EXIT;
1367 }
1368 op++;
1369 argc--;
1370 }
1371
1372 /* Get the store for the offsets vector, and remember what it was */
1373
1374 size_offsets_max = size_offsets;
1375 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1376 if (offsets == NULL)
1377 {
1378 printf("** Failed to get %d bytes of memory for offsets vector\n",
1379 (int)(size_offsets_max * sizeof(int)));
1380 yield = 1;
1381 goto EXIT;
1382 }
1383
1384 /* Sort out the input and output files */
1385
1386 if (argc > 1)
1387 {
1388 infile = fopen(argv[op], INPUT_MODE);
1389 if (infile == NULL)
1390 {
1391 printf("** Failed to open %s\n", argv[op]);
1392 yield = 1;
1393 goto EXIT;
1394 }
1395 }
1396
1397 if (argc > 2)
1398 {
1399 outfile = fopen(argv[op+1], OUTPUT_MODE);
1400 if (outfile == NULL)
1401 {
1402 printf("** Failed to open %s\n", argv[op+1]);
1403 yield = 1;
1404 goto EXIT;
1405 }
1406 }
1407
1408 /* Set alternative malloc function */
1409
1410 pcre_malloc = new_malloc;
1411 pcre_free = new_free;
1412 pcre_stack_malloc = stack_malloc;
1413 pcre_stack_free = stack_free;
1414
1415 /* Heading line unless quiet, then prompt for first regex if stdin */
1416
1417 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1418
1419 /* Main loop */
1420
1421 while (!done)
1422 {
1423 pcre *re = NULL;
1424 pcre_extra *extra = NULL;
1425
1426 #if !defined NOPOSIX /* There are still compilers that require no indent */
1427 regex_t preg;
1428 int do_posix = 0;
1429 #endif
1430
1431 const char *error;
1432 unsigned char *markptr;
1433 unsigned char *p, *pp, *ppp;
1434 unsigned char *to_file = NULL;
1435 const unsigned char *tables = NULL;
1436 unsigned long int true_size, true_study_size = 0;
1437 size_t size, regex_gotten_store;
1438 int do_allcaps = 0;
1439 int do_mark = 0;
1440 int do_study = 0;
1441 int no_force_study = 0;
1442 int do_debug = debug;
1443 int do_G = 0;
1444 int do_g = 0;
1445 int do_showinfo = showinfo;
1446 int do_showrest = 0;
1447 int do_showcaprest = 0;
1448 int do_flip = 0;
1449 int erroroffset, len, delimiter, poffset;
1450
1451 use_utf8 = 0;
1452 debug_lengths = 1;
1453
1454 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1455 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1456 fflush(outfile);
1457
1458 p = buffer;
1459 while (isspace(*p)) p++;
1460 if (*p == 0) continue;
1461
1462 /* See if the pattern is to be loaded pre-compiled from a file. */
1463
1464 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1465 {
1466 unsigned long int magic, get_options;
1467 uschar sbuf[8];
1468 FILE *f;
1469
1470 p++;
1471 pp = p + (int)strlen((char *)p);
1472 while (isspace(pp[-1])) pp--;
1473 *pp = 0;
1474
1475 f = fopen((char *)p, "rb");
1476 if (f == NULL)
1477 {
1478 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1479 continue;
1480 }
1481
1482 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1483
1484 true_size =
1485 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1486 true_study_size =
1487 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1488
1489 re = (real_pcre *)new_malloc(true_size);
1490 regex_gotten_store = gotten_store;
1491
1492 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1493
1494 magic = ((real_pcre *)re)->magic_number;
1495 if (magic != MAGIC_NUMBER)
1496 {
1497 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1498 {
1499 do_flip = 1;
1500 }
1501 else
1502 {
1503 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1504 fclose(f);
1505 continue;
1506 }
1507 }
1508
1509 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1510 do_flip? " (byte-inverted)" : "", p);
1511
1512 /* Need to know if UTF-8 for printing data strings */
1513
1514 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1515 use_utf8 = (get_options & PCRE_UTF8) != 0;
1516
1517 /* Now see if there is any following study data. */
1518
1519 if (true_study_size != 0)
1520 {
1521 pcre_study_data *psd;
1522
1523 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1524 extra->flags = PCRE_EXTRA_STUDY_DATA;
1525
1526 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1527 extra->study_data = psd;
1528
1529 if (fread(psd, 1, true_study_size, f) != true_study_size)
1530 {
1531 FAIL_READ:
1532 fprintf(outfile, "Failed to read data from %s\n", p);
1533 if (extra != NULL) new_free(extra);
1534 if (re != NULL) new_free(re);
1535 fclose(f);
1536 continue;
1537 }
1538 fprintf(outfile, "Study data loaded from %s\n", p);
1539 do_study = 1; /* To get the data output if requested */
1540 }
1541 else fprintf(outfile, "No study data\n");
1542
1543 fclose(f);
1544 goto SHOW_INFO;
1545 }
1546
1547 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1548 the pattern; if it isn't complete, read more. */
1549
1550 delimiter = *p++;
1551
1552 if (isalnum(delimiter) || delimiter == '\\')
1553 {
1554 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1555 goto SKIP_DATA;
1556 }
1557
1558 pp = p;
1559 poffset = (int)(p - buffer);
1560
1561 for(;;)
1562 {
1563 while (*pp != 0)
1564 {
1565 if (*pp == '\\' && pp[1] != 0) pp++;
1566 else if (*pp == delimiter) break;
1567 pp++;
1568 }
1569 if (*pp != 0) break;
1570 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1571 {
1572 fprintf(outfile, "** Unexpected EOF\n");
1573 done = 1;
1574 goto CONTINUE;
1575 }
1576 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1577 }
1578
1579 /* The buffer may have moved while being extended; reset the start of data
1580 pointer to the correct relative point in the buffer. */
1581
1582 p = buffer + poffset;
1583
1584 /* If the first character after the delimiter is backslash, make
1585 the pattern end with backslash. This is purely to provide a way
1586 of testing for the error message when a pattern ends with backslash. */
1587
1588 if (pp[1] == '\\') *pp++ = '\\';
1589
1590 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1591 for callouts. */
1592
1593 *pp++ = 0;
1594 strcpy((char *)pbuffer, (char *)p);
1595
1596 /* Look for options after final delimiter */
1597
1598 options = 0;
1599 study_options = 0;
1600 log_store = showstore; /* default from command line */
1601
1602 while (*pp != 0)
1603 {
1604 switch (*pp++)
1605 {
1606 case 'f': options |= PCRE_FIRSTLINE; break;
1607 case 'g': do_g = 1; break;
1608 case 'i': options |= PCRE_CASELESS; break;
1609 case 'm': options |= PCRE_MULTILINE; break;
1610 case 's': options |= PCRE_DOTALL; break;
1611 case 'x': options |= PCRE_EXTENDED; break;
1612
1613 case '+':
1614 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1615 break;
1616
1617 case '=': do_allcaps = 1; break;
1618 case 'A': options |= PCRE_ANCHORED; break;
1619 case 'B': do_debug = 1; break;
1620 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1621 case 'D': do_debug = do_showinfo = 1; break;
1622 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1623 case 'F': do_flip = 1; break;
1624 case 'G': do_G = 1; break;
1625 case 'I': do_showinfo = 1; break;
1626 case 'J': options |= PCRE_DUPNAMES; break;
1627 case 'K': do_mark = 1; break;
1628 case 'M': log_store = 1; break;
1629 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1630
1631 #if !defined NOPOSIX
1632 case 'P': do_posix = 1; break;
1633 #endif
1634
1635 case 'S':
1636 if (do_study == 0) do_study = 1; else
1637 {
1638 do_study = 0;
1639 no_force_study = 1;
1640 }
1641 break;
1642
1643 case 'U': options |= PCRE_UNGREEDY; break;
1644 case 'W': options |= PCRE_UCP; break;
1645 case 'X': options |= PCRE_EXTRA; break;
1646 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1647 case 'Z': debug_lengths = 0; break;
1648 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1649 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1650
1651 case 'T':
1652 switch (*pp++)
1653 {
1654 case '0': tables = tables0; break;
1655 case '1': tables = tables1; break;
1656
1657 case '\r':
1658 case '\n':
1659 case ' ':
1660 case 0:
1661 fprintf(outfile, "** Missing table number after /T\n");
1662 goto SKIP_DATA;
1663
1664 default:
1665 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1666 goto SKIP_DATA;
1667 }
1668 break;
1669
1670 case 'L':
1671 ppp = pp;
1672 /* The '\r' test here is so that it works on Windows. */
1673 /* The '0' test is just in case this is an unterminated line. */
1674 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1675 *ppp = 0;
1676 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1677 {
1678 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1679 goto SKIP_DATA;
1680 }
1681 locale_set = 1;
1682 tables = pcre_maketables();
1683 pp = ppp;
1684 break;
1685
1686 case '>':
1687 to_file = pp;
1688 while (*pp != 0) pp++;
1689 while (isspace(pp[-1])) pp--;
1690 *pp = 0;
1691 break;
1692
1693 case '<':
1694 {
1695 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1696 {
1697 options |= PCRE_JAVASCRIPT_COMPAT;
1698 pp += 3;
1699 }
1700 else
1701 {
1702 int x = check_newline(pp, outfile);
1703 if (x == 0) goto SKIP_DATA;
1704 options |= x;
1705 while (*pp++ != '>');
1706 }
1707 }
1708 break;
1709
1710 case '\r': /* So that it works in Windows */
1711 case '\n':
1712 case ' ':
1713 break;
1714
1715 default:
1716 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1717 goto SKIP_DATA;
1718 }
1719 }
1720
1721 /* Handle compiling via the POSIX interface, which doesn't support the
1722 timing, showing, or debugging options, nor the ability to pass over
1723 local character tables. */
1724
1725 #if !defined NOPOSIX
1726 if (posix || do_posix)
1727 {
1728 int rc;
1729 int cflags = 0;
1730
1731 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1732 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1733 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1734 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1735 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1736 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1737 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1738
1739 rc = regcomp(&preg, (char *)p, cflags);
1740
1741 /* Compilation failed; go back for another re, skipping to blank line
1742 if non-interactive. */
1743
1744 if (rc != 0)
1745 {
1746 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1747 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1748 goto SKIP_DATA;
1749 }
1750 }
1751
1752 /* Handle compiling via the native interface */
1753
1754 else
1755 #endif /* !defined NOPOSIX */
1756
1757 {
1758 unsigned long int get_options;
1759
1760 if (timeit > 0)
1761 {
1762 register int i;
1763 clock_t time_taken;
1764 clock_t start_time = clock();
1765 for (i = 0; i < timeit; i++)
1766 {
1767 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1768 if (re != NULL) free(re);
1769 }
1770 time_taken = clock() - start_time;
1771 fprintf(outfile, "Compile time %.4f milliseconds\n",
1772 (((double)time_taken * 1000.0) / (double)timeit) /
1773 (double)CLOCKS_PER_SEC);
1774 }
1775
1776 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1777
1778 /* Compilation failed; go back for another re, skipping to blank line
1779 if non-interactive. */
1780
1781 if (re == NULL)
1782 {
1783 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1784 SKIP_DATA:
1785 if (infile != stdin)
1786 {
1787 for (;;)
1788 {
1789 if (extend_inputline(infile, buffer, NULL) == NULL)
1790 {
1791 done = 1;
1792 goto CONTINUE;
1793 }
1794 len = (int)strlen((char *)buffer);
1795 while (len > 0 && isspace(buffer[len-1])) len--;
1796 if (len == 0) break;
1797 }
1798 fprintf(outfile, "\n");
1799 }
1800 goto CONTINUE;
1801 }
1802
1803 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1804 within the regex; check for this so that we know how to process the data
1805 lines. */
1806
1807 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1808 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1809
1810 /* Print information if required. There are now two info-returning
1811 functions. The old one has a limited interface and returns only limited
1812 data. Check that it agrees with the newer one. */
1813
1814 if (log_store)
1815 fprintf(outfile, "Memory allocation (code space): %d\n",
1816 (int)(gotten_store -
1817 sizeof(real_pcre) -
1818 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1819
1820 /* Extract the size for possible writing before possibly flipping it,
1821 and remember the store that was got. */
1822
1823 true_size = ((real_pcre *)re)->size;
1824 regex_gotten_store = gotten_store;
1825
1826 /* If -s or /S was present, study the regex to generate additional info to
1827 help with the matching, unless the pattern has the SS option, which
1828 suppresses the effect of /S (used for a few test patterns where studying is
1829 never sensible). */
1830
1831 if (do_study || (force_study && !no_force_study))
1832 {
1833 if (timeit > 0)
1834 {
1835 register int i;
1836 clock_t time_taken;
1837 clock_t start_time = clock();
1838 for (i = 0; i < timeit; i++)
1839 extra = pcre_study(re, study_options, &error);
1840 time_taken = clock() - start_time;
1841 if (extra != NULL) free(extra);
1842 fprintf(outfile, " Study time %.4f milliseconds\n",
1843 (((double)time_taken * 1000.0) / (double)timeit) /
1844 (double)CLOCKS_PER_SEC);
1845 }
1846 extra = pcre_study(re, study_options, &error);
1847 if (error != NULL)
1848 fprintf(outfile, "Failed to study: %s\n", error);
1849 else if (extra != NULL)
1850 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1851 }
1852
1853 /* If /K was present, we set up for handling MARK data. */
1854
1855 if (do_mark)
1856 {
1857 if (extra == NULL)
1858 {
1859 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1860 extra->flags = 0;
1861 }
1862 extra->mark = &markptr;
1863 extra->flags |= PCRE_EXTRA_MARK;
1864 }
1865
1866 /* If the 'F' option was present, we flip the bytes of all the integer
1867 fields in the regex data block and the study block. This is to make it
1868 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1869 compiled on a different architecture. */
1870
1871 if (do_flip)
1872 {
1873 real_pcre *rre = (real_pcre *)re;
1874 rre->magic_number =
1875 byteflip(rre->magic_number, sizeof(rre->magic_number));
1876 rre->size = byteflip(rre->size, sizeof(rre->size));
1877 rre->options = byteflip(rre->options, sizeof(rre->options));
1878 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1879 rre->top_bracket =
1880 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1881 rre->top_backref =
1882 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1883 rre->first_byte =
1884 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1885 rre->req_byte =
1886 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1887 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1888 sizeof(rre->name_table_offset));
1889 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1890 sizeof(rre->name_entry_size));
1891 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1892 sizeof(rre->name_count));
1893
1894 if (extra != NULL)
1895 {
1896 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1897 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1898 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1899 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1900 }
1901 }
1902
1903 /* Extract information from the compiled data if required */
1904
1905 SHOW_INFO:
1906
1907 if (do_debug)
1908 {
1909 fprintf(outfile, "------------------------------------------------------------------\n");
1910 pcre_printint(re, outfile, debug_lengths);
1911 }
1912
1913 /* We already have the options in get_options (see above) */
1914
1915 if (do_showinfo)
1916 {
1917 unsigned long int all_options;
1918 #if !defined NOINFOCHECK
1919 int old_first_char, old_options, old_count;
1920 #endif
1921 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1922 hascrorlf;
1923 int nameentrysize, namecount;
1924 const uschar *nametable;
1925
1926 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1927 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1928 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1929 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1930 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1931 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1932 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1933 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1934 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1935 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1936 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1937
1938 #if !defined NOINFOCHECK
1939 old_count = pcre_info(re, &old_options, &old_first_char);
1940 if (count < 0) fprintf(outfile,
1941 "Error %d from pcre_info()\n", count);
1942 else
1943 {
1944 if (old_count != count) fprintf(outfile,
1945 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1946 old_count);
1947
1948 if (old_first_char != first_char) fprintf(outfile,
1949 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1950 first_char, old_first_char);
1951
1952 if (old_options != (int)get_options) fprintf(outfile,
1953 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1954 get_options, old_options);
1955 }
1956 #endif
1957
1958 if (size != regex_gotten_store) fprintf(outfile,
1959 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1960 (int)size, (int)regex_gotten_store);
1961
1962 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1963 if (backrefmax > 0)
1964 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1965
1966 if (namecount > 0)
1967 {
1968 fprintf(outfile, "Named capturing subpatterns:\n");
1969 while (namecount-- > 0)
1970 {
1971 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1972 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1973 GET2(nametable, 0));
1974 nametable += nameentrysize;
1975 }
1976 }
1977
1978 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1979 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1980
1981 all_options = ((real_pcre *)re)->options;
1982 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1983
1984 if (get_options == 0) fprintf(outfile, "No options\n");
1985 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1986 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1987 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1988 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1989 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1990 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1991 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1992 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1993 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1994 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1995 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1996 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1997 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1998 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1999 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2000 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2001 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2002 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2003
2004 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2005
2006 switch (get_options & PCRE_NEWLINE_BITS)
2007 {
2008 case PCRE_NEWLINE_CR:
2009 fprintf(outfile, "Forced newline sequence: CR\n");
2010 break;
2011
2012 case PCRE_NEWLINE_LF:
2013 fprintf(outfile, "Forced newline sequence: LF\n");
2014 break;
2015
2016 case PCRE_NEWLINE_CRLF:
2017 fprintf(outfile, "Forced newline sequence: CRLF\n");
2018 break;
2019
2020 case PCRE_NEWLINE_ANYCRLF:
2021 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2022 break;
2023
2024 case PCRE_NEWLINE_ANY:
2025 fprintf(outfile, "Forced newline sequence: ANY\n");
2026 break;
2027
2028 default:
2029 break;
2030 }
2031
2032 if (first_char == -1)
2033 {
2034 fprintf(outfile, "First char at start or follows newline\n");
2035 }
2036 else if (first_char < 0)
2037 {
2038 fprintf(outfile, "No first char\n");
2039 }
2040 else
2041 {
2042 int ch = first_char & 255;
2043 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2044 "" : " (caseless)";
2045 if (PRINTHEX(ch))
2046 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2047 else
2048 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2049 }
2050
2051 if (need_char < 0)
2052 {
2053 fprintf(outfile, "No need char\n");
2054 }
2055 else
2056 {
2057 int ch = need_char & 255;
2058 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2059 "" : " (caseless)";
2060 if (PRINTHEX(ch))
2061 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2062 else
2063 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2064 }
2065
2066 /* Don't output study size; at present it is in any case a fixed
2067 value, but it varies, depending on the computer architecture, and
2068 so messes up the test suite. (And with the /F option, it might be
2069 flipped.) If study was forced by an external -s, don't show this
2070 information unless -i or -d was also present. This means that, except
2071 when auto-callouts are involved, the output from runs with and without
2072 -s should be identical. */
2073
2074 if (do_study || (force_study && showinfo && !no_force_study))
2075 {
2076 if (extra == NULL)
2077 fprintf(outfile, "Study returned NULL\n");
2078 else
2079 {
2080 uschar *start_bits = NULL;
2081 int minlength;
2082
2083 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2084 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2085
2086 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2087 if (start_bits == NULL)
2088 fprintf(outfile, "No set of starting bytes\n");
2089 else
2090 {
2091 int i;
2092 int c = 24;
2093 fprintf(outfile, "Starting byte set: ");
2094 for (i = 0; i < 256; i++)
2095 {
2096 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2097 {
2098 if (c > 75)
2099 {
2100 fprintf(outfile, "\n ");
2101 c = 2;
2102 }
2103 if (PRINTHEX(i) && i != ' ')
2104 {
2105 fprintf(outfile, "%c ", i);
2106 c += 2;
2107 }
2108 else
2109 {
2110 fprintf(outfile, "\\x%02x ", i);
2111 c += 5;
2112 }
2113 }
2114 }
2115 fprintf(outfile, "\n");
2116 }
2117 }
2118 }
2119 }
2120
2121 /* If the '>' option was present, we write out the regex to a file, and
2122 that is all. The first 8 bytes of the file are the regex length and then
2123 the study length, in big-endian order. */
2124
2125 if (to_file != NULL)
2126 {
2127 FILE *f = fopen((char *)to_file, "wb");
2128 if (f == NULL)
2129 {
2130 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2131 }
2132 else
2133 {
2134 uschar sbuf[8];
2135 sbuf[0] = (uschar)((true_size >> 24) & 255);
2136 sbuf[1] = (uschar)((true_size >> 16) & 255);
2137 sbuf[2] = (uschar)((true_size >> 8) & 255);
2138 sbuf[3] = (uschar)((true_size) & 255);
2139
2140 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2141 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2142 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2143 sbuf[7] = (uschar)((true_study_size) & 255);
2144
2145 if (fwrite(sbuf, 1, 8, f) < 8 ||
2146 fwrite(re, 1, true_size, f) < true_size)
2147 {
2148 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2149 }
2150 else
2151 {
2152 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2153
2154 /* If there is study data, write it, but verify the writing only
2155 if the studying was requested by /S, not just by -s. */
2156
2157 if (extra != NULL)
2158 {
2159 if (fwrite(extra->study_data, 1, true_study_size, f) <
2160 true_study_size)
2161 {
2162 fprintf(outfile, "Write error on %s: %s\n", to_file,
2163 strerror(errno));
2164 }
2165 else fprintf(outfile, "Study data written to %s\n", to_file);
2166 }
2167 }
2168 fclose(f);
2169 }
2170
2171 new_free(re);
2172 if (extra != NULL) new_free(extra);
2173 if (locale_set)
2174 {
2175 new_free((void *)tables);
2176 setlocale(LC_CTYPE, "C");
2177 locale_set = 0;
2178 }
2179 continue; /* With next regex */
2180 }
2181 } /* End of non-POSIX compile */
2182
2183 /* Read data lines and test them */
2184
2185 for (;;)
2186 {
2187 uschar *q;
2188 uschar *bptr;
2189 int *use_offsets = offsets;
2190 int use_size_offsets = size_offsets;
2191 int callout_data = 0;
2192 int callout_data_set = 0;
2193 int count, c;
2194 int copystrings = 0;
2195 int find_match_limit = default_find_match_limit;
2196 int getstrings = 0;
2197 int getlist = 0;
2198 int gmatched = 0;
2199 int start_offset = 0;
2200 int start_offset_sign = 1;
2201 int g_notempty = 0;
2202 int use_dfa = 0;
2203
2204 options = 0;
2205
2206 *copynames = 0;
2207 *getnames = 0;
2208
2209 copynamesptr = copynames;
2210 getnamesptr = getnames;
2211
2212 pcre_callout = callout;
2213 first_callout = 1;
2214 callout_extra = 0;
2215 callout_count = 0;
2216 callout_fail_count = 999999;
2217 callout_fail_id = -1;
2218 show_malloc = 0;
2219
2220 if (extra != NULL) extra->flags &=
2221 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2222
2223 len = 0;
2224 for (;;)
2225 {
2226 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2227 {
2228 if (len > 0) /* Reached EOF without hitting a newline */
2229 {
2230 fprintf(outfile, "\n");
2231 break;
2232 }
2233 done = 1;
2234 goto CONTINUE;
2235 }
2236 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2237 len = (int)strlen((char *)buffer);
2238 if (buffer[len-1] == '\n') break;
2239 }
2240
2241 while (len > 0 && isspace(buffer[len-1])) len--;
2242 buffer[len] = 0;
2243 if (len == 0) break;
2244
2245 p = buffer;
2246 while (isspace(*p)) p++;
2247
2248 bptr = q = dbuffer;
2249 while ((c = *p++) != 0)
2250 {
2251 int i = 0;
2252 int n = 0;
2253
2254 if (c == '\\') switch ((c = *p++))
2255 {
2256 case 'a': c = 7; break;
2257 case 'b': c = '\b'; break;
2258 case 'e': c = 27; break;
2259 case 'f': c = '\f'; break;
2260 case 'n': c = '\n'; break;
2261 case 'r': c = '\r'; break;
2262 case 't': c = '\t'; break;
2263 case 'v': c = '\v'; break;
2264
2265 case '0': case '1': case '2': case '3':
2266 case '4': case '5': case '6': case '7':
2267 c -= '0';
2268 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2269 c = c * 8 + *p++ - '0';
2270
2271 #if !defined NOUTF8
2272 if (use_utf8 && c > 255)
2273 {
2274 unsigned char buff8[8];
2275 int ii, utn;
2276 utn = ord2utf8(c, buff8);
2277 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2278 c = buff8[ii]; /* Last byte */
2279 }
2280 #endif
2281 break;
2282
2283 case 'x':
2284
2285 /* Handle \x{..} specially - new Perl thing for utf8 */
2286
2287 #if !defined NOUTF8
2288 if (*p == '{')
2289 {
2290 unsigned char *pt = p;
2291 c = 0;
2292 while (isxdigit(*(++pt)))
2293 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2294 if (*pt == '}')
2295 {
2296 unsigned char buff8[8];
2297 int ii, utn;
2298 if (use_utf8)
2299 {
2300 utn = ord2utf8(c, buff8);
2301 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2302 c = buff8[ii]; /* Last byte */
2303 }
2304 else
2305 {
2306 if (c > 255)
2307 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2308 "UTF-8 mode is not enabled.\n"
2309 "** Truncation will probably give the wrong result.\n", c);
2310 }
2311 p = pt + 1;
2312 break;
2313 }
2314 /* Not correct form; fall through */
2315 }
2316 #endif
2317
2318 /* Ordinary \x */
2319
2320 c = 0;
2321 while (i++ < 2 && isxdigit(*p))
2322 {
2323 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2324 p++;
2325 }
2326 break;
2327
2328 case 0: /* \ followed by EOF allows for an empty line */
2329 p--;
2330 continue;
2331
2332 case '>':
2333 if (*p == '-')
2334 {
2335 start_offset_sign = -1;
2336 p++;
2337 }
2338 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2339 start_offset *= start_offset_sign;
2340 continue;
2341
2342 case 'A': /* Option setting */
2343 options |= PCRE_ANCHORED;
2344 continue;
2345
2346 case 'B':
2347 options |= PCRE_NOTBOL;
2348 continue;
2349
2350 case 'C':
2351 if (isdigit(*p)) /* Set copy string */
2352 {
2353 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2354 copystrings |= 1 << n;
2355 }
2356 else if (isalnum(*p))
2357 {
2358 uschar *npp = copynamesptr;
2359 while (isalnum(*p)) *npp++ = *p++;
2360 *npp++ = 0;
2361 *npp = 0;
2362 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2363 if (n < 0)
2364 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2365 copynamesptr = npp;
2366 }
2367 else if (*p == '+')
2368 {
2369 callout_extra = 1;
2370 p++;
2371 }
2372 else if (*p == '-')
2373 {
2374 pcre_callout = NULL;
2375 p++;
2376 }
2377 else if (*p == '!')
2378 {
2379 callout_fail_id = 0;
2380 p++;
2381 while(isdigit(*p))
2382 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2383 callout_fail_count = 0;
2384 if (*p == '!')
2385 {
2386 p++;
2387 while(isdigit(*p))
2388 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2389 }
2390 }
2391 else if (*p == '*')
2392 {
2393 int sign = 1;
2394 callout_data = 0;
2395 if (*(++p) == '-') { sign = -1; p++; }
2396 while(isdigit(*p))
2397 callout_data = callout_data * 10 + *p++ - '0';
2398 callout_data *= sign;
2399 callout_data_set = 1;
2400 }
2401 continue;
2402
2403 #if !defined NODFA
2404 case 'D':
2405 #if !defined NOPOSIX
2406 if (posix || do_posix)
2407 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2408 else
2409 #endif
2410 use_dfa = 1;
2411 continue;
2412 #endif
2413
2414 #if !defined NODFA
2415 case 'F':
2416 options |= PCRE_DFA_SHORTEST;
2417 continue;
2418 #endif
2419
2420 case 'G':
2421 if (isdigit(*p))
2422 {
2423 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2424 getstrings |= 1 << n;
2425 }
2426 else if (isalnum(*p))
2427 {
2428 uschar *npp = getnamesptr;
2429 while (isalnum(*p)) *npp++ = *p++;
2430 *npp++ = 0;
2431 *npp = 0;
2432 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2433 if (n < 0)
2434 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2435 getnamesptr = npp;
2436 }
2437 continue;
2438
2439 case 'L':
2440 getlist = 1;
2441 continue;
2442
2443 case 'M':
2444 find_match_limit = 1;
2445 continue;
2446
2447 case 'N':
2448 if ((options & PCRE_NOTEMPTY) != 0)
2449 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2450 else
2451 options |= PCRE_NOTEMPTY;
2452 continue;
2453
2454 case 'O':
2455 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2456 if (n > size_offsets_max)
2457 {
2458 size_offsets_max = n;
2459 free(offsets);
2460 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2461 if (offsets == NULL)
2462 {
2463 printf("** Failed to get %d bytes of memory for offsets vector\n",
2464 (int)(size_offsets_max * sizeof(int)));
2465 yield = 1;
2466 goto EXIT;
2467 }
2468 }
2469 use_size_offsets = n;
2470 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2471 continue;
2472
2473 case 'P':
2474 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2475 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2476 continue;
2477
2478 case 'Q':
2479 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2480 if (extra == NULL)
2481 {
2482 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2483 extra->flags = 0;
2484 }
2485 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2486 extra->match_limit_recursion = n;
2487 continue;
2488
2489 case 'q':
2490 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2491 if (extra == NULL)
2492 {
2493 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2494 extra->flags = 0;
2495 }
2496 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2497 extra->match_limit = n;
2498 continue;
2499
2500 #if !defined NODFA
2501 case 'R':
2502 options |= PCRE_DFA_RESTART;
2503 continue;
2504 #endif
2505
2506 case 'S':
2507 show_malloc = 1;
2508 continue;
2509
2510 case 'Y':
2511 options |= PCRE_NO_START_OPTIMIZE;
2512 continue;
2513
2514 case 'Z':
2515 options |= PCRE_NOTEOL;
2516 continue;
2517
2518 case '?':
2519 options |= PCRE_NO_UTF8_CHECK;
2520 continue;
2521
2522 case '<':
2523 {
2524 int x = check_newline(p, outfile);
2525 if (x == 0) goto NEXT_DATA;
2526 options |= x;
2527 while (*p++ != '>');
2528 }
2529 continue;
2530 }
2531 *q++ = c;
2532 }
2533 *q = 0;
2534 len = (int)(q - dbuffer);
2535
2536 /* Move the data to the end of the buffer so that a read over the end of
2537 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2538 we are using the POSIX interface, we must include the terminating zero. */
2539
2540 #if !defined NOPOSIX
2541 if (posix || do_posix)
2542 {
2543 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2544 bptr += buffer_size - len - 1;
2545 }
2546 else
2547 #endif
2548 {
2549 memmove(bptr + buffer_size - len, bptr, len);
2550 bptr += buffer_size - len;
2551 }
2552
2553 if ((all_use_dfa || use_dfa) && find_match_limit)
2554 {
2555 printf("**Match limit not relevant for DFA matching: ignored\n");
2556 find_match_limit = 0;
2557 }
2558
2559 /* Handle matching via the POSIX interface, which does not
2560 support timing or playing with the match limit or callout data. */
2561
2562 #if !defined NOPOSIX
2563 if (posix || do_posix)
2564 {
2565 int rc;
2566 int eflags = 0;
2567 regmatch_t *pmatch = NULL;
2568 if (use_size_offsets > 0)
2569 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2570 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2571 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2572 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2573
2574 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2575
2576 if (rc != 0)
2577 {
2578 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2579 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2580 }
2581 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2582 != 0)
2583 {
2584 fprintf(outfile, "Matched with REG_NOSUB\n");
2585 }
2586 else
2587 {
2588 size_t i;
2589 for (i = 0; i < (size_t)use_size_offsets; i++)
2590 {
2591 if (pmatch[i].rm_so >= 0)
2592 {
2593 fprintf(outfile, "%2d: ", (int)i);
2594 (void)pchars(dbuffer + pmatch[i].rm_so,
2595 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2596 fprintf(outfile, "\n");
2597 if (do_showcaprest || (i == 0 && do_showrest))
2598 {
2599 fprintf(outfile, "%2d+ ", (int)i);
2600 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2601 outfile);
2602 fprintf(outfile, "\n");
2603 }
2604 }
2605 }
2606 }
2607 free(pmatch);
2608 }
2609
2610 /* Handle matching via the native interface - repeats for /g and /G */
2611
2612 else
2613 #endif /* !defined NOPOSIX */
2614
2615 for (;; gmatched++) /* Loop for /g or /G */
2616 {
2617 markptr = NULL;
2618
2619 if (timeitm > 0)
2620 {
2621 register int i;
2622 clock_t time_taken;
2623 clock_t start_time = clock();
2624
2625 #if !defined NODFA
2626 if (all_use_dfa || use_dfa)
2627 {
2628 int workspace[1000];
2629 for (i = 0; i < timeitm; i++)
2630 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2631 options | g_notempty, use_offsets, use_size_offsets, workspace,
2632 sizeof(workspace)/sizeof(int));
2633 }
2634 else
2635 #endif
2636
2637 for (i = 0; i < timeitm; i++)
2638 count = pcre_exec(re, extra, (char *)bptr, len,
2639 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2640
2641 time_taken = clock() - start_time;
2642 fprintf(outfile, "Execute time %.4f milliseconds\n",
2643 (((double)time_taken * 1000.0) / (double)timeitm) /
2644 (double)CLOCKS_PER_SEC);
2645 }
2646
2647 /* If find_match_limit is set, we want to do repeated matches with
2648 varying limits in order to find the minimum value for the match limit and
2649 for the recursion limit. */
2650
2651 if (find_match_limit)
2652 {
2653 if (extra == NULL)
2654 {
2655 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2656 extra->flags = 0;
2657 }
2658
2659 (void)check_match_limit(re, extra, bptr, len, start_offset,
2660 options|g_notempty, use_offsets, use_size_offsets,
2661 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2662 PCRE_ERROR_MATCHLIMIT, "match()");
2663
2664 count = check_match_limit(re, extra, bptr, len, start_offset,
2665 options|g_notempty, use_offsets, use_size_offsets,
2666 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2667 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2668 }
2669
2670 /* If callout_data is set, use the interface with additional data */
2671
2672 else if (callout_data_set)
2673 {
2674 if (extra == NULL)
2675 {
2676 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2677 extra->flags = 0;
2678 }
2679 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2680 extra->callout_data = &callout_data;
2681 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2682 options | g_notempty, use_offsets, use_size_offsets);
2683 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2684 }
2685
2686 /* The normal case is just to do the match once, with the default
2687 value of match_limit. */
2688
2689 #if !defined NODFA
2690 else if (all_use_dfa || use_dfa)
2691 {
2692 int workspace[1000];
2693 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2694 options | g_notempty, use_offsets, use_size_offsets, workspace,
2695 sizeof(workspace)/sizeof(int));
2696 if (count == 0)
2697 {
2698 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2699 count = use_size_offsets/2;
2700 }
2701 }
2702 #endif
2703
2704 else
2705 {
2706 count = pcre_exec(re, extra, (char *)bptr, len,
2707 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2708 if (count == 0)
2709 {
2710 fprintf(outfile, "Matched, but too many substrings\n");
2711 count = use_size_offsets/3;
2712 }
2713 }
2714
2715 /* Matched */
2716
2717 if (count >= 0)
2718 {
2719 int i, maxcount;
2720
2721 #if !defined NODFA
2722 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2723 #endif
2724 maxcount = use_size_offsets/3;
2725
2726 /* This is a check against a lunatic return value. */
2727
2728 if (count > maxcount)
2729 {
2730 fprintf(outfile,
2731 "** PCRE error: returned count %d is too big for offset size %d\n",
2732 count, use_size_offsets);
2733 count = use_size_offsets/3;
2734 if (do_g || do_G)
2735 {
2736 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2737 do_g = do_G = FALSE; /* Break g/G loop */
2738 }
2739 }
2740
2741 /* do_allcaps requests showing of all captures in the pattern, to check
2742 unset ones at the end. */
2743
2744 if (do_allcaps)
2745 {
2746 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2747 count++; /* Allow for full match */
2748 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2749 }
2750
2751 /* Output the captured substrings */
2752
2753 for (i = 0; i < count * 2; i += 2)
2754 {
2755 if (use_offsets[i] < 0)
2756 {
2757 if (use_offsets[i] != -1)
2758 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2759 use_offsets[i], i);
2760 if (use_offsets[i+1] != -1)
2761 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2762 use_offsets[i+1], i+1);
2763 fprintf(outfile, "%2d: <unset>\n", i/2);
2764 }
2765 else
2766 {
2767 fprintf(outfile, "%2d: ", i/2);
2768 (void)pchars(bptr + use_offsets[i],
2769 use_offsets[i+1] - use_offsets[i], outfile);
2770 fprintf(outfile, "\n");
2771 if (do_showcaprest || (i == 0 && do_showrest))
2772 {
2773 fprintf(outfile, "%2d+ ", i/2);
2774 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2775 outfile);
2776 fprintf(outfile, "\n");
2777 }
2778 }
2779 }
2780
2781 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2782
2783 for (i = 0; i < 32; i++)
2784 {
2785 if ((copystrings & (1 << i)) != 0)
2786 {
2787 char copybuffer[256];
2788 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2789 i, copybuffer, sizeof(copybuffer));
2790 if (rc < 0)
2791 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2792 else
2793 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2794 }
2795 }
2796
2797 for (copynamesptr = copynames;
2798 *copynamesptr != 0;
2799 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2800 {
2801 char copybuffer[256];
2802 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2803 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2804 if (rc < 0)
2805 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2806 else
2807 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2808 }
2809
2810 for (i = 0; i < 32; i++)
2811 {
2812 if ((getstrings & (1 << i)) != 0)
2813 {
2814 const char *substring;
2815 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2816 i, &substring);
2817 if (rc < 0)
2818 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2819 else
2820 {
2821 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2822 pcre_free_substring(substring);
2823 }
2824 }
2825 }
2826
2827 for (getnamesptr = getnames;
2828 *getnamesptr != 0;
2829 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2830 {
2831 const char *substring;
2832 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2833 count, (char *)getnamesptr, &substring);
2834 if (rc < 0)
2835 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2836 else
2837 {
2838 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2839 pcre_free_substring(substring);
2840 }
2841 }
2842
2843 if (getlist)
2844 {
2845 const char **stringlist;
2846 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2847 &stringlist);
2848 if (rc < 0)
2849 fprintf(outfile, "get substring list failed %d\n", rc);
2850 else
2851 {
2852 for (i = 0; i < count; i++)
2853 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2854 if (stringlist[i] != NULL)
2855 fprintf(outfile, "string list not terminated by NULL\n");
2856 /* free((void *)stringlist); */
2857 pcre_free_substring_list(stringlist);
2858 }
2859 }
2860 }
2861
2862 /* There was a partial match */
2863
2864 else if (count == PCRE_ERROR_PARTIAL)
2865 {
2866 if (markptr == NULL) fprintf(outfile, "Partial match");
2867 else fprintf(outfile, "Partial match, mark=%s", markptr);
2868 if (use_size_offsets > 1)
2869 {
2870 fprintf(outfile, ": ");
2871 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2872 outfile);
2873 }
2874 fprintf(outfile, "\n");
2875 break; /* Out of the /g loop */
2876 }
2877
2878 /* Failed to match. If this is a /g or /G loop and we previously set
2879 g_notempty after a null match, this is not necessarily the end. We want
2880 to advance the start offset, and continue. We won't be at the end of the
2881 string - that was checked before setting g_notempty.
2882
2883 Complication arises in the case when the newline convention is "any",
2884 "crlf", or "anycrlf". If the previous match was at the end of a line
2885 terminated by CRLF, an advance of one character just passes the \r,
2886 whereas we should prefer the longer newline sequence, as does the code in
2887 pcre_exec(). Fudge the offset value to achieve this. We check for a
2888 newline setting in the pattern; if none was set, use pcre_config() to
2889 find the default.
2890
2891 Otherwise, in the case of UTF-8 matching, the advance must be one
2892 character, not one byte. */
2893
2894 else
2895 {
2896 if (g_notempty != 0)
2897 {
2898 int onechar = 1;
2899 unsigned int obits = ((real_pcre *)re)->options;
2900 use_offsets[0] = start_offset;
2901 if ((obits & PCRE_NEWLINE_BITS) == 0)
2902 {
2903 int d;
2904 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2905 /* Note that these values are always the ASCII ones, even in
2906 EBCDIC environments. CR = 13, NL = 10. */
2907 obits = (d == 13)? PCRE_NEWLINE_CR :
2908 (d == 10)? PCRE_NEWLINE_LF :
2909 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2910 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2911 (d == -1)? PCRE_NEWLINE_ANY : 0;
2912 }
2913 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2914 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2915 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2916 &&
2917 start_offset < len - 1 &&
2918 bptr[start_offset] == '\r' &&
2919 bptr[start_offset+1] == '\n')
2920 onechar++;
2921 else if (use_utf8)
2922 {
2923 while (start_offset + onechar < len)
2924 {
2925 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2926 onechar++;
2927 }
2928 }
2929 use_offsets[1] = start_offset + onechar;
2930 }
2931 else
2932 {
2933 switch(count)
2934 {
2935 case PCRE_ERROR_NOMATCH:
2936 if (gmatched == 0)
2937 {
2938 if (markptr == NULL) fprintf(outfile, "No match\n");
2939 else fprintf(outfile, "No match, mark = %s\n", markptr);
2940 }
2941 break;
2942
2943 case PCRE_ERROR_BADUTF8:
2944 case PCRE_ERROR_SHORTUTF8:
2945 fprintf(outfile, "Error %d (%s UTF-8 string)", count,
2946 (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
2947 if (use_size_offsets >= 2)
2948 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
2949 use_offsets[1]);
2950 fprintf(outfile, "\n");
2951 break;
2952
2953 default:
2954 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
2955 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
2956 else
2957 fprintf(outfile, "Error %d (Unexpected value)\n", count);
2958 break;
2959 }
2960
2961 break; /* Out of the /g loop */
2962 }
2963 }
2964
2965 /* If not /g or /G we are done */
2966
2967 if (!do_g && !do_G) break;
2968
2969 /* If we have matched an empty string, first check to see if we are at
2970 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2971 Perl's /g option does. This turns out to be rather cunning. First we set
2972 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2973 same point. If this fails (picked up above) we advance to the next
2974 character. */
2975
2976 g_notempty = 0;
2977
2978 if (use_offsets[0] == use_offsets[1])
2979 {
2980 if (use_offsets[0] == len) break;
2981 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2982 }
2983
2984 /* For /g, update the start offset, leaving the rest alone */
2985
2986 if (do_g) start_offset = use_offsets[1];
2987
2988 /* For /G, update the pointer and length */
2989
2990 else
2991 {
2992 bptr += use_offsets[1];
2993 len -= use_offsets[1];
2994 }
2995 } /* End of loop for /g and /G */
2996
2997 NEXT_DATA: continue;
2998 } /* End of loop for data lines */
2999
3000 CONTINUE:
3001
3002 #if !defined NOPOSIX
3003 if (posix || do_posix) regfree(&preg);
3004 #endif
3005
3006 if (re != NULL) new_free(re);
3007 if (extra != NULL) new_free(extra);
3008 if (locale_set)
3009 {
3010 new_free((void *)tables);
3011 setlocale(LC_CTYPE, "C");
3012 locale_set = 0;
3013 }
3014 }
3015
3016 if (infile == stdin) fprintf(outfile, "\n");
3017
3018 EXIT:
3019
3020 if (infile != NULL && infile != stdin) fclose(infile);
3021 if (outfile != NULL && outfile != stdout) fclose(outfile);
3022
3023 free(buffer);
3024 free(dbuffer);
3025 free(pbuffer);
3026 free(offsets);
3027
3028 return yield;
3029 }
3030
3031 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12