/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 675 - (show annotations) (download)
Sat Aug 27 10:18:46 2011 UTC (2 years, 7 months ago) by ph10
File MIME type: text/plain
File size: 94257 byte(s)
Change pcre_assign_jit_callback to pcre_assign_jit_stack.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <locale.h>
49 #include <errno.h>
#include <limits.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #ifndef isatty
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
78 #ifndef fileno
79 #define fileno _fileno
80 #endif
81
82 /* A user sent this fix for Borland Builder 5 under Windows. */
83
84 #ifdef __BORLANDC__
85 #define _setmode(handle, mode) setmode(handle, mode)
86 #endif
87
88 /* Not Windows */
89
90 #else
91 #include <sys/time.h> /* These two includes are needed */
92 #include <sys/resource.h> /* for setrlimit(). */
93 #define INPUT_MODE "rb"
94 #define OUTPUT_MODE "wb"
95 #endif
96
97
98 /* We have to include pcre_internal.h because we need the internal info for
99 displaying the results of pcre_study() and we also need to know about the
100 internal macros, structures, and other internal data values; pcretest has
101 "inside information" compared to a program that strictly follows the PCRE API.
102
103 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105 appropriately for an application, not for building PCRE. */
106
107 #include "pcre.h"
108 #include "pcre_internal.h"
109
110 /* We need access to some of the data tables that PCRE uses. So as not to have
111 to keep two copies, we include the source file here, changing the names of the
112 external symbols to prevent clashes. */
113
114 #define _pcre_ucp_gentype ucp_gentype
115 #define _pcre_ucp_typerange ucp_typerange
116 #define _pcre_utf8_table1 utf8_table1
117 #define _pcre_utf8_table1_size utf8_table1_size
118 #define _pcre_utf8_table2 utf8_table2
119 #define _pcre_utf8_table3 utf8_table3
120 #define _pcre_utf8_table4 utf8_table4
121 #define _pcre_utf8_char_sizes utf8_char_sizes
122 #define _pcre_utt utt
123 #define _pcre_utt_size utt_size
124 #define _pcre_utt_names utt_names
125 #define _pcre_OP_lengths OP_lengths
126
127 #include "pcre_tables.c"
128
129 /* We also need the pcre_printint() function for printing out compiled
130 patterns. This function is in a separate file so that it can be included in
131 pcre_compile.c when that module is compiled with debugging enabled. It needs to
132 know which case is being compiled. */
133
134 #define COMPILING_PCRETEST
135 #include "pcre_printint.src"
136
137 /* The definition of the macro PRINTABLE, which determines whether to print an
138 output character as-is or as a hex value when showing compiled patterns, is
139 contained in the printint.src file. We use it here also, in cases when the
140 locale has not been explicitly changed, so as to get consistent output from
141 systems that differ in their output from isprint() even in the "C" locale.

Despite its name, PRINTHEX(c) is non-zero when c may be printed as-is and zero
when it must be shown as a hex escape (see its use in pchars()). The argument
may be evaluated by isprint(), so it must be in the range that isprint()
accepts whenever locale_set is non-zero. */
142
143 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
144
145 /* It is possible to compile this test program without including support for
146 testing the POSIX interface, though this is not available via the standard
147 Makefile. */
148
149 #if !defined NOPOSIX
150 #include "pcreposix.h"
151 #endif
152
153 /* It is also possible, for the benefit of the version currently imported into
154 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
155 interface to the DFA matcher (NODFA), and without the doublecheck of the old
156 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
157 UTF8 support if PCRE is built without it. */
158
159 #ifndef SUPPORT_UTF8
160 #ifndef NOUTF8
161 #define NOUTF8
162 #endif
163 #endif
164
165
166 /* Other parameters */
167
168 #ifndef CLOCKS_PER_SEC
169 #ifdef CLK_TCK
170 #define CLOCKS_PER_SEC CLK_TCK
171 #else
172 #define CLOCKS_PER_SEC 100
173 #endif
174 #endif
175
176 /* This is the default loop count for timing. */
177
178 #define LOOPREPEAT 500000
179
180 /* Static variables */
181
182 static FILE *outfile; /* stream that callout() and the malloc wrappers write to */
183 static int log_store = 0; /* NOTE(review): not referenced in this part of the file - purpose to be confirmed */
184 static int callout_count; /* incremented by callout() each time the failing callout id is hit */
185 static int callout_extra; /* non-zero => callout() prints the capture list and always re-prints the subject */
186 static int callout_fail_count; /* callout() returns 1 once callout_count reaches this value */
187 static int callout_fail_id; /* callout number for which the fail count is applied */
188 static int debug_lengths; /* NOTE(review): not referenced in this part of the file - purpose to be confirmed */
189 static int first_callout; /* non-zero until callout() has run once; cleared by callout() */
190 static int locale_set = 0; /* non-zero => PRINTHEX uses isprint() instead of PRINTABLE */
191 static int show_malloc; /* non-zero => the malloc/free wrappers trace each call to outfile */
192 static int use_utf8; /* non-zero => pchars() decodes its input as UTF-8 */
193 static size_t gotten_store; /* size passed to the most recent new_malloc() call */
194 static const unsigned char *last_callout_mark = NULL; /* mark seen by the previous callout(), to report changes only */
195
196 /* The buffers grow automatically if very long input lines are encountered. */
197
198 static int buffer_size = 50000; /* current size in bytes of each of the three buffers below */
199 static uschar *buffer = NULL; /* input line buffer filled by extend_inputline() */
200 static uschar *dbuffer = NULL; /* resized together with buffer; contents are not preserved on growth */
201 static uschar *pbuffer = NULL; /* copy of the pattern text, indexed by callout(); preserved on growth */
202
203 /* Textual explanations for runtime error codes */

/* NOTE(review): the entries appear to be indexed by the negated PCRE error
code (entry 0 is "no error", entry 1 is NOMATCH) - confirm against the
PCRE_ERROR_xxx values in pcre.h. A NULL entry marks a code that is either
never returned or is reported by dedicated code rather than via this table. */
204
205 static const char *errtexts[] = {
206 NULL, /* 0 is no error */
207 NULL, /* NOMATCH is handled specially */
208 "NULL argument passed",
209 "bad option value",
210 "magic number missing",
211 "unknown opcode - pattern overwritten?",
212 "no more memory",
213 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
214 "match limit exceeded",
215 "callout error code",
216 NULL, /* BADUTF8 is handled specially */
217 "bad UTF-8 offset",
218 NULL, /* PARTIAL is handled specially */
219 "not used - internal error",
220 "internal error - pattern overwritten?",
221 "bad count value",
222 "item unsupported for DFA matching",
223 "backreference condition or recursion test not supported for DFA matching",
224 "match limit not supported for DFA matching",
225 "workspace size exceeded in DFA matching",
226 "too much recursion for DFA matching",
227 "recursion limit exceeded",
228 "not used - internal error",
229 "invalid combination of newline options",
230 "bad offset value",
231 NULL, /* SHORTUTF8 is handled specially */
232 "nested recursion at the same subject position"
233 };
234
235
236 /*************************************************
237 * Alternate character tables *
238 *************************************************/
239
240 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
241 using the default tables of the library. However, the T option can be used to
242 select alternate sets of tables, for different kinds of testing. Note also that
243 the L (locale) option also adjusts the tables. */
244
245 /* This is the set of tables distributed as default with PCRE. It recognizes
246 only ASCII characters. */

/* Layout of the array, as laid out below: a 256-byte lower-casing table, a
256-byte case-flipping table, ten 32-byte class bit maps (320 bytes), and a
256-byte table of per-character type flags, 1088 bytes in total. */
247
248 static const unsigned char tables0[] = {
249
250 /* This table is a lower casing table. */
251
252 0, 1, 2, 3, 4, 5, 6, 7,
253 8, 9, 10, 11, 12, 13, 14, 15,
254 16, 17, 18, 19, 20, 21, 22, 23,
255 24, 25, 26, 27, 28, 29, 30, 31,
256 32, 33, 34, 35, 36, 37, 38, 39,
257 40, 41, 42, 43, 44, 45, 46, 47,
258 48, 49, 50, 51, 52, 53, 54, 55,
259 56, 57, 58, 59, 60, 61, 62, 63,
260 64, 97, 98, 99,100,101,102,103,
261 104,105,106,107,108,109,110,111,
262 112,113,114,115,116,117,118,119,
263 120,121,122, 91, 92, 93, 94, 95,
264 96, 97, 98, 99,100,101,102,103,
265 104,105,106,107,108,109,110,111,
266 112,113,114,115,116,117,118,119,
267 120,121,122,123,124,125,126,127,
268 128,129,130,131,132,133,134,135,
269 136,137,138,139,140,141,142,143,
270 144,145,146,147,148,149,150,151,
271 152,153,154,155,156,157,158,159,
272 160,161,162,163,164,165,166,167,
273 168,169,170,171,172,173,174,175,
274 176,177,178,179,180,181,182,183,
275 184,185,186,187,188,189,190,191,
276 192,193,194,195,196,197,198,199,
277 200,201,202,203,204,205,206,207,
278 208,209,210,211,212,213,214,215,
279 216,217,218,219,220,221,222,223,
280 224,225,226,227,228,229,230,231,
281 232,233,234,235,236,237,238,239,
282 240,241,242,243,244,245,246,247,
283 248,249,250,251,252,253,254,255,
284
285 /* This table is a case flipping table. */
286
287 0, 1, 2, 3, 4, 5, 6, 7,
288 8, 9, 10, 11, 12, 13, 14, 15,
289 16, 17, 18, 19, 20, 21, 22, 23,
290 24, 25, 26, 27, 28, 29, 30, 31,
291 32, 33, 34, 35, 36, 37, 38, 39,
292 40, 41, 42, 43, 44, 45, 46, 47,
293 48, 49, 50, 51, 52, 53, 54, 55,
294 56, 57, 58, 59, 60, 61, 62, 63,
295 64, 97, 98, 99,100,101,102,103,
296 104,105,106,107,108,109,110,111,
297 112,113,114,115,116,117,118,119,
298 120,121,122, 91, 92, 93, 94, 95,
299 96, 65, 66, 67, 68, 69, 70, 71,
300 72, 73, 74, 75, 76, 77, 78, 79,
301 80, 81, 82, 83, 84, 85, 86, 87,
302 88, 89, 90,123,124,125,126,127,
303 128,129,130,131,132,133,134,135,
304 136,137,138,139,140,141,142,143,
305 144,145,146,147,148,149,150,151,
306 152,153,154,155,156,157,158,159,
307 160,161,162,163,164,165,166,167,
308 168,169,170,171,172,173,174,175,
309 176,177,178,179,180,181,182,183,
310 184,185,186,187,188,189,190,191,
311 192,193,194,195,196,197,198,199,
312 200,201,202,203,204,205,206,207,
313 208,209,210,211,212,213,214,215,
314 216,217,218,219,220,221,222,223,
315 224,225,226,227,228,229,230,231,
316 232,233,234,235,236,237,238,239,
317 240,241,242,243,244,245,246,247,
318 248,249,250,251,252,253,254,255,
319
320 /* This table contains bit maps for various character classes. Each map is 32
321 bytes long and the bits run from the least significant end of each byte. The
322 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
323 graph, print, punct, and cntrl. Other classes are built from combinations. */
324
325 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, /* space: 9-13 and 32 */
326 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
327 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
328 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329
330 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* xdigit: 0-9, A-F, a-f */
331 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
332 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
333 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334
335 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* digit: 0-9 */
336 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
337 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
338 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339
340 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* upper: A-Z */
341 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
342 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
343 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
344
345 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* lower: a-z */
346 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
347 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
348 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
349
350 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* word: 0-9, A-Z, '_', a-z */
351 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
352 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
353 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
354
355 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, /* graph: 33-126 */
356 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
357 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
358 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
359
360 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, /* print: 32-126 */
361 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
362 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
363 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
364
365 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, /* punct */
366 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
367 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
368 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
369
370 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, /* cntrl: 0-31 and 127 */
371 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
372 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
373 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
374
375 /* This table identifies various classes of character by individual bits:
376 0x01 white space character
377 0x02 letter
378 0x04 decimal digit
379 0x08 hexadecimal digit
380 0x10 alphanumeric or '_'
381 0x80 regular expression metacharacter or binary zero
382 */
383
384 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
385 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
386 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
387 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
388 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
389 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
390 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
391 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
392 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
393 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
394 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
395 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
396 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
397 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
398 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
399 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
400 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
401 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
402 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
403 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
404 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
405 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
406 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
407 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
408 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
409 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
410 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
411 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
412 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
413 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
414 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
415 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
416
417 /* This is a set of tables that came originally from a Windows user. It seems to
418 be at least an approximation of ISO 8859. In particular, there are characters
419 greater than 128 that are marked as spaces, letters, etc. */

/* The array has the same size and section order as tables0 (256 + 256 + 320 +
256 bytes), but the values are written in decimal. The section markers below
were added for readability; the class names on the bit maps assume the same
map order as tables0 - TODO confirm against the table-building code. */
420
421 static const unsigned char tables1[] = {
/* Lower-casing table (256 bytes); 192-222 map to 224-254, except 215 */
422 0,1,2,3,4,5,6,7,
423 8,9,10,11,12,13,14,15,
424 16,17,18,19,20,21,22,23,
425 24,25,26,27,28,29,30,31,
426 32,33,34,35,36,37,38,39,
427 40,41,42,43,44,45,46,47,
428 48,49,50,51,52,53,54,55,
429 56,57,58,59,60,61,62,63,
430 64,97,98,99,100,101,102,103,
431 104,105,106,107,108,109,110,111,
432 112,113,114,115,116,117,118,119,
433 120,121,122,91,92,93,94,95,
434 96,97,98,99,100,101,102,103,
435 104,105,106,107,108,109,110,111,
436 112,113,114,115,116,117,118,119,
437 120,121,122,123,124,125,126,127,
438 128,129,130,131,132,133,134,135,
439 136,137,138,139,140,141,142,143,
440 144,145,146,147,148,149,150,151,
441 152,153,154,155,156,157,158,159,
442 160,161,162,163,164,165,166,167,
443 168,169,170,171,172,173,174,175,
444 176,177,178,179,180,181,182,183,
445 184,185,186,187,188,189,190,191,
446 224,225,226,227,228,229,230,231,
447 232,233,234,235,236,237,238,239,
448 240,241,242,243,244,245,246,215,
449 248,249,250,251,252,253,254,223,
450 224,225,226,227,228,229,230,231,
451 232,233,234,235,236,237,238,239,
452 240,241,242,243,244,245,246,247,
453 248,249,250,251,252,253,254,255,
/* Case-flipping table (256 bytes) */
454 0,1,2,3,4,5,6,7,
455 8,9,10,11,12,13,14,15,
456 16,17,18,19,20,21,22,23,
457 24,25,26,27,28,29,30,31,
458 32,33,34,35,36,37,38,39,
459 40,41,42,43,44,45,46,47,
460 48,49,50,51,52,53,54,55,
461 56,57,58,59,60,61,62,63,
462 64,97,98,99,100,101,102,103,
463 104,105,106,107,108,109,110,111,
464 112,113,114,115,116,117,118,119,
465 120,121,122,91,92,93,94,95,
466 96,65,66,67,68,69,70,71,
467 72,73,74,75,76,77,78,79,
468 80,81,82,83,84,85,86,87,
469 88,89,90,123,124,125,126,127,
470 128,129,130,131,132,133,134,135,
471 136,137,138,139,140,141,142,143,
472 144,145,146,147,148,149,150,151,
473 152,153,154,155,156,157,158,159,
474 160,161,162,163,164,165,166,167,
475 168,169,170,171,172,173,174,175,
476 176,177,178,179,180,181,182,183,
477 184,185,186,187,188,189,190,191,
478 224,225,226,227,228,229,230,231,
479 232,233,234,235,236,237,238,239,
480 240,241,242,243,244,245,246,215,
481 248,249,250,251,252,253,254,223,
482 192,193,194,195,196,197,198,199,
483 200,201,202,203,204,205,206,207,
484 208,209,210,211,212,213,214,247,
485 216,217,218,219,220,221,222,255,
/* Class bit maps: ten maps of 32 bytes each (320 bytes) */
486 0,62,0,0,1,0,0,0, /* space */
487 0,0,0,0,0,0,0,0,
488 32,0,0,0,1,0,0,0,
489 0,0,0,0,0,0,0,0,
490 0,0,0,0,0,0,255,3, /* xdigit */
491 126,0,0,0,126,0,0,0,
492 0,0,0,0,0,0,0,0,
493 0,0,0,0,0,0,0,0,
494 0,0,0,0,0,0,255,3, /* digit */
495 0,0,0,0,0,0,0,0,
496 0,0,0,0,0,0,12,2,
497 0,0,0,0,0,0,0,0,
498 0,0,0,0,0,0,0,0, /* upper */
499 254,255,255,7,0,0,0,0,
500 0,0,0,0,0,0,0,0,
501 255,255,127,127,0,0,0,0,
502 0,0,0,0,0,0,0,0, /* lower */
503 0,0,0,0,254,255,255,7,
504 0,0,0,0,0,4,32,4,
505 0,0,0,128,255,255,127,255,
506 0,0,0,0,0,0,255,3, /* word */
507 254,255,255,135,254,255,255,7,
508 0,0,0,0,0,4,44,6,
509 255,255,127,255,255,255,127,255,
510 0,0,0,0,254,255,255,255, /* graph */
511 255,255,255,255,255,255,255,127,
512 0,0,0,0,254,255,255,255,
513 255,255,255,255,255,255,255,255,
514 0,2,0,0,255,255,255,255, /* print */
515 255,255,255,255,255,255,255,127,
516 0,0,0,0,255,255,255,255,
517 255,255,255,255,255,255,255,255,
518 0,0,0,0,254,255,0,252, /* punct */
519 1,0,0,248,1,0,0,120,
520 0,0,0,0,254,255,255,255,
521 0,0,128,0,0,0,128,0,
522 255,255,255,255,0,0,0,0, /* cntrl */
523 0,0,0,0,0,0,0,128,
524 255,255,255,255,0,0,0,0,
525 0,0,0,0,0,0,0,0,
/* Per-character type flags (256 bytes); the decimal values use the same bit
meanings as the flag table in tables0 (e.g. 28 = 0x1c, 26 = 0x1a, 18 = 0x12) */
526 128,0,0,0,0,0,0,0,
527 0,1,1,0,1,1,0,0,
528 0,0,0,0,0,0,0,0,
529 0,0,0,0,0,0,0,0,
530 1,0,0,0,128,0,0,0,
531 128,128,128,128,0,0,128,0,
532 28,28,28,28,28,28,28,28,
533 28,28,0,0,0,0,0,128,
534 0,26,26,26,26,26,26,18,
535 18,18,18,18,18,18,18,18,
536 18,18,18,18,18,18,18,18,
537 18,18,18,128,128,0,128,16,
538 0,26,26,26,26,26,26,18,
539 18,18,18,18,18,18,18,18,
540 18,18,18,18,18,18,18,18,
541 18,18,18,128,128,0,0,0,
542 0,0,0,0,0,1,0,0,
543 0,0,0,0,0,0,0,0,
544 0,0,0,0,0,0,0,0,
545 0,0,0,0,0,0,0,0,
546 1,0,0,0,0,0,0,0,
547 0,0,18,0,0,0,0,0,
548 0,0,20,20,0,18,0,0,
549 0,20,18,0,0,0,0,0,
550 18,18,18,18,18,18,18,18,
551 18,18,18,18,18,18,18,18,
552 18,18,18,18,18,18,18,0,
553 18,18,18,18,18,18,18,18,
554 18,18,18,18,18,18,18,18,
555 18,18,18,18,18,18,18,18,
556 18,18,18,18,18,18,18,0,
557 18,18,18,18,18,18,18,18
558 };
559
560
561
562
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for old systems (e.g. SunOS4) whose C library has no strerror()
but does export the traditional sys_nerr / sys_errlist pair.

Argument:   n   an errno value
Returns:    the matching entry of sys_errlist, or a fixed "unknown" text
              when n lies outside 0 .. sys_nerr-1
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
582
583
584 /*************************************************
585 * JIT memory callback *
586 *************************************************/
587
588 static pcre_jit_stack* jit_callback(void *arg)
589 {
590 return (pcre_jit_stack *)arg;
591 }
592
593
594 /*************************************************
595 * Read or extend an input line *
596 *************************************************/
597
598 /* Input lines are read into buffer, but both patterns and data lines can be
599 continued over multiple input lines. In addition, if the buffer fills up, we
600 want to automatically expand it so as to be able to handle extremely large
601 lines that are needed for certain stress tests. When the input buffer is
602 expanded, the other two buffers must also be expanded likewise, and the
603 contents of pbuffer, which are a copy of the input for callouts, must be
604 preserved (for when expansion happens for a data line). This is not the most
605 optimal way of handling this, but hey, this is just a test program!
606
607 Arguments:
608 f the file to read
609 start where in buffer to start (this *must* be within buffer)
610 prompt for stdin or readline()
611
612 Returns: pointer to the start of new data
613 could be a copy of start, or could be moved
614 NULL if no data read and EOF reached
615 */
616
617 static uschar *
618 extend_inputline(FILE *f, uschar *start, const char *prompt)
619 {
620 uschar *here = start;
621
622 for (;;)
623 {
624 int rlen = (int)(buffer_size - (here - buffer));
625
626 if (rlen > 1000)
627 {
628 int dlen;
629
630 /* If libreadline support is required, use readline() to read a line if the
631 input is a terminal. Note that readline() removes the trailing newline, so
632 we must put it back again, to be compatible with fgets(). */
633
634 #ifdef SUPPORT_LIBREADLINE
635 if (isatty(fileno(f)))
636 {
637 size_t len;
638 char *s = readline(prompt);
639 if (s == NULL) return (here == start)? NULL : start;
640 len = strlen(s);
641 if (len > 0) add_history(s);
642 if (len > rlen - 1) len = rlen - 1;
643 memcpy(here, s, len);
644 here[len] = '\n';
645 here[len+1] = 0;
646 free(s);
647 }
648 else
649 #endif
650
651 /* Read the next line by normal means, prompting if the file is stdin. */
652
653 {
654 if (f == stdin) printf("%s", prompt);
655 if (fgets((char *)here, rlen, f) == NULL)
656 return (here == start)? NULL : start;
657 }
658
659 dlen = (int)strlen((char *)here);
660 if (dlen > 0 && here[dlen - 1] == '\n') return start;
661 here += dlen;
662 }
663
664 else
665 {
666 int new_buffer_size = 2*buffer_size;
667 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
668 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
669 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
670
671 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
672 {
673 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
674 exit(1);
675 }
676
677 memcpy(new_buffer, buffer, buffer_size);
678 memcpy(new_pbuffer, pbuffer, buffer_size);
679
680 buffer_size = new_buffer_size;
681
682 start = new_buffer + (start - buffer);
683 here = new_buffer + (here - buffer);
684
685 free(buffer);
686 free(dbuffer);
687 free(pbuffer);
688
689 buffer = new_buffer;
690 dbuffer = new_dbuffer;
691 pbuffer = new_pbuffer;
692 }
693 }
694
695 return NULL; /* Control never gets here */
696 }
697
698
699
700
701
702
703
/*************************************************
*        Read number from string                 *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then a run of decimal digits is converted. If
there are no digits the result is 0 and *endptr points at the first
non-space character. A value too large for an int is clamped to INT_MAX
instead of overflowing; the remaining digits are still consumed.

Arguments:
  str          string to be converted
  endptr       where to put the end pointer

Returns:       the value, as a non-negative int
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while(*str != 0 && isspace(*str)) str++;
while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* result*10 + digit fits in an int only if result <= (INT_MAX-digit)/10 */

  if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
  }
*endptr = str;
return(result);
}
728
729
730
731
732 /*************************************************
733 * Convert UTF-8 string to value *
734 *************************************************/
735
736 /* This function takes one or more bytes that represents a UTF-8 character,
737 and returns the value of the character.
738
739 Argument:
740 utf8bytes a pointer to the byte vector
741 vptr a pointer to an int to receive the value
742
743 Returns: > 0 => the number of bytes consumed
744 -6 to 0 => malformed UTF-8 character at offset = (-return)
745 */
746
747 #if !defined NOUTF8
748
749 static int
750 utf82ord(unsigned char *utf8bytes, int *vptr)
751 {
752 int c = *utf8bytes++;
753 int d = c;
754 int i, j, s;
755
756 for (i = -1; i < 6; i++) /* i is number of additional bytes */
757 {
758 if ((d & 0x80) == 0) break;
759 d <<= 1;
760 }
761
762 if (i == -1) { *vptr = c; return 1; } /* ascii character */
763 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
764
765 /* i now has a value in the range 1-5 */
766
767 s = 6*i;
768 d = (c & utf8_table3[i]) << s;
769
770 for (j = 0; j < i; j++)
771 {
772 c = *utf8bytes++;
773 if ((c & 0xc0) != 0x80) return -(j+1);
774 s -= 6;
775 d |= (c & 0x3f) << s;
776 }
777
778 /* Check that encoding was the correct unique one */
779
780 for (j = 0; j < utf8_table1_size; j++)
781 if (d <= utf8_table1[j]) break;
782 if (j != i) return -(i+1);
783
784 /* Valid value */
785
786 *vptr = d;
787 return i+1;
788 }
789
790 #endif
791
792
793
794 /*************************************************
795 * Convert character value to UTF-8 *
796 *************************************************/
797
798 /* This function takes an integer value in the range 0 - 0x7fffffff
799 and encodes it as a UTF-8 character in 0 to 6 bytes.
800
801 Arguments:
802 cvalue the character value
803 utf8bytes pointer to buffer for result - at least 6 bytes long
804
805 Returns: number of characters placed in the buffer
806 */
807
808 #if !defined NOUTF8
809
810 static int
811 ord2utf8(int cvalue, uschar *utf8bytes)
812 {
813 register int i, j;
814 for (i = 0; i < utf8_table1_size; i++)
815 if (cvalue <= utf8_table1[i]) break;
816 utf8bytes += i;
817 for (j = i; j > 0; j--)
818 {
819 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
820 cvalue >>= 6;
821 }
822 *utf8bytes = utf8_table2[i] | cvalue;
823 return i + 1;
824 }
825
826 #endif
827
828
829
830 /*************************************************
831 * Print character string *
832 *************************************************/
833
834 /* Character string printing function. Must handle UTF-8 strings in utf8
835 mode. Yields number of characters printed. If handed a NULL file, just counts
836 chars without printing. */
837
838 static int pchars(unsigned char *p, int length, FILE *f)
839 {
840 int c = 0;
841 int yield = 0;
842
843 while (length-- > 0)
844 {
845 #if !defined NOUTF8
846 if (use_utf8)
847 {
848 int rc = utf82ord(p, &c);
849
850 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
851 {
852 length -= rc - 1;
853 p += rc;
854 if (PRINTHEX(c))
855 {
856 if (f != NULL) fprintf(f, "%c", c);
857 yield++;
858 }
859 else
860 {
861 int n = 4;
862 if (f != NULL) fprintf(f, "\\x{%02x}", c);
863 yield += (n <= 0x000000ff)? 2 :
864 (n <= 0x00000fff)? 3 :
865 (n <= 0x0000ffff)? 4 :
866 (n <= 0x000fffff)? 5 : 6;
867 }
868 continue;
869 }
870 }
871 #endif
872
873 /* Not UTF-8, or malformed UTF-8 */
874
875 c = *p++;
876 if (PRINTHEX(c))
877 {
878 if (f != NULL) fprintf(f, "%c", c);
879 yield++;
880 }
881 else
882 {
883 if (f != NULL) fprintf(f, "\\x%02x", c);
884 yield += 4;
885 }
886 }
887
888 return yield;
889 }
890
891
892
893 /*************************************************
894 * Callout function *
895 *************************************************/
896
897 /* Called from PCRE as a result of the (?C) item. We print out where we are in
898 the match. Yield zero unless more callouts than the fail count, or the callout
899 data is not zero. */
900
901 static int callout(pcre_callout_block *cb)
902 {
903 FILE *f = (first_callout | callout_extra)? outfile : NULL;
904 int i, pre_start, post_start, subject_length;
905
906 if (callout_extra)
907 {
908 fprintf(f, "Callout %d: last capture = %d\n",
909 cb->callout_number, cb->capture_last);
910
911 for (i = 0; i < cb->capture_top * 2; i += 2)
912 {
913 if (cb->offset_vector[i] < 0)
914 fprintf(f, "%2d: <unset>\n", i/2);
915 else
916 {
917 fprintf(f, "%2d: ", i/2);
918 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
919 cb->offset_vector[i+1] - cb->offset_vector[i], f);
920 fprintf(f, "\n");
921 }
922 }
923 }
924
925 /* Re-print the subject in canonical form, the first time or if giving full
926 datails. On subsequent calls in the same match, we use pchars just to find the
927 printed lengths of the substrings. */
928
929 if (f != NULL) fprintf(f, "--->");
930
931 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
932 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
933 cb->current_position - cb->start_match, f);
934
935 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
936
937 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
938 cb->subject_length - cb->current_position, f);
939
940 if (f != NULL) fprintf(f, "\n");
941
942 /* Always print appropriate indicators, with callout number if not already
943 shown. For automatic callouts, show the pattern offset. */
944
945 if (cb->callout_number == 255)
946 {
947 fprintf(outfile, "%+3d ", cb->pattern_position);
948 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
949 }
950 else
951 {
952 if (callout_extra) fprintf(outfile, " ");
953 else fprintf(outfile, "%3d ", cb->callout_number);
954 }
955
956 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
957 fprintf(outfile, "^");
958
959 if (post_start > 0)
960 {
961 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
962 fprintf(outfile, "^");
963 }
964
965 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
966 fprintf(outfile, " ");
967
968 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
969 pbuffer + cb->pattern_position);
970
971 fprintf(outfile, "\n");
972 first_callout = 0;
973
974 if (cb->mark != last_callout_mark)
975 {
976 fprintf(outfile, "Latest Mark: %s\n",
977 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
978 last_callout_mark = cb->mark;
979 }
980
981 if (cb->callout_data != NULL)
982 {
983 int callout_data = *((int *)(cb->callout_data));
984 if (callout_data != 0)
985 {
986 fprintf(outfile, "Callout data = %d\n", callout_data);
987 return callout_data;
988 }
989 }
990
991 return (cb->callout_number != callout_fail_id)? 0 :
992 (++callout_count >= callout_fail_count)? 1 : 0;
993 }
994
995
996 /*************************************************
997 * Local malloc functions *
998 *************************************************/
999
1000 /* Alternative malloc function, to test functionality and save the size of a
1001 compiled re. The show_malloc variable is set only during matching. */
1002
1003 static void *new_malloc(size_t size)
1004 {
1005 void *block = malloc(size);
1006 gotten_store = size;
1007 if (show_malloc)
1008 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1009 return block;
1010 }
1011
1012 static void new_free(void *block)
1013 {
1014 if (show_malloc)
1015 fprintf(outfile, "free %p\n", block);
1016 free(block);
1017 }
1018
1019 /* For recursion malloc/free, to test stacking calls */
1020
1021 static void *stack_malloc(size_t size)
1022 {
1023 void *block = malloc(size);
1024 if (show_malloc)
1025 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1026 return block;
1027 }
1028
1029 static void stack_free(void *block)
1030 {
1031 if (show_malloc)
1032 fprintf(outfile, "stack_free %p\n", block);
1033 free(block);
1034 }
1035
1036
1037 /*************************************************
1038 * Call pcre_fullinfo() *
1039 *************************************************/
1040
1041 /* Get one piece of information from the pcre_fullinfo() function */
1042
1043 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1044 {
1045 int rc;
1046 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1047 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1048 }
1049
1050
1051
1052 /*************************************************
1053 * Check for supported JIT architecture *
1054 *************************************************/
1055
1056 /* If it won't JIT-compile a very simple regex, return FALSE. */
1057
1058 static int check_jit_arch(void)
1059 {
1060 const char *error;
1061 int erroffset, rc;
1062 pcre *re = pcre_compile("abc", 0, &error, &erroffset, NULL);
1063 pcre_extra *extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);
1064 rc = extra != NULL && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
1065 extra->executable_jit != NULL;
1066 pcre_free_study(extra);
1067 free(re);
1068 return rc;
1069 }
1070
1071
1072 /*************************************************
1073 * Byte flipping function *
1074 *************************************************/
1075
/* Reverse the byte order of an integer field.

Arguments:
  value     the value whose low-order bytes are to be swapped
  n         size of the field in bytes: 2 selects a 16-bit swap; any other
              value is handled as a 32-bit swap

Returns:    the swapped value; bits above the swapped field are zero
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xffUL;
unsigned long int b1 = (value >> 8) & 0xffUL;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xffUL;
b3 = (value >> 24) & 0xffUL;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1085
1086
1087
1088
1089 /*************************************************
1090 * Check match or recursion limit *
1091 *************************************************/
1092
1093 static int
1094 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1095 int start_offset, int options, int *use_offsets, int use_size_offsets,
1096 int flag, unsigned long int *limit, int errnumber, const char *msg)
1097 {
1098 int count;
1099 int min = 0;
1100 int mid = 64;
1101 int max = -1;
1102
1103 extra->flags |= flag;
1104
1105 for (;;)
1106 {
1107 *limit = mid;
1108
1109 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1110 use_offsets, use_size_offsets);
1111
1112 if (count == errnumber)
1113 {
1114 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1115 min = mid;
1116 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1117 }
1118
1119 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1120 count == PCRE_ERROR_PARTIAL)
1121 {
1122 if (mid == min + 1)
1123 {
1124 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1125 break;
1126 }
1127 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1128 max = mid;
1129 mid = (min + mid)/2;
1130 }
1131 else break; /* Some other error */
1132 }
1133
1134 extra->flags &= ~flag;
1135 return count;
1136 }
1137
1138
1139
1140 /*************************************************
1141 * Case-independent strncmp() function *
1142 *************************************************/
1143
1144 /*
1145 Arguments:
1146 s first string
1147 t second string
1148 n number of characters to compare
1149
1150 Returns: < 0, = 0, or > 0, according to the comparison
1151 */
1152
1153 static int
1154 strncmpic(uschar *s, uschar *t, int n)
1155 {
1156 while (n--)
1157 {
1158 int c = tolower(*s++) - tolower(*t++);
1159 if (c) return c;
1160 }
1161 return 0;
1162 }
1163
1164
1165
1166 /*************************************************
1167 * Check newline indicator *
1168 *************************************************/
1169
1170 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1171 a message and return 0 if there is no match.
1172
1173 Arguments:
1174 p points after the leading '<'
1175 f file for error message
1176
1177 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1178 */
1179
1180 static int
1181 check_newline(uschar *p, FILE *f)
1182 {
1183 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1184 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1185 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1186 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1187 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1188 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1189 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1190 fprintf(f, "Unknown newline type at: <%s\n", p);
1191 return 0;
1192 }
1193
1194
1195
1196 /*************************************************
1197 * Usage function *
1198 *************************************************/
1199
/* Write the command-line help text to stdout. Lines for readline, DFA
matching, and the POSIX interface depend on SUPPORT_LIBREADLINE, NODFA, and
NOPOSIX respectively. */

static void
usage(void)
{
printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
       "Input and output default to stdin and stdout.\n");

#ifdef SUPPORT_LIBREADLINE
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif

printf("\nOptions:\n"
       " -b show compiled code (bytecode)\n"
       " -C show PCRE compile-time options and exit\n"
       " -d debug: show compiled code and information (-b and -i)\n");

#if !defined NODFA
printf(" -dfa force DFA matching for all subjects\n");
#endif

printf(" -help show usage information\n"
       " -i show information about compiled patterns\n"
       " -M find MATCH_LIMIT minimum for each subject\n"
       " -m output memory used information\n"
       " -o <n> set size of offsets vector to <n>\n");

#if !defined NOPOSIX
printf(" -p use POSIX interface\n");
#endif

printf(" -q quiet: do not output PCRE version number at start\n"
       " -S <n> set stack size to <n> megabytes\n"
       " -s force each pattern to be studied at basic level\n"
       " -s+ force each pattern to be studied, using JIT if available\n"
       " -t time compilation and execution\n"
       " -t <n> time compilation and execution, repeating <n> times\n"
       " -tm time execution (matching) only\n"
       " -tm <n> time execution (matching) only, repeating <n> times\n");
}
1234
1235
1236
1237 /*************************************************
1238 * Main Program *
1239 *************************************************/
1240
1241 /* Read lines from named file or stdin and write to named file or stdout; lines
1242 consist of a regular expression, in delimiters and optionally followed by
1243 options, followed by a set of test data, terminated by an empty line. */
1244
1245 int main(int argc, char **argv)
1246 {
1247 FILE *infile = stdin;
1248 int options = 0;
1249 int study_options = 0;
1250 int default_find_match_limit = FALSE;
1251 int op = 1;
1252 int timeit = 0;
1253 int timeitm = 0;
1254 int showinfo = 0;
1255 int showstore = 0;
1256 int force_study = -1;
1257 int force_study_options = 0;
1258 int quiet = 0;
1259 int size_offsets = 45;
1260 int size_offsets_max;
1261 int *offsets = NULL;
1262 #if !defined NOPOSIX
1263 int posix = 0;
1264 #endif
1265 int debug = 0;
1266 int done = 0;
1267 int all_use_dfa = 0;
1268 int yield = 0;
1269 int stack_size;
1270
1271 pcre_jit_stack *jit_stack = NULL;
1272
1273
1274 /* These vectors store, end-to-end, a list of captured substring names. Assume
1275 that 1024 is plenty long enough for the few names we'll be testing. */
1276
1277 uschar copynames[1024];
1278 uschar getnames[1024];
1279
1280 uschar *copynamesptr;
1281 uschar *getnamesptr;
1282
1283 /* Get buffers from malloc() so that Electric Fence will check their misuse
1284 when I am debugging. They grow automatically when very long lines are read. */
1285
1286 buffer = (unsigned char *)malloc(buffer_size);
1287 dbuffer = (unsigned char *)malloc(buffer_size);
1288 pbuffer = (unsigned char *)malloc(buffer_size);
1289
1290 /* The outfile variable is static so that new_malloc can use it. */
1291
1292 outfile = stdout;
1293
1294 /* The following _setmode() stuff is some Windows magic that tells its runtime
1295 library to translate CRLF into a single LF character. At least, that's what
1296 I've been told: never having used Windows I take this all on trust. Originally
1297 it set 0x8000, but then I was advised that _O_BINARY was better. */
1298
1299 #if defined(_WIN32) || defined(WIN32)
1300 _setmode( _fileno( stdout ), _O_BINARY );
1301 #endif
1302
1303 /* Scan options */
1304
1305 while (argc > 1 && argv[op][0] == '-')
1306 {
1307 unsigned char *endptr;
1308
1309 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1310 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1311 else if (strcmp(argv[op], "-s+") == 0)
1312 {
1313 force_study = 1;
1314 force_study_options = PCRE_STUDY_JIT_COMPILE;
1315 }
1316 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1317 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1318 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1319 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1320 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1321 #if !defined NODFA
1322 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1323 #endif
1324 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1325 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1326 *endptr == 0))
1327 {
1328 op++;
1329 argc--;
1330 }
1331 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1332 {
1333 int both = argv[op][2] == 0;
1334 int temp;
1335 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1336 *endptr == 0))
1337 {
1338 timeitm = temp;
1339 op++;
1340 argc--;
1341 }
1342 else timeitm = LOOPREPEAT;
1343 if (both) timeit = timeitm;
1344 }
1345 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1346 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1347 *endptr == 0))
1348 {
1349 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1350 printf("PCRE: -S not supported on this OS\n");
1351 exit(1);
1352 #else
1353 int rc;
1354 struct rlimit rlim;
1355 getrlimit(RLIMIT_STACK, &rlim);
1356 rlim.rlim_cur = stack_size * 1024 * 1024;
1357 rc = setrlimit(RLIMIT_STACK, &rlim);
1358 if (rc != 0)
1359 {
1360 printf("PCRE: setrlimit() failed with error %d\n", rc);
1361 exit(1);
1362 }
1363 op++;
1364 argc--;
1365 #endif
1366 }
1367 #if !defined NOPOSIX
1368 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1369 #endif
1370 else if (strcmp(argv[op], "-C") == 0)
1371 {
1372 int rc;
1373 unsigned long int lrc;
1374 printf("PCRE version %s\n", pcre_version());
1375 printf("Compiled with\n");
1376 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1377 printf(" %sUTF-8 support\n", rc? "" : "No ");
1378 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1379 printf(" %sUnicode properties support\n", rc? "" : "No ");
1380 (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1381 if (rc)
1382 printf(" Just-in-time compiler support%s\n", check_jit_arch()?
1383 "" : " (but this architecture is unsupported)");
1384 else
1385 printf(" No just-in-time compiler support\n");
1386 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1387 /* Note that these values are always the ASCII values, even
1388 in EBCDIC environments. CR is 13 and NL is 10. */
1389 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1390 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1391 (rc == -2)? "ANYCRLF" :
1392 (rc == -1)? "ANY" : "???");
1393 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1394 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1395 "all Unicode newlines");
1396 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1397 printf(" Internal link size = %d\n", rc);
1398 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1399 printf(" POSIX malloc threshold = %d\n", rc);
1400 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1401 printf(" Default match limit = %ld\n", lrc);
1402 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1403 printf(" Default recursion depth limit = %ld\n", lrc);
1404 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1405 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1406 goto EXIT;
1407 }
1408 else if (strcmp(argv[op], "-help") == 0 ||
1409 strcmp(argv[op], "--help") == 0)
1410 {
1411 usage();
1412 goto EXIT;
1413 }
1414 else
1415 {
1416 printf("** Unknown or malformed option %s\n", argv[op]);
1417 usage();
1418 yield = 1;
1419 goto EXIT;
1420 }
1421 op++;
1422 argc--;
1423 }
1424
1425 /* Get the store for the offsets vector, and remember what it was */
1426
1427 size_offsets_max = size_offsets;
1428 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1429 if (offsets == NULL)
1430 {
1431 printf("** Failed to get %d bytes of memory for offsets vector\n",
1432 (int)(size_offsets_max * sizeof(int)));
1433 yield = 1;
1434 goto EXIT;
1435 }
1436
1437 /* Sort out the input and output files */
1438
1439 if (argc > 1)
1440 {
1441 infile = fopen(argv[op], INPUT_MODE);
1442 if (infile == NULL)
1443 {
1444 printf("** Failed to open %s\n", argv[op]);
1445 yield = 1;
1446 goto EXIT;
1447 }
1448 }
1449
1450 if (argc > 2)
1451 {
1452 outfile = fopen(argv[op+1], OUTPUT_MODE);
1453 if (outfile == NULL)
1454 {
1455 printf("** Failed to open %s\n", argv[op+1]);
1456 yield = 1;
1457 goto EXIT;
1458 }
1459 }
1460
1461 /* Set alternative malloc function */
1462
1463 pcre_malloc = new_malloc;
1464 pcre_free = new_free;
1465 pcre_stack_malloc = stack_malloc;
1466 pcre_stack_free = stack_free;
1467
1468 /* Heading line unless quiet, then prompt for first regex if stdin */
1469
1470 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1471
1472 /* Main loop */
1473
1474 while (!done)
1475 {
1476 pcre *re = NULL;
1477 pcre_extra *extra = NULL;
1478
1479 #if !defined NOPOSIX /* There are still compilers that require no indent */
1480 regex_t preg;
1481 int do_posix = 0;
1482 #endif
1483
1484 const char *error;
1485 unsigned char *markptr;
1486 unsigned char *p, *pp, *ppp;
1487 unsigned char *to_file = NULL;
1488 const unsigned char *tables = NULL;
1489 unsigned long int true_size, true_study_size = 0;
1490 size_t size, regex_gotten_store;
1491 int do_allcaps = 0;
1492 int do_mark = 0;
1493 int do_study = 0;
1494 int no_force_study = 0;
1495 int do_debug = debug;
1496 int do_G = 0;
1497 int do_g = 0;
1498 int do_showinfo = showinfo;
1499 int do_showrest = 0;
1500 int do_showcaprest = 0;
1501 int do_flip = 0;
1502 int erroroffset, len, delimiter, poffset;
1503
1504 use_utf8 = 0;
1505 debug_lengths = 1;
1506
1507 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1508 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1509 fflush(outfile);
1510
1511 p = buffer;
1512 while (isspace(*p)) p++;
1513 if (*p == 0) continue;
1514
1515 /* See if the pattern is to be loaded pre-compiled from a file. */
1516
1517 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1518 {
1519 unsigned long int magic, get_options;
1520 uschar sbuf[8];
1521 FILE *f;
1522
1523 p++;
1524 pp = p + (int)strlen((char *)p);
1525 while (isspace(pp[-1])) pp--;
1526 *pp = 0;
1527
1528 f = fopen((char *)p, "rb");
1529 if (f == NULL)
1530 {
1531 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1532 continue;
1533 }
1534
1535 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1536
1537 true_size =
1538 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1539 true_study_size =
1540 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1541
1542 re = (real_pcre *)new_malloc(true_size);
1543 regex_gotten_store = gotten_store;
1544
1545 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1546
1547 magic = ((real_pcre *)re)->magic_number;
1548 if (magic != MAGIC_NUMBER)
1549 {
1550 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1551 {
1552 do_flip = 1;
1553 }
1554 else
1555 {
1556 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1557 fclose(f);
1558 continue;
1559 }
1560 }
1561
1562 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1563 do_flip? " (byte-inverted)" : "", p);
1564
1565 /* Need to know if UTF-8 for printing data strings */
1566
1567 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1568 use_utf8 = (get_options & PCRE_UTF8) != 0;
1569
1570 /* Now see if there is any following study data. */
1571
1572 if (true_study_size != 0)
1573 {
1574 pcre_study_data *psd;
1575
1576 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1577 extra->flags = PCRE_EXTRA_STUDY_DATA;
1578
1579 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1580 extra->study_data = psd;
1581
1582 if (fread(psd, 1, true_study_size, f) != true_study_size)
1583 {
1584 FAIL_READ:
1585 fprintf(outfile, "Failed to read data from %s\n", p);
1586 if (extra != NULL) pcre_free_study(extra);
1587 if (re != NULL) new_free(re);
1588 fclose(f);
1589 continue;
1590 }
1591 fprintf(outfile, "Study data loaded from %s\n", p);
1592 do_study = 1; /* To get the data output if requested */
1593 }
1594 else fprintf(outfile, "No study data\n");
1595
1596 fclose(f);
1597 goto SHOW_INFO;
1598 }
1599
1600 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1601 the pattern; if it isn't complete, read more. */
1602
1603 delimiter = *p++;
1604
1605 if (isalnum(delimiter) || delimiter == '\\')
1606 {
1607 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1608 goto SKIP_DATA;
1609 }
1610
1611 pp = p;
1612 poffset = (int)(p - buffer);
1613
1614 for(;;)
1615 {
1616 while (*pp != 0)
1617 {
1618 if (*pp == '\\' && pp[1] != 0) pp++;
1619 else if (*pp == delimiter) break;
1620 pp++;
1621 }
1622 if (*pp != 0) break;
1623 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1624 {
1625 fprintf(outfile, "** Unexpected EOF\n");
1626 done = 1;
1627 goto CONTINUE;
1628 }
1629 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1630 }
1631
1632 /* The buffer may have moved while being extended; reset the start of data
1633 pointer to the correct relative point in the buffer. */
1634
1635 p = buffer + poffset;
1636
1637 /* If the first character after the delimiter is backslash, make
1638 the pattern end with backslash. This is purely to provide a way
1639 of testing for the error message when a pattern ends with backslash. */
1640
1641 if (pp[1] == '\\') *pp++ = '\\';
1642
1643 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1644 for callouts. */
1645
1646 *pp++ = 0;
1647 strcpy((char *)pbuffer, (char *)p);
1648
1649 /* Look for options after final delimiter */
1650
1651 options = 0;
1652 log_store = showstore; /* default from command line */
1653
1654 while (*pp != 0)
1655 {
1656 switch (*pp++)
1657 {
1658 case 'f': options |= PCRE_FIRSTLINE; break;
1659 case 'g': do_g = 1; break;
1660 case 'i': options |= PCRE_CASELESS; break;
1661 case 'm': options |= PCRE_MULTILINE; break;
1662 case 's': options |= PCRE_DOTALL; break;
1663 case 'x': options |= PCRE_EXTENDED; break;
1664
1665 case '+':
1666 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1667 break;
1668
1669 case '=': do_allcaps = 1; break;
1670 case 'A': options |= PCRE_ANCHORED; break;
1671 case 'B': do_debug = 1; break;
1672 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1673 case 'D': do_debug = do_showinfo = 1; break;
1674 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1675 case 'F': do_flip = 1; break;
1676 case 'G': do_G = 1; break;
1677 case 'I': do_showinfo = 1; break;
1678 case 'J': options |= PCRE_DUPNAMES; break;
1679 case 'K': do_mark = 1; break;
1680 case 'M': log_store = 1; break;
1681 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1682
1683 #if !defined NOPOSIX
1684 case 'P': do_posix = 1; break;
1685 #endif
1686
1687 case 'S':
1688 if (do_study == 0)
1689 {
1690 do_study = 1;
1691 if (*pp == '+')
1692 {
1693 study_options |= PCRE_STUDY_JIT_COMPILE;
1694 pp++;
1695 }
1696 }
1697 else
1698 {
1699 do_study = 0;
1700 no_force_study = 1;
1701 }
1702 break;
1703
1704 case 'U': options |= PCRE_UNGREEDY; break;
1705 case 'W': options |= PCRE_UCP; break;
1706 case 'X': options |= PCRE_EXTRA; break;
1707 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1708 case 'Z': debug_lengths = 0; break;
1709 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1710 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1711
1712 case 'T':
1713 switch (*pp++)
1714 {
1715 case '0': tables = tables0; break;
1716 case '1': tables = tables1; break;
1717
1718 case '\r':
1719 case '\n':
1720 case ' ':
1721 case 0:
1722 fprintf(outfile, "** Missing table number after /T\n");
1723 goto SKIP_DATA;
1724
1725 default:
1726 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1727 goto SKIP_DATA;
1728 }
1729 break;
1730
1731 case 'L':
1732 ppp = pp;
1733 /* The '\r' test here is so that it works on Windows. */
1734 /* The '0' test is just in case this is an unterminated line. */
1735 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1736 *ppp = 0;
1737 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1738 {
1739 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1740 goto SKIP_DATA;
1741 }
1742 locale_set = 1;
1743 tables = pcre_maketables();
1744 pp = ppp;
1745 break;
1746
1747 case '>':
1748 to_file = pp;
1749 while (*pp != 0) pp++;
1750 while (isspace(pp[-1])) pp--;
1751 *pp = 0;
1752 break;
1753
1754 case '<':
1755 {
1756 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1757 {
1758 options |= PCRE_JAVASCRIPT_COMPAT;
1759 pp += 3;
1760 }
1761 else
1762 {
1763 int x = check_newline(pp, outfile);
1764 if (x == 0) goto SKIP_DATA;
1765 options |= x;
1766 while (*pp++ != '>');
1767 }
1768 }
1769 break;
1770
1771 case '\r': /* So that it works in Windows */
1772 case '\n':
1773 case ' ':
1774 break;
1775
1776 default:
1777 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1778 goto SKIP_DATA;
1779 }
1780 }
1781
1782 /* Handle compiling via the POSIX interface, which doesn't support the
1783 timing, showing, or debugging options, nor the ability to pass over
1784 local character tables. */
1785
1786 #if !defined NOPOSIX
1787 if (posix || do_posix)
1788 {
1789 int rc;
1790 int cflags = 0;
1791
1792 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1793 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1794 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1795 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1796 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1797 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1798 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1799
1800 rc = regcomp(&preg, (char *)p, cflags);
1801
1802 /* Compilation failed; go back for another re, skipping to blank line
1803 if non-interactive. */
1804
1805 if (rc != 0)
1806 {
1807 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1808 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1809 goto SKIP_DATA;
1810 }
1811 }
1812
1813 /* Handle compiling via the native interface */
1814
1815 else
1816 #endif /* !defined NOPOSIX */
1817
1818 {
1819 unsigned long int get_options;
1820
1821 if (timeit > 0)
1822 {
1823 register int i;
1824 clock_t time_taken;
1825 clock_t start_time = clock();
1826 for (i = 0; i < timeit; i++)
1827 {
1828 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1829 if (re != NULL) free(re);
1830 }
1831 time_taken = clock() - start_time;
1832 fprintf(outfile, "Compile time %.4f milliseconds\n",
1833 (((double)time_taken * 1000.0) / (double)timeit) /
1834 (double)CLOCKS_PER_SEC);
1835 }
1836
1837 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1838
1839 /* Compilation failed; go back for another re, skipping to blank line
1840 if non-interactive. */
1841
1842 if (re == NULL)
1843 {
1844 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1845 SKIP_DATA:
1846 if (infile != stdin)
1847 {
1848 for (;;)
1849 {
1850 if (extend_inputline(infile, buffer, NULL) == NULL)
1851 {
1852 done = 1;
1853 goto CONTINUE;
1854 }
1855 len = (int)strlen((char *)buffer);
1856 while (len > 0 && isspace(buffer[len-1])) len--;
1857 if (len == 0) break;
1858 }
1859 fprintf(outfile, "\n");
1860 }
1861 goto CONTINUE;
1862 }
1863
1864 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1865 within the regex; check for this so that we know how to process the data
1866 lines. */
1867
1868 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1869 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1870
1871 /* Print information if required. There are now two info-returning
1872 functions. The old one has a limited interface and returns only limited
1873 data. Check that it agrees with the newer one. */
1874
1875 if (log_store)
1876 fprintf(outfile, "Memory allocation (code space): %d\n",
1877 (int)(gotten_store -
1878 sizeof(real_pcre) -
1879 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1880
1881 /* Extract the size for possible writing before possibly flipping it,
1882 and remember the store that was got. */
1883
1884 true_size = ((real_pcre *)re)->size;
1885 regex_gotten_store = gotten_store;
1886
1887 /* If -s or /S was present, study the regex to generate additional info to
1888 help with the matching, unless the pattern has the SS option, which
1889 suppresses the effect of /S (used for a few test patterns where studying is
1890 never sensible). */
1891
1892 if (do_study || (force_study >= 0 && !no_force_study))
1893 {
1894 if (timeit > 0)
1895 {
1896 register int i;
1897 clock_t time_taken;
1898 clock_t start_time = clock();
1899 for (i = 0; i < timeit; i++)
1900 extra = pcre_study(re, study_options | force_study_options, &error);
1901 time_taken = clock() - start_time;
1902 if (extra != NULL) pcre_free_study(extra);
1903 fprintf(outfile, " Study time %.4f milliseconds\n",
1904 (((double)time_taken * 1000.0) / (double)timeit) /
1905 (double)CLOCKS_PER_SEC);
1906 }
1907 extra = pcre_study(re, study_options | force_study_options, &error);
1908 if (error != NULL)
1909 fprintf(outfile, "Failed to study: %s\n", error);
1910 else if (extra != NULL)
1911 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1912 }
1913
1914 /* If /K was present, we set up for handling MARK data. */
1915
1916 if (do_mark)
1917 {
1918 if (extra == NULL)
1919 {
1920 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1921 extra->flags = 0;
1922 }
1923 extra->mark = &markptr;
1924 extra->flags |= PCRE_EXTRA_MARK;
1925 }
1926
1927 /* If the 'F' option was present, we flip the bytes of all the integer
1928 fields in the regex data block and the study block. This is to make it
1929 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1930 compiled on a different architecture. */
1931
1932 if (do_flip)
1933 {
1934 real_pcre *rre = (real_pcre *)re;
1935 rre->magic_number =
1936 byteflip(rre->magic_number, sizeof(rre->magic_number));
1937 rre->size = byteflip(rre->size, sizeof(rre->size));
1938 rre->options = byteflip(rre->options, sizeof(rre->options));
1939 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1940 rre->top_bracket =
1941 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1942 rre->top_backref =
1943 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1944 rre->first_byte =
1945 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1946 rre->req_byte =
1947 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1948 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1949 sizeof(rre->name_table_offset));
1950 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1951 sizeof(rre->name_entry_size));
1952 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1953 sizeof(rre->name_count));
1954
1955 if (extra != NULL)
1956 {
1957 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1958 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1959 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1960 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1961 }
1962 }
1963
1964 /* Extract information from the compiled data if required */
1965
1966 SHOW_INFO:
1967
1968 if (do_debug)
1969 {
1970 fprintf(outfile, "------------------------------------------------------------------\n");
1971 pcre_printint(re, outfile, debug_lengths);
1972 }
1973
1974 /* We already have the options in get_options (see above) */
1975
1976 if (do_showinfo)
1977 {
1978 unsigned long int all_options;
1979 #if !defined NOINFOCHECK
1980 int old_first_char, old_options, old_count;
1981 #endif
1982 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1983 hascrorlf;
1984 int nameentrysize, namecount;
1985 const uschar *nametable;
1986
1987 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1988 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1989 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1990 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1991 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1992 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1993 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1994 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1995 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1996 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1997 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1998
1999 #if !defined NOINFOCHECK
2000 old_count = pcre_info(re, &old_options, &old_first_char);
2001 if (count < 0) fprintf(outfile,
2002 "Error %d from pcre_info()\n", count);
2003 else
2004 {
2005 if (old_count != count) fprintf(outfile,
2006 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2007 old_count);
2008
2009 if (old_first_char != first_char) fprintf(outfile,
2010 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2011 first_char, old_first_char);
2012
2013 if (old_options != (int)get_options) fprintf(outfile,
2014 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2015 get_options, old_options);
2016 }
2017 #endif
2018
2019 if (size != regex_gotten_store) fprintf(outfile,
2020 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2021 (int)size, (int)regex_gotten_store);
2022
2023 fprintf(outfile, "Capturing subpattern count = %d\n", count);
2024 if (backrefmax > 0)
2025 fprintf(outfile, "Max back reference = %d\n", backrefmax);
2026
2027 if (namecount > 0)
2028 {
2029 fprintf(outfile, "Named capturing subpatterns:\n");
2030 while (namecount-- > 0)
2031 {
2032 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
2033 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2034 GET2(nametable, 0));
2035 nametable += nameentrysize;
2036 }
2037 }
2038
2039 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2040 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2041
2042 all_options = ((real_pcre *)re)->options;
2043 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2044
2045 if (get_options == 0) fprintf(outfile, "No options\n");
2046 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2047 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2048 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2049 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2050 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2051 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2052 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2053 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2054 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2055 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2056 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2057 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2058 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2059 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2060 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2061 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2062 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2063 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2064
2065 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2066
2067 switch (get_options & PCRE_NEWLINE_BITS)
2068 {
2069 case PCRE_NEWLINE_CR:
2070 fprintf(outfile, "Forced newline sequence: CR\n");
2071 break;
2072
2073 case PCRE_NEWLINE_LF:
2074 fprintf(outfile, "Forced newline sequence: LF\n");
2075 break;
2076
2077 case PCRE_NEWLINE_CRLF:
2078 fprintf(outfile, "Forced newline sequence: CRLF\n");
2079 break;
2080
2081 case PCRE_NEWLINE_ANYCRLF:
2082 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2083 break;
2084
2085 case PCRE_NEWLINE_ANY:
2086 fprintf(outfile, "Forced newline sequence: ANY\n");
2087 break;
2088
2089 default:
2090 break;
2091 }
2092
2093 if (first_char == -1)
2094 {
2095 fprintf(outfile, "First char at start or follows newline\n");
2096 }
2097 else if (first_char < 0)
2098 {
2099 fprintf(outfile, "No first char\n");
2100 }
2101 else
2102 {
2103 int ch = first_char & 255;
2104 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2105 "" : " (caseless)";
2106 if (PRINTHEX(ch))
2107 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2108 else
2109 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2110 }
2111
2112 if (need_char < 0)
2113 {
2114 fprintf(outfile, "No need char\n");
2115 }
2116 else
2117 {
2118 int ch = need_char & 255;
2119 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2120 "" : " (caseless)";
2121 if (PRINTHEX(ch))
2122 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2123 else
2124 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2125 }
2126
2127 /* Don't output study size; at present it is in any case a fixed
2128 value, but it varies, depending on the computer architecture, and
2129 so messes up the test suite. (And with the /F option, it might be
2130 flipped.) If study was forced by an external -s, don't show this
2131 information unless -i or -d was also present. This means that, except
2132 when auto-callouts are involved, the output from runs with and without
2133 -s should be identical. */
2134
2135 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2136 {
2137 if (extra == NULL)
2138 fprintf(outfile, "Study returned NULL\n");
2139 else
2140 {
2141 uschar *start_bits = NULL;
2142 int minlength;
2143
2144 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2145 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2146
2147 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2148 if (start_bits == NULL)
2149 fprintf(outfile, "No set of starting bytes\n");
2150 else
2151 {
2152 int i;
2153 int c = 24;
2154 fprintf(outfile, "Starting byte set: ");
2155 for (i = 0; i < 256; i++)
2156 {
2157 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2158 {
2159 if (c > 75)
2160 {
2161 fprintf(outfile, "\n ");
2162 c = 2;
2163 }
2164 if (PRINTHEX(i) && i != ' ')
2165 {
2166 fprintf(outfile, "%c ", i);
2167 c += 2;
2168 }
2169 else
2170 {
2171 fprintf(outfile, "\\x%02x ", i);
2172 c += 5;
2173 }
2174 }
2175 }
2176 fprintf(outfile, "\n");
2177 }
2178 }
2179
2180 /* Show this only if the JIT was set by /S, not by -s. */
2181
2182 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2183 {
2184 int jit;
2185 new_info(re, extra, PCRE_INFO_JIT, &jit);
2186 if (jit)
2187 fprintf(outfile, "JIT study was successful\n");
2188 else
2189 #ifdef SUPPORT_JIT
2190 fprintf(outfile, "JIT study was not successful\n");
2191 #else
2192 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2193 #endif
2194 }
2195 }
2196 }
2197
2198 /* If the '>' option was present, we write out the regex to a file, and
2199 that is all. The first 8 bytes of the file are the regex length and then
2200 the study length, in big-endian order. */
2201
2202 if (to_file != NULL)
2203 {
2204 FILE *f = fopen((char *)to_file, "wb");
2205 if (f == NULL)
2206 {
2207 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2208 }
2209 else
2210 {
2211 uschar sbuf[8];
2212 sbuf[0] = (uschar)((true_size >> 24) & 255);
2213 sbuf[1] = (uschar)((true_size >> 16) & 255);
2214 sbuf[2] = (uschar)((true_size >> 8) & 255);
2215 sbuf[3] = (uschar)((true_size) & 255);
2216
2217 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2218 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2219 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2220 sbuf[7] = (uschar)((true_study_size) & 255);
2221
2222 if (fwrite(sbuf, 1, 8, f) < 8 ||
2223 fwrite(re, 1, true_size, f) < true_size)
2224 {
2225 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2226 }
2227 else
2228 {
2229 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2230
2231 /* If there is study data, write it. */
2232
2233 if (extra != NULL)
2234 {
2235 if (fwrite(extra->study_data, 1, true_study_size, f) <
2236 true_study_size)
2237 {
2238 fprintf(outfile, "Write error on %s: %s\n", to_file,
2239 strerror(errno));
2240 }
2241 else fprintf(outfile, "Study data written to %s\n", to_file);
2242 }
2243 }
2244 fclose(f);
2245 }
2246
2247 new_free(re);
2248 if (extra != NULL) pcre_free_study(extra);
2249 if (locale_set)
2250 {
2251 new_free((void *)tables);
2252 setlocale(LC_CTYPE, "C");
2253 locale_set = 0;
2254 }
2255 continue; /* With next regex */
2256 }
2257 } /* End of non-POSIX compile */
2258
2259 /* Read data lines and test them */
2260
2261 for (;;)
2262 {
2263 uschar *q;
2264 uschar *bptr;
2265 int *use_offsets = offsets;
2266 int use_size_offsets = size_offsets;
2267 int callout_data = 0;
2268 int callout_data_set = 0;
2269 int count, c;
2270 int copystrings = 0;
2271 int find_match_limit = default_find_match_limit;
2272 int getstrings = 0;
2273 int getlist = 0;
2274 int gmatched = 0;
2275 int start_offset = 0;
2276 int start_offset_sign = 1;
2277 int g_notempty = 0;
2278 int use_dfa = 0;
2279
2280 options = 0;
2281
2282 *copynames = 0;
2283 *getnames = 0;
2284
2285 copynamesptr = copynames;
2286 getnamesptr = getnames;
2287
2288 pcre_callout = callout;
2289 first_callout = 1;
2290 last_callout_mark = NULL;
2291 callout_extra = 0;
2292 callout_count = 0;
2293 callout_fail_count = 999999;
2294 callout_fail_id = -1;
2295 show_malloc = 0;
2296
2297 if (extra != NULL) extra->flags &=
2298 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2299
2300 len = 0;
2301 for (;;)
2302 {
2303 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2304 {
2305 if (len > 0) /* Reached EOF without hitting a newline */
2306 {
2307 fprintf(outfile, "\n");
2308 break;
2309 }
2310 done = 1;
2311 goto CONTINUE;
2312 }
2313 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2314 len = (int)strlen((char *)buffer);
2315 if (buffer[len-1] == '\n') break;
2316 }
2317
2318 while (len > 0 && isspace(buffer[len-1])) len--;
2319 buffer[len] = 0;
2320 if (len == 0) break;
2321
2322 p = buffer;
2323 while (isspace(*p)) p++;
2324
2325 bptr = q = dbuffer;
2326 while ((c = *p++) != 0)
2327 {
2328 int i = 0;
2329 int n = 0;
2330
2331 if (c == '\\') switch ((c = *p++))
2332 {
2333 case 'a': c = 7; break;
2334 case 'b': c = '\b'; break;
2335 case 'e': c = 27; break;
2336 case 'f': c = '\f'; break;
2337 case 'n': c = '\n'; break;
2338 case 'r': c = '\r'; break;
2339 case 't': c = '\t'; break;
2340 case 'v': c = '\v'; break;
2341
2342 case '0': case '1': case '2': case '3':
2343 case '4': case '5': case '6': case '7':
2344 c -= '0';
2345 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2346 c = c * 8 + *p++ - '0';
2347
2348 #if !defined NOUTF8
2349 if (use_utf8 && c > 255)
2350 {
2351 unsigned char buff8[8];
2352 int ii, utn;
2353 utn = ord2utf8(c, buff8);
2354 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2355 c = buff8[ii]; /* Last byte */
2356 }
2357 #endif
2358 break;
2359
2360 case 'x':
2361
2362 /* Handle \x{..} specially - new Perl thing for utf8 */
2363
2364 #if !defined NOUTF8
2365 if (*p == '{')
2366 {
2367 unsigned char *pt = p;
2368 c = 0;
2369 while (isxdigit(*(++pt)))
2370 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2371 if (*pt == '}')
2372 {
2373 unsigned char buff8[8];
2374 int ii, utn;
2375 if (use_utf8)
2376 {
2377 utn = ord2utf8(c, buff8);
2378 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2379 c = buff8[ii]; /* Last byte */
2380 }
2381 else
2382 {
2383 if (c > 255)
2384 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2385 "UTF-8 mode is not enabled.\n"
2386 "** Truncation will probably give the wrong result.\n", c);
2387 }
2388 p = pt + 1;
2389 break;
2390 }
2391 /* Not correct form; fall through */
2392 }
2393 #endif
2394
2395 /* Ordinary \x */
2396
2397 c = 0;
2398 while (i++ < 2 && isxdigit(*p))
2399 {
2400 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2401 p++;
2402 }
2403 break;
2404
2405 case 0: /* \ followed by EOF allows for an empty line */
2406 p--;
2407 continue;
2408
2409 case '>':
2410 if (*p == '-')
2411 {
2412 start_offset_sign = -1;
2413 p++;
2414 }
2415 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2416 start_offset *= start_offset_sign;
2417 continue;
2418
2419 case 'A': /* Option setting */
2420 options |= PCRE_ANCHORED;
2421 continue;
2422
2423 case 'B':
2424 options |= PCRE_NOTBOL;
2425 continue;
2426
2427 case 'C':
2428 if (isdigit(*p)) /* Set copy string */
2429 {
2430 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2431 copystrings |= 1 << n;
2432 }
2433 else if (isalnum(*p))
2434 {
2435 uschar *npp = copynamesptr;
2436 while (isalnum(*p)) *npp++ = *p++;
2437 *npp++ = 0;
2438 *npp = 0;
2439 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2440 if (n < 0)
2441 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2442 copynamesptr = npp;
2443 }
2444 else if (*p == '+')
2445 {
2446 callout_extra = 1;
2447 p++;
2448 }
2449 else if (*p == '-')
2450 {
2451 pcre_callout = NULL;
2452 p++;
2453 }
2454 else if (*p == '!')
2455 {
2456 callout_fail_id = 0;
2457 p++;
2458 while(isdigit(*p))
2459 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2460 callout_fail_count = 0;
2461 if (*p == '!')
2462 {
2463 p++;
2464 while(isdigit(*p))
2465 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2466 }
2467 }
2468 else if (*p == '*')
2469 {
2470 int sign = 1;
2471 callout_data = 0;
2472 if (*(++p) == '-') { sign = -1; p++; }
2473 while(isdigit(*p))
2474 callout_data = callout_data * 10 + *p++ - '0';
2475 callout_data *= sign;
2476 callout_data_set = 1;
2477 }
2478 continue;
2479
2480 #if !defined NODFA
2481 case 'D':
2482 #if !defined NOPOSIX
2483 if (posix || do_posix)
2484 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2485 else
2486 #endif
2487 use_dfa = 1;
2488 continue;
2489 #endif
2490
2491 #if !defined NODFA
2492 case 'F':
2493 options |= PCRE_DFA_SHORTEST;
2494 continue;
2495 #endif
2496
2497 case 'G':
2498 if (isdigit(*p))
2499 {
2500 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2501 getstrings |= 1 << n;
2502 }
2503 else if (isalnum(*p))
2504 {
2505 uschar *npp = getnamesptr;
2506 while (isalnum(*p)) *npp++ = *p++;
2507 *npp++ = 0;
2508 *npp = 0;
2509 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2510 if (n < 0)
2511 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2512 getnamesptr = npp;
2513 }
2514 continue;
2515
2516 case 'J':
2517 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2518 if (extra != NULL
2519 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2520 && extra->executable_jit != NULL)
2521 {
2522 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2523 jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2524 pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2525 }
2526 continue;
2527
2528 case 'L':
2529 getlist = 1;
2530 continue;
2531
2532 case 'M':
2533 find_match_limit = 1;
2534 continue;
2535
2536 case 'N':
2537 if ((options & PCRE_NOTEMPTY) != 0)
2538 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2539 else
2540 options |= PCRE_NOTEMPTY;
2541 continue;
2542
2543 case 'O':
2544 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2545 if (n > size_offsets_max)
2546 {
2547 size_offsets_max = n;
2548 free(offsets);
2549 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2550 if (offsets == NULL)
2551 {
2552 printf("** Failed to get %d bytes of memory for offsets vector\n",
2553 (int)(size_offsets_max * sizeof(int)));
2554 yield = 1;
2555 goto EXIT;
2556 }
2557 }
2558 use_size_offsets = n;
2559 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2560 continue;
2561
2562 case 'P':
2563 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2564 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2565 continue;
2566
2567 case 'Q':
2568 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2569 if (extra == NULL)
2570 {
2571 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2572 extra->flags = 0;
2573 }
2574 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2575 extra->match_limit_recursion = n;
2576 continue;
2577
2578 case 'q':
2579 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2580 if (extra == NULL)
2581 {
2582 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2583 extra->flags = 0;
2584 }
2585 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2586 extra->match_limit = n;
2587 continue;
2588
2589 #if !defined NODFA
2590 case 'R':
2591 options |= PCRE_DFA_RESTART;
2592 continue;
2593 #endif
2594
2595 case 'S':
2596 show_malloc = 1;
2597 continue;
2598
2599 case 'Y':
2600 options |= PCRE_NO_START_OPTIMIZE;
2601 continue;
2602
2603 case 'Z':
2604 options |= PCRE_NOTEOL;
2605 continue;
2606
2607 case '?':
2608 options |= PCRE_NO_UTF8_CHECK;
2609 continue;
2610
2611 case '<':
2612 {
2613 int x = check_newline(p, outfile);
2614 if (x == 0) goto NEXT_DATA;
2615 options |= x;
2616 while (*p++ != '>');
2617 }
2618 continue;
2619 }
2620 *q++ = c;
2621 }
2622 *q = 0;
2623 len = (int)(q - dbuffer);
2624
2625 /* Move the data to the end of the buffer so that a read over the end of
2626 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2627 we are using the POSIX interface, we must include the terminating zero. */
2628
2629 #if !defined NOPOSIX
2630 if (posix || do_posix)
2631 {
2632 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2633 bptr += buffer_size - len - 1;
2634 }
2635 else
2636 #endif
2637 {
2638 memmove(bptr + buffer_size - len, bptr, len);
2639 bptr += buffer_size - len;
2640 }
2641
2642 if ((all_use_dfa || use_dfa) && find_match_limit)
2643 {
2644 printf("**Match limit not relevant for DFA matching: ignored\n");
2645 find_match_limit = 0;
2646 }
2647
2648 /* Handle matching via the POSIX interface, which does not
2649 support timing or playing with the match limit or callout data. */
2650
2651 #if !defined NOPOSIX
2652 if (posix || do_posix)
2653 {
2654 int rc;
2655 int eflags = 0;
2656 regmatch_t *pmatch = NULL;
2657 if (use_size_offsets > 0)
2658 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2659 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2660 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2661 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2662
2663 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2664
2665 if (rc != 0)
2666 {
2667 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2668 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2669 }
2670 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2671 != 0)
2672 {
2673 fprintf(outfile, "Matched with REG_NOSUB\n");
2674 }
2675 else
2676 {
2677 size_t i;
2678 for (i = 0; i < (size_t)use_size_offsets; i++)
2679 {
2680 if (pmatch[i].rm_so >= 0)
2681 {
2682 fprintf(outfile, "%2d: ", (int)i);
2683 (void)pchars(dbuffer + pmatch[i].rm_so,
2684 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2685 fprintf(outfile, "\n");
2686 if (do_showcaprest || (i == 0 && do_showrest))
2687 {
2688 fprintf(outfile, "%2d+ ", (int)i);
2689 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2690 outfile);
2691 fprintf(outfile, "\n");
2692 }
2693 }
2694 }
2695 }
2696 free(pmatch);
2697 }
2698
2699 /* Handle matching via the native interface - repeats for /g and /G */
2700
2701 else
2702 #endif /* !defined NOPOSIX */
2703
2704 for (;; gmatched++) /* Loop for /g or /G */
2705 {
2706 markptr = NULL;
2707
2708 if (timeitm > 0)
2709 {
2710 register int i;
2711 clock_t time_taken;
2712 clock_t start_time = clock();
2713
2714 #if !defined NODFA
2715 if (all_use_dfa || use_dfa)
2716 {
2717 int workspace[1000];
2718 for (i = 0; i < timeitm; i++)
2719 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2720 options | g_notempty, use_offsets, use_size_offsets, workspace,
2721 sizeof(workspace)/sizeof(int));
2722 }
2723 else
2724 #endif
2725
2726 for (i = 0; i < timeitm; i++)
2727 count = pcre_exec(re, extra, (char *)bptr, len,
2728 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2729
2730 time_taken = clock() - start_time;
2731 fprintf(outfile, "Execute time %.4f milliseconds\n",
2732 (((double)time_taken * 1000.0) / (double)timeitm) /
2733 (double)CLOCKS_PER_SEC);
2734 }
2735
2736 /* If find_match_limit is set, we want to do repeated matches with
2737 varying limits in order to find the minimum value for the match limit and
2738 for the recursion limit. The match limits are relevant only to the normal
2739 running of pcre_exec(), so disable the JIT optimization. This makes it
2740 possible to run the same set of tests with and without JIT externally
2741 requested. */
2742
2743 if (find_match_limit)
2744 {
2745 if (extra == NULL)
2746 {
2747 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2748 extra->flags = 0;
2749 }
2750 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
2751
2752 (void)check_match_limit(re, extra, bptr, len, start_offset,
2753 options|g_notempty, use_offsets, use_size_offsets,
2754 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2755 PCRE_ERROR_MATCHLIMIT, "match()");
2756
2757 count = check_match_limit(re, extra, bptr, len, start_offset,
2758 options|g_notempty, use_offsets, use_size_offsets,
2759 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2760 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2761 }
2762
2763 /* If callout_data is set, use the interface with additional data */
2764
2765 else if (callout_data_set)
2766 {
2767 if (extra == NULL)
2768 {
2769 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2770 extra->flags = 0;
2771 }
2772 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2773 extra->callout_data = &callout_data;
2774 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2775 options | g_notempty, use_offsets, use_size_offsets);
2776 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2777 }
2778
2779 /* The normal case is just to do the match once, with the default
2780 value of match_limit. */
2781
2782 #if !defined NODFA
2783 else if (all_use_dfa || use_dfa)
2784 {
2785 int workspace[1000];
2786 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2787 options | g_notempty, use_offsets, use_size_offsets, workspace,
2788 sizeof(workspace)/sizeof(int));
2789 if (count == 0)
2790 {
2791 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2792 count = use_size_offsets/2;
2793 }
2794 }
2795 #endif
2796
2797 else
2798 {
2799 count = pcre_exec(re, extra, (char *)bptr, len,
2800 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2801 if (count == 0)
2802 {
2803 fprintf(outfile, "Matched, but too many substrings\n");
2804 count = use_size_offsets/3;
2805 }
2806 }
2807
2808 /* Matched */
2809
2810 if (count >= 0)
2811 {
2812 int i, maxcount;
2813
2814 #if !defined NODFA
2815 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2816 #endif
2817 maxcount = use_size_offsets/3;
2818
2819 /* This is a check against a lunatic return value. */
2820
2821 if (count > maxcount)
2822 {
2823 fprintf(outfile,
2824 "** PCRE error: returned count %d is too big for offset size %d\n",
2825 count, use_size_offsets);
2826 count = use_size_offsets/3;
2827 if (do_g || do_G)
2828 {
2829 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2830 do_g = do_G = FALSE; /* Break g/G loop */
2831 }
2832 }
2833
2834 /* do_allcaps requests showing of all captures in the pattern, to check
2835 unset ones at the end. */
2836
2837 if (do_allcaps)
2838 {
2839 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2840 count++; /* Allow for full match */
2841 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2842 }
2843
2844 /* Output the captured substrings */
2845
2846 for (i = 0; i < count * 2; i += 2)
2847 {
2848 if (use_offsets[i] < 0)
2849 {
2850 if (use_offsets[i] != -1)
2851 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2852 use_offsets[i], i);
2853 if (use_offsets[i+1] != -1)
2854 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2855 use_offsets[i+1], i+1);
2856 fprintf(outfile, "%2d: <unset>\n", i/2);
2857 }
2858 else
2859 {
2860 fprintf(outfile, "%2d: ", i/2);
2861 (void)pchars(bptr + use_offsets[i],
2862 use_offsets[i+1] - use_offsets[i], outfile);
2863 fprintf(outfile, "\n");
2864 if (do_showcaprest || (i == 0 && do_showrest))
2865 {
2866 fprintf(outfile, "%2d+ ", i/2);
2867 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2868 outfile);
2869 fprintf(outfile, "\n");
2870 }
2871 }
2872 }
2873
2874 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2875
2876 for (i = 0; i < 32; i++)
2877 {
2878 if ((copystrings & (1 << i)) != 0)
2879 {
2880 char copybuffer[256];
2881 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2882 i, copybuffer, sizeof(copybuffer));
2883 if (rc < 0)
2884 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2885 else
2886 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2887 }
2888 }
2889
2890 for (copynamesptr = copynames;
2891 *copynamesptr != 0;
2892 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2893 {
2894 char copybuffer[256];
2895 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2896 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2897 if (rc < 0)
2898 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2899 else
2900 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2901 }
2902
2903 for (i = 0; i < 32; i++)
2904 {
2905 if ((getstrings & (1 << i)) != 0)
2906 {
2907 const char *substring;
2908 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2909 i, &substring);
2910 if (rc < 0)
2911 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2912 else
2913 {
2914 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2915 pcre_free_substring(substring);
2916 }
2917 }
2918 }
2919
2920 for (getnamesptr = getnames;
2921 *getnamesptr != 0;
2922 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2923 {
2924 const char *substring;
2925 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2926 count, (char *)getnamesptr, &substring);
2927 if (rc < 0)
2928 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2929 else
2930 {
2931 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2932 pcre_free_substring(substring);
2933 }
2934 }
2935
2936 if (getlist)
2937 {
2938 const char **stringlist;
2939 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2940 &stringlist);
2941 if (rc < 0)
2942 fprintf(outfile, "get substring list failed %d\n", rc);
2943 else
2944 {
2945 for (i = 0; i < count; i++)
2946 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2947 if (stringlist[i] != NULL)
2948 fprintf(outfile, "string list not terminated by NULL\n");
2949 pcre_free_substring_list(stringlist);
2950 }
2951 }
2952 }
2953
2954 /* There was a partial match */
2955
2956 else if (count == PCRE_ERROR_PARTIAL)
2957 {
2958 if (markptr == NULL) fprintf(outfile, "Partial match");
2959 else fprintf(outfile, "Partial match, mark=%s", markptr);
2960 if (use_size_offsets > 1)
2961 {
2962 fprintf(outfile, ": ");
2963 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2964 outfile);
2965 }
2966 fprintf(outfile, "\n");
2967 break; /* Out of the /g loop */
2968 }
2969
2970 /* Failed to match. If this is a /g or /G loop and we previously set
2971 g_notempty after a null match, this is not necessarily the end. We want
2972 to advance the start offset, and continue. We won't be at the end of the
2973 string - that was checked before setting g_notempty.
2974
2975 Complication arises in the case when the newline convention is "any",
2976 "crlf", or "anycrlf". If the previous match was at the end of a line
2977 terminated by CRLF, an advance of one character just passes the \r,
2978 whereas we should prefer the longer newline sequence, as does the code in
2979 pcre_exec(). Fudge the offset value to achieve this. We check for a
2980 newline setting in the pattern; if none was set, use pcre_config() to
2981 find the default.
2982
2983 Otherwise, in the case of UTF-8 matching, the advance must be one
2984 character, not one byte. */
2985
2986 else
2987 {
2988 if (g_notempty != 0)
2989 {
2990 int onechar = 1;
2991 unsigned int obits = ((real_pcre *)re)->options;
2992 use_offsets[0] = start_offset;
2993 if ((obits & PCRE_NEWLINE_BITS) == 0)
2994 {
2995 int d;
2996 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2997 /* Note that these values are always the ASCII ones, even in
2998 EBCDIC environments. CR = 13, NL = 10. */
2999 obits = (d == 13)? PCRE_NEWLINE_CR :
3000 (d == 10)? PCRE_NEWLINE_LF :
3001 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3002 (d == -2)? PCRE_NEWLINE_ANYCRLF :
3003 (d == -1)? PCRE_NEWLINE_ANY : 0;
3004 }
3005 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3006 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3007 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3008 &&
3009 start_offset < len - 1 &&
3010 bptr[start_offset] == '\r' &&
3011 bptr[start_offset+1] == '\n')
3012 onechar++;
3013 else if (use_utf8)
3014 {
3015 while (start_offset + onechar < len)
3016 {
3017 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3018 onechar++;
3019 }
3020 }
3021 use_offsets[1] = start_offset + onechar;
3022 }
3023 else
3024 {
3025 switch(count)
3026 {
3027 case PCRE_ERROR_NOMATCH:
3028 if (gmatched == 0)
3029 {
3030 if (markptr == NULL) fprintf(outfile, "No match\n");
3031 else fprintf(outfile, "No match, mark = %s\n", markptr);
3032 }
3033 break;
3034
3035 case PCRE_ERROR_BADUTF8:
3036 case PCRE_ERROR_SHORTUTF8:
3037 fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3038 (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3039 if (use_size_offsets >= 2)
3040 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3041 use_offsets[1]);
3042 fprintf(outfile, "\n");
3043 break;
3044
3045 default:
3046 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3047 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3048 else
3049 fprintf(outfile, "Error %d (Unexpected value)\n", count);
3050 break;
3051 }
3052
3053 break; /* Out of the /g loop */
3054 }
3055 }
3056
3057 /* If not /g or /G we are done */
3058
3059 if (!do_g && !do_G) break;
3060
3061 /* If we have matched an empty string, first check to see if we are at
3062 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3063 Perl's /g option does. This turns out to be rather cunning. First we set
3064 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3065 same point. If this fails (picked up above) we advance to the next
3066 character. */
3067
3068 g_notempty = 0;
3069
3070 if (use_offsets[0] == use_offsets[1])
3071 {
3072 if (use_offsets[0] == len) break;
3073 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3074 }
3075
3076 /* For /g, update the start offset, leaving the rest alone */
3077
3078 if (do_g) start_offset = use_offsets[1];
3079
3080 /* For /G, update the pointer and length */
3081
3082 else
3083 {
3084 bptr += use_offsets[1];
3085 len -= use_offsets[1];
3086 }
3087 } /* End of loop for /g and /G */
3088
3089 NEXT_DATA: continue;
3090 } /* End of loop for data lines */
3091
3092 CONTINUE:
3093
3094 #if !defined NOPOSIX
3095 if (posix || do_posix) regfree(&preg);
3096 #endif
3097
3098 if (re != NULL) new_free(re);
3099 if (extra != NULL) pcre_free_study(extra);
3100 if (locale_set)
3101 {
3102 new_free((void *)tables);
3103 setlocale(LC_CTYPE, "C");
3104 locale_set = 0;
3105 }
3106 if (jit_stack != NULL)
3107 {
3108 pcre_jit_stack_free(jit_stack);
3109 jit_stack = NULL;
3110 }
3111 }
3112
3113 if (infile == stdin) fprintf(outfile, "\n");
3114
3115 EXIT:
3116
3117 if (infile != NULL && infile != stdin) fclose(infile);
3118 if (outfile != NULL && outfile != stdout) fclose(outfile);
3119
3120 free(buffer);
3121 free(dbuffer);
3122 free(pbuffer);
3123 free(offsets);
3124
3125 return yield;
3126 }
3127
3128 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12