/[pcre]/code/branches/oldtrunk8/pcretest.c
ViewVC logotype

Contents of /code/branches/oldtrunk8/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 834 - (show annotations) (download)
Wed Dec 28 15:53:12 2011 UTC (2 years, 9 months ago) by ph10
File MIME type: text/plain
File size: 94375 byte(s)
Make a copy of the 8-bit trunk in preparation for replacing it with the
8/16-bit branch. 

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #ifndef isatty
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
78 #ifndef fileno
79 #define fileno _fileno
80 #endif
81
82 /* A user sent this fix for Borland Builder 5 under Windows. */
83
84 #ifdef __BORLANDC__
85 #define _setmode(handle, mode) setmode(handle, mode)
86 #endif
87
88 /* Not Windows */
89
90 #else
91 #include <sys/time.h> /* These two includes are needed */
92 #include <sys/resource.h> /* for setrlimit(). */
93 #define INPUT_MODE "rb"
94 #define OUTPUT_MODE "wb"
95 #endif
96
97
98 /* We have to include pcre_internal.h because we need the internal info for
99 displaying the results of pcre_study() and we also need to know about the
100 internal macros, structures, and other internal data values; pcretest has
101 "inside information" compared to a program that strictly follows the PCRE API.
102
103 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105 appropriately for an application, not for building PCRE. */
106
107 #include "pcre.h"
108 #include "pcre_internal.h"
109
110 /* We need access to some of the data tables that PCRE uses. So as not to have
111 to keep two copies, we include the source file here, changing the names of the
112 external symbols to prevent clashes. */
113
114 #define _pcre_ucp_gentype ucp_gentype
115 #define _pcre_ucp_typerange ucp_typerange
116 #define _pcre_utf8_table1 utf8_table1
117 #define _pcre_utf8_table1_size utf8_table1_size
118 #define _pcre_utf8_table2 utf8_table2
119 #define _pcre_utf8_table3 utf8_table3
120 #define _pcre_utf8_table4 utf8_table4
121 #define _pcre_utf8_char_sizes utf8_char_sizes
122 #define _pcre_utt utt
123 #define _pcre_utt_size utt_size
124 #define _pcre_utt_names utt_names
125 #define _pcre_OP_lengths OP_lengths
126
127 #include "pcre_tables.c"
128
129 /* We also need the pcre_printint() function for printing out compiled
130 patterns. This function is in a separate file so that it can be included in
131 pcre_compile.c when that module is compiled with debugging enabled. It needs to
132 know which case is being compiled. */
133
134 #define COMPILING_PCRETEST
135 #include "pcre_printint.src"
136
/* The definition of the macro PRINTABLE, which determines whether to print an
output character as-is or as a hex value when showing compiled patterns, is
contained in the printint.src file. We use it here also, in cases when the
locale has not been explicitly changed, so as to get consistent output from
systems that differ in their output from isprint() even in the "C" locale. */
142
143 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
144
145 /* It is possible to compile this test program without including support for
146 testing the POSIX interface, though this is not available via the standard
147 Makefile. */
148
149 #if !defined NOPOSIX
150 #include "pcreposix.h"
151 #endif
152
153 /* It is also possible, for the benefit of the version currently imported into
154 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
155 interface to the DFA matcher (NODFA), and without the doublecheck of the old
156 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
157 UTF8 support if PCRE is built without it. */
158
159 #ifndef SUPPORT_UTF8
160 #ifndef NOUTF8
161 #define NOUTF8
162 #endif
163 #endif
164
165
166 /* Other parameters */
167
168 #ifndef CLOCKS_PER_SEC
169 #ifdef CLK_TCK
170 #define CLOCKS_PER_SEC CLK_TCK
171 #else
172 #define CLOCKS_PER_SEC 100
173 #endif
174 #endif
175
176 /* This is the default loop count for timing. */
177
178 #define LOOPREPEAT 500000
179
/* Static variables */

static FILE *outfile;                  /* stream written by callout() and the memory hooks */
static int log_store = 0;
static int callout_count;              /* callouts seen so far whose number is callout_fail_id */
static int callout_extra;              /* non-zero: callout() also lists the captured substrings */
static int callout_fail_count;         /* callout() yields 1 once callout_count reaches this */
static int callout_fail_id;            /* callout number that the fail count applies to */
static int debug_lengths;
static int first_callout;              /* non-zero until callout() has run; cleared there */
static int locale_set = 0;             /* non-zero: PRINTHEX uses isprint() instead of PRINTABLE() */
static int show_malloc;                /* non-zero: the malloc/free hooks log each call */
static int use_utf8;                   /* non-zero: pchars() decodes UTF-8 sequences */
static size_t gotten_store;            /* size passed to the latest new_malloc() call */
static size_t first_gotten_store = 0;  /* size of the first new_malloc() call made while this was 0 */
static const unsigned char *last_callout_mark = NULL;  /* mark most recently reported by callout() */

/* The buffers grow automatically if very long input lines are encountered.
All three are reallocated together by extend_inputline(), which doubles
buffer_size each time. */

static int buffer_size = 50000;        /* current size, in bytes, of each buffer */
static uschar *buffer = NULL;          /* input line buffer */
static uschar *dbuffer = NULL;         /* contents are not preserved when the buffers grow */
static uschar *pbuffer = NULL;         /* text that callout() indexes by pattern position */
203
/* Textual explanations for runtime error codes. A NULL entry marks a code
that has no text here (see the note on each one). The number in each trailing
comment is the entry's index in the array.
NOTE(review): the table is presumably indexed by the negated error code
returned by the matching functions - confirm against the lookup site. */

static const char *errtexts[] = {
  NULL,  /* 0 is no error */
  NULL,  /* 1: NOMATCH is handled specially */
  "NULL argument passed",                                   /* 2 */
  "bad option value",                                       /* 3 */
  "magic number missing",                                   /* 4 */
  "unknown opcode - pattern overwritten?",                  /* 5 */
  "no more memory",                                         /* 6 */
  NULL,  /* 7: never returned by pcre_exec() or pcre_dfa_exec() */
  "match limit exceeded",                                   /* 8 */
  "callout error code",                                     /* 9 */
  NULL,  /* 10: BADUTF8 is handled specially */
  "bad UTF-8 offset",                                       /* 11 */
  NULL,  /* 12: PARTIAL is handled specially */
  "not used - internal error",                              /* 13 */
  "internal error - pattern overwritten?",                  /* 14 */
  "bad count value",                                        /* 15 */
  "item unsupported for DFA matching",                      /* 16 */
  "backreference condition or recursion test not supported for DFA matching",  /* 17 */
  "match limit not supported for DFA matching",             /* 18 */
  "workspace size exceeded in DFA matching",                /* 19 */
  "too much recursion for DFA matching",                    /* 20 */
  "recursion limit exceeded",                               /* 21 */
  "not used - internal error",                              /* 22 */
  "invalid combination of newline options",                 /* 23 */
  "bad offset value",                                       /* 24 */
  NULL,  /* 25: SHORTUTF8 is handled specially */
  "nested recursion at the same subject position",          /* 26 */
  "JIT stack limit reached"                                 /* 27 */
};
236
237
238 /*************************************************
239 * Alternate character tables *
240 *************************************************/
241
242 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
243 using the default tables of the library. However, the T option can be used to
244 select alternate sets of tables, for different kinds of testing. Note also that
245 the L (locale) option also adjusts the tables. */
246
/* This is the set of tables distributed as default with PCRE. It recognizes
only ASCII characters. The array holds four consecutive sections, 1088 bytes
in all: a 256-byte lower casing table, a 256-byte case flipping table, ten
32-byte character class bit maps (320 bytes), and a 256-byte character type
table. */

static const unsigned char tables0[] = {

/* This table is a lower casing table. */

0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122, 91, 92, 93, 94, 95,
96, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* This table is a case flipping table. */

0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122, 91, 92, 93, 94, 95,
96, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87,
88, 89, 90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. */

/* space */

0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* xdigit */

0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* digit */

0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* upper */

0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* lower */

0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* word */

0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* graph */

0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* print */

0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* punct */

0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* cntrl */

0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* This table identifies various classes of character by individual bits:
0x01 white space character
0x02 letter
0x04 decimal digit
0x08 hexadecimal digit
0x10 alphanumeric or '_'
0x80 regular expression metacharacter or binary zero
*/

0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
418
/* This is a set of tables that came originally from a Windows user. It seems to
be at least an approximation of ISO 8859. In particular, there are characters
greater than 128 that are marked as spaces, letters, etc.

The array is 1088 bytes long, the same as tables0, and the section markers
below assume the same layout: 256 bytes of lower casing, 256 bytes of case
flipping, 320 bytes of class bit maps, and 256 bytes of character types. */

static const unsigned char tables1[] = {

/* Lower casing table (256 bytes). */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* Case flipping table (256 bytes). */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Character class bit maps (ten maps of 32 bytes each, four rows per map).
NOTE(review): presumably in the same class order as tables0 (space, xdigit,
digit, upper, lower, word, graph, print, punct, cntrl) - confirm. */

0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Character type table (256 bytes), one flag byte per character value. */

128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
561
562
563
564
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for old systems (SunOS4 is one example) whose C library has no
strerror() but does export the classic sys_nerr / sys_errlist pair.

Argument:
  n         an errno value

Returns:    the matching sys_errlist entry, or a fixed "unknown" message when
            n lies outside 0 .. sys_nerr-1
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];
  return "unknown error number";
}
#endif /* HAVE_STRERROR */
584
585
586 /*************************************************
587 * JIT memory callback *
588 *************************************************/
589
590 static pcre_jit_stack* jit_callback(void *arg)
591 {
592 return (pcre_jit_stack *)arg;
593 }
594
595
596 /*************************************************
597 * Read or extend an input line *
598 *************************************************/
599
600 /* Input lines are read into buffer, but both patterns and data lines can be
601 continued over multiple input lines. In addition, if the buffer fills up, we
602 want to automatically expand it so as to be able to handle extremely large
603 lines that are needed for certain stress tests. When the input buffer is
604 expanded, the other two buffers must also be expanded likewise, and the
605 contents of pbuffer, which are a copy of the input for callouts, must be
606 preserved (for when expansion happens for a data line). This is not the most
607 optimal way of handling this, but hey, this is just a test program!
608
609 Arguments:
610 f the file to read
611 start where in buffer to start (this *must* be within buffer)
612 prompt for stdin or readline()
613
614 Returns: pointer to the start of new data
615 could be a copy of start, or could be moved
616 NULL if no data read and EOF reached
617 */
618
619 static uschar *
620 extend_inputline(FILE *f, uschar *start, const char *prompt)
621 {
622 uschar *here = start;
623
624 for (;;)
625 {
626 int rlen = (int)(buffer_size - (here - buffer));
627
628 if (rlen > 1000)
629 {
630 int dlen;
631
632 /* If libreadline support is required, use readline() to read a line if the
633 input is a terminal. Note that readline() removes the trailing newline, so
634 we must put it back again, to be compatible with fgets(). */
635
636 #ifdef SUPPORT_LIBREADLINE
637 if (isatty(fileno(f)))
638 {
639 size_t len;
640 char *s = readline(prompt);
641 if (s == NULL) return (here == start)? NULL : start;
642 len = strlen(s);
643 if (len > 0) add_history(s);
644 if (len > rlen - 1) len = rlen - 1;
645 memcpy(here, s, len);
646 here[len] = '\n';
647 here[len+1] = 0;
648 free(s);
649 }
650 else
651 #endif
652
653 /* Read the next line by normal means, prompting if the file is stdin. */
654
655 {
656 if (f == stdin) printf("%s", prompt);
657 if (fgets((char *)here, rlen, f) == NULL)
658 return (here == start)? NULL : start;
659 }
660
661 dlen = (int)strlen((char *)here);
662 if (dlen > 0 && here[dlen - 1] == '\n') return start;
663 here += dlen;
664 }
665
666 else
667 {
668 int new_buffer_size = 2*buffer_size;
669 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
670 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
671 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
672
673 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
674 {
675 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
676 exit(1);
677 }
678
679 memcpy(new_buffer, buffer, buffer_size);
680 memcpy(new_pbuffer, pbuffer, buffer_size);
681
682 buffer_size = new_buffer_size;
683
684 start = new_buffer + (start - buffer);
685 here = new_buffer + (here - buffer);
686
687 free(buffer);
688 free(dbuffer);
689 free(pbuffer);
690
691 buffer = new_buffer;
692 dbuffer = new_dbuffer;
693 pbuffer = new_pbuffer;
694 }
695 }
696
697 return NULL; /* Control never gets here */
698 }
699
700
701
702
703
704
705
/*************************************************
*          Read number from string               *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then a run of decimal digits is converted.
A value too large for an int is clamped to INT_MAX instead of overflowing
(signed overflow is undefined behaviour); the remaining digits are still
consumed so that *endptr ends up after the whole number.

Arguments:
  str       string to be converted
  endptr    where to put the end pointer (first character after the digits)

Returns:    the value as a non-negative int; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  while (isdigit(*str))
  {
    int digit = (int)(*str++ - '0');

    /* result * 10 + digit fits in an int exactly when this test is false. */
    if (result > (INT_MAX - digit) / 10)
      result = INT_MAX;
    else
      result = result * 10 + digit;
  }

  *endptr = str;
  return result;
}
730
731
732
733
734 /*************************************************
735 * Convert UTF-8 string to value *
736 *************************************************/
737
738 /* This function takes one or more bytes that represents a UTF-8 character,
739 and returns the value of the character.
740
741 Argument:
742 utf8bytes a pointer to the byte vector
743 vptr a pointer to an int to receive the value
744
745 Returns: > 0 => the number of bytes consumed
746 -6 to 0 => malformed UTF-8 character at offset = (-return)
747 */
748
749 #if !defined NOUTF8
750
751 static int
752 utf82ord(unsigned char *utf8bytes, int *vptr)
753 {
754 int c = *utf8bytes++;
755 int d = c;
756 int i, j, s;
757
758 for (i = -1; i < 6; i++) /* i is number of additional bytes */
759 {
760 if ((d & 0x80) == 0) break;
761 d <<= 1;
762 }
763
764 if (i == -1) { *vptr = c; return 1; } /* ascii character */
765 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
766
767 /* i now has a value in the range 1-5 */
768
769 s = 6*i;
770 d = (c & utf8_table3[i]) << s;
771
772 for (j = 0; j < i; j++)
773 {
774 c = *utf8bytes++;
775 if ((c & 0xc0) != 0x80) return -(j+1);
776 s -= 6;
777 d |= (c & 0x3f) << s;
778 }
779
780 /* Check that encoding was the correct unique one */
781
782 for (j = 0; j < utf8_table1_size; j++)
783 if (d <= utf8_table1[j]) break;
784 if (j != i) return -(i+1);
785
786 /* Valid value */
787
788 *vptr = d;
789 return i+1;
790 }
791
792 #endif
793
794
795
796 /*************************************************
797 * Convert character value to UTF-8 *
798 *************************************************/
799
800 /* This function takes an integer value in the range 0 - 0x7fffffff
801 and encodes it as a UTF-8 character in 0 to 6 bytes.
802
803 Arguments:
804 cvalue the character value
805 utf8bytes pointer to buffer for result - at least 6 bytes long
806
807 Returns: number of characters placed in the buffer
808 */
809
810 #if !defined NOUTF8
811
812 static int
813 ord2utf8(int cvalue, uschar *utf8bytes)
814 {
815 register int i, j;
816 for (i = 0; i < utf8_table1_size; i++)
817 if (cvalue <= utf8_table1[i]) break;
818 utf8bytes += i;
819 for (j = i; j > 0; j--)
820 {
821 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
822 cvalue >>= 6;
823 }
824 *utf8bytes = utf8_table2[i] | cvalue;
825 return i + 1;
826 }
827
828 #endif
829
830
831
832 /*************************************************
833 * Print character string *
834 *************************************************/
835
836 /* Character string printing function. Must handle UTF-8 strings in utf8
837 mode. Yields number of characters printed. If handed a NULL file, just counts
838 chars without printing. */
839
840 static int pchars(unsigned char *p, int length, FILE *f)
841 {
842 int c = 0;
843 int yield = 0;
844
845 while (length-- > 0)
846 {
847 #if !defined NOUTF8
848 if (use_utf8)
849 {
850 int rc = utf82ord(p, &c);
851
852 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
853 {
854 length -= rc - 1;
855 p += rc;
856 if (PRINTHEX(c))
857 {
858 if (f != NULL) fprintf(f, "%c", c);
859 yield++;
860 }
861 else
862 {
863 int n = 4;
864 if (f != NULL) fprintf(f, "\\x{%02x}", c);
865 yield += (n <= 0x000000ff)? 2 :
866 (n <= 0x00000fff)? 3 :
867 (n <= 0x0000ffff)? 4 :
868 (n <= 0x000fffff)? 5 : 6;
869 }
870 continue;
871 }
872 }
873 #endif
874
875 /* Not UTF-8, or malformed UTF-8 */
876
877 c = *p++;
878 if (PRINTHEX(c))
879 {
880 if (f != NULL) fprintf(f, "%c", c);
881 yield++;
882 }
883 else
884 {
885 if (f != NULL) fprintf(f, "\\x%02x", c);
886 yield += 4;
887 }
888 }
889
890 return yield;
891 }
892
893
894
895 /*************************************************
896 * Callout function *
897 *************************************************/
898
899 /* Called from PCRE as a result of the (?C) item. We print out where we are in
900 the match. Yield zero unless more callouts than the fail count, or the callout
901 data is not zero. */
902
903 static int callout(pcre_callout_block *cb)
904 {
905 FILE *f = (first_callout | callout_extra)? outfile : NULL;
906 int i, pre_start, post_start, subject_length;
907
908 if (callout_extra)
909 {
910 fprintf(f, "Callout %d: last capture = %d\n",
911 cb->callout_number, cb->capture_last);
912
913 for (i = 0; i < cb->capture_top * 2; i += 2)
914 {
915 if (cb->offset_vector[i] < 0)
916 fprintf(f, "%2d: <unset>\n", i/2);
917 else
918 {
919 fprintf(f, "%2d: ", i/2);
920 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
921 cb->offset_vector[i+1] - cb->offset_vector[i], f);
922 fprintf(f, "\n");
923 }
924 }
925 }
926
927 /* Re-print the subject in canonical form, the first time or if giving full
928 datails. On subsequent calls in the same match, we use pchars just to find the
929 printed lengths of the substrings. */
930
931 if (f != NULL) fprintf(f, "--->");
932
933 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
934 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
935 cb->current_position - cb->start_match, f);
936
937 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
938
939 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
940 cb->subject_length - cb->current_position, f);
941
942 if (f != NULL) fprintf(f, "\n");
943
944 /* Always print appropriate indicators, with callout number if not already
945 shown. For automatic callouts, show the pattern offset. */
946
947 if (cb->callout_number == 255)
948 {
949 fprintf(outfile, "%+3d ", cb->pattern_position);
950 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
951 }
952 else
953 {
954 if (callout_extra) fprintf(outfile, " ");
955 else fprintf(outfile, "%3d ", cb->callout_number);
956 }
957
958 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
959 fprintf(outfile, "^");
960
961 if (post_start > 0)
962 {
963 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
964 fprintf(outfile, "^");
965 }
966
967 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
968 fprintf(outfile, " ");
969
970 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
971 pbuffer + cb->pattern_position);
972
973 fprintf(outfile, "\n");
974 first_callout = 0;
975
976 if (cb->mark != last_callout_mark)
977 {
978 fprintf(outfile, "Latest Mark: %s\n",
979 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
980 last_callout_mark = cb->mark;
981 }
982
983 if (cb->callout_data != NULL)
984 {
985 int callout_data = *((int *)(cb->callout_data));
986 if (callout_data != 0)
987 {
988 fprintf(outfile, "Callout data = %d\n", callout_data);
989 return callout_data;
990 }
991 }
992
993 return (cb->callout_number != callout_fail_id)? 0 :
994 (++callout_count >= callout_fail_count)? 1 : 0;
995 }
996
997
998 /*************************************************
999 * Local malloc functions *
1000 *************************************************/
1001
1002 /* Alternative malloc function, to test functionality and save the size of a
1003 compiled re, which is the first store request that pcre_compile() makes. The
1004 show_malloc variable is set only during matching. */
1005
1006 static void *new_malloc(size_t size)
1007 {
1008 void *block = malloc(size);
1009 gotten_store = size;
1010 if (first_gotten_store == 0) first_gotten_store = size;
1011 if (show_malloc)
1012 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1013 return block;
1014 }
1015
1016 static void new_free(void *block)
1017 {
1018 if (show_malloc)
1019 fprintf(outfile, "free %p\n", block);
1020 free(block);
1021 }
1022
1023 /* For recursion malloc/free, to test stacking calls */
1024
1025 static void *stack_malloc(size_t size)
1026 {
1027 void *block = malloc(size);
1028 if (show_malloc)
1029 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1030 return block;
1031 }
1032
1033 static void stack_free(void *block)
1034 {
1035 if (show_malloc)
1036 fprintf(outfile, "stack_free %p\n", block);
1037 free(block);
1038 }
1039
1040
1041 /*************************************************
1042 * Call pcre_fullinfo() *
1043 *************************************************/
1044
1045 /* Get one piece of information from the pcre_fullinfo() function */
1046
1047 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1048 {
1049 int rc;
1050 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1051 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1052 }
1053
1054
1055
1056 /*************************************************
1057 * Byte flipping function *
1058 *************************************************/
1059
/* Reverse the byte order of the low-order bytes of a value.

Arguments:
  value       the value to flip
  n           2 to swap the two low-order bytes; any other value reverses the
                four low-order bytes

Returns:      the flipped value; bits above the flipped bytes are zero
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int byte0 = value & 0xffUL;
  unsigned long int byte1 = (value >> 8) & 0xffUL;
  unsigned long int byte2, byte3;

  if (n == 2)
    {
    return (byte0 << 8) | byte1;
    }

  byte2 = (value >> 16) & 0xffUL;
  byte3 = (value >> 24) & 0xffUL;

  return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
1069
1070
1071
1072
1073 /*************************************************
1074 * Check match or recursion limit *
1075 *************************************************/
1076
1077 static int
1078 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1079 int start_offset, int options, int *use_offsets, int use_size_offsets,
1080 int flag, unsigned long int *limit, int errnumber, const char *msg)
1081 {
1082 int count;
1083 int min = 0;
1084 int mid = 64;
1085 int max = -1;
1086
1087 extra->flags |= flag;
1088
1089 for (;;)
1090 {
1091 *limit = mid;
1092
1093 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1094 use_offsets, use_size_offsets);
1095
1096 if (count == errnumber)
1097 {
1098 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1099 min = mid;
1100 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1101 }
1102
1103 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1104 count == PCRE_ERROR_PARTIAL)
1105 {
1106 if (mid == min + 1)
1107 {
1108 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1109 break;
1110 }
1111 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1112 max = mid;
1113 mid = (min + mid)/2;
1114 }
1115 else break; /* Some other error */
1116 }
1117
1118 extra->flags &= ~flag;
1119 return count;
1120 }
1121
1122
1123
1124 /*************************************************
1125 * Case-independent strncmp() function *
1126 *************************************************/
1127
1128 /*
1129 Arguments:
1130 s first string
1131 t second string
1132 n number of characters to compare
1133
1134 Returns: < 0, = 0, or > 0, according to the comparison
1135 */
1136
1137 static int
1138 strncmpic(uschar *s, uschar *t, int n)
1139 {
1140 while (n--)
1141 {
1142 int c = tolower(*s++) - tolower(*t++);
1143 if (c) return c;
1144 }
1145 return 0;
1146 }
1147
1148
1149
1150 /*************************************************
1151 * Check newline indicator *
1152 *************************************************/
1153
1154 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1155 a message and return 0 if there is no match.
1156
1157 Arguments:
1158 p points after the leading '<'
1159 f file for error message
1160
1161 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1162 */
1163
1164 static int
1165 check_newline(uschar *p, FILE *f)
1166 {
1167 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1168 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1169 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1170 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1171 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1172 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1173 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1174 fprintf(f, "Unknown newline type at: <%s\n", p);
1175 return 0;
1176 }
1177
1178
1179
1180 /*************************************************
1181 * Usage function *
1182 *************************************************/
1183
/* Write the command-line help text to stdout. Which lines appear depends on
the build: the readline note is selected by SUPPORT_LIBREADLINE, and the -dfa
and -p entries are left out when NODFA or NOPOSIX is defined. */

static void
usage(void)
{
  static const char *const help_text[] = {
    "Usage: pcretest [options] [<input file> [<output file>]]\n\n",
    "Input and output default to stdin and stdout.\n",
#ifdef SUPPORT_LIBREADLINE
    "If input is a terminal, readline() is used to read from it.\n",
#else
    "This version of pcretest is not linked with readline().\n",
#endif
    "\nOptions:\n",
    " -b show compiled code (bytecode)\n",
    " -C show PCRE compile-time options and exit\n",
    " -d debug: show compiled code and information (-b and -i)\n",
#if !defined NODFA
    " -dfa force DFA matching for all subjects\n",
#endif
    " -help show usage information\n",
    " -i show information about compiled patterns\n",
    " -M find MATCH_LIMIT minimum for each subject\n",
    " -m output memory used information\n",
    " -o <n> set size of offsets vector to <n>\n",
#if !defined NOPOSIX
    " -p use POSIX interface\n",
#endif
    " -q quiet: do not output PCRE version number at start\n",
    " -S <n> set stack size to <n> megabytes\n",
    " -s force each pattern to be studied at basic level\n",
    " -s+ force each pattern to be studied, using JIT if available\n",
    " -t time compilation and execution\n",
    " -t <n> time compilation and execution, repeating <n> times\n",
    " -tm time execution (matching) only\n",
    " -tm <n> time execution (matching) only, repeating <n> times\n"
  };
  size_t line;

  /* None of the lines contains a conversion, so they are written verbatim. */

  for (line = 0; line < sizeof(help_text)/sizeof(help_text[0]); line++)
    {
    fputs(help_text[line], stdout);
    }
}
1218
1219
1220
1221 /*************************************************
1222 * Main Program *
1223 *************************************************/
1224
1225 /* Read lines from named file or stdin and write to named file or stdout; lines
1226 consist of a regular expression, in delimiters and optionally followed by
1227 options, followed by a set of test data, terminated by an empty line. */
1228
1229 int main(int argc, char **argv)
1230 {
1231 FILE *infile = stdin;
1232 int options = 0;
1233 int study_options = 0;
1234 int default_find_match_limit = FALSE;
1235 int op = 1;
1236 int timeit = 0;
1237 int timeitm = 0;
1238 int showinfo = 0;
1239 int showstore = 0;
1240 int force_study = -1;
1241 int force_study_options = 0;
1242 int quiet = 0;
1243 int size_offsets = 45;
1244 int size_offsets_max;
1245 int *offsets = NULL;
1246 #if !defined NOPOSIX
1247 int posix = 0;
1248 #endif
1249 int debug = 0;
1250 int done = 0;
1251 int all_use_dfa = 0;
1252 int yield = 0;
1253 int stack_size;
1254
1255 pcre_jit_stack *jit_stack = NULL;
1256
1257
1258 /* These vectors store, end-to-end, a list of captured substring names. Assume
1259 that 1024 is plenty long enough for the few names we'll be testing. */
1260
1261 uschar copynames[1024];
1262 uschar getnames[1024];
1263
1264 uschar *copynamesptr;
1265 uschar *getnamesptr;
1266
1267 /* Get buffers from malloc() so that Electric Fence will check their misuse
1268 when I am debugging. They grow automatically when very long lines are read. */
1269
1270 buffer = (unsigned char *)malloc(buffer_size);
1271 dbuffer = (unsigned char *)malloc(buffer_size);
1272 pbuffer = (unsigned char *)malloc(buffer_size);
1273
1274 /* The outfile variable is static so that new_malloc can use it. */
1275
1276 outfile = stdout;
1277
1278 /* The following _setmode() stuff is some Windows magic that tells its runtime
1279 library to translate CRLF into a single LF character. At least, that's what
1280 I've been told: never having used Windows I take this all on trust. Originally
1281 it set 0x8000, but then I was advised that _O_BINARY was better. */
1282
1283 #if defined(_WIN32) || defined(WIN32)
1284 _setmode( _fileno( stdout ), _O_BINARY );
1285 #endif
1286
1287 /* Scan options */
1288
1289 while (argc > 1 && argv[op][0] == '-')
1290 {
1291 unsigned char *endptr;
1292
1293 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1294 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1295 else if (strcmp(argv[op], "-s+") == 0)
1296 {
1297 force_study = 1;
1298 force_study_options = PCRE_STUDY_JIT_COMPILE;
1299 }
1300 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1301 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1302 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1303 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1304 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1305 #if !defined NODFA
1306 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1307 #endif
1308 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1309 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1310 *endptr == 0))
1311 {
1312 op++;
1313 argc--;
1314 }
1315 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1316 {
1317 int both = argv[op][2] == 0;
1318 int temp;
1319 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1320 *endptr == 0))
1321 {
1322 timeitm = temp;
1323 op++;
1324 argc--;
1325 }
1326 else timeitm = LOOPREPEAT;
1327 if (both) timeit = timeitm;
1328 }
1329 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1330 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1331 *endptr == 0))
1332 {
1333 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1334 printf("PCRE: -S not supported on this OS\n");
1335 exit(1);
1336 #else
1337 int rc;
1338 struct rlimit rlim;
1339 getrlimit(RLIMIT_STACK, &rlim);
1340 rlim.rlim_cur = stack_size * 1024 * 1024;
1341 rc = setrlimit(RLIMIT_STACK, &rlim);
1342 if (rc != 0)
1343 {
1344 printf("PCRE: setrlimit() failed with error %d\n", rc);
1345 exit(1);
1346 }
1347 op++;
1348 argc--;
1349 #endif
1350 }
1351 #if !defined NOPOSIX
1352 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1353 #endif
1354 else if (strcmp(argv[op], "-C") == 0)
1355 {
1356 int rc;
1357 unsigned long int lrc;
1358 printf("PCRE version %s\n", pcre_version());
1359 printf("Compiled with\n");
1360 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1361 printf(" %sUTF-8 support\n", rc? "" : "No ");
1362 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1363 printf(" %sUnicode properties support\n", rc? "" : "No ");
1364 (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1365 if (rc)
1366 printf(" Just-in-time compiler support\n");
1367 else
1368 printf(" No just-in-time compiler support\n");
1369 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1370 /* Note that these values are always the ASCII values, even
1371 in EBCDIC environments. CR is 13 and NL is 10. */
1372 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1373 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1374 (rc == -2)? "ANYCRLF" :
1375 (rc == -1)? "ANY" : "???");
1376 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1377 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1378 "all Unicode newlines");
1379 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1380 printf(" Internal link size = %d\n", rc);
1381 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1382 printf(" POSIX malloc threshold = %d\n", rc);
1383 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1384 printf(" Default match limit = %ld\n", lrc);
1385 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1386 printf(" Default recursion depth limit = %ld\n", lrc);
1387 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1388 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1389 goto EXIT;
1390 }
1391 else if (strcmp(argv[op], "-help") == 0 ||
1392 strcmp(argv[op], "--help") == 0)
1393 {
1394 usage();
1395 goto EXIT;
1396 }
1397 else
1398 {
1399 printf("** Unknown or malformed option %s\n", argv[op]);
1400 usage();
1401 yield = 1;
1402 goto EXIT;
1403 }
1404 op++;
1405 argc--;
1406 }
1407
1408 /* Get the store for the offsets vector, and remember what it was */
1409
1410 size_offsets_max = size_offsets;
1411 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1412 if (offsets == NULL)
1413 {
1414 printf("** Failed to get %d bytes of memory for offsets vector\n",
1415 (int)(size_offsets_max * sizeof(int)));
1416 yield = 1;
1417 goto EXIT;
1418 }
1419
1420 /* Sort out the input and output files */
1421
1422 if (argc > 1)
1423 {
1424 infile = fopen(argv[op], INPUT_MODE);
1425 if (infile == NULL)
1426 {
1427 printf("** Failed to open %s\n", argv[op]);
1428 yield = 1;
1429 goto EXIT;
1430 }
1431 }
1432
1433 if (argc > 2)
1434 {
1435 outfile = fopen(argv[op+1], OUTPUT_MODE);
1436 if (outfile == NULL)
1437 {
1438 printf("** Failed to open %s\n", argv[op+1]);
1439 yield = 1;
1440 goto EXIT;
1441 }
1442 }
1443
1444 /* Set alternative malloc function */
1445
1446 pcre_malloc = new_malloc;
1447 pcre_free = new_free;
1448 pcre_stack_malloc = stack_malloc;
1449 pcre_stack_free = stack_free;
1450
1451 /* Heading line unless quiet, then prompt for first regex if stdin */
1452
1453 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1454
1455 /* Main loop */
1456
1457 while (!done)
1458 {
1459 pcre *re = NULL;
1460 pcre_extra *extra = NULL;
1461
1462 #if !defined NOPOSIX /* There are still compilers that require no indent */
1463 regex_t preg;
1464 int do_posix = 0;
1465 #endif
1466
1467 const char *error;
1468 unsigned char *markptr;
1469 unsigned char *p, *pp, *ppp;
1470 unsigned char *to_file = NULL;
1471 const unsigned char *tables = NULL;
1472 unsigned long int true_size, true_study_size = 0;
1473 size_t size, regex_gotten_store;
1474 int do_allcaps = 0;
1475 int do_mark = 0;
1476 int do_study = 0;
1477 int no_force_study = 0;
1478 int do_debug = debug;
1479 int do_G = 0;
1480 int do_g = 0;
1481 int do_showinfo = showinfo;
1482 int do_showrest = 0;
1483 int do_showcaprest = 0;
1484 int do_flip = 0;
1485 int erroroffset, len, delimiter, poffset;
1486
1487 use_utf8 = 0;
1488 debug_lengths = 1;
1489
1490 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1491 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1492 fflush(outfile);
1493
1494 p = buffer;
1495 while (isspace(*p)) p++;
1496 if (*p == 0) continue;
1497
1498 /* See if the pattern is to be loaded pre-compiled from a file. */
1499
1500 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1501 {
1502 unsigned long int magic, get_options;
1503 uschar sbuf[8];
1504 FILE *f;
1505
1506 p++;
1507 pp = p + (int)strlen((char *)p);
1508 while (isspace(pp[-1])) pp--;
1509 *pp = 0;
1510
1511 f = fopen((char *)p, "rb");
1512 if (f == NULL)
1513 {
1514 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1515 continue;
1516 }
1517
1518 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1519
1520 true_size =
1521 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1522 true_study_size =
1523 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1524
1525 re = (real_pcre *)new_malloc(true_size);
1526 regex_gotten_store = first_gotten_store;
1527
1528 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1529
1530 magic = ((real_pcre *)re)->magic_number;
1531 if (magic != MAGIC_NUMBER)
1532 {
1533 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1534 {
1535 do_flip = 1;
1536 }
1537 else
1538 {
1539 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1540 fclose(f);
1541 continue;
1542 }
1543 }
1544
1545 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1546 do_flip? " (byte-inverted)" : "", p);
1547
1548 /* Need to know if UTF-8 for printing data strings */
1549
1550 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1551 use_utf8 = (get_options & PCRE_UTF8) != 0;
1552
1553 /* Now see if there is any following study data. */
1554
1555 if (true_study_size != 0)
1556 {
1557 pcre_study_data *psd;
1558
1559 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1560 extra->flags = PCRE_EXTRA_STUDY_DATA;
1561
1562 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1563 extra->study_data = psd;
1564
1565 if (fread(psd, 1, true_study_size, f) != true_study_size)
1566 {
1567 FAIL_READ:
1568 fprintf(outfile, "Failed to read data from %s\n", p);
1569 if (extra != NULL) pcre_free_study(extra);
1570 if (re != NULL) new_free(re);
1571 fclose(f);
1572 continue;
1573 }
1574 fprintf(outfile, "Study data loaded from %s\n", p);
1575 do_study = 1; /* To get the data output if requested */
1576 }
1577 else fprintf(outfile, "No study data\n");
1578
1579 fclose(f);
1580 goto SHOW_INFO;
1581 }
1582
1583 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1584 the pattern; if it isn't complete, read more. */
1585
1586 delimiter = *p++;
1587
1588 if (isalnum(delimiter) || delimiter == '\\')
1589 {
1590 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1591 goto SKIP_DATA;
1592 }
1593
1594 pp = p;
1595 poffset = (int)(p - buffer);
1596
1597 for(;;)
1598 {
1599 while (*pp != 0)
1600 {
1601 if (*pp == '\\' && pp[1] != 0) pp++;
1602 else if (*pp == delimiter) break;
1603 pp++;
1604 }
1605 if (*pp != 0) break;
1606 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1607 {
1608 fprintf(outfile, "** Unexpected EOF\n");
1609 done = 1;
1610 goto CONTINUE;
1611 }
1612 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1613 }
1614
1615 /* The buffer may have moved while being extended; reset the start of data
1616 pointer to the correct relative point in the buffer. */
1617
1618 p = buffer + poffset;
1619
1620 /* If the first character after the delimiter is backslash, make
1621 the pattern end with backslash. This is purely to provide a way
1622 of testing for the error message when a pattern ends with backslash. */
1623
1624 if (pp[1] == '\\') *pp++ = '\\';
1625
1626 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1627 for callouts. */
1628
1629 *pp++ = 0;
1630 strcpy((char *)pbuffer, (char *)p);
1631
1632 /* Look for options after final delimiter */
1633
1634 options = 0;
1635 study_options = 0;
1636 log_store = showstore; /* default from command line */
1637
1638 while (*pp != 0)
1639 {
1640 switch (*pp++)
1641 {
1642 case 'f': options |= PCRE_FIRSTLINE; break;
1643 case 'g': do_g = 1; break;
1644 case 'i': options |= PCRE_CASELESS; break;
1645 case 'm': options |= PCRE_MULTILINE; break;
1646 case 's': options |= PCRE_DOTALL; break;
1647 case 'x': options |= PCRE_EXTENDED; break;
1648
1649 case '+':
1650 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1651 break;
1652
1653 case '=': do_allcaps = 1; break;
1654 case 'A': options |= PCRE_ANCHORED; break;
1655 case 'B': do_debug = 1; break;
1656 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1657 case 'D': do_debug = do_showinfo = 1; break;
1658 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1659 case 'F': do_flip = 1; break;
1660 case 'G': do_G = 1; break;
1661 case 'I': do_showinfo = 1; break;
1662 case 'J': options |= PCRE_DUPNAMES; break;
1663 case 'K': do_mark = 1; break;
1664 case 'M': log_store = 1; break;
1665 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1666
1667 #if !defined NOPOSIX
1668 case 'P': do_posix = 1; break;
1669 #endif
1670
1671 case 'S':
1672 if (do_study == 0)
1673 {
1674 do_study = 1;
1675 if (*pp == '+')
1676 {
1677 study_options |= PCRE_STUDY_JIT_COMPILE;
1678 pp++;
1679 }
1680 }
1681 else
1682 {
1683 do_study = 0;
1684 no_force_study = 1;
1685 }
1686 break;
1687
1688 case 'U': options |= PCRE_UNGREEDY; break;
1689 case 'W': options |= PCRE_UCP; break;
1690 case 'X': options |= PCRE_EXTRA; break;
1691 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1692 case 'Z': debug_lengths = 0; break;
1693 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1694 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1695
1696 case 'T':
1697 switch (*pp++)
1698 {
1699 case '0': tables = tables0; break;
1700 case '1': tables = tables1; break;
1701
1702 case '\r':
1703 case '\n':
1704 case ' ':
1705 case 0:
1706 fprintf(outfile, "** Missing table number after /T\n");
1707 goto SKIP_DATA;
1708
1709 default:
1710 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1711 goto SKIP_DATA;
1712 }
1713 break;
1714
1715 case 'L':
1716 ppp = pp;
1717 /* The '\r' test here is so that it works on Windows. */
1718 /* The '0' test is just in case this is an unterminated line. */
1719 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1720 *ppp = 0;
1721 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1722 {
1723 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1724 goto SKIP_DATA;
1725 }
1726 locale_set = 1;
1727 tables = pcre_maketables();
1728 pp = ppp;
1729 break;
1730
1731 case '>':
1732 to_file = pp;
1733 while (*pp != 0) pp++;
1734 while (isspace(pp[-1])) pp--;
1735 *pp = 0;
1736 break;
1737
1738 case '<':
1739 {
1740 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1741 {
1742 options |= PCRE_JAVASCRIPT_COMPAT;
1743 pp += 3;
1744 }
1745 else
1746 {
1747 int x = check_newline(pp, outfile);
1748 if (x == 0) goto SKIP_DATA;
1749 options |= x;
1750 while (*pp++ != '>');
1751 }
1752 }
1753 break;
1754
1755 case '\r': /* So that it works in Windows */
1756 case '\n':
1757 case ' ':
1758 break;
1759
1760 default:
1761 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1762 goto SKIP_DATA;
1763 }
1764 }
1765
1766 /* Handle compiling via the POSIX interface, which doesn't support the
1767 timing, showing, or debugging options, nor the ability to pass over
1768 local character tables. */
1769
1770 #if !defined NOPOSIX
1771 if (posix || do_posix)
1772 {
1773 int rc;
1774 int cflags = 0;
1775
1776 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1777 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1778 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1779 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1780 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1781 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1782 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1783
1784 first_gotten_store = 0;
1785 rc = regcomp(&preg, (char *)p, cflags);
1786
1787 /* Compilation failed; go back for another re, skipping to blank line
1788 if non-interactive. */
1789
1790 if (rc != 0)
1791 {
1792 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1793 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1794 goto SKIP_DATA;
1795 }
1796 }
1797
1798 /* Handle compiling via the native interface */
1799
1800 else
1801 #endif /* !defined NOPOSIX */
1802
1803 {
1804 unsigned long int get_options;
1805
1806 if (timeit > 0)
1807 {
1808 register int i;
1809 clock_t time_taken;
1810 clock_t start_time = clock();
1811 for (i = 0; i < timeit; i++)
1812 {
1813 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1814 if (re != NULL) free(re);
1815 }
1816 time_taken = clock() - start_time;
1817 fprintf(outfile, "Compile time %.4f milliseconds\n",
1818 (((double)time_taken * 1000.0) / (double)timeit) /
1819 (double)CLOCKS_PER_SEC);
1820 }
1821
1822 first_gotten_store = 0;
1823 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1824
1825 /* Compilation failed; go back for another re, skipping to blank line
1826 if non-interactive. */
1827
1828 if (re == NULL)
1829 {
1830 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1831 SKIP_DATA:
1832 if (infile != stdin)
1833 {
1834 for (;;)
1835 {
1836 if (extend_inputline(infile, buffer, NULL) == NULL)
1837 {
1838 done = 1;
1839 goto CONTINUE;
1840 }
1841 len = (int)strlen((char *)buffer);
1842 while (len > 0 && isspace(buffer[len-1])) len--;
1843 if (len == 0) break;
1844 }
1845 fprintf(outfile, "\n");
1846 }
1847 goto CONTINUE;
1848 }
1849
1850 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1851 within the regex; check for this so that we know how to process the data
1852 lines. */
1853
1854 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1855 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1856
1857 /* Extract the size for possible writing before possibly flipping it,
1858 and remember the store that was got. */
1859
1860 true_size = ((real_pcre *)re)->size;
1861 regex_gotten_store = first_gotten_store;
1862
1863 /* Output code size information if requested */
1864
1865 if (log_store)
1866 fprintf(outfile, "Memory allocation (code space): %d\n",
1867 (int)(first_gotten_store -
1868 sizeof(real_pcre) -
1869 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1870
1871 /* If -s or /S was present, study the regex to generate additional info to
1872 help with the matching, unless the pattern has the SS option, which
1873 suppresses the effect of /S (used for a few test patterns where studying is
1874 never sensible). */
1875
1876 if (do_study || (force_study >= 0 && !no_force_study))
1877 {
1878 if (timeit > 0)
1879 {
1880 register int i;
1881 clock_t time_taken;
1882 clock_t start_time = clock();
1883 for (i = 0; i < timeit; i++)
1884 extra = pcre_study(re, study_options | force_study_options, &error);
1885 time_taken = clock() - start_time;
1886 if (extra != NULL) pcre_free_study(extra);
1887 fprintf(outfile, " Study time %.4f milliseconds\n",
1888 (((double)time_taken * 1000.0) / (double)timeit) /
1889 (double)CLOCKS_PER_SEC);
1890 }
1891 extra = pcre_study(re, study_options | force_study_options, &error);
1892 if (error != NULL)
1893 fprintf(outfile, "Failed to study: %s\n", error);
1894 else if (extra != NULL)
1895 {
1896 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1897 if (log_store)
1898 {
1899 size_t jitsize;
1900 new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
1901 if (jitsize != 0)
1902 fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
1903 }
1904 }
1905 }
1906
1907 /* If /K was present, we set up for handling MARK data. */
1908
1909 if (do_mark)
1910 {
1911 if (extra == NULL)
1912 {
1913 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1914 extra->flags = 0;
1915 }
1916 extra->mark = &markptr;
1917 extra->flags |= PCRE_EXTRA_MARK;
1918 }
1919
1920 /* If the 'F' option was present, we flip the bytes of all the integer
1921 fields in the regex data block and the study block. This is to make it
1922 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1923 compiled on a different architecture. */
1924
1925 if (do_flip)
1926 {
1927 real_pcre *rre = (real_pcre *)re;
1928 rre->magic_number =
1929 byteflip(rre->magic_number, sizeof(rre->magic_number));
1930 rre->size = byteflip(rre->size, sizeof(rre->size));
1931 rre->options = byteflip(rre->options, sizeof(rre->options));
1932 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1933 rre->top_bracket =
1934 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1935 rre->top_backref =
1936 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1937 rre->first_byte =
1938 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1939 rre->req_byte =
1940 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1941 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1942 sizeof(rre->name_table_offset));
1943 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1944 sizeof(rre->name_entry_size));
1945 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1946 sizeof(rre->name_count));
1947
1948 if (extra != NULL)
1949 {
1950 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1951 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1952 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1953 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1954 }
1955 }
1956
1957 /* Extract information from the compiled data if required. There are now
1958 two info-returning functions. The old one has a limited interface and
1959 returns only limited data. Check that it agrees with the newer one. */
1960
1961 SHOW_INFO:
1962
1963 if (do_debug)
1964 {
1965 fprintf(outfile, "------------------------------------------------------------------\n");
1966 pcre_printint(re, outfile, debug_lengths);
1967 }
1968
1969 /* We already have the options in get_options (see above) */
1970
1971 if (do_showinfo)
1972 {
1973 unsigned long int all_options;
1974 #if !defined NOINFOCHECK
1975 int old_first_char, old_options, old_count;
1976 #endif
1977 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1978 hascrorlf;
1979 int nameentrysize, namecount;
1980 const uschar *nametable;
1981
1982 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1983 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1984 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1985 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1986 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1987 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1988 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1989 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1990 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1991 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1992 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1993
1994 #if !defined NOINFOCHECK
1995 old_count = pcre_info(re, &old_options, &old_first_char);
1996 if (count < 0) fprintf(outfile,
1997 "Error %d from pcre_info()\n", count);
1998 else
1999 {
2000 if (old_count != count) fprintf(outfile,
2001 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2002 old_count);
2003
2004 if (old_first_char != first_char) fprintf(outfile,
2005 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2006 first_char, old_first_char);
2007
2008 if (old_options != (int)get_options) fprintf(outfile,
2009 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2010 get_options, old_options);
2011 }
2012 #endif
2013
2014 if (size != regex_gotten_store) fprintf(outfile,
2015 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2016 (int)size, (int)regex_gotten_store);
2017
2018 fprintf(outfile, "Capturing subpattern count = %d\n", count);
2019 if (backrefmax > 0)
2020 fprintf(outfile, "Max back reference = %d\n", backrefmax);
2021
2022 if (namecount > 0)
2023 {
2024 fprintf(outfile, "Named capturing subpatterns:\n");
2025 while (namecount-- > 0)
2026 {
2027 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
2028 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2029 GET2(nametable, 0));
2030 nametable += nameentrysize;
2031 }
2032 }
2033
2034 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2035 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2036
2037 all_options = ((real_pcre *)re)->options;
2038 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2039
2040 if (get_options == 0) fprintf(outfile, "No options\n");
2041 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2042 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2043 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2044 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2045 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2046 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2047 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2048 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2049 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2050 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2051 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2052 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2053 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2054 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2055 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2056 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2057 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2058 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2059
2060 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2061
2062 switch (get_options & PCRE_NEWLINE_BITS)
2063 {
2064 case PCRE_NEWLINE_CR:
2065 fprintf(outfile, "Forced newline sequence: CR\n");
2066 break;
2067
2068 case PCRE_NEWLINE_LF:
2069 fprintf(outfile, "Forced newline sequence: LF\n");
2070 break;
2071
2072 case PCRE_NEWLINE_CRLF:
2073 fprintf(outfile, "Forced newline sequence: CRLF\n");
2074 break;
2075
2076 case PCRE_NEWLINE_ANYCRLF:
2077 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2078 break;
2079
2080 case PCRE_NEWLINE_ANY:
2081 fprintf(outfile, "Forced newline sequence: ANY\n");
2082 break;
2083
2084 default:
2085 break;
2086 }
2087
2088 if (first_char == -1)
2089 {
2090 fprintf(outfile, "First char at start or follows newline\n");
2091 }
2092 else if (first_char < 0)
2093 {
2094 fprintf(outfile, "No first char\n");
2095 }
2096 else
2097 {
2098 int ch = first_char & 255;
2099 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2100 "" : " (caseless)";
2101 if (PRINTHEX(ch))
2102 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2103 else
2104 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2105 }
2106
2107 if (need_char < 0)
2108 {
2109 fprintf(outfile, "No need char\n");
2110 }
2111 else
2112 {
2113 int ch = need_char & 255;
2114 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2115 "" : " (caseless)";
2116 if (PRINTHEX(ch))
2117 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2118 else
2119 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2120 }
2121
2122 /* Don't output study size; at present it is in any case a fixed
2123 value, but it varies, depending on the computer architecture, and
2124 so messes up the test suite. (And with the /F option, it might be
2125 flipped.) If study was forced by an external -s, don't show this
2126 information unless -i or -d was also present. This means that, except
2127 when auto-callouts are involved, the output from runs with and without
2128 -s should be identical. */
2129
2130 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2131 {
2132 if (extra == NULL)
2133 fprintf(outfile, "Study returned NULL\n");
2134 else
2135 {
2136 uschar *start_bits = NULL;
2137 int minlength;
2138
2139 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2140 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2141
2142 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2143 if (start_bits == NULL)
2144 fprintf(outfile, "No set of starting bytes\n");
2145 else
2146 {
2147 int i;
2148 int c = 24;
2149 fprintf(outfile, "Starting byte set: ");
2150 for (i = 0; i < 256; i++)
2151 {
2152 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2153 {
2154 if (c > 75)
2155 {
2156 fprintf(outfile, "\n ");
2157 c = 2;
2158 }
2159 if (PRINTHEX(i) && i != ' ')
2160 {
2161 fprintf(outfile, "%c ", i);
2162 c += 2;
2163 }
2164 else
2165 {
2166 fprintf(outfile, "\\x%02x ", i);
2167 c += 5;
2168 }
2169 }
2170 }
2171 fprintf(outfile, "\n");
2172 }
2173 }
2174
2175 /* Show this only if the JIT was set by /S, not by -s. */
2176
2177 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2178 {
2179 int jit;
2180 new_info(re, extra, PCRE_INFO_JIT, &jit);
2181 if (jit)
2182 fprintf(outfile, "JIT study was successful\n");
2183 else
2184 #ifdef SUPPORT_JIT
2185 fprintf(outfile, "JIT study was not successful\n");
2186 #else
2187 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2188 #endif
2189 }
2190 }
2191 }
2192
2193 /* If the '>' option was present, we write out the regex to a file, and
2194 that is all. The first 8 bytes of the file are the regex length and then
2195 the study length, in big-endian order. */
2196
2197 if (to_file != NULL)
2198 {
2199 FILE *f = fopen((char *)to_file, "wb");
2200 if (f == NULL)
2201 {
2202 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2203 }
2204 else
2205 {
2206 uschar sbuf[8];
2207 sbuf[0] = (uschar)((true_size >> 24) & 255);
2208 sbuf[1] = (uschar)((true_size >> 16) & 255);
2209 sbuf[2] = (uschar)((true_size >> 8) & 255);
2210 sbuf[3] = (uschar)((true_size) & 255);
2211
2212 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2213 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2214 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2215 sbuf[7] = (uschar)((true_study_size) & 255);
2216
2217 if (fwrite(sbuf, 1, 8, f) < 8 ||
2218 fwrite(re, 1, true_size, f) < true_size)
2219 {
2220 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2221 }
2222 else
2223 {
2224 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2225
2226 /* If there is study data, write it. */
2227
2228 if (extra != NULL)
2229 {
2230 if (fwrite(extra->study_data, 1, true_study_size, f) <
2231 true_study_size)
2232 {
2233 fprintf(outfile, "Write error on %s: %s\n", to_file,
2234 strerror(errno));
2235 }
2236 else fprintf(outfile, "Study data written to %s\n", to_file);
2237 }
2238 }
2239 fclose(f);
2240 }
2241
2242 new_free(re);
2243 if (extra != NULL) pcre_free_study(extra);
2244 if (locale_set)
2245 {
2246 new_free((void *)tables);
2247 setlocale(LC_CTYPE, "C");
2248 locale_set = 0;
2249 }
2250 continue; /* With next regex */
2251 }
2252 } /* End of non-POSIX compile */
2253
2254 /* Read data lines and test them */
2255
2256 for (;;)
2257 {
2258 uschar *q;
2259 uschar *bptr;
2260 int *use_offsets = offsets;
2261 int use_size_offsets = size_offsets;
2262 int callout_data = 0;
2263 int callout_data_set = 0;
2264 int count, c;
2265 int copystrings = 0;
2266 int find_match_limit = default_find_match_limit;
2267 int getstrings = 0;
2268 int getlist = 0;
2269 int gmatched = 0;
2270 int start_offset = 0;
2271 int start_offset_sign = 1;
2272 int g_notempty = 0;
2273 int use_dfa = 0;
2274
2275 options = 0;
2276
2277 *copynames = 0;
2278 *getnames = 0;
2279
2280 copynamesptr = copynames;
2281 getnamesptr = getnames;
2282
2283 pcre_callout = callout;
2284 first_callout = 1;
2285 last_callout_mark = NULL;
2286 callout_extra = 0;
2287 callout_count = 0;
2288 callout_fail_count = 999999;
2289 callout_fail_id = -1;
2290 show_malloc = 0;
2291
2292 if (extra != NULL) extra->flags &=
2293 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2294
2295 len = 0;
2296 for (;;)
2297 {
2298 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2299 {
2300 if (len > 0) /* Reached EOF without hitting a newline */
2301 {
2302 fprintf(outfile, "\n");
2303 break;
2304 }
2305 done = 1;
2306 goto CONTINUE;
2307 }
2308 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2309 len = (int)strlen((char *)buffer);
2310 if (buffer[len-1] == '\n') break;
2311 }
2312
2313 while (len > 0 && isspace(buffer[len-1])) len--;
2314 buffer[len] = 0;
2315 if (len == 0) break;
2316
2317 p = buffer;
2318 while (isspace(*p)) p++;
2319
2320 bptr = q = dbuffer;
2321 while ((c = *p++) != 0)
2322 {
2323 int i = 0;
2324 int n = 0;
2325
2326 if (c == '\\') switch ((c = *p++))
2327 {
2328 case 'a': c = 7; break;
2329 case 'b': c = '\b'; break;
2330 case 'e': c = 27; break;
2331 case 'f': c = '\f'; break;
2332 case 'n': c = '\n'; break;
2333 case 'r': c = '\r'; break;
2334 case 't': c = '\t'; break;
2335 case 'v': c = '\v'; break;
2336
2337 case '0': case '1': case '2': case '3':
2338 case '4': case '5': case '6': case '7':
2339 c -= '0';
2340 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2341 c = c * 8 + *p++ - '0';
2342
2343 #if !defined NOUTF8
2344 if (use_utf8 && c > 255)
2345 {
2346 unsigned char buff8[8];
2347 int ii, utn;
2348 utn = ord2utf8(c, buff8);
2349 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2350 c = buff8[ii]; /* Last byte */
2351 }
2352 #endif
2353 break;
2354
2355 case 'x':
2356
2357 /* Handle \x{..} specially - new Perl thing for utf8 */
2358
2359 #if !defined NOUTF8
2360 if (*p == '{')
2361 {
2362 unsigned char *pt = p;
2363 c = 0;
2364
2365 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2366 when isxdigit() is a macro that refers to its argument more than
2367 once. This is banned by the C Standard, but apparently happens in at
2368 least one MacOS environment. */
2369
2370 for (pt++; isxdigit(*pt); pt++)
2371 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2372 if (*pt == '}')
2373 {
2374 unsigned char buff8[8];
2375 int ii, utn;
2376 if (use_utf8)
2377 {
2378 utn = ord2utf8(c, buff8);
2379 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2380 c = buff8[ii]; /* Last byte */
2381 }
2382 else
2383 {
2384 if (c > 255)
2385 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2386 "UTF-8 mode is not enabled.\n"
2387 "** Truncation will probably give the wrong result.\n", c);
2388 }
2389 p = pt + 1;
2390 break;
2391 }
2392 /* Not correct form; fall through */
2393 }
2394 #endif
2395
2396 /* Ordinary \x */
2397
2398 c = 0;
2399 while (i++ < 2 && isxdigit(*p))
2400 {
2401 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2402 p++;
2403 }
2404 break;
2405
2406 case 0: /* \ followed by EOF allows for an empty line */
2407 p--;
2408 continue;
2409
2410 case '>':
2411 if (*p == '-')
2412 {
2413 start_offset_sign = -1;
2414 p++;
2415 }
2416 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2417 start_offset *= start_offset_sign;
2418 continue;
2419
2420 case 'A': /* Option setting */
2421 options |= PCRE_ANCHORED;
2422 continue;
2423
2424 case 'B':
2425 options |= PCRE_NOTBOL;
2426 continue;
2427
2428 case 'C':
2429 if (isdigit(*p)) /* Set copy string */
2430 {
2431 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2432 copystrings |= 1 << n;
2433 }
2434 else if (isalnum(*p))
2435 {
2436 uschar *npp = copynamesptr;
2437 while (isalnum(*p)) *npp++ = *p++;
2438 *npp++ = 0;
2439 *npp = 0;
2440 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2441 if (n < 0)
2442 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2443 copynamesptr = npp;
2444 }
2445 else if (*p == '+')
2446 {
2447 callout_extra = 1;
2448 p++;
2449 }
2450 else if (*p == '-')
2451 {
2452 pcre_callout = NULL;
2453 p++;
2454 }
2455 else if (*p == '!')
2456 {
2457 callout_fail_id = 0;
2458 p++;
2459 while(isdigit(*p))
2460 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2461 callout_fail_count = 0;
2462 if (*p == '!')
2463 {
2464 p++;
2465 while(isdigit(*p))
2466 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2467 }
2468 }
2469 else if (*p == '*')
2470 {
2471 int sign = 1;
2472 callout_data = 0;
2473 if (*(++p) == '-') { sign = -1; p++; }
2474 while(isdigit(*p))
2475 callout_data = callout_data * 10 + *p++ - '0';
2476 callout_data *= sign;
2477 callout_data_set = 1;
2478 }
2479 continue;
2480
2481 #if !defined NODFA
2482 case 'D':
2483 #if !defined NOPOSIX
2484 if (posix || do_posix)
2485 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2486 else
2487 #endif
2488 use_dfa = 1;
2489 continue;
2490 #endif
2491
2492 #if !defined NODFA
2493 case 'F':
2494 options |= PCRE_DFA_SHORTEST;
2495 continue;
2496 #endif
2497
2498 case 'G':
2499 if (isdigit(*p))
2500 {
2501 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2502 getstrings |= 1 << n;
2503 }
2504 else if (isalnum(*p))
2505 {
2506 uschar *npp = getnamesptr;
2507 while (isalnum(*p)) *npp++ = *p++;
2508 *npp++ = 0;
2509 *npp = 0;
2510 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2511 if (n < 0)
2512 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2513 getnamesptr = npp;
2514 }
2515 continue;
2516
2517 case 'J':
2518 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2519 if (extra != NULL
2520 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2521 && extra->executable_jit != NULL)
2522 {
2523 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2524 jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2525 pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2526 }
2527 continue;
2528
2529 case 'L':
2530 getlist = 1;
2531 continue;
2532
2533 case 'M':
2534 find_match_limit = 1;
2535 continue;
2536
2537 case 'N':
2538 if ((options & PCRE_NOTEMPTY) != 0)
2539 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2540 else
2541 options |= PCRE_NOTEMPTY;
2542 continue;
2543
2544 case 'O':
2545 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2546 if (n > size_offsets_max)
2547 {
2548 size_offsets_max = n;
2549 free(offsets);
2550 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2551 if (offsets == NULL)
2552 {
2553 printf("** Failed to get %d bytes of memory for offsets vector\n",
2554 (int)(size_offsets_max * sizeof(int)));
2555 yield = 1;
2556 goto EXIT;
2557 }
2558 }
2559 use_size_offsets = n;
2560 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2561 continue;
2562
2563 case 'P':
2564 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2565 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2566 continue;
2567
2568 case 'Q':
2569 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2570 if (extra == NULL)
2571 {
2572 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2573 extra->flags = 0;
2574 }
2575 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2576 extra->match_limit_recursion = n;
2577 continue;
2578
2579 case 'q':
2580 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2581 if (extra == NULL)
2582 {
2583 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2584 extra->flags = 0;
2585 }
2586 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2587 extra->match_limit = n;
2588 continue;
2589
2590 #if !defined NODFA
2591 case 'R':
2592 options |= PCRE_DFA_RESTART;
2593 continue;
2594 #endif
2595
2596 case 'S':
2597 show_malloc = 1;
2598 continue;
2599
2600 case 'Y':
2601 options |= PCRE_NO_START_OPTIMIZE;
2602 continue;
2603
2604 case 'Z':
2605 options |= PCRE_NOTEOL;
2606 continue;
2607
2608 case '?':
2609 options |= PCRE_NO_UTF8_CHECK;
2610 continue;
2611
2612 case '<':
2613 {
2614 int x = check_newline(p, outfile);
2615 if (x == 0) goto NEXT_DATA;
2616 options |= x;
2617 while (*p++ != '>');
2618 }
2619 continue;
2620 }
2621 *q++ = c;
2622 }
2623 *q = 0;
2624 len = (int)(q - dbuffer);
2625
2626 /* Move the data to the end of the buffer so that a read over the end of
2627 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2628 we are using the POSIX interface, we must include the terminating zero. */
2629
2630 #if !defined NOPOSIX
2631 if (posix || do_posix)
2632 {
2633 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2634 bptr += buffer_size - len - 1;
2635 }
2636 else
2637 #endif
2638 {
2639 memmove(bptr + buffer_size - len, bptr, len);
2640 bptr += buffer_size - len;
2641 }
2642
2643 if ((all_use_dfa || use_dfa) && find_match_limit)
2644 {
2645 printf("**Match limit not relevant for DFA matching: ignored\n");
2646 find_match_limit = 0;
2647 }
2648
2649 /* Handle matching via the POSIX interface, which does not
2650 support timing or playing with the match limit or callout data. */
2651
2652 #if !defined NOPOSIX
2653 if (posix || do_posix)
2654 {
2655 int rc;
2656 int eflags = 0;
2657 regmatch_t *pmatch = NULL;
2658 if (use_size_offsets > 0)
2659 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2660 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2661 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2662 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2663
2664 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2665
2666 if (rc != 0)
2667 {
2668 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2669 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2670 }
2671 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2672 != 0)
2673 {
2674 fprintf(outfile, "Matched with REG_NOSUB\n");
2675 }
2676 else
2677 {
2678 size_t i;
2679 for (i = 0; i < (size_t)use_size_offsets; i++)
2680 {
2681 if (pmatch[i].rm_so >= 0)
2682 {
2683 fprintf(outfile, "%2d: ", (int)i);
2684 (void)pchars(dbuffer + pmatch[i].rm_so,
2685 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2686 fprintf(outfile, "\n");
2687 if (do_showcaprest || (i == 0 && do_showrest))
2688 {
2689 fprintf(outfile, "%2d+ ", (int)i);
2690 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2691 outfile);
2692 fprintf(outfile, "\n");
2693 }
2694 }
2695 }
2696 }
2697 free(pmatch);
2698 }
2699
2700 /* Handle matching via the native interface - repeats for /g and /G */
2701
2702 else
2703 #endif /* !defined NOPOSIX */
2704
2705 for (;; gmatched++) /* Loop for /g or /G */
2706 {
2707 markptr = NULL;
2708
2709 if (timeitm > 0)
2710 {
2711 register int i;
2712 clock_t time_taken;
2713 clock_t start_time = clock();
2714
2715 #if !defined NODFA
2716 if (all_use_dfa || use_dfa)
2717 {
2718 int workspace[1000];
2719 for (i = 0; i < timeitm; i++)
2720 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2721 options | g_notempty, use_offsets, use_size_offsets, workspace,
2722 sizeof(workspace)/sizeof(int));
2723 }
2724 else
2725 #endif
2726
2727 for (i = 0; i < timeitm; i++)
2728 count = pcre_exec(re, extra, (char *)bptr, len,
2729 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2730
2731 time_taken = clock() - start_time;
2732 fprintf(outfile, "Execute time %.4f milliseconds\n",
2733 (((double)time_taken * 1000.0) / (double)timeitm) /
2734 (double)CLOCKS_PER_SEC);
2735 }
2736
2737 /* If find_match_limit is set, we want to do repeated matches with
2738 varying limits in order to find the minimum value for the match limit and
2739 for the recursion limit. The match limits are relevant only to the normal
2740 running of pcre_exec(), so disable the JIT optimization. This makes it
2741 possible to run the same set of tests with and without JIT externally
2742 requested. */
2743
2744 if (find_match_limit)
2745 {
2746 if (extra == NULL)
2747 {
2748 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2749 extra->flags = 0;
2750 }
2751 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
2752
2753 (void)check_match_limit(re, extra, bptr, len, start_offset,
2754 options|g_notempty, use_offsets, use_size_offsets,
2755 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2756 PCRE_ERROR_MATCHLIMIT, "match()");
2757
2758 count = check_match_limit(re, extra, bptr, len, start_offset,
2759 options|g_notempty, use_offsets, use_size_offsets,
2760 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2761 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2762 }
2763
2764 /* If callout_data is set, use the interface with additional data */
2765
2766 else if (callout_data_set)
2767 {
2768 if (extra == NULL)
2769 {
2770 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2771 extra->flags = 0;
2772 }
2773 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2774 extra->callout_data = &callout_data;
2775 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2776 options | g_notempty, use_offsets, use_size_offsets);
2777 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2778 }
2779
2780 /* The normal case is just to do the match once, with the default
2781 value of match_limit. */
2782
2783 #if !defined NODFA
2784 else if (all_use_dfa || use_dfa)
2785 {
2786 int workspace[1000];
2787 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2788 options | g_notempty, use_offsets, use_size_offsets, workspace,
2789 sizeof(workspace)/sizeof(int));
2790 if (count == 0)
2791 {
2792 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2793 count = use_size_offsets/2;
2794 }
2795 }
2796 #endif
2797
2798 else
2799 {
2800 count = pcre_exec(re, extra, (char *)bptr, len,
2801 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2802 if (count == 0)
2803 {
2804 fprintf(outfile, "Matched, but too many substrings\n");
2805 count = use_size_offsets/3;
2806 }
2807 }
2808
2809 /* Matched */
2810
2811 if (count >= 0)
2812 {
2813 int i, maxcount;
2814
2815 #if !defined NODFA
2816 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2817 #endif
2818 maxcount = use_size_offsets/3;
2819
2820 /* This is a check against a lunatic return value. */
2821
2822 if (count > maxcount)
2823 {
2824 fprintf(outfile,
2825 "** PCRE error: returned count %d is too big for offset size %d\n",
2826 count, use_size_offsets);
2827 count = use_size_offsets/3;
2828 if (do_g || do_G)
2829 {
2830 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2831 do_g = do_G = FALSE; /* Break g/G loop */
2832 }
2833 }
2834
2835 /* do_allcaps requests showing of all captures in the pattern, to check
2836 unset ones at the end. */
2837
2838 if (do_allcaps)
2839 {
2840 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2841 count++; /* Allow for full match */
2842 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2843 }
2844
2845 /* Output the captured substrings */
2846
2847 for (i = 0; i < count * 2; i += 2)
2848 {
2849 if (use_offsets[i] < 0)
2850 {
2851 if (use_offsets[i] != -1)
2852 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2853 use_offsets[i], i);
2854 if (use_offsets[i+1] != -1)
2855 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2856 use_offsets[i+1], i+1);
2857 fprintf(outfile, "%2d: <unset>\n", i/2);
2858 }
2859 else
2860 {
2861 fprintf(outfile, "%2d: ", i/2);
2862 (void)pchars(bptr + use_offsets[i],
2863 use_offsets[i+1] - use_offsets[i], outfile);
2864 fprintf(outfile, "\n");
2865 if (do_showcaprest || (i == 0 && do_showrest))
2866 {
2867 fprintf(outfile, "%2d+ ", i/2);
2868 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2869 outfile);
2870 fprintf(outfile, "\n");
2871 }
2872 }
2873 }
2874
2875 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2876
2877 for (i = 0; i < 32; i++)
2878 {
2879 if ((copystrings & (1 << i)) != 0)
2880 {
2881 char copybuffer[256];
2882 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2883 i, copybuffer, sizeof(copybuffer));
2884 if (rc < 0)
2885 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2886 else
2887 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2888 }
2889 }
2890
2891 for (copynamesptr = copynames;
2892 *copynamesptr != 0;
2893 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2894 {
2895 char copybuffer[256];
2896 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2897 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2898 if (rc < 0)
2899 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2900 else
2901 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2902 }
2903
2904 for (i = 0; i < 32; i++)
2905 {
2906 if ((getstrings & (1 << i)) != 0)
2907 {
2908 const char *substring;
2909 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2910 i, &substring);
2911 if (rc < 0)
2912 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2913 else
2914 {
2915 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2916 pcre_free_substring(substring);
2917 }
2918 }
2919 }
2920
2921 for (getnamesptr = getnames;
2922 *getnamesptr != 0;
2923 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2924 {
2925 const char *substring;
2926 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2927 count, (char *)getnamesptr, &substring);
2928 if (rc < 0)
2929 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2930 else
2931 {
2932 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2933 pcre_free_substring(substring);
2934 }
2935 }
2936
2937 if (getlist)
2938 {
2939 const char **stringlist;
2940 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2941 &stringlist);
2942 if (rc < 0)
2943 fprintf(outfile, "get substring list failed %d\n", rc);
2944 else
2945 {
2946 for (i = 0; i < count; i++)
2947 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2948 if (stringlist[i] != NULL)
2949 fprintf(outfile, "string list not terminated by NULL\n");
2950 pcre_free_substring_list(stringlist);
2951 }
2952 }
2953 }
2954
2955 /* There was a partial match */
2956
2957 else if (count == PCRE_ERROR_PARTIAL)
2958 {
2959 if (markptr == NULL) fprintf(outfile, "Partial match");
2960 else fprintf(outfile, "Partial match, mark=%s", markptr);
2961 if (use_size_offsets > 1)
2962 {
2963 fprintf(outfile, ": ");
2964 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2965 outfile);
2966 }
2967 fprintf(outfile, "\n");
2968 break; /* Out of the /g loop */
2969 }
2970
2971 /* Failed to match. If this is a /g or /G loop and we previously set
2972 g_notempty after a null match, this is not necessarily the end. We want
2973 to advance the start offset, and continue. We won't be at the end of the
2974 string - that was checked before setting g_notempty.
2975
2976 Complication arises in the case when the newline convention is "any",
2977 "crlf", or "anycrlf". If the previous match was at the end of a line
2978 terminated by CRLF, an advance of one character just passes the \r,
2979 whereas we should prefer the longer newline sequence, as does the code in
2980 pcre_exec(). Fudge the offset value to achieve this. We check for a
2981 newline setting in the pattern; if none was set, use pcre_config() to
2982 find the default.
2983
2984 Otherwise, in the case of UTF-8 matching, the advance must be one
2985 character, not one byte. */
2986
2987 else
2988 {
2989 if (g_notempty != 0)
2990 {
2991 int onechar = 1;
2992 unsigned int obits = ((real_pcre *)re)->options;
2993 use_offsets[0] = start_offset;
2994 if ((obits & PCRE_NEWLINE_BITS) == 0)
2995 {
2996 int d;
2997 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2998 /* Note that these values are always the ASCII ones, even in
2999 EBCDIC environments. CR = 13, NL = 10. */
3000 obits = (d == 13)? PCRE_NEWLINE_CR :
3001 (d == 10)? PCRE_NEWLINE_LF :
3002 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3003 (d == -2)? PCRE_NEWLINE_ANYCRLF :
3004 (d == -1)? PCRE_NEWLINE_ANY : 0;
3005 }
3006 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3007 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3008 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3009 &&
3010 start_offset < len - 1 &&
3011 bptr[start_offset] == '\r' &&
3012 bptr[start_offset+1] == '\n')
3013 onechar++;
3014 else if (use_utf8)
3015 {
3016 while (start_offset + onechar < len)
3017 {
3018 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3019 onechar++;
3020 }
3021 }
3022 use_offsets[1] = start_offset + onechar;
3023 }
3024 else
3025 {
3026 switch(count)
3027 {
3028 case PCRE_ERROR_NOMATCH:
3029 if (gmatched == 0)
3030 {
3031 if (markptr == NULL) fprintf(outfile, "No match\n");
3032 else fprintf(outfile, "No match, mark = %s\n", markptr);
3033 }
3034 break;
3035
3036 case PCRE_ERROR_BADUTF8:
3037 case PCRE_ERROR_SHORTUTF8:
3038 fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3039 (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3040 if (use_size_offsets >= 2)
3041 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3042 use_offsets[1]);
3043 fprintf(outfile, "\n");
3044 break;
3045
3046 default:
3047 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3048 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3049 else
3050 fprintf(outfile, "Error %d (Unexpected value)\n", count);
3051 break;
3052 }
3053
3054 break; /* Out of the /g loop */
3055 }
3056 }
3057
3058 /* If not /g or /G we are done */
3059
3060 if (!do_g && !do_G) break;
3061
3062 /* If we have matched an empty string, first check to see if we are at
3063 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3064 Perl's /g option does. This turns out to be rather cunning. First we set
3065 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3066 same point. If this fails (picked up above) we advance to the next
3067 character. */
3068
3069 g_notempty = 0;
3070
3071 if (use_offsets[0] == use_offsets[1])
3072 {
3073 if (use_offsets[0] == len) break;
3074 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3075 }
3076
3077 /* For /g, update the start offset, leaving the rest alone */
3078
3079 if (do_g) start_offset = use_offsets[1];
3080
3081 /* For /G, update the pointer and length */
3082
3083 else
3084 {
3085 bptr += use_offsets[1];
3086 len -= use_offsets[1];
3087 }
3088 } /* End of loop for /g and /G */
3089
3090 NEXT_DATA: continue;
3091 } /* End of loop for data lines */
3092
3093 CONTINUE:
3094
3095 #if !defined NOPOSIX
3096 if (posix || do_posix) regfree(&preg);
3097 #endif
3098
3099 if (re != NULL) new_free(re);
3100 if (extra != NULL) pcre_free_study(extra);
3101 if (locale_set)
3102 {
3103 new_free((void *)tables);
3104 setlocale(LC_CTYPE, "C");
3105 locale_set = 0;
3106 }
3107 if (jit_stack != NULL)
3108 {
3109 pcre_jit_stack_free(jit_stack);
3110 jit_stack = NULL;
3111 }
3112 }
3113
3114 if (infile == stdin) fprintf(outfile, "\n");
3115
3116 EXIT:
3117
3118 if (infile != NULL && infile != stdin) fclose(infile);
3119 if (outfile != NULL && outfile != stdout) fclose(outfile);
3120
3121 free(buffer);
3122 free(dbuffer);
3123 free(pbuffer);
3124 free(offsets);
3125
3126 return yield;
3127 }
3128
3129 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12