/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 773 - (show annotations) (download)
Wed Nov 30 18:10:27 2011 UTC (2 years, 9 months ago) by ph10
File MIME type: text/plain
File size: 94084 byte(s)
Expand compile workspace for very many forward references. This ups the limit 
by a factor of 100.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <locale.h>
#include <errno.h>
#include <limits.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #ifndef isatty
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
78 #ifndef fileno
79 #define fileno _fileno
80 #endif
81
82 /* A user sent this fix for Borland Builder 5 under Windows. */
83
84 #ifdef __BORLANDC__
85 #define _setmode(handle, mode) setmode(handle, mode)
86 #endif
87
88 /* Not Windows */
89
90 #else
91 #include <sys/time.h> /* These two includes are needed */
92 #include <sys/resource.h> /* for setrlimit(). */
93 #define INPUT_MODE "rb"
94 #define OUTPUT_MODE "wb"
95 #endif
96
97
98 /* We have to include pcre_internal.h because we need the internal info for
99 displaying the results of pcre_study() and we also need to know about the
100 internal macros, structures, and other internal data values; pcretest has
101 "inside information" compared to a program that strictly follows the PCRE API.
102
103 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105 appropriately for an application, not for building PCRE. */
106
107 #include "pcre.h"
108 #include "pcre_internal.h"
109
110 /* We need access to some of the data tables that PCRE uses. So as not to have
111 to keep two copies, we include the source file here, changing the names of the
112 external symbols to prevent clashes. */
113
114 #define _pcre_ucp_gentype ucp_gentype
115 #define _pcre_ucp_typerange ucp_typerange
116 #define _pcre_utf8_table1 utf8_table1
117 #define _pcre_utf8_table1_size utf8_table1_size
118 #define _pcre_utf8_table2 utf8_table2
119 #define _pcre_utf8_table3 utf8_table3
120 #define _pcre_utf8_table4 utf8_table4
121 #define _pcre_utf8_char_sizes utf8_char_sizes
122 #define _pcre_utt utt
123 #define _pcre_utt_size utt_size
124 #define _pcre_utt_names utt_names
125 #define _pcre_OP_lengths OP_lengths
126
127 #include "pcre_tables.c"
128
129 /* We also need the pcre_printint() function for printing out compiled
130 patterns. This function is in a separate file so that it can be included in
131 pcre_compile.c when that module is compiled with debugging enabled. It needs to
132 know which case is being compiled. */
133
134 #define COMPILING_PCRETEST
135 #include "pcre_printint.src"
136
137 /* The definition of the macro PRINTABLE, which determines whether to print an
138 output character as-is or as a hex value when showing compiled patterns, is
139 contained in the printint.src file. We use it here also, in cases when the
140 locale has not been explicitly changed, so as to get consistent output from
141 systems that differ in their output from isprint() even in the "C" locale. */
142
143 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
144
145 /* It is possible to compile this test program without including support for
146 testing the POSIX interface, though this is not available via the standard
147 Makefile. */
148
149 #if !defined NOPOSIX
150 #include "pcreposix.h"
151 #endif
152
153 /* It is also possible, for the benefit of the version currently imported into
154 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
155 interface to the DFA matcher (NODFA), and without the doublecheck of the old
156 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
157 UTF8 support if PCRE is built without it. */
158
159 #ifndef SUPPORT_UTF8
160 #ifndef NOUTF8
161 #define NOUTF8
162 #endif
163 #endif
164
165
166 /* Other parameters */
167
168 #ifndef CLOCKS_PER_SEC
169 #ifdef CLK_TCK
170 #define CLOCKS_PER_SEC CLK_TCK
171 #else
172 #define CLOCKS_PER_SEC 100
173 #endif
174 #endif
175
176 /* This is the default loop count for timing. */
177
178 #define LOOPREPEAT 500000
179
180 /* Static variables */
181
182 static FILE *outfile;
183 static int log_store = 0;
184 static int callout_count;
185 static int callout_extra;
186 static int callout_fail_count;
187 static int callout_fail_id;
188 static int debug_lengths;
189 static int first_callout;
190 static int locale_set = 0;
191 static int show_malloc;
192 static int use_utf8;
193 static size_t gotten_store;
194 static size_t first_gotten_store = 0;
195 static const unsigned char *last_callout_mark = NULL;
196
197 /* The buffers grow automatically if very long input lines are encountered. */
198
199 static int buffer_size = 50000;
200 static uschar *buffer = NULL;
201 static uschar *dbuffer = NULL;
202 static uschar *pbuffer = NULL;
203
204 /* Textual explanations for runtime error codes */
205
206 static const char *errtexts[] = {
207 NULL, /* 0 is no error */
208 NULL, /* NOMATCH is handled specially */
209 "NULL argument passed",
210 "bad option value",
211 "magic number missing",
212 "unknown opcode - pattern overwritten?",
213 "no more memory",
214 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
215 "match limit exceeded",
216 "callout error code",
217 NULL, /* BADUTF8 is handled specially */
218 "bad UTF-8 offset",
219 NULL, /* PARTIAL is handled specially */
220 "not used - internal error",
221 "internal error - pattern overwritten?",
222 "bad count value",
223 "item unsupported for DFA matching",
224 "backreference condition or recursion test not supported for DFA matching",
225 "match limit not supported for DFA matching",
226 "workspace size exceeded in DFA matching",
227 "too much recursion for DFA matching",
228 "recursion limit exceeded",
229 "not used - internal error",
230 "invalid combination of newline options",
231 "bad offset value",
232 NULL, /* SHORTUTF8 is handled specially */
233 "nested recursion at the same subject position",
234 "JIT stack limit reached"
235 };
236
237
238 /*************************************************
239 * Alternate character tables *
240 *************************************************/
241
242 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
243 using the default tables of the library. However, the T option can be used to
244 select alternate sets of tables, for different kinds of testing. Note also that
245 the L (locale) option also adjusts the tables. */
246
247 /* This is the set of tables distributed as default with PCRE. It recognizes
248 only ASCII characters. */
249
250 static const unsigned char tables0[] = {
251
252 /* This table is a lower casing table. */
253
254 0, 1, 2, 3, 4, 5, 6, 7,
255 8, 9, 10, 11, 12, 13, 14, 15,
256 16, 17, 18, 19, 20, 21, 22, 23,
257 24, 25, 26, 27, 28, 29, 30, 31,
258 32, 33, 34, 35, 36, 37, 38, 39,
259 40, 41, 42, 43, 44, 45, 46, 47,
260 48, 49, 50, 51, 52, 53, 54, 55,
261 56, 57, 58, 59, 60, 61, 62, 63,
262 64, 97, 98, 99,100,101,102,103,
263 104,105,106,107,108,109,110,111,
264 112,113,114,115,116,117,118,119,
265 120,121,122, 91, 92, 93, 94, 95,
266 96, 97, 98, 99,100,101,102,103,
267 104,105,106,107,108,109,110,111,
268 112,113,114,115,116,117,118,119,
269 120,121,122,123,124,125,126,127,
270 128,129,130,131,132,133,134,135,
271 136,137,138,139,140,141,142,143,
272 144,145,146,147,148,149,150,151,
273 152,153,154,155,156,157,158,159,
274 160,161,162,163,164,165,166,167,
275 168,169,170,171,172,173,174,175,
276 176,177,178,179,180,181,182,183,
277 184,185,186,187,188,189,190,191,
278 192,193,194,195,196,197,198,199,
279 200,201,202,203,204,205,206,207,
280 208,209,210,211,212,213,214,215,
281 216,217,218,219,220,221,222,223,
282 224,225,226,227,228,229,230,231,
283 232,233,234,235,236,237,238,239,
284 240,241,242,243,244,245,246,247,
285 248,249,250,251,252,253,254,255,
286
287 /* This table is a case flipping table. */
288
289 0, 1, 2, 3, 4, 5, 6, 7,
290 8, 9, 10, 11, 12, 13, 14, 15,
291 16, 17, 18, 19, 20, 21, 22, 23,
292 24, 25, 26, 27, 28, 29, 30, 31,
293 32, 33, 34, 35, 36, 37, 38, 39,
294 40, 41, 42, 43, 44, 45, 46, 47,
295 48, 49, 50, 51, 52, 53, 54, 55,
296 56, 57, 58, 59, 60, 61, 62, 63,
297 64, 97, 98, 99,100,101,102,103,
298 104,105,106,107,108,109,110,111,
299 112,113,114,115,116,117,118,119,
300 120,121,122, 91, 92, 93, 94, 95,
301 96, 65, 66, 67, 68, 69, 70, 71,
302 72, 73, 74, 75, 76, 77, 78, 79,
303 80, 81, 82, 83, 84, 85, 86, 87,
304 88, 89, 90,123,124,125,126,127,
305 128,129,130,131,132,133,134,135,
306 136,137,138,139,140,141,142,143,
307 144,145,146,147,148,149,150,151,
308 152,153,154,155,156,157,158,159,
309 160,161,162,163,164,165,166,167,
310 168,169,170,171,172,173,174,175,
311 176,177,178,179,180,181,182,183,
312 184,185,186,187,188,189,190,191,
313 192,193,194,195,196,197,198,199,
314 200,201,202,203,204,205,206,207,
315 208,209,210,211,212,213,214,215,
316 216,217,218,219,220,221,222,223,
317 224,225,226,227,228,229,230,231,
318 232,233,234,235,236,237,238,239,
319 240,241,242,243,244,245,246,247,
320 248,249,250,251,252,253,254,255,
321
322 /* This table contains bit maps for various character classes. Each map is 32
323 bytes long and the bits run from the least significant end of each byte. The
324 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
325 graph, print, punct, and cntrl. Other classes are built from combinations. */
326
327 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
328 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
331
332 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
333 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
334 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
335 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
336
337 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
338 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
340 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
341
342 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
343 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
344 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
345 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
346
347 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
348 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
349 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
350 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
351
352 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
353 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
354 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
355 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
356
357 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
358 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
359 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
361
362 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
363 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
364 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
366
367 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
368 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
369 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
370 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
371
372 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
373 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
374 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
375 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
376
377 /* This table identifies various classes of character by individual bits:
378 0x01 white space character
379 0x02 letter
380 0x04 decimal digit
381 0x08 hexadecimal digit
382 0x10 alphanumeric or '_'
383 0x80 regular expression metacharacter or binary zero
384 */
385
386 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
387 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
388 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
389 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
390 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
391 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
392 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
393 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
394 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
395 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
396 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
397 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
398 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
399 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
400 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
401 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
402 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
403 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
404 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
405 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
406 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
407 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
408 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
409 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
410 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
411 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
412 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
413 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
414 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
415 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
416 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
417 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
418
419 /* This is a set of tables that came originally from a Windows user. It seems to
420 be at least an approximation of ISO 8859. In particular, there are characters
421 greater than 128 that are marked as spaces, letters, etc. */
422
423 static const unsigned char tables1[] = {
424 0,1,2,3,4,5,6,7,
425 8,9,10,11,12,13,14,15,
426 16,17,18,19,20,21,22,23,
427 24,25,26,27,28,29,30,31,
428 32,33,34,35,36,37,38,39,
429 40,41,42,43,44,45,46,47,
430 48,49,50,51,52,53,54,55,
431 56,57,58,59,60,61,62,63,
432 64,97,98,99,100,101,102,103,
433 104,105,106,107,108,109,110,111,
434 112,113,114,115,116,117,118,119,
435 120,121,122,91,92,93,94,95,
436 96,97,98,99,100,101,102,103,
437 104,105,106,107,108,109,110,111,
438 112,113,114,115,116,117,118,119,
439 120,121,122,123,124,125,126,127,
440 128,129,130,131,132,133,134,135,
441 136,137,138,139,140,141,142,143,
442 144,145,146,147,148,149,150,151,
443 152,153,154,155,156,157,158,159,
444 160,161,162,163,164,165,166,167,
445 168,169,170,171,172,173,174,175,
446 176,177,178,179,180,181,182,183,
447 184,185,186,187,188,189,190,191,
448 224,225,226,227,228,229,230,231,
449 232,233,234,235,236,237,238,239,
450 240,241,242,243,244,245,246,215,
451 248,249,250,251,252,253,254,223,
452 224,225,226,227,228,229,230,231,
453 232,233,234,235,236,237,238,239,
454 240,241,242,243,244,245,246,247,
455 248,249,250,251,252,253,254,255,
456 0,1,2,3,4,5,6,7,
457 8,9,10,11,12,13,14,15,
458 16,17,18,19,20,21,22,23,
459 24,25,26,27,28,29,30,31,
460 32,33,34,35,36,37,38,39,
461 40,41,42,43,44,45,46,47,
462 48,49,50,51,52,53,54,55,
463 56,57,58,59,60,61,62,63,
464 64,97,98,99,100,101,102,103,
465 104,105,106,107,108,109,110,111,
466 112,113,114,115,116,117,118,119,
467 120,121,122,91,92,93,94,95,
468 96,65,66,67,68,69,70,71,
469 72,73,74,75,76,77,78,79,
470 80,81,82,83,84,85,86,87,
471 88,89,90,123,124,125,126,127,
472 128,129,130,131,132,133,134,135,
473 136,137,138,139,140,141,142,143,
474 144,145,146,147,148,149,150,151,
475 152,153,154,155,156,157,158,159,
476 160,161,162,163,164,165,166,167,
477 168,169,170,171,172,173,174,175,
478 176,177,178,179,180,181,182,183,
479 184,185,186,187,188,189,190,191,
480 224,225,226,227,228,229,230,231,
481 232,233,234,235,236,237,238,239,
482 240,241,242,243,244,245,246,215,
483 248,249,250,251,252,253,254,223,
484 192,193,194,195,196,197,198,199,
485 200,201,202,203,204,205,206,207,
486 208,209,210,211,212,213,214,247,
487 216,217,218,219,220,221,222,255,
488 0,62,0,0,1,0,0,0,
489 0,0,0,0,0,0,0,0,
490 32,0,0,0,1,0,0,0,
491 0,0,0,0,0,0,0,0,
492 0,0,0,0,0,0,255,3,
493 126,0,0,0,126,0,0,0,
494 0,0,0,0,0,0,0,0,
495 0,0,0,0,0,0,0,0,
496 0,0,0,0,0,0,255,3,
497 0,0,0,0,0,0,0,0,
498 0,0,0,0,0,0,12,2,
499 0,0,0,0,0,0,0,0,
500 0,0,0,0,0,0,0,0,
501 254,255,255,7,0,0,0,0,
502 0,0,0,0,0,0,0,0,
503 255,255,127,127,0,0,0,0,
504 0,0,0,0,0,0,0,0,
505 0,0,0,0,254,255,255,7,
506 0,0,0,0,0,4,32,4,
507 0,0,0,128,255,255,127,255,
508 0,0,0,0,0,0,255,3,
509 254,255,255,135,254,255,255,7,
510 0,0,0,0,0,4,44,6,
511 255,255,127,255,255,255,127,255,
512 0,0,0,0,254,255,255,255,
513 255,255,255,255,255,255,255,127,
514 0,0,0,0,254,255,255,255,
515 255,255,255,255,255,255,255,255,
516 0,2,0,0,255,255,255,255,
517 255,255,255,255,255,255,255,127,
518 0,0,0,0,255,255,255,255,
519 255,255,255,255,255,255,255,255,
520 0,0,0,0,254,255,0,252,
521 1,0,0,248,1,0,0,120,
522 0,0,0,0,254,255,255,255,
523 0,0,128,0,0,0,128,0,
524 255,255,255,255,0,0,0,0,
525 0,0,0,0,0,0,0,128,
526 255,255,255,255,0,0,0,0,
527 0,0,0,0,0,0,0,0,
528 128,0,0,0,0,0,0,0,
529 0,1,1,0,1,1,0,0,
530 0,0,0,0,0,0,0,0,
531 0,0,0,0,0,0,0,0,
532 1,0,0,0,128,0,0,0,
533 128,128,128,128,0,0,128,0,
534 28,28,28,28,28,28,28,28,
535 28,28,0,0,0,0,0,128,
536 0,26,26,26,26,26,26,18,
537 18,18,18,18,18,18,18,18,
538 18,18,18,18,18,18,18,18,
539 18,18,18,128,128,0,128,16,
540 0,26,26,26,26,26,26,18,
541 18,18,18,18,18,18,18,18,
542 18,18,18,18,18,18,18,18,
543 18,18,18,128,128,0,0,0,
544 0,0,0,0,0,1,0,0,
545 0,0,0,0,0,0,0,0,
546 0,0,0,0,0,0,0,0,
547 0,0,0,0,0,0,0,0,
548 1,0,0,0,0,0,0,0,
549 0,0,18,0,0,0,0,0,
550 0,0,20,20,0,18,0,0,
551 0,20,18,0,0,0,0,0,
552 18,18,18,18,18,18,18,18,
553 18,18,18,18,18,18,18,18,
554 18,18,18,18,18,18,18,0,
555 18,18,18,18,18,18,18,18,
556 18,18,18,18,18,18,18,18,
557 18,18,18,18,18,18,18,18,
558 18,18,18,18,18,18,18,0,
559 18,18,18,18,18,18,18,18
560 };
561
562
563
564
565 #ifndef HAVE_STRERROR
566 /*************************************************
567 * Provide strerror() for non-ANSI libraries *
568 *************************************************/
569
570 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
571 in their libraries, but can provide the same facility by this simple
572 alternative function. */
573
/* Error-message table and its length, supplied by the C library on the old
systems this fallback is meant for. */

extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror(): map an errno value to its message text.

Argument:   n   the error number
Returns:    the library's message for n, or a fixed "unknown" text when n is
            outside the range 0 .. sys_nerr-1
*/

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
583 #endif /* HAVE_STRERROR */
584
585
586 /*************************************************
587 * JIT memory callback *
588 *************************************************/
589
590 static pcre_jit_stack* jit_callback(void *arg)
591 {
592 return (pcre_jit_stack *)arg;
593 }
594
595
596 /*************************************************
597 * Read or extend an input line *
598 *************************************************/
599
600 /* Input lines are read into buffer, but both patterns and data lines can be
601 continued over multiple input lines. In addition, if the buffer fills up, we
602 want to automatically expand it so as to be able to handle extremely large
603 lines that are needed for certain stress tests. When the input buffer is
604 expanded, the other two buffers must also be expanded likewise, and the
605 contents of pbuffer, which are a copy of the input for callouts, must be
606 preserved (for when expansion happens for a data line). This is not the most
607 optimal way of handling this, but hey, this is just a test program!
608
609 Arguments:
610 f the file to read
611 start where in buffer to start (this *must* be within buffer)
612 prompt for stdin or readline()
613
614 Returns: pointer to the start of new data
615 could be a copy of start, or could be moved
616 NULL if no data read and EOF reached
617 */
618
619 static uschar *
620 extend_inputline(FILE *f, uschar *start, const char *prompt)
621 {
622 uschar *here = start;
623
624 for (;;)
625 {
626 int rlen = (int)(buffer_size - (here - buffer));
627
628 if (rlen > 1000)
629 {
630 int dlen;
631
632 /* If libreadline support is required, use readline() to read a line if the
633 input is a terminal. Note that readline() removes the trailing newline, so
634 we must put it back again, to be compatible with fgets(). */
635
636 #ifdef SUPPORT_LIBREADLINE
637 if (isatty(fileno(f)))
638 {
639 size_t len;
640 char *s = readline(prompt);
641 if (s == NULL) return (here == start)? NULL : start;
642 len = strlen(s);
643 if (len > 0) add_history(s);
644 if (len > rlen - 1) len = rlen - 1;
645 memcpy(here, s, len);
646 here[len] = '\n';
647 here[len+1] = 0;
648 free(s);
649 }
650 else
651 #endif
652
653 /* Read the next line by normal means, prompting if the file is stdin. */
654
655 {
656 if (f == stdin) printf("%s", prompt);
657 if (fgets((char *)here, rlen, f) == NULL)
658 return (here == start)? NULL : start;
659 }
660
661 dlen = (int)strlen((char *)here);
662 if (dlen > 0 && here[dlen - 1] == '\n') return start;
663 here += dlen;
664 }
665
666 else
667 {
668 int new_buffer_size = 2*buffer_size;
669 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
670 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
671 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
672
673 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
674 {
675 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
676 exit(1);
677 }
678
679 memcpy(new_buffer, buffer, buffer_size);
680 memcpy(new_pbuffer, pbuffer, buffer_size);
681
682 buffer_size = new_buffer_size;
683
684 start = new_buffer + (start - buffer);
685 here = new_buffer + (here - buffer);
686
687 free(buffer);
688 free(dbuffer);
689 free(pbuffer);
690
691 buffer = new_buffer;
692 dbuffer = new_dbuffer;
693 pbuffer = new_pbuffer;
694 }
695 }
696
697 return NULL; /* Control never gets here */
698 }
699
700
701
702
703
704
705
706 /*************************************************
707 * Read number from string *
708 *************************************************/
709
/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then a run of decimal digits is converted. A
value too large for an int saturates at INT_MAX instead of overflowing (signed
overflow is undefined behaviour); all the digits are still consumed.

Arguments:
  str      string to be converted
  endptr   where to put the end pointer (first character after the digits)

Returns:   the value as a non-negative int; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  while (isdigit(*str))
  {
    int digit = (int)(*str++ - '0');

    /* result*10 + digit would exceed INT_MAX: clamp rather than overflow. */
    if (result > (INT_MAX - digit) / 10)
      result = INT_MAX;
    else
      result = result * 10 + digit;
  }

  *endptr = str;
  return(result);
}
730
731
732
733
734 /*************************************************
735 * Convert UTF-8 string to value *
736 *************************************************/
737
738 /* This function takes one or more bytes that represents a UTF-8 character,
739 and returns the value of the character.
740
741 Argument:
742 utf8bytes a pointer to the byte vector
743 vptr a pointer to an int to receive the value
744
745 Returns: > 0 => the number of bytes consumed
746 -6 to 0 => malformed UTF-8 character at offset = (-return)
747 */
748
749 #if !defined NOUTF8
750
751 static int
752 utf82ord(unsigned char *utf8bytes, int *vptr)
753 {
754 int c = *utf8bytes++;
755 int d = c;
756 int i, j, s;
757
758 for (i = -1; i < 6; i++) /* i is number of additional bytes */
759 {
760 if ((d & 0x80) == 0) break;
761 d <<= 1;
762 }
763
764 if (i == -1) { *vptr = c; return 1; } /* ascii character */
765 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
766
767 /* i now has a value in the range 1-5 */
768
769 s = 6*i;
770 d = (c & utf8_table3[i]) << s;
771
772 for (j = 0; j < i; j++)
773 {
774 c = *utf8bytes++;
775 if ((c & 0xc0) != 0x80) return -(j+1);
776 s -= 6;
777 d |= (c & 0x3f) << s;
778 }
779
780 /* Check that encoding was the correct unique one */
781
782 for (j = 0; j < utf8_table1_size; j++)
783 if (d <= utf8_table1[j]) break;
784 if (j != i) return -(i+1);
785
786 /* Valid value */
787
788 *vptr = d;
789 return i+1;
790 }
791
792 #endif
793
794
795
796 /*************************************************
797 * Convert character value to UTF-8 *
798 *************************************************/
799
800 /* This function takes an integer value in the range 0 - 0x7fffffff
801 and encodes it as a UTF-8 character in 1 to 6 bytes.
802
803 Arguments:
804 cvalue the character value
805 utf8bytes pointer to buffer for result - at least 6 bytes long
806
807 Returns: number of bytes placed in the buffer
808 */
809
810 #if !defined NOUTF8
811
812 static int
813 ord2utf8(int cvalue, uschar *utf8bytes)
814 {
815 register int i, j;
816 for (i = 0; i < utf8_table1_size; i++)
817 if (cvalue <= utf8_table1[i]) break;
818 utf8bytes += i;
819 for (j = i; j > 0; j--)
820 {
821 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
822 cvalue >>= 6;
823 }
824 *utf8bytes = utf8_table2[i] | cvalue;
825 return i + 1;
826 }
827
828 #endif
829
830
831
832 /*************************************************
833 * Print character string *
834 *************************************************/
835
836 /* Character string printing function. Must handle UTF-8 strings in utf8
837 mode. Yields number of characters printed. If handed a NULL file, just counts
838 chars without printing. */
839
840 static int pchars(unsigned char *p, int length, FILE *f)
841 {
842 int c = 0;
843 int yield = 0;
844
845 while (length-- > 0)
846 {
847 #if !defined NOUTF8
848 if (use_utf8)
849 {
850 int rc = utf82ord(p, &c);
851
852 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
853 {
854 length -= rc - 1;
855 p += rc;
856 if (PRINTHEX(c))
857 {
858 if (f != NULL) fprintf(f, "%c", c);
859 yield++;
860 }
861 else
862 {
863 int n = 4;
864 if (f != NULL) fprintf(f, "\\x{%02x}", c);
865 yield += (n <= 0x000000ff)? 2 :
866 (n <= 0x00000fff)? 3 :
867 (n <= 0x0000ffff)? 4 :
868 (n <= 0x000fffff)? 5 : 6;
869 }
870 continue;
871 }
872 }
873 #endif
874
875 /* Not UTF-8, or malformed UTF-8 */
876
877 c = *p++;
878 if (PRINTHEX(c))
879 {
880 if (f != NULL) fprintf(f, "%c", c);
881 yield++;
882 }
883 else
884 {
885 if (f != NULL) fprintf(f, "\\x%02x", c);
886 yield += 4;
887 }
888 }
889
890 return yield;
891 }
892
893
894
895 /*************************************************
896 * Callout function *
897 *************************************************/
898
899 /* Called from PCRE as a result of the (?C) item. We print out where we are in
900 the match. Yield zero unless more callouts than the fail count, or the callout
901 data is not zero. */
902
903 static int callout(pcre_callout_block *cb)
904 {
905 FILE *f = (first_callout | callout_extra)? outfile : NULL;
906 int i, pre_start, post_start, subject_length;
907
908 if (callout_extra)
909 {
910 fprintf(f, "Callout %d: last capture = %d\n",
911 cb->callout_number, cb->capture_last);
912
913 for (i = 0; i < cb->capture_top * 2; i += 2)
914 {
915 if (cb->offset_vector[i] < 0)
916 fprintf(f, "%2d: <unset>\n", i/2);
917 else
918 {
919 fprintf(f, "%2d: ", i/2);
920 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
921 cb->offset_vector[i+1] - cb->offset_vector[i], f);
922 fprintf(f, "\n");
923 }
924 }
925 }
926
927 /* Re-print the subject in canonical form, the first time or if giving full
928 datails. On subsequent calls in the same match, we use pchars just to find the
929 printed lengths of the substrings. */
930
931 if (f != NULL) fprintf(f, "--->");
932
933 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
934 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
935 cb->current_position - cb->start_match, f);
936
937 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
938
939 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
940 cb->subject_length - cb->current_position, f);
941
942 if (f != NULL) fprintf(f, "\n");
943
944 /* Always print appropriate indicators, with callout number if not already
945 shown. For automatic callouts, show the pattern offset. */
946
947 if (cb->callout_number == 255)
948 {
949 fprintf(outfile, "%+3d ", cb->pattern_position);
950 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
951 }
952 else
953 {
954 if (callout_extra) fprintf(outfile, " ");
955 else fprintf(outfile, "%3d ", cb->callout_number);
956 }
957
958 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
959 fprintf(outfile, "^");
960
961 if (post_start > 0)
962 {
963 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
964 fprintf(outfile, "^");
965 }
966
967 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
968 fprintf(outfile, " ");
969
970 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
971 pbuffer + cb->pattern_position);
972
973 fprintf(outfile, "\n");
974 first_callout = 0;
975
976 if (cb->mark != last_callout_mark)
977 {
978 fprintf(outfile, "Latest Mark: %s\n",
979 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
980 last_callout_mark = cb->mark;
981 }
982
983 if (cb->callout_data != NULL)
984 {
985 int callout_data = *((int *)(cb->callout_data));
986 if (callout_data != 0)
987 {
988 fprintf(outfile, "Callout data = %d\n", callout_data);
989 return callout_data;
990 }
991 }
992
993 return (cb->callout_number != callout_fail_id)? 0 :
994 (++callout_count >= callout_fail_count)? 1 : 0;
995 }
996
997
998 /*************************************************
999 * Local malloc functions *
1000 *************************************************/
1001
1002 /* Alternative malloc function, to test functionality and save the size of a
1003 compiled re, which is the first store request that pcre_compile() makes. The
1004 show_malloc variable is set only during matching. */
1005
1006 static void *new_malloc(size_t size)
1007 {
1008 void *block = malloc(size);
1009 gotten_store = size;
1010 if (first_gotten_store == 0) first_gotten_store = size;
1011 if (show_malloc)
1012 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1013 return block;
1014 }
1015
1016 static void new_free(void *block)
1017 {
1018 if (show_malloc)
1019 fprintf(outfile, "free %p\n", block);
1020 free(block);
1021 }
1022
1023 /* For recursion malloc/free, to test stacking calls */
1024
1025 static void *stack_malloc(size_t size)
1026 {
1027 void *block = malloc(size);
1028 if (show_malloc)
1029 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1030 return block;
1031 }
1032
1033 static void stack_free(void *block)
1034 {
1035 if (show_malloc)
1036 fprintf(outfile, "stack_free %p\n", block);
1037 free(block);
1038 }
1039
1040
1041 /*************************************************
1042 * Call pcre_fullinfo() *
1043 *************************************************/
1044
1045 /* Get one piece of information from the pcre_fullinfo() function */
1046
1047 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1048 {
1049 int rc;
1050 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1051 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1052 }
1053
1054
1055
1056 /*************************************************
1057 * Byte flipping function *
1058 *************************************************/
1059
/* Reverse the byte order of an integer field.

Arguments:
  value       the value to be flipped
  n           size of the field in bytes; 2 swaps the two low-order bytes,
                any other value reverses the four low-order bytes

Returns:      the flipped value; bits above those handled are discarded
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1069
1070
1071
1072
1073 /*************************************************
1074 * Check match or recursion limit *
1075 *************************************************/
1076
1077 static int
1078 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1079 int start_offset, int options, int *use_offsets, int use_size_offsets,
1080 int flag, unsigned long int *limit, int errnumber, const char *msg)
1081 {
1082 int count;
1083 int min = 0;
1084 int mid = 64;
1085 int max = -1;
1086
1087 extra->flags |= flag;
1088
1089 for (;;)
1090 {
1091 *limit = mid;
1092
1093 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1094 use_offsets, use_size_offsets);
1095
1096 if (count == errnumber)
1097 {
1098 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1099 min = mid;
1100 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1101 }
1102
1103 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1104 count == PCRE_ERROR_PARTIAL)
1105 {
1106 if (mid == min + 1)
1107 {
1108 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1109 break;
1110 }
1111 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1112 max = mid;
1113 mid = (min + mid)/2;
1114 }
1115 else break; /* Some other error */
1116 }
1117
1118 extra->flags &= ~flag;
1119 return count;
1120 }
1121
1122
1123
1124 /*************************************************
1125 * Case-independent strncmp() function *
1126 *************************************************/
1127
1128 /*
1129 Arguments:
1130 s first string
1131 t second string
1132 n number of characters to compare
1133
1134 Returns: < 0, = 0, or > 0, according to the comparison
1135 */
1136
1137 static int
1138 strncmpic(uschar *s, uschar *t, int n)
1139 {
1140 while (n--)
1141 {
1142 int c = tolower(*s++) - tolower(*t++);
1143 if (c) return c;
1144 }
1145 return 0;
1146 }
1147
1148
1149
1150 /*************************************************
1151 * Check newline indicator *
1152 *************************************************/
1153
1154 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1155 a message and return 0 if there is no match.
1156
1157 Arguments:
1158 p points after the leading '<'
1159 f file for error message
1160
1161 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1162 */
1163
1164 static int
1165 check_newline(uschar *p, FILE *f)
1166 {
1167 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1168 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1169 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1170 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1171 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1172 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1173 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1174 fprintf(f, "Unknown newline type at: <%s\n", p);
1175 return 0;
1176 }
1177
1178
1179
1180 /*************************************************
1181 * Usage function *
1182 *************************************************/
1183
/* Write the command-line summary to stdout. Which lines appear depends on the
SUPPORT_LIBREADLINE, NODFA, and NOPOSIX build settings. None of the text
contains a format directive, so it is written with fputs(). */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1218
1219
1220
1221 /*************************************************
1222 * Main Program *
1223 *************************************************/
1224
1225 /* Read lines from named file or stdin and write to named file or stdout; lines
1226 consist of a regular expression, in delimiters and optionally followed by
1227 options, followed by a set of test data, terminated by an empty line. */
1228
1229 int main(int argc, char **argv)
1230 {
1231 FILE *infile = stdin;
1232 int options = 0;
1233 int study_options = 0;
1234 int default_find_match_limit = FALSE;
1235 int op = 1;
1236 int timeit = 0;
1237 int timeitm = 0;
1238 int showinfo = 0;
1239 int showstore = 0;
1240 int force_study = -1;
1241 int force_study_options = 0;
1242 int quiet = 0;
1243 int size_offsets = 45;
1244 int size_offsets_max;
1245 int *offsets = NULL;
1246 #if !defined NOPOSIX
1247 int posix = 0;
1248 #endif
1249 int debug = 0;
1250 int done = 0;
1251 int all_use_dfa = 0;
1252 int yield = 0;
1253 int stack_size;
1254
1255 pcre_jit_stack *jit_stack = NULL;
1256
1257
1258 /* These vectors store, end-to-end, a list of captured substring names. Assume
1259 that 1024 is plenty long enough for the few names we'll be testing. */
1260
1261 uschar copynames[1024];
1262 uschar getnames[1024];
1263
1264 uschar *copynamesptr;
1265 uschar *getnamesptr;
1266
1267 /* Get buffers from malloc() so that Electric Fence will check their misuse
1268 when I am debugging. They grow automatically when very long lines are read. */
1269
1270 buffer = (unsigned char *)malloc(buffer_size);
1271 dbuffer = (unsigned char *)malloc(buffer_size);
1272 pbuffer = (unsigned char *)malloc(buffer_size);
1273
1274 /* The outfile variable is static so that new_malloc can use it. */
1275
1276 outfile = stdout;
1277
1278 /* The following _setmode() stuff is some Windows magic that tells its runtime
1279 library to translate CRLF into a single LF character. At least, that's what
1280 I've been told: never having used Windows I take this all on trust. Originally
1281 it set 0x8000, but then I was advised that _O_BINARY was better. */
1282
1283 #if defined(_WIN32) || defined(WIN32)
1284 _setmode( _fileno( stdout ), _O_BINARY );
1285 #endif
1286
1287 /* Scan options */
1288
1289 while (argc > 1 && argv[op][0] == '-')
1290 {
1291 unsigned char *endptr;
1292
1293 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1294 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1295 else if (strcmp(argv[op], "-s+") == 0)
1296 {
1297 force_study = 1;
1298 force_study_options = PCRE_STUDY_JIT_COMPILE;
1299 }
1300 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1301 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1302 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1303 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1304 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1305 #if !defined NODFA
1306 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1307 #endif
1308 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1309 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1310 *endptr == 0))
1311 {
1312 op++;
1313 argc--;
1314 }
1315 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1316 {
1317 int both = argv[op][2] == 0;
1318 int temp;
1319 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1320 *endptr == 0))
1321 {
1322 timeitm = temp;
1323 op++;
1324 argc--;
1325 }
1326 else timeitm = LOOPREPEAT;
1327 if (both) timeit = timeitm;
1328 }
1329 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1330 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1331 *endptr == 0))
1332 {
1333 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1334 printf("PCRE: -S not supported on this OS\n");
1335 exit(1);
1336 #else
1337 int rc;
1338 struct rlimit rlim;
1339 getrlimit(RLIMIT_STACK, &rlim);
1340 rlim.rlim_cur = stack_size * 1024 * 1024;
1341 rc = setrlimit(RLIMIT_STACK, &rlim);
1342 if (rc != 0)
1343 {
1344 printf("PCRE: setrlimit() failed with error %d\n", rc);
1345 exit(1);
1346 }
1347 op++;
1348 argc--;
1349 #endif
1350 }
1351 #if !defined NOPOSIX
1352 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1353 #endif
1354 else if (strcmp(argv[op], "-C") == 0)
1355 {
1356 int rc;
1357 unsigned long int lrc;
1358 printf("PCRE version %s\n", pcre_version());
1359 printf("Compiled with\n");
1360 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1361 printf(" %sUTF-8 support\n", rc? "" : "No ");
1362 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1363 printf(" %sUnicode properties support\n", rc? "" : "No ");
1364 (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1365 if (rc)
1366 printf(" Just-in-time compiler support\n");
1367 else
1368 printf(" No just-in-time compiler support\n");
1369 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1370 /* Note that these values are always the ASCII values, even
1371 in EBCDIC environments. CR is 13 and NL is 10. */
1372 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1373 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1374 (rc == -2)? "ANYCRLF" :
1375 (rc == -1)? "ANY" : "???");
1376 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1377 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1378 "all Unicode newlines");
1379 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1380 printf(" Internal link size = %d\n", rc);
1381 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1382 printf(" POSIX malloc threshold = %d\n", rc);
1383 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1384 printf(" Default match limit = %ld\n", lrc);
1385 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1386 printf(" Default recursion depth limit = %ld\n", lrc);
1387 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1388 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1389 goto EXIT;
1390 }
1391 else if (strcmp(argv[op], "-help") == 0 ||
1392 strcmp(argv[op], "--help") == 0)
1393 {
1394 usage();
1395 goto EXIT;
1396 }
1397 else
1398 {
1399 printf("** Unknown or malformed option %s\n", argv[op]);
1400 usage();
1401 yield = 1;
1402 goto EXIT;
1403 }
1404 op++;
1405 argc--;
1406 }
1407
1408 /* Get the store for the offsets vector, and remember what it was */
1409
1410 size_offsets_max = size_offsets;
1411 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1412 if (offsets == NULL)
1413 {
1414 printf("** Failed to get %d bytes of memory for offsets vector\n",
1415 (int)(size_offsets_max * sizeof(int)));
1416 yield = 1;
1417 goto EXIT;
1418 }
1419
1420 /* Sort out the input and output files */
1421
1422 if (argc > 1)
1423 {
1424 infile = fopen(argv[op], INPUT_MODE);
1425 if (infile == NULL)
1426 {
1427 printf("** Failed to open %s\n", argv[op]);
1428 yield = 1;
1429 goto EXIT;
1430 }
1431 }
1432
1433 if (argc > 2)
1434 {
1435 outfile = fopen(argv[op+1], OUTPUT_MODE);
1436 if (outfile == NULL)
1437 {
1438 printf("** Failed to open %s\n", argv[op+1]);
1439 yield = 1;
1440 goto EXIT;
1441 }
1442 }
1443
1444 /* Set alternative malloc function */
1445
1446 pcre_malloc = new_malloc;
1447 pcre_free = new_free;
1448 pcre_stack_malloc = stack_malloc;
1449 pcre_stack_free = stack_free;
1450
1451 /* Heading line unless quiet, then prompt for first regex if stdin */
1452
1453 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1454
1455 /* Main loop */
1456
1457 while (!done)
1458 {
1459 pcre *re = NULL;
1460 pcre_extra *extra = NULL;
1461
1462 #if !defined NOPOSIX /* There are still compilers that require no indent */
1463 regex_t preg;
1464 int do_posix = 0;
1465 #endif
1466
1467 const char *error;
1468 unsigned char *markptr;
1469 unsigned char *p, *pp, *ppp;
1470 unsigned char *to_file = NULL;
1471 const unsigned char *tables = NULL;
1472 unsigned long int true_size, true_study_size = 0;
1473 size_t size, regex_gotten_store;
1474 int do_allcaps = 0;
1475 int do_mark = 0;
1476 int do_study = 0;
1477 int no_force_study = 0;
1478 int do_debug = debug;
1479 int do_G = 0;
1480 int do_g = 0;
1481 int do_showinfo = showinfo;
1482 int do_showrest = 0;
1483 int do_showcaprest = 0;
1484 int do_flip = 0;
1485 int erroroffset, len, delimiter, poffset;
1486
1487 use_utf8 = 0;
1488 debug_lengths = 1;
1489
1490 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1491 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1492 fflush(outfile);
1493
1494 p = buffer;
1495 while (isspace(*p)) p++;
1496 if (*p == 0) continue;
1497
1498 /* See if the pattern is to be loaded pre-compiled from a file. */
1499
1500 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1501 {
1502 unsigned long int magic, get_options;
1503 uschar sbuf[8];
1504 FILE *f;
1505
1506 p++;
1507 pp = p + (int)strlen((char *)p);
1508 while (isspace(pp[-1])) pp--;
1509 *pp = 0;
1510
1511 f = fopen((char *)p, "rb");
1512 if (f == NULL)
1513 {
1514 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1515 continue;
1516 }
1517
1518 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1519
1520 true_size =
1521 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1522 true_study_size =
1523 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1524
1525 re = (real_pcre *)new_malloc(true_size);
1526 regex_gotten_store = first_gotten_store;
1527
1528 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1529
1530 magic = ((real_pcre *)re)->magic_number;
1531 if (magic != MAGIC_NUMBER)
1532 {
1533 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1534 {
1535 do_flip = 1;
1536 }
1537 else
1538 {
1539 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1540 fclose(f);
1541 continue;
1542 }
1543 }
1544
1545 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1546 do_flip? " (byte-inverted)" : "", p);
1547
1548 /* Need to know if UTF-8 for printing data strings */
1549
1550 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1551 use_utf8 = (get_options & PCRE_UTF8) != 0;
1552
1553 /* Now see if there is any following study data. */
1554
1555 if (true_study_size != 0)
1556 {
1557 pcre_study_data *psd;
1558
1559 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1560 extra->flags = PCRE_EXTRA_STUDY_DATA;
1561
1562 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1563 extra->study_data = psd;
1564
1565 if (fread(psd, 1, true_study_size, f) != true_study_size)
1566 {
1567 FAIL_READ:
1568 fprintf(outfile, "Failed to read data from %s\n", p);
1569 if (extra != NULL) pcre_free_study(extra);
1570 if (re != NULL) new_free(re);
1571 fclose(f);
1572 continue;
1573 }
1574 fprintf(outfile, "Study data loaded from %s\n", p);
1575 do_study = 1; /* To get the data output if requested */
1576 }
1577 else fprintf(outfile, "No study data\n");
1578
1579 fclose(f);
1580 goto SHOW_INFO;
1581 }
1582
1583 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1584 the pattern; if it isn't complete, read more. */
1585
1586 delimiter = *p++;
1587
1588 if (isalnum(delimiter) || delimiter == '\\')
1589 {
1590 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1591 goto SKIP_DATA;
1592 }
1593
1594 pp = p;
1595 poffset = (int)(p - buffer);
1596
1597 for(;;)
1598 {
1599 while (*pp != 0)
1600 {
1601 if (*pp == '\\' && pp[1] != 0) pp++;
1602 else if (*pp == delimiter) break;
1603 pp++;
1604 }
1605 if (*pp != 0) break;
1606 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1607 {
1608 fprintf(outfile, "** Unexpected EOF\n");
1609 done = 1;
1610 goto CONTINUE;
1611 }
1612 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1613 }
1614
1615 /* The buffer may have moved while being extended; reset the start of data
1616 pointer to the correct relative point in the buffer. */
1617
1618 p = buffer + poffset;
1619
1620 /* If the first character after the delimiter is backslash, make
1621 the pattern end with backslash. This is purely to provide a way
1622 of testing for the error message when a pattern ends with backslash. */
1623
1624 if (pp[1] == '\\') *pp++ = '\\';
1625
1626 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1627 for callouts. */
1628
1629 *pp++ = 0;
1630 strcpy((char *)pbuffer, (char *)p);
1631
1632 /* Look for options after final delimiter */
1633
1634 options = 0;
1635 log_store = showstore; /* default from command line */
1636
1637 while (*pp != 0)
1638 {
1639 switch (*pp++)
1640 {
1641 case 'f': options |= PCRE_FIRSTLINE; break;
1642 case 'g': do_g = 1; break;
1643 case 'i': options |= PCRE_CASELESS; break;
1644 case 'm': options |= PCRE_MULTILINE; break;
1645 case 's': options |= PCRE_DOTALL; break;
1646 case 'x': options |= PCRE_EXTENDED; break;
1647
1648 case '+':
1649 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1650 break;
1651
1652 case '=': do_allcaps = 1; break;
1653 case 'A': options |= PCRE_ANCHORED; break;
1654 case 'B': do_debug = 1; break;
1655 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1656 case 'D': do_debug = do_showinfo = 1; break;
1657 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1658 case 'F': do_flip = 1; break;
1659 case 'G': do_G = 1; break;
1660 case 'I': do_showinfo = 1; break;
1661 case 'J': options |= PCRE_DUPNAMES; break;
1662 case 'K': do_mark = 1; break;
1663 case 'M': log_store = 1; break;
1664 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1665
1666 #if !defined NOPOSIX
1667 case 'P': do_posix = 1; break;
1668 #endif
1669
1670 case 'S':
1671 if (do_study == 0)
1672 {
1673 do_study = 1;
1674 if (*pp == '+')
1675 {
1676 study_options |= PCRE_STUDY_JIT_COMPILE;
1677 pp++;
1678 }
1679 }
1680 else
1681 {
1682 do_study = 0;
1683 no_force_study = 1;
1684 }
1685 break;
1686
1687 case 'U': options |= PCRE_UNGREEDY; break;
1688 case 'W': options |= PCRE_UCP; break;
1689 case 'X': options |= PCRE_EXTRA; break;
1690 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1691 case 'Z': debug_lengths = 0; break;
1692 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1693 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1694
1695 case 'T':
1696 switch (*pp++)
1697 {
1698 case '0': tables = tables0; break;
1699 case '1': tables = tables1; break;
1700
1701 case '\r':
1702 case '\n':
1703 case ' ':
1704 case 0:
1705 fprintf(outfile, "** Missing table number after /T\n");
1706 goto SKIP_DATA;
1707
1708 default:
1709 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1710 goto SKIP_DATA;
1711 }
1712 break;
1713
1714 case 'L':
1715 ppp = pp;
1716 /* The '\r' test here is so that it works on Windows. */
1717 /* The '0' test is just in case this is an unterminated line. */
1718 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1719 *ppp = 0;
1720 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1721 {
1722 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1723 goto SKIP_DATA;
1724 }
1725 locale_set = 1;
1726 tables = pcre_maketables();
1727 pp = ppp;
1728 break;
1729
1730 case '>':
1731 to_file = pp;
1732 while (*pp != 0) pp++;
1733 while (isspace(pp[-1])) pp--;
1734 *pp = 0;
1735 break;
1736
1737 case '<':
1738 {
1739 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1740 {
1741 options |= PCRE_JAVASCRIPT_COMPAT;
1742 pp += 3;
1743 }
1744 else
1745 {
1746 int x = check_newline(pp, outfile);
1747 if (x == 0) goto SKIP_DATA;
1748 options |= x;
1749 while (*pp++ != '>');
1750 }
1751 }
1752 break;
1753
1754 case '\r': /* So that it works in Windows */
1755 case '\n':
1756 case ' ':
1757 break;
1758
1759 default:
1760 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1761 goto SKIP_DATA;
1762 }
1763 }
1764
1765 /* Handle compiling via the POSIX interface, which doesn't support the
1766 timing, showing, or debugging options, nor the ability to pass over
1767 local character tables. */
1768
1769 #if !defined NOPOSIX
1770 if (posix || do_posix)
1771 {
1772 int rc;
1773 int cflags = 0;
1774
1775 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1776 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1777 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1778 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1779 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1780 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1781 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1782
1783 first_gotten_store = 0;
1784 rc = regcomp(&preg, (char *)p, cflags);
1785
1786 /* Compilation failed; go back for another re, skipping to blank line
1787 if non-interactive. */
1788
1789 if (rc != 0)
1790 {
1791 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1792 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1793 goto SKIP_DATA;
1794 }
1795 }
1796
1797 /* Handle compiling via the native interface */
1798
1799 else
1800 #endif /* !defined NOPOSIX */
1801
1802 {
1803 unsigned long int get_options;
1804
1805 if (timeit > 0)
1806 {
1807 register int i;
1808 clock_t time_taken;
1809 clock_t start_time = clock();
1810 for (i = 0; i < timeit; i++)
1811 {
1812 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1813 if (re != NULL) free(re);
1814 }
1815 time_taken = clock() - start_time;
1816 fprintf(outfile, "Compile time %.4f milliseconds\n",
1817 (((double)time_taken * 1000.0) / (double)timeit) /
1818 (double)CLOCKS_PER_SEC);
1819 }
1820
1821 first_gotten_store = 0;
1822 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1823
1824 /* Compilation failed; go back for another re, skipping to blank line
1825 if non-interactive. */
1826
1827 if (re == NULL)
1828 {
1829 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1830 SKIP_DATA:
1831 if (infile != stdin)
1832 {
1833 for (;;)
1834 {
1835 if (extend_inputline(infile, buffer, NULL) == NULL)
1836 {
1837 done = 1;
1838 goto CONTINUE;
1839 }
1840 len = (int)strlen((char *)buffer);
1841 while (len > 0 && isspace(buffer[len-1])) len--;
1842 if (len == 0) break;
1843 }
1844 fprintf(outfile, "\n");
1845 }
1846 goto CONTINUE;
1847 }
1848
1849 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1850 within the regex; check for this so that we know how to process the data
1851 lines. */
1852
1853 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1854 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1855
1856 /* Print information if required. There are now two info-returning
1857 functions. The old one has a limited interface and returns only limited
1858 data. Check that it agrees with the newer one. */
1859
1860 if (log_store)
1861 fprintf(outfile, "Memory allocation (code space): %d\n",
1862 (int)(first_gotten_store -
1863 sizeof(real_pcre) -
1864 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1865
1866 /* Extract the size for possible writing before possibly flipping it,
1867 and remember the store that was got. */
1868
1869 true_size = ((real_pcre *)re)->size;
1870 regex_gotten_store = first_gotten_store;
1871
1872 /* If -s or /S was present, study the regex to generate additional info to
1873 help with the matching, unless the pattern has the SS option, which
1874 suppresses the effect of /S (used for a few test patterns where studying is
1875 never sensible). */
1876
1877 if (do_study || (force_study >= 0 && !no_force_study))
1878 {
1879 if (timeit > 0)
1880 {
1881 register int i;
1882 clock_t time_taken;
1883 clock_t start_time = clock();
1884 for (i = 0; i < timeit; i++)
1885 extra = pcre_study(re, study_options | force_study_options, &error);
1886 time_taken = clock() - start_time;
1887 if (extra != NULL) pcre_free_study(extra);
1888 fprintf(outfile, " Study time %.4f milliseconds\n",
1889 (((double)time_taken * 1000.0) / (double)timeit) /
1890 (double)CLOCKS_PER_SEC);
1891 }
1892 extra = pcre_study(re, study_options | force_study_options, &error);
1893 if (error != NULL)
1894 fprintf(outfile, "Failed to study: %s\n", error);
1895 else if (extra != NULL)
1896 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1897 }
1898
1899 /* If /K was present, we set up for handling MARK data. */
1900
1901 if (do_mark)
1902 {
1903 if (extra == NULL)
1904 {
1905 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1906 extra->flags = 0;
1907 }
1908 extra->mark = &markptr;
1909 extra->flags |= PCRE_EXTRA_MARK;
1910 }
1911
1912 /* If the 'F' option was present, we flip the bytes of all the integer
1913 fields in the regex data block and the study block. This is to make it
1914 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1915 compiled on a different architecture. */
1916
1917 if (do_flip)
1918 {
1919 real_pcre *rre = (real_pcre *)re;
1920 rre->magic_number =
1921 byteflip(rre->magic_number, sizeof(rre->magic_number));
1922 rre->size = byteflip(rre->size, sizeof(rre->size));
1923 rre->options = byteflip(rre->options, sizeof(rre->options));
1924 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1925 rre->top_bracket =
1926 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1927 rre->top_backref =
1928 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1929 rre->first_byte =
1930 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1931 rre->req_byte =
1932 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1933 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1934 sizeof(rre->name_table_offset));
1935 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1936 sizeof(rre->name_entry_size));
1937 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1938 sizeof(rre->name_count));
1939
1940 if (extra != NULL)
1941 {
1942 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1943 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1944 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1945 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1946 }
1947 }
1948
1949 /* Extract information from the compiled data if required */
1950
1951 SHOW_INFO:
1952
1953 if (do_debug)
1954 {
1955 fprintf(outfile, "------------------------------------------------------------------\n");
1956 pcre_printint(re, outfile, debug_lengths);
1957 }
1958
1959 /* We already have the options in get_options (see above) */
1960
1961 if (do_showinfo)
1962 {
1963 unsigned long int all_options;
1964 #if !defined NOINFOCHECK
1965 int old_first_char, old_options, old_count;
1966 #endif
1967 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1968 hascrorlf;
1969 int nameentrysize, namecount;
1970 const uschar *nametable;
1971
1972 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1973 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1974 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1975 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1976 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1977 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1978 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1979 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1980 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1981 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1982 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1983
1984 #if !defined NOINFOCHECK
1985 old_count = pcre_info(re, &old_options, &old_first_char);
1986 if (count < 0) fprintf(outfile,
1987 "Error %d from pcre_info()\n", count);
1988 else
1989 {
1990 if (old_count != count) fprintf(outfile,
1991 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1992 old_count);
1993
1994 if (old_first_char != first_char) fprintf(outfile,
1995 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1996 first_char, old_first_char);
1997
1998 if (old_options != (int)get_options) fprintf(outfile,
1999 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2000 get_options, old_options);
2001 }
2002 #endif
2003
2004 if (size != regex_gotten_store) fprintf(outfile,
2005 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2006 (int)size, (int)regex_gotten_store);
2007
2008 fprintf(outfile, "Capturing subpattern count = %d\n", count);
2009 if (backrefmax > 0)
2010 fprintf(outfile, "Max back reference = %d\n", backrefmax);
2011
2012 if (namecount > 0)
2013 {
2014 fprintf(outfile, "Named capturing subpatterns:\n");
2015 while (namecount-- > 0)
2016 {
2017 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
2018 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2019 GET2(nametable, 0));
2020 nametable += nameentrysize;
2021 }
2022 }
2023
2024 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2025 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2026
2027 all_options = ((real_pcre *)re)->options;
2028 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2029
2030 if (get_options == 0) fprintf(outfile, "No options\n");
2031 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2032 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2033 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2034 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2035 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2036 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2037 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2038 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2039 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2040 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2041 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2042 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2043 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2044 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2045 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2046 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2047 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2048 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2049
2050 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2051
2052 switch (get_options & PCRE_NEWLINE_BITS)
2053 {
2054 case PCRE_NEWLINE_CR:
2055 fprintf(outfile, "Forced newline sequence: CR\n");
2056 break;
2057
2058 case PCRE_NEWLINE_LF:
2059 fprintf(outfile, "Forced newline sequence: LF\n");
2060 break;
2061
2062 case PCRE_NEWLINE_CRLF:
2063 fprintf(outfile, "Forced newline sequence: CRLF\n");
2064 break;
2065
2066 case PCRE_NEWLINE_ANYCRLF:
2067 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2068 break;
2069
2070 case PCRE_NEWLINE_ANY:
2071 fprintf(outfile, "Forced newline sequence: ANY\n");
2072 break;
2073
2074 default:
2075 break;
2076 }
2077
2078 if (first_char == -1)
2079 {
2080 fprintf(outfile, "First char at start or follows newline\n");
2081 }
2082 else if (first_char < 0)
2083 {
2084 fprintf(outfile, "No first char\n");
2085 }
2086 else
2087 {
2088 int ch = first_char & 255;
2089 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2090 "" : " (caseless)";
2091 if (PRINTHEX(ch))
2092 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2093 else
2094 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2095 }
2096
2097 if (need_char < 0)
2098 {
2099 fprintf(outfile, "No need char\n");
2100 }
2101 else
2102 {
2103 int ch = need_char & 255;
2104 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2105 "" : " (caseless)";
2106 if (PRINTHEX(ch))
2107 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2108 else
2109 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2110 }
2111
2112 /* Don't output study size; at present it is in any case a fixed
2113 value, but it varies, depending on the computer architecture, and
2114 so messes up the test suite. (And with the /F option, it might be
2115 flipped.) If study was forced by an external -s, don't show this
2116 information unless -i or -d was also present. This means that, except
2117 when auto-callouts are involved, the output from runs with and without
2118 -s should be identical. */
2119
2120 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2121 {
2122 if (extra == NULL)
2123 fprintf(outfile, "Study returned NULL\n");
2124 else
2125 {
2126 uschar *start_bits = NULL;
2127 int minlength;
2128
2129 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2130 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2131
2132 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2133 if (start_bits == NULL)
2134 fprintf(outfile, "No set of starting bytes\n");
2135 else
2136 {
2137 int i;
2138 int c = 24;
2139 fprintf(outfile, "Starting byte set: ");
2140 for (i = 0; i < 256; i++)
2141 {
2142 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2143 {
2144 if (c > 75)
2145 {
2146 fprintf(outfile, "\n ");
2147 c = 2;
2148 }
2149 if (PRINTHEX(i) && i != ' ')
2150 {
2151 fprintf(outfile, "%c ", i);
2152 c += 2;
2153 }
2154 else
2155 {
2156 fprintf(outfile, "\\x%02x ", i);
2157 c += 5;
2158 }
2159 }
2160 }
2161 fprintf(outfile, "\n");
2162 }
2163 }
2164
2165 /* Show this only if the JIT was set by /S, not by -s. */
2166
2167 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2168 {
2169 int jit;
2170 new_info(re, extra, PCRE_INFO_JIT, &jit);
2171 if (jit)
2172 fprintf(outfile, "JIT study was successful\n");
2173 else
2174 #ifdef SUPPORT_JIT
2175 fprintf(outfile, "JIT study was not successful\n");
2176 #else
2177 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2178 #endif
2179 }
2180 }
2181 }
2182
2183 /* If the '>' option was present, we write out the regex to a file, and
2184 that is all. The first 8 bytes of the file are the regex length and then
2185 the study length, in big-endian order. */
2186
2187 if (to_file != NULL)
2188 {
2189 FILE *f = fopen((char *)to_file, "wb");
2190 if (f == NULL)
2191 {
2192 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2193 }
2194 else
2195 {
2196 uschar sbuf[8];
2197 sbuf[0] = (uschar)((true_size >> 24) & 255);
2198 sbuf[1] = (uschar)((true_size >> 16) & 255);
2199 sbuf[2] = (uschar)((true_size >> 8) & 255);
2200 sbuf[3] = (uschar)((true_size) & 255);
2201
2202 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2203 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2204 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2205 sbuf[7] = (uschar)((true_study_size) & 255);
2206
2207 if (fwrite(sbuf, 1, 8, f) < 8 ||
2208 fwrite(re, 1, true_size, f) < true_size)
2209 {
2210 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2211 }
2212 else
2213 {
2214 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2215
2216 /* If there is study data, write it. */
2217
2218 if (extra != NULL)
2219 {
2220 if (fwrite(extra->study_data, 1, true_study_size, f) <
2221 true_study_size)
2222 {
2223 fprintf(outfile, "Write error on %s: %s\n", to_file,
2224 strerror(errno));
2225 }
2226 else fprintf(outfile, "Study data written to %s\n", to_file);
2227 }
2228 }
2229 fclose(f);
2230 }
2231
2232 new_free(re);
2233 if (extra != NULL) pcre_free_study(extra);
2234 if (locale_set)
2235 {
2236 new_free((void *)tables);
2237 setlocale(LC_CTYPE, "C");
2238 locale_set = 0;
2239 }
2240 continue; /* With next regex */
2241 }
2242 } /* End of non-POSIX compile */
2243
2244 /* Read data lines and test them */
2245
2246 for (;;)
2247 {
2248 uschar *q;
2249 uschar *bptr;
2250 int *use_offsets = offsets;
2251 int use_size_offsets = size_offsets;
2252 int callout_data = 0;
2253 int callout_data_set = 0;
2254 int count, c;
2255 int copystrings = 0;
2256 int find_match_limit = default_find_match_limit;
2257 int getstrings = 0;
2258 int getlist = 0;
2259 int gmatched = 0;
2260 int start_offset = 0;
2261 int start_offset_sign = 1;
2262 int g_notempty = 0;
2263 int use_dfa = 0;
2264
2265 options = 0;
2266
2267 *copynames = 0;
2268 *getnames = 0;
2269
2270 copynamesptr = copynames;
2271 getnamesptr = getnames;
2272
2273 pcre_callout = callout;
2274 first_callout = 1;
2275 last_callout_mark = NULL;
2276 callout_extra = 0;
2277 callout_count = 0;
2278 callout_fail_count = 999999;
2279 callout_fail_id = -1;
2280 show_malloc = 0;
2281
2282 if (extra != NULL) extra->flags &=
2283 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2284
2285 len = 0;
2286 for (;;)
2287 {
2288 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2289 {
2290 if (len > 0) /* Reached EOF without hitting a newline */
2291 {
2292 fprintf(outfile, "\n");
2293 break;
2294 }
2295 done = 1;
2296 goto CONTINUE;
2297 }
2298 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2299 len = (int)strlen((char *)buffer);
2300 if (buffer[len-1] == '\n') break;
2301 }
2302
2303 while (len > 0 && isspace(buffer[len-1])) len--;
2304 buffer[len] = 0;
2305 if (len == 0) break;
2306
2307 p = buffer;
2308 while (isspace(*p)) p++;
2309
2310 bptr = q = dbuffer;
2311 while ((c = *p++) != 0)
2312 {
2313 int i = 0;
2314 int n = 0;
2315
2316 if (c == '\\') switch ((c = *p++))
2317 {
2318 case 'a': c = 7; break;
2319 case 'b': c = '\b'; break;
2320 case 'e': c = 27; break;
2321 case 'f': c = '\f'; break;
2322 case 'n': c = '\n'; break;
2323 case 'r': c = '\r'; break;
2324 case 't': c = '\t'; break;
2325 case 'v': c = '\v'; break;
2326
2327 case '0': case '1': case '2': case '3':
2328 case '4': case '5': case '6': case '7':
2329 c -= '0';
2330 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2331 c = c * 8 + *p++ - '0';
2332
2333 #if !defined NOUTF8
2334 if (use_utf8 && c > 255)
2335 {
2336 unsigned char buff8[8];
2337 int ii, utn;
2338 utn = ord2utf8(c, buff8);
2339 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2340 c = buff8[ii]; /* Last byte */
2341 }
2342 #endif
2343 break;
2344
2345 case 'x':
2346
2347 /* Handle \x{..} specially - new Perl thing for utf8 */
2348
2349 #if !defined NOUTF8
2350 if (*p == '{')
2351 {
2352 unsigned char *pt = p;
2353 c = 0;
2354
2355 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2356 when isxdigit() is a macro that refers to its argument more than
2357 once. This is banned by the C Standard, but apparently happens in at
2358 least one MacOS environment. */
2359
2360 for (pt++; isxdigit(*pt); pt++)
2361 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2362 if (*pt == '}')
2363 {
2364 unsigned char buff8[8];
2365 int ii, utn;
2366 if (use_utf8)
2367 {
2368 utn = ord2utf8(c, buff8);
2369 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2370 c = buff8[ii]; /* Last byte */
2371 }
2372 else
2373 {
2374 if (c > 255)
2375 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2376 "UTF-8 mode is not enabled.\n"
2377 "** Truncation will probably give the wrong result.\n", c);
2378 }
2379 p = pt + 1;
2380 break;
2381 }
2382 /* Not correct form; fall through */
2383 }
2384 #endif
2385
2386 /* Ordinary \x */
2387
2388 c = 0;
2389 while (i++ < 2 && isxdigit(*p))
2390 {
2391 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2392 p++;
2393 }
2394 break;
2395
2396 case 0: /* \ followed by EOF allows for an empty line */
2397 p--;
2398 continue;
2399
2400 case '>':
2401 if (*p == '-')
2402 {
2403 start_offset_sign = -1;
2404 p++;
2405 }
2406 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2407 start_offset *= start_offset_sign;
2408 continue;
2409
2410 case 'A': /* Option setting */
2411 options |= PCRE_ANCHORED;
2412 continue;
2413
2414 case 'B':
2415 options |= PCRE_NOTBOL;
2416 continue;
2417
2418 case 'C':
2419 if (isdigit(*p)) /* Set copy string */
2420 {
2421 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2422 copystrings |= 1 << n;
2423 }
2424 else if (isalnum(*p))
2425 {
2426 uschar *npp = copynamesptr;
2427 while (isalnum(*p)) *npp++ = *p++;
2428 *npp++ = 0;
2429 *npp = 0;
2430 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2431 if (n < 0)
2432 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2433 copynamesptr = npp;
2434 }
2435 else if (*p == '+')
2436 {
2437 callout_extra = 1;
2438 p++;
2439 }
2440 else if (*p == '-')
2441 {
2442 pcre_callout = NULL;
2443 p++;
2444 }
2445 else if (*p == '!')
2446 {
2447 callout_fail_id = 0;
2448 p++;
2449 while(isdigit(*p))
2450 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2451 callout_fail_count = 0;
2452 if (*p == '!')
2453 {
2454 p++;
2455 while(isdigit(*p))
2456 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2457 }
2458 }
2459 else if (*p == '*')
2460 {
2461 int sign = 1;
2462 callout_data = 0;
2463 if (*(++p) == '-') { sign = -1; p++; }
2464 while(isdigit(*p))
2465 callout_data = callout_data * 10 + *p++ - '0';
2466 callout_data *= sign;
2467 callout_data_set = 1;
2468 }
2469 continue;
2470
2471 #if !defined NODFA
2472 case 'D':
2473 #if !defined NOPOSIX
2474 if (posix || do_posix)
2475 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2476 else
2477 #endif
2478 use_dfa = 1;
2479 continue;
2480 #endif
2481
2482 #if !defined NODFA
2483 case 'F':
2484 options |= PCRE_DFA_SHORTEST;
2485 continue;
2486 #endif
2487
2488 case 'G':
2489 if (isdigit(*p))
2490 {
2491 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2492 getstrings |= 1 << n;
2493 }
2494 else if (isalnum(*p))
2495 {
2496 uschar *npp = getnamesptr;
2497 while (isalnum(*p)) *npp++ = *p++;
2498 *npp++ = 0;
2499 *npp = 0;
2500 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2501 if (n < 0)
2502 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2503 getnamesptr = npp;
2504 }
2505 continue;
2506
2507 case 'J':
2508 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2509 if (extra != NULL
2510 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2511 && extra->executable_jit != NULL)
2512 {
2513 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2514 jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2515 pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2516 }
2517 continue;
2518
2519 case 'L':
2520 getlist = 1;
2521 continue;
2522
2523 case 'M':
2524 find_match_limit = 1;
2525 continue;
2526
2527 case 'N':
2528 if ((options & PCRE_NOTEMPTY) != 0)
2529 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2530 else
2531 options |= PCRE_NOTEMPTY;
2532 continue;
2533
2534 case 'O':
2535 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2536 if (n > size_offsets_max)
2537 {
2538 size_offsets_max = n;
2539 free(offsets);
2540 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2541 if (offsets == NULL)
2542 {
2543 printf("** Failed to get %d bytes of memory for offsets vector\n",
2544 (int)(size_offsets_max * sizeof(int)));
2545 yield = 1;
2546 goto EXIT;
2547 }
2548 }
2549 use_size_offsets = n;
2550 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2551 continue;
2552
2553 case 'P':
2554 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2555 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2556 continue;
2557
2558 case 'Q':
2559 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2560 if (extra == NULL)
2561 {
2562 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2563 extra->flags = 0;
2564 }
2565 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2566 extra->match_limit_recursion = n;
2567 continue;
2568
2569 case 'q':
2570 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2571 if (extra == NULL)
2572 {
2573 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2574 extra->flags = 0;
2575 }
2576 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2577 extra->match_limit = n;
2578 continue;
2579
2580 #if !defined NODFA
2581 case 'R':
2582 options |= PCRE_DFA_RESTART;
2583 continue;
2584 #endif
2585
2586 case 'S':
2587 show_malloc = 1;
2588 continue;
2589
2590 case 'Y':
2591 options |= PCRE_NO_START_OPTIMIZE;
2592 continue;
2593
2594 case 'Z':
2595 options |= PCRE_NOTEOL;
2596 continue;
2597
2598 case '?':
2599 options |= PCRE_NO_UTF8_CHECK;
2600 continue;
2601
2602 case '<':
2603 {
2604 int x = check_newline(p, outfile);
2605 if (x == 0) goto NEXT_DATA;
2606 options |= x;
2607 while (*p++ != '>');
2608 }
2609 continue;
2610 }
2611 *q++ = c;
2612 }
2613 *q = 0;
2614 len = (int)(q - dbuffer);
2615
2616 /* Move the data to the end of the buffer so that a read over the end of
2617 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2618 we are using the POSIX interface, we must include the terminating zero. */
2619
2620 #if !defined NOPOSIX
2621 if (posix || do_posix)
2622 {
2623 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2624 bptr += buffer_size - len - 1;
2625 }
2626 else
2627 #endif
2628 {
2629 memmove(bptr + buffer_size - len, bptr, len);
2630 bptr += buffer_size - len;
2631 }
2632
2633 if ((all_use_dfa || use_dfa) && find_match_limit)
2634 {
2635 printf("**Match limit not relevant for DFA matching: ignored\n");
2636 find_match_limit = 0;
2637 }
2638
2639 /* Handle matching via the POSIX interface, which does not
2640 support timing or playing with the match limit or callout data. */
2641
2642 #if !defined NOPOSIX
2643 if (posix || do_posix)
2644 {
2645 int rc;
2646 int eflags = 0;
2647 regmatch_t *pmatch = NULL;
2648 if (use_size_offsets > 0)
2649 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2650 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2651 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2652 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2653
2654 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2655
2656 if (rc != 0)
2657 {
2658 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2659 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2660 }
2661 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2662 != 0)
2663 {
2664 fprintf(outfile, "Matched with REG_NOSUB\n");
2665 }
2666 else
2667 {
2668 size_t i;
2669 for (i = 0; i < (size_t)use_size_offsets; i++)
2670 {
2671 if (pmatch[i].rm_so >= 0)
2672 {
2673 fprintf(outfile, "%2d: ", (int)i);
2674 (void)pchars(dbuffer + pmatch[i].rm_so,
2675 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2676 fprintf(outfile, "\n");
2677 if (do_showcaprest || (i == 0 && do_showrest))
2678 {
2679 fprintf(outfile, "%2d+ ", (int)i);
2680 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2681 outfile);
2682 fprintf(outfile, "\n");
2683 }
2684 }
2685 }
2686 }
2687 free(pmatch);
2688 }
2689
2690 /* Handle matching via the native interface - repeats for /g and /G */
2691
2692 else
2693 #endif /* !defined NOPOSIX */
2694
2695 for (;; gmatched++) /* Loop for /g or /G */
2696 {
2697 markptr = NULL;
2698
2699 if (timeitm > 0)
2700 {
2701 register int i;
2702 clock_t time_taken;
2703 clock_t start_time = clock();
2704
2705 #if !defined NODFA
2706 if (all_use_dfa || use_dfa)
2707 {
2708 int workspace[1000];
2709 for (i = 0; i < timeitm; i++)
2710 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2711 options | g_notempty, use_offsets, use_size_offsets, workspace,
2712 sizeof(workspace)/sizeof(int));
2713 }
2714 else
2715 #endif
2716
2717 for (i = 0; i < timeitm; i++)
2718 count = pcre_exec(re, extra, (char *)bptr, len,
2719 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2720
2721 time_taken = clock() - start_time;
2722 fprintf(outfile, "Execute time %.4f milliseconds\n",
2723 (((double)time_taken * 1000.0) / (double)timeitm) /
2724 (double)CLOCKS_PER_SEC);
2725 }
2726
2727 /* If find_match_limit is set, we want to do repeated matches with
2728 varying limits in order to find the minimum value for the match limit and
2729 for the recursion limit. The match limits are relevant only to the normal
2730 running of pcre_exec(), so disable the JIT optimization. This makes it
2731 possible to run the same set of tests with and without JIT externally
2732 requested. */
2733
2734 if (find_match_limit)
2735 {
2736 if (extra == NULL)
2737 {
2738 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2739 extra->flags = 0;
2740 }
2741 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
2742
2743 (void)check_match_limit(re, extra, bptr, len, start_offset,
2744 options|g_notempty, use_offsets, use_size_offsets,
2745 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2746 PCRE_ERROR_MATCHLIMIT, "match()");
2747
2748 count = check_match_limit(re, extra, bptr, len, start_offset,
2749 options|g_notempty, use_offsets, use_size_offsets,
2750 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2751 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2752 }
2753
2754 /* If callout_data is set, use the interface with additional data */
2755
2756 else if (callout_data_set)
2757 {
2758 if (extra == NULL)
2759 {
2760 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2761 extra->flags = 0;
2762 }
2763 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2764 extra->callout_data = &callout_data;
2765 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2766 options | g_notempty, use_offsets, use_size_offsets);
2767 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2768 }
2769
2770 /* The normal case is just to do the match once, with the default
2771 value of match_limit. */
2772
2773 #if !defined NODFA
2774 else if (all_use_dfa || use_dfa)
2775 {
2776 int workspace[1000];
2777 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2778 options | g_notempty, use_offsets, use_size_offsets, workspace,
2779 sizeof(workspace)/sizeof(int));
2780 if (count == 0)
2781 {
2782 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2783 count = use_size_offsets/2;
2784 }
2785 }
2786 #endif
2787
2788 else
2789 {
2790 count = pcre_exec(re, extra, (char *)bptr, len,
2791 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2792 if (count == 0)
2793 {
2794 fprintf(outfile, "Matched, but too many substrings\n");
2795 count = use_size_offsets/3;
2796 }
2797 }
2798
2799 /* Matched */
2800
2801 if (count >= 0)
2802 {
2803 int i, maxcount;
2804
2805 #if !defined NODFA
2806 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2807 #endif
2808 maxcount = use_size_offsets/3;
2809
2810 /* This is a check against a lunatic return value. */
2811
2812 if (count > maxcount)
2813 {
2814 fprintf(outfile,
2815 "** PCRE error: returned count %d is too big for offset size %d\n",
2816 count, use_size_offsets);
2817 count = use_size_offsets/3;
2818 if (do_g || do_G)
2819 {
2820 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2821 do_g = do_G = FALSE; /* Break g/G loop */
2822 }
2823 }
2824
2825 /* do_allcaps requests showing of all captures in the pattern, to check
2826 unset ones at the end. */
2827
2828 if (do_allcaps)
2829 {
2830 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2831 count++; /* Allow for full match */
2832 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2833 }
2834
2835 /* Output the captured substrings */
2836
2837 for (i = 0; i < count * 2; i += 2)
2838 {
2839 if (use_offsets[i] < 0)
2840 {
2841 if (use_offsets[i] != -1)
2842 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2843 use_offsets[i], i);
2844 if (use_offsets[i+1] != -1)
2845 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2846 use_offsets[i+1], i+1);
2847 fprintf(outfile, "%2d: <unset>\n", i/2);
2848 }
2849 else
2850 {
2851 fprintf(outfile, "%2d: ", i/2);
2852 (void)pchars(bptr + use_offsets[i],
2853 use_offsets[i+1] - use_offsets[i], outfile);
2854 fprintf(outfile, "\n");
2855 if (do_showcaprest || (i == 0 && do_showrest))
2856 {
2857 fprintf(outfile, "%2d+ ", i/2);
2858 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2859 outfile);
2860 fprintf(outfile, "\n");
2861 }
2862 }
2863 }
2864
2865 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2866
2867 for (i = 0; i < 32; i++)
2868 {
2869 if ((copystrings & (1 << i)) != 0)
2870 {
2871 char copybuffer[256];
2872 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2873 i, copybuffer, sizeof(copybuffer));
2874 if (rc < 0)
2875 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2876 else
2877 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2878 }
2879 }
2880
2881 for (copynamesptr = copynames;
2882 *copynamesptr != 0;
2883 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2884 {
2885 char copybuffer[256];
2886 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2887 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2888 if (rc < 0)
2889 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2890 else
2891 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2892 }
2893
2894 for (i = 0; i < 32; i++)
2895 {
2896 if ((getstrings & (1 << i)) != 0)
2897 {
2898 const char *substring;
2899 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2900 i, &substring);
2901 if (rc < 0)
2902 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2903 else
2904 {
2905 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2906 pcre_free_substring(substring);
2907 }
2908 }
2909 }
2910
2911 for (getnamesptr = getnames;
2912 *getnamesptr != 0;
2913 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2914 {
2915 const char *substring;
2916 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2917 count, (char *)getnamesptr, &substring);
2918 if (rc < 0)
2919 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2920 else
2921 {
2922 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2923 pcre_free_substring(substring);
2924 }
2925 }
2926
2927 if (getlist)
2928 {
2929 const char **stringlist;
2930 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2931 &stringlist);
2932 if (rc < 0)
2933 fprintf(outfile, "get substring list failed %d\n", rc);
2934 else
2935 {
2936 for (i = 0; i < count; i++)
2937 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2938 if (stringlist[i] != NULL)
2939 fprintf(outfile, "string list not terminated by NULL\n");
2940 pcre_free_substring_list(stringlist);
2941 }
2942 }
2943 }
2944
2945 /* There was a partial match */
2946
2947 else if (count == PCRE_ERROR_PARTIAL)
2948 {
2949 if (markptr == NULL) fprintf(outfile, "Partial match");
2950 else fprintf(outfile, "Partial match, mark=%s", markptr);
2951 if (use_size_offsets > 1)
2952 {
2953 fprintf(outfile, ": ");
2954 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2955 outfile);
2956 }
2957 fprintf(outfile, "\n");
2958 break; /* Out of the /g loop */
2959 }
2960
2961 /* Failed to match. If this is a /g or /G loop and we previously set
2962 g_notempty after a null match, this is not necessarily the end. We want
2963 to advance the start offset, and continue. We won't be at the end of the
2964 string - that was checked before setting g_notempty.
2965
2966 Complication arises in the case when the newline convention is "any",
2967 "crlf", or "anycrlf". If the previous match was at the end of a line
2968 terminated by CRLF, an advance of one character just passes the \r,
2969 whereas we should prefer the longer newline sequence, as does the code in
2970 pcre_exec(). Fudge the offset value to achieve this. We check for a
2971 newline setting in the pattern; if none was set, use pcre_config() to
2972 find the default.
2973
2974 Otherwise, in the case of UTF-8 matching, the advance must be one
2975 character, not one byte. */
2976
2977 else
2978 {
2979 if (g_notempty != 0)
2980 {
2981 int onechar = 1;
2982 unsigned int obits = ((real_pcre *)re)->options;
2983 use_offsets[0] = start_offset;
2984 if ((obits & PCRE_NEWLINE_BITS) == 0)
2985 {
2986 int d;
2987 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2988 /* Note that these values are always the ASCII ones, even in
2989 EBCDIC environments. CR = 13, NL = 10. */
2990 obits = (d == 13)? PCRE_NEWLINE_CR :
2991 (d == 10)? PCRE_NEWLINE_LF :
2992 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2993 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2994 (d == -1)? PCRE_NEWLINE_ANY : 0;
2995 }
2996 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2997 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2998 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2999 &&
3000 start_offset < len - 1 &&
3001 bptr[start_offset] == '\r' &&
3002 bptr[start_offset+1] == '\n')
3003 onechar++;
3004 else if (use_utf8)
3005 {
3006 while (start_offset + onechar < len)
3007 {
3008 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3009 onechar++;
3010 }
3011 }
3012 use_offsets[1] = start_offset + onechar;
3013 }
3014 else
3015 {
3016 switch(count)
3017 {
3018 case PCRE_ERROR_NOMATCH:
3019 if (gmatched == 0)
3020 {
3021 if (markptr == NULL) fprintf(outfile, "No match\n");
3022 else fprintf(outfile, "No match, mark = %s\n", markptr);
3023 }
3024 break;
3025
3026 case PCRE_ERROR_BADUTF8:
3027 case PCRE_ERROR_SHORTUTF8:
3028 fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3029 (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3030 if (use_size_offsets >= 2)
3031 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3032 use_offsets[1]);
3033 fprintf(outfile, "\n");
3034 break;
3035
3036 default:
3037 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3038 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3039 else
3040 fprintf(outfile, "Error %d (Unexpected value)\n", count);
3041 break;
3042 }
3043
3044 break; /* Out of the /g loop */
3045 }
3046 }
3047
3048 /* If not /g or /G we are done */
3049
3050 if (!do_g && !do_G) break;
3051
3052 /* If we have matched an empty string, first check to see if we are at
3053 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3054 Perl's /g option does. This turns out to be rather cunning. First we set
3055 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3056 same point. If this fails (picked up above) we advance to the next
3057 character. */
3058
3059 g_notempty = 0;
3060
3061 if (use_offsets[0] == use_offsets[1])
3062 {
3063 if (use_offsets[0] == len) break;
3064 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3065 }
3066
3067 /* For /g, update the start offset, leaving the rest alone */
3068
3069 if (do_g) start_offset = use_offsets[1];
3070
3071 /* For /G, update the pointer and length */
3072
3073 else
3074 {
3075 bptr += use_offsets[1];
3076 len -= use_offsets[1];
3077 }
3078 } /* End of loop for /g and /G */
3079
3080 NEXT_DATA: continue;
3081 } /* End of loop for data lines */
3082
3083 CONTINUE:
3084
3085 #if !defined NOPOSIX
3086 if (posix || do_posix) regfree(&preg);
3087 #endif
3088
3089 if (re != NULL) new_free(re);
3090 if (extra != NULL) pcre_free_study(extra);
3091 if (locale_set)
3092 {
3093 new_free((void *)tables);
3094 setlocale(LC_CTYPE, "C");
3095 locale_set = 0;
3096 }
3097 if (jit_stack != NULL)
3098 {
3099 pcre_jit_stack_free(jit_stack);
3100 jit_stack = NULL;
3101 }
3102 }
3103
3104 if (infile == stdin) fprintf(outfile, "\n");
3105
3106 EXIT:
3107
3108 if (infile != NULL && infile != stdin) fclose(infile);
3109 if (outfile != NULL && outfile != stdout) fclose(outfile);
3110
3111 free(buffer);
3112 free(dbuffer);
3113 free(pbuffer);
3114 free(offsets);
3115
3116 return yield;
3117 }
3118
3119 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12