/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 735 - (show annotations) (download)
Thu Oct 13 15:51:27 2011 UTC (2 years, 11 months ago) by ph10
File MIME type: text/plain
File size: 93876 byte(s)
Rewrite code that broke under Mac OS (isxdigit with ++ in its argument).

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #ifndef isatty
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
78 #ifndef fileno
79 #define fileno _fileno
80 #endif
81
82 /* A user sent this fix for Borland Builder 5 under Windows. */
83
84 #ifdef __BORLANDC__
85 #define _setmode(handle, mode) setmode(handle, mode)
86 #endif
87
88 /* Not Windows */
89
90 #else
91 #include <sys/time.h> /* These two includes are needed */
92 #include <sys/resource.h> /* for setrlimit(). */
93 #define INPUT_MODE "rb"
94 #define OUTPUT_MODE "wb"
95 #endif
96
97
98 /* We have to include pcre_internal.h because we need the internal info for
99 displaying the results of pcre_study() and we also need to know about the
100 internal macros, structures, and other internal data values; pcretest has
101 "inside information" compared to a program that strictly follows the PCRE API.
102
103 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105 appropriately for an application, not for building PCRE. */
106
107 #include "pcre.h"
108 #include "pcre_internal.h"
109
110 /* We need access to some of the data tables that PCRE uses. So as not to have
111 to keep two copies, we include the source file here, changing the names of the
112 external symbols to prevent clashes. */
113
114 #define _pcre_ucp_gentype ucp_gentype
115 #define _pcre_ucp_typerange ucp_typerange
116 #define _pcre_utf8_table1 utf8_table1
117 #define _pcre_utf8_table1_size utf8_table1_size
118 #define _pcre_utf8_table2 utf8_table2
119 #define _pcre_utf8_table3 utf8_table3
120 #define _pcre_utf8_table4 utf8_table4
121 #define _pcre_utf8_char_sizes utf8_char_sizes
122 #define _pcre_utt utt
123 #define _pcre_utt_size utt_size
124 #define _pcre_utt_names utt_names
125 #define _pcre_OP_lengths OP_lengths
126
127 #include "pcre_tables.c"
128
129 /* We also need the pcre_printint() function for printing out compiled
130 patterns. This function is in a separate file so that it can be included in
131 pcre_compile.c when that module is compiled with debugging enabled. It needs to
132 know which case is being compiled. */
133
134 #define COMPILING_PCRETEST
135 #include "pcre_printint.src"
136
137 /* The definition of the macro PRINTABLE, which determines whether to print an
138 output character as-is or as a hex value when showing compiled patterns, is
139 contained in the printint.src file. We use it here also, in cases when the
140 locale has not been explicitly changed, so as to get consistent output from
141 systems that differ in their output from isprint() even in the "C" locale. */
142
143 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
144
145 /* It is possible to compile this test program without including support for
146 testing the POSIX interface, though this is not available via the standard
147 Makefile. */
148
149 #if !defined NOPOSIX
150 #include "pcreposix.h"
151 #endif
152
153 /* It is also possible, for the benefit of the version currently imported into
154 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
155 interface to the DFA matcher (NODFA), and without the doublecheck of the old
156 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
157 UTF8 support if PCRE is built without it. */
158
159 #ifndef SUPPORT_UTF8
160 #ifndef NOUTF8
161 #define NOUTF8
162 #endif
163 #endif
164
165
166 /* Other parameters */
167
168 #ifndef CLOCKS_PER_SEC
169 #ifdef CLK_TCK
170 #define CLOCKS_PER_SEC CLK_TCK
171 #else
172 #define CLOCKS_PER_SEC 100
173 #endif
174 #endif
175
176 /* This is the default loop count for timing. */
177
178 #define LOOPREPEAT 500000
179
180 /* Static variables */
181
182 static FILE *outfile; /* stream that callout() and the malloc/free wrappers print to */
183 static int log_store = 0; /* NOTE(review): not referenced in this part of the file */
184 static int callout_count; /* incremented by callout() for each callout whose number equals callout_fail_id */
185 static int callout_extra; /* non-zero: callout() also prints the captured substrings */
186 static int callout_fail_count; /* callout() returns 1 once callout_count reaches this value */
187 static int callout_fail_id; /* the callout number that counts towards callout_fail_count */
188 static int debug_lengths; /* NOTE(review): not referenced in this part of the file */
189 static int first_callout; /* non-zero: callout() re-prints the subject; cleared by callout() */
190 static int locale_set = 0; /* selects isprint() rather than PRINTABLE() in PRINTHEX */
191 static int show_malloc; /* non-zero: the malloc/free wrappers log each call to outfile */
192 static int use_utf8; /* non-zero: pchars() decodes its input as UTF-8 */
193 static size_t gotten_store; /* size passed to the most recent new_malloc() call */
194 static const unsigned char *last_callout_mark = NULL; /* last mark reported by callout() */
195
196 /* The buffers grow automatically if very long input lines are encountered. */
197
198 static int buffer_size = 50000; /* current size of each buffer; doubled by extend_inputline() */
199 static uschar *buffer = NULL; /* input line buffer filled by extend_inputline() */
200 static uschar *dbuffer = NULL; /* grown alongside buffer; contents are not preserved on growth */
201 static uschar *pbuffer = NULL; /* read by callout() to print the current pattern item; preserved on growth */
202
203 /* Textual explanations for runtime error codes */
204
205 static const char *errtexts[] = { /* presumably indexed by the negated PCRE error code - TODO confirm against pcre.h */
206 NULL, /* 0 is no error */
207 NULL, /* 1: NOMATCH is handled specially */
208 "NULL argument passed", /* 2 */
209 "bad option value", /* 3 */
210 "magic number missing", /* 4 */
211 "unknown opcode - pattern overwritten?", /* 5 */
212 "no more memory", /* 6 */
213 NULL, /* 7: never returned by pcre_exec() or pcre_dfa_exec() */
214 "match limit exceeded", /* 8 */
215 "callout error code", /* 9 */
216 NULL, /* 10: BADUTF8 is handled specially */
217 "bad UTF-8 offset", /* 11 */
218 NULL, /* 12: PARTIAL is handled specially */
219 "not used - internal error", /* 13 */
220 "internal error - pattern overwritten?", /* 14 */
221 "bad count value", /* 15 */
222 "item unsupported for DFA matching", /* 16 */
223 "backreference condition or recursion test not supported for DFA matching", /* 17 */
224 "match limit not supported for DFA matching", /* 18 */
225 "workspace size exceeded in DFA matching", /* 19 */
226 "too much recursion for DFA matching", /* 20 */
227 "recursion limit exceeded", /* 21 */
228 "not used - internal error", /* 22 */
229 "invalid combination of newline options", /* 23 */
230 "bad offset value", /* 24 */
231 NULL, /* 25: SHORTUTF8 is handled specially */
232 "nested recursion at the same subject position", /* 26 */
233 "JIT stack limit reached" /* 27 */
234 };
235
236
237 /*************************************************
238 * Alternate character tables *
239 *************************************************/
240
241 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
242 using the default tables of the library. However, the T option can be used to
243 select alternate sets of tables, for different kinds of testing. Note also that
244 the L (locale) option also adjusts the tables. */
245
246 /* This is the set of tables distributed as default with PCRE. It recognizes
247 only ASCII characters. */
248
249 static const unsigned char tables0[] = { /* four sub-tables: 256 + 256 + 320 + 256 bytes */
250
251 /* This table is a lower casing table. */
252
253 0, 1, 2, 3, 4, 5, 6, 7,
254 8, 9, 10, 11, 12, 13, 14, 15,
255 16, 17, 18, 19, 20, 21, 22, 23,
256 24, 25, 26, 27, 28, 29, 30, 31,
257 32, 33, 34, 35, 36, 37, 38, 39,
258 40, 41, 42, 43, 44, 45, 46, 47,
259 48, 49, 50, 51, 52, 53, 54, 55,
260 56, 57, 58, 59, 60, 61, 62, 63,
261 64, 97, 98, 99,100,101,102,103,
262 104,105,106,107,108,109,110,111,
263 112,113,114,115,116,117,118,119,
264 120,121,122, 91, 92, 93, 94, 95,
265 96, 97, 98, 99,100,101,102,103,
266 104,105,106,107,108,109,110,111,
267 112,113,114,115,116,117,118,119,
268 120,121,122,123,124,125,126,127,
269 128,129,130,131,132,133,134,135,
270 136,137,138,139,140,141,142,143,
271 144,145,146,147,148,149,150,151,
272 152,153,154,155,156,157,158,159,
273 160,161,162,163,164,165,166,167,
274 168,169,170,171,172,173,174,175,
275 176,177,178,179,180,181,182,183,
276 184,185,186,187,188,189,190,191,
277 192,193,194,195,196,197,198,199,
278 200,201,202,203,204,205,206,207,
279 208,209,210,211,212,213,214,215,
280 216,217,218,219,220,221,222,223,
281 224,225,226,227,228,229,230,231,
282 232,233,234,235,236,237,238,239,
283 240,241,242,243,244,245,246,247,
284 248,249,250,251,252,253,254,255,
285
286 /* This table is a case flipping table. */
287
288 0, 1, 2, 3, 4, 5, 6, 7,
289 8, 9, 10, 11, 12, 13, 14, 15,
290 16, 17, 18, 19, 20, 21, 22, 23,
291 24, 25, 26, 27, 28, 29, 30, 31,
292 32, 33, 34, 35, 36, 37, 38, 39,
293 40, 41, 42, 43, 44, 45, 46, 47,
294 48, 49, 50, 51, 52, 53, 54, 55,
295 56, 57, 58, 59, 60, 61, 62, 63,
296 64, 97, 98, 99,100,101,102,103,
297 104,105,106,107,108,109,110,111,
298 112,113,114,115,116,117,118,119,
299 120,121,122, 91, 92, 93, 94, 95,
300 96, 65, 66, 67, 68, 69, 70, 71,
301 72, 73, 74, 75, 76, 77, 78, 79,
302 80, 81, 82, 83, 84, 85, 86, 87,
303 88, 89, 90,123,124,125,126,127,
304 128,129,130,131,132,133,134,135,
305 136,137,138,139,140,141,142,143,
306 144,145,146,147,148,149,150,151,
307 152,153,154,155,156,157,158,159,
308 160,161,162,163,164,165,166,167,
309 168,169,170,171,172,173,174,175,
310 176,177,178,179,180,181,182,183,
311 184,185,186,187,188,189,190,191,
312 192,193,194,195,196,197,198,199,
313 200,201,202,203,204,205,206,207,
314 208,209,210,211,212,213,214,215,
315 216,217,218,219,220,221,222,223,
316 224,225,226,227,228,229,230,231,
317 232,233,234,235,236,237,238,239,
318 240,241,242,243,244,245,246,247,
319 248,249,250,251,252,253,254,255,
320
321 /* This table contains bit maps for various character classes. Each map is 32
322 bytes long and the bits run from the least significant end of each byte. The
323 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
324 graph, print, punct, and cntrl. Other classes are built from combinations.
Character c is in a class when bit (c & 7) of byte (c >> 3) of its map is set. */
325
326 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, /* space: 9-13 and 32 */
327 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
328 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330
331 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* xdigit: 0-9, A-F, a-f */
332 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
333 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
335
336 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* digit: 0-9 */
337 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
338 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
340
341 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* upper: A-Z */
342 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
343 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
344 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
345
346 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* lower: a-z */
347 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
348 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
349 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
350
351 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* word: 0-9, A-Z, '_', a-z */
352 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
353 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
354 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
355
356 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, /* graph: 33-126 */
357 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
358 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
359 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360
361 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, /* print: 32-126 */
362 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
363 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
364 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365
366 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, /* punct */
367 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
368 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
369 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
370
371 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, /* cntrl: 0-31 and 127 */
372 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
373 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
374 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
375
376 /* This table identifies various classes of character by individual bits:
377 0x01 white space character
378 0x02 letter
379 0x04 decimal digit
380 0x08 hexadecimal digit
381 0x10 alphanumeric or '_'
382 0x80 regular expression metacharacter or binary zero
383 */
384
385 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
386 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
387 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
388 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
389 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
390 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
391 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
392 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
393 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
394 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
395 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
396 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
397 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
398 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
399 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
400 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
401 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
402 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
403 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
404 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
405 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
406 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
407 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
408 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
409 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
410 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
411 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
412 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
413 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
414 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
415 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
416 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
417
418 /* This is a set of tables that came originally from a Windows user. It seems to
419 be at least an approximation of ISO 8859. In particular, there are characters
420 greater than 128 that are marked as spaces, letters, etc. */
421
422 static const unsigned char tables1[] = { /* same four-part layout as tables0 (by position and content) */
423 0,1,2,3,4,5,6,7, /* bytes 0-255: lower casing table */
424 8,9,10,11,12,13,14,15,
425 16,17,18,19,20,21,22,23,
426 24,25,26,27,28,29,30,31,
427 32,33,34,35,36,37,38,39,
428 40,41,42,43,44,45,46,47,
429 48,49,50,51,52,53,54,55,
430 56,57,58,59,60,61,62,63,
431 64,97,98,99,100,101,102,103,
432 104,105,106,107,108,109,110,111,
433 112,113,114,115,116,117,118,119,
434 120,121,122,91,92,93,94,95,
435 96,97,98,99,100,101,102,103,
436 104,105,106,107,108,109,110,111,
437 112,113,114,115,116,117,118,119,
438 120,121,122,123,124,125,126,127,
439 128,129,130,131,132,133,134,135,
440 136,137,138,139,140,141,142,143,
441 144,145,146,147,148,149,150,151,
442 152,153,154,155,156,157,158,159,
443 160,161,162,163,164,165,166,167,
444 168,169,170,171,172,173,174,175,
445 176,177,178,179,180,181,182,183,
446 184,185,186,187,188,189,190,191,
447 224,225,226,227,228,229,230,231, /* 192-222 map to 224-254, except 215 which maps to itself */
448 232,233,234,235,236,237,238,239,
449 240,241,242,243,244,245,246,215,
450 248,249,250,251,252,253,254,223,
451 224,225,226,227,228,229,230,231,
452 232,233,234,235,236,237,238,239,
453 240,241,242,243,244,245,246,247,
454 248,249,250,251,252,253,254,255,
455 0,1,2,3,4,5,6,7, /* bytes 256-511: case flipping table */
456 8,9,10,11,12,13,14,15,
457 16,17,18,19,20,21,22,23,
458 24,25,26,27,28,29,30,31,
459 32,33,34,35,36,37,38,39,
460 40,41,42,43,44,45,46,47,
461 48,49,50,51,52,53,54,55,
462 56,57,58,59,60,61,62,63,
463 64,97,98,99,100,101,102,103,
464 104,105,106,107,108,109,110,111,
465 112,113,114,115,116,117,118,119,
466 120,121,122,91,92,93,94,95,
467 96,65,66,67,68,69,70,71,
468 72,73,74,75,76,77,78,79,
469 80,81,82,83,84,85,86,87,
470 88,89,90,123,124,125,126,127,
471 128,129,130,131,132,133,134,135,
472 136,137,138,139,140,141,142,143,
473 144,145,146,147,148,149,150,151,
474 152,153,154,155,156,157,158,159,
475 160,161,162,163,164,165,166,167,
476 168,169,170,171,172,173,174,175,
477 176,177,178,179,180,181,182,183,
478 184,185,186,187,188,189,190,191,
479 224,225,226,227,228,229,230,231,
480 232,233,234,235,236,237,238,239,
481 240,241,242,243,244,245,246,215,
482 248,249,250,251,252,253,254,223,
483 192,193,194,195,196,197,198,199,
484 200,201,202,203,204,205,206,207,
485 208,209,210,211,212,213,214,247,
486 216,217,218,219,220,221,222,255,
487 0,62,0,0,1,0,0,0, /* bytes 512-831: ten 32-byte character class bit maps; presumably in the same class order as tables0 - TODO confirm */
488 0,0,0,0,0,0,0,0,
489 32,0,0,0,1,0,0,0,
490 0,0,0,0,0,0,0,0,
491 0,0,0,0,0,0,255,3,
492 126,0,0,0,126,0,0,0,
493 0,0,0,0,0,0,0,0,
494 0,0,0,0,0,0,0,0,
495 0,0,0,0,0,0,255,3,
496 0,0,0,0,0,0,0,0,
497 0,0,0,0,0,0,12,2,
498 0,0,0,0,0,0,0,0,
499 0,0,0,0,0,0,0,0,
500 254,255,255,7,0,0,0,0,
501 0,0,0,0,0,0,0,0,
502 255,255,127,127,0,0,0,0,
503 0,0,0,0,0,0,0,0,
504 0,0,0,0,254,255,255,7,
505 0,0,0,0,0,4,32,4,
506 0,0,0,128,255,255,127,255,
507 0,0,0,0,0,0,255,3,
508 254,255,255,135,254,255,255,7,
509 0,0,0,0,0,4,44,6,
510 255,255,127,255,255,255,127,255,
511 0,0,0,0,254,255,255,255,
512 255,255,255,255,255,255,255,127,
513 0,0,0,0,254,255,255,255,
514 255,255,255,255,255,255,255,255,
515 0,2,0,0,255,255,255,255,
516 255,255,255,255,255,255,255,127,
517 0,0,0,0,255,255,255,255,
518 255,255,255,255,255,255,255,255,
519 0,0,0,0,254,255,0,252,
520 1,0,0,248,1,0,0,120,
521 0,0,0,0,254,255,255,255,
522 0,0,128,0,0,0,128,0,
523 255,255,255,255,0,0,0,0,
524 0,0,0,0,0,0,0,128,
525 255,255,255,255,0,0,0,0,
526 0,0,0,0,0,0,0,0,
527 128,0,0,0,0,0,0,0, /* bytes 832-1087: per-character type flags (same bit meanings as in tables0, in decimal) */
528 0,1,1,0,1,1,0,0,
529 0,0,0,0,0,0,0,0,
530 0,0,0,0,0,0,0,0,
531 1,0,0,0,128,0,0,0,
532 128,128,128,128,0,0,128,0,
533 28,28,28,28,28,28,28,28,
534 28,28,0,0,0,0,0,128,
535 0,26,26,26,26,26,26,18,
536 18,18,18,18,18,18,18,18,
537 18,18,18,18,18,18,18,18,
538 18,18,18,128,128,0,128,16,
539 0,26,26,26,26,26,26,18,
540 18,18,18,18,18,18,18,18,
541 18,18,18,18,18,18,18,18,
542 18,18,18,128,128,0,0,0,
543 0,0,0,0,0,1,0,0,
544 0,0,0,0,0,0,0,0,
545 0,0,0,0,0,0,0,0,
546 0,0,0,0,0,0,0,0,
547 1,0,0,0,0,0,0,0,
548 0,0,18,0,0,0,0,0,
549 0,0,20,20,0,18,0,0,
550 0,20,18,0,0,0,0,0,
551 18,18,18,18,18,18,18,18,
552 18,18,18,18,18,18,18,18,
553 18,18,18,18,18,18,18,0,
554 18,18,18,18,18,18,18,18,
555 18,18,18,18,18,18,18,18,
556 18,18,18,18,18,18,18,18,
557 18,18,18,18,18,18,18,0,
558 18,18,18,18,18,18,18,18
559 };
560
561
562
563
564 #ifndef HAVE_STRERROR
565 /*************************************************
566 * Provide strerror() for non-ANSI libraries *
567 *************************************************/
568
/* Fallback strerror() for C libraries that lack one (the original note cites
SunOS4). It looks the message up in the system-provided sys_errlist[] table,
which holds sys_nerr entries.

Argument:   n   an errno value
Returns:    the matching message, or a fixed text when n is outside the table */

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];
  return "unknown error number";
}
582 #endif /* HAVE_STRERROR */
583
584
585 /*************************************************
586 * JIT memory callback *
587 *************************************************/
588
589 static pcre_jit_stack* jit_callback(void *arg)
590 {
591 return (pcre_jit_stack *)arg;
592 }
593
594
595 /*************************************************
596 * Read or extend an input line *
597 *************************************************/
598
599 /* Input lines are read into buffer, but both patterns and data lines can be
600 continued over multiple input lines. In addition, if the buffer fills up, we
601 want to automatically expand it so as to be able to handle extremely large
602 lines that are needed for certain stress tests. When the input buffer is
603 expanded, the other two buffers must also be expanded likewise, and the
604 contents of pbuffer, which are a copy of the input for callouts, must be
605 preserved (for when expansion happens for a data line). This is not the most
606 optimal way of handling this, but hey, this is just a test program!
607
608 Arguments:
609 f the file to read
610 start where in buffer to start (this *must* be within buffer)
611 prompt for stdin or readline()
612
613 Returns: pointer to the start of new data
614 could be a copy of start, or could be moved
615 NULL if no data read and EOF reached
616 */
617
618 static uschar *
619 extend_inputline(FILE *f, uschar *start, const char *prompt)
620 {
621 uschar *here = start;
622
623 for (;;)
624 {
625 int rlen = (int)(buffer_size - (here - buffer));
626
627 if (rlen > 1000)
628 {
629 int dlen;
630
631 /* If libreadline support is required, use readline() to read a line if the
632 input is a terminal. Note that readline() removes the trailing newline, so
633 we must put it back again, to be compatible with fgets(). */
634
635 #ifdef SUPPORT_LIBREADLINE
636 if (isatty(fileno(f)))
637 {
638 size_t len;
639 char *s = readline(prompt);
640 if (s == NULL) return (here == start)? NULL : start;
641 len = strlen(s);
642 if (len > 0) add_history(s);
643 if (len > rlen - 1) len = rlen - 1;
644 memcpy(here, s, len);
645 here[len] = '\n';
646 here[len+1] = 0;
647 free(s);
648 }
649 else
650 #endif
651
652 /* Read the next line by normal means, prompting if the file is stdin. */
653
654 {
655 if (f == stdin) printf("%s", prompt);
656 if (fgets((char *)here, rlen, f) == NULL)
657 return (here == start)? NULL : start;
658 }
659
660 dlen = (int)strlen((char *)here);
661 if (dlen > 0 && here[dlen - 1] == '\n') return start;
662 here += dlen;
663 }
664
665 else
666 {
667 int new_buffer_size = 2*buffer_size;
668 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
669 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
670 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
671
672 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
673 {
674 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
675 exit(1);
676 }
677
678 memcpy(new_buffer, buffer, buffer_size);
679 memcpy(new_pbuffer, pbuffer, buffer_size);
680
681 buffer_size = new_buffer_size;
682
683 start = new_buffer + (start - buffer);
684 here = new_buffer + (here - buffer);
685
686 free(buffer);
687 free(dbuffer);
688 free(pbuffer);
689
690 buffer = new_buffer;
691 dbuffer = new_dbuffer;
692 pbuffer = new_pbuffer;
693 }
694 }
695
696 return NULL; /* Control never gets here */
697 }
698
699
700
701
702
703
704
705 /*************************************************
706 * Read number from string *
707 *************************************************/
708
/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then a run of decimal digits is converted. No
sign is accepted. If there are no digits the result is 0. A value too large
for an int is clamped to INT_MAX instead of overflowing (signed overflow is
undefined behaviour); the remaining digits are still consumed.

Arguments:
  str        string to be converted
  endptr     where to put the end pointer (first character after the digits)

Returns:     the value as a non-negative int
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int result = 0;

  while (isspace(*str)) str++;          /* isspace(0) is false, so this stops at the end */

  while (isdigit(*str))
  {
    int digit = *str++ - '0';
    if (result > (INT_MAX - digit) / 10)
      result = INT_MAX;                 /* result*10 + digit would not fit */
    else
      result = result * 10 + digit;
  }

  *endptr = str;
  return result;
}
729
730
731
732
733 /*************************************************
734 * Convert UTF-8 string to value *
735 *************************************************/
736
737 /* This function takes one or more bytes that represents a UTF-8 character,
738 and returns the value of the character.
739
740 Argument:
741 utf8bytes a pointer to the byte vector
742 vptr a pointer to an int to receive the value
743
744 Returns: > 0 => the number of bytes consumed
745 -6 to 0 => malformed UTF-8 character at offset = (-return)
746 */
747
748 #if !defined NOUTF8
749
750 static int
751 utf82ord(unsigned char *utf8bytes, int *vptr)
752 {
753 int c = *utf8bytes++;
754 int d = c;
755 int i, j, s;
756
757 for (i = -1; i < 6; i++) /* i is number of additional bytes */
758 {
759 if ((d & 0x80) == 0) break;
760 d <<= 1;
761 }
762
763 if (i == -1) { *vptr = c; return 1; } /* ascii character */
764 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
765
766 /* i now has a value in the range 1-5 */
767
768 s = 6*i;
769 d = (c & utf8_table3[i]) << s;
770
771 for (j = 0; j < i; j++)
772 {
773 c = *utf8bytes++;
774 if ((c & 0xc0) != 0x80) return -(j+1);
775 s -= 6;
776 d |= (c & 0x3f) << s;
777 }
778
779 /* Check that encoding was the correct unique one */
780
781 for (j = 0; j < utf8_table1_size; j++)
782 if (d <= utf8_table1[j]) break;
783 if (j != i) return -(i+1);
784
785 /* Valid value */
786
787 *vptr = d;
788 return i+1;
789 }
790
791 #endif
792
793
794
795 /*************************************************
796 * Convert character value to UTF-8 *
797 *************************************************/
798
799 /* This function takes an integer value in the range 0 - 0x7fffffff
800 and encodes it as a UTF-8 character in 0 to 6 bytes.
801
802 Arguments:
803 cvalue the character value
804 utf8bytes pointer to buffer for result - at least 6 bytes long
805
806 Returns: number of characters placed in the buffer
807 */
808
809 #if !defined NOUTF8
810
811 static int
812 ord2utf8(int cvalue, uschar *utf8bytes)
813 {
814 register int i, j;
815 for (i = 0; i < utf8_table1_size; i++)
816 if (cvalue <= utf8_table1[i]) break;
817 utf8bytes += i;
818 for (j = i; j > 0; j--)
819 {
820 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
821 cvalue >>= 6;
822 }
823 *utf8bytes = utf8_table2[i] | cvalue;
824 return i + 1;
825 }
826
827 #endif
828
829
830
831 /*************************************************
832 * Print character string *
833 *************************************************/
834
835 /* Character string printing function. Must handle UTF-8 strings in utf8
836 mode. Yields number of characters printed. If handed a NULL file, just counts
837 chars without printing. */
838
839 static int pchars(unsigned char *p, int length, FILE *f)
840 {
841 int c = 0;
842 int yield = 0;
843
844 while (length-- > 0)
845 {
846 #if !defined NOUTF8
847 if (use_utf8)
848 {
849 int rc = utf82ord(p, &c);
850
851 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
852 {
853 length -= rc - 1;
854 p += rc;
855 if (PRINTHEX(c))
856 {
857 if (f != NULL) fprintf(f, "%c", c);
858 yield++;
859 }
860 else
861 {
862 int n = 4;
863 if (f != NULL) fprintf(f, "\\x{%02x}", c);
864 yield += (n <= 0x000000ff)? 2 :
865 (n <= 0x00000fff)? 3 :
866 (n <= 0x0000ffff)? 4 :
867 (n <= 0x000fffff)? 5 : 6;
868 }
869 continue;
870 }
871 }
872 #endif
873
874 /* Not UTF-8, or malformed UTF-8 */
875
876 c = *p++;
877 if (PRINTHEX(c))
878 {
879 if (f != NULL) fprintf(f, "%c", c);
880 yield++;
881 }
882 else
883 {
884 if (f != NULL) fprintf(f, "\\x%02x", c);
885 yield += 4;
886 }
887 }
888
889 return yield;
890 }
891
892
893
894 /*************************************************
895 * Callout function *
896 *************************************************/
897
898 /* Called from PCRE as a result of the (?C) item. We print out where we are in
899 the match. Yield zero unless more callouts than the fail count, or the callout
900 data is not zero. */
901
902 static int callout(pcre_callout_block *cb)
903 {
904 FILE *f = (first_callout | callout_extra)? outfile : NULL;
905 int i, pre_start, post_start, subject_length;
906
907 if (callout_extra)
908 {
909 fprintf(f, "Callout %d: last capture = %d\n",
910 cb->callout_number, cb->capture_last);
911
912 for (i = 0; i < cb->capture_top * 2; i += 2)
913 {
914 if (cb->offset_vector[i] < 0)
915 fprintf(f, "%2d: <unset>\n", i/2);
916 else
917 {
918 fprintf(f, "%2d: ", i/2);
919 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
920 cb->offset_vector[i+1] - cb->offset_vector[i], f);
921 fprintf(f, "\n");
922 }
923 }
924 }
925
926 /* Re-print the subject in canonical form, the first time or if giving full
927 datails. On subsequent calls in the same match, we use pchars just to find the
928 printed lengths of the substrings. */
929
930 if (f != NULL) fprintf(f, "--->");
931
932 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
933 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
934 cb->current_position - cb->start_match, f);
935
936 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
937
938 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
939 cb->subject_length - cb->current_position, f);
940
941 if (f != NULL) fprintf(f, "\n");
942
943 /* Always print appropriate indicators, with callout number if not already
944 shown. For automatic callouts, show the pattern offset. */
945
946 if (cb->callout_number == 255)
947 {
948 fprintf(outfile, "%+3d ", cb->pattern_position);
949 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
950 }
951 else
952 {
953 if (callout_extra) fprintf(outfile, " ");
954 else fprintf(outfile, "%3d ", cb->callout_number);
955 }
956
957 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
958 fprintf(outfile, "^");
959
960 if (post_start > 0)
961 {
962 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
963 fprintf(outfile, "^");
964 }
965
966 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
967 fprintf(outfile, " ");
968
969 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
970 pbuffer + cb->pattern_position);
971
972 fprintf(outfile, "\n");
973 first_callout = 0;
974
975 if (cb->mark != last_callout_mark)
976 {
977 fprintf(outfile, "Latest Mark: %s\n",
978 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
979 last_callout_mark = cb->mark;
980 }
981
982 if (cb->callout_data != NULL)
983 {
984 int callout_data = *((int *)(cb->callout_data));
985 if (callout_data != 0)
986 {
987 fprintf(outfile, "Callout data = %d\n", callout_data);
988 return callout_data;
989 }
990 }
991
992 return (cb->callout_number != callout_fail_id)? 0 :
993 (++callout_count >= callout_fail_count)? 1 : 0;
994 }
995
996
997 /*************************************************
998 * Local malloc functions *
999 *************************************************/
1000
1001 /* Alternative malloc function, to test functionality and save the size of a
1002 compiled re. The show_malloc variable is set only during matching. */
1003
1004 static void *new_malloc(size_t size)
1005 {
1006 void *block = malloc(size);
1007 gotten_store = size;
1008 if (show_malloc)
1009 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1010 return block;
1011 }
1012
1013 static void new_free(void *block)
1014 {
1015 if (show_malloc)
1016 fprintf(outfile, "free %p\n", block);
1017 free(block);
1018 }
1019
1020 /* For recursion malloc/free, to test stacking calls */
1021
1022 static void *stack_malloc(size_t size)
1023 {
1024 void *block = malloc(size);
1025 if (show_malloc)
1026 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1027 return block;
1028 }
1029
1030 static void stack_free(void *block)
1031 {
1032 if (show_malloc)
1033 fprintf(outfile, "stack_free %p\n", block);
1034 free(block);
1035 }
1036
1037
1038 /*************************************************
1039 * Call pcre_fullinfo() *
1040 *************************************************/
1041
1042 /* Get one piece of information from the pcre_fullinfo() function */
1043
1044 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1045 {
1046 int rc;
1047 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1048 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1049 }
1050
1051
1052
1053 /*************************************************
1054 * Byte flipping function *
1055 *************************************************/
1056
/* Reverse the byte order of a 2-byte or 4-byte quantity that is held in an
unsigned long.

Arguments:
  value       the quantity to flip; only its low-order 2 (or 4) bytes are used
  n           the width in bytes: 2 selects a 16-bit swap, and every other
                value is treated as 4

Returns:      the selected bytes in reverse order, with all higher bits zero
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1066
1067
1068
1069
1070 /*************************************************
1071 * Check match or recursion limit *
1072 *************************************************/
1073
1074 static int
1075 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1076 int start_offset, int options, int *use_offsets, int use_size_offsets,
1077 int flag, unsigned long int *limit, int errnumber, const char *msg)
1078 {
1079 int count;
1080 int min = 0;
1081 int mid = 64;
1082 int max = -1;
1083
1084 extra->flags |= flag;
1085
1086 for (;;)
1087 {
1088 *limit = mid;
1089
1090 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1091 use_offsets, use_size_offsets);
1092
1093 if (count == errnumber)
1094 {
1095 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1096 min = mid;
1097 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1098 }
1099
1100 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1101 count == PCRE_ERROR_PARTIAL)
1102 {
1103 if (mid == min + 1)
1104 {
1105 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1106 break;
1107 }
1108 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1109 max = mid;
1110 mid = (min + mid)/2;
1111 }
1112 else break; /* Some other error */
1113 }
1114
1115 extra->flags &= ~flag;
1116 return count;
1117 }
1118
1119
1120
1121 /*************************************************
1122 * Case-independent strncmp() function *
1123 *************************************************/
1124
1125 /*
1126 Arguments:
1127 s first string
1128 t second string
1129 n number of characters to compare
1130
1131 Returns: < 0, = 0, or > 0, according to the comparison
1132 */
1133
1134 static int
1135 strncmpic(uschar *s, uschar *t, int n)
1136 {
1137 while (n--)
1138 {
1139 int c = tolower(*s++) - tolower(*t++);
1140 if (c) return c;
1141 }
1142 return 0;
1143 }
1144
1145
1146
1147 /*************************************************
1148 * Check newline indicator *
1149 *************************************************/
1150
1151 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1152 a message and return 0 if there is no match.
1153
1154 Arguments:
1155 p points after the leading '<'
1156 f file for error message
1157
1158 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1159 */
1160
1161 static int
1162 check_newline(uschar *p, FILE *f)
1163 {
1164 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1165 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1166 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1167 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1168 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1169 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1170 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1171 fprintf(f, "Unknown newline type at: <%s\n", p);
1172 return 0;
1173 }
1174
1175
1176
1177 /*************************************************
1178 * Usage function *
1179 *************************************************/
1180
/* Write the command-line help text to stdout. Which lines appear depends on
the build: the readline line is selected by SUPPORT_LIBREADLINE, and the -dfa
and -p lines are left out when NODFA or NOPOSIX, respectively, is defined.

Arguments:    none
Returns:      nothing
*/

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);
#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
fputs("\nOptions:\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1215
1216
1217
1218 /*************************************************
1219 * Main Program *
1220 *************************************************/
1221
1222 /* Read lines from named file or stdin and write to named file or stdout; lines
1223 consist of a regular expression, in delimiters and optionally followed by
1224 options, followed by a set of test data, terminated by an empty line. */
1225
1226 int main(int argc, char **argv)
1227 {
1228 FILE *infile = stdin;
1229 int options = 0;
1230 int study_options = 0;
1231 int default_find_match_limit = FALSE;
1232 int op = 1;
1233 int timeit = 0;
1234 int timeitm = 0;
1235 int showinfo = 0;
1236 int showstore = 0;
1237 int force_study = -1;
1238 int force_study_options = 0;
1239 int quiet = 0;
1240 int size_offsets = 45;
1241 int size_offsets_max;
1242 int *offsets = NULL;
1243 #if !defined NOPOSIX
1244 int posix = 0;
1245 #endif
1246 int debug = 0;
1247 int done = 0;
1248 int all_use_dfa = 0;
1249 int yield = 0;
1250 int stack_size;
1251
1252 pcre_jit_stack *jit_stack = NULL;
1253
1254
1255 /* These vectors store, end-to-end, a list of captured substring names. Assume
1256 that 1024 is plenty long enough for the few names we'll be testing. */
1257
1258 uschar copynames[1024];
1259 uschar getnames[1024];
1260
1261 uschar *copynamesptr;
1262 uschar *getnamesptr;
1263
1264 /* Get buffers from malloc() so that Electric Fence will check their misuse
1265 when I am debugging. They grow automatically when very long lines are read. */
1266
1267 buffer = (unsigned char *)malloc(buffer_size);
1268 dbuffer = (unsigned char *)malloc(buffer_size);
1269 pbuffer = (unsigned char *)malloc(buffer_size);
1270
1271 /* The outfile variable is static so that new_malloc can use it. */
1272
1273 outfile = stdout;
1274
1275 /* The following _setmode() stuff is some Windows magic that tells its runtime
1276 library to translate CRLF into a single LF character. At least, that's what
1277 I've been told: never having used Windows I take this all on trust. Originally
1278 it set 0x8000, but then I was advised that _O_BINARY was better. */
1279
1280 #if defined(_WIN32) || defined(WIN32)
1281 _setmode( _fileno( stdout ), _O_BINARY );
1282 #endif
1283
1284 /* Scan options */
1285
1286 while (argc > 1 && argv[op][0] == '-')
1287 {
1288 unsigned char *endptr;
1289
1290 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1291 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1292 else if (strcmp(argv[op], "-s+") == 0)
1293 {
1294 force_study = 1;
1295 force_study_options = PCRE_STUDY_JIT_COMPILE;
1296 }
1297 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1298 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1299 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1300 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1301 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1302 #if !defined NODFA
1303 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1304 #endif
1305 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1306 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1307 *endptr == 0))
1308 {
1309 op++;
1310 argc--;
1311 }
1312 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1313 {
1314 int both = argv[op][2] == 0;
1315 int temp;
1316 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1317 *endptr == 0))
1318 {
1319 timeitm = temp;
1320 op++;
1321 argc--;
1322 }
1323 else timeitm = LOOPREPEAT;
1324 if (both) timeit = timeitm;
1325 }
1326 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1327 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1328 *endptr == 0))
1329 {
1330 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1331 printf("PCRE: -S not supported on this OS\n");
1332 exit(1);
1333 #else
1334 int rc;
1335 struct rlimit rlim;
1336 getrlimit(RLIMIT_STACK, &rlim);
1337 rlim.rlim_cur = stack_size * 1024 * 1024;
1338 rc = setrlimit(RLIMIT_STACK, &rlim);
1339 if (rc != 0)
1340 {
1341 printf("PCRE: setrlimit() failed with error %d\n", rc);
1342 exit(1);
1343 }
1344 op++;
1345 argc--;
1346 #endif
1347 }
1348 #if !defined NOPOSIX
1349 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1350 #endif
1351 else if (strcmp(argv[op], "-C") == 0)
1352 {
1353 int rc;
1354 unsigned long int lrc;
1355 printf("PCRE version %s\n", pcre_version());
1356 printf("Compiled with\n");
1357 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1358 printf(" %sUTF-8 support\n", rc? "" : "No ");
1359 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1360 printf(" %sUnicode properties support\n", rc? "" : "No ");
1361 (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1362 if (rc)
1363 printf(" Just-in-time compiler support\n");
1364 else
1365 printf(" No just-in-time compiler support\n");
1366 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1367 /* Note that these values are always the ASCII values, even
1368 in EBCDIC environments. CR is 13 and NL is 10. */
1369 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1370 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1371 (rc == -2)? "ANYCRLF" :
1372 (rc == -1)? "ANY" : "???");
1373 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1374 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1375 "all Unicode newlines");
1376 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1377 printf(" Internal link size = %d\n", rc);
1378 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1379 printf(" POSIX malloc threshold = %d\n", rc);
1380 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1381 printf(" Default match limit = %ld\n", lrc);
1382 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1383 printf(" Default recursion depth limit = %ld\n", lrc);
1384 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1385 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1386 goto EXIT;
1387 }
1388 else if (strcmp(argv[op], "-help") == 0 ||
1389 strcmp(argv[op], "--help") == 0)
1390 {
1391 usage();
1392 goto EXIT;
1393 }
1394 else
1395 {
1396 printf("** Unknown or malformed option %s\n", argv[op]);
1397 usage();
1398 yield = 1;
1399 goto EXIT;
1400 }
1401 op++;
1402 argc--;
1403 }
1404
1405 /* Get the store for the offsets vector, and remember what it was */
1406
1407 size_offsets_max = size_offsets;
1408 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1409 if (offsets == NULL)
1410 {
1411 printf("** Failed to get %d bytes of memory for offsets vector\n",
1412 (int)(size_offsets_max * sizeof(int)));
1413 yield = 1;
1414 goto EXIT;
1415 }
1416
1417 /* Sort out the input and output files */
1418
1419 if (argc > 1)
1420 {
1421 infile = fopen(argv[op], INPUT_MODE);
1422 if (infile == NULL)
1423 {
1424 printf("** Failed to open %s\n", argv[op]);
1425 yield = 1;
1426 goto EXIT;
1427 }
1428 }
1429
1430 if (argc > 2)
1431 {
1432 outfile = fopen(argv[op+1], OUTPUT_MODE);
1433 if (outfile == NULL)
1434 {
1435 printf("** Failed to open %s\n", argv[op+1]);
1436 yield = 1;
1437 goto EXIT;
1438 }
1439 }
1440
1441 /* Set alternative malloc function */
1442
1443 pcre_malloc = new_malloc;
1444 pcre_free = new_free;
1445 pcre_stack_malloc = stack_malloc;
1446 pcre_stack_free = stack_free;
1447
1448 /* Heading line unless quiet, then prompt for first regex if stdin */
1449
1450 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1451
1452 /* Main loop */
1453
1454 while (!done)
1455 {
1456 pcre *re = NULL;
1457 pcre_extra *extra = NULL;
1458
1459 #if !defined NOPOSIX /* There are still compilers that require no indent */
1460 regex_t preg;
1461 int do_posix = 0;
1462 #endif
1463
1464 const char *error;
1465 unsigned char *markptr;
1466 unsigned char *p, *pp, *ppp;
1467 unsigned char *to_file = NULL;
1468 const unsigned char *tables = NULL;
1469 unsigned long int true_size, true_study_size = 0;
1470 size_t size, regex_gotten_store;
1471 int do_allcaps = 0;
1472 int do_mark = 0;
1473 int do_study = 0;
1474 int no_force_study = 0;
1475 int do_debug = debug;
1476 int do_G = 0;
1477 int do_g = 0;
1478 int do_showinfo = showinfo;
1479 int do_showrest = 0;
1480 int do_showcaprest = 0;
1481 int do_flip = 0;
1482 int erroroffset, len, delimiter, poffset;
1483
1484 use_utf8 = 0;
1485 debug_lengths = 1;
1486
1487 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1488 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1489 fflush(outfile);
1490
1491 p = buffer;
1492 while (isspace(*p)) p++;
1493 if (*p == 0) continue;
1494
1495 /* See if the pattern is to be loaded pre-compiled from a file. */
1496
1497 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1498 {
1499 unsigned long int magic, get_options;
1500 uschar sbuf[8];
1501 FILE *f;
1502
1503 p++;
1504 pp = p + (int)strlen((char *)p);
1505 while (isspace(pp[-1])) pp--;
1506 *pp = 0;
1507
1508 f = fopen((char *)p, "rb");
1509 if (f == NULL)
1510 {
1511 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1512 continue;
1513 }
1514
1515 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1516
1517 true_size =
1518 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1519 true_study_size =
1520 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1521
1522 re = (real_pcre *)new_malloc(true_size);
1523 regex_gotten_store = gotten_store;
1524
1525 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1526
1527 magic = ((real_pcre *)re)->magic_number;
1528 if (magic != MAGIC_NUMBER)
1529 {
1530 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1531 {
1532 do_flip = 1;
1533 }
1534 else
1535 {
1536 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1537 fclose(f);
1538 continue;
1539 }
1540 }
1541
1542 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1543 do_flip? " (byte-inverted)" : "", p);
1544
1545 /* Need to know if UTF-8 for printing data strings */
1546
1547 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1548 use_utf8 = (get_options & PCRE_UTF8) != 0;
1549
1550 /* Now see if there is any following study data. */
1551
1552 if (true_study_size != 0)
1553 {
1554 pcre_study_data *psd;
1555
1556 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1557 extra->flags = PCRE_EXTRA_STUDY_DATA;
1558
1559 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1560 extra->study_data = psd;
1561
1562 if (fread(psd, 1, true_study_size, f) != true_study_size)
1563 {
1564 FAIL_READ:
1565 fprintf(outfile, "Failed to read data from %s\n", p);
1566 if (extra != NULL) pcre_free_study(extra);
1567 if (re != NULL) new_free(re);
1568 fclose(f);
1569 continue;
1570 }
1571 fprintf(outfile, "Study data loaded from %s\n", p);
1572 do_study = 1; /* To get the data output if requested */
1573 }
1574 else fprintf(outfile, "No study data\n");
1575
1576 fclose(f);
1577 goto SHOW_INFO;
1578 }
1579
1580 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1581 the pattern; if it isn't complete, read more. */
1582
1583 delimiter = *p++;
1584
1585 if (isalnum(delimiter) || delimiter == '\\')
1586 {
1587 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1588 goto SKIP_DATA;
1589 }
1590
1591 pp = p;
1592 poffset = (int)(p - buffer);
1593
1594 for(;;)
1595 {
1596 while (*pp != 0)
1597 {
1598 if (*pp == '\\' && pp[1] != 0) pp++;
1599 else if (*pp == delimiter) break;
1600 pp++;
1601 }
1602 if (*pp != 0) break;
1603 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1604 {
1605 fprintf(outfile, "** Unexpected EOF\n");
1606 done = 1;
1607 goto CONTINUE;
1608 }
1609 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1610 }
1611
1612 /* The buffer may have moved while being extended; reset the start of data
1613 pointer to the correct relative point in the buffer. */
1614
1615 p = buffer + poffset;
1616
1617 /* If the first character after the delimiter is backslash, make
1618 the pattern end with backslash. This is purely to provide a way
1619 of testing for the error message when a pattern ends with backslash. */
1620
1621 if (pp[1] == '\\') *pp++ = '\\';
1622
1623 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1624 for callouts. */
1625
1626 *pp++ = 0;
1627 strcpy((char *)pbuffer, (char *)p);
1628
1629 /* Look for options after final delimiter */
1630
1631 options = 0;
1632 log_store = showstore; /* default from command line */
1633
1634 while (*pp != 0)
1635 {
1636 switch (*pp++)
1637 {
1638 case 'f': options |= PCRE_FIRSTLINE; break;
1639 case 'g': do_g = 1; break;
1640 case 'i': options |= PCRE_CASELESS; break;
1641 case 'm': options |= PCRE_MULTILINE; break;
1642 case 's': options |= PCRE_DOTALL; break;
1643 case 'x': options |= PCRE_EXTENDED; break;
1644
1645 case '+':
1646 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1647 break;
1648
1649 case '=': do_allcaps = 1; break;
1650 case 'A': options |= PCRE_ANCHORED; break;
1651 case 'B': do_debug = 1; break;
1652 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1653 case 'D': do_debug = do_showinfo = 1; break;
1654 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1655 case 'F': do_flip = 1; break;
1656 case 'G': do_G = 1; break;
1657 case 'I': do_showinfo = 1; break;
1658 case 'J': options |= PCRE_DUPNAMES; break;
1659 case 'K': do_mark = 1; break;
1660 case 'M': log_store = 1; break;
1661 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1662
1663 #if !defined NOPOSIX
1664 case 'P': do_posix = 1; break;
1665 #endif
1666
1667 case 'S':
1668 if (do_study == 0)
1669 {
1670 do_study = 1;
1671 if (*pp == '+')
1672 {
1673 study_options |= PCRE_STUDY_JIT_COMPILE;
1674 pp++;
1675 }
1676 }
1677 else
1678 {
1679 do_study = 0;
1680 no_force_study = 1;
1681 }
1682 break;
1683
1684 case 'U': options |= PCRE_UNGREEDY; break;
1685 case 'W': options |= PCRE_UCP; break;
1686 case 'X': options |= PCRE_EXTRA; break;
1687 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1688 case 'Z': debug_lengths = 0; break;
1689 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1690 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1691
1692 case 'T':
1693 switch (*pp++)
1694 {
1695 case '0': tables = tables0; break;
1696 case '1': tables = tables1; break;
1697
1698 case '\r':
1699 case '\n':
1700 case ' ':
1701 case 0:
1702 fprintf(outfile, "** Missing table number after /T\n");
1703 goto SKIP_DATA;
1704
1705 default:
1706 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1707 goto SKIP_DATA;
1708 }
1709 break;
1710
1711 case 'L':
1712 ppp = pp;
1713 /* The '\r' test here is so that it works on Windows. */
1714 /* The '0' test is just in case this is an unterminated line. */
1715 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1716 *ppp = 0;
1717 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1718 {
1719 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1720 goto SKIP_DATA;
1721 }
1722 locale_set = 1;
1723 tables = pcre_maketables();
1724 pp = ppp;
1725 break;
1726
1727 case '>':
1728 to_file = pp;
1729 while (*pp != 0) pp++;
1730 while (isspace(pp[-1])) pp--;
1731 *pp = 0;
1732 break;
1733
1734 case '<':
1735 {
1736 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1737 {
1738 options |= PCRE_JAVASCRIPT_COMPAT;
1739 pp += 3;
1740 }
1741 else
1742 {
1743 int x = check_newline(pp, outfile);
1744 if (x == 0) goto SKIP_DATA;
1745 options |= x;
1746 while (*pp++ != '>');
1747 }
1748 }
1749 break;
1750
1751 case '\r': /* So that it works in Windows */
1752 case '\n':
1753 case ' ':
1754 break;
1755
1756 default:
1757 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1758 goto SKIP_DATA;
1759 }
1760 }
1761
1762 /* Handle compiling via the POSIX interface, which doesn't support the
1763 timing, showing, or debugging options, nor the ability to pass over
1764 local character tables. */
1765
1766 #if !defined NOPOSIX
1767 if (posix || do_posix)
1768 {
1769 int rc;
1770 int cflags = 0;
1771
1772 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1773 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1774 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1775 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1776 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1777 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1778 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1779
1780 rc = regcomp(&preg, (char *)p, cflags);
1781
1782 /* Compilation failed; go back for another re, skipping to blank line
1783 if non-interactive. */
1784
1785 if (rc != 0)
1786 {
1787 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1788 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1789 goto SKIP_DATA;
1790 }
1791 }
1792
1793 /* Handle compiling via the native interface */
1794
1795 else
1796 #endif /* !defined NOPOSIX */
1797
1798 {
1799 unsigned long int get_options;
1800
1801 if (timeit > 0)
1802 {
1803 register int i;
1804 clock_t time_taken;
1805 clock_t start_time = clock();
1806 for (i = 0; i < timeit; i++)
1807 {
1808 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1809 if (re != NULL) free(re);
1810 }
1811 time_taken = clock() - start_time;
1812 fprintf(outfile, "Compile time %.4f milliseconds\n",
1813 (((double)time_taken * 1000.0) / (double)timeit) /
1814 (double)CLOCKS_PER_SEC);
1815 }
1816
1817 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1818
1819 /* Compilation failed; go back for another re, skipping to blank line
1820 if non-interactive. */
1821
1822 if (re == NULL)
1823 {
1824 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1825 SKIP_DATA:
1826 if (infile != stdin)
1827 {
1828 for (;;)
1829 {
1830 if (extend_inputline(infile, buffer, NULL) == NULL)
1831 {
1832 done = 1;
1833 goto CONTINUE;
1834 }
1835 len = (int)strlen((char *)buffer);
1836 while (len > 0 && isspace(buffer[len-1])) len--;
1837 if (len == 0) break;
1838 }
1839 fprintf(outfile, "\n");
1840 }
1841 goto CONTINUE;
1842 }
1843
1844 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1845 within the regex; check for this so that we know how to process the data
1846 lines. */
1847
1848 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1849 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1850
1851 /* Print information if required. There are now two info-returning
1852 functions. The old one has a limited interface and returns only limited
1853 data. Check that it agrees with the newer one. */
1854
1855 if (log_store)
1856 fprintf(outfile, "Memory allocation (code space): %d\n",
1857 (int)(gotten_store -
1858 sizeof(real_pcre) -
1859 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1860
1861 /* Extract the size for possible writing before possibly flipping it,
1862 and remember the store that was got. */
1863
1864 true_size = ((real_pcre *)re)->size;
1865 regex_gotten_store = gotten_store;
1866
1867 /* If -s or /S was present, study the regex to generate additional info to
1868 help with the matching, unless the pattern has the SS option, which
1869 suppresses the effect of /S (used for a few test patterns where studying is
1870 never sensible). */
1871
1872 if (do_study || (force_study >= 0 && !no_force_study))
1873 {
1874 if (timeit > 0)
1875 {
1876 register int i;
1877 clock_t time_taken;
1878 clock_t start_time = clock();
1879 for (i = 0; i < timeit; i++)
1880 extra = pcre_study(re, study_options | force_study_options, &error);
1881 time_taken = clock() - start_time;
1882 if (extra != NULL) pcre_free_study(extra);
1883 fprintf(outfile, " Study time %.4f milliseconds\n",
1884 (((double)time_taken * 1000.0) / (double)timeit) /
1885 (double)CLOCKS_PER_SEC);
1886 }
1887 extra = pcre_study(re, study_options | force_study_options, &error);
1888 if (error != NULL)
1889 fprintf(outfile, "Failed to study: %s\n", error);
1890 else if (extra != NULL)
1891 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1892 }
1893
1894 /* If /K was present, we set up for handling MARK data. */
1895
1896 if (do_mark)
1897 {
1898 if (extra == NULL)
1899 {
1900 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1901 extra->flags = 0;
1902 }
1903 extra->mark = &markptr;
1904 extra->flags |= PCRE_EXTRA_MARK;
1905 }
1906
1907 /* If the 'F' option was present, we flip the bytes of all the integer
1908 fields in the regex data block and the study block. This is to make it
1909 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1910 compiled on a different architecture. */
1911
1912 if (do_flip)
1913 {
1914 real_pcre *rre = (real_pcre *)re;
1915 rre->magic_number =
1916 byteflip(rre->magic_number, sizeof(rre->magic_number));
1917 rre->size = byteflip(rre->size, sizeof(rre->size));
1918 rre->options = byteflip(rre->options, sizeof(rre->options));
1919 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1920 rre->top_bracket =
1921 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1922 rre->top_backref =
1923 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1924 rre->first_byte =
1925 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1926 rre->req_byte =
1927 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1928 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1929 sizeof(rre->name_table_offset));
1930 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1931 sizeof(rre->name_entry_size));
1932 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1933 sizeof(rre->name_count));
1934
1935 if (extra != NULL)
1936 {
1937 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1938 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1939 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1940 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1941 }
1942 }
1943
1944 /* Extract information from the compiled data if required */
1945
1946 SHOW_INFO:
1947
1948 if (do_debug)
1949 {
1950 fprintf(outfile, "------------------------------------------------------------------\n");
1951 pcre_printint(re, outfile, debug_lengths);
1952 }
1953
1954 /* We already have the options in get_options (see above) */
1955
1956 if (do_showinfo)
1957 {
1958 unsigned long int all_options;
1959 #if !defined NOINFOCHECK
1960 int old_first_char, old_options, old_count;
1961 #endif
1962 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1963 hascrorlf;
1964 int nameentrysize, namecount;
1965 const uschar *nametable;
1966
1967 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1968 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1969 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1970 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1971 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1972 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1973 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1974 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1975 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1976 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1977 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1978
1979 #if !defined NOINFOCHECK
1980 old_count = pcre_info(re, &old_options, &old_first_char);
1981 if (count < 0) fprintf(outfile,
1982 "Error %d from pcre_info()\n", count);
1983 else
1984 {
1985 if (old_count != count) fprintf(outfile,
1986 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1987 old_count);
1988
1989 if (old_first_char != first_char) fprintf(outfile,
1990 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1991 first_char, old_first_char);
1992
1993 if (old_options != (int)get_options) fprintf(outfile,
1994 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1995 get_options, old_options);
1996 }
1997 #endif
1998
1999 if (size != regex_gotten_store) fprintf(outfile,
2000 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2001 (int)size, (int)regex_gotten_store);
2002
2003 fprintf(outfile, "Capturing subpattern count = %d\n", count);
2004 if (backrefmax > 0)
2005 fprintf(outfile, "Max back reference = %d\n", backrefmax);
2006
2007 if (namecount > 0)
2008 {
2009 fprintf(outfile, "Named capturing subpatterns:\n");
2010 while (namecount-- > 0)
2011 {
2012 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
2013 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2014 GET2(nametable, 0));
2015 nametable += nameentrysize;
2016 }
2017 }
2018
2019 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2020 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2021
2022 all_options = ((real_pcre *)re)->options;
2023 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2024
2025 if (get_options == 0) fprintf(outfile, "No options\n");
2026 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2027 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2028 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2029 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2030 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2031 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2032 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2033 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2034 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2035 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2036 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2037 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2038 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2039 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2040 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2041 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2042 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2043 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2044
2045 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2046
2047 switch (get_options & PCRE_NEWLINE_BITS)
2048 {
2049 case PCRE_NEWLINE_CR:
2050 fprintf(outfile, "Forced newline sequence: CR\n");
2051 break;
2052
2053 case PCRE_NEWLINE_LF:
2054 fprintf(outfile, "Forced newline sequence: LF\n");
2055 break;
2056
2057 case PCRE_NEWLINE_CRLF:
2058 fprintf(outfile, "Forced newline sequence: CRLF\n");
2059 break;
2060
2061 case PCRE_NEWLINE_ANYCRLF:
2062 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2063 break;
2064
2065 case PCRE_NEWLINE_ANY:
2066 fprintf(outfile, "Forced newline sequence: ANY\n");
2067 break;
2068
2069 default:
2070 break;
2071 }
2072
2073 if (first_char == -1)
2074 {
2075 fprintf(outfile, "First char at start or follows newline\n");
2076 }
2077 else if (first_char < 0)
2078 {
2079 fprintf(outfile, "No first char\n");
2080 }
2081 else
2082 {
2083 int ch = first_char & 255;
2084 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2085 "" : " (caseless)";
2086 if (PRINTHEX(ch))
2087 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2088 else
2089 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2090 }
2091
2092 if (need_char < 0)
2093 {
2094 fprintf(outfile, "No need char\n");
2095 }
2096 else
2097 {
2098 int ch = need_char & 255;
2099 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2100 "" : " (caseless)";
2101 if (PRINTHEX(ch))
2102 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2103 else
2104 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2105 }
2106
2107 /* Don't output study size; at present it is in any case a fixed
2108 value, but it varies, depending on the computer architecture, and
2109 so messes up the test suite. (And with the /F option, it might be
2110 flipped.) If study was forced by an external -s, don't show this
2111 information unless -i or -d was also present. This means that, except
2112 when auto-callouts are involved, the output from runs with and without
2113 -s should be identical. */
2114
2115 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2116 {
2117 if (extra == NULL)
2118 fprintf(outfile, "Study returned NULL\n");
2119 else
2120 {
2121 uschar *start_bits = NULL;
2122 int minlength;
2123
2124 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2125 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2126
2127 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2128 if (start_bits == NULL)
2129 fprintf(outfile, "No set of starting bytes\n");
2130 else
2131 {
2132 int i;
2133 int c = 24;
2134 fprintf(outfile, "Starting byte set: ");
2135 for (i = 0; i < 256; i++)
2136 {
2137 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2138 {
2139 if (c > 75)
2140 {
2141 fprintf(outfile, "\n ");
2142 c = 2;
2143 }
2144 if (PRINTHEX(i) && i != ' ')
2145 {
2146 fprintf(outfile, "%c ", i);
2147 c += 2;
2148 }
2149 else
2150 {
2151 fprintf(outfile, "\\x%02x ", i);
2152 c += 5;
2153 }
2154 }
2155 }
2156 fprintf(outfile, "\n");
2157 }
2158 }
2159
2160 /* Show this only if the JIT was set by /S, not by -s. */
2161
2162 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2163 {
2164 int jit;
2165 new_info(re, extra, PCRE_INFO_JIT, &jit);
2166 if (jit)
2167 fprintf(outfile, "JIT study was successful\n");
2168 else
2169 #ifdef SUPPORT_JIT
2170 fprintf(outfile, "JIT study was not successful\n");
2171 #else
2172 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2173 #endif
2174 }
2175 }
2176 }
2177
2178 /* If the '>' option was present, we write out the regex to a file, and
2179 that is all. The first 8 bytes of the file are the regex length and then
2180 the study length, in big-endian order. */
2181
2182 if (to_file != NULL)
2183 {
2184 FILE *f = fopen((char *)to_file, "wb");
2185 if (f == NULL)
2186 {
2187 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2188 }
2189 else
2190 {
2191 uschar sbuf[8];
2192 sbuf[0] = (uschar)((true_size >> 24) & 255);
2193 sbuf[1] = (uschar)((true_size >> 16) & 255);
2194 sbuf[2] = (uschar)((true_size >> 8) & 255);
2195 sbuf[3] = (uschar)((true_size) & 255);
2196
2197 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2198 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2199 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2200 sbuf[7] = (uschar)((true_study_size) & 255);
2201
2202 if (fwrite(sbuf, 1, 8, f) < 8 ||
2203 fwrite(re, 1, true_size, f) < true_size)
2204 {
2205 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2206 }
2207 else
2208 {
2209 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2210
2211 /* If there is study data, write it. */
2212
2213 if (extra != NULL)
2214 {
2215 if (fwrite(extra->study_data, 1, true_study_size, f) <
2216 true_study_size)
2217 {
2218 fprintf(outfile, "Write error on %s: %s\n", to_file,
2219 strerror(errno));
2220 }
2221 else fprintf(outfile, "Study data written to %s\n", to_file);
2222 }
2223 }
2224 fclose(f);
2225 }
2226
2227 new_free(re);
2228 if (extra != NULL) pcre_free_study(extra);
2229 if (locale_set)
2230 {
2231 new_free((void *)tables);
2232 setlocale(LC_CTYPE, "C");
2233 locale_set = 0;
2234 }
2235 continue; /* With next regex */
2236 }
2237 } /* End of non-POSIX compile */
2238
2239 /* Read data lines and test them */
2240
2241 for (;;)
2242 {
2243 uschar *q;
2244 uschar *bptr;
2245 int *use_offsets = offsets;
2246 int use_size_offsets = size_offsets;
2247 int callout_data = 0;
2248 int callout_data_set = 0;
2249 int count, c;
2250 int copystrings = 0;
2251 int find_match_limit = default_find_match_limit;
2252 int getstrings = 0;
2253 int getlist = 0;
2254 int gmatched = 0;
2255 int start_offset = 0;
2256 int start_offset_sign = 1;
2257 int g_notempty = 0;
2258 int use_dfa = 0;
2259
2260 options = 0;
2261
2262 *copynames = 0;
2263 *getnames = 0;
2264
2265 copynamesptr = copynames;
2266 getnamesptr = getnames;
2267
2268 pcre_callout = callout;
2269 first_callout = 1;
2270 last_callout_mark = NULL;
2271 callout_extra = 0;
2272 callout_count = 0;
2273 callout_fail_count = 999999;
2274 callout_fail_id = -1;
2275 show_malloc = 0;
2276
2277 if (extra != NULL) extra->flags &=
2278 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2279
2280 len = 0;
2281 for (;;)
2282 {
2283 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2284 {
2285 if (len > 0) /* Reached EOF without hitting a newline */
2286 {
2287 fprintf(outfile, "\n");
2288 break;
2289 }
2290 done = 1;
2291 goto CONTINUE;
2292 }
2293 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2294 len = (int)strlen((char *)buffer);
2295 if (buffer[len-1] == '\n') break;
2296 }
2297
2298 while (len > 0 && isspace(buffer[len-1])) len--;
2299 buffer[len] = 0;
2300 if (len == 0) break;
2301
2302 p = buffer;
2303 while (isspace(*p)) p++;
2304
2305 bptr = q = dbuffer;
2306 while ((c = *p++) != 0)
2307 {
2308 int i = 0;
2309 int n = 0;
2310
2311 if (c == '\\') switch ((c = *p++))
2312 {
2313 case 'a': c = 7; break;
2314 case 'b': c = '\b'; break;
2315 case 'e': c = 27; break;
2316 case 'f': c = '\f'; break;
2317 case 'n': c = '\n'; break;
2318 case 'r': c = '\r'; break;
2319 case 't': c = '\t'; break;
2320 case 'v': c = '\v'; break;
2321
2322 case '0': case '1': case '2': case '3':
2323 case '4': case '5': case '6': case '7':
2324 c -= '0';
2325 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2326 c = c * 8 + *p++ - '0';
2327
2328 #if !defined NOUTF8
2329 if (use_utf8 && c > 255)
2330 {
2331 unsigned char buff8[8];
2332 int ii, utn;
2333 utn = ord2utf8(c, buff8);
2334 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2335 c = buff8[ii]; /* Last byte */
2336 }
2337 #endif
2338 break;
2339
2340 case 'x':
2341
2342 /* Handle \x{..} specially - new Perl thing for utf8 */
2343
2344 #if !defined NOUTF8
2345 if (*p == '{')
2346 {
2347 unsigned char *pt = p;
2348 c = 0;
2349
2350 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2351 when isxdigit() is a macro that refers to its argument more than
2352 once. This is banned by the C Standard, but apparently happens in at
2353 least one MacOS environment. */
2354
2355 for (pt++; isxdigit(*pt); pt++)
2356 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2357 if (*pt == '}')
2358 {
2359 unsigned char buff8[8];
2360 int ii, utn;
2361 if (use_utf8)
2362 {
2363 utn = ord2utf8(c, buff8);
2364 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2365 c = buff8[ii]; /* Last byte */
2366 }
2367 else
2368 {
2369 if (c > 255)
2370 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2371 "UTF-8 mode is not enabled.\n"
2372 "** Truncation will probably give the wrong result.\n", c);
2373 }
2374 p = pt + 1;
2375 break;
2376 }
2377 /* Not correct form; fall through */
2378 }
2379 #endif
2380
2381 /* Ordinary \x */
2382
2383 c = 0;
2384 while (i++ < 2 && isxdigit(*p))
2385 {
2386 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2387 p++;
2388 }
2389 break;
2390
2391 case 0: /* \ followed by EOF allows for an empty line */
2392 p--;
2393 continue;
2394
2395 case '>':
2396 if (*p == '-')
2397 {
2398 start_offset_sign = -1;
2399 p++;
2400 }
2401 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2402 start_offset *= start_offset_sign;
2403 continue;
2404
2405 case 'A': /* Option setting */
2406 options |= PCRE_ANCHORED;
2407 continue;
2408
2409 case 'B':
2410 options |= PCRE_NOTBOL;
2411 continue;
2412
2413 case 'C':
2414 if (isdigit(*p)) /* Set copy string */
2415 {
2416 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2417 copystrings |= 1 << n;
2418 }
2419 else if (isalnum(*p))
2420 {
2421 uschar *npp = copynamesptr;
2422 while (isalnum(*p)) *npp++ = *p++;
2423 *npp++ = 0;
2424 *npp = 0;
2425 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2426 if (n < 0)
2427 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2428 copynamesptr = npp;
2429 }
2430 else if (*p == '+')
2431 {
2432 callout_extra = 1;
2433 p++;
2434 }
2435 else if (*p == '-')
2436 {
2437 pcre_callout = NULL;
2438 p++;
2439 }
2440 else if (*p == '!')
2441 {
2442 callout_fail_id = 0;
2443 p++;
2444 while(isdigit(*p))
2445 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2446 callout_fail_count = 0;
2447 if (*p == '!')
2448 {
2449 p++;
2450 while(isdigit(*p))
2451 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2452 }
2453 }
2454 else if (*p == '*')
2455 {
2456 int sign = 1;
2457 callout_data = 0;
2458 if (*(++p) == '-') { sign = -1; p++; }
2459 while(isdigit(*p))
2460 callout_data = callout_data * 10 + *p++ - '0';
2461 callout_data *= sign;
2462 callout_data_set = 1;
2463 }
2464 continue;
2465
2466 #if !defined NODFA
2467 case 'D':
2468 #if !defined NOPOSIX
2469 if (posix || do_posix)
2470 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2471 else
2472 #endif
2473 use_dfa = 1;
2474 continue;
2475 #endif
2476
2477 #if !defined NODFA
2478 case 'F':
2479 options |= PCRE_DFA_SHORTEST;
2480 continue;
2481 #endif
2482
2483 case 'G':
2484 if (isdigit(*p))
2485 {
2486 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2487 getstrings |= 1 << n;
2488 }
2489 else if (isalnum(*p))
2490 {
2491 uschar *npp = getnamesptr;
2492 while (isalnum(*p)) *npp++ = *p++;
2493 *npp++ = 0;
2494 *npp = 0;
2495 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2496 if (n < 0)
2497 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2498 getnamesptr = npp;
2499 }
2500 continue;
2501
2502 case 'J':
2503 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2504 if (extra != NULL
2505 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2506 && extra->executable_jit != NULL)
2507 {
2508 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2509 jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2510 pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2511 }
2512 continue;
2513
2514 case 'L':
2515 getlist = 1;
2516 continue;
2517
2518 case 'M':
2519 find_match_limit = 1;
2520 continue;
2521
2522 case 'N':
2523 if ((options & PCRE_NOTEMPTY) != 0)
2524 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2525 else
2526 options |= PCRE_NOTEMPTY;
2527 continue;
2528
2529 case 'O':
2530 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2531 if (n > size_offsets_max)
2532 {
2533 size_offsets_max = n;
2534 free(offsets);
2535 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2536 if (offsets == NULL)
2537 {
2538 printf("** Failed to get %d bytes of memory for offsets vector\n",
2539 (int)(size_offsets_max * sizeof(int)));
2540 yield = 1;
2541 goto EXIT;
2542 }
2543 }
2544 use_size_offsets = n;
2545 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2546 continue;
2547
2548 case 'P':
2549 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2550 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2551 continue;
2552
2553 case 'Q':
2554 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2555 if (extra == NULL)
2556 {
2557 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2558 extra->flags = 0;
2559 }
2560 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2561 extra->match_limit_recursion = n;
2562 continue;
2563
2564 case 'q':
2565 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2566 if (extra == NULL)
2567 {
2568 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2569 extra->flags = 0;
2570 }
2571 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2572 extra->match_limit = n;
2573 continue;
2574
2575 #if !defined NODFA
2576 case 'R':
2577 options |= PCRE_DFA_RESTART;
2578 continue;
2579 #endif
2580
2581 case 'S':
2582 show_malloc = 1;
2583 continue;
2584
2585 case 'Y':
2586 options |= PCRE_NO_START_OPTIMIZE;
2587 continue;
2588
2589 case 'Z':
2590 options |= PCRE_NOTEOL;
2591 continue;
2592
2593 case '?':
2594 options |= PCRE_NO_UTF8_CHECK;
2595 continue;
2596
2597 case '<':
2598 {
2599 int x = check_newline(p, outfile);
2600 if (x == 0) goto NEXT_DATA;
2601 options |= x;
2602 while (*p++ != '>');
2603 }
2604 continue;
2605 }
2606 *q++ = c;
2607 }
2608 *q = 0;
2609 len = (int)(q - dbuffer);
2610
2611 /* Move the data to the end of the buffer so that a read over the end of
2612 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2613 we are using the POSIX interface, we must include the terminating zero. */
2614
2615 #if !defined NOPOSIX
2616 if (posix || do_posix)
2617 {
2618 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2619 bptr += buffer_size - len - 1;
2620 }
2621 else
2622 #endif
2623 {
2624 memmove(bptr + buffer_size - len, bptr, len);
2625 bptr += buffer_size - len;
2626 }
2627
2628 if ((all_use_dfa || use_dfa) && find_match_limit)
2629 {
2630 printf("**Match limit not relevant for DFA matching: ignored\n");
2631 find_match_limit = 0;
2632 }
2633
2634 /* Handle matching via the POSIX interface, which does not
2635 support timing or playing with the match limit or callout data. */
2636
2637 #if !defined NOPOSIX
2638 if (posix || do_posix)
2639 {
2640 int rc;
2641 int eflags = 0;
2642 regmatch_t *pmatch = NULL;
2643 if (use_size_offsets > 0)
2644 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2645 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2646 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2647 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2648
2649 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2650
2651 if (rc != 0)
2652 {
2653 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2654 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2655 }
2656 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2657 != 0)
2658 {
2659 fprintf(outfile, "Matched with REG_NOSUB\n");
2660 }
2661 else
2662 {
2663 size_t i;
2664 for (i = 0; i < (size_t)use_size_offsets; i++)
2665 {
2666 if (pmatch[i].rm_so >= 0)
2667 {
2668 fprintf(outfile, "%2d: ", (int)i);
2669 (void)pchars(dbuffer + pmatch[i].rm_so,
2670 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2671 fprintf(outfile, "\n");
2672 if (do_showcaprest || (i == 0 && do_showrest))
2673 {
2674 fprintf(outfile, "%2d+ ", (int)i);
2675 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2676 outfile);
2677 fprintf(outfile, "\n");
2678 }
2679 }
2680 }
2681 }
2682 free(pmatch);
2683 }
2684
2685 /* Handle matching via the native interface - repeats for /g and /G */
2686
2687 else
2688 #endif /* !defined NOPOSIX */
2689
2690 for (;; gmatched++) /* Loop for /g or /G */
2691 {
2692 markptr = NULL;
2693
2694 if (timeitm > 0)
2695 {
2696 register int i;
2697 clock_t time_taken;
2698 clock_t start_time = clock();
2699
2700 #if !defined NODFA
2701 if (all_use_dfa || use_dfa)
2702 {
2703 int workspace[1000];
2704 for (i = 0; i < timeitm; i++)
2705 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2706 options | g_notempty, use_offsets, use_size_offsets, workspace,
2707 sizeof(workspace)/sizeof(int));
2708 }
2709 else
2710 #endif
2711
2712 for (i = 0; i < timeitm; i++)
2713 count = pcre_exec(re, extra, (char *)bptr, len,
2714 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2715
2716 time_taken = clock() - start_time;
2717 fprintf(outfile, "Execute time %.4f milliseconds\n",
2718 (((double)time_taken * 1000.0) / (double)timeitm) /
2719 (double)CLOCKS_PER_SEC);
2720 }
2721
2722 /* If find_match_limit is set, we want to do repeated matches with
2723 varying limits in order to find the minimum value for the match limit and
2724 for the recursion limit. The match limits are relevant only to the normal
2725 running of pcre_exec(), so disable the JIT optimization. This makes it
2726 possible to run the same set of tests with and without JIT externally
2727 requested. */
2728
2729 if (find_match_limit)
2730 {
2731 if (extra == NULL)
2732 {
2733 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2734 extra->flags = 0;
2735 }
2736 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
2737
2738 (void)check_match_limit(re, extra, bptr, len, start_offset,
2739 options|g_notempty, use_offsets, use_size_offsets,
2740 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2741 PCRE_ERROR_MATCHLIMIT, "match()");
2742
2743 count = check_match_limit(re, extra, bptr, len, start_offset,
2744 options|g_notempty, use_offsets, use_size_offsets,
2745 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2746 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2747 }
2748
2749 /* If callout_data is set, use the interface with additional data */
2750
2751 else if (callout_data_set)
2752 {
2753 if (extra == NULL)
2754 {
2755 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2756 extra->flags = 0;
2757 }
2758 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2759 extra->callout_data = &callout_data;
2760 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2761 options | g_notempty, use_offsets, use_size_offsets);
2762 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2763 }
2764
2765 /* The normal case is just to do the match once, with the default
2766 value of match_limit. */
2767
2768 #if !defined NODFA
2769 else if (all_use_dfa || use_dfa)
2770 {
2771 int workspace[1000];
2772 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2773 options | g_notempty, use_offsets, use_size_offsets, workspace,
2774 sizeof(workspace)/sizeof(int));
2775 if (count == 0)
2776 {
2777 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2778 count = use_size_offsets/2;
2779 }
2780 }
2781 #endif
2782
2783 else
2784 {
2785 count = pcre_exec(re, extra, (char *)bptr, len,
2786 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2787 if (count == 0)
2788 {
2789 fprintf(outfile, "Matched, but too many substrings\n");
2790 count = use_size_offsets/3;
2791 }
2792 }
2793
2794 /* Matched */
2795
2796 if (count >= 0)
2797 {
2798 int i, maxcount;
2799
2800 #if !defined NODFA
2801 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2802 #endif
2803 maxcount = use_size_offsets/3;
2804
2805 /* This is a check against a lunatic return value. */
2806
2807 if (count > maxcount)
2808 {
2809 fprintf(outfile,
2810 "** PCRE error: returned count %d is too big for offset size %d\n",
2811 count, use_size_offsets);
2812 count = use_size_offsets/3;
2813 if (do_g || do_G)
2814 {
2815 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2816 do_g = do_G = FALSE; /* Break g/G loop */
2817 }
2818 }
2819
2820 /* do_allcaps requests showing of all captures in the pattern, to check
2821 unset ones at the end. */
2822
2823 if (do_allcaps)
2824 {
2825 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2826 count++; /* Allow for full match */
2827 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2828 }
2829
2830 /* Output the captured substrings */
2831
2832 for (i = 0; i < count * 2; i += 2)
2833 {
2834 if (use_offsets[i] < 0)
2835 {
2836 if (use_offsets[i] != -1)
2837 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2838 use_offsets[i], i);
2839 if (use_offsets[i+1] != -1)
2840 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2841 use_offsets[i+1], i+1);
2842 fprintf(outfile, "%2d: <unset>\n", i/2);
2843 }
2844 else
2845 {
2846 fprintf(outfile, "%2d: ", i/2);
2847 (void)pchars(bptr + use_offsets[i],
2848 use_offsets[i+1] - use_offsets[i], outfile);
2849 fprintf(outfile, "\n");
2850 if (do_showcaprest || (i == 0 && do_showrest))
2851 {
2852 fprintf(outfile, "%2d+ ", i/2);
2853 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2854 outfile);
2855 fprintf(outfile, "\n");
2856 }
2857 }
2858 }
2859
2860 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2861
2862 for (i = 0; i < 32; i++)
2863 {
2864 if ((copystrings & (1 << i)) != 0)
2865 {
2866 char copybuffer[256];
2867 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2868 i, copybuffer, sizeof(copybuffer));
2869 if (rc < 0)
2870 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2871 else
2872 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2873 }
2874 }
2875
2876 for (copynamesptr = copynames;
2877 *copynamesptr != 0;
2878 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2879 {
2880 char copybuffer[256];
2881 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2882 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2883 if (rc < 0)
2884 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2885 else
2886 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2887 }
2888
2889 for (i = 0; i < 32; i++)
2890 {
2891 if ((getstrings & (1 << i)) != 0)
2892 {
2893 const char *substring;
2894 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2895 i, &substring);
2896 if (rc < 0)
2897 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2898 else
2899 {
2900 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2901 pcre_free_substring(substring);
2902 }
2903 }
2904 }
2905
2906 for (getnamesptr = getnames;
2907 *getnamesptr != 0;
2908 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2909 {
2910 const char *substring;
2911 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2912 count, (char *)getnamesptr, &substring);
2913 if (rc < 0)
2914 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2915 else
2916 {
2917 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2918 pcre_free_substring(substring);
2919 }
2920 }
2921
2922 if (getlist)
2923 {
2924 const char **stringlist;
2925 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2926 &stringlist);
2927 if (rc < 0)
2928 fprintf(outfile, "get substring list failed %d\n", rc);
2929 else
2930 {
2931 for (i = 0; i < count; i++)
2932 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2933 if (stringlist[i] != NULL)
2934 fprintf(outfile, "string list not terminated by NULL\n");
2935 pcre_free_substring_list(stringlist);
2936 }
2937 }
2938 }
2939
2940 /* There was a partial match */
2941
2942 else if (count == PCRE_ERROR_PARTIAL)
2943 {
2944 if (markptr == NULL) fprintf(outfile, "Partial match");
2945 else fprintf(outfile, "Partial match, mark=%s", markptr);
2946 if (use_size_offsets > 1)
2947 {
2948 fprintf(outfile, ": ");
2949 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2950 outfile);
2951 }
2952 fprintf(outfile, "\n");
2953 break; /* Out of the /g loop */
2954 }
2955
2956 /* Failed to match. If this is a /g or /G loop and we previously set
2957 g_notempty after a null match, this is not necessarily the end. We want
2958 to advance the start offset, and continue. We won't be at the end of the
2959 string - that was checked before setting g_notempty.
2960
2961 Complication arises in the case when the newline convention is "any",
2962 "crlf", or "anycrlf". If the previous match was at the end of a line
2963 terminated by CRLF, an advance of one character just passes the \r,
2964 whereas we should prefer the longer newline sequence, as does the code in
2965 pcre_exec(). Fudge the offset value to achieve this. We check for a
2966 newline setting in the pattern; if none was set, use pcre_config() to
2967 find the default.
2968
2969 Otherwise, in the case of UTF-8 matching, the advance must be one
2970 character, not one byte. */
2971
2972 else
2973 {
2974 if (g_notempty != 0)
2975 {
2976 int onechar = 1;
2977 unsigned int obits = ((real_pcre *)re)->options;
2978 use_offsets[0] = start_offset;
2979 if ((obits & PCRE_NEWLINE_BITS) == 0)
2980 {
2981 int d;
2982 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2983 /* Note that these values are always the ASCII ones, even in
2984 EBCDIC environments. CR = 13, NL = 10. */
2985 obits = (d == 13)? PCRE_NEWLINE_CR :
2986 (d == 10)? PCRE_NEWLINE_LF :
2987 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2988 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2989 (d == -1)? PCRE_NEWLINE_ANY : 0;
2990 }
2991 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2992 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2993 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2994 &&
2995 start_offset < len - 1 &&
2996 bptr[start_offset] == '\r' &&
2997 bptr[start_offset+1] == '\n')
2998 onechar++;
2999 else if (use_utf8)
3000 {
3001 while (start_offset + onechar < len)
3002 {
3003 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3004 onechar++;
3005 }
3006 }
3007 use_offsets[1] = start_offset + onechar;
3008 }
3009 else
3010 {
3011 switch(count)
3012 {
3013 case PCRE_ERROR_NOMATCH:
3014 if (gmatched == 0)
3015 {
3016 if (markptr == NULL) fprintf(outfile, "No match\n");
3017 else fprintf(outfile, "No match, mark = %s\n", markptr);
3018 }
3019 break;
3020
3021 case PCRE_ERROR_BADUTF8:
3022 case PCRE_ERROR_SHORTUTF8:
3023 fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3024 (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3025 if (use_size_offsets >= 2)
3026 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3027 use_offsets[1]);
3028 fprintf(outfile, "\n");
3029 break;
3030
3031 default:
3032 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3033 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3034 else
3035 fprintf(outfile, "Error %d (Unexpected value)\n", count);
3036 break;
3037 }
3038
3039 break; /* Out of the /g loop */
3040 }
3041 }
3042
3043 /* If not /g or /G we are done */
3044
3045 if (!do_g && !do_G) break;
3046
3047 /* If we have matched an empty string, first check to see if we are at
3048 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3049 Perl's /g option does. This turns out to be rather cunning. First we set
3050 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3051 same point. If this fails (picked up above) we advance to the next
3052 character. */
3053
3054 g_notempty = 0;
3055
3056 if (use_offsets[0] == use_offsets[1])
3057 {
3058 if (use_offsets[0] == len) break;
3059 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3060 }
3061
3062 /* For /g, update the start offset, leaving the rest alone */
3063
3064 if (do_g) start_offset = use_offsets[1];
3065
3066 /* For /G, update the pointer and length */
3067
3068 else
3069 {
3070 bptr += use_offsets[1];
3071 len -= use_offsets[1];
3072 }
3073 } /* End of loop for /g and /G */
3074
3075 NEXT_DATA: continue;
3076 } /* End of loop for data lines */
3077
3078 CONTINUE:
3079
3080 #if !defined NOPOSIX
3081 if (posix || do_posix) regfree(&preg);
3082 #endif
3083
3084 if (re != NULL) new_free(re);
3085 if (extra != NULL) pcre_free_study(extra);
3086 if (locale_set)
3087 {
3088 new_free((void *)tables);
3089 setlocale(LC_CTYPE, "C");
3090 locale_set = 0;
3091 }
3092 if (jit_stack != NULL)
3093 {
3094 pcre_jit_stack_free(jit_stack);
3095 jit_stack = NULL;
3096 }
3097 }
3098
3099 if (infile == stdin) fprintf(outfile, "\n");
3100
3101 EXIT:
3102
3103 if (infile != NULL && infile != stdin) fclose(infile);
3104 if (outfile != NULL && outfile != stdout) fclose(outfile);
3105
3106 free(buffer);
3107 free(dbuffer);
3108 free(pbuffer);
3109 free(offsets);
3110
3111 return yield;
3112 }
3113
3114 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12