/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 641 - (show annotations) (download)
Mon Jul 25 16:56:54 2011 UTC (3 years, 2 months ago) by ph10
File MIME type: text/plain
File size: 91186 byte(s)
Add minix to list of OS not supporting -S in pcretest.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <locale.h>
49 #include <errno.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #ifndef isatty
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
78 #ifndef fileno
79 #define fileno _fileno
80 #endif
81
82 /* A user sent this fix for Borland Builder 5 under Windows. */
83
84 #ifdef __BORLANDC__
85 #define _setmode(handle, mode) setmode(handle, mode)
86 #endif
87
88 /* Not Windows */
89
90 #else
91 #include <sys/time.h> /* These two includes are needed */
92 #include <sys/resource.h> /* for setrlimit(). */
93 #define INPUT_MODE "rb"
94 #define OUTPUT_MODE "wb"
95 #endif
96
97
98 /* We have to include pcre_internal.h because we need the internal info for
99 displaying the results of pcre_study() and we also need to know about the
100 internal macros, structures, and other internal data values; pcretest has
101 "inside information" compared to a program that strictly follows the PCRE API.
102
103 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105 appropriately for an application, not for building PCRE. */
106
107 #include "pcre.h"
108 #include "pcre_internal.h"
109
110 /* We need access to some of the data tables that PCRE uses. So as not to have
111 to keep two copies, we include the source file here, changing the names of the
112 external symbols to prevent clashes. */
113
114 #define _pcre_ucp_gentype ucp_gentype
115 #define _pcre_utf8_table1 utf8_table1
116 #define _pcre_utf8_table1_size utf8_table1_size
117 #define _pcre_utf8_table2 utf8_table2
118 #define _pcre_utf8_table3 utf8_table3
119 #define _pcre_utf8_table4 utf8_table4
120 #define _pcre_utt utt
121 #define _pcre_utt_size utt_size
122 #define _pcre_utt_names utt_names
123 #define _pcre_OP_lengths OP_lengths
124
125 #include "pcre_tables.c"
126
127 /* We also need the pcre_printint() function for printing out compiled
128 patterns. This function is in a separate file so that it can be included in
129 pcre_compile.c when that module is compiled with debugging enabled. It needs to
130 know which case is being compiled. */
131
132 #define COMPILING_PCRETEST
133 #include "pcre_printint.src"
134
135 /* The definition of the macro PRINTABLE, which determines whether to print an
136 output character as-is or as a hex value when showing compiled patterns, is
137 contained in the printint.src file. We use it here also, in cases when the
138 locale has not been explicitly changed, so as to get consistent output from
139 systems that differ in their output from isprint() even in the "C" locale. */
140
141 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
142
143 /* It is possible to compile this test program without including support for
144 testing the POSIX interface, though this is not available via the standard
145 Makefile. */
146
147 #if !defined NOPOSIX
148 #include "pcreposix.h"
149 #endif
150
151 /* It is also possible, for the benefit of the version currently imported into
152 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
153 interface to the DFA matcher (NODFA), and without the doublecheck of the old
154 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
155 UTF8 support if PCRE is built without it. */
156
157 #ifndef SUPPORT_UTF8
158 #ifndef NOUTF8
159 #define NOUTF8
160 #endif
161 #endif
162
163
164 /* Other parameters */
165
166 #ifndef CLOCKS_PER_SEC
167 #ifdef CLK_TCK
168 #define CLOCKS_PER_SEC CLK_TCK
169 #else
170 #define CLOCKS_PER_SEC 100
171 #endif
172 #endif
173
174 /* This is the default loop count for timing. */
175
176 #define LOOPREPEAT 500000
177
178 /* Static variables. These are shared by the helper functions in this file;
the notes below describe only the uses that can be seen in those helpers. */
179
180 static FILE *outfile; /* stream that callout(), new_info() and the malloc wrappers print to */
181 static int log_store = 0; /* NOTE(review): not referenced by the helpers nearby - confirm its use */
182 static int callout_count; /* incremented by callout() each time callout_fail_id is hit */
183 static int callout_extra; /* non-zero => callout() also lists the captured substrings */
184 static int callout_fail_count; /* callout() returns 1 once callout_count reaches this */
185 static int callout_fail_id; /* the callout number whose hits are counted */
186 static int debug_lengths; /* NOTE(review): not referenced by the helpers nearby - confirm its use */
187 static int first_callout; /* non-zero => callout() re-prints the subject; reset to 0 by callout() */
188 static int locale_set = 0; /* non-zero => PRINTHEX() uses isprint() instead of PRINTABLE() */
189 static int show_malloc; /* non-zero => the malloc/free wrappers trace each call to outfile */
190 static int use_utf8; /* non-zero => pchars() decodes its input as UTF-8 */
191 static size_t gotten_store; /* size passed to the most recent new_malloc() call */
192
193 /* The buffers grow automatically if very long input lines are encountered.
All three are replaced together by extend_inputline(), which doubles buffer_size
and copies the old contents of buffer and pbuffer (but not dbuffer). */
194
195 static int buffer_size = 50000; /* current size, in bytes, of each of the three buffers */
196 static uschar *buffer = NULL; /* input lines are read into this */
197 static uschar *dbuffer = NULL; /* NOTE(review): only allocated/freed in the code nearby - confirm its use */
198 static uschar *pbuffer = NULL; /* copy of the input that callout() prints pattern text from */
199
200 /* Textual explanations for runtime error codes. The number in each trailing
comment is the entry's index in the table. NOTE(review): the lookup code is not
in this part of the file; presumably the index is the negated PCRE error code
(the NOMATCH/BADUTF8/PARTIAL/SHORTUTF8 remarks fit that) - confirm at the point
of use. A NULL entry has no text here. */
201
202 static const char *errtexts[] = {
203 NULL, /* 0 is no error */
204 NULL, /* 1: NOMATCH is handled specially */
205 "NULL argument passed", /* 2 */
206 "bad option value", /* 3 */
207 "magic number missing", /* 4 */
208 "unknown opcode - pattern overwritten?", /* 5 */
209 "no more memory", /* 6 */
210 NULL, /* 7: never returned by pcre_exec() or pcre_dfa_exec() */
211 "match limit exceeded", /* 8 */
212 "callout error code", /* 9 */
213 NULL, /* 10: BADUTF8 is handled specially */
214 "bad UTF-8 offset", /* 11 */
215 NULL, /* 12: PARTIAL is handled specially */
216 "not used - internal error", /* 13 */
217 "internal error - pattern overwritten?", /* 14 */
218 "bad count value", /* 15 */
219 "item unsupported for DFA matching", /* 16 */
220 "backreference condition or recursion test not supported for DFA matching", /* 17 */
221 "match limit not supported for DFA matching", /* 18 */
222 "workspace size exceeded in DFA matching", /* 19 */
223 "too much recursion for DFA matching", /* 20 */
224 "recursion limit exceeded", /* 21 */
225 "not used - internal error", /* 22 */
226 "invalid combination of newline options", /* 23 */
227 "bad offset value", /* 24 */
228 NULL /* 25: SHORTUTF8 is handled specially */
229 };
230
231
232 /*************************************************
233 * Alternate character tables *
234 *************************************************/
235
236 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
237 using the default tables of the library. However, the T option can be used to
238 select alternate sets of tables, for different kinds of testing. Note also that
239 the L (locale) option also adjusts the tables. */
240
241 /* This is the set of tables distributed as default with PCRE. It recognizes
242 only ASCII characters. */
243
244 static const unsigned char tables0[] = {
245
246 /* This table is a lower casing table. It has 256 entries, indexed by
character value; only 'A'-'Z' (65-90) map to a different value. */
247
248 0, 1, 2, 3, 4, 5, 6, 7,
249 8, 9, 10, 11, 12, 13, 14, 15,
250 16, 17, 18, 19, 20, 21, 22, 23,
251 24, 25, 26, 27, 28, 29, 30, 31,
252 32, 33, 34, 35, 36, 37, 38, 39,
253 40, 41, 42, 43, 44, 45, 46, 47,
254 48, 49, 50, 51, 52, 53, 54, 55,
255 56, 57, 58, 59, 60, 61, 62, 63,
256 64, 97, 98, 99,100,101,102,103,
257 104,105,106,107,108,109,110,111,
258 112,113,114,115,116,117,118,119,
259 120,121,122, 91, 92, 93, 94, 95,
260 96, 97, 98, 99,100,101,102,103,
261 104,105,106,107,108,109,110,111,
262 112,113,114,115,116,117,118,119,
263 120,121,122,123,124,125,126,127,
264 128,129,130,131,132,133,134,135,
265 136,137,138,139,140,141,142,143,
266 144,145,146,147,148,149,150,151,
267 152,153,154,155,156,157,158,159,
268 160,161,162,163,164,165,166,167,
269 168,169,170,171,172,173,174,175,
270 176,177,178,179,180,181,182,183,
271 184,185,186,187,188,189,190,191,
272 192,193,194,195,196,197,198,199,
273 200,201,202,203,204,205,206,207,
274 208,209,210,211,212,213,214,215,
275 216,217,218,219,220,221,222,223,
276 224,225,226,227,228,229,230,231,
277 232,233,234,235,236,237,238,239,
278 240,241,242,243,244,245,246,247,
279 248,249,250,251,252,253,254,255,
280
281 /* This table is a case flipping table. It has 256 entries; 'A'-'Z' and
'a'-'z' are exchanged and every other value maps to itself. */
282
283 0, 1, 2, 3, 4, 5, 6, 7,
284 8, 9, 10, 11, 12, 13, 14, 15,
285 16, 17, 18, 19, 20, 21, 22, 23,
286 24, 25, 26, 27, 28, 29, 30, 31,
287 32, 33, 34, 35, 36, 37, 38, 39,
288 40, 41, 42, 43, 44, 45, 46, 47,
289 48, 49, 50, 51, 52, 53, 54, 55,
290 56, 57, 58, 59, 60, 61, 62, 63,
291 64, 97, 98, 99,100,101,102,103,
292 104,105,106,107,108,109,110,111,
293 112,113,114,115,116,117,118,119,
294 120,121,122, 91, 92, 93, 94, 95,
295 96, 65, 66, 67, 68, 69, 70, 71,
296 72, 73, 74, 75, 76, 77, 78, 79,
297 80, 81, 82, 83, 84, 85, 86, 87,
298 88, 89, 90,123,124,125,126,127,
299 128,129,130,131,132,133,134,135,
300 136,137,138,139,140,141,142,143,
301 144,145,146,147,148,149,150,151,
302 152,153,154,155,156,157,158,159,
303 160,161,162,163,164,165,166,167,
304 168,169,170,171,172,173,174,175,
305 176,177,178,179,180,181,182,183,
306 184,185,186,187,188,189,190,191,
307 192,193,194,195,196,197,198,199,
308 200,201,202,203,204,205,206,207,
309 208,209,210,211,212,213,214,215,
310 216,217,218,219,220,221,222,223,
311 224,225,226,227,228,229,230,231,
312 232,233,234,235,236,237,238,239,
313 240,241,242,243,244,245,246,247,
314 248,249,250,251,252,253,254,255,
315
316 /* This table contains bit maps for various character classes. Each map is 32
317 bytes long and the bits run from the least significant end of each byte. The
318 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
319 graph, print, punct, and cntrl. Other classes are built from combinations.
The maps appear below in that order; the first line of each one is labelled. */
320
321 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, /* space: 9-13 and 32 */
322 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
323 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
324 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
325
326 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* xdigit: 0-9, A-F, a-f */
327 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
328 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330
331 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* digit: 0-9 */
332 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
333 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
335
336 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* upper: A-Z */
337 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
338 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
340
341 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* lower: a-z */
342 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
343 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
344 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
345
346 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* word: 0-9, A-Z, '_', a-z */
347 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
348 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
349 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
350
351 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, /* graph: 33-126 */
352 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
353 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
354 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
355
356 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, /* print: 32-126 */
357 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
358 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
359 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360
361 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, /* punct */
362 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
363 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
364 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365
366 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, /* cntrl: 0-31 and 127 */
367 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
368 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
369 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
370
371 /* This table identifies various classes of character by individual bits:
372 0x01 white space character
373 0x02 letter
374 0x04 decimal digit
375 0x08 hexadecimal digit
376 0x10 alphanumeric or '_'
377 0x80 regular expression metacharacter or binary zero
378 */
379
380 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
381 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
382 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
383 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
384 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
385 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
386 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
387 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
388 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
389 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
390 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
391 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
392 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
393 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
394 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
395 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
396 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
397 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
398 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
399 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
400 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
401 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
402 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
403 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
404 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
405 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
406 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
407 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
408 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
409 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
410 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
411 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
412
413 /* This is a set of tables that came originally from a Windows user. It seems to
414 be at least an approximation of ISO 8859. In particular, there are characters
415 greater than 128 that are marked as spaces, letters, etc. */
416
417 static const unsigned char tables1[] = { /* four sections, sized like those of tables0: 256 + 256 + 320 + 256 bytes */
418 0,1,2,3,4,5,6,7, /* section 1: lower-casing table, 256 entries indexed by character value */
419 8,9,10,11,12,13,14,15,
420 16,17,18,19,20,21,22,23,
421 24,25,26,27,28,29,30,31,
422 32,33,34,35,36,37,38,39,
423 40,41,42,43,44,45,46,47,
424 48,49,50,51,52,53,54,55,
425 56,57,58,59,60,61,62,63,
426 64,97,98,99,100,101,102,103,
427 104,105,106,107,108,109,110,111,
428 112,113,114,115,116,117,118,119,
429 120,121,122,91,92,93,94,95,
430 96,97,98,99,100,101,102,103,
431 104,105,106,107,108,109,110,111,
432 112,113,114,115,116,117,118,119,
433 120,121,122,123,124,125,126,127,
434 128,129,130,131,132,133,134,135,
435 136,137,138,139,140,141,142,143,
436 144,145,146,147,148,149,150,151,
437 152,153,154,155,156,157,158,159,
438 160,161,162,163,164,165,166,167,
439 168,169,170,171,172,173,174,175,
440 176,177,178,179,180,181,182,183,
441 184,185,186,187,188,189,190,191,
442 224,225,226,227,228,229,230,231, /* 192-222 (except 215) are lowered to 224-254 */
443 232,233,234,235,236,237,238,239,
444 240,241,242,243,244,245,246,215,
445 248,249,250,251,252,253,254,223,
446 224,225,226,227,228,229,230,231,
447 232,233,234,235,236,237,238,239,
448 240,241,242,243,244,245,246,247,
449 248,249,250,251,252,253,254,255,
450 0,1,2,3,4,5,6,7, /* section 2: case-flipping table, 256 entries */
451 8,9,10,11,12,13,14,15,
452 16,17,18,19,20,21,22,23,
453 24,25,26,27,28,29,30,31,
454 32,33,34,35,36,37,38,39,
455 40,41,42,43,44,45,46,47,
456 48,49,50,51,52,53,54,55,
457 56,57,58,59,60,61,62,63,
458 64,97,98,99,100,101,102,103,
459 104,105,106,107,108,109,110,111,
460 112,113,114,115,116,117,118,119,
461 120,121,122,91,92,93,94,95,
462 96,65,66,67,68,69,70,71,
463 72,73,74,75,76,77,78,79,
464 80,81,82,83,84,85,86,87,
465 88,89,90,123,124,125,126,127,
466 128,129,130,131,132,133,134,135,
467 136,137,138,139,140,141,142,143,
468 144,145,146,147,148,149,150,151,
469 152,153,154,155,156,157,158,159,
470 160,161,162,163,164,165,166,167,
471 168,169,170,171,172,173,174,175,
472 176,177,178,179,180,181,182,183,
473 184,185,186,187,188,189,190,191,
474 224,225,226,227,228,229,230,231,
475 232,233,234,235,236,237,238,239,
476 240,241,242,243,244,245,246,215,
477 248,249,250,251,252,253,254,223,
478 192,193,194,195,196,197,198,199,
479 200,201,202,203,204,205,206,207,
480 208,209,210,211,212,213,214,247,
481 216,217,218,219,220,221,222,255,
482 0,62,0,0,1,0,0,0, /* section 3: ten 32-byte class bit maps, least significant bit first; this one is space */
483 0,0,0,0,0,0,0,0,
484 32,0,0,0,1,0,0,0,
485 0,0,0,0,0,0,0,0,
486 0,0,0,0,0,0,255,3, /* xdigit */
487 126,0,0,0,126,0,0,0,
488 0,0,0,0,0,0,0,0,
489 0,0,0,0,0,0,0,0,
490 0,0,0,0,0,0,255,3, /* digit */
491 0,0,0,0,0,0,0,0,
492 0,0,0,0,0,0,12,2,
493 0,0,0,0,0,0,0,0,
494 0,0,0,0,0,0,0,0, /* upper */
495 254,255,255,7,0,0,0,0,
496 0,0,0,0,0,0,0,0,
497 255,255,127,127,0,0,0,0,
498 0,0,0,0,0,0,0,0, /* lower */
499 0,0,0,0,254,255,255,7,
500 0,0,0,0,0,4,32,4,
501 0,0,0,128,255,255,127,255,
502 0,0,0,0,0,0,255,3, /* word */
503 254,255,255,135,254,255,255,7,
504 0,0,0,0,0,4,44,6,
505 255,255,127,255,255,255,127,255,
506 0,0,0,0,254,255,255,255, /* graph */
507 255,255,255,255,255,255,255,127,
508 0,0,0,0,254,255,255,255,
509 255,255,255,255,255,255,255,255,
510 0,2,0,0,255,255,255,255, /* print */
511 255,255,255,255,255,255,255,127,
512 0,0,0,0,255,255,255,255,
513 255,255,255,255,255,255,255,255,
514 0,0,0,0,254,255,0,252, /* punct */
515 1,0,0,248,1,0,0,120,
516 0,0,0,0,254,255,255,255,
517 0,0,128,0,0,0,128,0,
518 255,255,255,255,0,0,0,0, /* cntrl */
519 0,0,0,0,0,0,0,128,
520 255,255,255,255,0,0,0,0,
521 0,0,0,0,0,0,0,0,
522 128,0,0,0,0,0,0,0, /* section 4: per-character type flags, 256 entries (1 space, 2 letter, 4 digit, 8 hex digit, 16 word, 128 meta) */
523 0,1,1,0,1,1,0,0,
524 0,0,0,0,0,0,0,0,
525 0,0,0,0,0,0,0,0,
526 1,0,0,0,128,0,0,0,
527 128,128,128,128,0,0,128,0,
528 28,28,28,28,28,28,28,28,
529 28,28,0,0,0,0,0,128,
530 0,26,26,26,26,26,26,18,
531 18,18,18,18,18,18,18,18,
532 18,18,18,18,18,18,18,18,
533 18,18,18,128,128,0,128,16,
534 0,26,26,26,26,26,26,18,
535 18,18,18,18,18,18,18,18,
536 18,18,18,18,18,18,18,18,
537 18,18,18,128,128,0,0,0,
538 0,0,0,0,0,1,0,0, /* flags for characters 128 and above start here */
539 0,0,0,0,0,0,0,0,
540 0,0,0,0,0,0,0,0,
541 0,0,0,0,0,0,0,0,
542 1,0,0,0,0,0,0,0,
543 0,0,18,0,0,0,0,0,
544 0,0,20,20,0,18,0,0,
545 0,20,18,0,0,0,0,0,
546 18,18,18,18,18,18,18,18,
547 18,18,18,18,18,18,18,18,
548 18,18,18,18,18,18,18,0,
549 18,18,18,18,18,18,18,18,
550 18,18,18,18,18,18,18,18,
551 18,18,18,18,18,18,18,18,
552 18,18,18,18,18,18,18,0,
553 18,18,18,18,18,18,18,18
554 };
555
556
557
558
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Replacement for strerror() on old systems whose C library lacks it (SunOS4
is the example the original author gives). The text is taken from the
sys_errlist[] table, using sys_nerr as the exclusive upper bound on the index.

Argument:
  n           the error number to describe

Returns:      sys_errlist[n] when n is inside the table, otherwise a fixed
              "unknown error number" string
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
#endif  /* HAVE_STRERROR */
578
579
580
581
582 /*************************************************
583 * Read or extend an input line *
584 *************************************************/
585
586 /* Input lines are read into buffer, but both patterns and data lines can be
587 continued over multiple input lines. In addition, if the buffer fills up, we
588 want to automatically expand it so as to be able to handle extremely large
589 lines that are needed for certain stress tests. When the input buffer is
590 expanded, the other two buffers must also be expanded likewise, and the
591 contents of pbuffer, which are a copy of the input for callouts, must be
592 preserved (for when expansion happens for a data line). This is not the most
593 optimal way of handling this, but hey, this is just a test program!
594
595 Arguments:
596 f the file to read
597 start where in buffer to start (this *must* be within buffer)
598 prompt for stdin or readline()
599
600 Returns: pointer to the start of new data
601 could be a copy of start, or could be moved
602 NULL if no data read and EOF reached
603 */
604
605 static uschar *
606 extend_inputline(FILE *f, uschar *start, const char *prompt)
607 {
608 uschar *here = start;
609
610 for (;;)
611 {
612 int rlen = (int)(buffer_size - (here - buffer));
613
614 if (rlen > 1000)
615 {
616 int dlen;
617
618 /* If libreadline support is required, use readline() to read a line if the
619 input is a terminal. Note that readline() removes the trailing newline, so
620 we must put it back again, to be compatible with fgets(). */
621
622 #ifdef SUPPORT_LIBREADLINE
623 if (isatty(fileno(f)))
624 {
625 size_t len;
626 char *s = readline(prompt);
627 if (s == NULL) return (here == start)? NULL : start;
628 len = strlen(s);
629 if (len > 0) add_history(s);
630 if (len > rlen - 1) len = rlen - 1;
631 memcpy(here, s, len);
632 here[len] = '\n';
633 here[len+1] = 0;
634 free(s);
635 }
636 else
637 #endif
638
639 /* Read the next line by normal means, prompting if the file is stdin. */
640
641 {
642 if (f == stdin) printf("%s", prompt);
643 if (fgets((char *)here, rlen, f) == NULL)
644 return (here == start)? NULL : start;
645 }
646
647 dlen = (int)strlen((char *)here);
648 if (dlen > 0 && here[dlen - 1] == '\n') return start;
649 here += dlen;
650 }
651
652 else
653 {
654 int new_buffer_size = 2*buffer_size;
655 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
656 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
657 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
658
659 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
660 {
661 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
662 exit(1);
663 }
664
665 memcpy(new_buffer, buffer, buffer_size);
666 memcpy(new_pbuffer, pbuffer, buffer_size);
667
668 buffer_size = new_buffer_size;
669
670 start = new_buffer + (start - buffer);
671 here = new_buffer + (here - buffer);
672
673 free(buffer);
674 free(dbuffer);
675 free(pbuffer);
676
677 buffer = new_buffer;
678 dbuffer = new_dbuffer;
679 pbuffer = new_pbuffer;
680 }
681 }
682
683 return NULL; /* Control never gets here */
684 }
685
686
687
688
689
690
691
/*************************************************
*         Read number from string                *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped and then a run of decimal digits is converted.
If there are no digits the result is zero and *endptr is left pointing at the
first non-space character. A value too large for an int is returned as the
largest int (all of its digits are still consumed) instead of overflowing,
which would be undefined behaviour.

Arguments:
  str         string to be converted
  endptr      where to put the end pointer (first character not consumed)

Returns:      the value, as a non-negative int
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
/* The largest int, computed here so that <limits.h> is not needed; this
relies on int having no padding bits, which holds on all usual platforms. */

const int int_max = (int)(~0u >> 1);
int result = 0;

while (*str != 0 && isspace(*str)) str++;

while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');
  if (result > (int_max - digit) / 10)
    result = int_max;                     /* would overflow: saturate */
  else
    result = result * 10 + digit;
  }

*endptr = str;
return result;
}
716
717
718
719
720 /*************************************************
721 * Convert UTF-8 string to value *
722 *************************************************/
723
724 /* This function takes one or more bytes that represents a UTF-8 character,
725 and returns the value of the character.
726
727 Argument:
728 utf8bytes a pointer to the byte vector
729 vptr a pointer to an int to receive the value
730
731 Returns: > 0 => the number of bytes consumed
732 -6 to 0 => malformed UTF-8 character at offset = (-return)
733 */
734
735 #if !defined NOUTF8
736
737 static int
738 utf82ord(unsigned char *utf8bytes, int *vptr)
739 {
740 int c = *utf8bytes++;
741 int d = c;
742 int i, j, s;
743
744 for (i = -1; i < 6; i++) /* i is number of additional bytes */
745 {
746 if ((d & 0x80) == 0) break;
747 d <<= 1;
748 }
749
750 if (i == -1) { *vptr = c; return 1; } /* ascii character */
751 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
752
753 /* i now has a value in the range 1-5 */
754
755 s = 6*i;
756 d = (c & utf8_table3[i]) << s;
757
758 for (j = 0; j < i; j++)
759 {
760 c = *utf8bytes++;
761 if ((c & 0xc0) != 0x80) return -(j+1);
762 s -= 6;
763 d |= (c & 0x3f) << s;
764 }
765
766 /* Check that encoding was the correct unique one */
767
768 for (j = 0; j < utf8_table1_size; j++)
769 if (d <= utf8_table1[j]) break;
770 if (j != i) return -(i+1);
771
772 /* Valid value */
773
774 *vptr = d;
775 return i+1;
776 }
777
778 #endif
779
780
781
782 /*************************************************
783 * Convert character value to UTF-8 *
784 *************************************************/
785
786 /* This function takes an integer value in the range 0 - 0x7fffffff
787 and encodes it as a UTF-8 character in 0 to 6 bytes.
788
789 Arguments:
790 cvalue the character value
791 utf8bytes pointer to buffer for result - at least 6 bytes long
792
793 Returns: number of characters placed in the buffer
794 */
795
796 #if !defined NOUTF8
797
798 static int
799 ord2utf8(int cvalue, uschar *utf8bytes)
800 {
801 register int i, j;
802 for (i = 0; i < utf8_table1_size; i++)
803 if (cvalue <= utf8_table1[i]) break;
804 utf8bytes += i;
805 for (j = i; j > 0; j--)
806 {
807 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
808 cvalue >>= 6;
809 }
810 *utf8bytes = utf8_table2[i] | cvalue;
811 return i + 1;
812 }
813
814 #endif
815
816
817
818 /*************************************************
819 * Print character string *
820 *************************************************/
821
822 /* Character string printing function. Must handle UTF-8 strings in utf8
823 mode. Yields number of characters printed. If handed a NULL file, just counts
824 chars without printing. */
825
826 static int pchars(unsigned char *p, int length, FILE *f)
827 {
828 int c = 0;
829 int yield = 0;
830
831 while (length-- > 0)
832 {
833 #if !defined NOUTF8
834 if (use_utf8)
835 {
836 int rc = utf82ord(p, &c);
837
838 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
839 {
840 length -= rc - 1;
841 p += rc;
842 if (PRINTHEX(c))
843 {
844 if (f != NULL) fprintf(f, "%c", c);
845 yield++;
846 }
847 else
848 {
849 int n = 4;
850 if (f != NULL) fprintf(f, "\\x{%02x}", c);
851 yield += (n <= 0x000000ff)? 2 :
852 (n <= 0x00000fff)? 3 :
853 (n <= 0x0000ffff)? 4 :
854 (n <= 0x000fffff)? 5 : 6;
855 }
856 continue;
857 }
858 }
859 #endif
860
861 /* Not UTF-8, or malformed UTF-8 */
862
863 c = *p++;
864 if (PRINTHEX(c))
865 {
866 if (f != NULL) fprintf(f, "%c", c);
867 yield++;
868 }
869 else
870 {
871 if (f != NULL) fprintf(f, "\\x%02x", c);
872 yield += 4;
873 }
874 }
875
876 return yield;
877 }
878
879
880
881 /*************************************************
882 * Callout function *
883 *************************************************/
884
885 /* Called from PCRE as a result of the (?C) item. We print out where we are in
886 the match. Yield zero unless more callouts than the fail count, or the callout
887 data is not zero. */
888
889 static int callout(pcre_callout_block *cb)
890 {
891 FILE *f = (first_callout | callout_extra)? outfile : NULL;
892 int i, pre_start, post_start, subject_length;
893
894 if (callout_extra)
895 {
896 fprintf(f, "Callout %d: last capture = %d\n",
897 cb->callout_number, cb->capture_last);
898
899 for (i = 0; i < cb->capture_top * 2; i += 2)
900 {
901 if (cb->offset_vector[i] < 0)
902 fprintf(f, "%2d: <unset>\n", i/2);
903 else
904 {
905 fprintf(f, "%2d: ", i/2);
906 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
907 cb->offset_vector[i+1] - cb->offset_vector[i], f);
908 fprintf(f, "\n");
909 }
910 }
911 }
912
913 /* Re-print the subject in canonical form, the first time or if giving full
914 datails. On subsequent calls in the same match, we use pchars just to find the
915 printed lengths of the substrings. */
916
917 if (f != NULL) fprintf(f, "--->");
918
919 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
920 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
921 cb->current_position - cb->start_match, f);
922
923 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
924
925 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
926 cb->subject_length - cb->current_position, f);
927
928 if (f != NULL) fprintf(f, "\n");
929
930 /* Always print appropriate indicators, with callout number if not already
931 shown. For automatic callouts, show the pattern offset. */
932
933 if (cb->callout_number == 255)
934 {
935 fprintf(outfile, "%+3d ", cb->pattern_position);
936 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
937 }
938 else
939 {
940 if (callout_extra) fprintf(outfile, " ");
941 else fprintf(outfile, "%3d ", cb->callout_number);
942 }
943
944 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
945 fprintf(outfile, "^");
946
947 if (post_start > 0)
948 {
949 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
950 fprintf(outfile, "^");
951 }
952
953 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
954 fprintf(outfile, " ");
955
956 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
957 pbuffer + cb->pattern_position);
958
959 fprintf(outfile, "\n");
960 first_callout = 0;
961
962 if (cb->callout_data != NULL)
963 {
964 int callout_data = *((int *)(cb->callout_data));
965 if (callout_data != 0)
966 {
967 fprintf(outfile, "Callout data = %d\n", callout_data);
968 return callout_data;
969 }
970 }
971
972 return (cb->callout_number != callout_fail_id)? 0 :
973 (++callout_count >= callout_fail_count)? 1 : 0;
974 }
975
976
977 /*************************************************
978 * Local malloc functions *
979 *************************************************/
980
981 /* Alternative malloc function, to test functionality and show the size of the
982 compiled re. */
983
984 static void *new_malloc(size_t size)
985 {
986 void *block = malloc(size);
987 gotten_store = size;
988 if (show_malloc)
989 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
990 return block;
991 }
992
993 static void new_free(void *block)
994 {
995 if (show_malloc)
996 fprintf(outfile, "free %p\n", block);
997 free(block);
998 }
999
1000
1001 /* For recursion malloc/free, to test stacking calls */
1002
1003 static void *stack_malloc(size_t size)
1004 {
1005 void *block = malloc(size);
1006 if (show_malloc)
1007 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1008 return block;
1009 }
1010
1011 static void stack_free(void *block)
1012 {
1013 if (show_malloc)
1014 fprintf(outfile, "stack_free %p\n", block);
1015 free(block);
1016 }
1017
1018
1019 /*************************************************
1020 * Call pcre_fullinfo() *
1021 *************************************************/
1022
1023 /* Get one piece of information from the pcre_fullinfo() function */
1024
1025 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1026 {
1027 int rc;
1028 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1029 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1030 }
1031
1032
1033
1034 /*************************************************
1035 * Byte flipping function *
1036 *************************************************/
1037
/* Reverse the byte order of an integer field.

Arguments:
  value     the value to flip
  n         size of the field in bytes: 2 swaps the two low-order bytes; any
            other value reverses the four low-order bytes

Returns:    the flipped value; bits of value above the bytes being swapped do
            not contribute to the result
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int byte0 = value & 0xff;
unsigned long int byte1 = (value >> 8) & 0xff;
unsigned long int byte2;
unsigned long int byte3;

if (n == 2)
  {
  return (byte0 << 8) | byte1;
  }

byte2 = (value >> 16) & 0xff;
byte3 = (value >> 24) & 0xff;
return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
1047
1048
1049
1050
1051 /*************************************************
1052 * Check match or recursion limit *
1053 *************************************************/
1054
1055 static int
1056 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1057 int start_offset, int options, int *use_offsets, int use_size_offsets,
1058 int flag, unsigned long int *limit, int errnumber, const char *msg)
1059 {
1060 int count;
1061 int min = 0;
1062 int mid = 64;
1063 int max = -1;
1064
1065 extra->flags |= flag;
1066
1067 for (;;)
1068 {
1069 *limit = mid;
1070
1071 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1072 use_offsets, use_size_offsets);
1073
1074 if (count == errnumber)
1075 {
1076 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1077 min = mid;
1078 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1079 }
1080
1081 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1082 count == PCRE_ERROR_PARTIAL)
1083 {
1084 if (mid == min + 1)
1085 {
1086 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1087 break;
1088 }
1089 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1090 max = mid;
1091 mid = (min + mid)/2;
1092 }
1093 else break; /* Some other error */
1094 }
1095
1096 extra->flags &= ~flag;
1097 return count;
1098 }
1099
1100
1101
1102 /*************************************************
1103 * Case-independent strncmp() function *
1104 *************************************************/
1105
1106 /*
1107 Arguments:
1108 s first string
1109 t second string
1110 n number of characters to compare
1111
1112 Returns: < 0, = 0, or > 0, according to the comparison
1113 */
1114
1115 static int
1116 strncmpic(uschar *s, uschar *t, int n)
1117 {
1118 while (n--)
1119 {
1120 int c = tolower(*s++) - tolower(*t++);
1121 if (c) return c;
1122 }
1123 return 0;
1124 }
1125
1126
1127
1128 /*************************************************
1129 * Check newline indicator *
1130 *************************************************/
1131
1132 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1133 a message and return 0 if there is no match.
1134
1135 Arguments:
1136 p points after the leading '<'
1137 f file for error message
1138
1139 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1140 */
1141
1142 static int
1143 check_newline(uschar *p, FILE *f)
1144 {
1145 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1146 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1147 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1148 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1149 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1150 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1151 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1152 fprintf(f, "Unknown newline type at: <%s\n", p);
1153 return 0;
1154 }
1155
1156
1157
1158 /*************************************************
1159 * Usage function *
1160 *************************************************/
1161
/* Write the command-line summary to stdout. Which lines appear depends on the
build: the readline remark is chosen by SUPPORT_LIBREADLINE, and the -dfa and
-p entries are left out when NODFA or NOPOSIX, respectively, is defined. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied\n"
      " -t time compilation and execution\n", stdout);
fputs(" -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1195
1196
1197
1198 /*************************************************
1199 * Main Program *
1200 *************************************************/
1201
1202 /* Read lines from named file or stdin and write to named file or stdout; lines
1203 consist of a regular expression, in delimiters and optionally followed by
1204 options, followed by a set of test data, terminated by an empty line. */
1205
1206 int main(int argc, char **argv)
1207 {
1208 FILE *infile = stdin;
1209 int options = 0;
1210 int study_options = 0;
1211 int default_find_match_limit = FALSE;
1212 int op = 1;
1213 int timeit = 0;
1214 int timeitm = 0;
1215 int showinfo = 0;
1216 int showstore = 0;
1217 int force_study = 0;
1218 int quiet = 0;
1219 int size_offsets = 45;
1220 int size_offsets_max;
1221 int *offsets = NULL;
1222 #if !defined NOPOSIX
1223 int posix = 0;
1224 #endif
1225 int debug = 0;
1226 int done = 0;
1227 int all_use_dfa = 0;
1228 int yield = 0;
1229 int stack_size;
1230
1231 /* These vectors store, end-to-end, a list of captured substring names. Assume
1232 that 1024 is plenty long enough for the few names we'll be testing. */
1233
1234 uschar copynames[1024];
1235 uschar getnames[1024];
1236
1237 uschar *copynamesptr;
1238 uschar *getnamesptr;
1239
1240 /* Get buffers from malloc() so that Electric Fence will check their misuse
1241 when I am debugging. They grow automatically when very long lines are read. */
1242
1243 buffer = (unsigned char *)malloc(buffer_size);
1244 dbuffer = (unsigned char *)malloc(buffer_size);
1245 pbuffer = (unsigned char *)malloc(buffer_size);
1246
1247 /* The outfile variable is static so that new_malloc can use it. */
1248
1249 outfile = stdout;
1250
1251 /* The following _setmode() stuff is some Windows magic that tells its runtime
1252 library to translate CRLF into a single LF character. At least, that's what
1253 I've been told: never having used Windows I take this all on trust. Originally
1254 it set 0x8000, but then I was advised that _O_BINARY was better. */
1255
1256 #if defined(_WIN32) || defined(WIN32)
1257 _setmode( _fileno( stdout ), _O_BINARY );
1258 #endif
1259
1260 /* Scan options */
1261
1262 while (argc > 1 && argv[op][0] == '-')
1263 {
1264 unsigned char *endptr;
1265
1266 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1267 else if (strcmp(argv[op], "-s") == 0) force_study = 1;
1268 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1269 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1270 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1271 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1272 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1273 #if !defined NODFA
1274 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1275 #endif
1276 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1277 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1278 *endptr == 0))
1279 {
1280 op++;
1281 argc--;
1282 }
1283 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1284 {
1285 int both = argv[op][2] == 0;
1286 int temp;
1287 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1288 *endptr == 0))
1289 {
1290 timeitm = temp;
1291 op++;
1292 argc--;
1293 }
1294 else timeitm = LOOPREPEAT;
1295 if (both) timeit = timeitm;
1296 }
1297 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1298 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1299 *endptr == 0))
1300 {
1301 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1302 printf("PCRE: -S not supported on this OS\n");
1303 exit(1);
1304 #else
1305 int rc;
1306 struct rlimit rlim;
1307 getrlimit(RLIMIT_STACK, &rlim);
1308 rlim.rlim_cur = stack_size * 1024 * 1024;
1309 rc = setrlimit(RLIMIT_STACK, &rlim);
1310 if (rc != 0)
1311 {
1312 printf("PCRE: setrlimit() failed with error %d\n", rc);
1313 exit(1);
1314 }
1315 op++;
1316 argc--;
1317 #endif
1318 }
1319 #if !defined NOPOSIX
1320 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1321 #endif
1322 else if (strcmp(argv[op], "-C") == 0)
1323 {
1324 int rc;
1325 unsigned long int lrc;
1326 printf("PCRE version %s\n", pcre_version());
1327 printf("Compiled with\n");
1328 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1329 printf(" %sUTF-8 support\n", rc? "" : "No ");
1330 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1331 printf(" %sUnicode properties support\n", rc? "" : "No ");
1332 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1333 /* Note that these values are always the ASCII values, even
1334 in EBCDIC environments. CR is 13 and NL is 10. */
1335 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1336 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1337 (rc == -2)? "ANYCRLF" :
1338 (rc == -1)? "ANY" : "???");
1339 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1340 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1341 "all Unicode newlines");
1342 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1343 printf(" Internal link size = %d\n", rc);
1344 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1345 printf(" POSIX malloc threshold = %d\n", rc);
1346 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1347 printf(" Default match limit = %ld\n", lrc);
1348 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1349 printf(" Default recursion depth limit = %ld\n", lrc);
1350 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1351 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1352 goto EXIT;
1353 }
1354 else if (strcmp(argv[op], "-help") == 0 ||
1355 strcmp(argv[op], "--help") == 0)
1356 {
1357 usage();
1358 goto EXIT;
1359 }
1360 else
1361 {
1362 printf("** Unknown or malformed option %s\n", argv[op]);
1363 usage();
1364 yield = 1;
1365 goto EXIT;
1366 }
1367 op++;
1368 argc--;
1369 }
1370
1371 /* Get the store for the offsets vector, and remember what it was */
1372
1373 size_offsets_max = size_offsets;
1374 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1375 if (offsets == NULL)
1376 {
1377 printf("** Failed to get %d bytes of memory for offsets vector\n",
1378 (int)(size_offsets_max * sizeof(int)));
1379 yield = 1;
1380 goto EXIT;
1381 }
1382
1383 /* Sort out the input and output files */
1384
1385 if (argc > 1)
1386 {
1387 infile = fopen(argv[op], INPUT_MODE);
1388 if (infile == NULL)
1389 {
1390 printf("** Failed to open %s\n", argv[op]);
1391 yield = 1;
1392 goto EXIT;
1393 }
1394 }
1395
1396 if (argc > 2)
1397 {
1398 outfile = fopen(argv[op+1], OUTPUT_MODE);
1399 if (outfile == NULL)
1400 {
1401 printf("** Failed to open %s\n", argv[op+1]);
1402 yield = 1;
1403 goto EXIT;
1404 }
1405 }
1406
1407 /* Set alternative malloc function */
1408
1409 pcre_malloc = new_malloc;
1410 pcre_free = new_free;
1411 pcre_stack_malloc = stack_malloc;
1412 pcre_stack_free = stack_free;
1413
1414 /* Heading line unless quiet, then prompt for first regex if stdin */
1415
1416 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1417
1418 /* Main loop */
1419
1420 while (!done)
1421 {
1422 pcre *re = NULL;
1423 pcre_extra *extra = NULL;
1424
1425 #if !defined NOPOSIX /* There are still compilers that require no indent */
1426 regex_t preg;
1427 int do_posix = 0;
1428 #endif
1429
1430 const char *error;
1431 unsigned char *markptr;
1432 unsigned char *p, *pp, *ppp;
1433 unsigned char *to_file = NULL;
1434 const unsigned char *tables = NULL;
1435 unsigned long int true_size, true_study_size = 0;
1436 size_t size, regex_gotten_store;
1437 int do_allcaps = 0;
1438 int do_mark = 0;
1439 int do_study = 0;
1440 int no_force_study = 0;
1441 int do_debug = debug;
1442 int do_G = 0;
1443 int do_g = 0;
1444 int do_showinfo = showinfo;
1445 int do_showrest = 0;
1446 int do_showcaprest = 0;
1447 int do_flip = 0;
1448 int erroroffset, len, delimiter, poffset;
1449
1450 use_utf8 = 0;
1451 debug_lengths = 1;
1452
1453 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1454 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1455 fflush(outfile);
1456
1457 p = buffer;
1458 while (isspace(*p)) p++;
1459 if (*p == 0) continue;
1460
1461 /* See if the pattern is to be loaded pre-compiled from a file. */
1462
1463 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1464 {
1465 unsigned long int magic, get_options;
1466 uschar sbuf[8];
1467 FILE *f;
1468
1469 p++;
1470 pp = p + (int)strlen((char *)p);
1471 while (isspace(pp[-1])) pp--;
1472 *pp = 0;
1473
1474 f = fopen((char *)p, "rb");
1475 if (f == NULL)
1476 {
1477 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1478 continue;
1479 }
1480
1481 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1482
1483 true_size =
1484 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1485 true_study_size =
1486 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1487
1488 re = (real_pcre *)new_malloc(true_size);
1489 regex_gotten_store = gotten_store;
1490
1491 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1492
1493 magic = ((real_pcre *)re)->magic_number;
1494 if (magic != MAGIC_NUMBER)
1495 {
1496 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1497 {
1498 do_flip = 1;
1499 }
1500 else
1501 {
1502 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1503 fclose(f);
1504 continue;
1505 }
1506 }
1507
1508 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1509 do_flip? " (byte-inverted)" : "", p);
1510
1511 /* Need to know if UTF-8 for printing data strings */
1512
1513 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1514 use_utf8 = (get_options & PCRE_UTF8) != 0;
1515
1516 /* Now see if there is any following study data. */
1517
1518 if (true_study_size != 0)
1519 {
1520 pcre_study_data *psd;
1521
1522 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1523 extra->flags = PCRE_EXTRA_STUDY_DATA;
1524
1525 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1526 extra->study_data = psd;
1527
1528 if (fread(psd, 1, true_study_size, f) != true_study_size)
1529 {
1530 FAIL_READ:
1531 fprintf(outfile, "Failed to read data from %s\n", p);
1532 if (extra != NULL) new_free(extra);
1533 if (re != NULL) new_free(re);
1534 fclose(f);
1535 continue;
1536 }
1537 fprintf(outfile, "Study data loaded from %s\n", p);
1538 do_study = 1; /* To get the data output if requested */
1539 }
1540 else fprintf(outfile, "No study data\n");
1541
1542 fclose(f);
1543 goto SHOW_INFO;
1544 }
1545
1546 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1547 the pattern; if it isn't complete, read more. */
1548
1549 delimiter = *p++;
1550
1551 if (isalnum(delimiter) || delimiter == '\\')
1552 {
1553 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1554 goto SKIP_DATA;
1555 }
1556
1557 pp = p;
1558 poffset = (int)(p - buffer);
1559
1560 for(;;)
1561 {
1562 while (*pp != 0)
1563 {
1564 if (*pp == '\\' && pp[1] != 0) pp++;
1565 else if (*pp == delimiter) break;
1566 pp++;
1567 }
1568 if (*pp != 0) break;
1569 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1570 {
1571 fprintf(outfile, "** Unexpected EOF\n");
1572 done = 1;
1573 goto CONTINUE;
1574 }
1575 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1576 }
1577
1578 /* The buffer may have moved while being extended; reset the start of data
1579 pointer to the correct relative point in the buffer. */
1580
1581 p = buffer + poffset;
1582
1583 /* If the first character after the delimiter is backslash, make
1584 the pattern end with backslash. This is purely to provide a way
1585 of testing for the error message when a pattern ends with backslash. */
1586
1587 if (pp[1] == '\\') *pp++ = '\\';
1588
1589 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1590 for callouts. */
1591
1592 *pp++ = 0;
1593 strcpy((char *)pbuffer, (char *)p);
1594
1595 /* Look for options after final delimiter */
1596
1597 options = 0;
1598 study_options = 0;
1599 log_store = showstore; /* default from command line */
1600
1601 while (*pp != 0)
1602 {
1603 switch (*pp++)
1604 {
1605 case 'f': options |= PCRE_FIRSTLINE; break;
1606 case 'g': do_g = 1; break;
1607 case 'i': options |= PCRE_CASELESS; break;
1608 case 'm': options |= PCRE_MULTILINE; break;
1609 case 's': options |= PCRE_DOTALL; break;
1610 case 'x': options |= PCRE_EXTENDED; break;
1611
1612 case '+':
1613 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1614 break;
1615
1616 case '=': do_allcaps = 1; break;
1617 case 'A': options |= PCRE_ANCHORED; break;
1618 case 'B': do_debug = 1; break;
1619 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1620 case 'D': do_debug = do_showinfo = 1; break;
1621 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1622 case 'F': do_flip = 1; break;
1623 case 'G': do_G = 1; break;
1624 case 'I': do_showinfo = 1; break;
1625 case 'J': options |= PCRE_DUPNAMES; break;
1626 case 'K': do_mark = 1; break;
1627 case 'M': log_store = 1; break;
1628 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1629
1630 #if !defined NOPOSIX
1631 case 'P': do_posix = 1; break;
1632 #endif
1633
1634 case 'S':
1635 if (do_study == 0) do_study = 1; else
1636 {
1637 do_study = 0;
1638 no_force_study = 1;
1639 }
1640 break;
1641
1642 case 'U': options |= PCRE_UNGREEDY; break;
1643 case 'W': options |= PCRE_UCP; break;
1644 case 'X': options |= PCRE_EXTRA; break;
1645 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1646 case 'Z': debug_lengths = 0; break;
1647 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1648 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1649
1650 case 'T':
1651 switch (*pp++)
1652 {
1653 case '0': tables = tables0; break;
1654 case '1': tables = tables1; break;
1655
1656 case '\r':
1657 case '\n':
1658 case ' ':
1659 case 0:
1660 fprintf(outfile, "** Missing table number after /T\n");
1661 goto SKIP_DATA;
1662
1663 default:
1664 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1665 goto SKIP_DATA;
1666 }
1667 break;
1668
1669 case 'L':
1670 ppp = pp;
1671 /* The '\r' test here is so that it works on Windows. */
1672 /* The '0' test is just in case this is an unterminated line. */
1673 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1674 *ppp = 0;
1675 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1676 {
1677 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1678 goto SKIP_DATA;
1679 }
1680 locale_set = 1;
1681 tables = pcre_maketables();
1682 pp = ppp;
1683 break;
1684
1685 case '>':
1686 to_file = pp;
1687 while (*pp != 0) pp++;
1688 while (isspace(pp[-1])) pp--;
1689 *pp = 0;
1690 break;
1691
1692 case '<':
1693 {
1694 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1695 {
1696 options |= PCRE_JAVASCRIPT_COMPAT;
1697 pp += 3;
1698 }
1699 else
1700 {
1701 int x = check_newline(pp, outfile);
1702 if (x == 0) goto SKIP_DATA;
1703 options |= x;
1704 while (*pp++ != '>');
1705 }
1706 }
1707 break;
1708
1709 case '\r': /* So that it works in Windows */
1710 case '\n':
1711 case ' ':
1712 break;
1713
1714 default:
1715 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1716 goto SKIP_DATA;
1717 }
1718 }
1719
1720 /* Handle compiling via the POSIX interface, which doesn't support the
1721 timing, showing, or debugging options, nor the ability to pass over
1722 local character tables. */
1723
1724 #if !defined NOPOSIX
1725 if (posix || do_posix)
1726 {
1727 int rc;
1728 int cflags = 0;
1729
1730 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1731 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1732 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1733 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1734 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1735 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1736 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1737
1738 rc = regcomp(&preg, (char *)p, cflags);
1739
1740 /* Compilation failed; go back for another re, skipping to blank line
1741 if non-interactive. */
1742
1743 if (rc != 0)
1744 {
1745 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1746 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1747 goto SKIP_DATA;
1748 }
1749 }
1750
1751 /* Handle compiling via the native interface */
1752
1753 else
1754 #endif /* !defined NOPOSIX */
1755
1756 {
1757 unsigned long int get_options;
1758
1759 if (timeit > 0)
1760 {
1761 register int i;
1762 clock_t time_taken;
1763 clock_t start_time = clock();
1764 for (i = 0; i < timeit; i++)
1765 {
1766 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1767 if (re != NULL) free(re);
1768 }
1769 time_taken = clock() - start_time;
1770 fprintf(outfile, "Compile time %.4f milliseconds\n",
1771 (((double)time_taken * 1000.0) / (double)timeit) /
1772 (double)CLOCKS_PER_SEC);
1773 }
1774
1775 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1776
1777 /* Compilation failed; go back for another re, skipping to blank line
1778 if non-interactive. */
1779
1780 if (re == NULL)
1781 {
1782 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1783 SKIP_DATA:
1784 if (infile != stdin)
1785 {
1786 for (;;)
1787 {
1788 if (extend_inputline(infile, buffer, NULL) == NULL)
1789 {
1790 done = 1;
1791 goto CONTINUE;
1792 }
1793 len = (int)strlen((char *)buffer);
1794 while (len > 0 && isspace(buffer[len-1])) len--;
1795 if (len == 0) break;
1796 }
1797 fprintf(outfile, "\n");
1798 }
1799 goto CONTINUE;
1800 }
1801
1802 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1803 within the regex; check for this so that we know how to process the data
1804 lines. */
1805
1806 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1807 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1808
1809 /* Print information if required. There are now two info-returning
1810 functions. The old one has a limited interface and returns only limited
1811 data. Check that it agrees with the newer one. */
1812
1813 if (log_store)
1814 fprintf(outfile, "Memory allocation (code space): %d\n",
1815 (int)(gotten_store -
1816 sizeof(real_pcre) -
1817 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1818
1819 /* Extract the size for possible writing before possibly flipping it,
1820 and remember the store that was got. */
1821
1822 true_size = ((real_pcre *)re)->size;
1823 regex_gotten_store = gotten_store;
1824
1825 /* If -s or /S was present, study the regex to generate additional info to
1826 help with the matching, unless the pattern has the SS option, which
1827 suppresses the effect of /S (used for a few test patterns where studying is
1828 never sensible). */
1829
1830 if (do_study || (force_study && !no_force_study))
1831 {
1832 if (timeit > 0)
1833 {
1834 register int i;
1835 clock_t time_taken;
1836 clock_t start_time = clock();
1837 for (i = 0; i < timeit; i++)
1838 extra = pcre_study(re, study_options, &error);
1839 time_taken = clock() - start_time;
1840 if (extra != NULL) free(extra);
1841 fprintf(outfile, " Study time %.4f milliseconds\n",
1842 (((double)time_taken * 1000.0) / (double)timeit) /
1843 (double)CLOCKS_PER_SEC);
1844 }
1845 extra = pcre_study(re, study_options, &error);
1846 if (error != NULL)
1847 fprintf(outfile, "Failed to study: %s\n", error);
1848 else if (extra != NULL)
1849 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1850 }
1851
1852 /* If /K was present, we set up for handling MARK data. */
1853
1854 if (do_mark)
1855 {
1856 if (extra == NULL)
1857 {
1858 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1859 extra->flags = 0;
1860 }
1861 extra->mark = &markptr;
1862 extra->flags |= PCRE_EXTRA_MARK;
1863 }
1864
1865 /* If the 'F' option was present, we flip the bytes of all the integer
1866 fields in the regex data block and the study block. This is to make it
1867 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1868 compiled on a different architecture. */
1869
1870 if (do_flip)
1871 {
1872 real_pcre *rre = (real_pcre *)re;
1873 rre->magic_number =
1874 byteflip(rre->magic_number, sizeof(rre->magic_number));
1875 rre->size = byteflip(rre->size, sizeof(rre->size));
1876 rre->options = byteflip(rre->options, sizeof(rre->options));
1877 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1878 rre->top_bracket =
1879 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1880 rre->top_backref =
1881 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1882 rre->first_byte =
1883 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1884 rre->req_byte =
1885 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1886 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1887 sizeof(rre->name_table_offset));
1888 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1889 sizeof(rre->name_entry_size));
1890 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1891 sizeof(rre->name_count));
1892
1893 if (extra != NULL)
1894 {
1895 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1896 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1897 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1898 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1899 }
1900 }
1901
1902 /* Extract information from the compiled data if required */
1903
1904 SHOW_INFO:
1905
1906 if (do_debug)
1907 {
1908 fprintf(outfile, "------------------------------------------------------------------\n");
1909 pcre_printint(re, outfile, debug_lengths);
1910 }
1911
1912 /* We already have the options in get_options (see above) */
1913
1914 if (do_showinfo)
1915 {
1916 unsigned long int all_options;
1917 #if !defined NOINFOCHECK
1918 int old_first_char, old_options, old_count;
1919 #endif
1920 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1921 hascrorlf;
1922 int nameentrysize, namecount;
1923 const uschar *nametable;
1924
1925 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1926 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1927 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1928 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1929 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1930 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1931 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1932 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1933 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1934 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1935 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1936
1937 #if !defined NOINFOCHECK
1938 old_count = pcre_info(re, &old_options, &old_first_char);
1939 if (count < 0) fprintf(outfile,
1940 "Error %d from pcre_info()\n", count);
1941 else
1942 {
1943 if (old_count != count) fprintf(outfile,
1944 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1945 old_count);
1946
1947 if (old_first_char != first_char) fprintf(outfile,
1948 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1949 first_char, old_first_char);
1950
1951 if (old_options != (int)get_options) fprintf(outfile,
1952 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1953 get_options, old_options);
1954 }
1955 #endif
1956
1957 if (size != regex_gotten_store) fprintf(outfile,
1958 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1959 (int)size, (int)regex_gotten_store);
1960
1961 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1962 if (backrefmax > 0)
1963 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1964
1965 if (namecount > 0)
1966 {
1967 fprintf(outfile, "Named capturing subpatterns:\n");
1968 while (namecount-- > 0)
1969 {
1970 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1971 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1972 GET2(nametable, 0));
1973 nametable += nameentrysize;
1974 }
1975 }
1976
1977 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1978 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1979
1980 all_options = ((real_pcre *)re)->options;
1981 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1982
1983 if (get_options == 0) fprintf(outfile, "No options\n");
1984 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1985 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1986 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1987 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1988 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1989 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1990 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1991 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1992 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1993 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1994 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1995 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1996 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1997 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1998 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1999 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2000 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2001 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2002
2003 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2004
2005 switch (get_options & PCRE_NEWLINE_BITS)
2006 {
2007 case PCRE_NEWLINE_CR:
2008 fprintf(outfile, "Forced newline sequence: CR\n");
2009 break;
2010
2011 case PCRE_NEWLINE_LF:
2012 fprintf(outfile, "Forced newline sequence: LF\n");
2013 break;
2014
2015 case PCRE_NEWLINE_CRLF:
2016 fprintf(outfile, "Forced newline sequence: CRLF\n");
2017 break;
2018
2019 case PCRE_NEWLINE_ANYCRLF:
2020 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2021 break;
2022
2023 case PCRE_NEWLINE_ANY:
2024 fprintf(outfile, "Forced newline sequence: ANY\n");
2025 break;
2026
2027 default:
2028 break;
2029 }
2030
2031 if (first_char == -1)
2032 {
2033 fprintf(outfile, "First char at start or follows newline\n");
2034 }
2035 else if (first_char < 0)
2036 {
2037 fprintf(outfile, "No first char\n");
2038 }
2039 else
2040 {
2041 int ch = first_char & 255;
2042 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2043 "" : " (caseless)";
2044 if (PRINTHEX(ch))
2045 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2046 else
2047 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2048 }
2049
2050 if (need_char < 0)
2051 {
2052 fprintf(outfile, "No need char\n");
2053 }
2054 else
2055 {
2056 int ch = need_char & 255;
2057 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2058 "" : " (caseless)";
2059 if (PRINTHEX(ch))
2060 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2061 else
2062 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2063 }
2064
2065 /* Don't output study size; at present it is in any case a fixed
2066 value, but it varies, depending on the computer architecture, and
2067 so messes up the test suite. (And with the /F option, it might be
2068 flipped.) If study was forced by an external -s, don't show this
2069 information unless -i or -d was also present. This means that, except
2070 when auto-callouts are involved, the output from runs with and without
2071 -s should be identical. */
2072
2073 if (do_study || (force_study && showinfo && !no_force_study))
2074 {
2075 if (extra == NULL)
2076 fprintf(outfile, "Study returned NULL\n");
2077 else
2078 {
2079 uschar *start_bits = NULL;
2080 int minlength;
2081
2082 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2083 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2084
2085 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2086 if (start_bits == NULL)
2087 fprintf(outfile, "No set of starting bytes\n");
2088 else
2089 {
2090 int i;
2091 int c = 24;
2092 fprintf(outfile, "Starting byte set: ");
2093 for (i = 0; i < 256; i++)
2094 {
2095 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2096 {
2097 if (c > 75)
2098 {
2099 fprintf(outfile, "\n ");
2100 c = 2;
2101 }
2102 if (PRINTHEX(i) && i != ' ')
2103 {
2104 fprintf(outfile, "%c ", i);
2105 c += 2;
2106 }
2107 else
2108 {
2109 fprintf(outfile, "\\x%02x ", i);
2110 c += 5;
2111 }
2112 }
2113 }
2114 fprintf(outfile, "\n");
2115 }
2116 }
2117 }
2118 }
2119
2120 /* If the '>' option was present, we write out the regex to a file, and
2121 that is all. The first 8 bytes of the file are the regex length and then
2122 the study length, in big-endian order. */
2123
2124 if (to_file != NULL)
2125 {
2126 FILE *f = fopen((char *)to_file, "wb");
2127 if (f == NULL)
2128 {
2129 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2130 }
2131 else
2132 {
2133 uschar sbuf[8];
2134 sbuf[0] = (uschar)((true_size >> 24) & 255);
2135 sbuf[1] = (uschar)((true_size >> 16) & 255);
2136 sbuf[2] = (uschar)((true_size >> 8) & 255);
2137 sbuf[3] = (uschar)((true_size) & 255);
2138
2139 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2140 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2141 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2142 sbuf[7] = (uschar)((true_study_size) & 255);
2143
2144 if (fwrite(sbuf, 1, 8, f) < 8 ||
2145 fwrite(re, 1, true_size, f) < true_size)
2146 {
2147 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2148 }
2149 else
2150 {
2151 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2152
2153 /* If there is study data, write it, but verify the writing only
2154 if the studying was requested by /S, not just by -s. */
2155
2156 if (extra != NULL)
2157 {
2158 if (fwrite(extra->study_data, 1, true_study_size, f) <
2159 true_study_size)
2160 {
2161 fprintf(outfile, "Write error on %s: %s\n", to_file,
2162 strerror(errno));
2163 }
2164 else fprintf(outfile, "Study data written to %s\n", to_file);
2165 }
2166 }
2167 fclose(f);
2168 }
2169
2170 new_free(re);
2171 if (extra != NULL) new_free(extra);
2172 if (locale_set)
2173 {
2174 new_free((void *)tables);
2175 setlocale(LC_CTYPE, "C");
2176 locale_set = 0;
2177 }
2178 continue; /* With next regex */
2179 }
2180 } /* End of non-POSIX compile */
2181
2182 /* Read data lines and test them */
2183
2184 for (;;)
2185 {
2186 uschar *q;
2187 uschar *bptr;
2188 int *use_offsets = offsets;
2189 int use_size_offsets = size_offsets;
2190 int callout_data = 0;
2191 int callout_data_set = 0;
2192 int count, c;
2193 int copystrings = 0;
2194 int find_match_limit = default_find_match_limit;
2195 int getstrings = 0;
2196 int getlist = 0;
2197 int gmatched = 0;
2198 int start_offset = 0;
2199 int start_offset_sign = 1;
2200 int g_notempty = 0;
2201 int use_dfa = 0;
2202
2203 options = 0;
2204
2205 *copynames = 0;
2206 *getnames = 0;
2207
2208 copynamesptr = copynames;
2209 getnamesptr = getnames;
2210
2211 pcre_callout = callout;
2212 first_callout = 1;
2213 callout_extra = 0;
2214 callout_count = 0;
2215 callout_fail_count = 999999;
2216 callout_fail_id = -1;
2217 show_malloc = 0;
2218
2219 if (extra != NULL) extra->flags &=
2220 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2221
2222 len = 0;
2223 for (;;)
2224 {
2225 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2226 {
2227 if (len > 0) /* Reached EOF without hitting a newline */
2228 {
2229 fprintf(outfile, "\n");
2230 break;
2231 }
2232 done = 1;
2233 goto CONTINUE;
2234 }
2235 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2236 len = (int)strlen((char *)buffer);
2237 if (buffer[len-1] == '\n') break;
2238 }
2239
2240 while (len > 0 && isspace(buffer[len-1])) len--;
2241 buffer[len] = 0;
2242 if (len == 0) break;
2243
2244 p = buffer;
2245 while (isspace(*p)) p++;
2246
2247 bptr = q = dbuffer;
2248 while ((c = *p++) != 0)
2249 {
2250 int i = 0;
2251 int n = 0;
2252
2253 if (c == '\\') switch ((c = *p++))
2254 {
2255 case 'a': c = 7; break;
2256 case 'b': c = '\b'; break;
2257 case 'e': c = 27; break;
2258 case 'f': c = '\f'; break;
2259 case 'n': c = '\n'; break;
2260 case 'r': c = '\r'; break;
2261 case 't': c = '\t'; break;
2262 case 'v': c = '\v'; break;
2263
2264 case '0': case '1': case '2': case '3':
2265 case '4': case '5': case '6': case '7':
2266 c -= '0';
2267 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2268 c = c * 8 + *p++ - '0';
2269
2270 #if !defined NOUTF8
2271 if (use_utf8 && c > 255)
2272 {
2273 unsigned char buff8[8];
2274 int ii, utn;
2275 utn = ord2utf8(c, buff8);
2276 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2277 c = buff8[ii]; /* Last byte */
2278 }
2279 #endif
2280 break;
2281
2282 case 'x':
2283
2284 /* Handle \x{..} specially - new Perl thing for utf8 */
2285
2286 #if !defined NOUTF8
2287 if (*p == '{')
2288 {
2289 unsigned char *pt = p;
2290 c = 0;
2291 while (isxdigit(*(++pt)))
2292 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2293 if (*pt == '}')
2294 {
2295 unsigned char buff8[8];
2296 int ii, utn;
2297 if (use_utf8)
2298 {
2299 utn = ord2utf8(c, buff8);
2300 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2301 c = buff8[ii]; /* Last byte */
2302 }
2303 else
2304 {
2305 if (c > 255)
2306 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2307 "UTF-8 mode is not enabled.\n"
2308 "** Truncation will probably give the wrong result.\n", c);
2309 }
2310 p = pt + 1;
2311 break;
2312 }
2313 /* Not correct form; fall through */
2314 }
2315 #endif
2316
2317 /* Ordinary \x */
2318
2319 c = 0;
2320 while (i++ < 2 && isxdigit(*p))
2321 {
2322 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2323 p++;
2324 }
2325 break;
2326
2327 case 0: /* \ followed by EOF allows for an empty line */
2328 p--;
2329 continue;
2330
2331 case '>':
2332 if (*p == '-')
2333 {
2334 start_offset_sign = -1;
2335 p++;
2336 }
2337 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2338 start_offset *= start_offset_sign;
2339 continue;
2340
2341 case 'A': /* Option setting */
2342 options |= PCRE_ANCHORED;
2343 continue;
2344
2345 case 'B':
2346 options |= PCRE_NOTBOL;
2347 continue;
2348
2349 case 'C':
2350 if (isdigit(*p)) /* Set copy string */
2351 {
2352 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2353 copystrings |= 1 << n;
2354 }
2355 else if (isalnum(*p))
2356 {
2357 uschar *npp = copynamesptr;
2358 while (isalnum(*p)) *npp++ = *p++;
2359 *npp++ = 0;
2360 *npp = 0;
2361 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2362 if (n < 0)
2363 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2364 copynamesptr = npp;
2365 }
2366 else if (*p == '+')
2367 {
2368 callout_extra = 1;
2369 p++;
2370 }
2371 else if (*p == '-')
2372 {
2373 pcre_callout = NULL;
2374 p++;
2375 }
2376 else if (*p == '!')
2377 {
2378 callout_fail_id = 0;
2379 p++;
2380 while(isdigit(*p))
2381 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2382 callout_fail_count = 0;
2383 if (*p == '!')
2384 {
2385 p++;
2386 while(isdigit(*p))
2387 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2388 }
2389 }
2390 else if (*p == '*')
2391 {
2392 int sign = 1;
2393 callout_data = 0;
2394 if (*(++p) == '-') { sign = -1; p++; }
2395 while(isdigit(*p))
2396 callout_data = callout_data * 10 + *p++ - '0';
2397 callout_data *= sign;
2398 callout_data_set = 1;
2399 }
2400 continue;
2401
2402 #if !defined NODFA
2403 case 'D':
2404 #if !defined NOPOSIX
2405 if (posix || do_posix)
2406 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2407 else
2408 #endif
2409 use_dfa = 1;
2410 continue;
2411 #endif
2412
2413 #if !defined NODFA
2414 case 'F':
2415 options |= PCRE_DFA_SHORTEST;
2416 continue;
2417 #endif
2418
2419 case 'G':
2420 if (isdigit(*p))
2421 {
2422 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2423 getstrings |= 1 << n;
2424 }
2425 else if (isalnum(*p))
2426 {
2427 uschar *npp = getnamesptr;
2428 while (isalnum(*p)) *npp++ = *p++;
2429 *npp++ = 0;
2430 *npp = 0;
2431 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2432 if (n < 0)
2433 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2434 getnamesptr = npp;
2435 }
2436 continue;
2437
2438 case 'L':
2439 getlist = 1;
2440 continue;
2441
2442 case 'M':
2443 find_match_limit = 1;
2444 continue;
2445
2446 case 'N':
2447 if ((options & PCRE_NOTEMPTY) != 0)
2448 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2449 else
2450 options |= PCRE_NOTEMPTY;
2451 continue;
2452
2453 case 'O':
2454 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2455 if (n > size_offsets_max)
2456 {
2457 size_offsets_max = n;
2458 free(offsets);
2459 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2460 if (offsets == NULL)
2461 {
2462 printf("** Failed to get %d bytes of memory for offsets vector\n",
2463 (int)(size_offsets_max * sizeof(int)));
2464 yield = 1;
2465 goto EXIT;
2466 }
2467 }
2468 use_size_offsets = n;
2469 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2470 continue;
2471
2472 case 'P':
2473 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2474 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2475 continue;
2476
2477 case 'Q':
2478 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2479 if (extra == NULL)
2480 {
2481 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2482 extra->flags = 0;
2483 }
2484 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2485 extra->match_limit_recursion = n;
2486 continue;
2487
2488 case 'q':
2489 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2490 if (extra == NULL)
2491 {
2492 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2493 extra->flags = 0;
2494 }
2495 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2496 extra->match_limit = n;
2497 continue;
2498
2499 #if !defined NODFA
2500 case 'R':
2501 options |= PCRE_DFA_RESTART;
2502 continue;
2503 #endif
2504
2505 case 'S':
2506 show_malloc = 1;
2507 continue;
2508
2509 case 'Y':
2510 options |= PCRE_NO_START_OPTIMIZE;
2511 continue;
2512
2513 case 'Z':
2514 options |= PCRE_NOTEOL;
2515 continue;
2516
2517 case '?':
2518 options |= PCRE_NO_UTF8_CHECK;
2519 continue;
2520
2521 case '<':
2522 {
2523 int x = check_newline(p, outfile);
2524 if (x == 0) goto NEXT_DATA;
2525 options |= x;
2526 while (*p++ != '>');
2527 }
2528 continue;
2529 }
2530 *q++ = c;
2531 }
2532 *q = 0;
2533 len = (int)(q - dbuffer);
2534
2535 /* Move the data to the end of the buffer so that a read over the end of
2536 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2537 we are using the POSIX interface, we must include the terminating zero. */
2538
2539 #if !defined NOPOSIX
2540 if (posix || do_posix)
2541 {
2542 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2543 bptr += buffer_size - len - 1;
2544 }
2545 else
2546 #endif
2547 {
2548 memmove(bptr + buffer_size - len, bptr, len);
2549 bptr += buffer_size - len;
2550 }
2551
2552 if ((all_use_dfa || use_dfa) && find_match_limit)
2553 {
2554 printf("**Match limit not relevant for DFA matching: ignored\n");
2555 find_match_limit = 0;
2556 }
2557
2558 /* Handle matching via the POSIX interface, which does not
2559 support timing or playing with the match limit or callout data. */
2560
2561 #if !defined NOPOSIX
2562 if (posix || do_posix)
2563 {
2564 int rc;
2565 int eflags = 0;
2566 regmatch_t *pmatch = NULL;
2567 if (use_size_offsets > 0)
2568 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2569 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2570 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2571 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2572
2573 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2574
2575 if (rc != 0)
2576 {
2577 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2578 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2579 }
2580 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2581 != 0)
2582 {
2583 fprintf(outfile, "Matched with REG_NOSUB\n");
2584 }
2585 else
2586 {
2587 size_t i;
2588 for (i = 0; i < (size_t)use_size_offsets; i++)
2589 {
2590 if (pmatch[i].rm_so >= 0)
2591 {
2592 fprintf(outfile, "%2d: ", (int)i);
2593 (void)pchars(dbuffer + pmatch[i].rm_so,
2594 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2595 fprintf(outfile, "\n");
2596 if (do_showcaprest || (i == 0 && do_showrest))
2597 {
2598 fprintf(outfile, "%2d+ ", (int)i);
2599 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2600 outfile);
2601 fprintf(outfile, "\n");
2602 }
2603 }
2604 }
2605 }
2606 free(pmatch);
2607 }
2608
2609 /* Handle matching via the native interface - repeats for /g and /G */
2610
2611 else
2612 #endif /* !defined NOPOSIX */
2613
2614 for (;; gmatched++) /* Loop for /g or /G */
2615 {
2616 markptr = NULL;
2617
2618 if (timeitm > 0)
2619 {
2620 register int i;
2621 clock_t time_taken;
2622 clock_t start_time = clock();
2623
2624 #if !defined NODFA
2625 if (all_use_dfa || use_dfa)
2626 {
2627 int workspace[1000];
2628 for (i = 0; i < timeitm; i++)
2629 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2630 options | g_notempty, use_offsets, use_size_offsets, workspace,
2631 sizeof(workspace)/sizeof(int));
2632 }
2633 else
2634 #endif
2635
2636 for (i = 0; i < timeitm; i++)
2637 count = pcre_exec(re, extra, (char *)bptr, len,
2638 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2639
2640 time_taken = clock() - start_time;
2641 fprintf(outfile, "Execute time %.4f milliseconds\n",
2642 (((double)time_taken * 1000.0) / (double)timeitm) /
2643 (double)CLOCKS_PER_SEC);
2644 }
2645
2646 /* If find_match_limit is set, we want to do repeated matches with
2647 varying limits in order to find the minimum value for the match limit and
2648 for the recursion limit. */
2649
2650 if (find_match_limit)
2651 {
2652 if (extra == NULL)
2653 {
2654 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2655 extra->flags = 0;
2656 }
2657
2658 (void)check_match_limit(re, extra, bptr, len, start_offset,
2659 options|g_notempty, use_offsets, use_size_offsets,
2660 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2661 PCRE_ERROR_MATCHLIMIT, "match()");
2662
2663 count = check_match_limit(re, extra, bptr, len, start_offset,
2664 options|g_notempty, use_offsets, use_size_offsets,
2665 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2666 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2667 }
2668
2669 /* If callout_data is set, use the interface with additional data */
2670
2671 else if (callout_data_set)
2672 {
2673 if (extra == NULL)
2674 {
2675 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2676 extra->flags = 0;
2677 }
2678 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2679 extra->callout_data = &callout_data;
2680 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2681 options | g_notempty, use_offsets, use_size_offsets);
2682 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2683 }
2684
2685 /* The normal case is just to do the match once, with the default
2686 value of match_limit. */
2687
2688 #if !defined NODFA
2689 else if (all_use_dfa || use_dfa)
2690 {
2691 int workspace[1000];
2692 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2693 options | g_notempty, use_offsets, use_size_offsets, workspace,
2694 sizeof(workspace)/sizeof(int));
2695 if (count == 0)
2696 {
2697 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2698 count = use_size_offsets/2;
2699 }
2700 }
2701 #endif
2702
2703 else
2704 {
2705 count = pcre_exec(re, extra, (char *)bptr, len,
2706 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2707 if (count == 0)
2708 {
2709 fprintf(outfile, "Matched, but too many substrings\n");
2710 count = use_size_offsets/3;
2711 }
2712 }
2713
2714 /* Matched */
2715
2716 if (count >= 0)
2717 {
2718 int i, maxcount;
2719
2720 #if !defined NODFA
2721 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2722 #endif
2723 maxcount = use_size_offsets/3;
2724
2725 /* This is a check against a lunatic return value. */
2726
2727 if (count > maxcount)
2728 {
2729 fprintf(outfile,
2730 "** PCRE error: returned count %d is too big for offset size %d\n",
2731 count, use_size_offsets);
2732 count = use_size_offsets/3;
2733 if (do_g || do_G)
2734 {
2735 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2736 do_g = do_G = FALSE; /* Break g/G loop */
2737 }
2738 }
2739
2740 /* do_allcaps requests showing of all captures in the pattern, to check
2741 unset ones at the end. */
2742
2743 if (do_allcaps)
2744 {
2745 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2746 count++; /* Allow for full match */
2747 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2748 }
2749
2750 /* Output the captured substrings */
2751
2752 for (i = 0; i < count * 2; i += 2)
2753 {
2754 if (use_offsets[i] < 0)
2755 {
2756 if (use_offsets[i] != -1)
2757 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2758 use_offsets[i], i);
2759 if (use_offsets[i+1] != -1)
2760 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2761 use_offsets[i+1], i+1);
2762 fprintf(outfile, "%2d: <unset>\n", i/2);
2763 }
2764 else
2765 {
2766 fprintf(outfile, "%2d: ", i/2);
2767 (void)pchars(bptr + use_offsets[i],
2768 use_offsets[i+1] - use_offsets[i], outfile);
2769 fprintf(outfile, "\n");
2770 if (do_showcaprest || (i == 0 && do_showrest))
2771 {
2772 fprintf(outfile, "%2d+ ", i/2);
2773 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2774 outfile);
2775 fprintf(outfile, "\n");
2776 }
2777 }
2778 }
2779
2780 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2781
2782 for (i = 0; i < 32; i++)
2783 {
2784 if ((copystrings & (1 << i)) != 0)
2785 {
2786 char copybuffer[256];
2787 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2788 i, copybuffer, sizeof(copybuffer));
2789 if (rc < 0)
2790 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2791 else
2792 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2793 }
2794 }
2795
2796 for (copynamesptr = copynames;
2797 *copynamesptr != 0;
2798 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2799 {
2800 char copybuffer[256];
2801 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2802 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2803 if (rc < 0)
2804 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2805 else
2806 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2807 }
2808
2809 for (i = 0; i < 32; i++)
2810 {
2811 if ((getstrings & (1 << i)) != 0)
2812 {
2813 const char *substring;
2814 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2815 i, &substring);
2816 if (rc < 0)
2817 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2818 else
2819 {
2820 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2821 pcre_free_substring(substring);
2822 }
2823 }
2824 }
2825
2826 for (getnamesptr = getnames;
2827 *getnamesptr != 0;
2828 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2829 {
2830 const char *substring;
2831 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2832 count, (char *)getnamesptr, &substring);
2833 if (rc < 0)
2834 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2835 else
2836 {
2837 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2838 pcre_free_substring(substring);
2839 }
2840 }
2841
2842 if (getlist)
2843 {
2844 const char **stringlist;
2845 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2846 &stringlist);
2847 if (rc < 0)
2848 fprintf(outfile, "get substring list failed %d\n", rc);
2849 else
2850 {
2851 for (i = 0; i < count; i++)
2852 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2853 if (stringlist[i] != NULL)
2854 fprintf(outfile, "string list not terminated by NULL\n");
2855 /* free((void *)stringlist); */
2856 pcre_free_substring_list(stringlist);
2857 }
2858 }
2859 }
2860
2861 /* There was a partial match */
2862
2863 else if (count == PCRE_ERROR_PARTIAL)
2864 {
2865 if (markptr == NULL) fprintf(outfile, "Partial match");
2866 else fprintf(outfile, "Partial match, mark=%s", markptr);
2867 if (use_size_offsets > 1)
2868 {
2869 fprintf(outfile, ": ");
2870 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2871 outfile);
2872 }
2873 fprintf(outfile, "\n");
2874 break; /* Out of the /g loop */
2875 }
2876
2877 /* Failed to match. If this is a /g or /G loop and we previously set
2878 g_notempty after a null match, this is not necessarily the end. We want
2879 to advance the start offset, and continue. We won't be at the end of the
2880 string - that was checked before setting g_notempty.
2881
2882 Complication arises in the case when the newline convention is "any",
2883 "crlf", or "anycrlf". If the previous match was at the end of a line
2884 terminated by CRLF, an advance of one character just passes the \r,
2885 whereas we should prefer the longer newline sequence, as does the code in
2886 pcre_exec(). Fudge the offset value to achieve this. We check for a
2887 newline setting in the pattern; if none was set, use pcre_config() to
2888 find the default.
2889
2890 Otherwise, in the case of UTF-8 matching, the advance must be one
2891 character, not one byte. */
2892
2893 else
2894 {
2895 if (g_notempty != 0)
2896 {
2897 int onechar = 1;
2898 unsigned int obits = ((real_pcre *)re)->options;
2899 use_offsets[0] = start_offset;
2900 if ((obits & PCRE_NEWLINE_BITS) == 0)
2901 {
2902 int d;
2903 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2904 /* Note that these values are always the ASCII ones, even in
2905 EBCDIC environments. CR = 13, NL = 10. */
2906 obits = (d == 13)? PCRE_NEWLINE_CR :
2907 (d == 10)? PCRE_NEWLINE_LF :
2908 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2909 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2910 (d == -1)? PCRE_NEWLINE_ANY : 0;
2911 }
2912 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2913 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2914 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2915 &&
2916 start_offset < len - 1 &&
2917 bptr[start_offset] == '\r' &&
2918 bptr[start_offset+1] == '\n')
2919 onechar++;
2920 else if (use_utf8)
2921 {
2922 while (start_offset + onechar < len)
2923 {
2924 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2925 onechar++;
2926 }
2927 }
2928 use_offsets[1] = start_offset + onechar;
2929 }
2930 else
2931 {
2932 switch(count)
2933 {
2934 case PCRE_ERROR_NOMATCH:
2935 if (gmatched == 0)
2936 {
2937 if (markptr == NULL) fprintf(outfile, "No match\n");
2938 else fprintf(outfile, "No match, mark = %s\n", markptr);
2939 }
2940 break;
2941
2942 case PCRE_ERROR_BADUTF8:
2943 case PCRE_ERROR_SHORTUTF8:
2944 fprintf(outfile, "Error %d (%s UTF-8 string)", count,
2945 (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
2946 if (use_size_offsets >= 2)
2947 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
2948 use_offsets[1]);
2949 fprintf(outfile, "\n");
2950 break;
2951
2952 default:
2953 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
2954 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
2955 else
2956 fprintf(outfile, "Error %d (Unexpected value)\n", count);
2957 break;
2958 }
2959
2960 break; /* Out of the /g loop */
2961 }
2962 }
2963
2964 /* If not /g or /G we are done */
2965
2966 if (!do_g && !do_G) break;
2967
2968 /* If we have matched an empty string, first check to see if we are at
2969 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2970 Perl's /g option does. This turns out to be rather cunning. First we set
2971 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2972 same point. If this fails (picked up above) we advance to the next
2973 character. */
2974
2975 g_notempty = 0;
2976
2977 if (use_offsets[0] == use_offsets[1])
2978 {
2979 if (use_offsets[0] == len) break;
2980 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2981 }
2982
2983 /* For /g, update the start offset, leaving the rest alone */
2984
2985 if (do_g) start_offset = use_offsets[1];
2986
2987 /* For /G, update the pointer and length */
2988
2989 else
2990 {
2991 bptr += use_offsets[1];
2992 len -= use_offsets[1];
2993 }
2994 } /* End of loop for /g and /G */
2995
2996 NEXT_DATA: continue;
2997 } /* End of loop for data lines */
2998
2999 CONTINUE:
3000
3001 #if !defined NOPOSIX
3002 if (posix || do_posix) regfree(&preg);
3003 #endif
3004
3005 if (re != NULL) new_free(re);
3006 if (extra != NULL) new_free(extra);
3007 if (locale_set)
3008 {
3009 new_free((void *)tables);
3010 setlocale(LC_CTYPE, "C");
3011 locale_set = 0;
3012 }
3013 }
3014
3015 if (infile == stdin) fprintf(outfile, "\n");
3016
3017 EXIT:
3018
3019 if (infile != NULL && infile != stdin) fclose(infile);
3020 if (outfile != NULL && outfile != stdout) fclose(outfile);
3021
3022 free(buffer);
3023 free(dbuffer);
3024 free(pbuffer);
3025 free(offsets);
3026
3027 return yield;
3028 }
3029
3030 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12