/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 654 - (show annotations) (download)
Tue Aug 2 11:00:40 2011 UTC (3 years, 4 months ago) by ph10
File MIME type: text/plain
File size: 91340 byte(s)
Documentation and general text tidies in preparation for test release.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #ifndef isatty
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
78 #ifndef fileno
79 #define fileno _fileno
80 #endif
81
82 /* A user sent this fix for Borland Builder 5 under Windows. */
83
84 #ifdef __BORLANDC__
85 #define _setmode(handle, mode) setmode(handle, mode)
86 #endif
87
88 /* Not Windows */
89
90 #else
91 #include <sys/time.h> /* These two includes are needed */
92 #include <sys/resource.h> /* for setrlimit(). */
93 #define INPUT_MODE "rb"
94 #define OUTPUT_MODE "wb"
95 #endif
96
97
98 /* We have to include pcre_internal.h because we need the internal info for
99 displaying the results of pcre_study() and we also need to know about the
100 internal macros, structures, and other internal data values; pcretest has
101 "inside information" compared to a program that strictly follows the PCRE API.
102
103 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105 appropriately for an application, not for building PCRE. */
106
107 #include "pcre.h"
108 #include "pcre_internal.h"
109
110 /* We need access to some of the data tables that PCRE uses. So as not to have
111 to keep two copies, we include the source file here, changing the names of the
112 external symbols to prevent clashes. */
113
114 #define _pcre_ucp_gentype ucp_gentype
115 #define _pcre_utf8_table1 utf8_table1
116 #define _pcre_utf8_table1_size utf8_table1_size
117 #define _pcre_utf8_table2 utf8_table2
118 #define _pcre_utf8_table3 utf8_table3
119 #define _pcre_utf8_table4 utf8_table4
120 #define _pcre_utt utt
121 #define _pcre_utt_size utt_size
122 #define _pcre_utt_names utt_names
123 #define _pcre_OP_lengths OP_lengths
124
125 #include "pcre_tables.c"
126
127 /* We also need the pcre_printint() function for printing out compiled
128 patterns. This function is in a separate file so that it can be included in
129 pcre_compile.c when that module is compiled with debugging enabled. It needs to
130 know which case is being compiled. */
131
132 #define COMPILING_PCRETEST
133 #include "pcre_printint.src"
134
135 /* The definition of the macro PRINTABLE, which determines whether to print an
136 output character as-is or as a hex value when showing compiled patterns, is
137 contained in the printint.src file. We use it here also, in cases when the
138 locale has not been explicitly changed, so as to get consistent output from
139 systems that differ in their output from isprint() even in the "C" locale. */
140
141 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
142
143 /* It is possible to compile this test program without including support for
144 testing the POSIX interface, though this is not available via the standard
145 Makefile. */
146
147 #if !defined NOPOSIX
148 #include "pcreposix.h"
149 #endif
150
151 /* It is also possible, for the benefit of the version currently imported into
152 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
153 interface to the DFA matcher (NODFA), and without the doublecheck of the old
154 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
155 UTF8 support if PCRE is built without it. */
156
157 #ifndef SUPPORT_UTF8
158 #ifndef NOUTF8
159 #define NOUTF8
160 #endif
161 #endif
162
163
164 /* Other parameters */
165
166 #ifndef CLOCKS_PER_SEC
167 #ifdef CLK_TCK
168 #define CLOCKS_PER_SEC CLK_TCK
169 #else
170 #define CLOCKS_PER_SEC 100
171 #endif
172 #endif
173
174 /* This is the default loop count for timing. */
175
176 #define LOOPREPEAT 500000
177
178 /* Static variables */
179
180 static FILE *outfile;
181 static int log_store = 0;
182 static int callout_count;
183 static int callout_extra;
184 static int callout_fail_count;
185 static int callout_fail_id;
186 static int debug_lengths;
187 static int first_callout;
188 static int locale_set = 0;
189 static int show_malloc;
190 static int use_utf8;
191 static size_t gotten_store;
192 static const unsigned char *last_callout_mark = NULL;
193
194 /* The buffers grow automatically if very long input lines are encountered. */
195
196 static int buffer_size = 50000;
197 static uschar *buffer = NULL;
198 static uschar *dbuffer = NULL;
199 static uschar *pbuffer = NULL;
200
/* Textual explanations for runtime error codes. The position of each entry is
shown on the right; a NULL entry means that there is no generic text for that
position. NOTE(review): presumably the table is indexed by the negated PCRE
error code - confirm at the point of use. */

static const char *errtexts[] = {
  NULL,                                              /*  0: no error */
  NULL,                                              /*  1: NOMATCH is handled specially */
  "NULL argument passed",                            /*  2 */
  "bad option value",                                /*  3 */
  "magic number missing",                            /*  4 */
  "unknown opcode - pattern overwritten?",           /*  5 */
  "no more memory",                                  /*  6 */
  NULL,                                              /*  7: never returned by pcre_exec() or pcre_dfa_exec() */
  "match limit exceeded",                            /*  8 */
  "callout error code",                              /*  9 */
  NULL,                                              /* 10: BADUTF8 is handled specially */
  "bad UTF-8 offset",                                /* 11 */
  NULL,                                              /* 12: PARTIAL is handled specially */
  "not used - internal error",                       /* 13 */
  "internal error - pattern overwritten?",           /* 14 */
  "bad count value",                                 /* 15 */
  "item unsupported for DFA matching",               /* 16 */
  "backreference condition or recursion test not supported for DFA matching",  /* 17 */
  "match limit not supported for DFA matching",      /* 18 */
  "workspace size exceeded in DFA matching",         /* 19 */
  "too much recursion for DFA matching",             /* 20 */
  "recursion limit exceeded",                        /* 21 */
  "not used - internal error",                       /* 22 */
  "invalid combination of newline options",          /* 23 */
  "bad offset value",                                /* 24 */
  NULL,                                              /* 25: SHORTUTF8 is handled specially */
  "nested recursion at the same subject position"    /* 26 */
};
232
233
234 /*************************************************
235 * Alternate character tables *
236 *************************************************/
237
238 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
239 using the default tables of the library. However, the T option can be used to
240 select alternate sets of tables, for different kinds of testing. Note also that
241 the L (locale) option also adjusts the tables. */
242
/* This is the set of tables distributed as default with PCRE. It recognizes
only ASCII characters. The array is four consecutive sub-tables: a 256-byte
lower casing table, a 256-byte case flipping table, 320 bytes of character
class bit maps (ten maps of 32 bytes), and a 256-byte character type table. */

static const unsigned char tables0[] = {

/* This table is a lower casing table. */

  0, 1, 2, 3, 4, 5, 6, 7,
  8, 9, 10, 11, 12, 13, 14, 15,
  16, 17, 18, 19, 20, 21, 22, 23,
  24, 25, 26, 27, 28, 29, 30, 31,
  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
  96, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table is a case flipping table. */

  0, 1, 2, 3, 4, 5, 6, 7,
  8, 9, 10, 11, 12, 13, 14, 15,
  16, 17, 18, 19, 20, 21, 22, 23,
  24, 25, 26, 27, 28, 29, 30, 31,
  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
  96, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. */

  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
  0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* This table identifies various classes of character by individual bits:
  0x01   white space character
  0x02   letter
  0x04   decimal digit
  0x08   hexadecimal digit
  0x10   alphanumeric or '_'
  0x80   regular expression metacharacter or binary zero
*/

  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
  0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
  0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
414
/* This is a set of tables that came originally from a Windows user. It seems to
be at least an approximation of ISO 8859. In particular, there are characters
greater than 128 that are marked as spaces, letters, etc. The array has the
same overall size as tables0 and is split below into the same four sub-tables
(256 + 256 + 320 + 256 bytes). */

static const unsigned char tables1[] = {

/* Lower casing table (256 bytes). */

  0,1,2,3,4,5,6,7,
  8,9,10,11,12,13,14,15,
  16,17,18,19,20,21,22,23,
  24,25,26,27,28,29,30,31,
  32,33,34,35,36,37,38,39,
  40,41,42,43,44,45,46,47,
  48,49,50,51,52,53,54,55,
  56,57,58,59,60,61,62,63,
  64,97,98,99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,91,92,93,94,95,
  96,97,98,99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,215,
  248,249,250,251,252,253,254,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* Case flipping table (256 bytes). */

  0,1,2,3,4,5,6,7,
  8,9,10,11,12,13,14,15,
  16,17,18,19,20,21,22,23,
  24,25,26,27,28,29,30,31,
  32,33,34,35,36,37,38,39,
  40,41,42,43,44,45,46,47,
  48,49,50,51,52,53,54,55,
  56,57,58,59,60,61,62,63,
  64,97,98,99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,91,92,93,94,95,
  96,65,66,67,68,69,70,71,
  72,73,74,75,76,77,78,79,
  80,81,82,83,84,85,86,87,
  88,89,90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,215,
  248,249,250,251,252,253,254,223,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,247,
  216,217,218,219,220,221,222,255,

/* Character class bit maps (ten maps of 32 bytes each, 320 bytes).
NOTE(review): the maps are presumably in the same class order as in tables0 -
confirm. */

  0,62,0,0,1,0,0,0,
  0,0,0,0,0,0,0,0,
  32,0,0,0,1,0,0,0,
  0,0,0,0,0,0,0,0,

  0,0,0,0,0,0,255,3,
  126,0,0,0,126,0,0,0,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,

  0,0,0,0,0,0,255,3,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,12,2,
  0,0,0,0,0,0,0,0,

  0,0,0,0,0,0,0,0,
  254,255,255,7,0,0,0,0,
  0,0,0,0,0,0,0,0,
  255,255,127,127,0,0,0,0,

  0,0,0,0,0,0,0,0,
  0,0,0,0,254,255,255,7,
  0,0,0,0,0,4,32,4,
  0,0,0,128,255,255,127,255,

  0,0,0,0,0,0,255,3,
  254,255,255,135,254,255,255,7,
  0,0,0,0,0,4,44,6,
  255,255,127,255,255,255,127,255,

  0,0,0,0,254,255,255,255,
  255,255,255,255,255,255,255,127,
  0,0,0,0,254,255,255,255,
  255,255,255,255,255,255,255,255,

  0,2,0,0,255,255,255,255,
  255,255,255,255,255,255,255,127,
  0,0,0,0,255,255,255,255,
  255,255,255,255,255,255,255,255,

  0,0,0,0,254,255,0,252,
  1,0,0,248,1,0,0,120,
  0,0,0,0,254,255,255,255,
  0,0,128,0,0,0,128,0,

  255,255,255,255,0,0,0,0,
  0,0,0,0,0,0,0,128,
  255,255,255,255,0,0,0,0,
  0,0,0,0,0,0,0,0,

/* Character type table (256 bytes), one flag byte per character. */

  128,0,0,0,0,0,0,0,
  0,1,1,0,1,1,0,0,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,
  1,0,0,0,128,0,0,0,
  128,128,128,128,0,0,128,0,
  28,28,28,28,28,28,28,28,
  28,28,0,0,0,0,0,128,
  0,26,26,26,26,26,26,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,128,128,0,128,16,
  0,26,26,26,26,26,26,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,128,128,0,0,0,
  0,0,0,0,0,1,0,0,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,
  1,0,0,0,0,0,0,0,
  0,0,18,0,0,0,0,0,
  0,0,20,20,0,18,0,0,
  0,20,18,0,0,0,0,0,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,0,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,0,
  18,18,18,18,18,18,18,18
};
557
558
559
560
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
in their libraries, but can provide the same facility by this simple
alternative function, which looks the message up in the sys_errlist[] table.

Argument:
  n           the error number

Returns:      sys_errlist[n] when 0 <= n < sys_nerr;
              otherwise the fixed text "unknown error number"
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  if (n < 0 || n >= sys_nerr) return "unknown error number";
  return sys_errlist[n];
}
#endif /* HAVE_STRERROR */
580
581
582
583
584 /*************************************************
585 * Read or extend an input line *
586 *************************************************/
587
588 /* Input lines are read into buffer, but both patterns and data lines can be
589 continued over multiple input lines. In addition, if the buffer fills up, we
590 want to automatically expand it so as to be able to handle extremely large
591 lines that are needed for certain stress tests. When the input buffer is
592 expanded, the other two buffers must also be expanded likewise, and the
593 contents of pbuffer, which are a copy of the input for callouts, must be
594 preserved (for when expansion happens for a data line). This is not the most
595 optimal way of handling this, but hey, this is just a test program!
596
597 Arguments:
598 f the file to read
599 start where in buffer to start (this *must* be within buffer)
600 prompt for stdin or readline()
601
602 Returns: pointer to the start of new data
603 could be a copy of start, or could be moved
604 NULL if no data read and EOF reached
605 */
606
607 static uschar *
608 extend_inputline(FILE *f, uschar *start, const char *prompt)
609 {
610 uschar *here = start;
611
612 for (;;)
613 {
614 int rlen = (int)(buffer_size - (here - buffer));
615
616 if (rlen > 1000)
617 {
618 int dlen;
619
620 /* If libreadline support is required, use readline() to read a line if the
621 input is a terminal. Note that readline() removes the trailing newline, so
622 we must put it back again, to be compatible with fgets(). */
623
624 #ifdef SUPPORT_LIBREADLINE
625 if (isatty(fileno(f)))
626 {
627 size_t len;
628 char *s = readline(prompt);
629 if (s == NULL) return (here == start)? NULL : start;
630 len = strlen(s);
631 if (len > 0) add_history(s);
632 if (len > rlen - 1) len = rlen - 1;
633 memcpy(here, s, len);
634 here[len] = '\n';
635 here[len+1] = 0;
636 free(s);
637 }
638 else
639 #endif
640
641 /* Read the next line by normal means, prompting if the file is stdin. */
642
643 {
644 if (f == stdin) printf("%s", prompt);
645 if (fgets((char *)here, rlen, f) == NULL)
646 return (here == start)? NULL : start;
647 }
648
649 dlen = (int)strlen((char *)here);
650 if (dlen > 0 && here[dlen - 1] == '\n') return start;
651 here += dlen;
652 }
653
654 else
655 {
656 int new_buffer_size = 2*buffer_size;
657 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
658 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
659 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
660
661 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
662 {
663 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
664 exit(1);
665 }
666
667 memcpy(new_buffer, buffer, buffer_size);
668 memcpy(new_pbuffer, pbuffer, buffer_size);
669
670 buffer_size = new_buffer_size;
671
672 start = new_buffer + (start - buffer);
673 here = new_buffer + (here - buffer);
674
675 free(buffer);
676 free(dbuffer);
677 free(pbuffer);
678
679 buffer = new_buffer;
680 dbuffer = new_dbuffer;
681 pbuffer = new_pbuffer;
682 }
683 }
684
685 return NULL; /* Control never gets here */
686 }
687
688
689
690
691
692
693
/*************************************************
*          Read number from string               *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then a run of decimal digits is converted. No
sign is accepted. A value too large for an int is clamped to INT_MAX rather
than being allowed to overflow (signed overflow is undefined behaviour).

Arguments:
  str           string to be converted
  endptr        where to put the end pointer; it is set to the first
                  character after the digits (or after the white space if
                  there are no digits)

Returns:        the value as an int; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  while (isdigit(*str))
    {
    int digit = (int)(*str++ - '0');

    /* Test before multiplying, so that the overflow never happens. Once the
    value has been clamped the test stays true for all remaining digits. */

    if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
    }

  *endptr = str;
  return result;
}
718
719
720
721
722 /*************************************************
723 * Convert UTF-8 string to value *
724 *************************************************/
725
726 /* This function takes one or more bytes that represents a UTF-8 character,
727 and returns the value of the character.
728
729 Argument:
730 utf8bytes a pointer to the byte vector
731 vptr a pointer to an int to receive the value
732
733 Returns: > 0 => the number of bytes consumed
734 -6 to 0 => malformed UTF-8 character at offset = (-return)
735 */
736
737 #if !defined NOUTF8
738
739 static int
740 utf82ord(unsigned char *utf8bytes, int *vptr)
741 {
742 int c = *utf8bytes++;
743 int d = c;
744 int i, j, s;
745
746 for (i = -1; i < 6; i++) /* i is number of additional bytes */
747 {
748 if ((d & 0x80) == 0) break;
749 d <<= 1;
750 }
751
752 if (i == -1) { *vptr = c; return 1; } /* ascii character */
753 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
754
755 /* i now has a value in the range 1-5 */
756
757 s = 6*i;
758 d = (c & utf8_table3[i]) << s;
759
760 for (j = 0; j < i; j++)
761 {
762 c = *utf8bytes++;
763 if ((c & 0xc0) != 0x80) return -(j+1);
764 s -= 6;
765 d |= (c & 0x3f) << s;
766 }
767
768 /* Check that encoding was the correct unique one */
769
770 for (j = 0; j < utf8_table1_size; j++)
771 if (d <= utf8_table1[j]) break;
772 if (j != i) return -(i+1);
773
774 /* Valid value */
775
776 *vptr = d;
777 return i+1;
778 }
779
780 #endif
781
782
783
784 /*************************************************
785 * Convert character value to UTF-8 *
786 *************************************************/
787
788 /* This function takes an integer value in the range 0 - 0x7fffffff
789 and encodes it as a UTF-8 character in 0 to 6 bytes.
790
791 Arguments:
792 cvalue the character value
793 utf8bytes pointer to buffer for result - at least 6 bytes long
794
795 Returns: number of characters placed in the buffer
796 */
797
798 #if !defined NOUTF8
799
800 static int
801 ord2utf8(int cvalue, uschar *utf8bytes)
802 {
803 register int i, j;
804 for (i = 0; i < utf8_table1_size; i++)
805 if (cvalue <= utf8_table1[i]) break;
806 utf8bytes += i;
807 for (j = i; j > 0; j--)
808 {
809 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
810 cvalue >>= 6;
811 }
812 *utf8bytes = utf8_table2[i] | cvalue;
813 return i + 1;
814 }
815
816 #endif
817
818
819
820 /*************************************************
821 * Print character string *
822 *************************************************/
823
824 /* Character string printing function. Must handle UTF-8 strings in utf8
825 mode. Yields number of characters printed. If handed a NULL file, just counts
826 chars without printing. */
827
828 static int pchars(unsigned char *p, int length, FILE *f)
829 {
830 int c = 0;
831 int yield = 0;
832
833 while (length-- > 0)
834 {
835 #if !defined NOUTF8
836 if (use_utf8)
837 {
838 int rc = utf82ord(p, &c);
839
840 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
841 {
842 length -= rc - 1;
843 p += rc;
844 if (PRINTHEX(c))
845 {
846 if (f != NULL) fprintf(f, "%c", c);
847 yield++;
848 }
849 else
850 {
851 int n = 4;
852 if (f != NULL) fprintf(f, "\\x{%02x}", c);
853 yield += (n <= 0x000000ff)? 2 :
854 (n <= 0x00000fff)? 3 :
855 (n <= 0x0000ffff)? 4 :
856 (n <= 0x000fffff)? 5 : 6;
857 }
858 continue;
859 }
860 }
861 #endif
862
863 /* Not UTF-8, or malformed UTF-8 */
864
865 c = *p++;
866 if (PRINTHEX(c))
867 {
868 if (f != NULL) fprintf(f, "%c", c);
869 yield++;
870 }
871 else
872 {
873 if (f != NULL) fprintf(f, "\\x%02x", c);
874 yield += 4;
875 }
876 }
877
878 return yield;
879 }
880
881
882
883 /*************************************************
884 * Callout function *
885 *************************************************/
886
887 /* Called from PCRE as a result of the (?C) item. We print out where we are in
888 the match. Yield zero unless more callouts than the fail count, or the callout
889 data is not zero. */
890
891 static int callout(pcre_callout_block *cb)
892 {
893 FILE *f = (first_callout | callout_extra)? outfile : NULL;
894 int i, pre_start, post_start, subject_length;
895
896 if (callout_extra)
897 {
898 fprintf(f, "Callout %d: last capture = %d\n",
899 cb->callout_number, cb->capture_last);
900
901 for (i = 0; i < cb->capture_top * 2; i += 2)
902 {
903 if (cb->offset_vector[i] < 0)
904 fprintf(f, "%2d: <unset>\n", i/2);
905 else
906 {
907 fprintf(f, "%2d: ", i/2);
908 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
909 cb->offset_vector[i+1] - cb->offset_vector[i], f);
910 fprintf(f, "\n");
911 }
912 }
913 }
914
915 /* Re-print the subject in canonical form, the first time or if giving full
916 datails. On subsequent calls in the same match, we use pchars just to find the
917 printed lengths of the substrings. */
918
919 if (f != NULL) fprintf(f, "--->");
920
921 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
922 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
923 cb->current_position - cb->start_match, f);
924
925 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
926
927 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
928 cb->subject_length - cb->current_position, f);
929
930 if (f != NULL) fprintf(f, "\n");
931
932 /* Always print appropriate indicators, with callout number if not already
933 shown. For automatic callouts, show the pattern offset. */
934
935 if (cb->callout_number == 255)
936 {
937 fprintf(outfile, "%+3d ", cb->pattern_position);
938 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
939 }
940 else
941 {
942 if (callout_extra) fprintf(outfile, " ");
943 else fprintf(outfile, "%3d ", cb->callout_number);
944 }
945
946 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
947 fprintf(outfile, "^");
948
949 if (post_start > 0)
950 {
951 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
952 fprintf(outfile, "^");
953 }
954
955 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
956 fprintf(outfile, " ");
957
958 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
959 pbuffer + cb->pattern_position);
960
961 fprintf(outfile, "\n");
962 first_callout = 0;
963
964 if (cb->mark != last_callout_mark)
965 {
966 fprintf(outfile, "Latest Mark: %s\n",
967 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
968 last_callout_mark = cb->mark;
969 }
970
971 if (cb->callout_data != NULL)
972 {
973 int callout_data = *((int *)(cb->callout_data));
974 if (callout_data != 0)
975 {
976 fprintf(outfile, "Callout data = %d\n", callout_data);
977 return callout_data;
978 }
979 }
980
981 return (cb->callout_number != callout_fail_id)? 0 :
982 (++callout_count >= callout_fail_count)? 1 : 0;
983 }
984
985
986 /*************************************************
987 * Local malloc functions *
988 *************************************************/
989
990 /* Alternative malloc function, to test functionality and show the size of the
991 compiled re. */
992
993 static void *new_malloc(size_t size)
994 {
995 void *block = malloc(size);
996 gotten_store = size;
997 if (show_malloc)
998 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
999 return block;
1000 }
1001
1002 static void new_free(void *block)
1003 {
1004 if (show_malloc)
1005 fprintf(outfile, "free %p\n", block);
1006 free(block);
1007 }
1008
1009
1010 /* For recursion malloc/free, to test stacking calls */
1011
1012 static void *stack_malloc(size_t size)
1013 {
1014 void *block = malloc(size);
1015 if (show_malloc)
1016 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1017 return block;
1018 }
1019
1020 static void stack_free(void *block)
1021 {
1022 if (show_malloc)
1023 fprintf(outfile, "stack_free %p\n", block);
1024 free(block);
1025 }
1026
1027
1028 /*************************************************
1029 * Call pcre_fullinfo() *
1030 *************************************************/
1031
1032 /* Get one piece of information from the pcre_fullinfo() function */
1033
1034 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1035 {
1036 int rc;
1037 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1038 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1039 }
1040
1041
1042
1043 /*************************************************
1044 * Byte flipping function *
1045 *************************************************/
1046
/* Reverse the byte order of an integer value.

Arguments:
  value   the value whose bytes are to be reversed
  n       width of the field in bytes; 2 swaps the two low-order bytes, and
            any other width is treated as a 4-byte quantity

Returns:  the byte-reversed value; bits above the flipped width are zero
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  /* Pick the value apart into its individual low-order bytes. */
  unsigned long int b0 = value & 0xff;
  unsigned long int b1 = (value >> 8) & 0xff;
  unsigned long int b2;
  unsigned long int b3;

  if (n == 2) return (b0 << 8) | b1;

  b2 = (value >> 16) & 0xff;
  b3 = (value >> 24) & 0xff;

  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1056
1057
1058
1059
1060 /*************************************************
1061 * Check match or recursion limit *
1062 *************************************************/
1063
1064 static int
1065 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1066 int start_offset, int options, int *use_offsets, int use_size_offsets,
1067 int flag, unsigned long int *limit, int errnumber, const char *msg)
1068 {
1069 int count;
1070 int min = 0;
1071 int mid = 64;
1072 int max = -1;
1073
1074 extra->flags |= flag;
1075
1076 for (;;)
1077 {
1078 *limit = mid;
1079
1080 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1081 use_offsets, use_size_offsets);
1082
1083 if (count == errnumber)
1084 {
1085 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1086 min = mid;
1087 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1088 }
1089
1090 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1091 count == PCRE_ERROR_PARTIAL)
1092 {
1093 if (mid == min + 1)
1094 {
1095 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1096 break;
1097 }
1098 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1099 max = mid;
1100 mid = (min + mid)/2;
1101 }
1102 else break; /* Some other error */
1103 }
1104
1105 extra->flags &= ~flag;
1106 return count;
1107 }
1108
1109
1110
1111 /*************************************************
1112 * Case-independent strncmp() function *
1113 *************************************************/
1114
/* Compare exactly n characters of two strings, ignoring case. Each pair of
characters is passed through tolower() before being compared; the comparison
does not stop at a terminating zero.

Arguments:
  s         first string
  t         second string
  n         number of characters to compare

Returns:    < 0, = 0, or > 0, according to the comparison
*/

static int
strncmpic(uschar *s, uschar *t, int n)
{
  for (; n != 0; n--, s++, t++)
  {
    int diff = tolower(*s) - tolower(*t);
    if (diff != 0) return diff;
  }

  return 0;
}
1134
1135
1136
1137 /*************************************************
1138 * Check newline indicator *
1139 *************************************************/
1140
1141 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1142 a message and return 0 if there is no match.
1143
1144 Arguments:
1145 p points after the leading '<'
1146 f file for error message
1147
1148 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1149 */
1150
1151 static int
1152 check_newline(uschar *p, FILE *f)
1153 {
1154 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1155 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1156 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1157 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1158 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1159 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1160 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1161 fprintf(f, "Unknown newline type at: <%s\n", p);
1162 return 0;
1163 }
1164
1165
1166
1167 /*************************************************
1168 * Usage function *
1169 *************************************************/
1170
/* Write the command-line help text to stdout. The readline note and the -dfa
and -p entries depend on SUPPORT_LIBREADLINE, NODFA and NOPOSIX respectively. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
  fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

  fputs("\nOptions:\n"
        " -b show compiled code (bytecode)\n"
        " -C show PCRE compile-time options and exit\n"
        " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -M find MATCH_LIMIT minimum for each subject\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif

  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s force each pattern to be studied\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1204
1205
1206
1207 /*************************************************
1208 * Main Program *
1209 *************************************************/
1210
1211 /* Read lines from named file or stdin and write to named file or stdout; lines
1212 consist of a regular expression, in delimiters and optionally followed by
1213 options, followed by a set of test data, terminated by an empty line. */
1214
1215 int main(int argc, char **argv)
1216 {
1217 FILE *infile = stdin;
1218 int options = 0;
1219 int study_options = 0;
1220 int default_find_match_limit = FALSE;
1221 int op = 1;
1222 int timeit = 0;
1223 int timeitm = 0;
1224 int showinfo = 0;
1225 int showstore = 0;
1226 int force_study = 0;
1227 int quiet = 0;
1228 int size_offsets = 45;
1229 int size_offsets_max;
1230 int *offsets = NULL;
1231 #if !defined NOPOSIX
1232 int posix = 0;
1233 #endif
1234 int debug = 0;
1235 int done = 0;
1236 int all_use_dfa = 0;
1237 int yield = 0;
1238 int stack_size;
1239
1240 /* These vectors store, end-to-end, a list of captured substring names. Assume
1241 that 1024 is plenty long enough for the few names we'll be testing. */
1242
1243 uschar copynames[1024];
1244 uschar getnames[1024];
1245
1246 uschar *copynamesptr;
1247 uschar *getnamesptr;
1248
1249 /* Get buffers from malloc() so that Electric Fence will check their misuse
1250 when I am debugging. They grow automatically when very long lines are read. */
1251
1252 buffer = (unsigned char *)malloc(buffer_size);
1253 dbuffer = (unsigned char *)malloc(buffer_size);
1254 pbuffer = (unsigned char *)malloc(buffer_size);
1255
1256 /* The outfile variable is static so that new_malloc can use it. */
1257
1258 outfile = stdout;
1259
1260 /* The following _setmode() stuff is some Windows magic that tells its runtime
1261 library to translate CRLF into a single LF character. At least, that's what
1262 I've been told: never having used Windows I take this all on trust. Originally
1263 it set 0x8000, but then I was advised that _O_BINARY was better. */
1264
1265 #if defined(_WIN32) || defined(WIN32)
1266 _setmode( _fileno( stdout ), _O_BINARY );
1267 #endif
1268
1269 /* Scan options */
1270
1271 while (argc > 1 && argv[op][0] == '-')
1272 {
1273 unsigned char *endptr;
1274
1275 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1276 else if (strcmp(argv[op], "-s") == 0) force_study = 1;
1277 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1278 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1279 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1280 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1281 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1282 #if !defined NODFA
1283 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1284 #endif
1285 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1286 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1287 *endptr == 0))
1288 {
1289 op++;
1290 argc--;
1291 }
1292 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1293 {
1294 int both = argv[op][2] == 0;
1295 int temp;
1296 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1297 *endptr == 0))
1298 {
1299 timeitm = temp;
1300 op++;
1301 argc--;
1302 }
1303 else timeitm = LOOPREPEAT;
1304 if (both) timeit = timeitm;
1305 }
1306 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1307 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1308 *endptr == 0))
1309 {
1310 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1311 printf("PCRE: -S not supported on this OS\n");
1312 exit(1);
1313 #else
1314 int rc;
1315 struct rlimit rlim;
1316 getrlimit(RLIMIT_STACK, &rlim);
1317 rlim.rlim_cur = stack_size * 1024 * 1024;
1318 rc = setrlimit(RLIMIT_STACK, &rlim);
1319 if (rc != 0)
1320 {
1321 printf("PCRE: setrlimit() failed with error %d\n", rc);
1322 exit(1);
1323 }
1324 op++;
1325 argc--;
1326 #endif
1327 }
1328 #if !defined NOPOSIX
1329 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1330 #endif
1331 else if (strcmp(argv[op], "-C") == 0)
1332 {
1333 int rc;
1334 unsigned long int lrc;
1335 printf("PCRE version %s\n", pcre_version());
1336 printf("Compiled with\n");
1337 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1338 printf(" %sUTF-8 support\n", rc? "" : "No ");
1339 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1340 printf(" %sUnicode properties support\n", rc? "" : "No ");
1341 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1342 /* Note that these values are always the ASCII values, even
1343 in EBCDIC environments. CR is 13 and NL is 10. */
1344 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1345 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1346 (rc == -2)? "ANYCRLF" :
1347 (rc == -1)? "ANY" : "???");
1348 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1349 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1350 "all Unicode newlines");
1351 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1352 printf(" Internal link size = %d\n", rc);
1353 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1354 printf(" POSIX malloc threshold = %d\n", rc);
1355 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1356 printf(" Default match limit = %ld\n", lrc);
1357 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1358 printf(" Default recursion depth limit = %ld\n", lrc);
1359 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1360 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1361 goto EXIT;
1362 }
1363 else if (strcmp(argv[op], "-help") == 0 ||
1364 strcmp(argv[op], "--help") == 0)
1365 {
1366 usage();
1367 goto EXIT;
1368 }
1369 else
1370 {
1371 printf("** Unknown or malformed option %s\n", argv[op]);
1372 usage();
1373 yield = 1;
1374 goto EXIT;
1375 }
1376 op++;
1377 argc--;
1378 }
1379
1380 /* Get the store for the offsets vector, and remember what it was */
1381
1382 size_offsets_max = size_offsets;
1383 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1384 if (offsets == NULL)
1385 {
1386 printf("** Failed to get %d bytes of memory for offsets vector\n",
1387 (int)(size_offsets_max * sizeof(int)));
1388 yield = 1;
1389 goto EXIT;
1390 }
1391
1392 /* Sort out the input and output files */
1393
1394 if (argc > 1)
1395 {
1396 infile = fopen(argv[op], INPUT_MODE);
1397 if (infile == NULL)
1398 {
1399 printf("** Failed to open %s\n", argv[op]);
1400 yield = 1;
1401 goto EXIT;
1402 }
1403 }
1404
1405 if (argc > 2)
1406 {
1407 outfile = fopen(argv[op+1], OUTPUT_MODE);
1408 if (outfile == NULL)
1409 {
1410 printf("** Failed to open %s\n", argv[op+1]);
1411 yield = 1;
1412 goto EXIT;
1413 }
1414 }
1415
1416 /* Set alternative malloc function */
1417
1418 pcre_malloc = new_malloc;
1419 pcre_free = new_free;
1420 pcre_stack_malloc = stack_malloc;
1421 pcre_stack_free = stack_free;
1422
1423 /* Heading line unless quiet, then prompt for first regex if stdin */
1424
1425 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1426
1427 /* Main loop */
1428
1429 while (!done)
1430 {
1431 pcre *re = NULL;
1432 pcre_extra *extra = NULL;
1433
1434 #if !defined NOPOSIX /* There are still compilers that require no indent */
1435 regex_t preg;
1436 int do_posix = 0;
1437 #endif
1438
1439 const char *error;
1440 unsigned char *markptr;
1441 unsigned char *p, *pp, *ppp;
1442 unsigned char *to_file = NULL;
1443 const unsigned char *tables = NULL;
1444 unsigned long int true_size, true_study_size = 0;
1445 size_t size, regex_gotten_store;
1446 int do_allcaps = 0;
1447 int do_mark = 0;
1448 int do_study = 0;
1449 int no_force_study = 0;
1450 int do_debug = debug;
1451 int do_G = 0;
1452 int do_g = 0;
1453 int do_showinfo = showinfo;
1454 int do_showrest = 0;
1455 int do_showcaprest = 0;
1456 int do_flip = 0;
1457 int erroroffset, len, delimiter, poffset;
1458
1459 use_utf8 = 0;
1460 debug_lengths = 1;
1461
1462 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1463 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1464 fflush(outfile);
1465
1466 p = buffer;
1467 while (isspace(*p)) p++;
1468 if (*p == 0) continue;
1469
1470 /* See if the pattern is to be loaded pre-compiled from a file. */
1471
1472 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1473 {
1474 unsigned long int magic, get_options;
1475 uschar sbuf[8];
1476 FILE *f;
1477
1478 p++;
1479 pp = p + (int)strlen((char *)p);
1480 while (isspace(pp[-1])) pp--;
1481 *pp = 0;
1482
1483 f = fopen((char *)p, "rb");
1484 if (f == NULL)
1485 {
1486 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1487 continue;
1488 }
1489
1490 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1491
1492 true_size =
1493 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1494 true_study_size =
1495 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1496
1497 re = (real_pcre *)new_malloc(true_size);
1498 regex_gotten_store = gotten_store;
1499
1500 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1501
1502 magic = ((real_pcre *)re)->magic_number;
1503 if (magic != MAGIC_NUMBER)
1504 {
1505 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1506 {
1507 do_flip = 1;
1508 }
1509 else
1510 {
1511 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1512 fclose(f);
1513 continue;
1514 }
1515 }
1516
1517 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1518 do_flip? " (byte-inverted)" : "", p);
1519
1520 /* Need to know if UTF-8 for printing data strings */
1521
1522 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1523 use_utf8 = (get_options & PCRE_UTF8) != 0;
1524
1525 /* Now see if there is any following study data. */
1526
1527 if (true_study_size != 0)
1528 {
1529 pcre_study_data *psd;
1530
1531 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1532 extra->flags = PCRE_EXTRA_STUDY_DATA;
1533
1534 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1535 extra->study_data = psd;
1536
1537 if (fread(psd, 1, true_study_size, f) != true_study_size)
1538 {
1539 FAIL_READ:
1540 fprintf(outfile, "Failed to read data from %s\n", p);
1541 if (extra != NULL) new_free(extra);
1542 if (re != NULL) new_free(re);
1543 fclose(f);
1544 continue;
1545 }
1546 fprintf(outfile, "Study data loaded from %s\n", p);
1547 do_study = 1; /* To get the data output if requested */
1548 }
1549 else fprintf(outfile, "No study data\n");
1550
1551 fclose(f);
1552 goto SHOW_INFO;
1553 }
1554
1555 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1556 the pattern; if it isn't complete, read more. */
1557
1558 delimiter = *p++;
1559
1560 if (isalnum(delimiter) || delimiter == '\\')
1561 {
1562 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1563 goto SKIP_DATA;
1564 }
1565
1566 pp = p;
1567 poffset = (int)(p - buffer);
1568
1569 for(;;)
1570 {
1571 while (*pp != 0)
1572 {
1573 if (*pp == '\\' && pp[1] != 0) pp++;
1574 else if (*pp == delimiter) break;
1575 pp++;
1576 }
1577 if (*pp != 0) break;
1578 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1579 {
1580 fprintf(outfile, "** Unexpected EOF\n");
1581 done = 1;
1582 goto CONTINUE;
1583 }
1584 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1585 }
1586
1587 /* The buffer may have moved while being extended; reset the start of data
1588 pointer to the correct relative point in the buffer. */
1589
1590 p = buffer + poffset;
1591
1592 /* If the first character after the delimiter is backslash, make
1593 the pattern end with backslash. This is purely to provide a way
1594 of testing for the error message when a pattern ends with backslash. */
1595
1596 if (pp[1] == '\\') *pp++ = '\\';
1597
1598 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1599 for callouts. */
1600
1601 *pp++ = 0;
1602 strcpy((char *)pbuffer, (char *)p);
1603
1604 /* Look for options after final delimiter */
1605
1606 options = 0;
1607 study_options = 0;
1608 log_store = showstore; /* default from command line */
1609
1610 while (*pp != 0)
1611 {
1612 switch (*pp++)
1613 {
1614 case 'f': options |= PCRE_FIRSTLINE; break;
1615 case 'g': do_g = 1; break;
1616 case 'i': options |= PCRE_CASELESS; break;
1617 case 'm': options |= PCRE_MULTILINE; break;
1618 case 's': options |= PCRE_DOTALL; break;
1619 case 'x': options |= PCRE_EXTENDED; break;
1620
1621 case '+':
1622 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1623 break;
1624
1625 case '=': do_allcaps = 1; break;
1626 case 'A': options |= PCRE_ANCHORED; break;
1627 case 'B': do_debug = 1; break;
1628 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1629 case 'D': do_debug = do_showinfo = 1; break;
1630 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1631 case 'F': do_flip = 1; break;
1632 case 'G': do_G = 1; break;
1633 case 'I': do_showinfo = 1; break;
1634 case 'J': options |= PCRE_DUPNAMES; break;
1635 case 'K': do_mark = 1; break;
1636 case 'M': log_store = 1; break;
1637 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1638
1639 #if !defined NOPOSIX
1640 case 'P': do_posix = 1; break;
1641 #endif
1642
1643 case 'S':
1644 if (do_study == 0) do_study = 1; else
1645 {
1646 do_study = 0;
1647 no_force_study = 1;
1648 }
1649 break;
1650
1651 case 'U': options |= PCRE_UNGREEDY; break;
1652 case 'W': options |= PCRE_UCP; break;
1653 case 'X': options |= PCRE_EXTRA; break;
1654 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1655 case 'Z': debug_lengths = 0; break;
1656 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1657 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1658
1659 case 'T':
1660 switch (*pp++)
1661 {
1662 case '0': tables = tables0; break;
1663 case '1': tables = tables1; break;
1664
1665 case '\r':
1666 case '\n':
1667 case ' ':
1668 case 0:
1669 fprintf(outfile, "** Missing table number after /T\n");
1670 goto SKIP_DATA;
1671
1672 default:
1673 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1674 goto SKIP_DATA;
1675 }
1676 break;
1677
1678 case 'L':
1679 ppp = pp;
1680 /* The '\r' test here is so that it works on Windows. */
1681 /* The '0' test is just in case this is an unterminated line. */
1682 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1683 *ppp = 0;
1684 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1685 {
1686 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1687 goto SKIP_DATA;
1688 }
1689 locale_set = 1;
1690 tables = pcre_maketables();
1691 pp = ppp;
1692 break;
1693
1694 case '>':
1695 to_file = pp;
1696 while (*pp != 0) pp++;
1697 while (isspace(pp[-1])) pp--;
1698 *pp = 0;
1699 break;
1700
1701 case '<':
1702 {
1703 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1704 {
1705 options |= PCRE_JAVASCRIPT_COMPAT;
1706 pp += 3;
1707 }
1708 else
1709 {
1710 int x = check_newline(pp, outfile);
1711 if (x == 0) goto SKIP_DATA;
1712 options |= x;
1713 while (*pp++ != '>');
1714 }
1715 }
1716 break;
1717
1718 case '\r': /* So that it works in Windows */
1719 case '\n':
1720 case ' ':
1721 break;
1722
1723 default:
1724 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1725 goto SKIP_DATA;
1726 }
1727 }
1728
1729 /* Handle compiling via the POSIX interface, which doesn't support the
1730 timing, showing, or debugging options, nor the ability to pass over
1731 local character tables. */
1732
1733 #if !defined NOPOSIX
1734 if (posix || do_posix)
1735 {
1736 int rc;
1737 int cflags = 0;
1738
1739 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1740 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1741 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1742 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1743 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1744 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1745 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1746
1747 rc = regcomp(&preg, (char *)p, cflags);
1748
1749 /* Compilation failed; go back for another re, skipping to blank line
1750 if non-interactive. */
1751
1752 if (rc != 0)
1753 {
1754 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1755 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1756 goto SKIP_DATA;
1757 }
1758 }
1759
1760 /* Handle compiling via the native interface */
1761
1762 else
1763 #endif /* !defined NOPOSIX */
1764
1765 {
1766 unsigned long int get_options;
1767
1768 if (timeit > 0)
1769 {
1770 register int i;
1771 clock_t time_taken;
1772 clock_t start_time = clock();
1773 for (i = 0; i < timeit; i++)
1774 {
1775 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1776 if (re != NULL) free(re);
1777 }
1778 time_taken = clock() - start_time;
1779 fprintf(outfile, "Compile time %.4f milliseconds\n",
1780 (((double)time_taken * 1000.0) / (double)timeit) /
1781 (double)CLOCKS_PER_SEC);
1782 }
1783
1784 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1785
1786 /* Compilation failed; go back for another re, skipping to blank line
1787 if non-interactive. */
1788
1789 if (re == NULL)
1790 {
1791 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1792 SKIP_DATA:
1793 if (infile != stdin)
1794 {
1795 for (;;)
1796 {
1797 if (extend_inputline(infile, buffer, NULL) == NULL)
1798 {
1799 done = 1;
1800 goto CONTINUE;
1801 }
1802 len = (int)strlen((char *)buffer);
1803 while (len > 0 && isspace(buffer[len-1])) len--;
1804 if (len == 0) break;
1805 }
1806 fprintf(outfile, "\n");
1807 }
1808 goto CONTINUE;
1809 }
1810
1811 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1812 within the regex; check for this so that we know how to process the data
1813 lines. */
1814
1815 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1816 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1817
1818 /* Print information if required. There are now two info-returning
1819 functions. The old one has a limited interface and returns only limited
1820 data. Check that it agrees with the newer one. */
1821
1822 if (log_store)
1823 fprintf(outfile, "Memory allocation (code space): %d\n",
1824 (int)(gotten_store -
1825 sizeof(real_pcre) -
1826 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1827
1828 /* Extract the size for possible writing before possibly flipping it,
1829 and remember the store that was got. */
1830
1831 true_size = ((real_pcre *)re)->size;
1832 regex_gotten_store = gotten_store;
1833
1834 /* If -s or /S was present, study the regex to generate additional info to
1835 help with the matching, unless the pattern has the SS option, which
1836 suppresses the effect of /S (used for a few test patterns where studying is
1837 never sensible). */
1838
1839 if (do_study || (force_study && !no_force_study))
1840 {
1841 if (timeit > 0)
1842 {
1843 register int i;
1844 clock_t time_taken;
1845 clock_t start_time = clock();
1846 for (i = 0; i < timeit; i++)
1847 extra = pcre_study(re, study_options, &error);
1848 time_taken = clock() - start_time;
1849 if (extra != NULL) free(extra);
1850 fprintf(outfile, " Study time %.4f milliseconds\n",
1851 (((double)time_taken * 1000.0) / (double)timeit) /
1852 (double)CLOCKS_PER_SEC);
1853 }
1854 extra = pcre_study(re, study_options, &error);
1855 if (error != NULL)
1856 fprintf(outfile, "Failed to study: %s\n", error);
1857 else if (extra != NULL)
1858 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1859 }
1860
1861 /* If /K was present, we set up for handling MARK data. */
1862
1863 if (do_mark)
1864 {
1865 if (extra == NULL)
1866 {
1867 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1868 extra->flags = 0;
1869 }
1870 extra->mark = &markptr;
1871 extra->flags |= PCRE_EXTRA_MARK;
1872 }
1873
1874 /* If the 'F' option was present, we flip the bytes of all the integer
1875 fields in the regex data block and the study block. This is to make it
1876 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1877 compiled on a different architecture. */
1878
1879 if (do_flip)
1880 {
1881 real_pcre *rre = (real_pcre *)re;
1882 rre->magic_number =
1883 byteflip(rre->magic_number, sizeof(rre->magic_number));
1884 rre->size = byteflip(rre->size, sizeof(rre->size));
1885 rre->options = byteflip(rre->options, sizeof(rre->options));
1886 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1887 rre->top_bracket =
1888 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1889 rre->top_backref =
1890 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1891 rre->first_byte =
1892 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1893 rre->req_byte =
1894 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1895 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1896 sizeof(rre->name_table_offset));
1897 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1898 sizeof(rre->name_entry_size));
1899 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1900 sizeof(rre->name_count));
1901
1902 if (extra != NULL)
1903 {
1904 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1905 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1906 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1907 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1908 }
1909 }
1910
1911 /* Extract information from the compiled data if required */
1912
1913 SHOW_INFO:
1914
1915 if (do_debug)
1916 {
1917 fprintf(outfile, "------------------------------------------------------------------\n");
1918 pcre_printint(re, outfile, debug_lengths);
1919 }
1920
1921 /* We already have the options in get_options (see above) */
1922
1923 if (do_showinfo)
1924 {
1925 unsigned long int all_options;
1926 #if !defined NOINFOCHECK
1927 int old_first_char, old_options, old_count;
1928 #endif
1929 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1930 hascrorlf;
1931 int nameentrysize, namecount;
1932 const uschar *nametable;
1933
1934 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1935 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1936 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1937 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1938 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1939 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1940 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1941 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1942 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1943 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1944 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1945
1946 #if !defined NOINFOCHECK
1947 old_count = pcre_info(re, &old_options, &old_first_char);
1948 if (count < 0) fprintf(outfile,
1949 "Error %d from pcre_info()\n", count);
1950 else
1951 {
1952 if (old_count != count) fprintf(outfile,
1953 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1954 old_count);
1955
1956 if (old_first_char != first_char) fprintf(outfile,
1957 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1958 first_char, old_first_char);
1959
1960 if (old_options != (int)get_options) fprintf(outfile,
1961 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1962 get_options, old_options);
1963 }
1964 #endif
1965
1966 if (size != regex_gotten_store) fprintf(outfile,
1967 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1968 (int)size, (int)regex_gotten_store);
1969
1970 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1971 if (backrefmax > 0)
1972 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1973
1974 if (namecount > 0)
1975 {
1976 fprintf(outfile, "Named capturing subpatterns:\n");
1977 while (namecount-- > 0)
1978 {
1979 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1980 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1981 GET2(nametable, 0));
1982 nametable += nameentrysize;
1983 }
1984 }
1985
1986 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1987 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1988
1989 all_options = ((real_pcre *)re)->options;
1990 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1991
1992 if (get_options == 0) fprintf(outfile, "No options\n");
1993 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1994 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1995 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1996 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1997 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1998 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1999 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2000 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2001 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2002 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2003 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2004 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2005 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2006 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2007 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2008 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2009 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2010 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2011
2012 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2013
2014 switch (get_options & PCRE_NEWLINE_BITS)
2015 {
2016 case PCRE_NEWLINE_CR:
2017 fprintf(outfile, "Forced newline sequence: CR\n");
2018 break;
2019
2020 case PCRE_NEWLINE_LF:
2021 fprintf(outfile, "Forced newline sequence: LF\n");
2022 break;
2023
2024 case PCRE_NEWLINE_CRLF:
2025 fprintf(outfile, "Forced newline sequence: CRLF\n");
2026 break;
2027
2028 case PCRE_NEWLINE_ANYCRLF:
2029 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2030 break;
2031
2032 case PCRE_NEWLINE_ANY:
2033 fprintf(outfile, "Forced newline sequence: ANY\n");
2034 break;
2035
2036 default:
2037 break;
2038 }
2039
2040 if (first_char == -1)
2041 {
2042 fprintf(outfile, "First char at start or follows newline\n");
2043 }
2044 else if (first_char < 0)
2045 {
2046 fprintf(outfile, "No first char\n");
2047 }
2048 else
2049 {
2050 int ch = first_char & 255;
2051 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2052 "" : " (caseless)";
2053 if (PRINTHEX(ch))
2054 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2055 else
2056 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2057 }
2058
2059 if (need_char < 0)
2060 {
2061 fprintf(outfile, "No need char\n");
2062 }
2063 else
2064 {
2065 int ch = need_char & 255;
2066 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2067 "" : " (caseless)";
2068 if (PRINTHEX(ch))
2069 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2070 else
2071 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2072 }
2073
2074 /* Don't output study size; at present it is in any case a fixed
2075 value, but it varies, depending on the computer architecture, and
2076 so messes up the test suite. (And with the /F option, it might be
2077 flipped.) If study was forced by an external -s, don't show this
2078 information unless -i or -d was also present. This means that, except
2079 when auto-callouts are involved, the output from runs with and without
2080 -s should be identical. */
2081
2082 if (do_study || (force_study && showinfo && !no_force_study))
2083 {
2084 if (extra == NULL)
2085 fprintf(outfile, "Study returned NULL\n");
2086 else
2087 {
2088 uschar *start_bits = NULL;
2089 int minlength;
2090
2091 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2092 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2093
2094 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2095 if (start_bits == NULL)
2096 fprintf(outfile, "No set of starting bytes\n");
2097 else
2098 {
2099 int i;
2100 int c = 24;
2101 fprintf(outfile, "Starting byte set: ");
2102 for (i = 0; i < 256; i++)
2103 {
2104 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2105 {
2106 if (c > 75)
2107 {
2108 fprintf(outfile, "\n ");
2109 c = 2;
2110 }
2111 if (PRINTHEX(i) && i != ' ')
2112 {
2113 fprintf(outfile, "%c ", i);
2114 c += 2;
2115 }
2116 else
2117 {
2118 fprintf(outfile, "\\x%02x ", i);
2119 c += 5;
2120 }
2121 }
2122 }
2123 fprintf(outfile, "\n");
2124 }
2125 }
2126 }
2127 }
2128
2129 /* If the '>' option was present, we write out the regex to a file, and
2130 that is all. The first 8 bytes of the file are the regex length and then
2131 the study length, in big-endian order. */
2132
2133 if (to_file != NULL)
2134 {
2135 FILE *f = fopen((char *)to_file, "wb");
2136 if (f == NULL)
2137 {
2138 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2139 }
2140 else
2141 {
2142 uschar sbuf[8];
2143 sbuf[0] = (uschar)((true_size >> 24) & 255);
2144 sbuf[1] = (uschar)((true_size >> 16) & 255);
2145 sbuf[2] = (uschar)((true_size >> 8) & 255);
2146 sbuf[3] = (uschar)((true_size) & 255);
2147
2148 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2149 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2150 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2151 sbuf[7] = (uschar)((true_study_size) & 255);
2152
2153 if (fwrite(sbuf, 1, 8, f) < 8 ||
2154 fwrite(re, 1, true_size, f) < true_size)
2155 {
2156 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2157 }
2158 else
2159 {
2160 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2161
2162 /* If there is study data, write it after the compiled pattern and
2163 report whether the write succeeded. */
2164
2165 if (extra != NULL)
2166 {
2167 if (fwrite(extra->study_data, 1, true_study_size, f) <
2168 true_study_size)
2169 {
2170 fprintf(outfile, "Write error on %s: %s\n", to_file,
2171 strerror(errno));
2172 }
2173 else fprintf(outfile, "Study data written to %s\n", to_file);
2174 }
2175 }
2176 fclose(f);
2177 }
2178
2179 new_free(re);
2180 if (extra != NULL) new_free(extra);
2181 if (locale_set)
2182 {
2183 new_free((void *)tables);
2184 setlocale(LC_CTYPE, "C");
2185 locale_set = 0;
2186 }
2187 continue; /* With next regex */
2188 }
2189 } /* End of non-POSIX compile */
2190
2191 /* Read data lines and test them */
2192
2193 for (;;)
2194 {
2195 uschar *q;
2196 uschar *bptr;
2197 int *use_offsets = offsets;
2198 int use_size_offsets = size_offsets;
2199 int callout_data = 0;
2200 int callout_data_set = 0;
2201 int count, c;
2202 int copystrings = 0;
2203 int find_match_limit = default_find_match_limit;
2204 int getstrings = 0;
2205 int getlist = 0;
2206 int gmatched = 0;
2207 int start_offset = 0;
2208 int start_offset_sign = 1;
2209 int g_notempty = 0;
2210 int use_dfa = 0;
2211
2212 options = 0;
2213
2214 *copynames = 0;
2215 *getnames = 0;
2216
2217 copynamesptr = copynames;
2218 getnamesptr = getnames;
2219
2220 pcre_callout = callout;
2221 first_callout = 1;
2222 last_callout_mark = NULL;
2223 callout_extra = 0;
2224 callout_count = 0;
2225 callout_fail_count = 999999;
2226 callout_fail_id = -1;
2227 show_malloc = 0;
2228
2229 if (extra != NULL) extra->flags &=
2230 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2231
2232 len = 0;
2233 for (;;)
2234 {
2235 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2236 {
2237 if (len > 0) /* Reached EOF without hitting a newline */
2238 {
2239 fprintf(outfile, "\n");
2240 break;
2241 }
2242 done = 1;
2243 goto CONTINUE;
2244 }
2245 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2246 len = (int)strlen((char *)buffer);
2247 if (buffer[len-1] == '\n') break;
2248 }
2249
2250 while (len > 0 && isspace(buffer[len-1])) len--;
2251 buffer[len] = 0;
2252 if (len == 0) break;
2253
2254 p = buffer;
2255 while (isspace(*p)) p++;
2256
2257 bptr = q = dbuffer;
2258 while ((c = *p++) != 0)
2259 {
2260 int i = 0;
2261 int n = 0;
2262
2263 if (c == '\\') switch ((c = *p++))
2264 {
2265 case 'a': c = 7; break;
2266 case 'b': c = '\b'; break;
2267 case 'e': c = 27; break;
2268 case 'f': c = '\f'; break;
2269 case 'n': c = '\n'; break;
2270 case 'r': c = '\r'; break;
2271 case 't': c = '\t'; break;
2272 case 'v': c = '\v'; break;
2273
2274 case '0': case '1': case '2': case '3':
2275 case '4': case '5': case '6': case '7':
2276 c -= '0';
2277 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2278 c = c * 8 + *p++ - '0';
2279
2280 #if !defined NOUTF8
2281 if (use_utf8 && c > 255)
2282 {
2283 unsigned char buff8[8];
2284 int ii, utn;
2285 utn = ord2utf8(c, buff8);
2286 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2287 c = buff8[ii]; /* Last byte */
2288 }
2289 #endif
2290 break;
2291
2292 case 'x':
2293
2294 /* Handle \x{..} specially - new Perl thing for utf8 */
2295
2296 #if !defined NOUTF8
2297 if (*p == '{')
2298 {
2299 unsigned char *pt = p;
2300 c = 0;
2301 while (isxdigit(*(++pt)))
2302 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2303 if (*pt == '}')
2304 {
2305 unsigned char buff8[8];
2306 int ii, utn;
2307 if (use_utf8)
2308 {
2309 utn = ord2utf8(c, buff8);
2310 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2311 c = buff8[ii]; /* Last byte */
2312 }
2313 else
2314 {
2315 if (c > 255)
2316 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2317 "UTF-8 mode is not enabled.\n"
2318 "** Truncation will probably give the wrong result.\n", c);
2319 }
2320 p = pt + 1;
2321 break;
2322 }
2323 /* Not correct form; fall through */
2324 }
2325 #endif
2326
2327 /* Ordinary \x */
2328
2329 c = 0;
2330 while (i++ < 2 && isxdigit(*p))
2331 {
2332 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2333 p++;
2334 }
2335 break;
2336
2337 case 0: /* \ followed by EOF allows for an empty line */
2338 p--;
2339 continue;
2340
2341 case '>':
2342 if (*p == '-')
2343 {
2344 start_offset_sign = -1;
2345 p++;
2346 }
2347 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2348 start_offset *= start_offset_sign;
2349 continue;
2350
2351 case 'A': /* Option setting */
2352 options |= PCRE_ANCHORED;
2353 continue;
2354
2355 case 'B':
2356 options |= PCRE_NOTBOL;
2357 continue;
2358
2359 case 'C':
2360 if (isdigit(*p)) /* Set copy string */
2361 {
2362 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2363 copystrings |= 1 << n;
2364 }
2365 else if (isalnum(*p))
2366 {
2367 uschar *npp = copynamesptr;
2368 while (isalnum(*p)) *npp++ = *p++;
2369 *npp++ = 0;
2370 *npp = 0;
2371 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2372 if (n < 0)
2373 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2374 copynamesptr = npp;
2375 }
2376 else if (*p == '+')
2377 {
2378 callout_extra = 1;
2379 p++;
2380 }
2381 else if (*p == '-')
2382 {
2383 pcre_callout = NULL;
2384 p++;
2385 }
2386 else if (*p == '!')
2387 {
2388 callout_fail_id = 0;
2389 p++;
2390 while(isdigit(*p))
2391 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2392 callout_fail_count = 0;
2393 if (*p == '!')
2394 {
2395 p++;
2396 while(isdigit(*p))
2397 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2398 }
2399 }
2400 else if (*p == '*')
2401 {
2402 int sign = 1;
2403 callout_data = 0;
2404 if (*(++p) == '-') { sign = -1; p++; }
2405 while(isdigit(*p))
2406 callout_data = callout_data * 10 + *p++ - '0';
2407 callout_data *= sign;
2408 callout_data_set = 1;
2409 }
2410 continue;
2411
2412 #if !defined NODFA
2413 case 'D':
2414 #if !defined NOPOSIX
2415 if (posix || do_posix)
2416 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2417 else
2418 #endif
2419 use_dfa = 1;
2420 continue;
2421 #endif
2422
2423 #if !defined NODFA
2424 case 'F':
2425 options |= PCRE_DFA_SHORTEST;
2426 continue;
2427 #endif
2428
2429 case 'G':
2430 if (isdigit(*p))
2431 {
2432 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2433 getstrings |= 1 << n;
2434 }
2435 else if (isalnum(*p))
2436 {
2437 uschar *npp = getnamesptr;
2438 while (isalnum(*p)) *npp++ = *p++;
2439 *npp++ = 0;
2440 *npp = 0;
2441 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2442 if (n < 0)
2443 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2444 getnamesptr = npp;
2445 }
2446 continue;
2447
2448 case 'L':
2449 getlist = 1;
2450 continue;
2451
2452 case 'M':
2453 find_match_limit = 1;
2454 continue;
2455
2456 case 'N':
2457 if ((options & PCRE_NOTEMPTY) != 0)
2458 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2459 else
2460 options |= PCRE_NOTEMPTY;
2461 continue;
2462
2463 case 'O':
2464 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2465 if (n > size_offsets_max)
2466 {
2467 size_offsets_max = n;
2468 free(offsets);
2469 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2470 if (offsets == NULL)
2471 {
2472 printf("** Failed to get %d bytes of memory for offsets vector\n",
2473 (int)(size_offsets_max * sizeof(int)));
2474 yield = 1;
2475 goto EXIT;
2476 }
2477 }
2478 use_size_offsets = n;
2479 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2480 continue;
2481
2482 case 'P':
2483 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2484 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2485 continue;
2486
2487 case 'Q':
2488 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2489 if (extra == NULL)
2490 {
2491 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2492 extra->flags = 0;
2493 }
2494 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2495 extra->match_limit_recursion = n;
2496 continue;
2497
2498 case 'q':
2499 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2500 if (extra == NULL)
2501 {
2502 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2503 extra->flags = 0;
2504 }
2505 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2506 extra->match_limit = n;
2507 continue;
2508
2509 #if !defined NODFA
2510 case 'R':
2511 options |= PCRE_DFA_RESTART;
2512 continue;
2513 #endif
2514
2515 case 'S':
2516 show_malloc = 1;
2517 continue;
2518
2519 case 'Y':
2520 options |= PCRE_NO_START_OPTIMIZE;
2521 continue;
2522
2523 case 'Z':
2524 options |= PCRE_NOTEOL;
2525 continue;
2526
2527 case '?':
2528 options |= PCRE_NO_UTF8_CHECK;
2529 continue;
2530
2531 case '<':
2532 {
2533 int x = check_newline(p, outfile);
2534 if (x == 0) goto NEXT_DATA;
2535 options |= x;
2536 while (*p++ != '>');
2537 }
2538 continue;
2539 }
2540 *q++ = c;
2541 }
2542 *q = 0;
2543 len = (int)(q - dbuffer);
2544
2545 /* Move the data to the end of the buffer so that a read over the end of
2546 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2547 we are using the POSIX interface, we must include the terminating zero. */
2548
2549 #if !defined NOPOSIX
2550 if (posix || do_posix)
2551 {
2552 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2553 bptr += buffer_size - len - 1;
2554 }
2555 else
2556 #endif
2557 {
2558 memmove(bptr + buffer_size - len, bptr, len);
2559 bptr += buffer_size - len;
2560 }
2561
2562 if ((all_use_dfa || use_dfa) && find_match_limit)
2563 {
2564 printf("**Match limit not relevant for DFA matching: ignored\n");
2565 find_match_limit = 0;
2566 }
2567
2568 /* Handle matching via the POSIX interface, which does not
2569 support timing or playing with the match limit or callout data. */
2570
2571 #if !defined NOPOSIX
2572 if (posix || do_posix)
2573 {
2574 int rc;
2575 int eflags = 0;
2576 regmatch_t *pmatch = NULL;
2577 if (use_size_offsets > 0)
2578 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2579 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2580 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2581 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2582
2583 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2584
2585 if (rc != 0)
2586 {
2587 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2588 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2589 }
2590 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2591 != 0)
2592 {
2593 fprintf(outfile, "Matched with REG_NOSUB\n");
2594 }
2595 else
2596 {
2597 size_t i;
2598 for (i = 0; i < (size_t)use_size_offsets; i++)
2599 {
2600 if (pmatch[i].rm_so >= 0)
2601 {
2602 fprintf(outfile, "%2d: ", (int)i);
2603 (void)pchars(dbuffer + pmatch[i].rm_so,
2604 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2605 fprintf(outfile, "\n");
2606 if (do_showcaprest || (i == 0 && do_showrest))
2607 {
2608 fprintf(outfile, "%2d+ ", (int)i);
2609 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2610 outfile);
2611 fprintf(outfile, "\n");
2612 }
2613 }
2614 }
2615 }
2616 free(pmatch);
2617 }
2618
2619 /* Handle matching via the native interface - repeats for /g and /G */
2620
2621 else
2622 #endif /* !defined NOPOSIX */
2623
2624 for (;; gmatched++) /* Loop for /g or /G */
2625 {
2626 markptr = NULL;
2627
2628 if (timeitm > 0)
2629 {
2630 register int i;
2631 clock_t time_taken;
2632 clock_t start_time = clock();
2633
2634 #if !defined NODFA
2635 if (all_use_dfa || use_dfa)
2636 {
2637 int workspace[1000];
2638 for (i = 0; i < timeitm; i++)
2639 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2640 options | g_notempty, use_offsets, use_size_offsets, workspace,
2641 sizeof(workspace)/sizeof(int));
2642 }
2643 else
2644 #endif
2645
2646 for (i = 0; i < timeitm; i++)
2647 count = pcre_exec(re, extra, (char *)bptr, len,
2648 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2649
2650 time_taken = clock() - start_time;
2651 fprintf(outfile, "Execute time %.4f milliseconds\n",
2652 (((double)time_taken * 1000.0) / (double)timeitm) /
2653 (double)CLOCKS_PER_SEC);
2654 }
2655
2656 /* If find_match_limit is set, we want to do repeated matches with
2657 varying limits in order to find the minimum value for the match limit and
2658 for the recursion limit. */
2659
2660 if (find_match_limit)
2661 {
2662 if (extra == NULL)
2663 {
2664 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2665 extra->flags = 0;
2666 }
2667
2668 (void)check_match_limit(re, extra, bptr, len, start_offset,
2669 options|g_notempty, use_offsets, use_size_offsets,
2670 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2671 PCRE_ERROR_MATCHLIMIT, "match()");
2672
2673 count = check_match_limit(re, extra, bptr, len, start_offset,
2674 options|g_notempty, use_offsets, use_size_offsets,
2675 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2676 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2677 }
2678
2679 /* If callout_data is set, use the interface with additional data */
2680
2681 else if (callout_data_set)
2682 {
2683 if (extra == NULL)
2684 {
2685 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2686 extra->flags = 0;
2687 }
2688 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2689 extra->callout_data = &callout_data;
2690 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2691 options | g_notempty, use_offsets, use_size_offsets);
2692 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2693 }
2694
2695 /* The normal case is just to do the match once, with the default
2696 value of match_limit. */
2697
2698 #if !defined NODFA
2699 else if (all_use_dfa || use_dfa)
2700 {
2701 int workspace[1000];
2702 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2703 options | g_notempty, use_offsets, use_size_offsets, workspace,
2704 sizeof(workspace)/sizeof(int));
2705 if (count == 0)
2706 {
2707 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2708 count = use_size_offsets/2;
2709 }
2710 }
2711 #endif
2712
2713 else
2714 {
2715 count = pcre_exec(re, extra, (char *)bptr, len,
2716 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2717 if (count == 0)
2718 {
2719 fprintf(outfile, "Matched, but too many substrings\n");
2720 count = use_size_offsets/3;
2721 }
2722 }
2723
2724 /* Matched */
2725
2726 if (count >= 0)
2727 {
2728 int i, maxcount;
2729
2730 #if !defined NODFA
2731 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2732 #endif
2733 maxcount = use_size_offsets/3;
2734
2735 /* This is a check against a lunatic return value. */
2736
2737 if (count > maxcount)
2738 {
2739 fprintf(outfile,
2740 "** PCRE error: returned count %d is too big for offset size %d\n",
2741 count, use_size_offsets);
2742 count = use_size_offsets/3;
2743 if (do_g || do_G)
2744 {
2745 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2746 do_g = do_G = FALSE; /* Break g/G loop */
2747 }
2748 }
2749
2750 /* do_allcaps requests showing of all captures in the pattern, to check
2751 unset ones at the end. */
2752
2753 if (do_allcaps)
2754 {
2755 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2756 count++; /* Allow for full match */
2757 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2758 }
2759
2760 /* Output the captured substrings */
2761
2762 for (i = 0; i < count * 2; i += 2)
2763 {
2764 if (use_offsets[i] < 0)
2765 {
2766 if (use_offsets[i] != -1)
2767 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2768 use_offsets[i], i);
2769 if (use_offsets[i+1] != -1)
2770 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2771 use_offsets[i+1], i+1);
2772 fprintf(outfile, "%2d: <unset>\n", i/2);
2773 }
2774 else
2775 {
2776 fprintf(outfile, "%2d: ", i/2);
2777 (void)pchars(bptr + use_offsets[i],
2778 use_offsets[i+1] - use_offsets[i], outfile);
2779 fprintf(outfile, "\n");
2780 if (do_showcaprest || (i == 0 && do_showrest))
2781 {
2782 fprintf(outfile, "%2d+ ", i/2);
2783 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2784 outfile);
2785 fprintf(outfile, "\n");
2786 }
2787 }
2788 }
2789
2790 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2791
2792 for (i = 0; i < 32; i++)
2793 {
2794 if ((copystrings & (1 << i)) != 0)
2795 {
2796 char copybuffer[256];
2797 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2798 i, copybuffer, sizeof(copybuffer));
2799 if (rc < 0)
2800 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2801 else
2802 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2803 }
2804 }
2805
2806 for (copynamesptr = copynames;
2807 *copynamesptr != 0;
2808 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2809 {
2810 char copybuffer[256];
2811 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2812 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2813 if (rc < 0)
2814 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2815 else
2816 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2817 }
2818
2819 for (i = 0; i < 32; i++)
2820 {
2821 if ((getstrings & (1 << i)) != 0)
2822 {
2823 const char *substring;
2824 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2825 i, &substring);
2826 if (rc < 0)
2827 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2828 else
2829 {
2830 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2831 pcre_free_substring(substring);
2832 }
2833 }
2834 }
2835
2836 for (getnamesptr = getnames;
2837 *getnamesptr != 0;
2838 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2839 {
2840 const char *substring;
2841 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2842 count, (char *)getnamesptr, &substring);
2843 if (rc < 0)
2844 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2845 else
2846 {
2847 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2848 pcre_free_substring(substring);
2849 }
2850 }
2851
2852 if (getlist)
2853 {
2854 const char **stringlist;
2855 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2856 &stringlist);
2857 if (rc < 0)
2858 fprintf(outfile, "get substring list failed %d\n", rc);
2859 else
2860 {
2861 for (i = 0; i < count; i++)
2862 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2863 if (stringlist[i] != NULL)
2864 fprintf(outfile, "string list not terminated by NULL\n");
2865 /* free((void *)stringlist); */
2866 pcre_free_substring_list(stringlist);
2867 }
2868 }
2869 }
2870
2871 /* There was a partial match */
2872
2873 else if (count == PCRE_ERROR_PARTIAL)
2874 {
2875 if (markptr == NULL) fprintf(outfile, "Partial match");
2876 else fprintf(outfile, "Partial match, mark=%s", markptr);
2877 if (use_size_offsets > 1)
2878 {
2879 fprintf(outfile, ": ");
2880 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2881 outfile);
2882 }
2883 fprintf(outfile, "\n");
2884 break; /* Out of the /g loop */
2885 }
2886
2887 /* Failed to match. If this is a /g or /G loop and we previously set
2888 g_notempty after a null match, this is not necessarily the end. We want
2889 to advance the start offset, and continue. We won't be at the end of the
2890 string - that was checked before setting g_notempty.
2891
2892 Complication arises in the case when the newline convention is "any",
2893 "crlf", or "anycrlf". If the previous match was at the end of a line
2894 terminated by CRLF, an advance of one character just passes the \r,
2895 whereas we should prefer the longer newline sequence, as does the code in
2896 pcre_exec(). Fudge the offset value to achieve this. We check for a
2897 newline setting in the pattern; if none was set, use pcre_config() to
2898 find the default.
2899
2900 Otherwise, in the case of UTF-8 matching, the advance must be one
2901 character, not one byte. */
2902
2903 else
2904 {
2905 if (g_notempty != 0)
2906 {
2907 int onechar = 1;
2908 unsigned int obits = ((real_pcre *)re)->options;
2909 use_offsets[0] = start_offset;
2910 if ((obits & PCRE_NEWLINE_BITS) == 0)
2911 {
2912 int d;
2913 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2914 /* Note that these values are always the ASCII ones, even in
2915 EBCDIC environments. CR = 13, NL = 10. */
2916 obits = (d == 13)? PCRE_NEWLINE_CR :
2917 (d == 10)? PCRE_NEWLINE_LF :
2918 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2919 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2920 (d == -1)? PCRE_NEWLINE_ANY : 0;
2921 }
2922 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2923 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2924 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2925 &&
2926 start_offset < len - 1 &&
2927 bptr[start_offset] == '\r' &&
2928 bptr[start_offset+1] == '\n')
2929 onechar++;
2930 else if (use_utf8)
2931 {
2932 while (start_offset + onechar < len)
2933 {
2934 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2935 onechar++;
2936 }
2937 }
2938 use_offsets[1] = start_offset + onechar;
2939 }
2940 else
2941 {
2942 switch(count)
2943 {
2944 case PCRE_ERROR_NOMATCH:
2945 if (gmatched == 0)
2946 {
2947 if (markptr == NULL) fprintf(outfile, "No match\n");
2948 else fprintf(outfile, "No match, mark = %s\n", markptr);
2949 }
2950 break;
2951
2952 case PCRE_ERROR_BADUTF8:
2953 case PCRE_ERROR_SHORTUTF8:
2954 fprintf(outfile, "Error %d (%s UTF-8 string)", count,
2955 (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
2956 if (use_size_offsets >= 2)
2957 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
2958 use_offsets[1]);
2959 fprintf(outfile, "\n");
2960 break;
2961
2962 default:
2963 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
2964 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
2965 else
2966 fprintf(outfile, "Error %d (Unexpected value)\n", count);
2967 break;
2968 }
2969
2970 break; /* Out of the /g loop */
2971 }
2972 }
2973
2974 /* If not /g or /G we are done */
2975
2976 if (!do_g && !do_G) break;
2977
2978 /* If we have matched an empty string, first check to see if we are at
2979 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2980 Perl's /g option does. This turns out to be rather cunning. First we set
2981 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2982 same point. If this fails (picked up above) we advance to the next
2983 character. */
2984
2985 g_notempty = 0;
2986
2987 if (use_offsets[0] == use_offsets[1])
2988 {
2989 if (use_offsets[0] == len) break;
2990 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2991 }
2992
2993 /* For /g, update the start offset, leaving the rest alone */
2994
2995 if (do_g) start_offset = use_offsets[1];
2996
2997 /* For /G, update the pointer and length */
2998
2999 else
3000 {
3001 bptr += use_offsets[1];
3002 len -= use_offsets[1];
3003 }
3004 } /* End of loop for /g and /G */
3005
3006 NEXT_DATA: continue;
3007 } /* End of loop for data lines */
3008
3009 CONTINUE:
3010
3011 #if !defined NOPOSIX
3012 if (posix || do_posix) regfree(&preg);
3013 #endif
3014
3015 if (re != NULL) new_free(re);
3016 if (extra != NULL) new_free(extra);
3017 if (locale_set)
3018 {
3019 new_free((void *)tables);
3020 setlocale(LC_CTYPE, "C");
3021 locale_set = 0;
3022 }
3023 }
3024
3025 if (infile == stdin) fprintf(outfile, "\n");
3026
3027 EXIT:
3028
3029 if (infile != NULL && infile != stdin) fclose(infile);
3030 if (outfile != NULL && outfile != stdout) fclose(outfile);
3031
3032 free(buffer);
3033 free(dbuffer);
3034 free(pbuffer);
3035 free(offsets);
3036
3037 return yield;
3038 }
3039
3040 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12