/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 598 - (show annotations) (download)
Sat May 7 15:37:31 2011 UTC (3 years, 5 months ago) by ph10
File MIME type: text/plain
File size: 89264 byte(s)
Pass back detailed info when UTF-8 check fails at runtime.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <locale.h>
49 #include <errno.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #ifndef isatty
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
78 #ifndef fileno
79 #define fileno _fileno
80 #endif
81
82 /* A user sent this fix for Borland Builder 5 under Windows. */
83
84 #ifdef __BORLANDC__
85 #define _setmode(handle, mode) setmode(handle, mode)
86 #endif
87
88 /* Not Windows */
89
90 #else
91 #include <sys/time.h> /* These two includes are needed */
92 #include <sys/resource.h> /* for setrlimit(). */
93 #define INPUT_MODE "rb"
94 #define OUTPUT_MODE "wb"
95 #endif
96
97
98 /* We have to include pcre_internal.h because we need the internal info for
99 displaying the results of pcre_study() and we also need to know about the
100 internal macros, structures, and other internal data values; pcretest has
101 "inside information" compared to a program that strictly follows the PCRE API.
102
103 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105 appropriately for an application, not for building PCRE. */
106
107 #include "pcre.h"
108 #include "pcre_internal.h"
109
110 /* We need access to some of the data tables that PCRE uses. So as not to have
111 to keep two copies, we include the source file here, changing the names of the
112 external symbols to prevent clashes. */
113
114 #define _pcre_ucp_gentype ucp_gentype
115 #define _pcre_utf8_table1 utf8_table1
116 #define _pcre_utf8_table1_size utf8_table1_size
117 #define _pcre_utf8_table2 utf8_table2
118 #define _pcre_utf8_table3 utf8_table3
119 #define _pcre_utf8_table4 utf8_table4
120 #define _pcre_utt utt
121 #define _pcre_utt_size utt_size
122 #define _pcre_utt_names utt_names
123 #define _pcre_OP_lengths OP_lengths
124
125 #include "pcre_tables.c"
126
127 /* We also need the pcre_printint() function for printing out compiled
128 patterns. This function is in a separate file so that it can be included in
129 pcre_compile.c when that module is compiled with debugging enabled. It needs to
130 know which case is being compiled. */
131
132 #define COMPILING_PCRETEST
133 #include "pcre_printint.src"
134
135 /* The definition of the macro PRINTABLE, which determines whether to print an
136 output character as-is or as a hex value when showing compiled patterns, is
137 contained in the printint.src file. We use it here also, in cases when the
138 locale has not been explicitly changed, so as to get consistent output from
139 systems that differ in their output from isprint() even in the "C" locale. */
140
141 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
142
143 /* It is possible to compile this test program without including support for
144 testing the POSIX interface, though this is not available via the standard
145 Makefile. */
146
147 #if !defined NOPOSIX
148 #include "pcreposix.h"
149 #endif
150
151 /* It is also possible, for the benefit of the version currently imported into
152 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
153 interface to the DFA matcher (NODFA), and without the doublecheck of the old
154 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
155 UTF8 support if PCRE is built without it. */
156
157 #ifndef SUPPORT_UTF8
158 #ifndef NOUTF8
159 #define NOUTF8
160 #endif
161 #endif
162
163
164 /* Other parameters */
165
166 #ifndef CLOCKS_PER_SEC
167 #ifdef CLK_TCK
168 #define CLOCKS_PER_SEC CLK_TCK
169 #else
170 #define CLOCKS_PER_SEC 100
171 #endif
172 #endif
173
174 /* This is the default loop count for timing. */
175
176 #define LOOPREPEAT 500000
177
178 /* Static variables */
179
180 static FILE *outfile; /* stream written by callout(), the malloc wrappers and new_info() */
181 static int log_store = 0; /* NOTE(review): not referenced in this part of the file */
182 static int callout_count; /* bumped by callout() each time the callout_fail_id callout is hit */
183 static int callout_extra; /* non-zero: callout() also lists the captured substrings */
184 static int callout_fail_count; /* callout() yields 1 once callout_count reaches this value */
185 static int callout_fail_id; /* callout number that is counted against callout_fail_count */
186 static int debug_lengths; /* NOTE(review): not referenced in this part of the file */
187 static int first_callout; /* non-zero: callout() prints the subject; callout() resets it to 0 */
188 static int locale_set = 0; /* non-zero: PRINTHEX() uses isprint() rather than PRINTABLE() */
189 static int show_malloc; /* non-zero: the malloc/free wrappers log every call to outfile */
190 static int use_utf8; /* non-zero: pchars() tries to decode its input as UTF-8 */
191 static size_t gotten_store; /* size passed to the most recent new_malloc() call */
192
193 /* The buffers grow automatically if very long input lines are encountered. */
194
195 static int buffer_size = 50000; /* current size, in bytes, of each of the three buffers */
196 static uschar *buffer = NULL; /* input line buffer filled by extend_inputline() */
197 static uschar *dbuffer = NULL; /* resized along with buffer; its contents are not carried over */
198 static uschar *pbuffer = NULL; /* copy of the input read by callout(); contents survive resizing */
199
200 /* Textual explanations for runtime error codes. A NULL entry marks a code
that has no generic text in this table; the per-entry notes say why.
NOTE(review): the table is presumably indexed by the negated error code
returned by the matching functions - confirm at the point of use. */
201
202 static const char *errtexts[] = {
203 NULL, /* 0 is no error */
204 NULL, /* NOMATCH is handled specially */
205 "NULL argument passed",
206 "bad option value",
207 "magic number missing",
208 "unknown opcode - pattern overwritten?",
209 "no more memory",
210 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
211 "match limit exceeded",
212 "callout error code",
213 NULL, /* BADUTF8 is handled specially */
214 "bad UTF-8 offset",
215 NULL, /* PARTIAL is handled specially */
216 "not used - internal error",
217 "internal error - pattern overwritten?",
218 "bad count value",
219 "item unsupported for DFA matching",
220 "backreference condition or recursion test not supported for DFA matching",
221 "match limit not supported for DFA matching",
222 "workspace size exceeded in DFA matching",
223 "too much recursion for DFA matching",
224 "recursion limit exceeded",
225 "not used - internal error",
226 "invalid combination of newline options",
227 "bad offset value",
228 NULL /* SHORTUTF8 is handled specially */
229 };
230
231
232 /*************************************************
233 * Alternate character tables *
234 *************************************************/
235
236 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
237 using the default tables of the library. However, the T option can be used to
238 select alternate sets of tables, for different kinds of testing. Note also that
239 the L (locale) option also adjusts the tables. */
240
241 /* This is the set of tables distributed as default with PCRE. It recognizes
242 only ASCII characters. The array holds four sections back to back: a 256-byte
lower casing table, a 256-byte case flipping table, ten 32-byte character
class bit maps (320 bytes), and a 256-byte table of per-character type bits. */
243
244 static const unsigned char tables0[] = {
245
246 /* This table is a lower casing table. */
247
248 0, 1, 2, 3, 4, 5, 6, 7,
249 8, 9, 10, 11, 12, 13, 14, 15,
250 16, 17, 18, 19, 20, 21, 22, 23,
251 24, 25, 26, 27, 28, 29, 30, 31,
252 32, 33, 34, 35, 36, 37, 38, 39,
253 40, 41, 42, 43, 44, 45, 46, 47,
254 48, 49, 50, 51, 52, 53, 54, 55,
255 56, 57, 58, 59, 60, 61, 62, 63,
256 64, 97, 98, 99,100,101,102,103,
257 104,105,106,107,108,109,110,111,
258 112,113,114,115,116,117,118,119,
259 120,121,122, 91, 92, 93, 94, 95,
260 96, 97, 98, 99,100,101,102,103,
261 104,105,106,107,108,109,110,111,
262 112,113,114,115,116,117,118,119,
263 120,121,122,123,124,125,126,127,
264 128,129,130,131,132,133,134,135,
265 136,137,138,139,140,141,142,143,
266 144,145,146,147,148,149,150,151,
267 152,153,154,155,156,157,158,159,
268 160,161,162,163,164,165,166,167,
269 168,169,170,171,172,173,174,175,
270 176,177,178,179,180,181,182,183,
271 184,185,186,187,188,189,190,191,
272 192,193,194,195,196,197,198,199,
273 200,201,202,203,204,205,206,207,
274 208,209,210,211,212,213,214,215,
275 216,217,218,219,220,221,222,223,
276 224,225,226,227,228,229,230,231,
277 232,233,234,235,236,237,238,239,
278 240,241,242,243,244,245,246,247,
279 248,249,250,251,252,253,254,255,
280
281 /* This table is a case flipping table. */
282
283 0, 1, 2, 3, 4, 5, 6, 7,
284 8, 9, 10, 11, 12, 13, 14, 15,
285 16, 17, 18, 19, 20, 21, 22, 23,
286 24, 25, 26, 27, 28, 29, 30, 31,
287 32, 33, 34, 35, 36, 37, 38, 39,
288 40, 41, 42, 43, 44, 45, 46, 47,
289 48, 49, 50, 51, 52, 53, 54, 55,
290 56, 57, 58, 59, 60, 61, 62, 63,
291 64, 97, 98, 99,100,101,102,103,
292 104,105,106,107,108,109,110,111,
293 112,113,114,115,116,117,118,119,
294 120,121,122, 91, 92, 93, 94, 95,
295 96, 65, 66, 67, 68, 69, 70, 71,
296 72, 73, 74, 75, 76, 77, 78, 79,
297 80, 81, 82, 83, 84, 85, 86, 87,
298 88, 89, 90,123,124,125,126,127,
299 128,129,130,131,132,133,134,135,
300 136,137,138,139,140,141,142,143,
301 144,145,146,147,148,149,150,151,
302 152,153,154,155,156,157,158,159,
303 160,161,162,163,164,165,166,167,
304 168,169,170,171,172,173,174,175,
305 176,177,178,179,180,181,182,183,
306 184,185,186,187,188,189,190,191,
307 192,193,194,195,196,197,198,199,
308 200,201,202,203,204,205,206,207,
309 208,209,210,211,212,213,214,215,
310 216,217,218,219,220,221,222,223,
311 224,225,226,227,228,229,230,231,
312 232,233,234,235,236,237,238,239,
313 240,241,242,243,244,245,246,247,
314 248,249,250,251,252,253,254,255,
315
316 /* This table contains bit maps for various character classes. Each map is 32
317 bytes long and the bits run from the least significant end of each byte. The
318 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
319 graph, print, punct, and cntrl. Other classes are built from combinations. */
320
321 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
322 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
323 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
324 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
325
326 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
327 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
328 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330
331 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
332 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
333 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
335
336 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
337 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
338 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
340
341 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
342 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
343 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
344 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
345
346 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
347 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
348 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
349 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
350
351 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
352 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
353 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
354 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
355
356 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
357 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
358 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
359 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360
361 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
362 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
363 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
364 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365
366 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
367 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
368 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
369 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
370
371 /* This table identifies various classes of character by individual bits:
372 0x01 white space character
373 0x02 letter
374 0x04 decimal digit
375 0x08 hexadecimal digit
376 0x10 alphanumeric or '_'
377 0x80 regular expression metacharacter or binary zero
378 */
379
380 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
381 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
382 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
383 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
384 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
385 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
386 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
387 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
388 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
389 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
390 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
391 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
392 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
393 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
394 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
395 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
396 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
397 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
398 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
399 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
400 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
401 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
402 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
403 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
404 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
405 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
406 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
407 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
408 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
409 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
410 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
411 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
412
413 /* This is a set of tables that came originally from a Windows user. It seems to
414 be at least an approximation of ISO 8859. In particular, there are characters
415 greater than 128 that are marked as spaces, letters, etc. The array has the
same total size as tables0 (256 + 256 + 320 + 256 bytes); the section comments
below assume it is laid out in the same order as tables0. */
416
417 static const unsigned char tables1[] = {
/* First 256 bytes: lower casing table (192-222 map to 224-254, except 215). */
418 0,1,2,3,4,5,6,7,
419 8,9,10,11,12,13,14,15,
420 16,17,18,19,20,21,22,23,
421 24,25,26,27,28,29,30,31,
422 32,33,34,35,36,37,38,39,
423 40,41,42,43,44,45,46,47,
424 48,49,50,51,52,53,54,55,
425 56,57,58,59,60,61,62,63,
426 64,97,98,99,100,101,102,103,
427 104,105,106,107,108,109,110,111,
428 112,113,114,115,116,117,118,119,
429 120,121,122,91,92,93,94,95,
430 96,97,98,99,100,101,102,103,
431 104,105,106,107,108,109,110,111,
432 112,113,114,115,116,117,118,119,
433 120,121,122,123,124,125,126,127,
434 128,129,130,131,132,133,134,135,
435 136,137,138,139,140,141,142,143,
436 144,145,146,147,148,149,150,151,
437 152,153,154,155,156,157,158,159,
438 160,161,162,163,164,165,166,167,
439 168,169,170,171,172,173,174,175,
440 176,177,178,179,180,181,182,183,
441 184,185,186,187,188,189,190,191,
442 224,225,226,227,228,229,230,231,
443 232,233,234,235,236,237,238,239,
444 240,241,242,243,244,245,246,215,
445 248,249,250,251,252,253,254,223,
446 224,225,226,227,228,229,230,231,
447 232,233,234,235,236,237,238,239,
448 240,241,242,243,244,245,246,247,
449 248,249,250,251,252,253,254,255,
/* Next 256 bytes: case flipping table. */
450 0,1,2,3,4,5,6,7,
451 8,9,10,11,12,13,14,15,
452 16,17,18,19,20,21,22,23,
453 24,25,26,27,28,29,30,31,
454 32,33,34,35,36,37,38,39,
455 40,41,42,43,44,45,46,47,
456 48,49,50,51,52,53,54,55,
457 56,57,58,59,60,61,62,63,
458 64,97,98,99,100,101,102,103,
459 104,105,106,107,108,109,110,111,
460 112,113,114,115,116,117,118,119,
461 120,121,122,91,92,93,94,95,
462 96,65,66,67,68,69,70,71,
463 72,73,74,75,76,77,78,79,
464 80,81,82,83,84,85,86,87,
465 88,89,90,123,124,125,126,127,
466 128,129,130,131,132,133,134,135,
467 136,137,138,139,140,141,142,143,
468 144,145,146,147,148,149,150,151,
469 152,153,154,155,156,157,158,159,
470 160,161,162,163,164,165,166,167,
471 168,169,170,171,172,173,174,175,
472 176,177,178,179,180,181,182,183,
473 184,185,186,187,188,189,190,191,
474 224,225,226,227,228,229,230,231,
475 232,233,234,235,236,237,238,239,
476 240,241,242,243,244,245,246,215,
477 248,249,250,251,252,253,254,223,
478 192,193,194,195,196,197,198,199,
479 200,201,202,203,204,205,206,207,
480 208,209,210,211,212,213,214,247,
481 216,217,218,219,220,221,222,255,
/* Next 320 bytes: ten 32-byte character class bit maps (four rows each). */
482 0,62,0,0,1,0,0,0,
483 0,0,0,0,0,0,0,0,
484 32,0,0,0,1,0,0,0,
485 0,0,0,0,0,0,0,0,
486 0,0,0,0,0,0,255,3,
487 126,0,0,0,126,0,0,0,
488 0,0,0,0,0,0,0,0,
489 0,0,0,0,0,0,0,0,
490 0,0,0,0,0,0,255,3,
491 0,0,0,0,0,0,0,0,
492 0,0,0,0,0,0,12,2,
493 0,0,0,0,0,0,0,0,
494 0,0,0,0,0,0,0,0,
495 254,255,255,7,0,0,0,0,
496 0,0,0,0,0,0,0,0,
497 255,255,127,127,0,0,0,0,
498 0,0,0,0,0,0,0,0,
499 0,0,0,0,254,255,255,7,
500 0,0,0,0,0,4,32,4,
501 0,0,0,128,255,255,127,255,
502 0,0,0,0,0,0,255,3,
503 254,255,255,135,254,255,255,7,
504 0,0,0,0,0,4,44,6,
505 255,255,127,255,255,255,127,255,
506 0,0,0,0,254,255,255,255,
507 255,255,255,255,255,255,255,127,
508 0,0,0,0,254,255,255,255,
509 255,255,255,255,255,255,255,255,
510 0,2,0,0,255,255,255,255,
511 255,255,255,255,255,255,255,127,
512 0,0,0,0,255,255,255,255,
513 255,255,255,255,255,255,255,255,
514 0,0,0,0,254,255,0,252,
515 1,0,0,248,1,0,0,120,
516 0,0,0,0,254,255,255,255,
517 0,0,128,0,0,0,128,0,
518 255,255,255,255,0,0,0,0,
519 0,0,0,0,0,0,0,128,
520 255,255,255,255,0,0,0,0,
521 0,0,0,0,0,0,0,0,
/* Last 256 bytes: per-character type bits (values here are decimal). */
522 128,0,0,0,0,0,0,0,
523 0,1,1,0,1,1,0,0,
524 0,0,0,0,0,0,0,0,
525 0,0,0,0,0,0,0,0,
526 1,0,0,0,128,0,0,0,
527 128,128,128,128,0,0,128,0,
528 28,28,28,28,28,28,28,28,
529 28,28,0,0,0,0,0,128,
530 0,26,26,26,26,26,26,18,
531 18,18,18,18,18,18,18,18,
532 18,18,18,18,18,18,18,18,
533 18,18,18,128,128,0,128,16,
534 0,26,26,26,26,26,26,18,
535 18,18,18,18,18,18,18,18,
536 18,18,18,18,18,18,18,18,
537 18,18,18,128,128,0,0,0,
538 0,0,0,0,0,1,0,0,
539 0,0,0,0,0,0,0,0,
540 0,0,0,0,0,0,0,0,
541 0,0,0,0,0,0,0,0,
542 1,0,0,0,0,0,0,0,
543 0,0,18,0,0,0,0,0,
544 0,0,20,20,0,18,0,0,
545 0,20,18,0,0,0,0,0,
546 18,18,18,18,18,18,18,18,
547 18,18,18,18,18,18,18,18,
548 18,18,18,18,18,18,18,0,
549 18,18,18,18,18,18,18,18,
550 18,18,18,18,18,18,18,18,
551 18,18,18,18,18,18,18,18,
552 18,18,18,18,18,18,18,0,
553 18,18,18,18,18,18,18,18
554 };
555
556
557
558
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for C libraries that lack strerror() (SunOS4 is one example). The
message is looked up in the sys_errlist[] table that such libraries export.

Argument:
  n         the error number

Returns:    the matching sys_errlist[] entry, or a fixed "unknown" text when
            n is outside the range 0 .. sys_nerr-1
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
578
579
580
581
582 /*************************************************
583 * Read or extend an input line *
584 *************************************************/
585
586 /* Input lines are read into buffer, but both patterns and data lines can be
587 continued over multiple input lines. In addition, if the buffer fills up, we
588 want to automatically expand it so as to be able to handle extremely large
589 lines that are needed for certain stress tests. When the input buffer is
590 expanded, the other two buffers must also be expanded likewise, and the
591 contents of pbuffer, which are a copy of the input for callouts, must be
592 preserved (for when expansion happens for a data line). This is not the most
593 optimal way of handling this, but hey, this is just a test program!
594
595 Arguments:
596 f the file to read
597 start where in buffer to start (this *must* be within buffer)
598 prompt for stdin or readline()
599
600 Returns: pointer to the start of new data
601 could be a copy of start, or could be moved
602 NULL if no data read and EOF reached
603 */
604
605 static uschar *
606 extend_inputline(FILE *f, uschar *start, const char *prompt)
607 {
608 uschar *here = start;
609
610 for (;;)
611 {
612 int rlen = (int)(buffer_size - (here - buffer));
613
614 if (rlen > 1000)
615 {
616 int dlen;
617
618 /* If libreadline support is required, use readline() to read a line if the
619 input is a terminal. Note that readline() removes the trailing newline, so
620 we must put it back again, to be compatible with fgets(). */
621
622 #ifdef SUPPORT_LIBREADLINE
623 if (isatty(fileno(f)))
624 {
625 size_t len;
626 char *s = readline(prompt);
627 if (s == NULL) return (here == start)? NULL : start;
628 len = strlen(s);
629 if (len > 0) add_history(s);
630 if (len > rlen - 1) len = rlen - 1;
631 memcpy(here, s, len);
632 here[len] = '\n';
633 here[len+1] = 0;
634 free(s);
635 }
636 else
637 #endif
638
639 /* Read the next line by normal means, prompting if the file is stdin. */
640
641 {
642 if (f == stdin) printf("%s", prompt);
643 if (fgets((char *)here, rlen, f) == NULL)
644 return (here == start)? NULL : start;
645 }
646
647 dlen = (int)strlen((char *)here);
648 if (dlen > 0 && here[dlen - 1] == '\n') return start;
649 here += dlen;
650 }
651
652 else
653 {
654 int new_buffer_size = 2*buffer_size;
655 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
656 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
657 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
658
659 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
660 {
661 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
662 exit(1);
663 }
664
665 memcpy(new_buffer, buffer, buffer_size);
666 memcpy(new_pbuffer, pbuffer, buffer_size);
667
668 buffer_size = new_buffer_size;
669
670 start = new_buffer + (start - buffer);
671 here = new_buffer + (here - buffer);
672
673 free(buffer);
674 free(dbuffer);
675 free(pbuffer);
676
677 buffer = new_buffer;
678 dbuffer = new_dbuffer;
679 pbuffer = new_pbuffer;
680 }
681 }
682
683 return NULL; /* Control never gets here */
684 }
685
686
687
688
689
690
691
/*************************************************
*          Read number from string               *
*************************************************/

/* A hand-written decimal reader; strtoul() is avoided because SunOS4 does not
have it. Leading white space is skipped, then digits are accumulated for as
long as they last. It is only used for unpicking arguments, so it is kept
simple: there is no sign handling and no overflow check.

Arguments:
  str           string to be converted
  endptr        where to put the end pointer (first byte not consumed)

Returns:        the value as an int; 0 if no digits were found
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  unsigned char *p = str;
  int total = 0;

  for (; *p != 0 && isspace(*p); p++)
    ;
  for (; isdigit(*p); p++)
    total = total * 10 + (int)(*p - '0');

  *endptr = p;
  return total;
}
716
717
718
719
720 /*************************************************
721 * Convert UTF-8 string to value *
722 *************************************************/
723
724 /* This function takes one or more bytes that represents a UTF-8 character,
725 and returns the value of the character.
726
727 Argument:
728 utf8bytes a pointer to the byte vector
729 vptr a pointer to an int to receive the value
730
731 Returns: > 0 => the number of bytes consumed
732 -6 to 0 => malformed UTF-8 character at offset = (-return)
733 */
734
735 #if !defined NOUTF8
736
737 static int
738 utf82ord(unsigned char *utf8bytes, int *vptr)
739 {
740 int c = *utf8bytes++;
741 int d = c;
742 int i, j, s;
743
744 for (i = -1; i < 6; i++) /* i is number of additional bytes */
745 {
746 if ((d & 0x80) == 0) break;
747 d <<= 1;
748 }
749
750 if (i == -1) { *vptr = c; return 1; } /* ascii character */
751 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
752
753 /* i now has a value in the range 1-5 */
754
755 s = 6*i;
756 d = (c & utf8_table3[i]) << s;
757
758 for (j = 0; j < i; j++)
759 {
760 c = *utf8bytes++;
761 if ((c & 0xc0) != 0x80) return -(j+1);
762 s -= 6;
763 d |= (c & 0x3f) << s;
764 }
765
766 /* Check that encoding was the correct unique one */
767
768 for (j = 0; j < utf8_table1_size; j++)
769 if (d <= utf8_table1[j]) break;
770 if (j != i) return -(i+1);
771
772 /* Valid value */
773
774 *vptr = d;
775 return i+1;
776 }
777
778 #endif
779
780
781
782 /*************************************************
783 * Convert character value to UTF-8 *
784 *************************************************/
785
786 /* This function takes an integer value in the range 0 - 0x7fffffff
787 and encodes it as a UTF-8 character in 0 to 6 bytes.
788
789 Arguments:
790 cvalue the character value
791 utf8bytes pointer to buffer for result - at least 6 bytes long
792
793 Returns: number of characters placed in the buffer
794 */
795
796 #if !defined NOUTF8
797
798 static int
799 ord2utf8(int cvalue, uschar *utf8bytes)
800 {
801 register int i, j;
802 for (i = 0; i < utf8_table1_size; i++)
803 if (cvalue <= utf8_table1[i]) break;
804 utf8bytes += i;
805 for (j = i; j > 0; j--)
806 {
807 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
808 cvalue >>= 6;
809 }
810 *utf8bytes = utf8_table2[i] | cvalue;
811 return i + 1;
812 }
813
814 #endif
815
816
817
818 /*************************************************
819 * Print character string *
820 *************************************************/
821
822 /* Character string printing function. Must handle UTF-8 strings in utf8
823 mode. Yields number of characters printed. If handed a NULL file, just counts
824 chars without printing. */
825
826 static int pchars(unsigned char *p, int length, FILE *f)
827 {
828 int c = 0;
829 int yield = 0;
830
831 while (length-- > 0)
832 {
833 #if !defined NOUTF8
834 if (use_utf8)
835 {
836 int rc = utf82ord(p, &c);
837
838 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
839 {
840 length -= rc - 1;
841 p += rc;
842 if (PRINTHEX(c))
843 {
844 if (f != NULL) fprintf(f, "%c", c);
845 yield++;
846 }
847 else
848 {
849 int n = 4;
850 if (f != NULL) fprintf(f, "\\x{%02x}", c);
851 yield += (n <= 0x000000ff)? 2 :
852 (n <= 0x00000fff)? 3 :
853 (n <= 0x0000ffff)? 4 :
854 (n <= 0x000fffff)? 5 : 6;
855 }
856 continue;
857 }
858 }
859 #endif
860
861 /* Not UTF-8, or malformed UTF-8 */
862
863 c = *p++;
864 if (PRINTHEX(c))
865 {
866 if (f != NULL) fprintf(f, "%c", c);
867 yield++;
868 }
869 else
870 {
871 if (f != NULL) fprintf(f, "\\x%02x", c);
872 yield += 4;
873 }
874 }
875
876 return yield;
877 }
878
879
880
881 /*************************************************
882 * Callout function *
883 *************************************************/
884
885 /* Called from PCRE as a result of the (?C) item. We print out where we are in
886 the match. Yield zero unless more callouts than the fail count, or the callout
887 data is not zero. */
888
889 static int callout(pcre_callout_block *cb)
890 {
891 FILE *f = (first_callout | callout_extra)? outfile : NULL;
892 int i, pre_start, post_start, subject_length;
893
894 if (callout_extra)
895 {
896 fprintf(f, "Callout %d: last capture = %d\n",
897 cb->callout_number, cb->capture_last);
898
899 for (i = 0; i < cb->capture_top * 2; i += 2)
900 {
901 if (cb->offset_vector[i] < 0)
902 fprintf(f, "%2d: <unset>\n", i/2);
903 else
904 {
905 fprintf(f, "%2d: ", i/2);
906 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
907 cb->offset_vector[i+1] - cb->offset_vector[i], f);
908 fprintf(f, "\n");
909 }
910 }
911 }
912
913 /* Re-print the subject in canonical form, the first time or if giving full
914 datails. On subsequent calls in the same match, we use pchars just to find the
915 printed lengths of the substrings. */
916
917 if (f != NULL) fprintf(f, "--->");
918
919 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
920 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
921 cb->current_position - cb->start_match, f);
922
923 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
924
925 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
926 cb->subject_length - cb->current_position, f);
927
928 if (f != NULL) fprintf(f, "\n");
929
930 /* Always print appropriate indicators, with callout number if not already
931 shown. For automatic callouts, show the pattern offset. */
932
933 if (cb->callout_number == 255)
934 {
935 fprintf(outfile, "%+3d ", cb->pattern_position);
936 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
937 }
938 else
939 {
940 if (callout_extra) fprintf(outfile, " ");
941 else fprintf(outfile, "%3d ", cb->callout_number);
942 }
943
944 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
945 fprintf(outfile, "^");
946
947 if (post_start > 0)
948 {
949 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
950 fprintf(outfile, "^");
951 }
952
953 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
954 fprintf(outfile, " ");
955
956 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
957 pbuffer + cb->pattern_position);
958
959 fprintf(outfile, "\n");
960 first_callout = 0;
961
962 if (cb->callout_data != NULL)
963 {
964 int callout_data = *((int *)(cb->callout_data));
965 if (callout_data != 0)
966 {
967 fprintf(outfile, "Callout data = %d\n", callout_data);
968 return callout_data;
969 }
970 }
971
972 return (cb->callout_number != callout_fail_id)? 0 :
973 (++callout_count >= callout_fail_count)? 1 : 0;
974 }
975
976
977 /*************************************************
978 * Local malloc functions *
979 *************************************************/
980
981 /* Alternative malloc function, to test functionality and show the size of the
982 compiled re. */
983
984 static void *new_malloc(size_t size)
985 {
986 void *block = malloc(size);
987 gotten_store = size;
988 if (show_malloc)
989 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
990 return block;
991 }
992
993 static void new_free(void *block)
994 {
995 if (show_malloc)
996 fprintf(outfile, "free %p\n", block);
997 free(block);
998 }
999
1000
1001 /* For recursion malloc/free, to test stacking calls */
1002
1003 static void *stack_malloc(size_t size)
1004 {
1005 void *block = malloc(size);
1006 if (show_malloc)
1007 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1008 return block;
1009 }
1010
1011 static void stack_free(void *block)
1012 {
1013 if (show_malloc)
1014 fprintf(outfile, "stack_free %p\n", block);
1015 free(block);
1016 }
1017
1018
1019 /*************************************************
1020 * Call pcre_fullinfo() *
1021 *************************************************/
1022
1023 /* Get one piece of information from the pcre_fullinfo() function */
1024
1025 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1026 {
1027 int rc;
1028 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1029 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1030 }
1031
1032
1033
1034 /*************************************************
1035 * Byte flipping function *
1036 *************************************************/
1037
/* Reverse the byte order of an integer field.

Arguments:
  value     the value to flip
  n         field width in bytes; 2 swaps the two low-order bytes, any other
            value reverses the four low-order bytes

Returns:    the flipped value; bits above the flipped field are not carried
            over into the result
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int b0 = value & 0xff;
  unsigned long int b1 = (value >> 8) & 0xff;
  unsigned long int b2;
  unsigned long int b3;

  if (n == 2)
    return (b0 << 8) | b1;

  b2 = (value >> 16) & 0xff;
  b3 = (value >> 24) & 0xff;

  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1047
1048
1049
1050
1051 /*************************************************
1052 * Check match or recursion limit *
1053 *************************************************/
1054
1055 static int
1056 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1057 int start_offset, int options, int *use_offsets, int use_size_offsets,
1058 int flag, unsigned long int *limit, int errnumber, const char *msg)
1059 {
1060 int count;
1061 int min = 0;
1062 int mid = 64;
1063 int max = -1;
1064
1065 extra->flags |= flag;
1066
1067 for (;;)
1068 {
1069 *limit = mid;
1070
1071 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1072 use_offsets, use_size_offsets);
1073
1074 if (count == errnumber)
1075 {
1076 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1077 min = mid;
1078 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1079 }
1080
1081 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1082 count == PCRE_ERROR_PARTIAL)
1083 {
1084 if (mid == min + 1)
1085 {
1086 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1087 break;
1088 }
1089 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1090 max = mid;
1091 mid = (min + mid)/2;
1092 }
1093 else break; /* Some other error */
1094 }
1095
1096 extra->flags &= ~flag;
1097 return count;
1098 }
1099
1100
1101
1102 /*************************************************
1103 * Case-independent strncmp() function *
1104 *************************************************/
1105
1106 /*
1107 Arguments:
1108 s first string
1109 t second string
1110 n number of characters to compare
1111
1112 Returns: < 0, = 0, or > 0, according to the comparison
1113 */
1114
1115 static int
1116 strncmpic(uschar *s, uschar *t, int n)
1117 {
1118 while (n--)
1119 {
1120 int c = tolower(*s++) - tolower(*t++);
1121 if (c) return c;
1122 }
1123 return 0;
1124 }
1125
1126
1127
1128 /*************************************************
1129 * Check newline indicator *
1130 *************************************************/
1131
1132 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1133 a message and return 0 if there is no match.
1134
1135 Arguments:
1136 p points after the leading '<'
1137 f file for error message
1138
1139 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1140 */
1141
1142 static int
1143 check_newline(uschar *p, FILE *f)
1144 {
1145 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1146 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1147 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1148 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1149 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1150 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1151 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1152 fprintf(f, "Unknown newline type at: <%s\n", p);
1153 return 0;
1154 }
1155
1156
1157
1158 /*************************************************
1159 * Usage function *
1160 *************************************************/
1161
/* Write the command-line help text to stdout. Which lines appear depends on
the build: the readline note follows SUPPORT_LIBREADLINE, and the -dfa and -p
entries are omitted when NODFA or NOPOSIX is defined. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
  fputs("If input is a terminal, readline() is used to read from it.\n",
        stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

  fputs("\nOptions:\n"
        " -b show compiled code (bytecode)\n"
        " -C show PCRE compile-time options and exit\n"
        " -d debug: show compiled code and information (-b and -i)\n",
        stdout);

#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -M find MATCH_LIMIT minimum for each subject\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif

  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s output store (memory) used information\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n",
        stdout);
}
1195
1196
1197
1198 /*************************************************
1199 * Main Program *
1200 *************************************************/
1201
1202 /* Read lines from named file or stdin and write to named file or stdout; lines
1203 consist of a regular expression, in delimiters and optionally followed by
1204 options, followed by a set of test data, terminated by an empty line. */
1205
1206 int main(int argc, char **argv)
1207 {
1208 FILE *infile = stdin;
1209 int options = 0;
1210 int study_options = 0;
1211 int default_find_match_limit = FALSE;
1212 int op = 1;
1213 int timeit = 0;
1214 int timeitm = 0;
1215 int showinfo = 0;
1216 int showstore = 0;
1217 int quiet = 0;
1218 int size_offsets = 45;
1219 int size_offsets_max;
1220 int *offsets = NULL;
1221 #if !defined NOPOSIX
1222 int posix = 0;
1223 #endif
1224 int debug = 0;
1225 int done = 0;
1226 int all_use_dfa = 0;
1227 int yield = 0;
1228 int stack_size;
1229
1230 /* These vectors store, end-to-end, a list of captured substring names. Assume
1231 that 1024 is plenty long enough for the few names we'll be testing. */
1232
1233 uschar copynames[1024];
1234 uschar getnames[1024];
1235
1236 uschar *copynamesptr;
1237 uschar *getnamesptr;
1238
1239 /* Get buffers from malloc() so that Electric Fence will check their misuse
1240 when I am debugging. They grow automatically when very long lines are read. */
1241
1242 buffer = (unsigned char *)malloc(buffer_size);
1243 dbuffer = (unsigned char *)malloc(buffer_size);
1244 pbuffer = (unsigned char *)malloc(buffer_size);
1245
1246 /* The outfile variable is static so that new_malloc can use it. */
1247
1248 outfile = stdout;
1249
1250 /* The following _setmode() stuff is some Windows magic that tells its runtime
1251 library to translate CRLF into a single LF character. At least, that's what
1252 I've been told: never having used Windows I take this all on trust. Originally
1253 it set 0x8000, but then I was advised that _O_BINARY was better. */
1254
1255 #if defined(_WIN32) || defined(WIN32)
1256 _setmode( _fileno( stdout ), _O_BINARY );
1257 #endif
1258
1259 /* Scan options */
1260
1261 while (argc > 1 && argv[op][0] == '-')
1262 {
1263 unsigned char *endptr;
1264
1265 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
1266 showstore = 1;
1267 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1268 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1269 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1270 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1271 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1272 #if !defined NODFA
1273 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1274 #endif
1275 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1276 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1277 *endptr == 0))
1278 {
1279 op++;
1280 argc--;
1281 }
1282 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1283 {
1284 int both = argv[op][2] == 0;
1285 int temp;
1286 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1287 *endptr == 0))
1288 {
1289 timeitm = temp;
1290 op++;
1291 argc--;
1292 }
1293 else timeitm = LOOPREPEAT;
1294 if (both) timeit = timeitm;
1295 }
1296 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1297 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1298 *endptr == 0))
1299 {
1300 #if defined(_WIN32) || defined(WIN32)
1301 printf("PCRE: -S not supported on this OS\n");
1302 exit(1);
1303 #else
1304 int rc;
1305 struct rlimit rlim;
1306 getrlimit(RLIMIT_STACK, &rlim);
1307 rlim.rlim_cur = stack_size * 1024 * 1024;
1308 rc = setrlimit(RLIMIT_STACK, &rlim);
1309 if (rc != 0)
1310 {
1311 printf("PCRE: setrlimit() failed with error %d\n", rc);
1312 exit(1);
1313 }
1314 op++;
1315 argc--;
1316 #endif
1317 }
1318 #if !defined NOPOSIX
1319 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1320 #endif
1321 else if (strcmp(argv[op], "-C") == 0)
1322 {
1323 int rc;
1324 unsigned long int lrc;
1325 printf("PCRE version %s\n", pcre_version());
1326 printf("Compiled with\n");
1327 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1328 printf(" %sUTF-8 support\n", rc? "" : "No ");
1329 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1330 printf(" %sUnicode properties support\n", rc? "" : "No ");
1331 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1332 /* Note that these values are always the ASCII values, even
1333 in EBCDIC environments. CR is 13 and NL is 10. */
1334 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1335 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1336 (rc == -2)? "ANYCRLF" :
1337 (rc == -1)? "ANY" : "???");
1338 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1339 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1340 "all Unicode newlines");
1341 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1342 printf(" Internal link size = %d\n", rc);
1343 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1344 printf(" POSIX malloc threshold = %d\n", rc);
1345 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1346 printf(" Default match limit = %ld\n", lrc);
1347 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1348 printf(" Default recursion depth limit = %ld\n", lrc);
1349 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1350 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1351 goto EXIT;
1352 }
1353 else if (strcmp(argv[op], "-help") == 0 ||
1354 strcmp(argv[op], "--help") == 0)
1355 {
1356 usage();
1357 goto EXIT;
1358 }
1359 else
1360 {
1361 printf("** Unknown or malformed option %s\n", argv[op]);
1362 usage();
1363 yield = 1;
1364 goto EXIT;
1365 }
1366 op++;
1367 argc--;
1368 }
1369
1370 /* Get the store for the offsets vector, and remember what it was */
1371
1372 size_offsets_max = size_offsets;
1373 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1374 if (offsets == NULL)
1375 {
1376 printf("** Failed to get %d bytes of memory for offsets vector\n",
1377 (int)(size_offsets_max * sizeof(int)));
1378 yield = 1;
1379 goto EXIT;
1380 }
1381
1382 /* Sort out the input and output files */
1383
1384 if (argc > 1)
1385 {
1386 infile = fopen(argv[op], INPUT_MODE);
1387 if (infile == NULL)
1388 {
1389 printf("** Failed to open %s\n", argv[op]);
1390 yield = 1;
1391 goto EXIT;
1392 }
1393 }
1394
1395 if (argc > 2)
1396 {
1397 outfile = fopen(argv[op+1], OUTPUT_MODE);
1398 if (outfile == NULL)
1399 {
1400 printf("** Failed to open %s\n", argv[op+1]);
1401 yield = 1;
1402 goto EXIT;
1403 }
1404 }
1405
1406 /* Set alternative malloc function */
1407
1408 pcre_malloc = new_malloc;
1409 pcre_free = new_free;
1410 pcre_stack_malloc = stack_malloc;
1411 pcre_stack_free = stack_free;
1412
1413 /* Heading line unless quiet, then prompt for first regex if stdin */
1414
1415 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1416
1417 /* Main loop */
1418
1419 while (!done)
1420 {
1421 pcre *re = NULL;
1422 pcre_extra *extra = NULL;
1423
1424 #if !defined NOPOSIX /* There are still compilers that require no indent */
1425 regex_t preg;
1426 int do_posix = 0;
1427 #endif
1428
1429 const char *error;
1430 unsigned char *markptr;
1431 unsigned char *p, *pp, *ppp;
1432 unsigned char *to_file = NULL;
1433 const unsigned char *tables = NULL;
1434 unsigned long int true_size, true_study_size = 0;
1435 size_t size, regex_gotten_store;
1436 int do_mark = 0;
1437 int do_study = 0;
1438 int do_debug = debug;
1439 int do_G = 0;
1440 int do_g = 0;
1441 int do_showinfo = showinfo;
1442 int do_showrest = 0;
1443 int do_flip = 0;
1444 int erroroffset, len, delimiter, poffset;
1445
1446 use_utf8 = 0;
1447 debug_lengths = 1;
1448
1449 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1450 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1451 fflush(outfile);
1452
1453 p = buffer;
1454 while (isspace(*p)) p++;
1455 if (*p == 0) continue;
1456
1457 /* See if the pattern is to be loaded pre-compiled from a file. */
1458
1459 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1460 {
1461 unsigned long int magic, get_options;
1462 uschar sbuf[8];
1463 FILE *f;
1464
1465 p++;
1466 pp = p + (int)strlen((char *)p);
1467 while (isspace(pp[-1])) pp--;
1468 *pp = 0;
1469
1470 f = fopen((char *)p, "rb");
1471 if (f == NULL)
1472 {
1473 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1474 continue;
1475 }
1476
1477 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1478
1479 true_size =
1480 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1481 true_study_size =
1482 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1483
1484 re = (real_pcre *)new_malloc(true_size);
1485 regex_gotten_store = gotten_store;
1486
1487 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1488
1489 magic = ((real_pcre *)re)->magic_number;
1490 if (magic != MAGIC_NUMBER)
1491 {
1492 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1493 {
1494 do_flip = 1;
1495 }
1496 else
1497 {
1498 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1499 fclose(f);
1500 continue;
1501 }
1502 }
1503
1504 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1505 do_flip? " (byte-inverted)" : "", p);
1506
1507 /* Need to know if UTF-8 for printing data strings */
1508
1509 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1510 use_utf8 = (get_options & PCRE_UTF8) != 0;
1511
1512 /* Now see if there is any following study data */
1513
1514 if (true_study_size != 0)
1515 {
1516 pcre_study_data *psd;
1517
1518 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1519 extra->flags = PCRE_EXTRA_STUDY_DATA;
1520
1521 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1522 extra->study_data = psd;
1523
1524 if (fread(psd, 1, true_study_size, f) != true_study_size)
1525 {
1526 FAIL_READ:
1527 fprintf(outfile, "Failed to read data from %s\n", p);
1528 if (extra != NULL) new_free(extra);
1529 if (re != NULL) new_free(re);
1530 fclose(f);
1531 continue;
1532 }
1533 fprintf(outfile, "Study data loaded from %s\n", p);
1534 do_study = 1; /* To get the data output if requested */
1535 }
1536 else fprintf(outfile, "No study data\n");
1537
1538 fclose(f);
1539 goto SHOW_INFO;
1540 }
1541
1542 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1543 the pattern; if it isn't complete, read more. */
1544
1545 delimiter = *p++;
1546
1547 if (isalnum(delimiter) || delimiter == '\\')
1548 {
1549 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1550 goto SKIP_DATA;
1551 }
1552
1553 pp = p;
1554 poffset = (int)(p - buffer);
1555
1556 for(;;)
1557 {
1558 while (*pp != 0)
1559 {
1560 if (*pp == '\\' && pp[1] != 0) pp++;
1561 else if (*pp == delimiter) break;
1562 pp++;
1563 }
1564 if (*pp != 0) break;
1565 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1566 {
1567 fprintf(outfile, "** Unexpected EOF\n");
1568 done = 1;
1569 goto CONTINUE;
1570 }
1571 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1572 }
1573
1574 /* The buffer may have moved while being extended; reset the start of data
1575 pointer to the correct relative point in the buffer. */
1576
1577 p = buffer + poffset;
1578
1579 /* If the first character after the delimiter is backslash, make
1580 the pattern end with backslash. This is purely to provide a way
1581 of testing for the error message when a pattern ends with backslash. */
1582
1583 if (pp[1] == '\\') *pp++ = '\\';
1584
1585 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1586 for callouts. */
1587
1588 *pp++ = 0;
1589 strcpy((char *)pbuffer, (char *)p);
1590
1591 /* Look for options after final delimiter */
1592
1593 options = 0;
1594 study_options = 0;
1595 log_store = showstore; /* default from command line */
1596
1597 while (*pp != 0)
1598 {
1599 switch (*pp++)
1600 {
1601 case 'f': options |= PCRE_FIRSTLINE; break;
1602 case 'g': do_g = 1; break;
1603 case 'i': options |= PCRE_CASELESS; break;
1604 case 'm': options |= PCRE_MULTILINE; break;
1605 case 's': options |= PCRE_DOTALL; break;
1606 case 'x': options |= PCRE_EXTENDED; break;
1607
1608 case '+': do_showrest = 1; break;
1609 case 'A': options |= PCRE_ANCHORED; break;
1610 case 'B': do_debug = 1; break;
1611 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1612 case 'D': do_debug = do_showinfo = 1; break;
1613 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1614 case 'F': do_flip = 1; break;
1615 case 'G': do_G = 1; break;
1616 case 'I': do_showinfo = 1; break;
1617 case 'J': options |= PCRE_DUPNAMES; break;
1618 case 'K': do_mark = 1; break;
1619 case 'M': log_store = 1; break;
1620 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1621
1622 #if !defined NOPOSIX
1623 case 'P': do_posix = 1; break;
1624 #endif
1625
1626 case 'S': do_study = 1; break;
1627 case 'U': options |= PCRE_UNGREEDY; break;
1628 case 'W': options |= PCRE_UCP; break;
1629 case 'X': options |= PCRE_EXTRA; break;
1630 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1631 case 'Z': debug_lengths = 0; break;
1632 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1633 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1634
1635 case 'T':
1636 switch (*pp++)
1637 {
1638 case '0': tables = tables0; break;
1639 case '1': tables = tables1; break;
1640
1641 case '\r':
1642 case '\n':
1643 case ' ':
1644 case 0:
1645 fprintf(outfile, "** Missing table number after /T\n");
1646 goto SKIP_DATA;
1647
1648 default:
1649 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1650 goto SKIP_DATA;
1651 }
1652 break;
1653
1654 case 'L':
1655 ppp = pp;
1656 /* The '\r' test here is so that it works on Windows. */
1657 /* The '0' test is just in case this is an unterminated line. */
1658 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1659 *ppp = 0;
1660 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1661 {
1662 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1663 goto SKIP_DATA;
1664 }
1665 locale_set = 1;
1666 tables = pcre_maketables();
1667 pp = ppp;
1668 break;
1669
1670 case '>':
1671 to_file = pp;
1672 while (*pp != 0) pp++;
1673 while (isspace(pp[-1])) pp--;
1674 *pp = 0;
1675 break;
1676
1677 case '<':
1678 {
1679 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1680 {
1681 options |= PCRE_JAVASCRIPT_COMPAT;
1682 pp += 3;
1683 }
1684 else
1685 {
1686 int x = check_newline(pp, outfile);
1687 if (x == 0) goto SKIP_DATA;
1688 options |= x;
1689 while (*pp++ != '>');
1690 }
1691 }
1692 break;
1693
1694 case '\r': /* So that it works in Windows */
1695 case '\n':
1696 case ' ':
1697 break;
1698
1699 default:
1700 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1701 goto SKIP_DATA;
1702 }
1703 }
1704
1705 /* Handle compiling via the POSIX interface, which doesn't support the
1706 timing, showing, or debugging options, nor the ability to pass over
1707 local character tables. */
1708
1709 #if !defined NOPOSIX
1710 if (posix || do_posix)
1711 {
1712 int rc;
1713 int cflags = 0;
1714
1715 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1716 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1717 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1718 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1719 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1720 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1721 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1722
1723 rc = regcomp(&preg, (char *)p, cflags);
1724
1725 /* Compilation failed; go back for another re, skipping to blank line
1726 if non-interactive. */
1727
1728 if (rc != 0)
1729 {
1730 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1731 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1732 goto SKIP_DATA;
1733 }
1734 }
1735
1736 /* Handle compiling via the native interface */
1737
1738 else
1739 #endif /* !defined NOPOSIX */
1740
1741 {
1742 unsigned long int get_options;
1743
1744 if (timeit > 0)
1745 {
1746 register int i;
1747 clock_t time_taken;
1748 clock_t start_time = clock();
1749 for (i = 0; i < timeit; i++)
1750 {
1751 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1752 if (re != NULL) free(re);
1753 }
1754 time_taken = clock() - start_time;
1755 fprintf(outfile, "Compile time %.4f milliseconds\n",
1756 (((double)time_taken * 1000.0) / (double)timeit) /
1757 (double)CLOCKS_PER_SEC);
1758 }
1759
1760 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1761
1762 /* Compilation failed; go back for another re, skipping to blank line
1763 if non-interactive. */
1764
1765 if (re == NULL)
1766 {
1767 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1768 SKIP_DATA:
1769 if (infile != stdin)
1770 {
1771 for (;;)
1772 {
1773 if (extend_inputline(infile, buffer, NULL) == NULL)
1774 {
1775 done = 1;
1776 goto CONTINUE;
1777 }
1778 len = (int)strlen((char *)buffer);
1779 while (len > 0 && isspace(buffer[len-1])) len--;
1780 if (len == 0) break;
1781 }
1782 fprintf(outfile, "\n");
1783 }
1784 goto CONTINUE;
1785 }
1786
1787 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1788 within the regex; check for this so that we know how to process the data
1789 lines. */
1790
1791 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1792 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1793
1794 /* Print information if required. There are now two info-returning
1795 functions. The old one has a limited interface and returns only limited
1796 data. Check that it agrees with the newer one. */
1797
1798 if (log_store)
1799 fprintf(outfile, "Memory allocation (code space): %d\n",
1800 (int)(gotten_store -
1801 sizeof(real_pcre) -
1802 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1803
1804 /* Extract the size for possible writing before possibly flipping it,
1805 and remember the store that was got. */
1806
1807 true_size = ((real_pcre *)re)->size;
1808 regex_gotten_store = gotten_store;
1809
1810 /* If /S was present, study the regexp to generate additional info to
1811 help with the matching. */
1812
1813 if (do_study)
1814 {
1815 if (timeit > 0)
1816 {
1817 register int i;
1818 clock_t time_taken;
1819 clock_t start_time = clock();
1820 for (i = 0; i < timeit; i++)
1821 extra = pcre_study(re, study_options, &error);
1822 time_taken = clock() - start_time;
1823 if (extra != NULL) free(extra);
1824 fprintf(outfile, " Study time %.4f milliseconds\n",
1825 (((double)time_taken * 1000.0) / (double)timeit) /
1826 (double)CLOCKS_PER_SEC);
1827 }
1828 extra = pcre_study(re, study_options, &error);
1829 if (error != NULL)
1830 fprintf(outfile, "Failed to study: %s\n", error);
1831 else if (extra != NULL)
1832 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1833 }
1834
1835 /* If /K was present, we set up for handling MARK data. */
1836
1837 if (do_mark)
1838 {
1839 if (extra == NULL)
1840 {
1841 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1842 extra->flags = 0;
1843 }
1844 extra->mark = &markptr;
1845 extra->flags |= PCRE_EXTRA_MARK;
1846 }
1847
1848 /* If the 'F' option was present, we flip the bytes of all the integer
1849 fields in the regex data block and the study block. This is to make it
1850 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1851 compiled on a different architecture. */
1852
1853 if (do_flip)
1854 {
1855 real_pcre *rre = (real_pcre *)re;
1856 rre->magic_number =
1857 byteflip(rre->magic_number, sizeof(rre->magic_number));
1858 rre->size = byteflip(rre->size, sizeof(rre->size));
1859 rre->options = byteflip(rre->options, sizeof(rre->options));
1860 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1861 rre->top_bracket =
1862 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1863 rre->top_backref =
1864 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1865 rre->first_byte =
1866 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1867 rre->req_byte =
1868 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1869 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1870 sizeof(rre->name_table_offset));
1871 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1872 sizeof(rre->name_entry_size));
1873 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1874 sizeof(rre->name_count));
1875
1876 if (extra != NULL)
1877 {
1878 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1879 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1880 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1881 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1882 }
1883 }
1884
1885 /* Extract information from the compiled data if required */
1886
1887 SHOW_INFO:
1888
1889 if (do_debug)
1890 {
1891 fprintf(outfile, "------------------------------------------------------------------\n");
1892 pcre_printint(re, outfile, debug_lengths);
1893 }
1894
1895 /* We already have the options in get_options (see above) */
1896
1897 if (do_showinfo)
1898 {
1899 unsigned long int all_options;
1900 #if !defined NOINFOCHECK
1901 int old_first_char, old_options, old_count;
1902 #endif
1903 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1904 hascrorlf;
1905 int nameentrysize, namecount;
1906 const uschar *nametable;
1907
1908 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1909 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1910 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1911 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1912 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1913 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1914 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1915 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1916 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1917 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1918 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1919
1920 #if !defined NOINFOCHECK
1921 old_count = pcre_info(re, &old_options, &old_first_char);
1922 if (count < 0) fprintf(outfile,
1923 "Error %d from pcre_info()\n", count);
1924 else
1925 {
1926 if (old_count != count) fprintf(outfile,
1927 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1928 old_count);
1929
1930 if (old_first_char != first_char) fprintf(outfile,
1931 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1932 first_char, old_first_char);
1933
1934 if (old_options != (int)get_options) fprintf(outfile,
1935 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1936 get_options, old_options);
1937 }
1938 #endif
1939
1940 if (size != regex_gotten_store) fprintf(outfile,
1941 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1942 (int)size, (int)regex_gotten_store);
1943
1944 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1945 if (backrefmax > 0)
1946 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1947
1948 if (namecount > 0)
1949 {
1950 fprintf(outfile, "Named capturing subpatterns:\n");
1951 while (namecount-- > 0)
1952 {
1953 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1954 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1955 GET2(nametable, 0));
1956 nametable += nameentrysize;
1957 }
1958 }
1959
1960 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1961 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1962
1963 all_options = ((real_pcre *)re)->options;
1964 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1965
1966 if (get_options == 0) fprintf(outfile, "No options\n");
1967 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1968 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1969 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1970 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1971 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1972 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1973 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1974 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1975 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1976 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1977 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1978 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1979 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1980 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1981 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1982 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1983 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
1984 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1985
1986 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1987
1988 switch (get_options & PCRE_NEWLINE_BITS)
1989 {
1990 case PCRE_NEWLINE_CR:
1991 fprintf(outfile, "Forced newline sequence: CR\n");
1992 break;
1993
1994 case PCRE_NEWLINE_LF:
1995 fprintf(outfile, "Forced newline sequence: LF\n");
1996 break;
1997
1998 case PCRE_NEWLINE_CRLF:
1999 fprintf(outfile, "Forced newline sequence: CRLF\n");
2000 break;
2001
2002 case PCRE_NEWLINE_ANYCRLF:
2003 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2004 break;
2005
2006 case PCRE_NEWLINE_ANY:
2007 fprintf(outfile, "Forced newline sequence: ANY\n");
2008 break;
2009
2010 default:
2011 break;
2012 }
2013
2014 if (first_char == -1)
2015 {
2016 fprintf(outfile, "First char at start or follows newline\n");
2017 }
2018 else if (first_char < 0)
2019 {
2020 fprintf(outfile, "No first char\n");
2021 }
2022 else
2023 {
2024 int ch = first_char & 255;
2025 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2026 "" : " (caseless)";
2027 if (PRINTHEX(ch))
2028 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2029 else
2030 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2031 }
2032
2033 if (need_char < 0)
2034 {
2035 fprintf(outfile, "No need char\n");
2036 }
2037 else
2038 {
2039 int ch = need_char & 255;
2040 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2041 "" : " (caseless)";
2042 if (PRINTHEX(ch))
2043 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2044 else
2045 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2046 }
2047
2048 /* Don't output study size; at present it is in any case a fixed
2049 value, but it varies, depending on the computer architecture, and
2050 so messes up the test suite. (And with the /F option, it might be
2051 flipped.) */
2052
2053 if (do_study)
2054 {
2055 if (extra == NULL)
2056 fprintf(outfile, "Study returned NULL\n");
2057 else
2058 {
2059 uschar *start_bits = NULL;
2060 int minlength;
2061
2062 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2063 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2064
2065 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2066 if (start_bits == NULL)
2067 fprintf(outfile, "No set of starting bytes\n");
2068 else
2069 {
2070 int i;
2071 int c = 24;
2072 fprintf(outfile, "Starting byte set: ");
2073 for (i = 0; i < 256; i++)
2074 {
2075 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2076 {
2077 if (c > 75)
2078 {
2079 fprintf(outfile, "\n ");
2080 c = 2;
2081 }
2082 if (PRINTHEX(i) && i != ' ')
2083 {
2084 fprintf(outfile, "%c ", i);
2085 c += 2;
2086 }
2087 else
2088 {
2089 fprintf(outfile, "\\x%02x ", i);
2090 c += 5;
2091 }
2092 }
2093 }
2094 fprintf(outfile, "\n");
2095 }
2096 }
2097 }
2098 }
2099
2100 /* If the '>' option was present, we write out the regex to a file, and
2101 that is all. The first 8 bytes of the file are the regex length and then
2102 the study length, in big-endian order. */
2103
2104 if (to_file != NULL)
2105 {
2106 FILE *f = fopen((char *)to_file, "wb");
2107 if (f == NULL)
2108 {
2109 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2110 }
2111 else
2112 {
2113 uschar sbuf[8];
2114 sbuf[0] = (uschar)((true_size >> 24) & 255);
2115 sbuf[1] = (uschar)((true_size >> 16) & 255);
2116 sbuf[2] = (uschar)((true_size >> 8) & 255);
2117 sbuf[3] = (uschar)((true_size) & 255);
2118
2119 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2120 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2121 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2122 sbuf[7] = (uschar)((true_study_size) & 255);
2123
2124 if (fwrite(sbuf, 1, 8, f) < 8 ||
2125 fwrite(re, 1, true_size, f) < true_size)
2126 {
2127 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2128 }
2129 else
2130 {
2131 fprintf(outfile, "Compiled regex written to %s\n", to_file);
2132 if (extra != NULL)
2133 {
2134 if (fwrite(extra->study_data, 1, true_study_size, f) <
2135 true_study_size)
2136 {
2137 fprintf(outfile, "Write error on %s: %s\n", to_file,
2138 strerror(errno));
2139 }
2140 else fprintf(outfile, "Study data written to %s\n", to_file);
2141
2142 }
2143 }
2144 fclose(f);
2145 }
2146
2147 new_free(re);
2148 if (extra != NULL) new_free(extra);
2149 if (locale_set)
2150 {
2151 new_free((void *)tables);
2152 setlocale(LC_CTYPE, "C");
2153 locale_set = 0;
2154 }
2155 continue; /* With next regex */
2156 }
2157 } /* End of non-POSIX compile */
2158
2159 /* Read data lines and test them */
2160
2161 for (;;)
2162 {
2163 uschar *q;
2164 uschar *bptr;
2165 int *use_offsets = offsets;
2166 int use_size_offsets = size_offsets;
2167 int callout_data = 0;
2168 int callout_data_set = 0;
2169 int count, c;
2170 int copystrings = 0;
2171 int find_match_limit = default_find_match_limit;
2172 int getstrings = 0;
2173 int getlist = 0;
2174 int gmatched = 0;
2175 int start_offset = 0;
2176 int start_offset_sign = 1;
2177 int g_notempty = 0;
2178 int use_dfa = 0;
2179
2180 options = 0;
2181
2182 *copynames = 0;
2183 *getnames = 0;
2184
2185 copynamesptr = copynames;
2186 getnamesptr = getnames;
2187
2188 pcre_callout = callout;
2189 first_callout = 1;
2190 callout_extra = 0;
2191 callout_count = 0;
2192 callout_fail_count = 999999;
2193 callout_fail_id = -1;
2194 show_malloc = 0;
2195
2196 if (extra != NULL) extra->flags &=
2197 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2198
2199 len = 0;
2200 for (;;)
2201 {
2202 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2203 {
2204 if (len > 0) /* Reached EOF without hitting a newline */
2205 {
2206 fprintf(outfile, "\n");
2207 break;
2208 }
2209 done = 1;
2210 goto CONTINUE;
2211 }
2212 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2213 len = (int)strlen((char *)buffer);
2214 if (buffer[len-1] == '\n') break;
2215 }
2216
2217 while (len > 0 && isspace(buffer[len-1])) len--;
2218 buffer[len] = 0;
2219 if (len == 0) break;
2220
2221 p = buffer;
2222 while (isspace(*p)) p++;
2223
2224 bptr = q = dbuffer;
2225 while ((c = *p++) != 0)
2226 {
2227 int i = 0;
2228 int n = 0;
2229
2230 if (c == '\\') switch ((c = *p++))
2231 {
2232 case 'a': c = 7; break;
2233 case 'b': c = '\b'; break;
2234 case 'e': c = 27; break;
2235 case 'f': c = '\f'; break;
2236 case 'n': c = '\n'; break;
2237 case 'r': c = '\r'; break;
2238 case 't': c = '\t'; break;
2239 case 'v': c = '\v'; break;
2240
2241 case '0': case '1': case '2': case '3':
2242 case '4': case '5': case '6': case '7':
2243 c -= '0';
2244 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2245 c = c * 8 + *p++ - '0';
2246
2247 #if !defined NOUTF8
2248 if (use_utf8 && c > 255)
2249 {
2250 unsigned char buff8[8];
2251 int ii, utn;
2252 utn = ord2utf8(c, buff8);
2253 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2254 c = buff8[ii]; /* Last byte */
2255 }
2256 #endif
2257 break;
2258
2259 case 'x':
2260
2261 /* Handle \x{..} specially - new Perl thing for utf8 */
2262
2263 #if !defined NOUTF8
2264 if (*p == '{')
2265 {
2266 unsigned char *pt = p;
2267 c = 0;
2268 while (isxdigit(*(++pt)))
2269 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2270 if (*pt == '}')
2271 {
2272 unsigned char buff8[8];
2273 int ii, utn;
2274 if (use_utf8)
2275 {
2276 utn = ord2utf8(c, buff8);
2277 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2278 c = buff8[ii]; /* Last byte */
2279 }
2280 else
2281 {
2282 if (c > 255)
2283 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2284 "UTF-8 mode is not enabled.\n"
2285 "** Truncation will probably give the wrong result.\n", c);
2286 }
2287 p = pt + 1;
2288 break;
2289 }
2290 /* Not correct form; fall through */
2291 }
2292 #endif
2293
2294 /* Ordinary \x */
2295
2296 c = 0;
2297 while (i++ < 2 && isxdigit(*p))
2298 {
2299 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2300 p++;
2301 }
2302 break;
2303
2304 case 0: /* \ followed by EOF allows for an empty line */
2305 p--;
2306 continue;
2307
2308 case '>':
2309 if (*p == '-')
2310 {
2311 start_offset_sign = -1;
2312 p++;
2313 }
2314 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2315 start_offset *= start_offset_sign;
2316 continue;
2317
2318 case 'A': /* Option setting */
2319 options |= PCRE_ANCHORED;
2320 continue;
2321
2322 case 'B':
2323 options |= PCRE_NOTBOL;
2324 continue;
2325
2326 case 'C':
2327 if (isdigit(*p)) /* Set copy string */
2328 {
2329 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2330 copystrings |= 1 << n;
2331 }
2332 else if (isalnum(*p))
2333 {
2334 uschar *npp = copynamesptr;
2335 while (isalnum(*p)) *npp++ = *p++;
2336 *npp++ = 0;
2337 *npp = 0;
2338 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2339 if (n < 0)
2340 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2341 copynamesptr = npp;
2342 }
2343 else if (*p == '+')
2344 {
2345 callout_extra = 1;
2346 p++;
2347 }
2348 else if (*p == '-')
2349 {
2350 pcre_callout = NULL;
2351 p++;
2352 }
2353 else if (*p == '!')
2354 {
2355 callout_fail_id = 0;
2356 p++;
2357 while(isdigit(*p))
2358 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2359 callout_fail_count = 0;
2360 if (*p == '!')
2361 {
2362 p++;
2363 while(isdigit(*p))
2364 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2365 }
2366 }
2367 else if (*p == '*')
2368 {
2369 int sign = 1;
2370 callout_data = 0;
2371 if (*(++p) == '-') { sign = -1; p++; }
2372 while(isdigit(*p))
2373 callout_data = callout_data * 10 + *p++ - '0';
2374 callout_data *= sign;
2375 callout_data_set = 1;
2376 }
2377 continue;
2378
2379 #if !defined NODFA
2380 case 'D':
2381 #if !defined NOPOSIX
2382 if (posix || do_posix)
2383 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2384 else
2385 #endif
2386 use_dfa = 1;
2387 continue;
2388 #endif
2389
2390 #if !defined NODFA
2391 case 'F':
2392 options |= PCRE_DFA_SHORTEST;
2393 continue;
2394 #endif
2395
2396 case 'G':
2397 if (isdigit(*p))
2398 {
2399 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2400 getstrings |= 1 << n;
2401 }
2402 else if (isalnum(*p))
2403 {
2404 uschar *npp = getnamesptr;
2405 while (isalnum(*p)) *npp++ = *p++;
2406 *npp++ = 0;
2407 *npp = 0;
2408 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2409 if (n < 0)
2410 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2411 getnamesptr = npp;
2412 }
2413 continue;
2414
2415 case 'L':
2416 getlist = 1;
2417 continue;
2418
2419 case 'M':
2420 find_match_limit = 1;
2421 continue;
2422
2423 case 'N':
2424 if ((options & PCRE_NOTEMPTY) != 0)
2425 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2426 else
2427 options |= PCRE_NOTEMPTY;
2428 continue;
2429
2430 case 'O':
2431 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2432 if (n > size_offsets_max)
2433 {
2434 size_offsets_max = n;
2435 free(offsets);
2436 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2437 if (offsets == NULL)
2438 {
2439 printf("** Failed to get %d bytes of memory for offsets vector\n",
2440 (int)(size_offsets_max * sizeof(int)));
2441 yield = 1;
2442 goto EXIT;
2443 }
2444 }
2445 use_size_offsets = n;
2446 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2447 continue;
2448
2449 case 'P':
2450 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2451 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2452 continue;
2453
2454 case 'Q':
2455 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2456 if (extra == NULL)
2457 {
2458 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2459 extra->flags = 0;
2460 }
2461 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2462 extra->match_limit_recursion = n;
2463 continue;
2464
2465 case 'q':
2466 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2467 if (extra == NULL)
2468 {
2469 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2470 extra->flags = 0;
2471 }
2472 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2473 extra->match_limit = n;
2474 continue;
2475
2476 #if !defined NODFA
2477 case 'R':
2478 options |= PCRE_DFA_RESTART;
2479 continue;
2480 #endif
2481
2482 case 'S':
2483 show_malloc = 1;
2484 continue;
2485
2486 case 'Y':
2487 options |= PCRE_NO_START_OPTIMIZE;
2488 continue;
2489
2490 case 'Z':
2491 options |= PCRE_NOTEOL;
2492 continue;
2493
2494 case '?':
2495 options |= PCRE_NO_UTF8_CHECK;
2496 continue;
2497
2498 case '<':
2499 {
2500 int x = check_newline(p, outfile);
2501 if (x == 0) goto NEXT_DATA;
2502 options |= x;
2503 while (*p++ != '>');
2504 }
2505 continue;
2506 }
2507 *q++ = c;
2508 }
2509 *q = 0;
2510 len = (int)(q - dbuffer);
2511
2512 /* Move the data to the end of the buffer so that a read over the end of
2513 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2514 we are using the POSIX interface, we must include the terminating zero. */
2515
2516 #if !defined NOPOSIX
2517 if (posix || do_posix)
2518 {
2519 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2520 bptr += buffer_size - len - 1;
2521 }
2522 else
2523 #endif
2524 {
2525 memmove(bptr + buffer_size - len, bptr, len);
2526 bptr += buffer_size - len;
2527 }
2528
2529 if ((all_use_dfa || use_dfa) && find_match_limit)
2530 {
2531 printf("**Match limit not relevant for DFA matching: ignored\n");
2532 find_match_limit = 0;
2533 }
2534
2535 /* Handle matching via the POSIX interface, which does not
2536 support timing or playing with the match limit or callout data. */
2537
2538 #if !defined NOPOSIX
2539 if (posix || do_posix)
2540 {
2541 int rc;
2542 int eflags = 0;
2543 regmatch_t *pmatch = NULL;
2544 if (use_size_offsets > 0)
2545 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2546 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2547 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2548 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2549
2550 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2551
2552 if (rc != 0)
2553 {
2554 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2555 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2556 }
2557 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2558 != 0)
2559 {
2560 fprintf(outfile, "Matched with REG_NOSUB\n");
2561 }
2562 else
2563 {
2564 size_t i;
2565 for (i = 0; i < (size_t)use_size_offsets; i++)
2566 {
2567 if (pmatch[i].rm_so >= 0)
2568 {
2569 fprintf(outfile, "%2d: ", (int)i);
2570 (void)pchars(dbuffer + pmatch[i].rm_so,
2571 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2572 fprintf(outfile, "\n");
2573 if (i == 0 && do_showrest)
2574 {
2575 fprintf(outfile, " 0+ ");
2576 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2577 outfile);
2578 fprintf(outfile, "\n");
2579 }
2580 }
2581 }
2582 }
2583 free(pmatch);
2584 }
2585
2586 /* Handle matching via the native interface - repeats for /g and /G */
2587
2588 else
2589 #endif /* !defined NOPOSIX */
2590
2591 for (;; gmatched++) /* Loop for /g or /G */
2592 {
2593 markptr = NULL;
2594
2595 if (timeitm > 0)
2596 {
2597 register int i;
2598 clock_t time_taken;
2599 clock_t start_time = clock();
2600
2601 #if !defined NODFA
2602 if (all_use_dfa || use_dfa)
2603 {
2604 int workspace[1000];
2605 for (i = 0; i < timeitm; i++)
2606 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2607 options | g_notempty, use_offsets, use_size_offsets, workspace,
2608 sizeof(workspace)/sizeof(int));
2609 }
2610 else
2611 #endif
2612
2613 for (i = 0; i < timeitm; i++)
2614 count = pcre_exec(re, extra, (char *)bptr, len,
2615 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2616
2617 time_taken = clock() - start_time;
2618 fprintf(outfile, "Execute time %.4f milliseconds\n",
2619 (((double)time_taken * 1000.0) / (double)timeitm) /
2620 (double)CLOCKS_PER_SEC);
2621 }
2622
2623 /* If find_match_limit is set, we want to do repeated matches with
2624 varying limits in order to find the minimum value for the match limit and
2625 for the recursion limit. */
2626
2627 if (find_match_limit)
2628 {
2629 if (extra == NULL)
2630 {
2631 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2632 extra->flags = 0;
2633 }
2634
2635 (void)check_match_limit(re, extra, bptr, len, start_offset,
2636 options|g_notempty, use_offsets, use_size_offsets,
2637 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2638 PCRE_ERROR_MATCHLIMIT, "match()");
2639
2640 count = check_match_limit(re, extra, bptr, len, start_offset,
2641 options|g_notempty, use_offsets, use_size_offsets,
2642 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2643 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2644 }
2645
2646 /* If callout_data is set, use the interface with additional data */
2647
2648 else if (callout_data_set)
2649 {
2650 if (extra == NULL)
2651 {
2652 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2653 extra->flags = 0;
2654 }
2655 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2656 extra->callout_data = &callout_data;
2657 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2658 options | g_notempty, use_offsets, use_size_offsets);
2659 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2660 }
2661
2662 /* The normal case is just to do the match once, with the default
2663 value of match_limit. */
2664
2665 #if !defined NODFA
2666 else if (all_use_dfa || use_dfa)
2667 {
2668 int workspace[1000];
2669 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2670 options | g_notempty, use_offsets, use_size_offsets, workspace,
2671 sizeof(workspace)/sizeof(int));
2672 if (count == 0)
2673 {
2674 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2675 count = use_size_offsets/2;
2676 }
2677 }
2678 #endif
2679
2680 else
2681 {
2682 count = pcre_exec(re, extra, (char *)bptr, len,
2683 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2684 if (count == 0)
2685 {
2686 fprintf(outfile, "Matched, but too many substrings\n");
2687 count = use_size_offsets/3;
2688 }
2689 }
2690
2691 /* Matched */
2692
2693 if (count >= 0)
2694 {
2695 int i, maxcount;
2696
2697 #if !defined NODFA
2698 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2699 #endif
2700 maxcount = use_size_offsets/3;
2701
2702 /* This is a check against a lunatic return value. */
2703
2704 if (count > maxcount)
2705 {
2706 fprintf(outfile,
2707 "** PCRE error: returned count %d is too big for offset size %d\n",
2708 count, use_size_offsets);
2709 count = use_size_offsets/3;
2710 if (do_g || do_G)
2711 {
2712 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2713 do_g = do_G = FALSE; /* Break g/G loop */
2714 }
2715 }
2716
2717 for (i = 0; i < count * 2; i += 2)
2718 {
2719 if (use_offsets[i] < 0)
2720 fprintf(outfile, "%2d: <unset>\n", i/2);
2721 else
2722 {
2723 fprintf(outfile, "%2d: ", i/2);
2724 (void)pchars(bptr + use_offsets[i],
2725 use_offsets[i+1] - use_offsets[i], outfile);
2726 fprintf(outfile, "\n");
2727 if (i == 0)
2728 {
2729 if (do_showrest)
2730 {
2731 fprintf(outfile, " 0+ ");
2732 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2733 outfile);
2734 fprintf(outfile, "\n");
2735 }
2736 }
2737 }
2738 }
2739
2740 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2741
2742 for (i = 0; i < 32; i++)
2743 {
2744 if ((copystrings & (1 << i)) != 0)
2745 {
2746 char copybuffer[256];
2747 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2748 i, copybuffer, sizeof(copybuffer));
2749 if (rc < 0)
2750 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2751 else
2752 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2753 }
2754 }
2755
2756 for (copynamesptr = copynames;
2757 *copynamesptr != 0;
2758 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2759 {
2760 char copybuffer[256];
2761 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2762 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2763 if (rc < 0)
2764 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2765 else
2766 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2767 }
2768
2769 for (i = 0; i < 32; i++)
2770 {
2771 if ((getstrings & (1 << i)) != 0)
2772 {
2773 const char *substring;
2774 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2775 i, &substring);
2776 if (rc < 0)
2777 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2778 else
2779 {
2780 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2781 pcre_free_substring(substring);
2782 }
2783 }
2784 }
2785
2786 for (getnamesptr = getnames;
2787 *getnamesptr != 0;
2788 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2789 {
2790 const char *substring;
2791 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2792 count, (char *)getnamesptr, &substring);
2793 if (rc < 0)
2794 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2795 else
2796 {
2797 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2798 pcre_free_substring(substring);
2799 }
2800 }
2801
2802 if (getlist)
2803 {
2804 const char **stringlist;
2805 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2806 &stringlist);
2807 if (rc < 0)
2808 fprintf(outfile, "get substring list failed %d\n", rc);
2809 else
2810 {
2811 for (i = 0; i < count; i++)
2812 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2813 if (stringlist[i] != NULL)
2814 fprintf(outfile, "string list not terminated by NULL\n");
2815 /* free((void *)stringlist); */
2816 pcre_free_substring_list(stringlist);
2817 }
2818 }
2819 }
2820
2821 /* There was a partial match */
2822
2823 else if (count == PCRE_ERROR_PARTIAL)
2824 {
2825 if (markptr == NULL) fprintf(outfile, "Partial match");
2826 else fprintf(outfile, "Partial match, mark=%s", markptr);
2827 if (use_size_offsets > 1)
2828 {
2829 fprintf(outfile, ": ");
2830 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2831 outfile);
2832 }
2833 fprintf(outfile, "\n");
2834 break; /* Out of the /g loop */
2835 }
2836
2837 /* Failed to match. If this is a /g or /G loop and we previously set
2838 g_notempty after a null match, this is not necessarily the end. We want
2839 to advance the start offset, and continue. We won't be at the end of the
2840 string - that was checked before setting g_notempty.
2841
2842 Complication arises in the case when the newline convention is "any",
2843 "crlf", or "anycrlf". If the previous match was at the end of a line
2844 terminated by CRLF, an advance of one character just passes the \r,
2845 whereas we should prefer the longer newline sequence, as does the code in
2846 pcre_exec(). Fudge the offset value to achieve this. We check for a
2847 newline setting in the pattern; if none was set, use pcre_config() to
2848 find the default.
2849
2850 Otherwise, in the case of UTF-8 matching, the advance must be one
2851 character, not one byte. */
2852
2853 else
2854 {
2855 if (g_notempty != 0)
2856 {
2857 int onechar = 1;
2858 unsigned int obits = ((real_pcre *)re)->options;
2859 use_offsets[0] = start_offset;
2860 if ((obits & PCRE_NEWLINE_BITS) == 0)
2861 {
2862 int d;
2863 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2864 /* Note that these values are always the ASCII ones, even in
2865 EBCDIC environments. CR = 13, NL = 10. */
2866 obits = (d == 13)? PCRE_NEWLINE_CR :
2867 (d == 10)? PCRE_NEWLINE_LF :
2868 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2869 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2870 (d == -1)? PCRE_NEWLINE_ANY : 0;
2871 }
2872 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2873 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2874 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2875 &&
2876 start_offset < len - 1 &&
2877 bptr[start_offset] == '\r' &&
2878 bptr[start_offset+1] == '\n')
2879 onechar++;
2880 else if (use_utf8)
2881 {
2882 while (start_offset + onechar < len)
2883 {
2884 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2885 onechar++;
2886 }
2887 }
2888 use_offsets[1] = start_offset + onechar;
2889 }
2890 else
2891 {
2892 switch(count)
2893 {
2894 case PCRE_ERROR_NOMATCH:
2895 if (gmatched == 0)
2896 {
2897 if (markptr == NULL) fprintf(outfile, "No match\n");
2898 else fprintf(outfile, "No match, mark = %s\n", markptr);
2899 }
2900 break;
2901
2902 case PCRE_ERROR_BADUTF8:
2903 case PCRE_ERROR_SHORTUTF8:
2904 fprintf(outfile, "Error %d (%s UTF-8 string)", count,
2905 (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
2906 if (use_size_offsets >= 2)
2907 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
2908 use_offsets[1]);
2909 fprintf(outfile, "\n");
2910 break;
2911
2912 default:
2913 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
2914 break;
2915 }
2916
2917 break; /* Out of the /g loop */
2918 }
2919 }
2920
2921 /* If not /g or /G we are done */
2922
2923 if (!do_g && !do_G) break;
2924
2925 /* If we have matched an empty string, first check to see if we are at
2926 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2927 Perl's /g option does. This turns out to be rather cunning. First we set
2928 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2929 same point. If this fails (picked up above) we advance to the next
2930 character. */
2931
2932 g_notempty = 0;
2933
2934 if (use_offsets[0] == use_offsets[1])
2935 {
2936 if (use_offsets[0] == len) break;
2937 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2938 }
2939
2940 /* For /g, update the start offset, leaving the rest alone */
2941
2942 if (do_g) start_offset = use_offsets[1];
2943
2944 /* For /G, update the pointer and length */
2945
2946 else
2947 {
2948 bptr += use_offsets[1];
2949 len -= use_offsets[1];
2950 }
2951 } /* End of loop for /g and /G */
2952
2953 NEXT_DATA: continue;
2954 } /* End of loop for data lines */
2955
2956 CONTINUE:
2957
2958 #if !defined NOPOSIX
2959 if (posix || do_posix) regfree(&preg);
2960 #endif
2961
2962 if (re != NULL) new_free(re);
2963 if (extra != NULL) new_free(extra);
2964 if (locale_set)
2965 {
2966 new_free((void *)tables);
2967 setlocale(LC_CTYPE, "C");
2968 locale_set = 0;
2969 }
2970 }
2971
2972 if (infile == stdin) fprintf(outfile, "\n");
2973
2974 EXIT:
2975
2976 if (infile != NULL && infile != stdin) fclose(infile);
2977 if (outfile != NULL && outfile != stdout) fclose(outfile);
2978
2979 free(buffer);
2980 free(dbuffer);
2981 free(pbuffer);
2982 free(offsets);
2983
2984 return yield;
2985 }
2986
2987 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12