/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 576 - (show annotations) (download)
Sun Nov 21 18:45:10 2010 UTC (3 years, 4 months ago) by ph10
File MIME type: text/plain
File size: 87486 byte(s)
Added support for (*NO_START_OPT)

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <locale.h>
49 #include <errno.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #ifndef isatty
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
78 #ifndef fileno
79 #define fileno _fileno
80 #endif
81
82 #else
83 #include <sys/time.h> /* These two includes are needed */
84 #include <sys/resource.h> /* for setrlimit(). */
85 #define INPUT_MODE "rb"
86 #define OUTPUT_MODE "wb"
87 #endif
88
89
90 /* We have to include pcre_internal.h because we need the internal info for
91 displaying the results of pcre_study() and we also need to know about the
92 internal macros, structures, and other internal data values; pcretest has
93 "inside information" compared to a program that strictly follows the PCRE API.
94
95 Although pcre_internal.h does itself include pcre.h, we explicitly include it
96 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
97 appropriately for an application, not for building PCRE. */
98
99 #include "pcre.h"
100 #include "pcre_internal.h"
101
102 /* We need access to some of the data tables that PCRE uses. So as not to have
103 to keep two copies, we include the source file here, changing the names of the
104 external symbols to prevent clashes. */
105
106 #define _pcre_ucp_gentype ucp_gentype
107 #define _pcre_utf8_table1 utf8_table1
108 #define _pcre_utf8_table1_size utf8_table1_size
109 #define _pcre_utf8_table2 utf8_table2
110 #define _pcre_utf8_table3 utf8_table3
111 #define _pcre_utf8_table4 utf8_table4
112 #define _pcre_utt utt
113 #define _pcre_utt_size utt_size
114 #define _pcre_utt_names utt_names
115 #define _pcre_OP_lengths OP_lengths
116
117 #include "pcre_tables.c"
118
119 /* We also need the pcre_printint() function for printing out compiled
120 patterns. This function is in a separate file so that it can be included in
121 pcre_compile.c when that module is compiled with debugging enabled. It needs to
122 know which case is being compiled. */
123
124 #define COMPILING_PCRETEST
125 #include "pcre_printint.src"
126
127 /* The definition of the macro PRINTABLE, which determines whether to print an
128 output character as-is or as a hex value when showing compiled patterns, is
129 contained in the printint.src file. We use it here also, in cases when the
130 locale has not been explicitly changed, so as to get consistent output from
131 systems that differ in their output from isprint() even in the "C" locale. */
132
133 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
134
135 /* It is possible to compile this test program without including support for
136 testing the POSIX interface, though this is not available via the standard
137 Makefile. */
138
139 #if !defined NOPOSIX
140 #include "pcreposix.h"
141 #endif
142
143 /* It is also possible, for the benefit of the version currently imported into
144 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
145 interface to the DFA matcher (NODFA), and without the doublecheck of the old
146 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
147 UTF8 support if PCRE is built without it. */
148
149 #ifndef SUPPORT_UTF8
150 #ifndef NOUTF8
151 #define NOUTF8
152 #endif
153 #endif
154
155
156 /* Other parameters */
157
158 #ifndef CLOCKS_PER_SEC
159 #ifdef CLK_TCK
160 #define CLOCKS_PER_SEC CLK_TCK
161 #else
162 #define CLOCKS_PER_SEC 100
163 #endif
164 #endif
165
166 /* This is the default loop count for timing. */
167
168 #define LOOPREPEAT 500000
169
170 /* Static variables */
171
172 static FILE *outfile;
173 static int log_store = 0;
174 static int callout_count;
175 static int callout_extra;
176 static int callout_fail_count;
177 static int callout_fail_id;
178 static int debug_lengths;
179 static int first_callout;
180 static int locale_set = 0;
181 static int show_malloc;
182 static int use_utf8;
183 static size_t gotten_store;
184
185 /* The buffers grow automatically if very long input lines are encountered. */
186
187 static int buffer_size = 50000;
188 static uschar *buffer = NULL;
189 static uschar *dbuffer = NULL;
190 static uschar *pbuffer = NULL;
191
192
193 /*************************************************
194 * Alternate character tables *
195 *************************************************/
196
197 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
198 using the default tables of the library. However, the T option can be used to
199 select alternate sets of tables, for different kinds of testing. Note also that
200 the L (locale) option also adjusts the tables. */
201
202 /* This is the set of tables distributed as default with PCRE. It recognizes
203 only ASCII characters. */
204
/* Layout of this table set, as laid out below: 256 bytes of lower-casing
table, 256 bytes of case-flipping table, ten 32-byte character class bit maps
(320 bytes), and a final 256-byte table of per-character type flags. */
205 static const unsigned char tables0[] = {
206
207 /* This table is a lower casing table. */
208
209 0, 1, 2, 3, 4, 5, 6, 7,
210 8, 9, 10, 11, 12, 13, 14, 15,
211 16, 17, 18, 19, 20, 21, 22, 23,
212 24, 25, 26, 27, 28, 29, 30, 31,
213 32, 33, 34, 35, 36, 37, 38, 39,
214 40, 41, 42, 43, 44, 45, 46, 47,
215 48, 49, 50, 51, 52, 53, 54, 55,
216 56, 57, 58, 59, 60, 61, 62, 63,
217 64, 97, 98, 99,100,101,102,103,
218 104,105,106,107,108,109,110,111,
219 112,113,114,115,116,117,118,119,
220 120,121,122, 91, 92, 93, 94, 95,
221 96, 97, 98, 99,100,101,102,103,
222 104,105,106,107,108,109,110,111,
223 112,113,114,115,116,117,118,119,
224 120,121,122,123,124,125,126,127,
225 128,129,130,131,132,133,134,135,
226 136,137,138,139,140,141,142,143,
227 144,145,146,147,148,149,150,151,
228 152,153,154,155,156,157,158,159,
229 160,161,162,163,164,165,166,167,
230 168,169,170,171,172,173,174,175,
231 176,177,178,179,180,181,182,183,
232 184,185,186,187,188,189,190,191,
233 192,193,194,195,196,197,198,199,
234 200,201,202,203,204,205,206,207,
235 208,209,210,211,212,213,214,215,
236 216,217,218,219,220,221,222,223,
237 224,225,226,227,228,229,230,231,
238 232,233,234,235,236,237,238,239,
239 240,241,242,243,244,245,246,247,
240 248,249,250,251,252,253,254,255,
241
242 /* This table is a case flipping table. */
243
244 0, 1, 2, 3, 4, 5, 6, 7,
245 8, 9, 10, 11, 12, 13, 14, 15,
246 16, 17, 18, 19, 20, 21, 22, 23,
247 24, 25, 26, 27, 28, 29, 30, 31,
248 32, 33, 34, 35, 36, 37, 38, 39,
249 40, 41, 42, 43, 44, 45, 46, 47,
250 48, 49, 50, 51, 52, 53, 54, 55,
251 56, 57, 58, 59, 60, 61, 62, 63,
252 64, 97, 98, 99,100,101,102,103,
253 104,105,106,107,108,109,110,111,
254 112,113,114,115,116,117,118,119,
255 120,121,122, 91, 92, 93, 94, 95,
256 96, 65, 66, 67, 68, 69, 70, 71,
257 72, 73, 74, 75, 76, 77, 78, 79,
258 80, 81, 82, 83, 84, 85, 86, 87,
259 88, 89, 90,123,124,125,126,127,
260 128,129,130,131,132,133,134,135,
261 136,137,138,139,140,141,142,143,
262 144,145,146,147,148,149,150,151,
263 152,153,154,155,156,157,158,159,
264 160,161,162,163,164,165,166,167,
265 168,169,170,171,172,173,174,175,
266 176,177,178,179,180,181,182,183,
267 184,185,186,187,188,189,190,191,
268 192,193,194,195,196,197,198,199,
269 200,201,202,203,204,205,206,207,
270 208,209,210,211,212,213,214,215,
271 216,217,218,219,220,221,222,223,
272 224,225,226,227,228,229,230,231,
273 232,233,234,235,236,237,238,239,
274 240,241,242,243,244,245,246,247,
275 248,249,250,251,252,253,254,255,
276
277 /* This table contains bit maps for various character classes. Each map is 32
278 bytes long and the bits run from the least significant end of each byte. The
279 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
280 graph, print, punct, and cntrl. Other classes are built from combinations. */
281
/* space: characters 9-13 and 32 */
282 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
283 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
284 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
285 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
286
/* xdigit: 0-9, A-F, a-f */
287 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
288 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
289 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
290 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
291
/* digit: 0-9 */
292 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
293 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
294 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
295 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
296
/* upper: A-Z */
297 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
298 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
299 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
300 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
301
/* lower: a-z */
302 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
303 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
304 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
305 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
306
/* word: 0-9, A-Z, a-z and '_' */
307 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
308 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
309 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
310 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
311
/* graph: characters 33-126 */
312 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
313 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
314 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
315 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
316
/* print: characters 32-126 */
317 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
318 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
319 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
320 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
321
/* punct */
322 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
323 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
324 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
325 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
326
/* cntrl: characters 0-31 and 127 */
327 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
328 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
329 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
331
332 /* This table identifies various classes of character by individual bits:
333 0x01 white space character
334 0x02 letter
335 0x04 decimal digit
336 0x08 hexadecimal digit
337 0x10 alphanumeric or '_'
338 0x80 regular expression metacharacter or binary zero
339 */
340
341 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
342 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
343 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
344 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
345 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
346 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
347 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
348 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
349 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
350 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
351 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
352 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
353 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
354 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
355 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
356 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
357 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
358 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
359 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
360 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
361 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
362 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
363 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
364 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
365 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
366 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
367 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
368 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
369 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
370 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
371 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
372 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
373
374 /* This is a set of tables that came originally from a Windows user. It seems to
375 be at least an approximation of ISO 8859. In particular, there are characters
376 greater than 128 that are marked as spaces, letters, etc. */
377
/* The data below has the same size and section order as tables0: 256 bytes of
lower-casing table, 256 bytes of case-flipping table, ten 32-byte class bit
maps, then 256 bytes of per-character type flags. */
378 static const unsigned char tables1[] = {
/* Lower-casing table (256 bytes). Here 192-222 also map to 224-254, apart
from 215 which maps to itself. */
379 0,1,2,3,4,5,6,7,
380 8,9,10,11,12,13,14,15,
381 16,17,18,19,20,21,22,23,
382 24,25,26,27,28,29,30,31,
383 32,33,34,35,36,37,38,39,
384 40,41,42,43,44,45,46,47,
385 48,49,50,51,52,53,54,55,
386 56,57,58,59,60,61,62,63,
387 64,97,98,99,100,101,102,103,
388 104,105,106,107,108,109,110,111,
389 112,113,114,115,116,117,118,119,
390 120,121,122,91,92,93,94,95,
391 96,97,98,99,100,101,102,103,
392 104,105,106,107,108,109,110,111,
393 112,113,114,115,116,117,118,119,
394 120,121,122,123,124,125,126,127,
395 128,129,130,131,132,133,134,135,
396 136,137,138,139,140,141,142,143,
397 144,145,146,147,148,149,150,151,
398 152,153,154,155,156,157,158,159,
399 160,161,162,163,164,165,166,167,
400 168,169,170,171,172,173,174,175,
401 176,177,178,179,180,181,182,183,
402 184,185,186,187,188,189,190,191,
403 224,225,226,227,228,229,230,231,
404 232,233,234,235,236,237,238,239,
405 240,241,242,243,244,245,246,215,
406 248,249,250,251,252,253,254,223,
407 224,225,226,227,228,229,230,231,
408 232,233,234,235,236,237,238,239,
409 240,241,242,243,244,245,246,247,
410 248,249,250,251,252,253,254,255,
/* Case-flipping table (256 bytes). */
411 0,1,2,3,4,5,6,7,
412 8,9,10,11,12,13,14,15,
413 16,17,18,19,20,21,22,23,
414 24,25,26,27,28,29,30,31,
415 32,33,34,35,36,37,38,39,
416 40,41,42,43,44,45,46,47,
417 48,49,50,51,52,53,54,55,
418 56,57,58,59,60,61,62,63,
419 64,97,98,99,100,101,102,103,
420 104,105,106,107,108,109,110,111,
421 112,113,114,115,116,117,118,119,
422 120,121,122,91,92,93,94,95,
423 96,65,66,67,68,69,70,71,
424 72,73,74,75,76,77,78,79,
425 80,81,82,83,84,85,86,87,
426 88,89,90,123,124,125,126,127,
427 128,129,130,131,132,133,134,135,
428 136,137,138,139,140,141,142,143,
429 144,145,146,147,148,149,150,151,
430 152,153,154,155,156,157,158,159,
431 160,161,162,163,164,165,166,167,
432 168,169,170,171,172,173,174,175,
433 176,177,178,179,180,181,182,183,
434 184,185,186,187,188,189,190,191,
435 224,225,226,227,228,229,230,231,
436 232,233,234,235,236,237,238,239,
437 240,241,242,243,244,245,246,215,
438 248,249,250,251,252,253,254,223,
439 192,193,194,195,196,197,198,199,
440 200,201,202,203,204,205,206,207,
441 208,209,210,211,212,213,214,247,
442 216,217,218,219,220,221,222,255,
/* Character class bit maps: ten maps of 32 bytes (four lines) each, in the
order space, xdigit, digit, upper, lower, word, graph, print, punct, cntrl. */
443 0,62,0,0,1,0,0,0,
444 0,0,0,0,0,0,0,0,
445 32,0,0,0,1,0,0,0,
446 0,0,0,0,0,0,0,0,
447 0,0,0,0,0,0,255,3,
448 126,0,0,0,126,0,0,0,
449 0,0,0,0,0,0,0,0,
450 0,0,0,0,0,0,0,0,
451 0,0,0,0,0,0,255,3,
452 0,0,0,0,0,0,0,0,
453 0,0,0,0,0,0,12,2,
454 0,0,0,0,0,0,0,0,
455 0,0,0,0,0,0,0,0,
456 254,255,255,7,0,0,0,0,
457 0,0,0,0,0,0,0,0,
458 255,255,127,127,0,0,0,0,
459 0,0,0,0,0,0,0,0,
460 0,0,0,0,254,255,255,7,
461 0,0,0,0,0,4,32,4,
462 0,0,0,128,255,255,127,255,
463 0,0,0,0,0,0,255,3,
464 254,255,255,135,254,255,255,7,
465 0,0,0,0,0,4,44,6,
466 255,255,127,255,255,255,127,255,
467 0,0,0,0,254,255,255,255,
468 255,255,255,255,255,255,255,127,
469 0,0,0,0,254,255,255,255,
470 255,255,255,255,255,255,255,255,
471 0,2,0,0,255,255,255,255,
472 255,255,255,255,255,255,255,127,
473 0,0,0,0,255,255,255,255,
474 255,255,255,255,255,255,255,255,
475 0,0,0,0,254,255,0,252,
476 1,0,0,248,1,0,0,120,
477 0,0,0,0,254,255,255,255,
478 0,0,128,0,0,0,128,0,
479 255,255,255,255,0,0,0,0,
480 0,0,0,0,0,0,0,128,
481 255,255,255,255,0,0,0,0,
482 0,0,0,0,0,0,0,0,
/* Per-character type flags (256 bytes), written in decimal; the bit values
are the ones listed in the comment on the last section of tables0. */
483 128,0,0,0,0,0,0,0,
484 0,1,1,0,1,1,0,0,
485 0,0,0,0,0,0,0,0,
486 0,0,0,0,0,0,0,0,
487 1,0,0,0,128,0,0,0,
488 128,128,128,128,0,0,128,0,
489 28,28,28,28,28,28,28,28,
490 28,28,0,0,0,0,0,128,
491 0,26,26,26,26,26,26,18,
492 18,18,18,18,18,18,18,18,
493 18,18,18,18,18,18,18,18,
494 18,18,18,128,128,0,128,16,
495 0,26,26,26,26,26,26,18,
496 18,18,18,18,18,18,18,18,
497 18,18,18,18,18,18,18,18,
498 18,18,18,128,128,0,0,0,
499 0,0,0,0,0,1,0,0,
500 0,0,0,0,0,0,0,0,
501 0,0,0,0,0,0,0,0,
502 0,0,0,0,0,0,0,0,
503 1,0,0,0,0,0,0,0,
504 0,0,18,0,0,0,0,0,
505 0,0,20,20,0,18,0,0,
506 0,20,18,0,0,0,0,0,
507 18,18,18,18,18,18,18,18,
508 18,18,18,18,18,18,18,18,
509 18,18,18,18,18,18,18,0,
510 18,18,18,18,18,18,18,18,
511 18,18,18,18,18,18,18,18,
512 18,18,18,18,18,18,18,18,
513 18,18,18,18,18,18,18,0,
514 18,18,18,18,18,18,18,18
515 };
516
517
518
519
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for elderly C libraries (SunOS4 is one example) that have no
strerror() of their own but do export the traditional message table
sys_errlist[] together with its length, sys_nerr. */

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  /* Only numbers that index inside the system table are looked up. */
  return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
539
540
541
542
543 /*************************************************
544 * Read or extend an input line *
545 *************************************************/
546
547 /* Input lines are read into buffer, but both patterns and data lines can be
548 continued over multiple input lines. In addition, if the buffer fills up, we
549 want to automatically expand it so as to be able to handle extremely large
550 lines that are needed for certain stress tests. When the input buffer is
551 expanded, the other two buffers must also be expanded likewise, and the
552 contents of pbuffer, which are a copy of the input for callouts, must be
553 preserved (for when expansion happens for a data line). This is not the most
554 optimal way of handling this, but hey, this is just a test program!
555
556 Arguments:
557 f the file to read
558 start where in buffer to start (this *must* be within buffer)
559 prompt for stdin or readline()
560
561 Returns: pointer to the start of new data
562 could be a copy of start, or could be moved
563 NULL if no data read and EOF reached
564 */
565
566 static uschar *
567 extend_inputline(FILE *f, uschar *start, const char *prompt)
568 {
569 uschar *here = start;
570
571 for (;;)
572 {
573 int rlen = (int)(buffer_size - (here - buffer));
574
575 if (rlen > 1000)
576 {
577 int dlen;
578
579 /* If libreadline support is required, use readline() to read a line if the
580 input is a terminal. Note that readline() removes the trailing newline, so
581 we must put it back again, to be compatible with fgets(). */
582
583 #ifdef SUPPORT_LIBREADLINE
584 if (isatty(fileno(f)))
585 {
586 size_t len;
587 char *s = readline(prompt);
588 if (s == NULL) return (here == start)? NULL : start;
589 len = strlen(s);
590 if (len > 0) add_history(s);
591 if (len > rlen - 1) len = rlen - 1;
592 memcpy(here, s, len);
593 here[len] = '\n';
594 here[len+1] = 0;
595 free(s);
596 }
597 else
598 #endif
599
600 /* Read the next line by normal means, prompting if the file is stdin. */
601
602 {
603 if (f == stdin) printf("%s", prompt);
604 if (fgets((char *)here, rlen, f) == NULL)
605 return (here == start)? NULL : start;
606 }
607
608 dlen = (int)strlen((char *)here);
609 if (dlen > 0 && here[dlen - 1] == '\n') return start;
610 here += dlen;
611 }
612
613 else
614 {
615 int new_buffer_size = 2*buffer_size;
616 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
617 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
618 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
619
620 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
621 {
622 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
623 exit(1);
624 }
625
626 memcpy(new_buffer, buffer, buffer_size);
627 memcpy(new_pbuffer, pbuffer, buffer_size);
628
629 buffer_size = new_buffer_size;
630
631 start = new_buffer + (start - buffer);
632 here = new_buffer + (here - buffer);
633
634 free(buffer);
635 free(dbuffer);
636 free(pbuffer);
637
638 buffer = new_buffer;
639 dbuffer = new_dbuffer;
640 pbuffer = new_pbuffer;
641 }
642 }
643
644 return NULL; /* Control never gets here */
645 }
646
647
648
649
650
651
652
/*************************************************
*         Read number from string                *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then a run of decimal digits is converted. If
there are no digits the result is zero. A value too large for an int is
clamped to the largest int value rather than overflowing, because signed
overflow is undefined behaviour in C.

Arguments:
  str         string to be converted
  endptr      where to put the end pointer (first character not consumed)

Returns:      the value, as a non-negative int
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  /* Largest int value, computed here so that no extra header is needed. This
  assumes unsigned int has exactly one more value bit than int, which holds on
  the usual two's complement platforms. */
  const int int_max = (int)(~0u >> 1);
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;
  while (isdigit(*str))
  {
    int digit = (int)(*str++ - '0');
    /* result*10 + digit fits exactly when result <= (int_max - digit)/10. */
    if (result > (int_max - digit) / 10) result = int_max;
    else result = result * 10 + digit;
  }
  *endptr = str;
  return result;
}
677
678
679
680
681 /*************************************************
682 * Convert UTF-8 string to value *
683 *************************************************/
684
685 /* This function takes one or more bytes that represents a UTF-8 character,
686 and returns the value of the character.
687
688 Argument:
689 utf8bytes a pointer to the byte vector
690 vptr a pointer to an int to receive the value
691
692 Returns: > 0 => the number of bytes consumed
693 -6 to 0 => malformed UTF-8 character at offset = (-return)
694 */
695
696 #if !defined NOUTF8
697
698 static int
699 utf82ord(unsigned char *utf8bytes, int *vptr)
700 {
701 int c = *utf8bytes++;
702 int d = c;
703 int i, j, s;
704
705 for (i = -1; i < 6; i++) /* i is number of additional bytes */
706 {
707 if ((d & 0x80) == 0) break;
708 d <<= 1;
709 }
710
711 if (i == -1) { *vptr = c; return 1; } /* ascii character */
712 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
713
714 /* i now has a value in the range 1-5 */
715
716 s = 6*i;
717 d = (c & utf8_table3[i]) << s;
718
719 for (j = 0; j < i; j++)
720 {
721 c = *utf8bytes++;
722 if ((c & 0xc0) != 0x80) return -(j+1);
723 s -= 6;
724 d |= (c & 0x3f) << s;
725 }
726
727 /* Check that encoding was the correct unique one */
728
729 for (j = 0; j < utf8_table1_size; j++)
730 if (d <= utf8_table1[j]) break;
731 if (j != i) return -(i+1);
732
733 /* Valid value */
734
735 *vptr = d;
736 return i+1;
737 }
738
739 #endif
740
741
742
743 /*************************************************
744 * Convert character value to UTF-8 *
745 *************************************************/
746
747 /* This function takes an integer value in the range 0 - 0x7fffffff
748 and encodes it as a UTF-8 character in 1 to 6 bytes.
749
750 Arguments:
751 cvalue the character value
752 utf8bytes pointer to buffer for result - at least 6 bytes long
753
754 Returns: number of characters placed in the buffer
755 */
756
757 #if !defined NOUTF8
758
759 static int
760 ord2utf8(int cvalue, uschar *utf8bytes)
761 {
762 register int i, j;
763 for (i = 0; i < utf8_table1_size; i++)
764 if (cvalue <= utf8_table1[i]) break;
765 utf8bytes += i;
766 for (j = i; j > 0; j--)
767 {
768 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
769 cvalue >>= 6;
770 }
771 *utf8bytes = utf8_table2[i] | cvalue;
772 return i + 1;
773 }
774
775 #endif
776
777
778
779 /*************************************************
780 * Print character string *
781 *************************************************/
782
783 /* Character string printing function. Must handle UTF-8 strings in utf8
784 mode. Yields number of characters printed. If handed a NULL file, just counts
785 chars without printing. */
786
787 static int pchars(unsigned char *p, int length, FILE *f)
788 {
789 int c = 0;
790 int yield = 0;
791
792 while (length-- > 0)
793 {
794 #if !defined NOUTF8
795 if (use_utf8)
796 {
797 int rc = utf82ord(p, &c);
798
799 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
800 {
801 length -= rc - 1;
802 p += rc;
803 if (PRINTHEX(c))
804 {
805 if (f != NULL) fprintf(f, "%c", c);
806 yield++;
807 }
808 else
809 {
810 int n = 4;
811 if (f != NULL) fprintf(f, "\\x{%02x}", c);
812 yield += (n <= 0x000000ff)? 2 :
813 (n <= 0x00000fff)? 3 :
814 (n <= 0x0000ffff)? 4 :
815 (n <= 0x000fffff)? 5 : 6;
816 }
817 continue;
818 }
819 }
820 #endif
821
822 /* Not UTF-8, or malformed UTF-8 */
823
824 c = *p++;
825 if (PRINTHEX(c))
826 {
827 if (f != NULL) fprintf(f, "%c", c);
828 yield++;
829 }
830 else
831 {
832 if (f != NULL) fprintf(f, "\\x%02x", c);
833 yield += 4;
834 }
835 }
836
837 return yield;
838 }
839
840
841
842 /*************************************************
843 * Callout function *
844 *************************************************/
845
846 /* Called from PCRE as a result of the (?C) item. We print out where we are in
847 the match. Yield zero unless more callouts than the fail count, or the callout
848 data is not zero. */
849
850 static int callout(pcre_callout_block *cb)
851 {
852 FILE *f = (first_callout | callout_extra)? outfile : NULL;
853 int i, pre_start, post_start, subject_length;
854
855 if (callout_extra)
856 {
857 fprintf(f, "Callout %d: last capture = %d\n",
858 cb->callout_number, cb->capture_last);
859
860 for (i = 0; i < cb->capture_top * 2; i += 2)
861 {
862 if (cb->offset_vector[i] < 0)
863 fprintf(f, "%2d: <unset>\n", i/2);
864 else
865 {
866 fprintf(f, "%2d: ", i/2);
867 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
868 cb->offset_vector[i+1] - cb->offset_vector[i], f);
869 fprintf(f, "\n");
870 }
871 }
872 }
873
874 /* Re-print the subject in canonical form, the first time or if giving full
875 datails. On subsequent calls in the same match, we use pchars just to find the
876 printed lengths of the substrings. */
877
878 if (f != NULL) fprintf(f, "--->");
879
880 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
881 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
882 cb->current_position - cb->start_match, f);
883
884 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
885
886 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
887 cb->subject_length - cb->current_position, f);
888
889 if (f != NULL) fprintf(f, "\n");
890
891 /* Always print appropriate indicators, with callout number if not already
892 shown. For automatic callouts, show the pattern offset. */
893
894 if (cb->callout_number == 255)
895 {
896 fprintf(outfile, "%+3d ", cb->pattern_position);
897 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
898 }
899 else
900 {
901 if (callout_extra) fprintf(outfile, " ");
902 else fprintf(outfile, "%3d ", cb->callout_number);
903 }
904
905 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
906 fprintf(outfile, "^");
907
908 if (post_start > 0)
909 {
910 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
911 fprintf(outfile, "^");
912 }
913
914 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
915 fprintf(outfile, " ");
916
917 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
918 pbuffer + cb->pattern_position);
919
920 fprintf(outfile, "\n");
921 first_callout = 0;
922
923 if (cb->callout_data != NULL)
924 {
925 int callout_data = *((int *)(cb->callout_data));
926 if (callout_data != 0)
927 {
928 fprintf(outfile, "Callout data = %d\n", callout_data);
929 return callout_data;
930 }
931 }
932
933 return (cb->callout_number != callout_fail_id)? 0 :
934 (++callout_count >= callout_fail_count)? 1 : 0;
935 }
936
937
938 /*************************************************
939 * Local malloc functions *
940 *************************************************/
941
942 /* Alternative malloc function, to test functionality and show the size of the
943 compiled re. */
944
945 static void *new_malloc(size_t size)
946 {
947 void *block = malloc(size);
948 gotten_store = size;
949 if (show_malloc)
950 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
951 return block;
952 }
953
954 static void new_free(void *block)
955 {
956 if (show_malloc)
957 fprintf(outfile, "free %p\n", block);
958 free(block);
959 }
960
961
962 /* For recursion malloc/free, to test stacking calls */
963
964 static void *stack_malloc(size_t size)
965 {
966 void *block = malloc(size);
967 if (show_malloc)
968 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
969 return block;
970 }
971
972 static void stack_free(void *block)
973 {
974 if (show_malloc)
975 fprintf(outfile, "stack_free %p\n", block);
976 free(block);
977 }
978
979
980 /*************************************************
981 * Call pcre_fullinfo() *
982 *************************************************/
983
984 /* Get one piece of information from the pcre_fullinfo() function */
985
986 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
987 {
988 int rc;
989 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
990 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
991 }
992
993
994
995 /*************************************************
996 * Byte flipping function *
997 *************************************************/
998
/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value     the quantity to flip; only the low-order bytes are used
  n         2 selects a 2-byte flip; any other value gives a 4-byte flip

Returns:    the low 2 (or 4) bytes of value in reversed order, with all
            higher-order bits zero
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  /* Pick the individual bytes out first, least significant first. */
  unsigned long int b0 = value & 0xff;
  unsigned long int b1 = (value >> 8) & 0xff;
  unsigned long int b2, b3;

  if (n == 2) return (b0 << 8) | b1;

  b2 = (value >> 16) & 0xff;
  b3 = (value >> 24) & 0xff;

  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1008
1009
1010
1011
1012 /*************************************************
1013 * Check match or recursion limit *
1014 *************************************************/
1015
1016 static int
1017 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1018 int start_offset, int options, int *use_offsets, int use_size_offsets,
1019 int flag, unsigned long int *limit, int errnumber, const char *msg)
1020 {
1021 int count;
1022 int min = 0;
1023 int mid = 64;
1024 int max = -1;
1025
1026 extra->flags |= flag;
1027
1028 for (;;)
1029 {
1030 *limit = mid;
1031
1032 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1033 use_offsets, use_size_offsets);
1034
1035 if (count == errnumber)
1036 {
1037 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1038 min = mid;
1039 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1040 }
1041
1042 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1043 count == PCRE_ERROR_PARTIAL)
1044 {
1045 if (mid == min + 1)
1046 {
1047 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1048 break;
1049 }
1050 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1051 max = mid;
1052 mid = (min + mid)/2;
1053 }
1054 else break; /* Some other error */
1055 }
1056
1057 extra->flags &= ~flag;
1058 return count;
1059 }
1060
1061
1062
1063 /*************************************************
1064 * Case-independent strncmp() function *
1065 *************************************************/
1066
1067 /*
1068 Arguments:
1069 s first string
1070 t second string
1071 n number of characters to compare
1072
1073 Returns: < 0, = 0, or > 0, according to the comparison
1074 */
1075
1076 static int
1077 strncmpic(uschar *s, uschar *t, int n)
1078 {
1079 while (n--)
1080 {
1081 int c = tolower(*s++) - tolower(*t++);
1082 if (c) return c;
1083 }
1084 return 0;
1085 }
1086
1087
1088
1089 /*************************************************
1090 * Check newline indicator *
1091 *************************************************/
1092
1093 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1094 a message and return 0 if there is no match.
1095
1096 Arguments:
1097 p points after the leading '<'
1098 f file for error message
1099
1100 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1101 */
1102
1103 static int
1104 check_newline(uschar *p, FILE *f)
1105 {
1106 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1107 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1108 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1109 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1110 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1111 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1112 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1113 fprintf(f, "Unknown newline type at: <%s\n", p);
1114 return 0;
1115 }
1116
1117
1118
1119 /*************************************************
1120 * Usage function *
1121 *************************************************/
1122
/* Write the command-line help text to stdout. Lines describing readline
support, -dfa and -p depend on SUPPORT_LIBREADLINE, NODFA and NOPOSIX
respectively. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
  fputs("If input is a terminal, readline() is used to read from it.\n",
    stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

  fputs("\nOptions:\n"
        " -b show compiled code (bytecode)\n"
        " -C show PCRE compile-time options and exit\n"
        " -d debug: show compiled code and information (-b and -i)\n",
    stdout);

#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -M find MATCH_LIMIT minimum for each subject\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif

  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s output store (memory) used information\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n",
    stdout);
}
1156
1157
1158
1159 /*************************************************
1160 * Main Program *
1161 *************************************************/
1162
1163 /* Read lines from named file or stdin and write to named file or stdout; lines
1164 consist of a regular expression, in delimiters and optionally followed by
1165 options, followed by a set of test data, terminated by an empty line. */
1166
1167 int main(int argc, char **argv)
1168 {
1169 FILE *infile = stdin;
1170 int options = 0;
1171 int study_options = 0;
1172 int default_find_match_limit = FALSE;
1173 int op = 1;
1174 int timeit = 0;
1175 int timeitm = 0;
1176 int showinfo = 0;
1177 int showstore = 0;
1178 int quiet = 0;
1179 int size_offsets = 45;
1180 int size_offsets_max;
1181 int *offsets = NULL;
1182 #if !defined NOPOSIX
1183 int posix = 0;
1184 #endif
1185 int debug = 0;
1186 int done = 0;
1187 int all_use_dfa = 0;
1188 int yield = 0;
1189 int stack_size;
1190
1191 /* These vectors store, end-to-end, a list of captured substring names. Assume
1192 that 1024 is plenty long enough for the few names we'll be testing. */
1193
1194 uschar copynames[1024];
1195 uschar getnames[1024];
1196
1197 uschar *copynamesptr;
1198 uschar *getnamesptr;
1199
1200 /* Get buffers from malloc() so that Electric Fence will check their misuse
1201 when I am debugging. They grow automatically when very long lines are read. */
1202
1203 buffer = (unsigned char *)malloc(buffer_size);
1204 dbuffer = (unsigned char *)malloc(buffer_size);
1205 pbuffer = (unsigned char *)malloc(buffer_size);
1206
1207 /* The outfile variable is static so that new_malloc can use it. */
1208
1209 outfile = stdout;
1210
1211 /* The following _setmode() stuff is some Windows magic that tells its runtime
1212 library to translate CRLF into a single LF character. At least, that's what
1213 I've been told: never having used Windows I take this all on trust. Originally
1214 it set 0x8000, but then I was advised that _O_BINARY was better. */
1215
1216 #if defined(_WIN32) || defined(WIN32)
1217 _setmode( _fileno( stdout ), _O_BINARY );
1218 #endif
1219
1220 /* Scan options */
1221
1222 while (argc > 1 && argv[op][0] == '-')
1223 {
1224 unsigned char *endptr;
1225
1226 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
1227 showstore = 1;
1228 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1229 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1230 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1231 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1232 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1233 #if !defined NODFA
1234 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1235 #endif
1236 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1237 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1238 *endptr == 0))
1239 {
1240 op++;
1241 argc--;
1242 }
1243 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1244 {
1245 int both = argv[op][2] == 0;
1246 int temp;
1247 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1248 *endptr == 0))
1249 {
1250 timeitm = temp;
1251 op++;
1252 argc--;
1253 }
1254 else timeitm = LOOPREPEAT;
1255 if (both) timeit = timeitm;
1256 }
1257 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1258 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1259 *endptr == 0))
1260 {
1261 #if defined(_WIN32) || defined(WIN32)
1262 printf("PCRE: -S not supported on this OS\n");
1263 exit(1);
1264 #else
1265 int rc;
1266 struct rlimit rlim;
1267 getrlimit(RLIMIT_STACK, &rlim);
1268 rlim.rlim_cur = stack_size * 1024 * 1024;
1269 rc = setrlimit(RLIMIT_STACK, &rlim);
1270 if (rc != 0)
1271 {
1272 printf("PCRE: setrlimit() failed with error %d\n", rc);
1273 exit(1);
1274 }
1275 op++;
1276 argc--;
1277 #endif
1278 }
1279 #if !defined NOPOSIX
1280 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1281 #endif
1282 else if (strcmp(argv[op], "-C") == 0)
1283 {
1284 int rc;
1285 unsigned long int lrc;
1286 printf("PCRE version %s\n", pcre_version());
1287 printf("Compiled with\n");
1288 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1289 printf(" %sUTF-8 support\n", rc? "" : "No ");
1290 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1291 printf(" %sUnicode properties support\n", rc? "" : "No ");
1292 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1293 /* Note that these values are always the ASCII values, even
1294 in EBCDIC environments. CR is 13 and NL is 10. */
1295 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1296 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1297 (rc == -2)? "ANYCRLF" :
1298 (rc == -1)? "ANY" : "???");
1299 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1300 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1301 "all Unicode newlines");
1302 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1303 printf(" Internal link size = %d\n", rc);
1304 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1305 printf(" POSIX malloc threshold = %d\n", rc);
1306 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1307 printf(" Default match limit = %ld\n", lrc);
1308 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1309 printf(" Default recursion depth limit = %ld\n", lrc);
1310 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1311 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1312 goto EXIT;
1313 }
1314 else if (strcmp(argv[op], "-help") == 0 ||
1315 strcmp(argv[op], "--help") == 0)
1316 {
1317 usage();
1318 goto EXIT;
1319 }
1320 else
1321 {
1322 printf("** Unknown or malformed option %s\n", argv[op]);
1323 usage();
1324 yield = 1;
1325 goto EXIT;
1326 }
1327 op++;
1328 argc--;
1329 }
1330
1331 /* Get the store for the offsets vector, and remember what it was */
1332
1333 size_offsets_max = size_offsets;
1334 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1335 if (offsets == NULL)
1336 {
1337 printf("** Failed to get %d bytes of memory for offsets vector\n",
1338 (int)(size_offsets_max * sizeof(int)));
1339 yield = 1;
1340 goto EXIT;
1341 }
1342
1343 /* Sort out the input and output files */
1344
1345 if (argc > 1)
1346 {
1347 infile = fopen(argv[op], INPUT_MODE);
1348 if (infile == NULL)
1349 {
1350 printf("** Failed to open %s\n", argv[op]);
1351 yield = 1;
1352 goto EXIT;
1353 }
1354 }
1355
1356 if (argc > 2)
1357 {
1358 outfile = fopen(argv[op+1], OUTPUT_MODE);
1359 if (outfile == NULL)
1360 {
1361 printf("** Failed to open %s\n", argv[op+1]);
1362 yield = 1;
1363 goto EXIT;
1364 }
1365 }
1366
1367 /* Set alternative malloc function */
1368
1369 pcre_malloc = new_malloc;
1370 pcre_free = new_free;
1371 pcre_stack_malloc = stack_malloc;
1372 pcre_stack_free = stack_free;
1373
1374 /* Heading line unless quiet, then prompt for first regex if stdin */
1375
1376 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1377
1378 /* Main loop */
1379
1380 while (!done)
1381 {
1382 pcre *re = NULL;
1383 pcre_extra *extra = NULL;
1384
1385 #if !defined NOPOSIX /* There are still compilers that require no indent */
1386 regex_t preg;
1387 int do_posix = 0;
1388 #endif
1389
1390 const char *error;
1391 unsigned char *markptr;
1392 unsigned char *p, *pp, *ppp;
1393 unsigned char *to_file = NULL;
1394 const unsigned char *tables = NULL;
1395 unsigned long int true_size, true_study_size = 0;
1396 size_t size, regex_gotten_store;
1397 int do_mark = 0;
1398 int do_study = 0;
1399 int do_debug = debug;
1400 int do_G = 0;
1401 int do_g = 0;
1402 int do_showinfo = showinfo;
1403 int do_showrest = 0;
1404 int do_flip = 0;
1405 int erroroffset, len, delimiter, poffset;
1406
1407 use_utf8 = 0;
1408 debug_lengths = 1;
1409
1410 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1411 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1412 fflush(outfile);
1413
1414 p = buffer;
1415 while (isspace(*p)) p++;
1416 if (*p == 0) continue;
1417
1418 /* See if the pattern is to be loaded pre-compiled from a file. */
1419
1420 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1421 {
1422 unsigned long int magic, get_options;
1423 uschar sbuf[8];
1424 FILE *f;
1425
1426 p++;
1427 pp = p + (int)strlen((char *)p);
1428 while (isspace(pp[-1])) pp--;
1429 *pp = 0;
1430
1431 f = fopen((char *)p, "rb");
1432 if (f == NULL)
1433 {
1434 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1435 continue;
1436 }
1437
1438 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1439
1440 true_size =
1441 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1442 true_study_size =
1443 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1444
1445 re = (real_pcre *)new_malloc(true_size);
1446 regex_gotten_store = gotten_store;
1447
1448 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1449
1450 magic = ((real_pcre *)re)->magic_number;
1451 if (magic != MAGIC_NUMBER)
1452 {
1453 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1454 {
1455 do_flip = 1;
1456 }
1457 else
1458 {
1459 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1460 fclose(f);
1461 continue;
1462 }
1463 }
1464
1465 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1466 do_flip? " (byte-inverted)" : "", p);
1467
1468 /* Need to know if UTF-8 for printing data strings */
1469
1470 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1471 use_utf8 = (get_options & PCRE_UTF8) != 0;
1472
1473 /* Now see if there is any following study data */
1474
1475 if (true_study_size != 0)
1476 {
1477 pcre_study_data *psd;
1478
1479 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1480 extra->flags = PCRE_EXTRA_STUDY_DATA;
1481
1482 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1483 extra->study_data = psd;
1484
1485 if (fread(psd, 1, true_study_size, f) != true_study_size)
1486 {
1487 FAIL_READ:
1488 fprintf(outfile, "Failed to read data from %s\n", p);
1489 if (extra != NULL) new_free(extra);
1490 if (re != NULL) new_free(re);
1491 fclose(f);
1492 continue;
1493 }
1494 fprintf(outfile, "Study data loaded from %s\n", p);
1495 do_study = 1; /* To get the data output if requested */
1496 }
1497 else fprintf(outfile, "No study data\n");
1498
1499 fclose(f);
1500 goto SHOW_INFO;
1501 }
1502
1503 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1504 the pattern; if is isn't complete, read more. */
1505
1506 delimiter = *p++;
1507
1508 if (isalnum(delimiter) || delimiter == '\\')
1509 {
1510 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1511 goto SKIP_DATA;
1512 }
1513
1514 pp = p;
1515 poffset = (int)(p - buffer);
1516
1517 for(;;)
1518 {
1519 while (*pp != 0)
1520 {
1521 if (*pp == '\\' && pp[1] != 0) pp++;
1522 else if (*pp == delimiter) break;
1523 pp++;
1524 }
1525 if (*pp != 0) break;
1526 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1527 {
1528 fprintf(outfile, "** Unexpected EOF\n");
1529 done = 1;
1530 goto CONTINUE;
1531 }
1532 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1533 }
1534
1535 /* The buffer may have moved while being extended; reset the start of data
1536 pointer to the correct relative point in the buffer. */
1537
1538 p = buffer + poffset;
1539
1540 /* If the first character after the delimiter is backslash, make
1541 the pattern end with backslash. This is purely to provide a way
1542 of testing for the error message when a pattern ends with backslash. */
1543
1544 if (pp[1] == '\\') *pp++ = '\\';
1545
1546 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1547 for callouts. */
1548
1549 *pp++ = 0;
1550 strcpy((char *)pbuffer, (char *)p);
1551
1552 /* Look for options after final delimiter */
1553
1554 options = 0;
1555 study_options = 0;
1556 log_store = showstore; /* default from command line */
1557
1558 while (*pp != 0)
1559 {
1560 switch (*pp++)
1561 {
1562 case 'f': options |= PCRE_FIRSTLINE; break;
1563 case 'g': do_g = 1; break;
1564 case 'i': options |= PCRE_CASELESS; break;
1565 case 'm': options |= PCRE_MULTILINE; break;
1566 case 's': options |= PCRE_DOTALL; break;
1567 case 'x': options |= PCRE_EXTENDED; break;
1568
1569 case '+': do_showrest = 1; break;
1570 case 'A': options |= PCRE_ANCHORED; break;
1571 case 'B': do_debug = 1; break;
1572 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1573 case 'D': do_debug = do_showinfo = 1; break;
1574 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1575 case 'F': do_flip = 1; break;
1576 case 'G': do_G = 1; break;
1577 case 'I': do_showinfo = 1; break;
1578 case 'J': options |= PCRE_DUPNAMES; break;
1579 case 'K': do_mark = 1; break;
1580 case 'M': log_store = 1; break;
1581 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1582
1583 #if !defined NOPOSIX
1584 case 'P': do_posix = 1; break;
1585 #endif
1586
1587 case 'S': do_study = 1; break;
1588 case 'U': options |= PCRE_UNGREEDY; break;
1589 case 'W': options |= PCRE_UCP; break;
1590 case 'X': options |= PCRE_EXTRA; break;
1591 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1592 case 'Z': debug_lengths = 0; break;
1593 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1594 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1595
1596 case 'T':
1597 switch (*pp++)
1598 {
1599 case '0': tables = tables0; break;
1600 case '1': tables = tables1; break;
1601
1602 case '\r':
1603 case '\n':
1604 case ' ':
1605 case 0:
1606 fprintf(outfile, "** Missing table number after /T\n");
1607 goto SKIP_DATA;
1608
1609 default:
1610 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1611 goto SKIP_DATA;
1612 }
1613 break;
1614
1615 case 'L':
1616 ppp = pp;
1617 /* The '\r' test here is so that it works on Windows. */
1618 /* The '0' test is just in case this is an unterminated line. */
1619 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1620 *ppp = 0;
1621 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1622 {
1623 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1624 goto SKIP_DATA;
1625 }
1626 locale_set = 1;
1627 tables = pcre_maketables();
1628 pp = ppp;
1629 break;
1630
1631 case '>':
1632 to_file = pp;
1633 while (*pp != 0) pp++;
1634 while (isspace(pp[-1])) pp--;
1635 *pp = 0;
1636 break;
1637
1638 case '<':
1639 {
1640 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1641 {
1642 options |= PCRE_JAVASCRIPT_COMPAT;
1643 pp += 3;
1644 }
1645 else
1646 {
1647 int x = check_newline(pp, outfile);
1648 if (x == 0) goto SKIP_DATA;
1649 options |= x;
1650 while (*pp++ != '>');
1651 }
1652 }
1653 break;
1654
1655 case '\r': /* So that it works in Windows */
1656 case '\n':
1657 case ' ':
1658 break;
1659
1660 default:
1661 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1662 goto SKIP_DATA;
1663 }
1664 }
1665
1666 /* Handle compiling via the POSIX interface, which doesn't support the
1667 timing, showing, or debugging options, nor the ability to pass over
1668 local character tables. */
1669
1670 #if !defined NOPOSIX
1671 if (posix || do_posix)
1672 {
1673 int rc;
1674 int cflags = 0;
1675
1676 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1677 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1678 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1679 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1680 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1681 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1682 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1683
1684 rc = regcomp(&preg, (char *)p, cflags);
1685
1686 /* Compilation failed; go back for another re, skipping to blank line
1687 if non-interactive. */
1688
1689 if (rc != 0)
1690 {
1691 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1692 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1693 goto SKIP_DATA;
1694 }
1695 }
1696
1697 /* Handle compiling via the native interface */
1698
1699 else
1700 #endif /* !defined NOPOSIX */
1701
1702 {
1703 unsigned long int get_options;
1704
1705 if (timeit > 0)
1706 {
1707 register int i;
1708 clock_t time_taken;
1709 clock_t start_time = clock();
1710 for (i = 0; i < timeit; i++)
1711 {
1712 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1713 if (re != NULL) free(re);
1714 }
1715 time_taken = clock() - start_time;
1716 fprintf(outfile, "Compile time %.4f milliseconds\n",
1717 (((double)time_taken * 1000.0) / (double)timeit) /
1718 (double)CLOCKS_PER_SEC);
1719 }
1720
1721 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1722
1723 /* Compilation failed; go back for another re, skipping to blank line
1724 if non-interactive. */
1725
1726 if (re == NULL)
1727 {
1728 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1729 SKIP_DATA:
1730 if (infile != stdin)
1731 {
1732 for (;;)
1733 {
1734 if (extend_inputline(infile, buffer, NULL) == NULL)
1735 {
1736 done = 1;
1737 goto CONTINUE;
1738 }
1739 len = (int)strlen((char *)buffer);
1740 while (len > 0 && isspace(buffer[len-1])) len--;
1741 if (len == 0) break;
1742 }
1743 fprintf(outfile, "\n");
1744 }
1745 goto CONTINUE;
1746 }
1747
1748 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1749 within the regex; check for this so that we know how to process the data
1750 lines. */
1751
1752 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1753 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1754
1755 /* Print information if required. There are now two info-returning
1756 functions. The old one has a limited interface and returns only limited
1757 data. Check that it agrees with the newer one. */
1758
1759 if (log_store)
1760 fprintf(outfile, "Memory allocation (code space): %d\n",
1761 (int)(gotten_store -
1762 sizeof(real_pcre) -
1763 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1764
1765 /* Extract the size for possible writing before possibly flipping it,
1766 and remember the store that was got. */
1767
1768 true_size = ((real_pcre *)re)->size;
1769 regex_gotten_store = gotten_store;
1770
1771 /* If /S was present, study the regexp to generate additional info to
1772 help with the matching. */
1773
1774 if (do_study)
1775 {
1776 if (timeit > 0)
1777 {
1778 register int i;
1779 clock_t time_taken;
1780 clock_t start_time = clock();
1781 for (i = 0; i < timeit; i++)
1782 extra = pcre_study(re, study_options, &error);
1783 time_taken = clock() - start_time;
1784 if (extra != NULL) free(extra);
1785 fprintf(outfile, " Study time %.4f milliseconds\n",
1786 (((double)time_taken * 1000.0) / (double)timeit) /
1787 (double)CLOCKS_PER_SEC);
1788 }
1789 extra = pcre_study(re, study_options, &error);
1790 if (error != NULL)
1791 fprintf(outfile, "Failed to study: %s\n", error);
1792 else if (extra != NULL)
1793 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1794 }
1795
1796 /* If /K was present, we set up for handling MARK data. */
1797
1798 if (do_mark)
1799 {
1800 if (extra == NULL)
1801 {
1802 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1803 extra->flags = 0;
1804 }
1805 extra->mark = &markptr;
1806 extra->flags |= PCRE_EXTRA_MARK;
1807 }
1808
1809 /* If the 'F' option was present, we flip the bytes of all the integer
1810 fields in the regex data block and the study block. This is to make it
1811 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1812 compiled on a different architecture. */
1813
1814 if (do_flip)
1815 {
1816 real_pcre *rre = (real_pcre *)re;
1817 rre->magic_number =
1818 byteflip(rre->magic_number, sizeof(rre->magic_number));
1819 rre->size = byteflip(rre->size, sizeof(rre->size));
1820 rre->options = byteflip(rre->options, sizeof(rre->options));
1821 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1822 rre->top_bracket =
1823 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1824 rre->top_backref =
1825 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1826 rre->first_byte =
1827 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1828 rre->req_byte =
1829 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1830 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1831 sizeof(rre->name_table_offset));
1832 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1833 sizeof(rre->name_entry_size));
1834 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1835 sizeof(rre->name_count));
1836
1837 if (extra != NULL)
1838 {
1839 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1840 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1841 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1842 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1843 }
1844 }
1845
1846 /* Extract information from the compiled data if required */
1847
1848 SHOW_INFO:
1849
1850 if (do_debug)
1851 {
1852 fprintf(outfile, "------------------------------------------------------------------\n");
1853 pcre_printint(re, outfile, debug_lengths);
1854 }
1855
1856 /* We already have the options in get_options (see above) */
1857
1858 if (do_showinfo)
1859 {
1860 unsigned long int all_options;
1861 #if !defined NOINFOCHECK
1862 int old_first_char, old_options, old_count;
1863 #endif
1864 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1865 hascrorlf;
1866 int nameentrysize, namecount;
1867 const uschar *nametable;
1868
1869 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1870 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1871 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1872 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1873 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1874 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1875 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1876 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1877 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1878 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1879 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1880
1881 #if !defined NOINFOCHECK
1882 old_count = pcre_info(re, &old_options, &old_first_char);
1883 if (count < 0) fprintf(outfile,
1884 "Error %d from pcre_info()\n", count);
1885 else
1886 {
1887 if (old_count != count) fprintf(outfile,
1888 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1889 old_count);
1890
1891 if (old_first_char != first_char) fprintf(outfile,
1892 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1893 first_char, old_first_char);
1894
1895 if (old_options != (int)get_options) fprintf(outfile,
1896 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1897 get_options, old_options);
1898 }
1899 #endif
1900
1901 if (size != regex_gotten_store) fprintf(outfile,
1902 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1903 (int)size, (int)regex_gotten_store);
1904
1905 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1906 if (backrefmax > 0)
1907 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1908
1909 if (namecount > 0)
1910 {
1911 fprintf(outfile, "Named capturing subpatterns:\n");
1912 while (namecount-- > 0)
1913 {
1914 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1915 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1916 GET2(nametable, 0));
1917 nametable += nameentrysize;
1918 }
1919 }
1920
1921 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1922 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1923
1924 all_options = ((real_pcre *)re)->options;
1925 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1926
1927 if (get_options == 0) fprintf(outfile, "No options\n");
1928 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1929 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1930 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1931 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1932 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1933 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1934 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1935 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1936 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1937 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1938 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1939 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1940 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1941 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1942 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1943 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1944 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
1945 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1946
1947 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1948
1949 switch (get_options & PCRE_NEWLINE_BITS)
1950 {
1951 case PCRE_NEWLINE_CR:
1952 fprintf(outfile, "Forced newline sequence: CR\n");
1953 break;
1954
1955 case PCRE_NEWLINE_LF:
1956 fprintf(outfile, "Forced newline sequence: LF\n");
1957 break;
1958
1959 case PCRE_NEWLINE_CRLF:
1960 fprintf(outfile, "Forced newline sequence: CRLF\n");
1961 break;
1962
1963 case PCRE_NEWLINE_ANYCRLF:
1964 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1965 break;
1966
1967 case PCRE_NEWLINE_ANY:
1968 fprintf(outfile, "Forced newline sequence: ANY\n");
1969 break;
1970
1971 default:
1972 break;
1973 }
1974
1975 if (first_char == -1)
1976 {
1977 fprintf(outfile, "First char at start or follows newline\n");
1978 }
1979 else if (first_char < 0)
1980 {
1981 fprintf(outfile, "No first char\n");
1982 }
1983 else
1984 {
1985 int ch = first_char & 255;
1986 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1987 "" : " (caseless)";
1988 if (PRINTHEX(ch))
1989 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1990 else
1991 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1992 }
1993
1994 if (need_char < 0)
1995 {
1996 fprintf(outfile, "No need char\n");
1997 }
1998 else
1999 {
2000 int ch = need_char & 255;
2001 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2002 "" : " (caseless)";
2003 if (PRINTHEX(ch))
2004 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2005 else
2006 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2007 }
2008
2009 /* Don't output study size; at present it is in any case a fixed
2010 value, but it varies, depending on the computer architecture, and
2011 so messes up the test suite. (And with the /F option, it might be
2012 flipped.) */
2013
2014 if (do_study)
2015 {
2016 if (extra == NULL)
2017 fprintf(outfile, "Study returned NULL\n");
2018 else
2019 {
2020 uschar *start_bits = NULL;
2021 int minlength;
2022
2023 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2024 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2025
2026 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2027 if (start_bits == NULL)
2028 fprintf(outfile, "No set of starting bytes\n");
2029 else
2030 {
2031 int i;
2032 int c = 24;
2033 fprintf(outfile, "Starting byte set: ");
2034 for (i = 0; i < 256; i++)
2035 {
2036 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2037 {
2038 if (c > 75)
2039 {
2040 fprintf(outfile, "\n ");
2041 c = 2;
2042 }
2043 if (PRINTHEX(i) && i != ' ')
2044 {
2045 fprintf(outfile, "%c ", i);
2046 c += 2;
2047 }
2048 else
2049 {
2050 fprintf(outfile, "\\x%02x ", i);
2051 c += 5;
2052 }
2053 }
2054 }
2055 fprintf(outfile, "\n");
2056 }
2057 }
2058 }
2059 }
2060
2061 /* If the '>' option was present, we write out the regex to a file, and
2062 that is all. The first 8 bytes of the file are the regex length and then
2063 the study length, in big-endian order. */
2064
2065 if (to_file != NULL)
2066 {
2067 FILE *f = fopen((char *)to_file, "wb");
2068 if (f == NULL)
2069 {
2070 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2071 }
2072 else
2073 {
2074 uschar sbuf[8];
2075 sbuf[0] = (uschar)((true_size >> 24) & 255);
2076 sbuf[1] = (uschar)((true_size >> 16) & 255);
2077 sbuf[2] = (uschar)((true_size >> 8) & 255);
2078 sbuf[3] = (uschar)((true_size) & 255);
2079
2080 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2081 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2082 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2083 sbuf[7] = (uschar)((true_study_size) & 255);
2084
2085 if (fwrite(sbuf, 1, 8, f) < 8 ||
2086 fwrite(re, 1, true_size, f) < true_size)
2087 {
2088 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2089 }
2090 else
2091 {
2092 fprintf(outfile, "Compiled regex written to %s\n", to_file);
2093 if (extra != NULL)
2094 {
2095 if (fwrite(extra->study_data, 1, true_study_size, f) <
2096 true_study_size)
2097 {
2098 fprintf(outfile, "Write error on %s: %s\n", to_file,
2099 strerror(errno));
2100 }
2101 else fprintf(outfile, "Study data written to %s\n", to_file);
2102
2103 }
2104 }
2105 fclose(f);
2106 }
2107
2108 new_free(re);
2109 if (extra != NULL) new_free(extra);
2110 if (locale_set)
2111 {
2112 new_free((void *)tables);
2113 setlocale(LC_CTYPE, "C");
2114 locale_set = 0;
2115 }
2116 continue; /* With next regex */
2117 }
2118 } /* End of non-POSIX compile */
2119
2120 /* Read data lines and test them */
2121
2122 for (;;)
2123 {
2124 uschar *q;
2125 uschar *bptr;
2126 int *use_offsets = offsets;
2127 int use_size_offsets = size_offsets;
2128 int callout_data = 0;
2129 int callout_data_set = 0;
2130 int count, c;
2131 int copystrings = 0;
2132 int find_match_limit = default_find_match_limit;
2133 int getstrings = 0;
2134 int getlist = 0;
2135 int gmatched = 0;
2136 int start_offset = 0;
2137 int start_offset_sign = 1;
2138 int g_notempty = 0;
2139 int use_dfa = 0;
2140
2141 options = 0;
2142
2143 *copynames = 0;
2144 *getnames = 0;
2145
2146 copynamesptr = copynames;
2147 getnamesptr = getnames;
2148
2149 pcre_callout = callout;
2150 first_callout = 1;
2151 callout_extra = 0;
2152 callout_count = 0;
2153 callout_fail_count = 999999;
2154 callout_fail_id = -1;
2155 show_malloc = 0;
2156
2157 if (extra != NULL) extra->flags &=
2158 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2159
2160 len = 0;
2161 for (;;)
2162 {
2163 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2164 {
2165 if (len > 0) /* Reached EOF without hitting a newline */
2166 {
2167 fprintf(outfile, "\n");
2168 break;
2169 }
2170 done = 1;
2171 goto CONTINUE;
2172 }
2173 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2174 len = (int)strlen((char *)buffer);
2175 if (buffer[len-1] == '\n') break;
2176 }
2177
2178 while (len > 0 && isspace(buffer[len-1])) len--;
2179 buffer[len] = 0;
2180 if (len == 0) break;
2181
2182 p = buffer;
2183 while (isspace(*p)) p++;
2184
2185 bptr = q = dbuffer;
2186 while ((c = *p++) != 0)
2187 {
2188 int i = 0;
2189 int n = 0;
2190
2191 if (c == '\\') switch ((c = *p++))
2192 {
2193 case 'a': c = 7; break;
2194 case 'b': c = '\b'; break;
2195 case 'e': c = 27; break;
2196 case 'f': c = '\f'; break;
2197 case 'n': c = '\n'; break;
2198 case 'r': c = '\r'; break;
2199 case 't': c = '\t'; break;
2200 case 'v': c = '\v'; break;
2201
2202 case '0': case '1': case '2': case '3':
2203 case '4': case '5': case '6': case '7':
2204 c -= '0';
2205 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2206 c = c * 8 + *p++ - '0';
2207
2208 #if !defined NOUTF8
2209 if (use_utf8 && c > 255)
2210 {
2211 unsigned char buff8[8];
2212 int ii, utn;
2213 utn = ord2utf8(c, buff8);
2214 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2215 c = buff8[ii]; /* Last byte */
2216 }
2217 #endif
2218 break;
2219
2220 case 'x':
2221
2222 /* Handle \x{..} specially - new Perl thing for utf8 */
2223
2224 #if !defined NOUTF8
2225 if (*p == '{')
2226 {
2227 unsigned char *pt = p;
2228 c = 0;
2229 while (isxdigit(*(++pt)))
2230 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2231 if (*pt == '}')
2232 {
2233 unsigned char buff8[8];
2234 int ii, utn;
2235 if (use_utf8)
2236 {
2237 utn = ord2utf8(c, buff8);
2238 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2239 c = buff8[ii]; /* Last byte */
2240 }
2241 else
2242 {
2243 if (c > 255)
2244 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2245 "UTF-8 mode is not enabled.\n"
2246 "** Truncation will probably give the wrong result.\n", c);
2247 }
2248 p = pt + 1;
2249 break;
2250 }
2251 /* Not correct form; fall through */
2252 }
2253 #endif
2254
2255 /* Ordinary \x */
2256
2257 c = 0;
2258 while (i++ < 2 && isxdigit(*p))
2259 {
2260 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2261 p++;
2262 }
2263 break;
2264
2265 case 0: /* \ followed by EOF allows for an empty line */
2266 p--;
2267 continue;
2268
2269 case '>':
2270 if (*p == '-')
2271 {
2272 start_offset_sign = -1;
2273 p++;
2274 }
2275 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2276 start_offset *= start_offset_sign;
2277 continue;
2278
2279 case 'A': /* Option setting */
2280 options |= PCRE_ANCHORED;
2281 continue;
2282
2283 case 'B':
2284 options |= PCRE_NOTBOL;
2285 continue;
2286
2287 case 'C':
2288 if (isdigit(*p)) /* Set copy string */
2289 {
2290 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2291 copystrings |= 1 << n;
2292 }
2293 else if (isalnum(*p))
2294 {
2295 uschar *npp = copynamesptr;
2296 while (isalnum(*p)) *npp++ = *p++;
2297 *npp++ = 0;
2298 *npp = 0;
2299 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2300 if (n < 0)
2301 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2302 copynamesptr = npp;
2303 }
2304 else if (*p == '+')
2305 {
2306 callout_extra = 1;
2307 p++;
2308 }
2309 else if (*p == '-')
2310 {
2311 pcre_callout = NULL;
2312 p++;
2313 }
2314 else if (*p == '!')
2315 {
2316 callout_fail_id = 0;
2317 p++;
2318 while(isdigit(*p))
2319 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2320 callout_fail_count = 0;
2321 if (*p == '!')
2322 {
2323 p++;
2324 while(isdigit(*p))
2325 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2326 }
2327 }
2328 else if (*p == '*')
2329 {
2330 int sign = 1;
2331 callout_data = 0;
2332 if (*(++p) == '-') { sign = -1; p++; }
2333 while(isdigit(*p))
2334 callout_data = callout_data * 10 + *p++ - '0';
2335 callout_data *= sign;
2336 callout_data_set = 1;
2337 }
2338 continue;
2339
2340 #if !defined NODFA
2341 case 'D':
2342 #if !defined NOPOSIX
2343 if (posix || do_posix)
2344 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2345 else
2346 #endif
2347 use_dfa = 1;
2348 continue;
2349 #endif
2350
2351 #if !defined NODFA
2352 case 'F':
2353 options |= PCRE_DFA_SHORTEST;
2354 continue;
2355 #endif
2356
2357 case 'G':
2358 if (isdigit(*p))
2359 {
2360 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2361 getstrings |= 1 << n;
2362 }
2363 else if (isalnum(*p))
2364 {
2365 uschar *npp = getnamesptr;
2366 while (isalnum(*p)) *npp++ = *p++;
2367 *npp++ = 0;
2368 *npp = 0;
2369 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2370 if (n < 0)
2371 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2372 getnamesptr = npp;
2373 }
2374 continue;
2375
2376 case 'L':
2377 getlist = 1;
2378 continue;
2379
2380 case 'M':
2381 find_match_limit = 1;
2382 continue;
2383
2384 case 'N':
2385 if ((options & PCRE_NOTEMPTY) != 0)
2386 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2387 else
2388 options |= PCRE_NOTEMPTY;
2389 continue;
2390
2391 case 'O':
2392 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2393 if (n > size_offsets_max)
2394 {
2395 size_offsets_max = n;
2396 free(offsets);
2397 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2398 if (offsets == NULL)
2399 {
2400 printf("** Failed to get %d bytes of memory for offsets vector\n",
2401 (int)(size_offsets_max * sizeof(int)));
2402 yield = 1;
2403 goto EXIT;
2404 }
2405 }
2406 use_size_offsets = n;
2407 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2408 continue;
2409
2410 case 'P':
2411 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2412 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2413 continue;
2414
2415 case 'Q':
2416 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2417 if (extra == NULL)
2418 {
2419 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2420 extra->flags = 0;
2421 }
2422 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2423 extra->match_limit_recursion = n;
2424 continue;
2425
2426 case 'q':
2427 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2428 if (extra == NULL)
2429 {
2430 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2431 extra->flags = 0;
2432 }
2433 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2434 extra->match_limit = n;
2435 continue;
2436
2437 #if !defined NODFA
2438 case 'R':
2439 options |= PCRE_DFA_RESTART;
2440 continue;
2441 #endif
2442
2443 case 'S':
2444 show_malloc = 1;
2445 continue;
2446
2447 case 'Y':
2448 options |= PCRE_NO_START_OPTIMIZE;
2449 continue;
2450
2451 case 'Z':
2452 options |= PCRE_NOTEOL;
2453 continue;
2454
2455 case '?':
2456 options |= PCRE_NO_UTF8_CHECK;
2457 continue;
2458
2459 case '<':
2460 {
2461 int x = check_newline(p, outfile);
2462 if (x == 0) goto NEXT_DATA;
2463 options |= x;
2464 while (*p++ != '>');
2465 }
2466 continue;
2467 }
2468 *q++ = c;
2469 }
2470 *q = 0;
2471 len = (int)(q - dbuffer);
2472
2473 /* Move the data to the end of the buffer so that a read over the end of
2474 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2475 we are using the POSIX interface, we must include the terminating zero. */
2476
2477 #if !defined NOPOSIX
2478 if (posix || do_posix)
2479 {
2480 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2481 bptr += buffer_size - len - 1;
2482 }
2483 else
2484 #endif
2485 {
2486 memmove(bptr + buffer_size - len, bptr, len);
2487 bptr += buffer_size - len;
2488 }
2489
2490 if ((all_use_dfa || use_dfa) && find_match_limit)
2491 {
2492 printf("**Match limit not relevant for DFA matching: ignored\n");
2493 find_match_limit = 0;
2494 }
2495
2496 /* Handle matching via the POSIX interface, which does not
2497 support timing or playing with the match limit or callout data. */
2498
2499 #if !defined NOPOSIX
2500 if (posix || do_posix)
2501 {
2502 int rc;
2503 int eflags = 0;
2504 regmatch_t *pmatch = NULL;
2505 if (use_size_offsets > 0)
2506 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2507 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2508 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2509 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2510
2511 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2512
2513 if (rc != 0)
2514 {
2515 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2516 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2517 }
2518 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2519 != 0)
2520 {
2521 fprintf(outfile, "Matched with REG_NOSUB\n");
2522 }
2523 else
2524 {
2525 size_t i;
2526 for (i = 0; i < (size_t)use_size_offsets; i++)
2527 {
2528 if (pmatch[i].rm_so >= 0)
2529 {
2530 fprintf(outfile, "%2d: ", (int)i);
2531 (void)pchars(dbuffer + pmatch[i].rm_so,
2532 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2533 fprintf(outfile, "\n");
2534 if (i == 0 && do_showrest)
2535 {
2536 fprintf(outfile, " 0+ ");
2537 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2538 outfile);
2539 fprintf(outfile, "\n");
2540 }
2541 }
2542 }
2543 }
2544 free(pmatch);
2545 }
2546
2547 /* Handle matching via the native interface - repeats for /g and /G */
2548
2549 else
2550 #endif /* !defined NOPOSIX */
2551
2552 for (;; gmatched++) /* Loop for /g or /G */
2553 {
2554 markptr = NULL;
2555
2556 if (timeitm > 0)
2557 {
2558 register int i;
2559 clock_t time_taken;
2560 clock_t start_time = clock();
2561
2562 #if !defined NODFA
2563 if (all_use_dfa || use_dfa)
2564 {
2565 int workspace[1000];
2566 for (i = 0; i < timeitm; i++)
2567 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2568 options | g_notempty, use_offsets, use_size_offsets, workspace,
2569 sizeof(workspace)/sizeof(int));
2570 }
2571 else
2572 #endif
2573
2574 for (i = 0; i < timeitm; i++)
2575 count = pcre_exec(re, extra, (char *)bptr, len,
2576 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2577
2578 time_taken = clock() - start_time;
2579 fprintf(outfile, "Execute time %.4f milliseconds\n",
2580 (((double)time_taken * 1000.0) / (double)timeitm) /
2581 (double)CLOCKS_PER_SEC);
2582 }
2583
2584 /* If find_match_limit is set, we want to do repeated matches with
2585 varying limits in order to find the minimum value for the match limit and
2586 for the recursion limit. */
2587
2588 if (find_match_limit)
2589 {
2590 if (extra == NULL)
2591 {
2592 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2593 extra->flags = 0;
2594 }
2595
2596 (void)check_match_limit(re, extra, bptr, len, start_offset,
2597 options|g_notempty, use_offsets, use_size_offsets,
2598 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2599 PCRE_ERROR_MATCHLIMIT, "match()");
2600
2601 count = check_match_limit(re, extra, bptr, len, start_offset,
2602 options|g_notempty, use_offsets, use_size_offsets,
2603 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2604 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2605 }
2606
2607 /* If callout_data is set, use the interface with additional data */
2608
2609 else if (callout_data_set)
2610 {
2611 if (extra == NULL)
2612 {
2613 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2614 extra->flags = 0;
2615 }
2616 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2617 extra->callout_data = &callout_data;
2618 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2619 options | g_notempty, use_offsets, use_size_offsets);
2620 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2621 }
2622
2623 /* The normal case is just to do the match once, with the default
2624 value of match_limit. */
2625
2626 #if !defined NODFA
2627 else if (all_use_dfa || use_dfa)
2628 {
2629 int workspace[1000];
2630 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2631 options | g_notempty, use_offsets, use_size_offsets, workspace,
2632 sizeof(workspace)/sizeof(int));
2633 if (count == 0)
2634 {
2635 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2636 count = use_size_offsets/2;
2637 }
2638 }
2639 #endif
2640
2641 else
2642 {
2643 count = pcre_exec(re, extra, (char *)bptr, len,
2644 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2645 if (count == 0)
2646 {
2647 fprintf(outfile, "Matched, but too many substrings\n");
2648 count = use_size_offsets/3;
2649 }
2650 }
2651
2652 /* Matched */
2653
2654 if (count >= 0)
2655 {
2656 int i, maxcount;
2657
2658 #if !defined NODFA
2659 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2660 #endif
2661 maxcount = use_size_offsets/3;
2662
2663 /* This is a check against a lunatic return value. */
2664
2665 if (count > maxcount)
2666 {
2667 fprintf(outfile,
2668 "** PCRE error: returned count %d is too big for offset size %d\n",
2669 count, use_size_offsets);
2670 count = use_size_offsets/3;
2671 if (do_g || do_G)
2672 {
2673 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2674 do_g = do_G = FALSE; /* Break g/G loop */
2675 }
2676 }
2677
2678 for (i = 0; i < count * 2; i += 2)
2679 {
2680 if (use_offsets[i] < 0)
2681 fprintf(outfile, "%2d: <unset>\n", i/2);
2682 else
2683 {
2684 fprintf(outfile, "%2d: ", i/2);
2685 (void)pchars(bptr + use_offsets[i],
2686 use_offsets[i+1] - use_offsets[i], outfile);
2687 fprintf(outfile, "\n");
2688 if (i == 0)
2689 {
2690 if (do_showrest)
2691 {
2692 fprintf(outfile, " 0+ ");
2693 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2694 outfile);
2695 fprintf(outfile, "\n");
2696 }
2697 }
2698 }
2699 }
2700
2701 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2702
2703 for (i = 0; i < 32; i++)
2704 {
2705 if ((copystrings & (1 << i)) != 0)
2706 {
2707 char copybuffer[256];
2708 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2709 i, copybuffer, sizeof(copybuffer));
2710 if (rc < 0)
2711 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2712 else
2713 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2714 }
2715 }
2716
2717 for (copynamesptr = copynames;
2718 *copynamesptr != 0;
2719 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2720 {
2721 char copybuffer[256];
2722 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2723 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2724 if (rc < 0)
2725 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2726 else
2727 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2728 }
2729
2730 for (i = 0; i < 32; i++)
2731 {
2732 if ((getstrings & (1 << i)) != 0)
2733 {
2734 const char *substring;
2735 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2736 i, &substring);
2737 if (rc < 0)
2738 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2739 else
2740 {
2741 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2742 pcre_free_substring(substring);
2743 }
2744 }
2745 }
2746
2747 for (getnamesptr = getnames;
2748 *getnamesptr != 0;
2749 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2750 {
2751 const char *substring;
2752 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2753 count, (char *)getnamesptr, &substring);
2754 if (rc < 0)
2755 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2756 else
2757 {
2758 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2759 pcre_free_substring(substring);
2760 }
2761 }
2762
2763 if (getlist)
2764 {
2765 const char **stringlist;
2766 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2767 &stringlist);
2768 if (rc < 0)
2769 fprintf(outfile, "get substring list failed %d\n", rc);
2770 else
2771 {
2772 for (i = 0; i < count; i++)
2773 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2774 if (stringlist[i] != NULL)
2775 fprintf(outfile, "string list not terminated by NULL\n");
2776 /* free((void *)stringlist); */
2777 pcre_free_substring_list(stringlist);
2778 }
2779 }
2780 }
2781
2782 /* There was a partial match */
2783
2784 else if (count == PCRE_ERROR_PARTIAL)
2785 {
2786 if (markptr == NULL) fprintf(outfile, "Partial match");
2787 else fprintf(outfile, "Partial match, mark=%s", markptr);
2788 if (use_size_offsets > 1)
2789 {
2790 fprintf(outfile, ": ");
2791 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2792 outfile);
2793 }
2794 fprintf(outfile, "\n");
2795 break; /* Out of the /g loop */
2796 }
2797
2798 /* Failed to match. If this is a /g or /G loop and we previously set
2799 g_notempty after a null match, this is not necessarily the end. We want
2800 to advance the start offset, and continue. We won't be at the end of the
2801 string - that was checked before setting g_notempty.
2802
2803 Complication arises in the case when the newline convention is "any",
2804 "crlf", or "anycrlf". If the previous match was at the end of a line
2805 terminated by CRLF, an advance of one character just passes the \r,
2806 whereas we should prefer the longer newline sequence, as does the code in
2807 pcre_exec(). Fudge the offset value to achieve this. We check for a
2808 newline setting in the pattern; if none was set, use pcre_config() to
2809 find the default.
2810
2811 Otherwise, in the case of UTF-8 matching, the advance must be one
2812 character, not one byte. */
2813
2814 else
2815 {
2816 if (g_notempty != 0)
2817 {
2818 int onechar = 1;
2819 unsigned int obits = ((real_pcre *)re)->options;
2820 use_offsets[0] = start_offset;
2821 if ((obits & PCRE_NEWLINE_BITS) == 0)
2822 {
2823 int d;
2824 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2825 /* Note that these values are always the ASCII ones, even in
2826 EBCDIC environments. CR = 13, NL = 10. */
2827 obits = (d == 13)? PCRE_NEWLINE_CR :
2828 (d == 10)? PCRE_NEWLINE_LF :
2829 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2830 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2831 (d == -1)? PCRE_NEWLINE_ANY : 0;
2832 }
2833 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2834 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2835 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2836 &&
2837 start_offset < len - 1 &&
2838 bptr[start_offset] == '\r' &&
2839 bptr[start_offset+1] == '\n')
2840 onechar++;
2841 else if (use_utf8)
2842 {
2843 while (start_offset + onechar < len)
2844 {
2845 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2846 onechar++;
2847 }
2848 }
2849 use_offsets[1] = start_offset + onechar;
2850 }
2851 else
2852 {
2853 if (count == PCRE_ERROR_NOMATCH)
2854 {
2855 if (gmatched == 0)
2856 {
2857 if (markptr == NULL) fprintf(outfile, "No match\n");
2858 else fprintf(outfile, "No match, mark = %s\n", markptr);
2859 }
2860 }
2861 else fprintf(outfile, "Error %d\n", count);
2862 break; /* Out of the /g loop */
2863 }
2864 }
2865
2866 /* If not /g or /G we are done */
2867
2868 if (!do_g && !do_G) break;
2869
2870 /* If we have matched an empty string, first check to see if we are at
2871 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
1872 Perl's /g option does. This turns out to be rather cunning. First we set
2873 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2874 same point. If this fails (picked up above) we advance to the next
2875 character. */
2876
2877 g_notempty = 0;
2878
2879 if (use_offsets[0] == use_offsets[1])
2880 {
2881 if (use_offsets[0] == len) break;
2882 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2883 }
2884
2885 /* For /g, update the start offset, leaving the rest alone */
2886
2887 if (do_g) start_offset = use_offsets[1];
2888
2889 /* For /G, update the pointer and length */
2890
2891 else
2892 {
2893 bptr += use_offsets[1];
2894 len -= use_offsets[1];
2895 }
2896 } /* End of loop for /g and /G */
2897
2898 NEXT_DATA: continue;
2899 } /* End of loop for data lines */
2900
2901 CONTINUE:
2902
2903 #if !defined NOPOSIX
2904 if (posix || do_posix) regfree(&preg);
2905 #endif
2906
2907 if (re != NULL) new_free(re);
2908 if (extra != NULL) new_free(extra);
2909 if (locale_set)
2910 {
2911 new_free((void *)tables);
2912 setlocale(LC_CTYPE, "C");
2913 locale_set = 0;
2914 }
2915 }
2916
2917 if (infile == stdin) fprintf(outfile, "\n");
2918
2919 EXIT:
2920
2921 if (infile != NULL && infile != stdin) fclose(infile);
2922 if (outfile != NULL && outfile != stdout) fclose(outfile);
2923
2924 free(buffer);
2925 free(dbuffer);
2926 free(pbuffer);
2927 free(offsets);
2928
2929 return yield;
2930 }
2931
2932 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12