/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 553 - (show annotations) (download)
Fri Oct 22 15:57:50 2010 UTC (3 years, 9 months ago) by ph10
File MIME type: text/plain
File size: 86492 byte(s)
Change the way PCRE_PARTIAL_HARD handles \z, \Z, \b, \B, and $.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #ifndef isatty
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
78 #ifndef fileno
79 #define fileno _fileno
80 #endif
81
82 #else
83 #include <sys/time.h> /* These two includes are needed */
84 #include <sys/resource.h> /* for setrlimit(). */
85 #define INPUT_MODE "rb"
86 #define OUTPUT_MODE "wb"
87 #endif
88
89
90 /* We have to include pcre_internal.h because we need the internal info for
91 displaying the results of pcre_study() and we also need to know about the
92 internal macros, structures, and other internal data values; pcretest has
93 "inside information" compared to a program that strictly follows the PCRE API.
94
95 Although pcre_internal.h does itself include pcre.h, we explicitly include it
96 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
97 appropriately for an application, not for building PCRE. */
98
99 #include "pcre.h"
100 #include "pcre_internal.h"
101
102 /* We need access to some of the data tables that PCRE uses. So as not to have
103 to keep two copies, we include the source file here, changing the names of the
104 external symbols to prevent clashes. */
105
106 #define _pcre_ucp_gentype ucp_gentype
107 #define _pcre_utf8_table1 utf8_table1
108 #define _pcre_utf8_table1_size utf8_table1_size
109 #define _pcre_utf8_table2 utf8_table2
110 #define _pcre_utf8_table3 utf8_table3
111 #define _pcre_utf8_table4 utf8_table4
112 #define _pcre_utt utt
113 #define _pcre_utt_size utt_size
114 #define _pcre_utt_names utt_names
115 #define _pcre_OP_lengths OP_lengths
116
117 #include "pcre_tables.c"
118
119 /* We also need the pcre_printint() function for printing out compiled
120 patterns. This function is in a separate file so that it can be included in
121 pcre_compile.c when that module is compiled with debugging enabled. It needs to
122 know which case is being compiled. */
123
124 #define COMPILING_PCRETEST
125 #include "pcre_printint.src"
126
127 /* The definition of the macro PRINTABLE, which determines whether to print an
128 output character as-is or as a hex value when showing compiled patterns, is
129 contained in the printint.src file. We use it here also, in cases when the
130 locale has not been explicitly changed, so as to get consistent output from
131 systems that differ in their output from isprint() even in the "C" locale. */
132
133 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
134
135 /* It is possible to compile this test program without including support for
136 testing the POSIX interface, though this is not available via the standard
137 Makefile. */
138
139 #if !defined NOPOSIX
140 #include "pcreposix.h"
141 #endif
142
143 /* It is also possible, for the benefit of the version currently imported into
144 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
145 interface to the DFA matcher (NODFA), and without the doublecheck of the old
146 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
147 UTF8 support if PCRE is built without it. */
148
149 #ifndef SUPPORT_UTF8
150 #ifndef NOUTF8
151 #define NOUTF8
152 #endif
153 #endif
154
155
156 /* Other parameters */
157
158 #ifndef CLOCKS_PER_SEC
159 #ifdef CLK_TCK
160 #define CLOCKS_PER_SEC CLK_TCK
161 #else
162 #define CLOCKS_PER_SEC 100
163 #endif
164 #endif
165
166 /* This is the default loop count for timing. */
167
168 #define LOOPREPEAT 500000
169
170 /* Static variables */
171
172 static FILE *outfile; /* stream written by callout(), new_info() and the malloc wrappers */
173 static int log_store = 0;
174 static int callout_count; /* incremented by callout() each time the failing callout id is seen */
175 static int callout_extra; /* non-zero: callout() also prints the capture groups */
176 static int callout_fail_count; /* callout() yields 1 once callout_count reaches this value */
177 static int callout_fail_id; /* callout number that is counted towards callout_fail_count */
178 static int debug_lengths;
179 static int first_callout; /* non-zero: callout() re-prints the subject; cleared by callout() */
180 static int locale_set = 0; /* selects isprint() rather than PRINTABLE() in PRINTHEX */
181 static int show_malloc; /* non-zero: the malloc/free wrappers trace to outfile */
182 static int use_utf8; /* non-zero: pchars() decodes the string as UTF-8 */
183 static size_t gotten_store; /* size of the most recent new_malloc() request */
184
185 /* The buffers grow automatically if very long input lines are encountered.
   extend_inputline() doubles buffer_size and reallocates all three together. */
186
187 static int buffer_size = 50000; /* current size in bytes of each buffer below */
188 static uschar *buffer = NULL; /* input line buffer filled by extend_inputline() */
189 static uschar *dbuffer = NULL;
190 static uschar *pbuffer = NULL; /* read by callout() at the pattern position */
191
192
193 /*************************************************
194 * Alternate character tables *
195 *************************************************/
196
197 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
198 using the default tables of the library. However, the T option can be used to
199 select alternate sets of tables, for different kinds of testing. Note also that
200 the L (locale) option also adjusts the tables. */
201
202 /* This is the set of tables distributed as default with PCRE. It recognizes
203 only ASCII characters. */
204
205 static const unsigned char tables0[] = { /* layout: 256 lower-case + 256 case-flip + 10*32 class bitmaps + 256 type flags */
206
207 /* This table is a lower casing table. */
208
209 0, 1, 2, 3, 4, 5, 6, 7,
210 8, 9, 10, 11, 12, 13, 14, 15,
211 16, 17, 18, 19, 20, 21, 22, 23,
212 24, 25, 26, 27, 28, 29, 30, 31,
213 32, 33, 34, 35, 36, 37, 38, 39,
214 40, 41, 42, 43, 44, 45, 46, 47,
215 48, 49, 50, 51, 52, 53, 54, 55,
216 56, 57, 58, 59, 60, 61, 62, 63,
217 64, 97, 98, 99,100,101,102,103,
218 104,105,106,107,108,109,110,111,
219 112,113,114,115,116,117,118,119,
220 120,121,122, 91, 92, 93, 94, 95,
221 96, 97, 98, 99,100,101,102,103,
222 104,105,106,107,108,109,110,111,
223 112,113,114,115,116,117,118,119,
224 120,121,122,123,124,125,126,127,
225 128,129,130,131,132,133,134,135,
226 136,137,138,139,140,141,142,143,
227 144,145,146,147,148,149,150,151,
228 152,153,154,155,156,157,158,159,
229 160,161,162,163,164,165,166,167,
230 168,169,170,171,172,173,174,175,
231 176,177,178,179,180,181,182,183,
232 184,185,186,187,188,189,190,191,
233 192,193,194,195,196,197,198,199,
234 200,201,202,203,204,205,206,207,
235 208,209,210,211,212,213,214,215,
236 216,217,218,219,220,221,222,223,
237 224,225,226,227,228,229,230,231,
238 232,233,234,235,236,237,238,239,
239 240,241,242,243,244,245,246,247,
240 248,249,250,251,252,253,254,255,
241
242 /* This table is a case flipping table. */
243
244 0, 1, 2, 3, 4, 5, 6, 7,
245 8, 9, 10, 11, 12, 13, 14, 15,
246 16, 17, 18, 19, 20, 21, 22, 23,
247 24, 25, 26, 27, 28, 29, 30, 31,
248 32, 33, 34, 35, 36, 37, 38, 39,
249 40, 41, 42, 43, 44, 45, 46, 47,
250 48, 49, 50, 51, 52, 53, 54, 55,
251 56, 57, 58, 59, 60, 61, 62, 63,
252 64, 97, 98, 99,100,101,102,103,
253 104,105,106,107,108,109,110,111,
254 112,113,114,115,116,117,118,119,
255 120,121,122, 91, 92, 93, 94, 95,
256 96, 65, 66, 67, 68, 69, 70, 71,
257 72, 73, 74, 75, 76, 77, 78, 79,
258 80, 81, 82, 83, 84, 85, 86, 87,
259 88, 89, 90,123,124,125,126,127,
260 128,129,130,131,132,133,134,135,
261 136,137,138,139,140,141,142,143,
262 144,145,146,147,148,149,150,151,
263 152,153,154,155,156,157,158,159,
264 160,161,162,163,164,165,166,167,
265 168,169,170,171,172,173,174,175,
266 176,177,178,179,180,181,182,183,
267 184,185,186,187,188,189,190,191,
268 192,193,194,195,196,197,198,199,
269 200,201,202,203,204,205,206,207,
270 208,209,210,211,212,213,214,215,
271 216,217,218,219,220,221,222,223,
272 224,225,226,227,228,229,230,231,
273 232,233,234,235,236,237,238,239,
274 240,241,242,243,244,245,246,247,
275 248,249,250,251,252,253,254,255,
276
277 /* This table contains bit maps for various character classes. Each map is 32
278 bytes long and the bits run from the least significant end of each byte. The
279 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
280 graph, print, punct, and cntrl. Other classes are built from combinations. */
281
282 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, /* space */
283 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
284 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
285 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
286
287 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* xdigit */
288 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
289 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
290 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
291
292 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* digit */
293 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
294 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
295 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
296
297 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* upper */
298 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
299 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
300 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
301
302 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* lower */
303 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
304 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
305 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
306
307 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* word */
308 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
309 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
310 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
311
312 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, /* graph */
313 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
314 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
315 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
316
317 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, /* print */
318 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
319 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
320 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
321
322 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, /* punct */
323 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
324 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
325 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
326
327 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, /* cntrl */
328 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
329 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
331
332 /* This table identifies various classes of character by individual bits:
333 0x01 white space character
334 0x02 letter
335 0x04 decimal digit
336 0x08 hexadecimal digit
337 0x10 alphanumeric or '_'
338 0x80 regular expression metacharacter or binary zero
339 */
340
341 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
342 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
343 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
344 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
345 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
346 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
347 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
348 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
349 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
350 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
351 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
352 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
353 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
354 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
355 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
356 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
357 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
358 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
359 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
360 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
361 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
362 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
363 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
364 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
365 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
366 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
367 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
368 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
369 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
370 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
371 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
372 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
373
374 /* This is a set of tables that came originally from a Windows user. It seems to
375 be at least an approximation of ISO 8859. In particular, there are characters
376 greater than 128 that are marked as spaces, letters, etc. */
377
378 static const unsigned char tables1[] = { /* same four-part layout as tables0 (1088 bytes) */
379 0,1,2,3,4,5,6,7, /* lower casing table: 256 entries */
380 8,9,10,11,12,13,14,15,
381 16,17,18,19,20,21,22,23,
382 24,25,26,27,28,29,30,31,
383 32,33,34,35,36,37,38,39,
384 40,41,42,43,44,45,46,47,
385 48,49,50,51,52,53,54,55,
386 56,57,58,59,60,61,62,63,
387 64,97,98,99,100,101,102,103,
388 104,105,106,107,108,109,110,111,
389 112,113,114,115,116,117,118,119,
390 120,121,122,91,92,93,94,95,
391 96,97,98,99,100,101,102,103,
392 104,105,106,107,108,109,110,111,
393 112,113,114,115,116,117,118,119,
394 120,121,122,123,124,125,126,127,
395 128,129,130,131,132,133,134,135,
396 136,137,138,139,140,141,142,143,
397 144,145,146,147,148,149,150,151,
398 152,153,154,155,156,157,158,159,
399 160,161,162,163,164,165,166,167,
400 168,169,170,171,172,173,174,175,
401 176,177,178,179,180,181,182,183,
402 184,185,186,187,188,189,190,191,
403 224,225,226,227,228,229,230,231, /* 192-223 map to 224-255, except 215 and 223 */
404 232,233,234,235,236,237,238,239,
405 240,241,242,243,244,245,246,215,
406 248,249,250,251,252,253,254,223,
407 224,225,226,227,228,229,230,231,
408 232,233,234,235,236,237,238,239,
409 240,241,242,243,244,245,246,247,
410 248,249,250,251,252,253,254,255,
411 0,1,2,3,4,5,6,7, /* case flipping table: 256 entries */
412 8,9,10,11,12,13,14,15,
413 16,17,18,19,20,21,22,23,
414 24,25,26,27,28,29,30,31,
415 32,33,34,35,36,37,38,39,
416 40,41,42,43,44,45,46,47,
417 48,49,50,51,52,53,54,55,
418 56,57,58,59,60,61,62,63,
419 64,97,98,99,100,101,102,103,
420 104,105,106,107,108,109,110,111,
421 112,113,114,115,116,117,118,119,
422 120,121,122,91,92,93,94,95,
423 96,65,66,67,68,69,70,71,
424 72,73,74,75,76,77,78,79,
425 80,81,82,83,84,85,86,87,
426 88,89,90,123,124,125,126,127,
427 128,129,130,131,132,133,134,135,
428 136,137,138,139,140,141,142,143,
429 144,145,146,147,148,149,150,151,
430 152,153,154,155,156,157,158,159,
431 160,161,162,163,164,165,166,167,
432 168,169,170,171,172,173,174,175,
433 176,177,178,179,180,181,182,183,
434 184,185,186,187,188,189,190,191,
435 224,225,226,227,228,229,230,231,
436 232,233,234,235,236,237,238,239,
437 240,241,242,243,244,245,246,215,
438 248,249,250,251,252,253,254,223,
439 192,193,194,195,196,197,198,199,
440 200,201,202,203,204,205,206,207,
441 208,209,210,211,212,213,214,247,
442 216,217,218,219,220,221,222,255,
443 0,62,0,0,1,0,0,0, /* class bitmaps, 32 bytes each, presumably in the tables0 order; first: space */
444 0,0,0,0,0,0,0,0,
445 32,0,0,0,1,0,0,0,
446 0,0,0,0,0,0,0,0,
447 0,0,0,0,0,0,255,3, /* xdigit */
448 126,0,0,0,126,0,0,0,
449 0,0,0,0,0,0,0,0,
450 0,0,0,0,0,0,0,0,
451 0,0,0,0,0,0,255,3, /* digit */
452 0,0,0,0,0,0,0,0,
453 0,0,0,0,0,0,12,2,
454 0,0,0,0,0,0,0,0,
455 0,0,0,0,0,0,0,0, /* upper */
456 254,255,255,7,0,0,0,0,
457 0,0,0,0,0,0,0,0,
458 255,255,127,127,0,0,0,0,
459 0,0,0,0,0,0,0,0, /* lower */
460 0,0,0,0,254,255,255,7,
461 0,0,0,0,0,4,32,4,
462 0,0,0,128,255,255,127,255,
463 0,0,0,0,0,0,255,3, /* word */
464 254,255,255,135,254,255,255,7,
465 0,0,0,0,0,4,44,6,
466 255,255,127,255,255,255,127,255,
467 0,0,0,0,254,255,255,255, /* graph */
468 255,255,255,255,255,255,255,127,
469 0,0,0,0,254,255,255,255,
470 255,255,255,255,255,255,255,255,
471 0,2,0,0,255,255,255,255, /* print */
472 255,255,255,255,255,255,255,127,
473 0,0,0,0,255,255,255,255,
474 255,255,255,255,255,255,255,255,
475 0,0,0,0,254,255,0,252, /* punct */
476 1,0,0,248,1,0,0,120,
477 0,0,0,0,254,255,255,255,
478 0,0,128,0,0,0,128,0,
479 255,255,255,255,0,0,0,0, /* cntrl */
480 0,0,0,0,0,0,0,128,
481 255,255,255,255,0,0,0,0,
482 0,0,0,0,0,0,0,0,
483 128,0,0,0,0,0,0,0, /* character type flags: 256 entries, same bit meanings as in tables0 */
484 0,1,1,0,1,1,0,0,
485 0,0,0,0,0,0,0,0,
486 0,0,0,0,0,0,0,0,
487 1,0,0,0,128,0,0,0,
488 128,128,128,128,0,0,128,0,
489 28,28,28,28,28,28,28,28,
490 28,28,0,0,0,0,0,128,
491 0,26,26,26,26,26,26,18,
492 18,18,18,18,18,18,18,18,
493 18,18,18,18,18,18,18,18,
494 18,18,18,128,128,0,128,16,
495 0,26,26,26,26,26,26,18,
496 18,18,18,18,18,18,18,18,
497 18,18,18,18,18,18,18,18,
498 18,18,18,128,128,0,0,0,
499 0,0,0,0,0,1,0,0, /* 128 onwards: unlike tables0, some high characters carry flags */
500 0,0,0,0,0,0,0,0,
501 0,0,0,0,0,0,0,0,
502 0,0,0,0,0,0,0,0,
503 1,0,0,0,0,0,0,0,
504 0,0,18,0,0,0,0,0,
505 0,0,20,20,0,18,0,0,
506 0,20,18,0,0,0,0,0,
507 18,18,18,18,18,18,18,18,
508 18,18,18,18,18,18,18,18,
509 18,18,18,18,18,18,18,0,
510 18,18,18,18,18,18,18,18,
511 18,18,18,18,18,18,18,18,
512 18,18,18,18,18,18,18,18,
513 18,18,18,18,18,18,18,0,
514 18,18,18,18,18,18,18,18
515 };
516
517
518
519 /*************************************************
520 * Read or extend an input line *
521 *************************************************/
522
523 /* Input lines are read into buffer, but both patterns and data lines can be
524 continued over multiple input lines. In addition, if the buffer fills up, we
525 want to automatically expand it so as to be able to handle extremely large
526 lines that are needed for certain stress tests. When the input buffer is
527 expanded, the other two buffers must also be expanded likewise, and the
528 contents of pbuffer, which are a copy of the input for callouts, must be
529 preserved (for when expansion happens for a data line). This is not the most
530 optimal way of handling this, but hey, this is just a test program!
531
532 Arguments:
533 f the file to read
534 start where in buffer to start (this *must* be within buffer)
535 prompt for stdin or readline()
536
537 Returns: pointer to the start of new data
538 could be a copy of start, or could be moved
539 NULL if no data read and EOF reached
540 */
541
542 static uschar *
543 extend_inputline(FILE *f, uschar *start, const char *prompt)
544 {
545 uschar *here = start;
546
547 for (;;)
548 {
549 int rlen = (int)(buffer_size - (here - buffer));
550
551 if (rlen > 1000)
552 {
553 int dlen;
554
555 /* If libreadline support is required, use readline() to read a line if the
556 input is a terminal. Note that readline() removes the trailing newline, so
557 we must put it back again, to be compatible with fgets(). */
558
559 #ifdef SUPPORT_LIBREADLINE
560 if (isatty(fileno(f)))
561 {
562 size_t len;
563 char *s = readline(prompt);
564 if (s == NULL) return (here == start)? NULL : start;
565 len = strlen(s);
566 if (len > 0) add_history(s);
567 if (len > rlen - 1) len = rlen - 1;
568 memcpy(here, s, len);
569 here[len] = '\n';
570 here[len+1] = 0;
571 free(s);
572 }
573 else
574 #endif
575
576 /* Read the next line by normal means, prompting if the file is stdin. */
577
578 {
579 if (f == stdin) printf("%s", prompt);
580 if (fgets((char *)here, rlen, f) == NULL)
581 return (here == start)? NULL : start;
582 }
583
584 dlen = (int)strlen((char *)here);
585 if (dlen > 0 && here[dlen - 1] == '\n') return start;
586 here += dlen;
587 }
588
589 else
590 {
591 int new_buffer_size = 2*buffer_size;
592 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
593 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
594 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
595
596 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
597 {
598 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
599 exit(1);
600 }
601
602 memcpy(new_buffer, buffer, buffer_size);
603 memcpy(new_pbuffer, pbuffer, buffer_size);
604
605 buffer_size = new_buffer_size;
606
607 start = new_buffer + (start - buffer);
608 here = new_buffer + (here - buffer);
609
610 free(buffer);
611 free(dbuffer);
612 free(pbuffer);
613
614 buffer = new_buffer;
615 dbuffer = new_dbuffer;
616 pbuffer = new_pbuffer;
617 }
618 }
619
620 return NULL; /* Control never gets here */
621 }
622
623
624
625
626
627
628
/*************************************************
*        Read number from string                 *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then decimal digits are accumulated. A digit
string too large for an int yields INT_MAX instead of overflowing (signed
overflow is undefined behaviour); all of its digits are still consumed.

Arguments:
  str          string to be converted
  endptr       where to put the end pointer (first character not consumed)

Returns:       the value as a non-negative int, saturated at INT_MAX;
               0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (*str != 0 && isspace(*str)) str++;
while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* result*10 + digit would exceed INT_MAX: stick at the maximum. Once
  saturated, this test stays true for every further digit. */

  if (result > (INT_MAX - digit) / 10) result = INT_MAX;
  else result = result * 10 + digit;
  }
*endptr = str;
return(result);
}
653
654
655
656
657 /*************************************************
658 * Convert UTF-8 string to value *
659 *************************************************/
660
661 /* This function takes one or more bytes that represents a UTF-8 character,
662 and returns the value of the character.
663
664 Argument:
665 utf8bytes a pointer to the byte vector
666 vptr a pointer to an int to receive the value
667
668 Returns: > 0 => the number of bytes consumed
669 -6 to 0 => malformed UTF-8 character at offset = (-return)
670 */
671
672 #if !defined NOUTF8
673
674 static int
675 utf82ord(unsigned char *utf8bytes, int *vptr)
676 {
677 int c = *utf8bytes++;
678 int d = c;
679 int i, j, s;
680
681 for (i = -1; i < 6; i++) /* i is number of additional bytes */
682 {
683 if ((d & 0x80) == 0) break;
684 d <<= 1;
685 }
686
687 if (i == -1) { *vptr = c; return 1; } /* ascii character */
688 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
689
690 /* i now has a value in the range 1-5 */
691
692 s = 6*i;
693 d = (c & utf8_table3[i]) << s;
694
695 for (j = 0; j < i; j++)
696 {
697 c = *utf8bytes++;
698 if ((c & 0xc0) != 0x80) return -(j+1);
699 s -= 6;
700 d |= (c & 0x3f) << s;
701 }
702
703 /* Check that encoding was the correct unique one */
704
705 for (j = 0; j < utf8_table1_size; j++)
706 if (d <= utf8_table1[j]) break;
707 if (j != i) return -(i+1);
708
709 /* Valid value */
710
711 *vptr = d;
712 return i+1;
713 }
714
715 #endif
716
717
718
719 /*************************************************
720 * Convert character value to UTF-8 *
721 *************************************************/
722
723 /* This function takes an integer value in the range 0 - 0x7fffffff
724 and encodes it as a UTF-8 character in 0 to 6 bytes.
725
726 Arguments:
727 cvalue the character value
728 utf8bytes pointer to buffer for result - at least 6 bytes long
729
730 Returns: number of characters placed in the buffer
731 */
732
733 #if !defined NOUTF8
734
735 static int
736 ord2utf8(int cvalue, uschar *utf8bytes)
737 {
738 register int i, j;
739 for (i = 0; i < utf8_table1_size; i++)
740 if (cvalue <= utf8_table1[i]) break;
741 utf8bytes += i;
742 for (j = i; j > 0; j--)
743 {
744 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
745 cvalue >>= 6;
746 }
747 *utf8bytes = utf8_table2[i] | cvalue;
748 return i + 1;
749 }
750
751 #endif
752
753
754
755 /*************************************************
756 * Print character string *
757 *************************************************/
758
759 /* Character string printing function. Must handle UTF-8 strings in utf8
760 mode. Yields number of characters printed. If handed a NULL file, just counts
761 chars without printing. */
762
763 static int pchars(unsigned char *p, int length, FILE *f)
764 {
765 int c = 0;
766 int yield = 0;
767
768 while (length-- > 0)
769 {
770 #if !defined NOUTF8
771 if (use_utf8)
772 {
773 int rc = utf82ord(p, &c);
774
775 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
776 {
777 length -= rc - 1;
778 p += rc;
779 if (PRINTHEX(c))
780 {
781 if (f != NULL) fprintf(f, "%c", c);
782 yield++;
783 }
784 else
785 {
786 int n = 4;
787 if (f != NULL) fprintf(f, "\\x{%02x}", c);
788 yield += (n <= 0x000000ff)? 2 :
789 (n <= 0x00000fff)? 3 :
790 (n <= 0x0000ffff)? 4 :
791 (n <= 0x000fffff)? 5 : 6;
792 }
793 continue;
794 }
795 }
796 #endif
797
798 /* Not UTF-8, or malformed UTF-8 */
799
800 c = *p++;
801 if (PRINTHEX(c))
802 {
803 if (f != NULL) fprintf(f, "%c", c);
804 yield++;
805 }
806 else
807 {
808 if (f != NULL) fprintf(f, "\\x%02x", c);
809 yield += 4;
810 }
811 }
812
813 return yield;
814 }
815
816
817
818 /*************************************************
819 * Callout function *
820 *************************************************/
821
822 /* Called from PCRE as a result of the (?C) item. We print out where we are in
823 the match. Yield zero unless more callouts than the fail count, or the callout
824 data is not zero. */
825
826 static int callout(pcre_callout_block *cb)
827 {
828 FILE *f = (first_callout | callout_extra)? outfile : NULL;
829 int i, pre_start, post_start, subject_length;
830
831 if (callout_extra)
832 {
833 fprintf(f, "Callout %d: last capture = %d\n",
834 cb->callout_number, cb->capture_last);
835
836 for (i = 0; i < cb->capture_top * 2; i += 2)
837 {
838 if (cb->offset_vector[i] < 0)
839 fprintf(f, "%2d: <unset>\n", i/2);
840 else
841 {
842 fprintf(f, "%2d: ", i/2);
843 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
844 cb->offset_vector[i+1] - cb->offset_vector[i], f);
845 fprintf(f, "\n");
846 }
847 }
848 }
849
850 /* Re-print the subject in canonical form, the first time or if giving full
851 datails. On subsequent calls in the same match, we use pchars just to find the
852 printed lengths of the substrings. */
853
854 if (f != NULL) fprintf(f, "--->");
855
856 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
857 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
858 cb->current_position - cb->start_match, f);
859
860 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
861
862 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
863 cb->subject_length - cb->current_position, f);
864
865 if (f != NULL) fprintf(f, "\n");
866
867 /* Always print appropriate indicators, with callout number if not already
868 shown. For automatic callouts, show the pattern offset. */
869
870 if (cb->callout_number == 255)
871 {
872 fprintf(outfile, "%+3d ", cb->pattern_position);
873 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
874 }
875 else
876 {
877 if (callout_extra) fprintf(outfile, " ");
878 else fprintf(outfile, "%3d ", cb->callout_number);
879 }
880
881 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
882 fprintf(outfile, "^");
883
884 if (post_start > 0)
885 {
886 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
887 fprintf(outfile, "^");
888 }
889
890 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
891 fprintf(outfile, " ");
892
893 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
894 pbuffer + cb->pattern_position);
895
896 fprintf(outfile, "\n");
897 first_callout = 0;
898
899 if (cb->callout_data != NULL)
900 {
901 int callout_data = *((int *)(cb->callout_data));
902 if (callout_data != 0)
903 {
904 fprintf(outfile, "Callout data = %d\n", callout_data);
905 return callout_data;
906 }
907 }
908
909 return (cb->callout_number != callout_fail_id)? 0 :
910 (++callout_count >= callout_fail_count)? 1 : 0;
911 }
912
913
914 /*************************************************
915 * Local malloc functions *
916 *************************************************/
917
918 /* Alternative malloc function, to test functionality and show the size of the
919 compiled re. */
920
921 static void *new_malloc(size_t size)
922 {
923 void *block = malloc(size);
924 gotten_store = size;
925 if (show_malloc)
926 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
927 return block;
928 }
929
930 static void new_free(void *block)
931 {
932 if (show_malloc)
933 fprintf(outfile, "free %p\n", block);
934 free(block);
935 }
936
937
938 /* For recursion malloc/free, to test stacking calls */
939
940 static void *stack_malloc(size_t size)
941 {
942 void *block = malloc(size);
943 if (show_malloc)
944 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
945 return block;
946 }
947
948 static void stack_free(void *block)
949 {
950 if (show_malloc)
951 fprintf(outfile, "stack_free %p\n", block);
952 free(block);
953 }
954
955
956 /*************************************************
957 * Call pcre_fullinfo() *
958 *************************************************/
959
960 /* Get one piece of information from the pcre_fullinfo() function */
961
962 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
963 {
964 int rc;
965 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
966 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
967 }
968
969
970
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverse the byte order of the low-order bytes of a value.

Arguments:
  value      the value to flip
  n          2 to swap the two low-order bytes; any other value reverses
             the four low-order bytes

Returns:     the flipped value; bytes above those flipped are discarded
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xffUL;
unsigned long int b1 = (value >> 8) & 0xffUL;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xffUL;
b3 = (value >> 24) & 0xffUL;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
984
985
986
987
988 /*************************************************
989 * Check match or recursion limit *
990 *************************************************/
991
992 static int
993 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
994 int start_offset, int options, int *use_offsets, int use_size_offsets,
995 int flag, unsigned long int *limit, int errnumber, const char *msg)
996 {
997 int count;
998 int min = 0;
999 int mid = 64;
1000 int max = -1;
1001
1002 extra->flags |= flag;
1003
1004 for (;;)
1005 {
1006 *limit = mid;
1007
1008 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1009 use_offsets, use_size_offsets);
1010
1011 if (count == errnumber)
1012 {
1013 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1014 min = mid;
1015 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1016 }
1017
1018 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1019 count == PCRE_ERROR_PARTIAL)
1020 {
1021 if (mid == min + 1)
1022 {
1023 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1024 break;
1025 }
1026 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1027 max = mid;
1028 mid = (min + mid)/2;
1029 }
1030 else break; /* Some other error */
1031 }
1032
1033 extra->flags &= ~flag;
1034 return count;
1035 }
1036
1037
1038
1039 /*************************************************
1040 * Case-independent strncmp() function *
1041 *************************************************/
1042
1043 /*
1044 Arguments:
1045 s first string
1046 t second string
1047 n number of characters to compare
1048
1049 Returns: < 0, = 0, or > 0, according to the comparison
1050 */
1051
1052 static int
1053 strncmpic(uschar *s, uschar *t, int n)
1054 {
1055 while (n--)
1056 {
1057 int c = tolower(*s++) - tolower(*t++);
1058 if (c) return c;
1059 }
1060 return 0;
1061 }
1062
1063
1064
1065 /*************************************************
1066 * Check newline indicator *
1067 *************************************************/
1068
1069 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1070 a message and return 0 if there is no match.
1071
1072 Arguments:
1073 p points after the leading '<'
1074 f file for error message
1075
1076 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1077 */
1078
1079 static int
1080 check_newline(uschar *p, FILE *f)
1081 {
1082 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1083 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1084 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1085 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1086 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1087 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1088 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1089 fprintf(f, "Unknown newline type at: <%s\n", p);
1090 return 0;
1091 }
1092
1093
1094
1095 /*************************************************
1096 * Usage function *
1097 *************************************************/
1098
/* Write the command-line help text to stdout. Which lines appear depends on
SUPPORT_LIBREADLINE, NODFA and NOPOSIX at compile time. */

static void
usage(void)
{
  static const char *const lines[] =
  {
    "Usage: pcretest [options] [<input file> [<output file>]]\n\n",
    "Input and output default to stdin and stdout.\n",
#ifdef SUPPORT_LIBREADLINE
    "If input is a terminal, readline() is used to read from it.\n",
#else
    "This version of pcretest is not linked with readline().\n",
#endif
    "\nOptions:\n",
    " -b show compiled code (bytecode)\n",
    " -C show PCRE compile-time options and exit\n",
    " -d debug: show compiled code and information (-b and -i)\n",
#if !defined NODFA
    " -dfa force DFA matching for all subjects\n",
#endif
    " -help show usage information\n",
    " -i show information about compiled patterns\n",
    " -M find MATCH_LIMIT minimum for each subject\n",
    " -m output memory used information\n",
    " -o <n> set size of offsets vector to <n>\n",
#if !defined NOPOSIX
    " -p use POSIX interface\n",
#endif
    " -q quiet: do not output PCRE version number at start\n",
    " -S <n> set stack size to <n> megabytes\n",
    " -s output store (memory) used information\n",
    " -t time compilation and execution\n",
    " -t <n> time compilation and execution, repeating <n> times\n",
    " -tm time execution (matching) only\n",
    " -tm <n> time execution (matching) only, repeating <n> times\n"
  };
  size_t i;

  /* None of the lines contains a conversion specification, so writing them
  with fputs() gives the same output as printf(). */

  for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++)
  {
    fputs(lines[i], stdout);
  }
}
1132
1133
1134
1135 /*************************************************
1136 * Main Program *
1137 *************************************************/
1138
1139 /* Read lines from named file or stdin and write to named file or stdout; lines
1140 consist of a regular expression, in delimiters and optionally followed by
1141 options, followed by a set of test data, terminated by an empty line. */
1142
1143 int main(int argc, char **argv)
1144 {
1145 FILE *infile = stdin;
1146 int options = 0;
1147 int study_options = 0;
1148 int default_find_match_limit = FALSE;
1149 int op = 1;
1150 int timeit = 0;
1151 int timeitm = 0;
1152 int showinfo = 0;
1153 int showstore = 0;
1154 int quiet = 0;
1155 int size_offsets = 45;
1156 int size_offsets_max;
1157 int *offsets = NULL;
1158 #if !defined NOPOSIX
1159 int posix = 0;
1160 #endif
1161 int debug = 0;
1162 int done = 0;
1163 int all_use_dfa = 0;
1164 int yield = 0;
1165 int stack_size;
1166
1167 /* These vectors store, end-to-end, a list of captured substring names. Assume
1168 that 1024 is plenty long enough for the few names we'll be testing. */
1169
1170 uschar copynames[1024];
1171 uschar getnames[1024];
1172
1173 uschar *copynamesptr;
1174 uschar *getnamesptr;
1175
1176 /* Get buffers from malloc() so that Electric Fence will check their misuse
1177 when I am debugging. They grow automatically when very long lines are read. */
1178
1179 buffer = (unsigned char *)malloc(buffer_size);
1180 dbuffer = (unsigned char *)malloc(buffer_size);
1181 pbuffer = (unsigned char *)malloc(buffer_size);
1182
1183 /* The outfile variable is static so that new_malloc can use it. */
1184
1185 outfile = stdout;
1186
1187 /* The following _setmode() stuff is some Windows magic that tells its runtime
1188 library to translate CRLF into a single LF character. At least, that's what
1189 I've been told: never having used Windows I take this all on trust. Originally
1190 it set 0x8000, but then I was advised that _O_BINARY was better. */
1191
1192 #if defined(_WIN32) || defined(WIN32)
1193 _setmode( _fileno( stdout ), _O_BINARY );
1194 #endif
1195
1196 /* Scan options */
1197
1198 while (argc > 1 && argv[op][0] == '-')
1199 {
1200 unsigned char *endptr;
1201
1202 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
1203 showstore = 1;
1204 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1205 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1206 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1207 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1208 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1209 #if !defined NODFA
1210 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1211 #endif
1212 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1213 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1214 *endptr == 0))
1215 {
1216 op++;
1217 argc--;
1218 }
1219 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1220 {
1221 int both = argv[op][2] == 0;
1222 int temp;
1223 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1224 *endptr == 0))
1225 {
1226 timeitm = temp;
1227 op++;
1228 argc--;
1229 }
1230 else timeitm = LOOPREPEAT;
1231 if (both) timeit = timeitm;
1232 }
1233 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1234 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1235 *endptr == 0))
1236 {
1237 #if defined(_WIN32) || defined(WIN32)
1238 printf("PCRE: -S not supported on this OS\n");
1239 exit(1);
1240 #else
1241 int rc;
1242 struct rlimit rlim;
1243 getrlimit(RLIMIT_STACK, &rlim);
1244 rlim.rlim_cur = stack_size * 1024 * 1024;
1245 rc = setrlimit(RLIMIT_STACK, &rlim);
1246 if (rc != 0)
1247 {
1248 printf("PCRE: setrlimit() failed with error %d\n", rc);
1249 exit(1);
1250 }
1251 op++;
1252 argc--;
1253 #endif
1254 }
1255 #if !defined NOPOSIX
1256 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1257 #endif
1258 else if (strcmp(argv[op], "-C") == 0)
1259 {
1260 int rc;
1261 unsigned long int lrc;
1262 printf("PCRE version %s\n", pcre_version());
1263 printf("Compiled with\n");
1264 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1265 printf(" %sUTF-8 support\n", rc? "" : "No ");
1266 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1267 printf(" %sUnicode properties support\n", rc? "" : "No ");
1268 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1269 /* Note that these values are always the ASCII values, even
1270 in EBCDIC environments. CR is 13 and NL is 10. */
1271 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1272 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1273 (rc == -2)? "ANYCRLF" :
1274 (rc == -1)? "ANY" : "???");
1275 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1276 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1277 "all Unicode newlines");
1278 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1279 printf(" Internal link size = %d\n", rc);
1280 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1281 printf(" POSIX malloc threshold = %d\n", rc);
1282 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1283 printf(" Default match limit = %ld\n", lrc);
1284 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1285 printf(" Default recursion depth limit = %ld\n", lrc);
1286 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1287 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1288 goto EXIT;
1289 }
1290 else if (strcmp(argv[op], "-help") == 0 ||
1291 strcmp(argv[op], "--help") == 0)
1292 {
1293 usage();
1294 goto EXIT;
1295 }
1296 else
1297 {
1298 printf("** Unknown or malformed option %s\n", argv[op]);
1299 usage();
1300 yield = 1;
1301 goto EXIT;
1302 }
1303 op++;
1304 argc--;
1305 }
1306
1307 /* Get the store for the offsets vector, and remember what it was */
1308
1309 size_offsets_max = size_offsets;
1310 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1311 if (offsets == NULL)
1312 {
1313 printf("** Failed to get %d bytes of memory for offsets vector\n",
1314 (int)(size_offsets_max * sizeof(int)));
1315 yield = 1;
1316 goto EXIT;
1317 }
1318
1319 /* Sort out the input and output files */
1320
1321 if (argc > 1)
1322 {
1323 infile = fopen(argv[op], INPUT_MODE);
1324 if (infile == NULL)
1325 {
1326 printf("** Failed to open %s\n", argv[op]);
1327 yield = 1;
1328 goto EXIT;
1329 }
1330 }
1331
1332 if (argc > 2)
1333 {
1334 outfile = fopen(argv[op+1], OUTPUT_MODE);
1335 if (outfile == NULL)
1336 {
1337 printf("** Failed to open %s\n", argv[op+1]);
1338 yield = 1;
1339 goto EXIT;
1340 }
1341 }
1342
1343 /* Set alternative malloc function */
1344
1345 pcre_malloc = new_malloc;
1346 pcre_free = new_free;
1347 pcre_stack_malloc = stack_malloc;
1348 pcre_stack_free = stack_free;
1349
1350 /* Heading line unless quiet, then prompt for first regex if stdin */
1351
1352 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1353
1354 /* Main loop */
1355
1356 while (!done)
1357 {
1358 pcre *re = NULL;
1359 pcre_extra *extra = NULL;
1360
1361 #if !defined NOPOSIX /* There are still compilers that require no indent */
1362 regex_t preg;
1363 int do_posix = 0;
1364 #endif
1365
1366 const char *error;
1367 unsigned char *markptr;
1368 unsigned char *p, *pp, *ppp;
1369 unsigned char *to_file = NULL;
1370 const unsigned char *tables = NULL;
1371 unsigned long int true_size, true_study_size = 0;
1372 size_t size, regex_gotten_store;
1373 int do_mark = 0;
1374 int do_study = 0;
1375 int do_debug = debug;
1376 int do_G = 0;
1377 int do_g = 0;
1378 int do_showinfo = showinfo;
1379 int do_showrest = 0;
1380 int do_flip = 0;
1381 int erroroffset, len, delimiter, poffset;
1382
1383 use_utf8 = 0;
1384 debug_lengths = 1;
1385
1386 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1387 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1388 fflush(outfile);
1389
1390 p = buffer;
1391 while (isspace(*p)) p++;
1392 if (*p == 0) continue;
1393
1394 /* See if the pattern is to be loaded pre-compiled from a file. */
1395
1396 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1397 {
1398 unsigned long int magic, get_options;
1399 uschar sbuf[8];
1400 FILE *f;
1401
1402 p++;
1403 pp = p + (int)strlen((char *)p);
1404 while (isspace(pp[-1])) pp--;
1405 *pp = 0;
1406
1407 f = fopen((char *)p, "rb");
1408 if (f == NULL)
1409 {
1410 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1411 continue;
1412 }
1413
1414 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1415
1416 true_size =
1417 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1418 true_study_size =
1419 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1420
1421 re = (real_pcre *)new_malloc(true_size);
1422 regex_gotten_store = gotten_store;
1423
1424 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1425
1426 magic = ((real_pcre *)re)->magic_number;
1427 if (magic != MAGIC_NUMBER)
1428 {
1429 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1430 {
1431 do_flip = 1;
1432 }
1433 else
1434 {
1435 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1436 fclose(f);
1437 continue;
1438 }
1439 }
1440
1441 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1442 do_flip? " (byte-inverted)" : "", p);
1443
1444 /* Need to know if UTF-8 for printing data strings */
1445
1446 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1447 use_utf8 = (get_options & PCRE_UTF8) != 0;
1448
1449 /* Now see if there is any following study data */
1450
1451 if (true_study_size != 0)
1452 {
1453 pcre_study_data *psd;
1454
1455 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1456 extra->flags = PCRE_EXTRA_STUDY_DATA;
1457
1458 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1459 extra->study_data = psd;
1460
1461 if (fread(psd, 1, true_study_size, f) != true_study_size)
1462 {
1463 FAIL_READ:
1464 fprintf(outfile, "Failed to read data from %s\n", p);
1465 if (extra != NULL) new_free(extra);
1466 if (re != NULL) new_free(re);
1467 fclose(f);
1468 continue;
1469 }
1470 fprintf(outfile, "Study data loaded from %s\n", p);
1471 do_study = 1; /* To get the data output if requested */
1472 }
1473 else fprintf(outfile, "No study data\n");
1474
1475 fclose(f);
1476 goto SHOW_INFO;
1477 }
1478
1479 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1480 the pattern; if it isn't complete, read more. */
1481
1482 delimiter = *p++;
1483
1484 if (isalnum(delimiter) || delimiter == '\\')
1485 {
1486 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1487 goto SKIP_DATA;
1488 }
1489
1490 pp = p;
1491 poffset = (int)(p - buffer);
1492
1493 for(;;)
1494 {
1495 while (*pp != 0)
1496 {
1497 if (*pp == '\\' && pp[1] != 0) pp++;
1498 else if (*pp == delimiter) break;
1499 pp++;
1500 }
1501 if (*pp != 0) break;
1502 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1503 {
1504 fprintf(outfile, "** Unexpected EOF\n");
1505 done = 1;
1506 goto CONTINUE;
1507 }
1508 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1509 }
1510
1511 /* The buffer may have moved while being extended; reset the start of data
1512 pointer to the correct relative point in the buffer. */
1513
1514 p = buffer + poffset;
1515
1516 /* If the first character after the delimiter is backslash, make
1517 the pattern end with backslash. This is purely to provide a way
1518 of testing for the error message when a pattern ends with backslash. */
1519
1520 if (pp[1] == '\\') *pp++ = '\\';
1521
1522 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1523 for callouts. */
1524
1525 *pp++ = 0;
1526 strcpy((char *)pbuffer, (char *)p);
1527
1528 /* Look for options after final delimiter */
1529
1530 options = 0;
1531 study_options = 0;
1532 log_store = showstore; /* default from command line */
1533
1534 while (*pp != 0)
1535 {
1536 switch (*pp++)
1537 {
1538 case 'f': options |= PCRE_FIRSTLINE; break;
1539 case 'g': do_g = 1; break;
1540 case 'i': options |= PCRE_CASELESS; break;
1541 case 'm': options |= PCRE_MULTILINE; break;
1542 case 's': options |= PCRE_DOTALL; break;
1543 case 'x': options |= PCRE_EXTENDED; break;
1544
1545 case '+': do_showrest = 1; break;
1546 case 'A': options |= PCRE_ANCHORED; break;
1547 case 'B': do_debug = 1; break;
1548 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1549 case 'D': do_debug = do_showinfo = 1; break;
1550 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1551 case 'F': do_flip = 1; break;
1552 case 'G': do_G = 1; break;
1553 case 'I': do_showinfo = 1; break;
1554 case 'J': options |= PCRE_DUPNAMES; break;
1555 case 'K': do_mark = 1; break;
1556 case 'M': log_store = 1; break;
1557 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1558
1559 #if !defined NOPOSIX
1560 case 'P': do_posix = 1; break;
1561 #endif
1562
1563 case 'S': do_study = 1; break;
1564 case 'U': options |= PCRE_UNGREEDY; break;
1565 case 'W': options |= PCRE_UCP; break;
1566 case 'X': options |= PCRE_EXTRA; break;
1567 case 'Z': debug_lengths = 0; break;
1568 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1569 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1570
1571 case 'T':
1572 switch (*pp++)
1573 {
1574 case '0': tables = tables0; break;
1575 case '1': tables = tables1; break;
1576
1577 case '\r':
1578 case '\n':
1579 case ' ':
1580 case 0:
1581 fprintf(outfile, "** Missing table number after /T\n");
1582 goto SKIP_DATA;
1583
1584 default:
1585 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1586 goto SKIP_DATA;
1587 }
1588 break;
1589
1590 case 'L':
1591 ppp = pp;
1592 /* The '\r' test here is so that it works on Windows. */
1593 /* The '0' test is just in case this is an unterminated line. */
1594 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1595 *ppp = 0;
1596 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1597 {
1598 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1599 goto SKIP_DATA;
1600 }
1601 locale_set = 1;
1602 tables = pcre_maketables();
1603 pp = ppp;
1604 break;
1605
1606 case '>':
1607 to_file = pp;
1608 while (*pp != 0) pp++;
1609 while (isspace(pp[-1])) pp--;
1610 *pp = 0;
1611 break;
1612
1613 case '<':
1614 {
1615 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1616 {
1617 options |= PCRE_JAVASCRIPT_COMPAT;
1618 pp += 3;
1619 }
1620 else
1621 {
1622 int x = check_newline(pp, outfile);
1623 if (x == 0) goto SKIP_DATA;
1624 options |= x;
1625 while (*pp++ != '>');
1626 }
1627 }
1628 break;
1629
1630 case '\r': /* So that it works in Windows */
1631 case '\n':
1632 case ' ':
1633 break;
1634
1635 default:
1636 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1637 goto SKIP_DATA;
1638 }
1639 }
1640
1641 /* Handle compiling via the POSIX interface, which doesn't support the
1642 timing, showing, or debugging options, nor the ability to pass over
1643 local character tables. */
1644
1645 #if !defined NOPOSIX
1646 if (posix || do_posix)
1647 {
1648 int rc;
1649 int cflags = 0;
1650
1651 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1652 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1653 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1654 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1655 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1656 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1657 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1658
1659 rc = regcomp(&preg, (char *)p, cflags);
1660
1661 /* Compilation failed; go back for another re, skipping to blank line
1662 if non-interactive. */
1663
1664 if (rc != 0)
1665 {
1666 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1667 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1668 goto SKIP_DATA;
1669 }
1670 }
1671
1672 /* Handle compiling via the native interface */
1673
1674 else
1675 #endif /* !defined NOPOSIX */
1676
1677 {
1678 unsigned long int get_options;
1679
1680 if (timeit > 0)
1681 {
1682 register int i;
1683 clock_t time_taken;
1684 clock_t start_time = clock();
1685 for (i = 0; i < timeit; i++)
1686 {
1687 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1688 if (re != NULL) free(re);
1689 }
1690 time_taken = clock() - start_time;
1691 fprintf(outfile, "Compile time %.4f milliseconds\n",
1692 (((double)time_taken * 1000.0) / (double)timeit) /
1693 (double)CLOCKS_PER_SEC);
1694 }
1695
1696 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1697
1698 /* Compilation failed; go back for another re, skipping to blank line
1699 if non-interactive. */
1700
1701 if (re == NULL)
1702 {
1703 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1704 SKIP_DATA:
1705 if (infile != stdin)
1706 {
1707 for (;;)
1708 {
1709 if (extend_inputline(infile, buffer, NULL) == NULL)
1710 {
1711 done = 1;
1712 goto CONTINUE;
1713 }
1714 len = (int)strlen((char *)buffer);
1715 while (len > 0 && isspace(buffer[len-1])) len--;
1716 if (len == 0) break;
1717 }
1718 fprintf(outfile, "\n");
1719 }
1720 goto CONTINUE;
1721 }
1722
1723 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1724 within the regex; check for this so that we know how to process the data
1725 lines. */
1726
1727 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1728 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1729
1730 /* Print information if required. There are now two info-returning
1731 functions. The old one has a limited interface and returns only limited
1732 data. Check that it agrees with the newer one. */
1733
1734 if (log_store)
1735 fprintf(outfile, "Memory allocation (code space): %d\n",
1736 (int)(gotten_store -
1737 sizeof(real_pcre) -
1738 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1739
1740 /* Extract the size for possible writing before possibly flipping it,
1741 and remember the store that was got. */
1742
1743 true_size = ((real_pcre *)re)->size;
1744 regex_gotten_store = gotten_store;
1745
1746 /* If /S was present, study the regexp to generate additional info to
1747 help with the matching. */
1748
1749 if (do_study)
1750 {
1751 if (timeit > 0)
1752 {
1753 register int i;
1754 clock_t time_taken;
1755 clock_t start_time = clock();
1756 for (i = 0; i < timeit; i++)
1757 extra = pcre_study(re, study_options, &error);
1758 time_taken = clock() - start_time;
1759 if (extra != NULL) free(extra);
1760 fprintf(outfile, " Study time %.4f milliseconds\n",
1761 (((double)time_taken * 1000.0) / (double)timeit) /
1762 (double)CLOCKS_PER_SEC);
1763 }
1764 extra = pcre_study(re, study_options, &error);
1765 if (error != NULL)
1766 fprintf(outfile, "Failed to study: %s\n", error);
1767 else if (extra != NULL)
1768 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1769 }
1770
1771 /* If /K was present, we set up for handling MARK data. */
1772
1773 if (do_mark)
1774 {
1775 if (extra == NULL)
1776 {
1777 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1778 extra->flags = 0;
1779 }
1780 extra->mark = &markptr;
1781 extra->flags |= PCRE_EXTRA_MARK;
1782 }
1783
1784 /* If the 'F' option was present, we flip the bytes of all the integer
1785 fields in the regex data block and the study block. This is to make it
1786 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1787 compiled on a different architecture. */
1788
1789 if (do_flip)
1790 {
1791 real_pcre *rre = (real_pcre *)re;
1792 rre->magic_number =
1793 byteflip(rre->magic_number, sizeof(rre->magic_number));
1794 rre->size = byteflip(rre->size, sizeof(rre->size));
1795 rre->options = byteflip(rre->options, sizeof(rre->options));
1796 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1797 rre->top_bracket =
1798 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1799 rre->top_backref =
1800 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1801 rre->first_byte =
1802 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1803 rre->req_byte =
1804 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1805 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1806 sizeof(rre->name_table_offset));
1807 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1808 sizeof(rre->name_entry_size));
1809 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1810 sizeof(rre->name_count));
1811
1812 if (extra != NULL)
1813 {
1814 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1815 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1816 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1817 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1818 }
1819 }
1820
1821 /* Extract information from the compiled data if required */
1822
1823 SHOW_INFO:
1824
1825 if (do_debug)
1826 {
1827 fprintf(outfile, "------------------------------------------------------------------\n");
1828 pcre_printint(re, outfile, debug_lengths);
1829 }
1830
1831 /* We already have the options in get_options (see above) */
1832
1833 if (do_showinfo)
1834 {
1835 unsigned long int all_options;
1836 #if !defined NOINFOCHECK
1837 int old_first_char, old_options, old_count;
1838 #endif
1839 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1840 hascrorlf;
1841 int nameentrysize, namecount;
1842 const uschar *nametable;
1843
1844 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1845 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1846 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1847 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1848 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1849 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1850 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1851 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1852 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1853 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1854 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1855
1856 #if !defined NOINFOCHECK
1857 old_count = pcre_info(re, &old_options, &old_first_char);
1858 if (count < 0) fprintf(outfile,
1859 "Error %d from pcre_info()\n", count);
1860 else
1861 {
1862 if (old_count != count) fprintf(outfile,
1863 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1864 old_count);
1865
1866 if (old_first_char != first_char) fprintf(outfile,
1867 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1868 first_char, old_first_char);
1869
1870 if (old_options != (int)get_options) fprintf(outfile,
1871 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1872 get_options, old_options);
1873 }
1874 #endif
1875
1876 if (size != regex_gotten_store) fprintf(outfile,
1877 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1878 (int)size, (int)regex_gotten_store);
1879
1880 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1881 if (backrefmax > 0)
1882 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1883
1884 if (namecount > 0)
1885 {
1886 fprintf(outfile, "Named capturing subpatterns:\n");
1887 while (namecount-- > 0)
1888 {
1889 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1890 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1891 GET2(nametable, 0));
1892 nametable += nameentrysize;
1893 }
1894 }
1895
1896 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1897 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1898
1899 all_options = ((real_pcre *)re)->options;
1900 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1901
1902 if (get_options == 0) fprintf(outfile, "No options\n");
1903 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1904 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1905 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1906 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1907 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1908 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1909 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1910 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1911 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1912 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1913 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1914 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1915 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1916 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1917 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1918 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1919 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1920
1921 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1922
1923 switch (get_options & PCRE_NEWLINE_BITS)
1924 {
1925 case PCRE_NEWLINE_CR:
1926 fprintf(outfile, "Forced newline sequence: CR\n");
1927 break;
1928
1929 case PCRE_NEWLINE_LF:
1930 fprintf(outfile, "Forced newline sequence: LF\n");
1931 break;
1932
1933 case PCRE_NEWLINE_CRLF:
1934 fprintf(outfile, "Forced newline sequence: CRLF\n");
1935 break;
1936
1937 case PCRE_NEWLINE_ANYCRLF:
1938 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1939 break;
1940
1941 case PCRE_NEWLINE_ANY:
1942 fprintf(outfile, "Forced newline sequence: ANY\n");
1943 break;
1944
1945 default:
1946 break;
1947 }
1948
1949 if (first_char == -1)
1950 {
1951 fprintf(outfile, "First char at start or follows newline\n");
1952 }
1953 else if (first_char < 0)
1954 {
1955 fprintf(outfile, "No first char\n");
1956 }
1957 else
1958 {
1959 int ch = first_char & 255;
1960 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1961 "" : " (caseless)";
1962 if (PRINTHEX(ch))
1963 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1964 else
1965 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1966 }
1967
1968 if (need_char < 0)
1969 {
1970 fprintf(outfile, "No need char\n");
1971 }
1972 else
1973 {
1974 int ch = need_char & 255;
1975 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1976 "" : " (caseless)";
1977 if (PRINTHEX(ch))
1978 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1979 else
1980 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1981 }
1982
1983 /* Don't output study size; at present it is in any case a fixed
1984 value, but it varies, depending on the computer architecture, and
1985 so messes up the test suite. (And with the /F option, it might be
1986 flipped.) */
1987
1988 if (do_study)
1989 {
1990 if (extra == NULL)
1991 fprintf(outfile, "Study returned NULL\n");
1992 else
1993 {
1994 uschar *start_bits = NULL;
1995 int minlength;
1996
1997 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
1998 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1999
2000 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2001 if (start_bits == NULL)
2002 fprintf(outfile, "No set of starting bytes\n");
2003 else
2004 {
2005 int i;
2006 int c = 24;
2007 fprintf(outfile, "Starting byte set: ");
2008 for (i = 0; i < 256; i++)
2009 {
2010 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2011 {
2012 if (c > 75)
2013 {
2014 fprintf(outfile, "\n ");
2015 c = 2;
2016 }
2017 if (PRINTHEX(i) && i != ' ')
2018 {
2019 fprintf(outfile, "%c ", i);
2020 c += 2;
2021 }
2022 else
2023 {
2024 fprintf(outfile, "\\x%02x ", i);
2025 c += 5;
2026 }
2027 }
2028 }
2029 fprintf(outfile, "\n");
2030 }
2031 }
2032 }
2033 }
2034
2035 /* If the '>' option was present, we write out the regex to a file, and
2036 that is all. The first 8 bytes of the file are the regex length and then
2037 the study length, in big-endian order. */
2038
2039 if (to_file != NULL)
2040 {
2041 FILE *f = fopen((char *)to_file, "wb");
2042 if (f == NULL)
2043 {
2044 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2045 }
2046 else
2047 {
2048 uschar sbuf[8];
2049 sbuf[0] = (uschar)((true_size >> 24) & 255);
2050 sbuf[1] = (uschar)((true_size >> 16) & 255);
2051 sbuf[2] = (uschar)((true_size >> 8) & 255);
2052 sbuf[3] = (uschar)((true_size) & 255);
2053
2054 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2055 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2056 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2057 sbuf[7] = (uschar)((true_study_size) & 255);
2058
2059 if (fwrite(sbuf, 1, 8, f) < 8 ||
2060 fwrite(re, 1, true_size, f) < true_size)
2061 {
2062 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2063 }
2064 else
2065 {
2066 fprintf(outfile, "Compiled regex written to %s\n", to_file);
2067 if (extra != NULL)
2068 {
2069 if (fwrite(extra->study_data, 1, true_study_size, f) <
2070 true_study_size)
2071 {
2072 fprintf(outfile, "Write error on %s: %s\n", to_file,
2073 strerror(errno));
2074 }
2075 else fprintf(outfile, "Study data written to %s\n", to_file);
2076
2077 }
2078 }
2079 fclose(f);
2080 }
2081
2082 new_free(re);
2083 if (extra != NULL) new_free(extra);
2084 if (locale_set)
2085 {
2086 new_free((void *)tables);
2087 setlocale(LC_CTYPE, "C");
2088 locale_set = 0;
2089 }
2090 continue; /* With next regex */
2091 }
2092 } /* End of non-POSIX compile */
2093
2094 /* Read data lines and test them */
2095
2096 for (;;)
2097 {
2098 uschar *q;
2099 uschar *bptr;
2100 int *use_offsets = offsets;
2101 int use_size_offsets = size_offsets;
2102 int callout_data = 0;
2103 int callout_data_set = 0;
2104 int count, c;
2105 int copystrings = 0;
2106 int find_match_limit = default_find_match_limit;
2107 int getstrings = 0;
2108 int getlist = 0;
2109 int gmatched = 0;
2110 int start_offset = 0;
2111 int g_notempty = 0;
2112 int use_dfa = 0;
2113
2114 options = 0;
2115
2116 *copynames = 0;
2117 *getnames = 0;
2118
2119 copynamesptr = copynames;
2120 getnamesptr = getnames;
2121
2122 pcre_callout = callout;
2123 first_callout = 1;
2124 callout_extra = 0;
2125 callout_count = 0;
2126 callout_fail_count = 999999;
2127 callout_fail_id = -1;
2128 show_malloc = 0;
2129
2130 if (extra != NULL) extra->flags &=
2131 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2132
2133 len = 0;
2134 for (;;)
2135 {
2136 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2137 {
2138 if (len > 0) /* Reached EOF without hitting a newline */
2139 {
2140 fprintf(outfile, "\n");
2141 break;
2142 }
2143 done = 1;
2144 goto CONTINUE;
2145 }
2146 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2147 len = (int)strlen((char *)buffer);
2148 if (buffer[len-1] == '\n') break;
2149 }
2150
2151 while (len > 0 && isspace(buffer[len-1])) len--;
2152 buffer[len] = 0;
2153 if (len == 0) break;
2154
2155 p = buffer;
2156 while (isspace(*p)) p++;
2157
2158 bptr = q = dbuffer;
2159 while ((c = *p++) != 0)
2160 {
2161 int i = 0;
2162 int n = 0;
2163
2164 if (c == '\\') switch ((c = *p++))
2165 {
2166 case 'a': c = 7; break;
2167 case 'b': c = '\b'; break;
2168 case 'e': c = 27; break;
2169 case 'f': c = '\f'; break;
2170 case 'n': c = '\n'; break;
2171 case 'r': c = '\r'; break;
2172 case 't': c = '\t'; break;
2173 case 'v': c = '\v'; break;
2174
2175 case '0': case '1': case '2': case '3':
2176 case '4': case '5': case '6': case '7':
2177 c -= '0';
2178 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2179 c = c * 8 + *p++ - '0';
2180
2181 #if !defined NOUTF8
2182 if (use_utf8 && c > 255)
2183 {
2184 unsigned char buff8[8];
2185 int ii, utn;
2186 utn = ord2utf8(c, buff8);
2187 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2188 c = buff8[ii]; /* Last byte */
2189 }
2190 #endif
2191 break;
2192
2193 case 'x':
2194
2195 /* Handle \x{..} specially - new Perl thing for utf8 */
2196
2197 #if !defined NOUTF8
2198 if (*p == '{')
2199 {
2200 unsigned char *pt = p;
2201 c = 0;
2202 while (isxdigit(*(++pt)))
2203 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2204 if (*pt == '}')
2205 {
2206 unsigned char buff8[8];
2207 int ii, utn;
2208 if (use_utf8)
2209 {
2210 utn = ord2utf8(c, buff8);
2211 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2212 c = buff8[ii]; /* Last byte */
2213 }
2214 else
2215 {
2216 if (c > 255)
2217 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2218 "UTF-8 mode is not enabled.\n"
2219 "** Truncation will probably give the wrong result.\n", c);
2220 }
2221 p = pt + 1;
2222 break;
2223 }
2224 /* Not correct form; fall through */
2225 }
2226 #endif
2227
2228 /* Ordinary \x */
2229
2230 c = 0;
2231 while (i++ < 2 && isxdigit(*p))
2232 {
2233 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2234 p++;
2235 }
2236 break;
2237
2238 case 0: /* \ followed by EOF allows for an empty line */
2239 p--;
2240 continue;
2241
2242 case '>':
2243 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2244 continue;
2245
2246 case 'A': /* Option setting */
2247 options |= PCRE_ANCHORED;
2248 continue;
2249
2250 case 'B':
2251 options |= PCRE_NOTBOL;
2252 continue;
2253
2254 case 'C':
2255 if (isdigit(*p)) /* Set copy string */
2256 {
2257 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2258 copystrings |= 1 << n;
2259 }
2260 else if (isalnum(*p))
2261 {
2262 uschar *npp = copynamesptr;
2263 while (isalnum(*p)) *npp++ = *p++;
2264 *npp++ = 0;
2265 *npp = 0;
2266 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2267 if (n < 0)
2268 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2269 copynamesptr = npp;
2270 }
2271 else if (*p == '+')
2272 {
2273 callout_extra = 1;
2274 p++;
2275 }
2276 else if (*p == '-')
2277 {
2278 pcre_callout = NULL;
2279 p++;
2280 }
2281 else if (*p == '!')
2282 {
2283 callout_fail_id = 0;
2284 p++;
2285 while(isdigit(*p))
2286 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2287 callout_fail_count = 0;
2288 if (*p == '!')
2289 {
2290 p++;
2291 while(isdigit(*p))
2292 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2293 }
2294 }
2295 else if (*p == '*')
2296 {
2297 int sign = 1;
2298 callout_data = 0;
2299 if (*(++p) == '-') { sign = -1; p++; }
2300 while(isdigit(*p))
2301 callout_data = callout_data * 10 + *p++ - '0';
2302 callout_data *= sign;
2303 callout_data_set = 1;
2304 }
2305 continue;
2306
2307 #if !defined NODFA
2308 case 'D':
2309 #if !defined NOPOSIX
2310 if (posix || do_posix)
2311 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2312 else
2313 #endif
2314 use_dfa = 1;
2315 continue;
2316 #endif
2317
2318 #if !defined NODFA
2319 case 'F':
2320 options |= PCRE_DFA_SHORTEST;
2321 continue;
2322 #endif
2323
2324 case 'G':
2325 if (isdigit(*p))
2326 {
2327 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2328 getstrings |= 1 << n;
2329 }
2330 else if (isalnum(*p))
2331 {
2332 uschar *npp = getnamesptr;
2333 while (isalnum(*p)) *npp++ = *p++;
2334 *npp++ = 0;
2335 *npp = 0;
2336 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2337 if (n < 0)
2338 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2339 getnamesptr = npp;
2340 }
2341 continue;
2342
2343 case 'L':
2344 getlist = 1;
2345 continue;
2346
2347 case 'M':
2348 find_match_limit = 1;
2349 continue;
2350
2351 case 'N':
2352 if ((options & PCRE_NOTEMPTY) != 0)
2353 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2354 else
2355 options |= PCRE_NOTEMPTY;
2356 continue;
2357
2358 case 'O':
2359 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2360 if (n > size_offsets_max)
2361 {
2362 size_offsets_max = n;
2363 free(offsets);
2364 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2365 if (offsets == NULL)
2366 {
2367 printf("** Failed to get %d bytes of memory for offsets vector\n",
2368 (int)(size_offsets_max * sizeof(int)));
2369 yield = 1;
2370 goto EXIT;
2371 }
2372 }
2373 use_size_offsets = n;
2374 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2375 continue;
2376
2377 case 'P':
2378 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2379 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2380 continue;
2381
2382 case 'Q':
2383 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2384 if (extra == NULL)
2385 {
2386 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2387 extra->flags = 0;
2388 }
2389 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2390 extra->match_limit_recursion = n;
2391 continue;
2392
2393 case 'q':
2394 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2395 if (extra == NULL)
2396 {
2397 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2398 extra->flags = 0;
2399 }
2400 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2401 extra->match_limit = n;
2402 continue;
2403
2404 #if !defined NODFA
2405 case 'R':
2406 options |= PCRE_DFA_RESTART;
2407 continue;
2408 #endif
2409
2410 case 'S':
2411 show_malloc = 1;
2412 continue;
2413
2414 case 'Y':
2415 options |= PCRE_NO_START_OPTIMIZE;
2416 continue;
2417
2418 case 'Z':
2419 options |= PCRE_NOTEOL;
2420 continue;
2421
2422 case '?':
2423 options |= PCRE_NO_UTF8_CHECK;
2424 continue;
2425
2426 case '<':
2427 {
2428 int x = check_newline(p, outfile);
2429 if (x == 0) goto NEXT_DATA;
2430 options |= x;
2431 while (*p++ != '>');
2432 }
2433 continue;
2434 }
2435 *q++ = c;
2436 }
2437 *q = 0;
2438 len = (int)(q - dbuffer);
2439
2440 /* Move the data to the end of the buffer so that a read over the end of
2441 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2442 we are using the POSIX interface, we must include the terminating zero. */
2443
2444 #if !defined NOPOSIX
2445 if (posix || do_posix)
2446 {
2447 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2448 bptr += buffer_size - len - 1;
2449 }
2450 else
2451 #endif
2452 {
2453 memmove(bptr + buffer_size - len, bptr, len);
2454 bptr += buffer_size - len;
2455 }
2456
2457 if ((all_use_dfa || use_dfa) && find_match_limit)
2458 {
2459 printf("**Match limit not relevant for DFA matching: ignored\n");
2460 find_match_limit = 0;
2461 }
2462
2463 /* Handle matching via the POSIX interface, which does not
2464 support timing or playing with the match limit or callout data. */
2465
2466 #if !defined NOPOSIX
2467 if (posix || do_posix)
2468 {
2469 int rc;
2470 int eflags = 0;
2471 regmatch_t *pmatch = NULL;
2472 if (use_size_offsets > 0)
2473 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2474 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2475 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2476 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2477
2478 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2479
2480 if (rc != 0)
2481 {
2482 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2483 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2484 }
2485 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2486 != 0)
2487 {
2488 fprintf(outfile, "Matched with REG_NOSUB\n");
2489 }
2490 else
2491 {
2492 size_t i;
2493 for (i = 0; i < (size_t)use_size_offsets; i++)
2494 {
2495 if (pmatch[i].rm_so >= 0)
2496 {
2497 fprintf(outfile, "%2d: ", (int)i);
2498 (void)pchars(dbuffer + pmatch[i].rm_so,
2499 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2500 fprintf(outfile, "\n");
2501 if (i == 0 && do_showrest)
2502 {
2503 fprintf(outfile, " 0+ ");
2504 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2505 outfile);
2506 fprintf(outfile, "\n");
2507 }
2508 }
2509 }
2510 }
2511 free(pmatch);
2512 }
2513
2514 /* Handle matching via the native interface - repeats for /g and /G */
2515
2516 else
2517 #endif /* !defined NOPOSIX */
2518
2519 for (;; gmatched++) /* Loop for /g or /G */
2520 {
2521 markptr = NULL;
2522
2523 if (timeitm > 0)
2524 {
2525 register int i;
2526 clock_t time_taken;
2527 clock_t start_time = clock();
2528
2529 #if !defined NODFA
2530 if (all_use_dfa || use_dfa)
2531 {
2532 int workspace[1000];
2533 for (i = 0; i < timeitm; i++)
2534 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2535 options | g_notempty, use_offsets, use_size_offsets, workspace,
2536 sizeof(workspace)/sizeof(int));
2537 }
2538 else
2539 #endif
2540
2541 for (i = 0; i < timeitm; i++)
2542 count = pcre_exec(re, extra, (char *)bptr, len,
2543 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2544
2545 time_taken = clock() - start_time;
2546 fprintf(outfile, "Execute time %.4f milliseconds\n",
2547 (((double)time_taken * 1000.0) / (double)timeitm) /
2548 (double)CLOCKS_PER_SEC);
2549 }
2550
2551 /* If find_match_limit is set, we want to do repeated matches with
2552 varying limits in order to find the minimum value for the match limit and
2553 for the recursion limit. */
2554
2555 if (find_match_limit)
2556 {
2557 if (extra == NULL)
2558 {
2559 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2560 extra->flags = 0;
2561 }
2562
2563 (void)check_match_limit(re, extra, bptr, len, start_offset,
2564 options|g_notempty, use_offsets, use_size_offsets,
2565 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2566 PCRE_ERROR_MATCHLIMIT, "match()");
2567
2568 count = check_match_limit(re, extra, bptr, len, start_offset,
2569 options|g_notempty, use_offsets, use_size_offsets,
2570 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2571 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2572 }
2573
2574 /* If callout_data is set, use the interface with additional data */
2575
2576 else if (callout_data_set)
2577 {
2578 if (extra == NULL)
2579 {
2580 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2581 extra->flags = 0;
2582 }
2583 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2584 extra->callout_data = &callout_data;
2585 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2586 options | g_notempty, use_offsets, use_size_offsets);
2587 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2588 }
2589
2590 /* The normal case is just to do the match once, with the default
2591 value of match_limit. */
2592
2593 #if !defined NODFA
2594 else if (all_use_dfa || use_dfa)
2595 {
2596 int workspace[1000];
2597 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2598 options | g_notempty, use_offsets, use_size_offsets, workspace,
2599 sizeof(workspace)/sizeof(int));
2600 if (count == 0)
2601 {
2602 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2603 count = use_size_offsets/2;
2604 }
2605 }
2606 #endif
2607
2608 else
2609 {
2610 count = pcre_exec(re, extra, (char *)bptr, len,
2611 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2612 if (count == 0)
2613 {
2614 fprintf(outfile, "Matched, but too many substrings\n");
2615 count = use_size_offsets/3;
2616 }
2617 }
2618
2619 /* Matched */
2620
2621 if (count >= 0)
2622 {
2623 int i, maxcount;
2624
2625 #if !defined NODFA
2626 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2627 #endif
2628 maxcount = use_size_offsets/3;
2629
2630 /* This is a check against a lunatic return value. */
2631
2632 if (count > maxcount)
2633 {
2634 fprintf(outfile,
2635 "** PCRE error: returned count %d is too big for offset size %d\n",
2636 count, use_size_offsets);
2637 count = use_size_offsets/3;
2638 if (do_g || do_G)
2639 {
2640 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2641 do_g = do_G = FALSE; /* Break g/G loop */
2642 }
2643 }
2644
2645 for (i = 0; i < count * 2; i += 2)
2646 {
2647 if (use_offsets[i] < 0)
2648 fprintf(outfile, "%2d: <unset>\n", i/2);
2649 else
2650 {
2651 fprintf(outfile, "%2d: ", i/2);
2652 (void)pchars(bptr + use_offsets[i],
2653 use_offsets[i+1] - use_offsets[i], outfile);
2654 fprintf(outfile, "\n");
2655 if (i == 0)
2656 {
2657 if (do_showrest)
2658 {
2659 fprintf(outfile, " 0+ ");
2660 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2661 outfile);
2662 fprintf(outfile, "\n");
2663 }
2664 }
2665 }
2666 }
2667
2668 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2669
2670 for (i = 0; i < 32; i++)
2671 {
2672 if ((copystrings & (1 << i)) != 0)
2673 {
2674 char copybuffer[256];
2675 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2676 i, copybuffer, sizeof(copybuffer));
2677 if (rc < 0)
2678 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2679 else
2680 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2681 }
2682 }
2683
2684 for (copynamesptr = copynames;
2685 *copynamesptr != 0;
2686 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2687 {
2688 char copybuffer[256];
2689 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2690 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2691 if (rc < 0)
2692 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2693 else
2694 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2695 }
2696
2697 for (i = 0; i < 32; i++)
2698 {
2699 if ((getstrings & (1 << i)) != 0)
2700 {
2701 const char *substring;
2702 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2703 i, &substring);
2704 if (rc < 0)
2705 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2706 else
2707 {
2708 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2709 pcre_free_substring(substring);
2710 }
2711 }
2712 }
2713
2714 for (getnamesptr = getnames;
2715 *getnamesptr != 0;
2716 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2717 {
2718 const char *substring;
2719 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2720 count, (char *)getnamesptr, &substring);
2721 if (rc < 0)
2722 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2723 else
2724 {
2725 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2726 pcre_free_substring(substring);
2727 }
2728 }
2729
2730 if (getlist)
2731 {
2732 const char **stringlist;
2733 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2734 &stringlist);
2735 if (rc < 0)
2736 fprintf(outfile, "get substring list failed %d\n", rc);
2737 else
2738 {
2739 for (i = 0; i < count; i++)
2740 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2741 if (stringlist[i] != NULL)
2742 fprintf(outfile, "string list not terminated by NULL\n");
2743 /* free((void *)stringlist); */
2744 pcre_free_substring_list(stringlist);
2745 }
2746 }
2747 }
2748
2749 /* There was a partial match */
2750
2751 else if (count == PCRE_ERROR_PARTIAL)
2752 {
2753 if (markptr == NULL) fprintf(outfile, "Partial match");
2754 else fprintf(outfile, "Partial match, mark=%s", markptr);
2755 if (use_size_offsets > 1)
2756 {
2757 fprintf(outfile, ": ");
2758 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2759 outfile);
2760 }
2761 fprintf(outfile, "\n");
2762 break; /* Out of the /g loop */
2763 }
2764
2765 /* Failed to match. If this is a /g or /G loop and we previously set
2766 g_notempty after a null match, this is not necessarily the end. We want
2767 to advance the start offset, and continue. We won't be at the end of the
2768 string - that was checked before setting g_notempty.
2769
2770 Complication arises in the case when the newline option is "any" or
2771 "anycrlf". If the previous match was at the end of a line terminated by
2772 CRLF, an advance of one character just passes the \r, whereas we should
2773 prefer the longer newline sequence, as does the code in pcre_exec().
2774 Fudge the offset value to achieve this.
2775
2776 Otherwise, in the case of UTF-8 matching, the advance must be one
2777 character, not one byte. */
2778
2779 else
2780 {
2781 if (g_notempty != 0)
2782 {
2783 int onechar = 1;
2784 unsigned int obits = ((real_pcre *)re)->options;
2785 use_offsets[0] = start_offset;
2786 if ((obits & PCRE_NEWLINE_BITS) == 0)
2787 {
2788 int d;
2789 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2790 /* Note that these values are always the ASCII ones, even in
2791 EBCDIC environments. CR = 13, NL = 10. */
2792 obits = (d == 13)? PCRE_NEWLINE_CR :
2793 (d == 10)? PCRE_NEWLINE_LF :
2794 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2795 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2796 (d == -1)? PCRE_NEWLINE_ANY : 0;
2797 }
2798 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2799 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2800 &&
2801 start_offset < len - 1 &&
2802 bptr[start_offset] == '\r' &&
2803 bptr[start_offset+1] == '\n')
2804 onechar++;
2805 else if (use_utf8)
2806 {
2807 while (start_offset + onechar < len)
2808 {
2809 int tb = bptr[start_offset+onechar];
2810 if (tb <= 127) break;
2811 tb &= 0xc0;
2812 if (tb != 0 && tb != 0xc0) onechar++;
2813 }
2814 }
2815 use_offsets[1] = start_offset + onechar;
2816 }
2817 else
2818 {
2819 if (count == PCRE_ERROR_NOMATCH)
2820 {
2821 if (gmatched == 0)
2822 {
2823 if (markptr == NULL) fprintf(outfile, "No match\n");
2824 else fprintf(outfile, "No match, mark = %s\n", markptr);
2825 }
2826 }
2827 else fprintf(outfile, "Error %d\n", count);
2828 break; /* Out of the /g loop */
2829 }
2830 }
2831
2832 /* If not /g or /G we are done */
2833
2834 if (!do_g && !do_G) break;
2835
2836 /* If we have matched an empty string, first check to see if we are at
2837 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2838 Perl's /g option does. This turns out to be rather cunning. First we set
2839 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2840 same point. If this fails (picked up above) we advance to the next
2841 character. */
2842
2843 g_notempty = 0;
2844
2845 if (use_offsets[0] == use_offsets[1])
2846 {
2847 if (use_offsets[0] == len) break;
2848 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2849 }
2850
2851 /* For /g, update the start offset, leaving the rest alone */
2852
2853 if (do_g) start_offset = use_offsets[1];
2854
2855 /* For /G, update the pointer and length */
2856
2857 else
2858 {
2859 bptr += use_offsets[1];
2860 len -= use_offsets[1];
2861 }
2862 } /* End of loop for /g and /G */
2863
2864 NEXT_DATA: continue;
2865 } /* End of loop for data lines */
2866
2867 CONTINUE:
2868
2869 #if !defined NOPOSIX
2870 if (posix || do_posix) regfree(&preg);
2871 #endif
2872
2873 if (re != NULL) new_free(re);
2874 if (extra != NULL) new_free(extra);
2875 if (locale_set)
2876 {
2877 new_free((void *)tables);
2878 setlocale(LC_CTYPE, "C");
2879 locale_set = 0;
2880 }
2881 }
2882
2883 if (infile == stdin) fprintf(outfile, "\n");
2884
2885 EXIT:
2886
2887 if (infile != NULL && infile != stdin) fclose(infile);
2888 if (outfile != NULL && outfile != stdout) fclose(outfile);
2889
2890 free(buffer);
2891 free(dbuffer);
2892 free(pbuffer);
2893 free(offsets);
2894
2895 return yield;
2896 }
2897
2898 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12