/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Contents of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 808 - (show annotations) (download)
Sun Dec 18 11:11:48 2011 UTC (2 years, 7 months ago) by ph10
File MIME type: text/plain
File size: 102938 byte(s)
Commit progress so far on pcretest (runs test 1 on interpreter).

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <stdio.h>
46 #include <string.h>
47 #include <stdlib.h>
48 #include <time.h>
49 #include <locale.h>
50 #include <errno.h>
51
52 #ifdef SUPPORT_LIBREADLINE
53 #ifdef HAVE_UNISTD_H
54 #include <unistd.h>
55 #endif
56 #include <readline/readline.h>
57 #include <readline/history.h>
58 #endif
59
60
61 /* A number of things vary for Windows builds. Originally, pcretest opened its
62 input and output without "b"; then I was told that "b" was needed in some
63 environments, so it was added for release 5.0 to both the input and output. (It
64 makes no difference on Unix-like systems.) Later I was told that it is wrong
65 for the input on Windows. I've now abstracted the modes into two macros that
66 are set here, to make it easier to fiddle with them, and removed "b" from the
67 input mode under Windows. */
68
69 #if defined(_WIN32) || defined(WIN32)
70 #include <io.h> /* For _setmode() */
71 #include <fcntl.h> /* For _O_BINARY */
72 #define INPUT_MODE "r"
73 #define OUTPUT_MODE "wb"
74
75 #ifndef isatty
76 #define isatty _isatty /* This is what Windows calls them, I'm told, */
77 #endif /* though in some environments they seem to */
78 /* be already defined, hence the #ifndefs. */
79 #ifndef fileno
80 #define fileno _fileno
81 #endif
82
83 /* A user sent this fix for Borland Builder 5 under Windows. */
84
85 #ifdef __BORLANDC__
86 #define _setmode(handle, mode) setmode(handle, mode)
87 #endif
88
89 /* Not Windows */
90
91 #else
92 #include <sys/time.h> /* These two includes are needed */
93 #include <sys/resource.h> /* for setrlimit(). */
94 #define INPUT_MODE "rb"
95 #define OUTPUT_MODE "wb"
96 #endif
97
98
99 /* We have to include pcre_internal.h because we need the internal info for
100 displaying the results of pcre_study() and we also need to know about the
101 internal macros, structures, and other internal data values; pcretest has
102 "inside information" compared to a program that strictly follows the PCRE API.
103
104 Although pcre_internal.h does itself include pcre.h, we explicitly include it
105 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
106 appropriately for an application, not for building PCRE. */
107
108 #include "pcre.h"
109 #include "pcre_internal.h"
110
111 /* The pcre_printint() function, which prints the internal form of a compiled
112 regex, is held in a separate file so that (a) it can be compiled in either
113 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
114 when that is compiled in debug mode. */
115
116 #ifdef SUPPORT_PCRE8
117 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
118 #endif
119 #ifdef SUPPORT_PCRE16
120 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
121 #endif
122
123 /* We need access to some of the data tables that PCRE uses. So as not to have
124 to keep two copies, we include the source file here, changing the names of the
125 external symbols to prevent clashes. */
126
127 #define _pcre_ucp_gentype ucp_gentype
128 #define _pcre_ucp_typerange ucp_typerange
129 #define _pcre_utf8_table1 utf8_table1
130 #define _pcre_utf8_table1_size utf8_table1_size
131 #define _pcre_utf8_table2 utf8_table2
132 #define _pcre_utf8_table3 utf8_table3
133 #define _pcre_utf8_table4 utf8_table4
134 #define _pcre_utt utt
135 #define _pcre_utt_size utt_size
136 #define _pcre_utt_names utt_names
137 #define _pcre_OP_lengths OP_lengths
138
139 #include "pcre_tables.c"
140
141 /* The definition of the macro PRINTABLE, which determines whether to print an
142 output character as-is or as a hex value when showing compiled patterns, is
143 the same as in the printint.src file. We use it here in cases when the locale
144 has not been explicitly changed, so as to get consistent output from systems
145 that differ in their output from isprint() even in the "C" locale. */
146
147 #ifdef EBCDIC
148 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
149 #else
150 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
151 #endif
152
153 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
154
155 /* It is possible to compile this test program without including support for
156 testing the POSIX interface, though this is not available via the standard
157 Makefile. */
158
159 #if !defined NOPOSIX
160 #include "pcreposix.h"
161 #endif
162
163 /* It is also possible, originally for the benefit of a version that was
164 imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),
165 without the interface to the DFA matcher (NODFA), and without the doublecheck
166 of the old "info" function (define NOINFOCHECK). In fact, we automatically cut
167 out the UTF8 support if PCRE is built without it. */
168
169 #ifndef SUPPORT_UTF8
170 #ifndef NOUTF8
171 #define NOUTF8
172 #endif
173 #endif
174
175 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
176 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
177 only from one place and is handled differently). I couldn't dream up any way of
178 using a single macro to do this in a generic way, because of the many different
179 argument requirements. We know that at least one of SUPPORT_PCRE8 and
180 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
181 use these in the definitions of generic macros. */
182
183 #ifdef SUPPORT_PCRE8
/* 8-bit variants. Each macro expands to one expression with no trailing
semicolon, so the caller supplies it. The pointer arguments (p, pat, bptr) are
pasted straight after a cast without parentheses: for a compound argument such
as "a + b" the cast therefore applies to "a" only. */
184 #define PCHARS8(lv, p, len, f) \
185 lv = pchars((pcre_uint8 *)p, len, f)
186
187 #define PCHARSV8(p, len, f) \
188 (void)pchars((pcre_uint8 *)p, len, f)
189
190 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
191 re = pcre_compile((char *)pat, options, error, erroffset, tables)
192
193 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
194 offsets, size_offsets) \
195 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
196 offsets, size_offsets)
197
198 #define PCRE_STUDY8(extra, re, options, error) \
199 extra = pcre_study(re, options, error)
200 #endif
201
202
203 #ifdef SUPPORT_PCRE16
/* 16-bit variants, parallel to the 8-bit ones above but calling the pcre16_xxx
functions and pchars16(). The same unparenthesised-cast caveat applies.
NOTE(review): because the cast is to PCRE_SPTR16, passing "ptr + n" makes the
addition happen on the 16-bit pointer type - confirm that callers intend
this. */
204 #define PCHARS16(lv, p, len, f) \
205 lv = pchars16((PCRE_SPTR16)p, len, f)
206
207 #define PCHARSV16(p, len, f) \
208 (void)pchars16((PCRE_SPTR16)p, len, f)
209
210 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
211 re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
212
213 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
214 offsets, size_offsets) \
215 count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
216 options, offsets, size_offsets)
217
218 #define PCRE_STUDY16(extra, re, options, error) \
219 extra = pcre16_study(re, options, error)
220 #endif
221
222
223 /* ----- Both modes are supported; a runtime test is needed ----- */
224
225 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
226
/* In this configuration each generic macro expands to a complete if/else
statement (minus its final semicolon) that tests use_pcre16 at run time. It
must therefore be used as a statement of its own, never inside an
expression. */
227 #define PCHARS(lv, p, len, f) \
228 if (use_pcre16) \
229 PCHARS16(lv, p, len, f); \
230 else \
231 PCHARS8(lv, p, len, f)
232
233 #define PCHARSV(p, len, f) \
234 if (use_pcre16) \
235 PCHARSV16(p, len, f); \
236 else \
237 PCHARSV8(p, len, f)
238
239 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
240 if (use_pcre16) \
241 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
242 else \
243 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
244
245 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
246 offsets, size_offsets) \
247 if (use_pcre16) \
248 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
249 offsets, size_offsets); \
250 else \
251 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
252 offsets, size_offsets)
253
254 #define PCRE_STUDY(extra, re, options, error) \
255 if (use_pcre16) \
256 PCRE_STUDY16(extra, re, options, error); \
257 else \
258 PCRE_STUDY8(extra, re, options, error)
259
260 /* ----- Only 8-bit mode is supported ----- */
261
/* With a single mode compiled in, the generic names are plain aliases for the
mode-specific macros and no run-time test is made. */
262 #elif defined SUPPORT_PCRE8
263 #define PCHARS PCHARS8
264 #define PCHARSV PCHARSV8
265 #define PCRE_COMPILE PCRE_COMPILE8
266 #define PCRE_EXEC PCRE_EXEC8
267 #define PCRE_STUDY PCRE_STUDY8
268
269 /* ----- Only 16-bit mode is supported ----- */
270
271 #else
272 #define PCHARS PCHARS16
273 #define PCHARSV PCHARSV16
274 #define PCRE_COMPILE PCRE_COMPILE16
275 #define PCRE_EXEC PCRE_EXEC16
276 #define PCRE_STUDY PCRE_STUDY16
277 #endif
278
279 /* ----- End of mode-specific function call macros ----- */
280
281
282 /* Other parameters */
283
284 #ifndef CLOCKS_PER_SEC
285 #ifdef CLK_TCK
286 #define CLOCKS_PER_SEC CLK_TCK
287 #else
288 #define CLOCKS_PER_SEC 100
289 #endif
290 #endif
291
292 /* This is the default loop count for timing. */
293
294 #define LOOPREPEAT 500000
295
296 /* Static variables */
297
/* NOTE(review): apart from locale_set, these variables are written and read in
parts of the file beyond this section; the names are the only description
available here. */
298 static FILE *outfile;
299 static int log_store = 0;
300 static int callout_count;
301 static int callout_extra;
302 static int callout_fail_count;
303 static int callout_fail_id;
304 static int debug_lengths;
305 static int first_callout;
/* Tested by PRINTOK(): non-zero selects isprint(), zero selects PRINTABLE(). */
306 static int locale_set = 0;
307 static int show_malloc;
308 static int use_utf8;
309 static size_t gotten_store;
310 static size_t first_gotten_store = 0;
311 static const unsigned char *last_callout_mark = NULL;
312
313 /* The buffers grow automatically if very long input lines are encountered. */
314
315 static int buffer_size = 50000;
316 static pcre_uint8 *buffer = NULL;
317 static pcre_uint8 *dbuffer = NULL;
318 static pcre_uint8 *pbuffer = NULL;
319
320 #ifdef SUPPORT_PCRE16
321 static int buffer16_size = 0;
322 static pcre_uint16 *buffer16 = NULL;
323 #endif
324
325 /* If we have 8-bit support, default use_pcre16 to false; if there is also
326 16-bit support, it can be changed by an option. If there is no 8-bit support,
327 there must be 16-bit support, so default it to 1. */
328
329 #ifdef SUPPORT_PCRE8
330 static int use_pcre16 = 0;
331 #else
332 static int use_pcre16 = 1;
333 #endif
334
335 /* Textual explanations for runtime error codes */
336
/* One message per runtime error, in a fixed order. NULL entries mark codes
that, according to the comments beside them, are either never returned or are
reported by dedicated code instead of by table lookup.
NOTE(review): the table is presumably indexed by the negated PCRE error code
(entry 1 is commented as NOMATCH) - confirm against the code that reads it. */
337 static const char *errtexts[] = {
338 NULL, /* 0 is no error */
339 NULL, /* NOMATCH is handled specially */
340 "NULL argument passed",
341 "bad option value",
342 "magic number missing",
343 "unknown opcode - pattern overwritten?",
344 "no more memory",
345 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
346 "match limit exceeded",
347 "callout error code",
348 NULL, /* BADUTF8 is handled specially */
349 "bad UTF-8 offset",
350 NULL, /* PARTIAL is handled specially */
351 "not used - internal error",
352 "internal error - pattern overwritten?",
353 "bad count value",
354 "item unsupported for DFA matching",
355 "backreference condition or recursion test not supported for DFA matching",
356 "match limit not supported for DFA matching",
357 "workspace size exceeded in DFA matching",
358 "too much recursion for DFA matching",
359 "recursion limit exceeded",
360 "not used - internal error",
361 "invalid combination of newline options",
362 "bad offset value",
363 NULL, /* SHORTUTF8 is handled specially */
364 "nested recursion at the same subject position",
365 "JIT stack limit reached",
366 "pattern compiled in wrong mode (8-bit/16-bit error)"
367 };
368
369
370 /*************************************************
371 * Alternate character tables *
372 *************************************************/
373
374 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
375 using the default tables of the library. However, the T option can be used to
376 select alternate sets of tables, for different kinds of testing. Note also that
377 the L (locale) option also adjusts the tables. */
378
379 /* This is the set of tables distributed as default with PCRE. It recognizes
380 only ASCII characters. */
381
/* Layout of tables0, as given by the rows of the initializer: a 256-byte
lower-casing table, a 256-byte case-flipping table, ten 32-byte character class
bit maps (320 bytes), and a 256-byte character-type table - 1088 bytes in
all. */
382 static const pcre_uint8 tables0[] = {
383
384 /* This table is a lower casing table. */
385
386 0, 1, 2, 3, 4, 5, 6, 7,
387 8, 9, 10, 11, 12, 13, 14, 15,
388 16, 17, 18, 19, 20, 21, 22, 23,
389 24, 25, 26, 27, 28, 29, 30, 31,
390 32, 33, 34, 35, 36, 37, 38, 39,
391 40, 41, 42, 43, 44, 45, 46, 47,
392 48, 49, 50, 51, 52, 53, 54, 55,
393 56, 57, 58, 59, 60, 61, 62, 63,
394 64, 97, 98, 99,100,101,102,103,
395 104,105,106,107,108,109,110,111,
396 112,113,114,115,116,117,118,119,
397 120,121,122, 91, 92, 93, 94, 95,
398 96, 97, 98, 99,100,101,102,103,
399 104,105,106,107,108,109,110,111,
400 112,113,114,115,116,117,118,119,
401 120,121,122,123,124,125,126,127,
402 128,129,130,131,132,133,134,135,
403 136,137,138,139,140,141,142,143,
404 144,145,146,147,148,149,150,151,
405 152,153,154,155,156,157,158,159,
406 160,161,162,163,164,165,166,167,
407 168,169,170,171,172,173,174,175,
408 176,177,178,179,180,181,182,183,
409 184,185,186,187,188,189,190,191,
410 192,193,194,195,196,197,198,199,
411 200,201,202,203,204,205,206,207,
412 208,209,210,211,212,213,214,215,
413 216,217,218,219,220,221,222,223,
414 224,225,226,227,228,229,230,231,
415 232,233,234,235,236,237,238,239,
416 240,241,242,243,244,245,246,247,
417 248,249,250,251,252,253,254,255,
418
419 /* This table is a case flipping table. */
420
421 0, 1, 2, 3, 4, 5, 6, 7,
422 8, 9, 10, 11, 12, 13, 14, 15,
423 16, 17, 18, 19, 20, 21, 22, 23,
424 24, 25, 26, 27, 28, 29, 30, 31,
425 32, 33, 34, 35, 36, 37, 38, 39,
426 40, 41, 42, 43, 44, 45, 46, 47,
427 48, 49, 50, 51, 52, 53, 54, 55,
428 56, 57, 58, 59, 60, 61, 62, 63,
429 64, 97, 98, 99,100,101,102,103,
430 104,105,106,107,108,109,110,111,
431 112,113,114,115,116,117,118,119,
432 120,121,122, 91, 92, 93, 94, 95,
433 96, 65, 66, 67, 68, 69, 70, 71,
434 72, 73, 74, 75, 76, 77, 78, 79,
435 80, 81, 82, 83, 84, 85, 86, 87,
436 88, 89, 90,123,124,125,126,127,
437 128,129,130,131,132,133,134,135,
438 136,137,138,139,140,141,142,143,
439 144,145,146,147,148,149,150,151,
440 152,153,154,155,156,157,158,159,
441 160,161,162,163,164,165,166,167,
442 168,169,170,171,172,173,174,175,
443 176,177,178,179,180,181,182,183,
444 184,185,186,187,188,189,190,191,
445 192,193,194,195,196,197,198,199,
446 200,201,202,203,204,205,206,207,
447 208,209,210,211,212,213,214,215,
448 216,217,218,219,220,221,222,223,
449 224,225,226,227,228,229,230,231,
450 232,233,234,235,236,237,238,239,
451 240,241,242,243,244,245,246,247,
452 248,249,250,251,252,253,254,255,
453
454 /* This table contains bit maps for various character classes. Each map is 32
455 bytes long and the bits run from the least significant end of each byte. The
456 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
457 graph, print, punct, and cntrl. Other classes are built from combinations. */
458
459 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
460 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
461 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
462 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
463
464 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
465 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
466 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
467 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
468
469 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
470 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
471 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
472 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
473
474 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
475 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
476 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
477 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
478
479 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
480 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
481 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
482 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
483
484 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
485 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
486 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
487 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
488
489 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
490 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
491 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
492 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
493
494 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
495 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
496 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
497 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
498
499 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
500 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
501 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
502 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
503
504 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
505 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
506 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
507 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
508
509 /* This table identifies various classes of character by individual bits:
510 0x01 white space character
511 0x02 letter
512 0x04 decimal digit
513 0x08 hexadecimal digit
514 0x10 alphanumeric or '_'
515 0x80 regular expression metacharacter or binary zero
516 */
517
518 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
519 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
520 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
521 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
522 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
523 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
524 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
525 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
526 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
527 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
528 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
529 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
530 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
531 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
532 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
533 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
534 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
535 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
536 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
537 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
538 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
539 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
540 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
541 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
542 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
543 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
544 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
545 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
546 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
547 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
548 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
549 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
550
551 /* This is a set of tables that came originally from a Windows user. It seems to
552 be at least an approximation of ISO 8859. In particular, there are characters
553 greater than 128 that are marked as spaces, letters, etc. */
554
/* The initializer has the same row counts as tables0 (256 + 256 + 320 + 256
bytes), so the section markers below assume the same four-part layout.
NOTE(review): the section meanings are inferred from that parallel and from
the values themselves - confirm before relying on them. */
555 static const pcre_uint8 tables1[] = {
/* Bytes 0-255: presumably the lower-casing table (65-90 map to 97-122, and
most of 192-222 map to 224-254). */
556 0,1,2,3,4,5,6,7,
557 8,9,10,11,12,13,14,15,
558 16,17,18,19,20,21,22,23,
559 24,25,26,27,28,29,30,31,
560 32,33,34,35,36,37,38,39,
561 40,41,42,43,44,45,46,47,
562 48,49,50,51,52,53,54,55,
563 56,57,58,59,60,61,62,63,
564 64,97,98,99,100,101,102,103,
565 104,105,106,107,108,109,110,111,
566 112,113,114,115,116,117,118,119,
567 120,121,122,91,92,93,94,95,
568 96,97,98,99,100,101,102,103,
569 104,105,106,107,108,109,110,111,
570 112,113,114,115,116,117,118,119,
571 120,121,122,123,124,125,126,127,
572 128,129,130,131,132,133,134,135,
573 136,137,138,139,140,141,142,143,
574 144,145,146,147,148,149,150,151,
575 152,153,154,155,156,157,158,159,
576 160,161,162,163,164,165,166,167,
577 168,169,170,171,172,173,174,175,
578 176,177,178,179,180,181,182,183,
579 184,185,186,187,188,189,190,191,
580 224,225,226,227,228,229,230,231,
581 232,233,234,235,236,237,238,239,
582 240,241,242,243,244,245,246,215,
583 248,249,250,251,252,253,254,223,
584 224,225,226,227,228,229,230,231,
585 232,233,234,235,236,237,238,239,
586 240,241,242,243,244,245,246,247,
587 248,249,250,251,252,253,254,255,
/* Bytes 256-511: presumably the case-flipping table. */
588 0,1,2,3,4,5,6,7,
589 8,9,10,11,12,13,14,15,
590 16,17,18,19,20,21,22,23,
591 24,25,26,27,28,29,30,31,
592 32,33,34,35,36,37,38,39,
593 40,41,42,43,44,45,46,47,
594 48,49,50,51,52,53,54,55,
595 56,57,58,59,60,61,62,63,
596 64,97,98,99,100,101,102,103,
597 104,105,106,107,108,109,110,111,
598 112,113,114,115,116,117,118,119,
599 120,121,122,91,92,93,94,95,
600 96,65,66,67,68,69,70,71,
601 72,73,74,75,76,77,78,79,
602 80,81,82,83,84,85,86,87,
603 88,89,90,123,124,125,126,127,
604 128,129,130,131,132,133,134,135,
605 136,137,138,139,140,141,142,143,
606 144,145,146,147,148,149,150,151,
607 152,153,154,155,156,157,158,159,
608 160,161,162,163,164,165,166,167,
609 168,169,170,171,172,173,174,175,
610 176,177,178,179,180,181,182,183,
611 184,185,186,187,188,189,190,191,
612 224,225,226,227,228,229,230,231,
613 232,233,234,235,236,237,238,239,
614 240,241,242,243,244,245,246,215,
615 248,249,250,251,252,253,254,223,
616 192,193,194,195,196,197,198,199,
617 200,201,202,203,204,205,206,207,
618 208,209,210,211,212,213,214,247,
619 216,217,218,219,220,221,222,255,
/* Bytes 512-831: presumably ten 32-byte character class bit maps, four rows
each. */
620 0,62,0,0,1,0,0,0,
621 0,0,0,0,0,0,0,0,
622 32,0,0,0,1,0,0,0,
623 0,0,0,0,0,0,0,0,
624 0,0,0,0,0,0,255,3,
625 126,0,0,0,126,0,0,0,
626 0,0,0,0,0,0,0,0,
627 0,0,0,0,0,0,0,0,
628 0,0,0,0,0,0,255,3,
629 0,0,0,0,0,0,0,0,
630 0,0,0,0,0,0,12,2,
631 0,0,0,0,0,0,0,0,
632 0,0,0,0,0,0,0,0,
633 254,255,255,7,0,0,0,0,
634 0,0,0,0,0,0,0,0,
635 255,255,127,127,0,0,0,0,
636 0,0,0,0,0,0,0,0,
637 0,0,0,0,254,255,255,7,
638 0,0,0,0,0,4,32,4,
639 0,0,0,128,255,255,127,255,
640 0,0,0,0,0,0,255,3,
641 254,255,255,135,254,255,255,7,
642 0,0,0,0,0,4,44,6,
643 255,255,127,255,255,255,127,255,
644 0,0,0,0,254,255,255,255,
645 255,255,255,255,255,255,255,127,
646 0,0,0,0,254,255,255,255,
647 255,255,255,255,255,255,255,255,
648 0,2,0,0,255,255,255,255,
649 255,255,255,255,255,255,255,127,
650 0,0,0,0,255,255,255,255,
651 255,255,255,255,255,255,255,255,
652 0,0,0,0,254,255,0,252,
653 1,0,0,248,1,0,0,120,
654 0,0,0,0,254,255,255,255,
655 0,0,128,0,0,0,128,0,
656 255,255,255,255,0,0,0,0,
657 0,0,0,0,0,0,0,128,
658 255,255,255,255,0,0,0,0,
659 0,0,0,0,0,0,0,0,
/* Bytes 832-1087: presumably the per-character type flags (decimal forms of
the bit values documented in tables0, e.g. 28 = 0x1c, 18 = 0x12). */
660 128,0,0,0,0,0,0,0,
661 0,1,1,0,1,1,0,0,
662 0,0,0,0,0,0,0,0,
663 0,0,0,0,0,0,0,0,
664 1,0,0,0,128,0,0,0,
665 128,128,128,128,0,0,128,0,
666 28,28,28,28,28,28,28,28,
667 28,28,0,0,0,0,0,128,
668 0,26,26,26,26,26,26,18,
669 18,18,18,18,18,18,18,18,
670 18,18,18,18,18,18,18,18,
671 18,18,18,128,128,0,128,16,
672 0,26,26,26,26,26,26,18,
673 18,18,18,18,18,18,18,18,
674 18,18,18,18,18,18,18,18,
675 18,18,18,128,128,0,0,0,
676 0,0,0,0,0,1,0,0,
677 0,0,0,0,0,0,0,0,
678 0,0,0,0,0,0,0,0,
679 0,0,0,0,0,0,0,0,
680 1,0,0,0,0,0,0,0,
681 0,0,18,0,0,0,0,0,
682 0,0,20,20,0,18,0,0,
683 0,20,18,0,0,0,0,0,
684 18,18,18,18,18,18,18,18,
685 18,18,18,18,18,18,18,18,
686 18,18,18,18,18,18,18,0,
687 18,18,18,18,18,18,18,18,
688 18,18,18,18,18,18,18,18,
689 18,18,18,18,18,18,18,18,
690 18,18,18,18,18,18,18,0,
691 18,18,18,18,18,18,18,18
692 };
693
694
695
696
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for C libraries that have no strerror() of their own (SunOS4 is one
example). The text is taken from the library's sys_errlist[] table, using
sys_nerr as the upper bound for a valid index.

Argument:
  n           the error number to describe

Returns:      sys_errlist[n] when 0 <= n < sys_nerr, otherwise a fixed
              "unknown error number" string
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif  /* HAVE_STRERROR */
716
717
718 /*************************************************
719 * JIT memory callback *
720 *************************************************/
721
722 static pcre_jit_stack* jit_callback(void *arg)
723 {
724 return (pcre_jit_stack *)arg;
725 }
726
727
728 /*************************************************
729 * Convert UTF-8 string to value *
730 *************************************************/
731
732 /* This function takes one or more bytes that represents a UTF-8 character,
733 and returns the value of the character.
734
735 Argument:
736 utf8bytes a pointer to the byte vector
737 vptr a pointer to an int to receive the value
738
739 Returns: > 0 => the number of bytes consumed
740 -6 to 0 => malformed UTF-8 character at offset = (-return)
741 */
742
743 #if !defined NOUTF8
744
745 static int
746 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
747 {
748 int c = *utf8bytes++;
749 int d = c;
750 int i, j, s;
751
752 for (i = -1; i < 6; i++) /* i is number of additional bytes */
753 {
754 if ((d & 0x80) == 0) break;
755 d <<= 1;
756 }
757
758 if (i == -1) { *vptr = c; return 1; } /* ascii character */
759 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
760
761 /* i now has a value in the range 1-5 */
762
763 s = 6*i;
764 d = (c & utf8_table3[i]) << s;
765
766 for (j = 0; j < i; j++)
767 {
768 c = *utf8bytes++;
769 if ((c & 0xc0) != 0x80) return -(j+1);
770 s -= 6;
771 d |= (c & 0x3f) << s;
772 }
773
774 /* Check that encoding was the correct unique one */
775
776 for (j = 0; j < utf8_table1_size; j++)
777 if (d <= utf8_table1[j]) break;
778 if (j != i) return -(i+1);
779
780 /* Valid value */
781
782 *vptr = d;
783 return i+1;
784 }
785
786 #endif
787
788
789
790 /*************************************************
791 * Convert character value to UTF-8 *
792 *************************************************/
793
794 /* This function takes an integer value in the range 0 - 0x7fffffff
795 and encodes it as a UTF-8 character in 0 to 6 bytes.
796
797 Arguments:
798 cvalue the character value
799 utf8bytes pointer to buffer for result - at least 6 bytes long
800
801 Returns: number of characters placed in the buffer
802 */
803
804 #if !defined NOUTF8
805
806 static int
807 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
808 {
809 register int i, j;
810 for (i = 0; i < utf8_table1_size; i++)
811 if (cvalue <= utf8_table1[i]) break;
812 utf8bytes += i;
813 for (j = i; j > 0; j--)
814 {
815 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
816 cvalue >>= 6;
817 }
818 *utf8bytes = utf8_table2[i] | cvalue;
819 return i + 1;
820 }
821
822 #endif
823
824
825
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* Converts len bytes at p into 16-bit units in buffer16, followed by a zero
unit. buffer16 is (re)allocated to 2*len + 2 bytes when it is smaller than
that, which is room for len units plus the terminator. This suffices because
no input byte ever yields more than one output unit: in non-UTF mode each byte
becomes one unit, and in UTF mode a 1-3 byte character becomes one unit while
a longer one becomes a surrogate pair (two units).

In UTF mode a byte that does not start a well-formed character, or whose
character would run past the end of the len bytes, is copied through as a
single unit and skipped. This keeps the loop advancing and keeps the output
inside the buffer. (Previously the non-positive result of utf82ord() was added
to the pointer, so malformed input looped forever or moved backwards.)

NOTE(review): values above 0x10ffff, which utf82ord() can deliver from 4- to
6-byte sequences, cannot be represented in UTF-16; they still go through the
surrogate arithmetic and produce meaningless units, as before.

Arguments:
  p           the 8-bit string
  utf         non-zero to interpret p as UTF-8 and produce UTF-16
  len         the number of bytes at p to convert

Returns:      the number of 16-bit units stored, excluding the terminating
              zero; the result itself is left in buffer16
*/

static int
to16(pcre_uint8 *p, int utf, int len)
{
  pcre_uint16 *pp;

  if (buffer16_size < 2*len + 2)
    {
    free(buffer16);
    buffer16_size = 2*len + 2;
    buffer16 = (pcre_uint16 *)malloc(buffer16_size);
    if (buffer16 == NULL)
      {
      fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
      exit(1);
      }
    }

  pp = buffer16;

  if (!utf)
    {
    while (len-- > 0) *pp++ = *p++;
    }

  else
    {
    while (len > 0)
      {
      int c = 0;
      int chlen = utf82ord(p, &c);

      /* Malformed or truncated character: pass the byte through unchanged
      and move on by exactly one byte so that progress is guaranteed. */

      if (chlen <= 0 || chlen > len)
        {
        *pp++ = *p++;
        len--;
        continue;
        }

      p += chlen;
      len -= chlen;

      if (c < 0x10000) *pp++ = c; else
        {
        c -= 0x10000;
        *pp++ = 0xD800 | (c >> 10);
        *pp++ = 0xDC00 | (c & 0x3ff);
        }
      }
    }

  *pp = 0;
  return (int)(pp - buffer16);
}
#endif
882
883
884 /*************************************************
885 * Read or extend an input line *
886 *************************************************/
887
888 /* Input lines are read into buffer, but both patterns and data lines can be
889 continued over multiple input lines. In addition, if the buffer fills up, we
890 want to automatically expand it so as to be able to handle extremely large
891 lines that are needed for certain stress tests. When the input buffer is
892 expanded, the other two buffers must also be expanded likewise, and the
893 contents of pbuffer, which are a copy of the input for callouts, must be
894 preserved (for when expansion happens for a data line). This is not the most
895 optimal way of handling this, but hey, this is just a test program!
896
897 Arguments:
898 f the file to read
899 start where in buffer to start (this *must* be within buffer)
900 prompt for stdin or readline()
901
902 Returns: pointer to the start of new data
903 could be a copy of start, or could be moved
904 NULL if no data read and EOF reached
905 */
906
907 static pcre_uint8 *
908 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
909 {
910 pcre_uint8 *here = start;
911
912 for (;;)
913 {
914 int rlen = (int)(buffer_size - (here - buffer));
915
916 if (rlen > 1000)
917 {
918 int dlen;
919
920 /* If libreadline support is required, use readline() to read a line if the
921 input is a terminal. Note that readline() removes the trailing newline, so
922 we must put it back again, to be compatible with fgets(). */
923
924 #ifdef SUPPORT_LIBREADLINE
925 if (isatty(fileno(f)))
926 {
927 size_t len;
928 char *s = readline(prompt);
929 if (s == NULL) return (here == start)? NULL : start;
930 len = strlen(s);
931 if (len > 0) add_history(s);
932 if (len > rlen - 1) len = rlen - 1;
933 memcpy(here, s, len);
934 here[len] = '\n';
935 here[len+1] = 0;
936 free(s);
937 }
938 else
939 #endif
940
941 /* Read the next line by normal means, prompting if the file is stdin. */
942
943 {
944 if (f == stdin) printf("%s", prompt);
945 if (fgets((char *)here, rlen, f) == NULL)
946 return (here == start)? NULL : start;
947 }
948
949 dlen = (int)strlen((char *)here);
950 if (dlen > 0 && here[dlen - 1] == '\n') return start;
951 here += dlen;
952 }
953
954 else
955 {
956 int new_buffer_size = 2*buffer_size;
957 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
958 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
959 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
960
961 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
962 {
963 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
964 exit(1);
965 }
966
967 memcpy(new_buffer, buffer, buffer_size);
968 memcpy(new_pbuffer, pbuffer, buffer_size);
969
970 buffer_size = new_buffer_size;
971
972 start = new_buffer + (start - buffer);
973 here = new_buffer + (here - buffer);
974
975 free(buffer);
976 free(dbuffer);
977 free(pbuffer);
978
979 buffer = new_buffer;
980 dbuffer = new_dbuffer;
981 pbuffer = new_pbuffer;
982 }
983 }
984
985 return NULL; /* Control never gets here */
986 }
987
988
989
990 /*************************************************
991 * Read number from string *
992 *************************************************/
993
994 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
995 around with conditional compilation, just do the job by hand. It is only used
996 for unpicking arguments, so just keep it simple.
997
998 Arguments:
999 str string to be converted
1000 endptr where to put the end pointer
1001
1002 Returns: the unsigned long
1003 */
1004
1005 static int
1006 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1007 {
1008 int result = 0;
1009 while(*str != 0 && isspace(*str)) str++;
1010 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1011 *endptr = str;
1012 return(result);
1013 }
1014
1015
1016
1017 #ifdef SUPPORT_PCRE8
1018 /*************************************************
1019 * Print 8-bit character string *
1020 *************************************************/
1021
1022 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1023 If handed a NULL file, just counts chars without printing. */
1024
1025 static int pchars(pcre_uint8 *p, int length, FILE *f)
1026 {
1027 int c = 0;
1028 int yield = 0;
1029
1030 while (length-- > 0)
1031 {
1032 #if !defined NOUTF8
1033 if (use_utf8)
1034 {
1035 int rc = utf82ord(p, &c);
1036
1037 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1038 {
1039 length -= rc - 1;
1040 p += rc;
1041 if (PRINTOK(c))
1042 {
1043 if (f != NULL) fprintf(f, "%c", c);
1044 yield++;
1045 }
1046 else
1047 {
1048 int n = 4;
1049 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1050 yield += (n <= 0x000000ff)? 2 :
1051 (n <= 0x00000fff)? 3 :
1052 (n <= 0x0000ffff)? 4 :
1053 (n <= 0x000fffff)? 5 : 6;
1054 }
1055 continue;
1056 }
1057 }
1058 #endif
1059
1060 /* Not UTF-8, or malformed UTF-8 */
1061
1062 c = *p++;
1063 if (PRINTOK(c))
1064 {
1065 if (f != NULL) fprintf(f, "%c", c);
1066 yield++;
1067 }
1068 else
1069 {
1070 if (f != NULL) fprintf(f, "\\x%02x", c);
1071 yield += 4;
1072 }
1073 }
1074
1075 return yield;
1076 }
1077 #endif
1078
1079
1080
1081 #ifdef SUPPORT_PCRE16
1082 /*************************************************
1083 * Print 16-bit character string *
1084 *************************************************/
1085
1086 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1087 If handed a NULL file, just counts chars without printing. */
1088
1089 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1090 {
1091 int yield = 0;
1092
1093 while (length-- > 0)
1094 {
1095 int c = *p++ & 0xffff;
1096
1097 #if !defined NOUTF8
1098 if (use_utf8 && c >= 0xD800 && c < 0xDC00 && length > 0)
1099 {
1100 int d = *p & 0xffff;
1101 if (d >= 0xDC00 && d < 0xDFFF)
1102 {
1103 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1104 length--;
1105 p++;
1106 }
1107 }
1108 #endif
1109
1110 if (PRINTOK(c))
1111 {
1112 if (f != NULL) fprintf(f, "%c", c);
1113 yield++;
1114 }
1115 else
1116 {
1117 yield += 4;
1118 if (c < 0x100)
1119 {
1120 if (f != NULL) fprintf(f, "\\x%02x", c);
1121 }
1122 else
1123 {
1124 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1125 yield += (c <= 0x000000ff)? 2 :
1126 (c <= 0x00000fff)? 3 :
1127 (c <= 0x0000ffff)? 4 :
1128 (c <= 0x000fffff)? 5 : 6;
1129 }
1130 }
1131 }
1132
1133 return yield;
1134 }
1135 #endif
1136
1137
1138
1139 /*************************************************
1140 * Callout function *
1141 *************************************************/
1142
1143 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1144 the match. Yield zero unless more callouts than the fail count, or the callout
1145 data is not zero. */
1146
1147 static int callout(pcre_callout_block *cb)
1148 {
1149 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1150 int i, pre_start, post_start, subject_length;
1151
1152 if (callout_extra)
1153 {
1154 fprintf(f, "Callout %d: last capture = %d\n",
1155 cb->callout_number, cb->capture_last);
1156
1157 for (i = 0; i < cb->capture_top * 2; i += 2)
1158 {
1159 if (cb->offset_vector[i] < 0)
1160 fprintf(f, "%2d: <unset>\n", i/2);
1161 else
1162 {
1163 fprintf(f, "%2d: ", i/2);
1164 PCHARSV(cb->subject + cb->offset_vector[i],
1165 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1166 fprintf(f, "\n");
1167 }
1168 }
1169 }
1170
1171 /* Re-print the subject in canonical form, the first time or if giving full
1172 datails. On subsequent calls in the same match, we use pchars just to find the
1173 printed lengths of the substrings. */
1174
1175 if (f != NULL) fprintf(f, "--->");
1176
1177 PCHARS(pre_start, cb->subject, cb->start_match, f);
1178 PCHARS(post_start, cb->subject + cb->start_match,
1179 cb->current_position - cb->start_match, f);
1180
1181 PCHARS(subject_length, cb->subject, cb->subject_length, NULL);
1182
1183 PCHARSV(cb->subject + cb->current_position,
1184 cb->subject_length - cb->current_position, f);
1185
1186 if (f != NULL) fprintf(f, "\n");
1187
1188 /* Always print appropriate indicators, with callout number if not already
1189 shown. For automatic callouts, show the pattern offset. */
1190
1191 if (cb->callout_number == 255)
1192 {
1193 fprintf(outfile, "%+3d ", cb->pattern_position);
1194 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1195 }
1196 else
1197 {
1198 if (callout_extra) fprintf(outfile, " ");
1199 else fprintf(outfile, "%3d ", cb->callout_number);
1200 }
1201
1202 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1203 fprintf(outfile, "^");
1204
1205 if (post_start > 0)
1206 {
1207 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1208 fprintf(outfile, "^");
1209 }
1210
1211 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1212 fprintf(outfile, " ");
1213
1214 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1215 pbuffer + cb->pattern_position);
1216
1217 fprintf(outfile, "\n");
1218 first_callout = 0;
1219
1220 if (cb->mark != last_callout_mark)
1221 {
1222 fprintf(outfile, "Latest Mark: %s\n",
1223 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
1224 last_callout_mark = cb->mark;
1225 }
1226
1227 if (cb->callout_data != NULL)
1228 {
1229 int callout_data = *((int *)(cb->callout_data));
1230 if (callout_data != 0)
1231 {
1232 fprintf(outfile, "Callout data = %d\n", callout_data);
1233 return callout_data;
1234 }
1235 }
1236
1237 return (cb->callout_number != callout_fail_id)? 0 :
1238 (++callout_count >= callout_fail_count)? 1 : 0;
1239 }
1240
1241
1242 /*************************************************
1243 * Local malloc functions *
1244 *************************************************/
1245
1246 /* Alternative malloc function, to test functionality and save the size of a
1247 compiled re, which is the first store request that pcre_compile() makes. The
1248 show_malloc variable is set only during matching. */
1249
1250 static void *new_malloc(size_t size)
1251 {
1252 void *block = malloc(size);
1253 gotten_store = size;
1254 if (first_gotten_store == 0) first_gotten_store = size;
1255 if (show_malloc)
1256 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1257 return block;
1258 }
1259
1260 static void new_free(void *block)
1261 {
1262 if (show_malloc)
1263 fprintf(outfile, "free %p\n", block);
1264 free(block);
1265 }
1266
1267 /* For recursion malloc/free, to test stacking calls */
1268
1269 static void *stack_malloc(size_t size)
1270 {
1271 void *block = malloc(size);
1272 if (show_malloc)
1273 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1274 return block;
1275 }
1276
1277 static void stack_free(void *block)
1278 {
1279 if (show_malloc)
1280 fprintf(outfile, "stack_free %p\n", block);
1281 free(block);
1282 }
1283
1284
1285 /*************************************************
1286 * Call pcre_fullinfo() *
1287 *************************************************/
1288
1289 /* Get one piece of information from the pcre_fullinfo() function. When only
1290 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1291 value, but the code is defensive. */
1292
1293 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1294 {
1295 int rc;
1296
1297 if (use_pcre16)
1298 #ifdef SUPPORT_PCRE16
1299 rc = pcre16_fullinfo(re, study, option, ptr);
1300 #else
1301 rc = PCRE_ERROR_BADMODE;
1302 #endif
1303 else
1304 #ifdef SUPPORT_PCRE8
1305 rc = pcre_fullinfo(re, study, option, ptr);
1306 #else
1307 rc = PCRE_ERROR_BADMODE;
1308 #endif
1309
1310 if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1311 use_pcre16? "16" : "", option);
1312 }
1313
1314
1315
1316 /*************************************************
1317 * Byte flipping function *
1318 *************************************************/
1319
/* Reverse the byte order of a 2-byte or a 4-byte quantity.

Arguments:
  value      the value to be flipped
  n          2 to swap the two low-order bytes; any other value reverses
               the four low-order bytes

Returns:     the flipped value; bits above those taking part are zero
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;           /* least significant byte */
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2)
  {
  return (b0 << 8) | b1;
  }

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1329
1330
1331
1332
1333 /*************************************************
1334 * Check match or recursion limit *
1335 *************************************************/
1336
1337 static int
1338 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1339 int start_offset, int options, int *use_offsets, int use_size_offsets,
1340 int flag, unsigned long int *limit, int errnumber, const char *msg)
1341 {
1342 int count;
1343 int min = 0;
1344 int mid = 64;
1345 int max = -1;
1346
1347 extra->flags |= flag;
1348
1349 for (;;)
1350 {
1351 *limit = mid;
1352
1353 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1354 use_offsets, use_size_offsets);
1355
1356 if (count == errnumber)
1357 {
1358 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1359 min = mid;
1360 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1361 }
1362
1363 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1364 count == PCRE_ERROR_PARTIAL)
1365 {
1366 if (mid == min + 1)
1367 {
1368 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1369 break;
1370 }
1371 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1372 max = mid;
1373 mid = (min + mid)/2;
1374 }
1375 else break; /* Some other error */
1376 }
1377
1378 extra->flags &= ~flag;
1379 return count;
1380 }
1381
1382
1383
1384 /*************************************************
1385 * Case-independent strncmp() function *
1386 *************************************************/
1387
1388 /*
1389 Arguments:
1390 s first string
1391 t second string
1392 n number of characters to compare
1393
1394 Returns: < 0, = 0, or > 0, according to the comparison
1395 */
1396
1397 static int
1398 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1399 {
1400 while (n--)
1401 {
1402 int c = tolower(*s++) - tolower(*t++);
1403 if (c) return c;
1404 }
1405 return 0;
1406 }
1407
1408
1409
1410 /*************************************************
1411 * Check newline indicator *
1412 *************************************************/
1413
1414 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1415 a message and return 0 if there is no match.
1416
1417 Arguments:
1418 p points after the leading '<'
1419 f file for error message
1420
1421 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1422 */
1423
1424 static int
1425 check_newline(pcre_uint8 *p, FILE *f)
1426 {
1427 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1428 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1429 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1430 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1431 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1432 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1433 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1434 fprintf(f, "Unknown newline type at: <%s\n", p);
1435 return 0;
1436 }
1437
1438
1439
1440 /*************************************************
1441 * Usage function *
1442 *************************************************/
1443
/* Write the command line synopsis and the list of options to stdout. Options
that depend on how pcretest was built (16-bit, DFA, POSIX, readline) are shown
only when the corresponding support is compiled in. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n", stdout);
fputs("Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs(" -16 use 16-bit interface\n", stdout);
#endif

fputs(" -b show compiled code (bytecode)\n", stdout);
fputs(" -C show PCRE compile-time options and exit\n", stdout);
fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n", stdout);
fputs(" -i show information about compiled patterns\n", stdout);
fputs(" -M find MATCH_LIMIT minimum for each subject\n", stdout);
fputs(" -m output memory used information\n", stdout);
fputs(" -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
fputs(" -s force each pattern to be studied at basic level\n", stdout);
fputs(" -s+ force each pattern to be studied, using JIT if available\n", stdout);
fputs(" -t time compilation and execution\n", stdout);
fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
fputs(" -tm time execution (matching) only\n", stdout);
fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1481
1482
1483
1484 /*************************************************
1485 * Main Program *
1486 *************************************************/
1487
1488 /* Read lines from named file or stdin and write to named file or stdout; lines
1489 consist of a regular expression, in delimiters and optionally followed by
1490 options, followed by a set of test data, terminated by an empty line. */
1491
1492 int main(int argc, char **argv)
1493 {
1494 FILE *infile = stdin;
1495 int options = 0;
1496 int study_options = 0;
1497 int default_find_match_limit = FALSE;
1498 int op = 1;
1499 int timeit = 0;
1500 int timeitm = 0;
1501 int showinfo = 0;
1502 int showstore = 0;
1503 int force_study = -1;
1504 int force_study_options = 0;
1505 int quiet = 0;
1506 int size_offsets = 45;
1507 int size_offsets_max;
1508 int *offsets = NULL;
1509 #if !defined NOPOSIX
1510 int posix = 0;
1511 #endif
1512 int debug = 0;
1513 int done = 0;
1514 int all_use_dfa = 0;
1515 int yield = 0;
1516 int stack_size;
1517
1518 pcre_jit_stack *jit_stack = NULL;
1519
1520 /* These vectors store, end-to-end, a list of captured substring names. Assume
1521 that 1024 is plenty long enough for the few names we'll be testing. */
1522
1523 pcre_uchar copynames[1024];
1524 pcre_uchar getnames[1024];
1525
1526 pcre_uchar *copynamesptr;
1527 pcre_uchar *getnamesptr;
1528
1529 /* Get buffers from malloc() so that valgrind will check their misuse when
1530 debugging. They grow automatically when very long lines are read. The 16-bit
1531 buffer (buffer16) is obtained only if needed. */
1532
1533 buffer = (pcre_uint8 *)malloc(buffer_size);
1534 dbuffer = (pcre_uint8 *)malloc(buffer_size);
1535 pbuffer = (pcre_uint8 *)malloc(buffer_size);
1536
1537 /* The outfile variable is static so that new_malloc can use it. */
1538
1539 outfile = stdout;
1540
1541 /* The following _setmode() stuff is some Windows magic that tells its runtime
1542 library to translate CRLF into a single LF character. At least, that's what
1543 I've been told: never having used Windows I take this all on trust. Originally
1544 it set 0x8000, but then I was advised that _O_BINARY was better. */
1545
1546 #if defined(_WIN32) || defined(WIN32)
1547 _setmode( _fileno( stdout ), _O_BINARY );
1548 #endif
1549
1550 /* Scan options */
1551
1552 while (argc > 1 && argv[op][0] == '-')
1553 {
1554 pcre_uint8 *endptr;
1555
1556 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1557 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1558 else if (strcmp(argv[op], "-s+") == 0)
1559 {
1560 force_study = 1;
1561 force_study_options = PCRE_STUDY_JIT_COMPILE;
1562 }
1563 #ifdef SUPPORT_PCRE16
1564 else if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
1565 #endif
1566
1567 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1568 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1569 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1570 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1571 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1572 #if !defined NODFA
1573 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1574 #endif
1575 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1576 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1577 *endptr == 0))
1578 {
1579 op++;
1580 argc--;
1581 }
1582 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1583 {
1584 int both = argv[op][2] == 0;
1585 int temp;
1586 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
1587 *endptr == 0))
1588 {
1589 timeitm = temp;
1590 op++;
1591 argc--;
1592 }
1593 else timeitm = LOOPREPEAT;
1594 if (both) timeit = timeitm;
1595 }
1596 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1597 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1598 *endptr == 0))
1599 {
1600 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1601 printf("PCRE: -S not supported on this OS\n");
1602 exit(1);
1603 #else
1604 int rc;
1605 struct rlimit rlim;
1606 getrlimit(RLIMIT_STACK, &rlim);
1607 rlim.rlim_cur = stack_size * 1024 * 1024;
1608 rc = setrlimit(RLIMIT_STACK, &rlim);
1609 if (rc != 0)
1610 {
1611 printf("PCRE: setrlimit() failed with error %d\n", rc);
1612 exit(1);
1613 }
1614 op++;
1615 argc--;
1616 #endif
1617 }
1618 #if !defined NOPOSIX
1619 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1620 #endif
1621 else if (strcmp(argv[op], "-C") == 0)
1622 {
1623 int rc;
1624 unsigned long int lrc;
1625 printf("PCRE version %s\n", pcre_version());
1626 printf("Compiled with\n");
1627
1628 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
1629 are set, either both UTFs are supported or both are not supported. */
1630
1631 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1632 printf(" 8-bit and 16-bit support\n");
1633 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1634 if (rc)
1635 printf(" UTF-8 and UTF-16 support\n");
1636 else
1637 printf(" No UTF-8 or UTF-16 support\n");
1638 #elif defined SUPPORT_PCRE8
1639 printf(" 8-bit support only\n");
1640 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1641 printf(" %sUTF-8 support\n", rc? "" : "No ");
1642 #else
1643 printf(" 16-bit support only\n");
1644 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1645 printf(" %sUTF-16 support\n", rc? "" : "No ");
1646 #endif
1647
1648 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1649 printf(" %sUnicode properties support\n", rc? "" : "No ");
1650 (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1651 if (rc)
1652 printf(" Just-in-time compiler support\n");
1653 else
1654 printf(" No just-in-time compiler support\n");
1655 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1656 /* Note that these values are always the ASCII values, even
1657 in EBCDIC environments. CR is 13 and NL is 10. */
1658 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1659 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1660 (rc == -2)? "ANYCRLF" :
1661 (rc == -1)? "ANY" : "???");
1662 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1663 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1664 "all Unicode newlines");
1665 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1666 printf(" Internal link size = %d\n", rc);
1667 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1668 printf(" POSIX malloc threshold = %d\n", rc);
1669 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1670 printf(" Default match limit = %ld\n", lrc);
1671 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1672 printf(" Default recursion depth limit = %ld\n", lrc);
1673 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1674 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1675 goto EXIT;
1676 }
1677 else if (strcmp(argv[op], "-help") == 0 ||
1678 strcmp(argv[op], "--help") == 0)
1679 {
1680 usage();
1681 goto EXIT;
1682 }
1683 else
1684 {
1685 printf("** Unknown or malformed option %s\n", argv[op]);
1686 usage();
1687 yield = 1;
1688 goto EXIT;
1689 }
1690 op++;
1691 argc--;
1692 }
1693
1694 /* Get the store for the offsets vector, and remember what it was */
1695
1696 size_offsets_max = size_offsets;
1697 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1698 if (offsets == NULL)
1699 {
1700 printf("** Failed to get %d bytes of memory for offsets vector\n",
1701 (int)(size_offsets_max * sizeof(int)));
1702 yield = 1;
1703 goto EXIT;
1704 }
1705
1706 /* Sort out the input and output files */
1707
1708 if (argc > 1)
1709 {
1710 infile = fopen(argv[op], INPUT_MODE);
1711 if (infile == NULL)
1712 {
1713 printf("** Failed to open %s\n", argv[op]);
1714 yield = 1;
1715 goto EXIT;
1716 }
1717 }
1718
1719 if (argc > 2)
1720 {
1721 outfile = fopen(argv[op+1], OUTPUT_MODE);
1722 if (outfile == NULL)
1723 {
1724 printf("** Failed to open %s\n", argv[op+1]);
1725 yield = 1;
1726 goto EXIT;
1727 }
1728 }
1729
1730 /* Set alternative malloc function */
1731
1732 #ifdef SUPPORT_PCRE8
1733 pcre_malloc = new_malloc;
1734 pcre_free = new_free;
1735 pcre_stack_malloc = stack_malloc;
1736 pcre_stack_free = stack_free;
1737 #endif
1738
1739 #ifdef SUPPORT_PCRE16
1740 pcre16_malloc = new_malloc;
1741 pcre16_free = new_free;
1742 pcre16_stack_malloc = stack_malloc;
1743 pcre16_stack_free = stack_free;
1744 #endif
1745
1746 /* Heading line unless quiet, then prompt for first regex if stdin */
1747
1748 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1749
1750 /* Main loop */
1751
1752 while (!done)
1753 {
1754 pcre *re = NULL;
1755 pcre_extra *extra = NULL;
1756
1757 #if !defined NOPOSIX /* There are still compilers that require no indent */
1758 regex_t preg;
1759 int do_posix = 0;
1760 #endif
1761
1762 const char *error;
1763 pcre_uint8 *markptr;
1764 pcre_uint8 *p, *pp, *ppp;
1765 pcre_uint8 *to_file = NULL;
1766 const pcre_uint8 *tables = NULL;
1767 unsigned long int true_size, true_study_size = 0;
1768 size_t size, regex_gotten_store;
1769 int do_allcaps = 0;
1770 int do_mark = 0;
1771 int do_study = 0;
1772 int no_force_study = 0;
1773 int do_debug = debug;
1774 int do_G = 0;
1775 int do_g = 0;
1776 int do_showinfo = showinfo;
1777 int do_showrest = 0;
1778 int do_showcaprest = 0;
1779 int do_flip = 0;
1780 int erroroffset, len, delimiter, poffset;
1781
1782 use_utf8 = 0;
1783 debug_lengths = 1;
1784
1785 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1786 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1787 fflush(outfile);
1788
1789 p = buffer;
1790 while (isspace(*p)) p++;
1791 if (*p == 0) continue;
1792
1793 /* See if the pattern is to be loaded pre-compiled from a file. */
1794
1795 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1796 {
1797 unsigned long int magic, get_options;
1798 pcre_uint8 sbuf[8];
1799 FILE *f;
1800
1801 p++;
1802 pp = p + (int)strlen((char *)p);
1803 while (isspace(pp[-1])) pp--;
1804 *pp = 0;
1805
1806 f = fopen((char *)p, "rb");
1807 if (f == NULL)
1808 {
1809 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1810 continue;
1811 }
1812
1813 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1814
1815 true_size =
1816 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1817 true_study_size =
1818 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1819
1820 re = (real_pcre *)new_malloc(true_size);
1821 regex_gotten_store = first_gotten_store;
1822
1823 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1824
1825 magic = ((real_pcre *)re)->magic_number;
1826 if (magic != MAGIC_NUMBER)
1827 {
1828 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1829 {
1830 do_flip = 1;
1831 }
1832 else
1833 {
1834 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1835 fclose(f);
1836 continue;
1837 }
1838 }
1839
1840 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1841 do_flip? " (byte-inverted)" : "", p);
1842
1843 /* Need to know if UTF-8 for printing data strings */
1844
1845 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1846 use_utf8 = (get_options & PCRE_UTF8) != 0;
1847
1848 /* Now see if there is any following study data. */
1849
1850 if (true_study_size != 0)
1851 {
1852 pcre_study_data *psd;
1853
1854 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1855 extra->flags = PCRE_EXTRA_STUDY_DATA;
1856
1857 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1858 extra->study_data = psd;
1859
1860 if (fread(psd, 1, true_study_size, f) != true_study_size)
1861 {
1862 FAIL_READ:
1863 fprintf(outfile, "Failed to read data from %s\n", p);
1864 if (extra != NULL) pcre_free_study(extra);
1865 if (re != NULL) new_free(re);
1866 fclose(f);
1867 continue;
1868 }
1869 fprintf(outfile, "Study data loaded from %s\n", p);
1870 do_study = 1; /* To get the data output if requested */
1871 }
1872 else fprintf(outfile, "No study data\n");
1873
1874 fclose(f);
1875 goto SHOW_INFO;
1876 }
1877
1878 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1879 the pattern; if it isn't complete, read more. */
1880
1881 delimiter = *p++;
1882
1883 if (isalnum(delimiter) || delimiter == '\\')
1884 {
1885 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1886 goto SKIP_DATA;
1887 }
1888
1889 pp = p;
1890 poffset = (int)(p - buffer);
1891
1892 for(;;)
1893 {
1894 while (*pp != 0)
1895 {
1896 if (*pp == '\\' && pp[1] != 0) pp++;
1897 else if (*pp == delimiter) break;
1898 pp++;
1899 }
1900 if (*pp != 0) break;
1901 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1902 {
1903 fprintf(outfile, "** Unexpected EOF\n");
1904 done = 1;
1905 goto CONTINUE;
1906 }
1907 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1908 }
1909
1910 /* The buffer may have moved while being extended; reset the start of data
1911 pointer to the correct relative point in the buffer. */
1912
1913 p = buffer + poffset;
1914
1915 /* If the first character after the delimiter is backslash, make
1916 the pattern end with backslash. This is purely to provide a way
1917 of testing for the error message when a pattern ends with backslash. */
1918
1919 if (pp[1] == '\\') *pp++ = '\\';
1920
1921 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1922 for callouts. */
1923
1924 *pp++ = 0;
1925 strcpy((char *)pbuffer, (char *)p);
1926
1927 /* Look for options after final delimiter */
1928
1929 options = 0;
1930 study_options = 0;
1931 log_store = showstore; /* default from command line */
1932
1933 while (*pp != 0)
1934 {
1935 switch (*pp++)
1936 {
1937 case 'f': options |= PCRE_FIRSTLINE; break;
1938 case 'g': do_g = 1; break;
1939 case 'i': options |= PCRE_CASELESS; break;
1940 case 'm': options |= PCRE_MULTILINE; break;
1941 case 's': options |= PCRE_DOTALL; break;
1942 case 'x': options |= PCRE_EXTENDED; break;
1943
1944 case '+':
1945 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1946 break;
1947
1948 case '=': do_allcaps = 1; break;
1949 case 'A': options |= PCRE_ANCHORED; break;
1950 case 'B': do_debug = 1; break;
1951 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1952 case 'D': do_debug = do_showinfo = 1; break;
1953 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1954 case 'F': do_flip = 1; break;
1955 case 'G': do_G = 1; break;
1956 case 'I': do_showinfo = 1; break;
1957 case 'J': options |= PCRE_DUPNAMES; break;
1958 case 'K': do_mark = 1; break;
1959 case 'M': log_store = 1; break;
1960 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1961
1962 #if !defined NOPOSIX
1963 case 'P': do_posix = 1; break;
1964 #endif
1965
1966 case 'S':
1967 if (do_study == 0)
1968 {
1969 do_study = 1;
1970 if (*pp == '+')
1971 {
1972 study_options |= PCRE_STUDY_JIT_COMPILE;
1973 pp++;
1974 }
1975 }
1976 else
1977 {
1978 do_study = 0;
1979 no_force_study = 1;
1980 }
1981 break;
1982
1983 case 'U': options |= PCRE_UNGREEDY; break;
1984 case 'W': options |= PCRE_UCP; break;
1985 case 'X': options |= PCRE_EXTRA; break;
1986 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1987 case 'Z': debug_lengths = 0; break;
1988 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1989 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1990
1991 case 'T':
1992 switch (*pp++)
1993 {
1994 case '0': tables = tables0; break;
1995 case '1': tables = tables1; break;
1996
1997 case '\r':
1998 case '\n':
1999 case ' ':
2000 case 0:
2001 fprintf(outfile, "** Missing table number after /T\n");
2002 goto SKIP_DATA;
2003
2004 default:
2005 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2006 goto SKIP_DATA;
2007 }
2008 break;
2009
2010 case 'L':
2011 ppp = pp;
2012 /* The '\r' test here is so that it works on Windows. */
2013 /* The '0' test is just in case this is an unterminated line. */
2014 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2015 *ppp = 0;
2016 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2017 {
2018 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2019 goto SKIP_DATA;
2020 }
2021 locale_set = 1;
2022 tables = pcre_maketables();
2023 pp = ppp;
2024 break;
2025
2026 case '>':
2027 to_file = pp;
2028 while (*pp != 0) pp++;
2029 while (isspace(pp[-1])) pp--;
2030 *pp = 0;
2031 break;
2032
2033 case '<':
2034 {
2035 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2036 {
2037 options |= PCRE_JAVASCRIPT_COMPAT;
2038 pp += 3;
2039 }
2040 else
2041 {
2042 int x = check_newline(pp, outfile);
2043 if (x == 0) goto SKIP_DATA;
2044 options |= x;
2045 while (*pp++ != '>');
2046 }
2047 }
2048 break;
2049
2050 case '\r': /* So that it works in Windows */
2051 case '\n':
2052 case ' ':
2053 break;
2054
2055 default:
2056 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2057 goto SKIP_DATA;
2058 }
2059 }
2060
2061 /* Handle compiling via the POSIX interface, which doesn't support the
2062 timing, showing, or debugging options, nor the ability to pass over
2063 local character tables. Neither does it have 16-bit support. */
2064
2065 #if !defined NOPOSIX
2066 if (posix || do_posix)
2067 {
2068 int rc;
2069 int cflags = 0;
2070
2071 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2072 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2073 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2074 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2075 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2076 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2077 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2078
2079 first_gotten_store = 0;
2080 rc = regcomp(&preg, (char *)p, cflags);
2081
2082 /* Compilation failed; go back for another re, skipping to blank line
2083 if non-interactive. */
2084
2085 if (rc != 0)
2086 {
2087 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2088 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2089 goto SKIP_DATA;
2090 }
2091 }
2092
2093 /* Handle compiling via the native interface */
2094
2095 else
2096 #endif /* !defined NOPOSIX */
2097
2098 {
2099 unsigned long int get_options;
2100
2101 /* In 16-bit mode, convert the input. */
2102
2103 #ifdef SUPPORT_PCRE16
2104 if (use_pcre16)
2105 {
2106 (void)to16(p, options & PCRE_UTF8, (int)strlen((char *)p));
2107 p = (pcre_uint8 *)buffer16;
2108 }
2109 #endif
2110
2111 /* Compile many times when timing */
2112
2113 if (timeit > 0)
2114 {
2115 register int i;
2116 clock_t time_taken;
2117 clock_t start_time = clock();
2118 for (i = 0; i < timeit; i++)
2119 {
2120 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2121 if (re != NULL) free(re);
2122 }
2123 time_taken = clock() - start_time;
2124 fprintf(outfile, "Compile time %.4f milliseconds\n",
2125 (((double)time_taken * 1000.0) / (double)timeit) /
2126 (double)CLOCKS_PER_SEC);
2127 }
2128
2129 first_gotten_store = 0;
2130 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2131
2132 /* Compilation failed; go back for another re, skipping to blank line
2133 if non-interactive. */
2134
2135 if (re == NULL)
2136 {
2137 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2138 SKIP_DATA:
2139 if (infile != stdin)
2140 {
2141 for (;;)
2142 {
2143 if (extend_inputline(infile, buffer, NULL) == NULL)
2144 {
2145 done = 1;
2146 goto CONTINUE;
2147 }
2148 len = (int)strlen((char *)buffer);
2149 while (len > 0 && isspace(buffer[len-1])) len--;
2150 if (len == 0) break;
2151 }
2152 fprintf(outfile, "\n");
2153 }
2154 goto CONTINUE;
2155 }
2156
2157 /* Compilation succeeded. It is now possible to set the UTF-8 option from
2158 within the regex; check for this so that we know how to process the data
2159 lines. */
2160
2161 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2162 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
2163
2164 /* Extract the size for possible writing before possibly flipping it,
2165 and remember the store that was got. */
2166
2167 true_size = ((real_pcre *)re)->size;
2168 regex_gotten_store = first_gotten_store;
2169
2170 /* Output code size information if requested */
2171
2172 if (log_store)
2173 fprintf(outfile, "Memory allocation (code space): %d\n",
2174 (int)(first_gotten_store -
2175 sizeof(real_pcre) -
2176 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2177
2178 /* If -s or /S was present, study the regex to generate additional info to
2179 help with the matching, unless the pattern has the SS option, which
2180 suppresses the effect of /S (used for a few test patterns where studying is
2181 never sensible). */
2182
2183 if (do_study || (force_study >= 0 && !no_force_study))
2184 {
2185 if (timeit > 0)
2186 {
2187 register int i;
2188 clock_t time_taken;
2189 clock_t start_time = clock();
2190 for (i = 0; i < timeit; i++)
2191 {
2192 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2193 }
2194 time_taken = clock() - start_time;
2195 if (extra != NULL) pcre_free_study(extra);
2196 fprintf(outfile, " Study time %.4f milliseconds\n",
2197 (((double)time_taken * 1000.0) / (double)timeit) /
2198 (double)CLOCKS_PER_SEC);
2199 }
2200 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2201 if (error != NULL)
2202 fprintf(outfile, "Failed to study: %s\n", error);
2203 else if (extra != NULL)
2204 {
2205 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2206 if (log_store)
2207 {
2208 size_t jitsize;
2209 new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2210 if (jitsize != 0)
2211 fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
2212 }
2213 }
2214 }
2215
2216 /* If /K was present, we set up for handling MARK data. */
2217
2218 if (do_mark)
2219 {
2220 if (extra == NULL)
2221 {
2222 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2223 extra->flags = 0;
2224 }
2225 extra->mark = &markptr;
2226 extra->flags |= PCRE_EXTRA_MARK;
2227 }
2228
2229 /* If the 'F' option was present, we flip the bytes of all the integer
2230 fields in the regex data block and the study block. This is to make it
2231 possible to test PCRE's handling of byte-flipped patterns, e.g. those
2232 compiled on a different architecture. */
2233
2234 if (do_flip)
2235 {
2236 real_pcre *rre = (real_pcre *)re;
2237 rre->magic_number =
2238 byteflip(rre->magic_number, sizeof(rre->magic_number));
2239 rre->size = byteflip(rre->size, sizeof(rre->size));
2240 rre->options = byteflip(rre->options, sizeof(rre->options));
2241 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
2242 rre->top_bracket =
2243 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
2244 rre->top_backref =
2245 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
2246 rre->first_char =
2247 (pcre_uint16)byteflip(rre->first_char, sizeof(rre->first_char));
2248 rre->req_char =
2249 (pcre_uint16)byteflip(rre->req_char, sizeof(rre->req_char));
2250 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
2251 sizeof(rre->name_table_offset));
2252 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
2253 sizeof(rre->name_entry_size));
2254 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
2255 sizeof(rre->name_count));
2256
2257 if (extra != NULL)
2258 {
2259 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2260 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
2261 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
2262 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
2263 }
2264 }
2265
2266 /* Extract and display information from the compiled data if required. */
2267
2268 SHOW_INFO:
2269
2270 if (do_debug)
2271 {
2272 fprintf(outfile, "------------------------------------------------------------------\n");
2273 if (use_pcre16)
2274 pcre16_printint(re, outfile, debug_lengths);
2275 else
2276 pcre_printint(re, outfile, debug_lengths);
2277 }
2278
2279 /* We already have the options in get_options (see above) */
2280
2281 if (do_showinfo)
2282 {
2283 unsigned long int all_options;
2284 #if !defined NOINFOCHECK
2285 int old_first_char, old_options, old_count;
2286 #endif
2287 int count, backrefmax, first_char, need_char, okpartial, jchanged,
2288 hascrorlf;
2289 int nameentrysize, namecount;
2290 const pcre_uchar *nametable;
2291
2292 new_info(re, NULL, PCRE_INFO_SIZE, &size);
2293 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2294 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
2295 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
2296 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
2297 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
2298 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
2299 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
2300 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
2301 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2302 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2303
2304 /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
2305 that it gives the same results as the new function. */
2306
2307 #if !defined NOINFOCHECK
2308 if (!use_pcre16)
2309 {
2310 old_count = pcre_info(re, &old_options, &old_first_char);
2311 if (count < 0) fprintf(outfile,
2312 "Error %d from pcre_info()\n", count);
2313 else
2314 {
2315 if (old_count != count) fprintf(outfile,
2316 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2317 old_count);
2318
2319 if (old_first_char != first_char) fprintf(outfile,
2320 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2321 first_char, old_first_char);
2322
2323 if (old_options != (int)get_options) fprintf(outfile,
2324 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2325 get_options, old_options);
2326 }
2327 }
2328 #endif
2329
2330 if (size != regex_gotten_store) fprintf(outfile,
2331 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2332 (int)size, (int)regex_gotten_store);
2333
2334 fprintf(outfile, "Capturing subpattern count = %d\n", count);
2335 if (backrefmax > 0)
2336 fprintf(outfile, "Max back reference = %d\n", backrefmax);
2337
2338 if (namecount > 0)
2339 {
2340 fprintf(outfile, "Named capturing subpatterns:\n");
2341 while (namecount-- > 0)
2342 {
2343 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
2344 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2345 GET2(nametable, 0));
2346 nametable += nameentrysize;
2347 }
2348 }
2349
2350 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2351 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2352
2353 all_options = ((real_pcre *)re)->options;
2354 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2355
2356 if (get_options == 0) fprintf(outfile, "No options\n");
2357 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2358 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2359 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2360 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2361 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2362 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2363 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2364 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2365 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2366 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2367 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2368 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2369 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2370 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2371 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2372 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2373 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2374 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2375
2376 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2377
2378 switch (get_options & PCRE_NEWLINE_BITS)
2379 {
2380 case PCRE_NEWLINE_CR:
2381 fprintf(outfile, "Forced newline sequence: CR\n");
2382 break;
2383
2384 case PCRE_NEWLINE_LF:
2385 fprintf(outfile, "Forced newline sequence: LF\n");
2386 break;
2387
2388 case PCRE_NEWLINE_CRLF:
2389 fprintf(outfile, "Forced newline sequence: CRLF\n");
2390 break;
2391
2392 case PCRE_NEWLINE_ANYCRLF:
2393 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2394 break;
2395
2396 case PCRE_NEWLINE_ANY:
2397 fprintf(outfile, "Forced newline sequence: ANY\n");
2398 break;
2399
2400 default:
2401 break;
2402 }
2403
2404 if (first_char == -1)
2405 {
2406 fprintf(outfile, "First char at start or follows newline\n");
2407 }
2408 else if (first_char < 0)
2409 {
2410 fprintf(outfile, "No first char\n");
2411 }
2412 else
2413 {
2414 const char *caseless =
2415 ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2416 "" : " (caseless)";
2417
2418 if (PRINTOK(first_char))
2419 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2420 else
2421 fprintf(outfile, "First char = %d%s\n", first_char, caseless);
2422 }
2423
2424 if (need_char < 0)
2425 {
2426 fprintf(outfile, "No need char\n");
2427 }
2428 else
2429 {
2430 const char *caseless =
2431 ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2432 "" : " (caseless)";
2433
2434 if (PRINTOK(need_char))
2435 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2436 else
2437 fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
2438 }
2439
2440 /* Don't output study size; at present it is in any case a fixed
2441 value, but it varies, depending on the computer architecture, and
2442 so messes up the test suite. (And with the /F option, it might be
2443 flipped.) If study was forced by an external -s, don't show this
2444 information unless -i or -d was also present. This means that, except
2445 when auto-callouts are involved, the output from runs with and without
2446 -s should be identical. */
2447
2448 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2449 {
2450 if (extra == NULL)
2451 fprintf(outfile, "Study returned NULL\n");
2452 else
2453 {
2454 pcre_uint8 *start_bits = NULL;
2455 int minlength;
2456
2457 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2458 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2459
2460 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2461 if (start_bits == NULL)
2462 fprintf(outfile, "No set of starting bytes\n");
2463 else
2464 {
2465 int i;
2466 int c = 24;
2467 fprintf(outfile, "Starting byte set: ");
2468 for (i = 0; i < 256; i++)
2469 {
2470 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2471 {
2472 if (c > 75)
2473 {
2474 fprintf(outfile, "\n ");
2475 c = 2;
2476 }
2477 if (PRINTOK(i) && i != ' ')
2478 {
2479 fprintf(outfile, "%c ", i);
2480 c += 2;
2481 }
2482 else
2483 {
2484 fprintf(outfile, "\\x%02x ", i);
2485 c += 5;
2486 }
2487 }
2488 }
2489 fprintf(outfile, "\n");
2490 }
2491 }
2492
2493 /* Show this only if the JIT was set by /S, not by -s. */
2494
2495 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2496 {
2497 int jit;
2498 new_info(re, extra, PCRE_INFO_JIT, &jit);
2499 if (jit)
2500 fprintf(outfile, "JIT study was successful\n");
2501 else
2502 #ifdef SUPPORT_JIT
2503 fprintf(outfile, "JIT study was not successful\n");
2504 #else
2505 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2506 #endif
2507 }
2508 }
2509 }
2510
2511 /* If the '>' option was present, we write out the regex to a file, and
2512 that is all. The first 8 bytes of the file are the regex length and then
2513 the study length, in big-endian order. */
2514
2515 if (to_file != NULL)
2516 {
2517 FILE *f = fopen((char *)to_file, "wb");
2518 if (f == NULL)
2519 {
2520 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2521 }
2522 else
2523 {
2524 pcre_uint8 sbuf[8];
2525 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2526 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2527 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
2528 sbuf[3] = (pcre_uint8)((true_size) & 255);
2529
2530 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2531 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2532 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
2533 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2534
2535 if (fwrite(sbuf, 1, 8, f) < 8 ||
2536 fwrite(re, 1, true_size, f) < true_size)
2537 {
2538 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2539 }
2540 else
2541 {
2542 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2543
2544 /* If there is study data, write it. */
2545
2546 if (extra != NULL)
2547 {
2548 if (fwrite(extra->study_data, 1, true_study_size, f) <
2549 true_study_size)
2550 {
2551 fprintf(outfile, "Write error on %s: %s\n", to_file,
2552 strerror(errno));
2553 }
2554 else fprintf(outfile, "Study data written to %s\n", to_file);
2555 }
2556 }
2557 fclose(f);
2558 }
2559
2560 new_free(re);
2561 if (extra != NULL) pcre_free_study(extra);
2562 if (locale_set)
2563 {
2564 new_free((void *)tables);
2565 setlocale(LC_CTYPE, "C");
2566 locale_set = 0;
2567 }
2568 continue; /* With next regex */
2569 }
2570 } /* End of non-POSIX compile */
2571
2572 /* Read data lines and test them */
2573
2574 for (;;)
2575 {
2576 pcre_uint8 *q;
2577 pcre_uint8 *bptr;
2578 int *use_offsets = offsets;
2579 int use_size_offsets = size_offsets;
2580 int callout_data = 0;
2581 int callout_data_set = 0;
2582 int count, c;
2583 int copystrings = 0;
2584 int find_match_limit = default_find_match_limit;
2585 int getstrings = 0;
2586 int getlist = 0;
2587 int gmatched = 0;
2588 int start_offset = 0;
2589 int start_offset_sign = 1;
2590 int g_notempty = 0;
2591 int use_dfa = 0;
2592
2593 options = 0;
2594
2595 *copynames = 0;
2596 *getnames = 0;
2597
2598 copynamesptr = copynames;
2599 getnamesptr = getnames;
2600
2601 pcre_callout = callout;
2602 first_callout = 1;
2603 last_callout_mark = NULL;
2604 callout_extra = 0;
2605 callout_count = 0;
2606 callout_fail_count = 999999;
2607 callout_fail_id = -1;
2608 show_malloc = 0;
2609
2610 if (extra != NULL) extra->flags &=
2611 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2612
2613 len = 0;
2614 for (;;)
2615 {
2616 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2617 {
2618 if (len > 0) /* Reached EOF without hitting a newline */
2619 {
2620 fprintf(outfile, "\n");
2621 break;
2622 }
2623 done = 1;
2624 goto CONTINUE;
2625 }
2626 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2627 len = (int)strlen((char *)buffer);
2628 if (buffer[len-1] == '\n') break;
2629 }
2630
2631 while (len > 0 && isspace(buffer[len-1])) len--;
2632 buffer[len] = 0;
2633 if (len == 0) break;
2634
2635 p = buffer;
2636 while (isspace(*p)) p++;
2637
2638 bptr = q = dbuffer;
2639 while ((c = *p++) != 0)
2640 {
2641 int i = 0;
2642 int n = 0;
2643
2644 if (c == '\\') switch ((c = *p++))
2645 {
2646 case 'a': c = 7; break;
2647 case 'b': c = '\b'; break;
2648 case 'e': c = 27; break;
2649 case 'f': c = '\f'; break;
2650 case 'n': c = '\n'; break;
2651 case 'r': c = '\r'; break;
2652 case 't': c = '\t'; break;
2653 case 'v': c = '\v'; break;
2654
2655 case '0': case '1': case '2': case '3':
2656 case '4': case '5': case '6': case '7':
2657 c -= '0';
2658 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2659 c = c * 8 + *p++ - '0';
2660
2661 #if !defined NOUTF8
2662 if (use_utf8 && c > 255)
2663 {
2664 pcre_uint8 buff8[8];
2665 int ii, utn;
2666 utn = ord2utf8(c, buff8);
2667 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2668 c = buff8[ii]; /* Last byte */
2669 }
2670 #endif
2671 break;
2672
2673 case 'x':
2674
2675 /* Handle \x{..} specially - new Perl thing for utf8 */
2676
2677 #if !defined NOUTF8
2678 if (*p == '{')
2679 {
2680 pcre_uint8 *pt = p;
2681 c = 0;
2682
2683 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2684 when isxdigit() is a macro that refers to its argument more than
2685 once. This is banned by the C Standard, but apparently happens in at
2686 least one MacOS environment. */
2687
2688 for (pt++; isxdigit(*pt); pt++)
2689 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2690 if (*pt == '}')
2691 {
2692 pcre_uint8 buff8[8];
2693 int ii, utn;
2694 if (use_utf8)
2695 {
2696 utn = ord2utf8(c, buff8);
2697 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2698 c = buff8[ii]; /* Last byte */
2699 }
2700 else
2701 {
2702 if (c > 255)
2703 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2704 "UTF-8 mode is not enabled.\n"
2705 "** Truncation will probably give the wrong result.\n", c);
2706 }
2707 p = pt + 1;
2708 break;
2709 }
2710 /* Not correct form; fall through */
2711 }
2712 #endif
2713
2714 /* Ordinary \x */
2715
2716 c = 0;
2717 while (i++ < 2 && isxdigit(*p))
2718 {
2719 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2720 p++;
2721 }
2722 break;
2723
2724 case 0: /* \ followed by EOF allows for an empty line */
2725 p--;
2726 continue;
2727
2728 case '>':
2729 if (*p == '-')
2730 {
2731 start_offset_sign = -1;
2732 p++;
2733 }
2734 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2735 start_offset *= start_offset_sign;
2736 continue;
2737
2738 case 'A': /* Option setting */
2739 options |= PCRE_ANCHORED;
2740 continue;
2741
2742 case 'B':
2743 options |= PCRE_NOTBOL;
2744 continue;
2745
2746 case 'C':
2747 if (isdigit(*p)) /* Set copy string */
2748 {
2749 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2750 copystrings |= 1 << n;
2751 }
2752 else if (isalnum(*p))
2753 {
2754 pcre_uchar *npp = copynamesptr;
2755 while (isalnum(*p)) *npp++ = *p++;
2756 *npp++ = 0;
2757 *npp = 0;
2758 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2759 if (n < 0)
2760 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2761 copynamesptr = npp;
2762 }
2763 else if (*p == '+')
2764 {
2765 callout_extra = 1;
2766 p++;
2767 }
2768 else if (*p == '-')
2769 {
2770 pcre_callout = NULL;
2771 p++;
2772 }
2773 else if (*p == '!')
2774 {
2775 callout_fail_id = 0;
2776 p++;
2777 while(isdigit(*p))
2778 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2779 callout_fail_count = 0;
2780 if (*p == '!')
2781 {
2782 p++;
2783 while(isdigit(*p))
2784 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2785 }
2786 }
2787 else if (*p == '*')
2788 {
2789 int sign = 1;
2790 callout_data = 0;
2791 if (*(++p) == '-') { sign = -1; p++; }
2792 while(isdigit(*p))
2793 callout_data = callout_data * 10 + *p++ - '0';
2794 callout_data *= sign;
2795 callout_data_set = 1;
2796 }
2797 continue;
2798
2799 #if !defined NODFA
2800 case 'D':
2801 #if !defined NOPOSIX
2802 if (posix || do_posix)
2803 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2804 else
2805 #endif
2806 use_dfa = 1;
2807 continue;
2808 #endif
2809
2810 #if !defined NODFA
2811 case 'F':
2812 options |= PCRE_DFA_SHORTEST;
2813 continue;
2814 #endif
2815
2816 case 'G':
2817 if (isdigit(*p))
2818 {
2819 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2820 getstrings |= 1 << n;
2821 }
2822 else if (isalnum(*p))
2823 {
2824 pcre_uchar *npp = getnamesptr;
2825 while (isalnum(*p)) *npp++ = *p++;
2826 *npp++ = 0;
2827 *npp = 0;
2828 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2829 if (n < 0)
2830 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2831 getnamesptr = npp;
2832 }
2833 continue;
2834
2835 case 'J':
2836 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2837 if (extra != NULL
2838 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2839 && extra->executable_jit != NULL)
2840 {
2841 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2842 jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2843 pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2844 }
2845 continue;
2846
2847 case 'L':
2848 getlist = 1;
2849 continue;
2850
2851 case 'M':
2852 find_match_limit = 1;
2853 continue;
2854
2855 case 'N':
2856 if ((options & PCRE_NOTEMPTY) != 0)
2857 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2858 else
2859 options |= PCRE_NOTEMPTY;
2860 continue;
2861
2862 case 'O':
2863 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2864 if (n > size_offsets_max)
2865 {
2866 size_offsets_max = n;
2867 free(offsets);
2868 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2869 if (offsets == NULL)
2870 {
2871 printf("** Failed to get %d bytes of memory for offsets vector\n",
2872 (int)(size_offsets_max * sizeof(int)));
2873 yield = 1;
2874 goto EXIT;
2875 }
2876 }
2877 use_size_offsets = n;
2878 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2879 continue;
2880
2881 case 'P':
2882 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2883 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2884 continue;
2885
2886 case 'Q':
2887 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2888 if (extra == NULL)
2889 {
2890 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2891 extra->flags = 0;
2892 }
2893 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2894 extra->match_limit_recursion = n;
2895 continue;
2896
2897 case 'q':
2898 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2899 if (extra == NULL)
2900 {
2901 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2902 extra->flags = 0;
2903 }
2904 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2905 extra->match_limit = n;
2906 continue;
2907
2908 #if !defined NODFA
2909 case 'R':
2910 options |= PCRE_DFA_RESTART;
2911 continue;
2912 #endif
2913
2914 case 'S':
2915 show_malloc = 1;
2916 continue;
2917
2918 case 'Y':
2919 options |= PCRE_NO_START_OPTIMIZE;
2920 continue;
2921
2922 case 'Z':
2923 options |= PCRE_NOTEOL;
2924 continue;
2925
2926 case '?':
2927 options |= PCRE_NO_UTF8_CHECK;
2928 continue;
2929
2930 case '<':
2931 {
2932 int x = check_newline(p, outfile);
2933 if (x == 0) goto NEXT_DATA;
2934 options |= x;
2935 while (*p++ != '>');
2936 }
2937 continue;
2938 }
2939 *q++ = c;
2940 }
2941 *q = 0;
2942 len = (int)(q - dbuffer);
2943
2944 /* Move the data to the end of the buffer so that a read over the end of
2945 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2946 we are using the POSIX interface, we must include the terminating zero. */
2947
2948 #if !defined NOPOSIX
2949 if (posix || do_posix)
2950 {
2951 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2952 bptr += buffer_size - len - 1;
2953 }
2954 else
2955 #endif
2956 {
2957 memmove(bptr + buffer_size - len, bptr, len);
2958 bptr += buffer_size - len;
2959 }
2960
2961 if ((all_use_dfa || use_dfa) && find_match_limit)
2962 {
2963 printf("**Match limit not relevant for DFA matching: ignored\n");
2964 find_match_limit = 0;
2965 }
2966
2967 /* Handle matching via the POSIX interface, which does not
2968 support timing or playing with the match limit or callout data. */
2969
2970 #if !defined NOPOSIX
2971 if (posix || do_posix)
2972 {
2973 int rc;
2974 int eflags = 0;
2975 regmatch_t *pmatch = NULL;
2976 if (use_size_offsets > 0)
2977 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2978 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2979 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2980 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2981
2982 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2983
2984 if (rc != 0)
2985 {
2986 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2987 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2988 }
2989 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2990 != 0)
2991 {
2992 fprintf(outfile, "Matched with REG_NOSUB\n");
2993 }
2994 else
2995 {
2996 size_t i;
2997 for (i = 0; i < (size_t)use_size_offsets; i++)
2998 {
2999 if (pmatch[i].rm_so >= 0)
3000 {
3001 fprintf(outfile, "%2d: ", (int)i);
3002 PCHARSV(dbuffer + pmatch[i].rm_so,
3003 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3004 fprintf(outfile, "\n");
3005 if (do_showcaprest || (i == 0 && do_showrest))
3006 {
3007 fprintf(outfile, "%2d+ ", (int)i);
3008 PCHARSV(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3009 outfile);
3010 fprintf(outfile, "\n");
3011 }
3012 }
3013 }
3014 }
3015 free(pmatch);
3016 goto NEXT_DATA;
3017 }
3018
3019 #endif /* !defined NOPOSIX */
3020
3021 /* Handle matching via the native interface - repeats for /g and /G */
3022
3023 #ifdef SUPPORT_PCRE16
3024 if (use_pcre16)
3025 {
3026 len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3027 bptr = (pcre_uint8 *)buffer16;
3028 }
3029 #endif
3030
3031 for (;; gmatched++) /* Loop for /g or /G */
3032 {
3033 markptr = NULL;
3034
3035 if (timeitm > 0)
3036 {
3037 register int i;
3038 clock_t time_taken;
3039 clock_t start_time = clock();
3040
3041 #if !defined NODFA
3042 if (all_use_dfa || use_dfa)
3043 {
3044 int workspace[1000];
3045 for (i = 0; i < timeitm; i++)
3046 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3047 options | g_notempty, use_offsets, use_size_offsets, workspace,
3048 sizeof(workspace)/sizeof(int));
3049 }
3050 else
3051 #endif
3052
3053 for (i = 0; i < timeitm; i++)
3054 {
3055 PCRE_EXEC(count, re, extra, bptr, len,
3056 start_offset, options | g_notempty, use_offsets, use_size_offsets);
3057 }
3058 time_taken = clock() - start_time;
3059 fprintf(outfile, "Execute time %.4f milliseconds\n",
3060 (((double)time_taken * 1000.0) / (double)timeitm) /
3061 (double)CLOCKS_PER_SEC);
3062 }
3063
3064 /* If find_match_limit is set, we want to do repeated matches with
3065 varying limits in order to find the minimum value for the match limit and
3066 for the recursion limit. The match limits are relevant only to the normal
3067 running of pcre_exec(), so disable the JIT optimization. This makes it
3068 possible to run the same set of tests with and without JIT externally
3069 requested. */
3070
3071 if (find_match_limit)
3072 {
3073 if (extra == NULL)
3074 {
3075 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3076 extra->flags = 0;
3077 }
3078 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3079
3080 (void)check_match_limit(re, extra, bptr, len, start_offset,
3081 options|g_notempty, use_offsets, use_size_offsets,
3082 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3083 PCRE_ERROR_MATCHLIMIT, "match()");
3084
3085 count = check_match_limit(re, extra, bptr, len, start_offset,
3086 options|g_notempty, use_offsets, use_size_offsets,
3087 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3088 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3089 }
3090
3091 /* If callout_data is set, use the interface with additional data */
3092
3093 else if (callout_data_set)
3094 {
3095 if (extra == NULL)
3096 {
3097 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3098 extra->flags = 0;
3099 }
3100 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3101 extra->callout_data = &callout_data;
3102 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3103 options | g_notempty, use_offsets, use_size_offsets);
3104 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3105 }
3106
3107 /* The normal case is just to do the match once, with the default
3108 value of match_limit. */
3109
3110 #if !defined NODFA
3111 else if (all_use_dfa || use_dfa)
3112 {
3113 int workspace[1000];
3114 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3115 options | g_notempty, use_offsets, use_size_offsets, workspace,
3116 sizeof(workspace)/sizeof(int));
3117 if (count == 0)
3118 {
3119 fprintf(outfile, "Matched, but too many subsidiary matches\n");
3120 count = use_size_offsets/2;
3121 }
3122 }
3123 #endif
3124
3125 else
3126 {
3127 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3128 options | g_notempty, use_offsets, use_size_offsets);
3129 if (count == 0)
3130 {
3131 fprintf(outfile, "Matched, but too many substrings\n");
3132 count = use_size_offsets/3;
3133 }
3134 }
3135
3136 /* Matched */
3137
3138 if (count >= 0)
3139 {
3140 int i, maxcount;
3141
3142 #if !defined NODFA
3143 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3144 #endif
3145 maxcount = use_size_offsets/3;
3146
3147 /* This is a check against a lunatic return value. */
3148
3149 if (count > maxcount)
3150 {
3151 fprintf(outfile,
3152 "** PCRE error: returned count %d is too big for offset size %d\n",
3153 count, use_size_offsets);
3154 count = use_size_offsets/3;
3155 if (do_g || do_G)
3156 {
3157 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3158 do_g = do_G = FALSE; /* Break g/G loop */
3159 }
3160 }
3161
3162 /* do_allcaps requests showing of all captures in the pattern, to check
3163 unset ones at the end. */
3164
3165 if (do_allcaps)
3166 {
3167 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
3168 count++; /* Allow for full match */
3169 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3170 }
3171
3172 /* Output the captured substrings */
3173
3174 for (i = 0; i < count * 2; i += 2)
3175 {
3176 if (use_offsets[i] < 0)
3177 {
3178 if (use_offsets[i] != -1)
3179 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3180 use_offsets[i], i);
3181 if (use_offsets[i+1] != -1)
3182 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3183 use_offsets[i+1], i+1);
3184 fprintf(outfile, "%2d: <unset>\n", i/2);
3185 }
3186 else
3187 {
3188 fprintf(outfile, "%2d: ", i/2);
3189 PCHARSV(bptr + use_offsets[i],
3190 use_offsets[i+1] - use_offsets[i], outfile);
3191 fprintf(outfile, "\n");
3192 if (do_showcaprest || (i == 0 && do_showrest))
3193 {
3194 fprintf(outfile, "%2d+ ", i/2);
3195 PCHARSV(bptr + use_offsets[i+1], len - use_offsets[i+1],
3196 outfile);
3197 fprintf(outfile, "\n");
3198 }
3199 }
3200 }
3201
3202 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
3203
3204 for (i = 0; i < 32; i++)
3205 {
3206 if ((copystrings & (1 << i)) != 0)
3207 {
3208 char copybuffer[256];
3209 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
3210 i, copybuffer, sizeof(copybuffer));
3211 if (rc < 0)
3212 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
3213 else
3214 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
3215 }
3216 }
3217
3218 for (copynamesptr = copynames;
3219 *copynamesptr != 0;
3220 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
3221 {
3222 char copybuffer[256];
3223 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
3224 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
3225 if (rc < 0)
3226 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
3227 else
3228 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
3229 }
3230
3231 for (i = 0; i < 32; i++)
3232 {
3233 if ((getstrings & (1 << i)) != 0)
3234 {
3235 const char *substring;
3236 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
3237 i, &substring);
3238 if (rc < 0)
3239 fprintf(outfile, "get substring %d failed %d\n", i, rc);
3240 else
3241 {
3242 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
3243 pcre_free_substring(substring);
3244 }
3245 }
3246 }
3247
3248 for (getnamesptr = getnames;
3249 *getnamesptr != 0;
3250 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
3251 {
3252 const char *substring;
3253 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
3254 count, (char *)getnamesptr, &substring);
3255 if (rc < 0)
3256 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
3257 else
3258 {
3259 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
3260 pcre_free_substring(substring);
3261 }
3262 }
3263
3264 if (getlist)
3265 {
3266 const char **stringlist;
3267 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
3268 &stringlist);
3269 if (rc < 0)
3270 fprintf(outfile, "get substring list failed %d\n", rc);
3271 else
3272 {
3273 for (i = 0; i < count; i++)
3274 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
3275 if (stringlist[i] != NULL)
3276 fprintf(outfile, "string list not terminated by NULL\n");
3277 pcre_free_substring_list(stringlist);
3278 }
3279 }
3280 }
3281
3282 /* There was a partial match */
3283
3284 else if (count == PCRE_ERROR_PARTIAL)
3285 {
3286 if (markptr == NULL) fprintf(outfile, "Partial match");
3287 else fprintf(outfile, "Partial match, mark=%s", markptr);
3288 if (use_size_offsets > 1)
3289 {
3290 fprintf(outfile, ": ");
3291 PCHARSV(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
3292 outfile);
3293 }
3294 fprintf(outfile, "\n");
3295 break; /* Out of the /g loop */
3296 }
3297
3298 /* Failed to match. If this is a /g or /G loop and we previously set
3299 g_notempty after a null match, this is not necessarily the end. We want
3300 to advance the start offset, and continue. We won't be at the end of the
3301 string - that was checked before setting g_notempty.
3302
3303 Complication arises in the case when the newline convention is "any",
3304 "crlf", or "anycrlf". If the previous match was at the end of a line
3305 terminated by CRLF, an advance of one character just passes the \r,
3306 whereas we should prefer the longer newline sequence, as does the code in
3307 pcre_exec(). Fudge the offset value to achieve this. We check for a
3308 newline setting in the pattern; if none was set, use pcre_config() to
3309 find the default.
3310
3311 Otherwise, in the case of UTF-8 matching, the advance must be one
3312 character, not one byte. */
3313
3314 else
3315 {
3316 if (g_notempty != 0)
3317 {
3318 int onechar = 1;
3319 unsigned int obits = ((real_pcre *)re)->options;
3320 use_offsets[0] = start_offset;
3321 if ((obits & PCRE_NEWLINE_BITS) == 0)
3322 {
3323 int d;
3324 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
3325 /* Note that these values are always the ASCII ones, even in
3326 EBCDIC environments. CR = 13, NL = 10. */
3327 obits = (d == 13)? PCRE_NEWLINE_CR :
3328 (d == 10)? PCRE_NEWLINE_LF :
3329 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3330 (d == -2)? PCRE_NEWLINE_ANYCRLF :
3331 (d == -1)? PCRE_NEWLINE_ANY : 0;
3332 }
3333 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3334 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3335 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3336 &&
3337 start_offset < len - 1 &&
3338 bptr[start_offset] == '\r' &&
3339 bptr[start_offset+1] == '\n')
3340 onechar++;
3341 else if (use_utf8)
3342 {
3343 while (start_offset + onechar < len)
3344 {
3345 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3346 onechar++;
3347 }
3348 }
3349 use_offsets[1] = start_offset + onechar;
3350 }
3351 else
3352 {
3353 switch(count)
3354 {
3355 case PCRE_ERROR_NOMATCH:
3356 if (gmatched == 0)
3357 {
3358 if (markptr == NULL) fprintf(outfile, "No match\n");
3359 else fprintf(outfile, "No match, mark = %s\n", markptr);
3360 }
3361 break;
3362
3363 case PCRE_ERROR_BADUTF8:
3364 case PCRE_ERROR_SHORTUTF8:
3365 fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3366 (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3367 if (use_size_offsets >= 2)
3368 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3369 use_offsets[1]);
3370 fprintf(outfile, "\n");
3371 break;
3372
3373 default:
3374 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3375 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3376 else
3377 fprintf(outfile, "Error %d (Unexpected value)\n", count);
3378 break;
3379 }
3380
3381 break; /* Out of the /g loop */
3382 }
3383 }
3384
3385 /* If not /g or /G we are done */
3386
3387 if (!do_g && !do_G) break;
3388
3389 /* If we have matched an empty string, first check to see if we are at
3390 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3391 Perl's /g option does. This turns out to be rather cunning. First we set
3392 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3393 same point. If this fails (picked up above) we advance to the next
3394 character. */
3395
3396 g_notempty = 0;
3397
3398 if (use_offsets[0] == use_offsets[1])
3399 {
3400 if (use_offsets[0] == len) break;
3401 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3402 }
3403
3404 /* For /g, update the start offset, leaving the rest alone */
3405
3406 if (do_g) start_offset = use_offsets[1];
3407
3408 /* For /G, update the pointer and length */
3409
3410 else
3411 {
3412 bptr += use_offsets[1];
3413 len -= use_offsets[1];
3414 }
3415 } /* End of loop for /g and /G */
3416
3417 NEXT_DATA: continue;
3418 } /* End of loop for data lines */
3419
3420 CONTINUE:
3421
3422 #if !defined NOPOSIX
3423 if (posix || do_posix) regfree(&preg);
3424 #endif
3425
3426 if (re != NULL) new_free(re);
3427 if (extra != NULL) pcre_free_study(extra);
3428 if (locale_set)
3429 {
3430 new_free((void *)tables);
3431 setlocale(LC_CTYPE, "C");
3432 locale_set = 0;
3433 }
3434 if (jit_stack != NULL)
3435 {
3436 pcre_jit_stack_free(jit_stack);
3437 jit_stack = NULL;
3438 }
3439 }
3440
3441 if (infile == stdin) fprintf(outfile, "\n");
3442
3443 EXIT:
3444
3445 if (infile != NULL && infile != stdin) fclose(infile);
3446 if (outfile != NULL && outfile != stdout) fclose(outfile);
3447
3448 free(buffer);
3449 free(dbuffer);
3450 free(pbuffer);
3451 free(offsets);
3452
3453 #ifdef SUPPORT_PCRE16
3454 if (buffer16 != NULL) free(buffer16);
3455 #endif
3456
3457 return yield;
3458 }
3459
3460 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12