/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Contents of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 809 - (show annotations) (download)
Mon Dec 19 11:04:45 2011 UTC (2 years, 10 months ago) by zherczeg
File MIME type: text/plain
File size: 103658 byte(s)
fixing existing and adding new byte-order related functions
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <stdio.h>
46 #include <string.h>
47 #include <stdlib.h>
48 #include <time.h>
49 #include <locale.h>
50 #include <errno.h>
51
52 #ifdef SUPPORT_LIBREADLINE
53 #ifdef HAVE_UNISTD_H
54 #include <unistd.h>
55 #endif
56 #include <readline/readline.h>
57 #include <readline/history.h>
58 #endif
59
60
61 /* A number of things vary for Windows builds. Originally, pcretest opened its
62 input and output without "b"; then I was told that "b" was needed in some
63 environments, so it was added for release 5.0 to both the input and output. (It
64 makes no difference on Unix-like systems.) Later I was told that it is wrong
65 for the input on Windows. I've now abstracted the modes into two macros that
66 are set here, to make it easier to fiddle with them, and removed "b" from the
67 input mode under Windows. */
68
69 #if defined(_WIN32) || defined(WIN32)
70 #include <io.h> /* For _setmode() */
71 #include <fcntl.h> /* For _O_BINARY */
72 #define INPUT_MODE "r"
73 #define OUTPUT_MODE "wb"
74
75 #ifndef isatty
76 #define isatty _isatty /* This is what Windows calls them, I'm told, */
77 #endif /* though in some environments they seem to */
78 /* be already defined, hence the #ifndefs. */
79 #ifndef fileno
80 #define fileno _fileno
81 #endif
82
83 /* A user sent this fix for Borland Builder 5 under Windows. */
84
85 #ifdef __BORLANDC__
86 #define _setmode(handle, mode) setmode(handle, mode)
87 #endif
88
89 /* Not Windows */
90
91 #else
92 #include <sys/time.h> /* These two includes are needed */
93 #include <sys/resource.h> /* for setrlimit(). */
94 #define INPUT_MODE "rb"
95 #define OUTPUT_MODE "wb"
96 #endif
97
98
99 /* We have to include pcre_internal.h because we need the internal info for
100 displaying the results of pcre_study() and we also need to know about the
101 internal macros, structures, and other internal data values; pcretest has
102 "inside information" compared to a program that strictly follows the PCRE API.
103
104 Although pcre_internal.h does itself include pcre.h, we explicitly include it
105 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
106 appropriately for an application, not for building PCRE. */
107
108 #include "pcre.h"
109 #include "pcre_internal.h"
110
111 /* The pcre_printint() function, which prints the internal form of a compiled
112 regex, is held in a separate file so that (a) it can be compiled in either
113 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
114 when that is compiled in debug mode. */
115
116 #ifdef SUPPORT_PCRE8
117 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
118 #endif
119 #ifdef SUPPORT_PCRE16
120 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
121 #endif
122
123 /* We need access to some of the data tables that PCRE uses. So as not to have
124 to keep two copies, we include the source file here, changing the names of the
125 external symbols to prevent clashes. */
126
127 #define _pcre_ucp_gentype ucp_gentype
128 #define _pcre_ucp_typerange ucp_typerange
129 #define _pcre_utf8_table1 utf8_table1
130 #define _pcre_utf8_table1_size utf8_table1_size
131 #define _pcre_utf8_table2 utf8_table2
132 #define _pcre_utf8_table3 utf8_table3
133 #define _pcre_utf8_table4 utf8_table4
134 #define _pcre_utt utt
135 #define _pcre_utt_size utt_size
136 #define _pcre_utt_names utt_names
137 #define _pcre_OP_lengths OP_lengths
138
139 #include "pcre_tables.c"
140
141 /* The definition of the macro PRINTABLE, which determines whether to print an
142 output character as-is or as a hex value when showing compiled patterns, is
143 the same as in the printint.src file. We use it here in cases when the locale
144 has not been explicitly changed, so as to get consistent output from systems
145 that differ in their output from isprint() even in the "C" locale. */
146
147 #ifdef EBCDIC
148 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
149 #else
150 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
151 #endif
152
153 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
154
155 /* It is possible to compile this test program without including support for
156 testing the POSIX interface, though this is not available via the standard
157 Makefile. */
158
159 #if !defined NOPOSIX
160 #include "pcreposix.h"
161 #endif
162
163 /* It is also possible, originally for the benefit of a version that was
164 imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),
165 without the interface to the DFA matcher (NODFA), and without the doublecheck
166 of the old "info" function (define NOINFOCHECK). In fact, we automatically cut
167 out the UTF8 support if PCRE is built without it. */
168
169 #ifndef SUPPORT_UTF8
170 #ifndef NOUTF8
171 #define NOUTF8
172 #endif
173 #endif
174
175 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
176 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
177 only from one place and is handled differently). I couldn't dream up any way of
178 using a single macro to do this in a generic way, because of the many different
179 argument requirements. We know that at least one of SUPPORT_PCRE8 and
180 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
181 use these in the definitions of generic macros. */
182
183 #ifdef SUPPORT_PCRE8
/* 8-bit wrappers. Each one expands to a single statement (without a trailing
semicolon) that calls the corresponding 8-bit function. */

/* Call pchars() on p viewed as pcre_uint8 *, and assign its result to lv. */
184 #define PCHARS8(lv, p, len, f) \
185 lv = pchars((pcre_uint8 *)p, len, f)
186
/* As PCHARS8, but the result of pchars() is explicitly discarded. */
187 #define PCHARSV8(p, len, f) \
188 (void)pchars((pcre_uint8 *)p, len, f)
189
/* Call pcre_compile() on pat viewed as char *, and assign the result to re. */
190 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
191 re = pcre_compile((char *)pat, options, error, erroffset, tables)
192
/* Call pcre_exec() on bptr viewed as char *, and assign the result to count. */
193 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
194 offsets, size_offsets) \
195 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
196 offsets, size_offsets)
197
/* Call pcre_study() and assign the result to extra. */
198 #define PCRE_STUDY8(extra, re, options, error) \
199 extra = pcre_study(re, options, error)
200
/* Pass extra to pcre_free_study(). */
201 #define PCRE_FREE_STUDY8(extra) \
202 pcre_free_study(extra)
203
204 #endif /* SUPPORT_PCRE8 */
205
206
207 #ifdef SUPPORT_PCRE16
/* 16-bit wrappers. They mirror the 8-bit set, but cast the string argument to
PCRE_SPTR16 and call the pcre16_xxx functions (or pchars16()). */

/* Call pchars16() on p viewed as PCRE_SPTR16, and assign its result to lv. */
208 #define PCHARS16(lv, p, len, f) \
209 lv = pchars16((PCRE_SPTR16)p, len, f)
210
/* As PCHARS16, but the result of pchars16() is explicitly discarded. */
211 #define PCHARSV16(p, len, f) \
212 (void)pchars16((PCRE_SPTR16)p, len, f)
213
/* Call pcre16_compile() on pat viewed as PCRE_SPTR16; result goes to re. */
214 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
215 re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
216
/* Call pcre16_exec() on bptr viewed as PCRE_SPTR16; result goes to count. */
217 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
218 offsets, size_offsets) \
219 count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
220 options, offsets, size_offsets)
221
/* Call pcre16_study() and assign the result to extra. */
222 #define PCRE_STUDY16(extra, re, options, error) \
223 extra = pcre16_study(re, options, error)
224
/* Pass extra to pcre16_free_study(). */
225 #define PCRE_FREE_STUDY16(extra) \
226 pcre16_free_study(extra)
227
228 #endif /* SUPPORT_PCRE16 */
229
230
231 /* ----- Both modes are supported; a runtime test is needed ----- */
232
233 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
234
/* When both libraries are available, each generic macro expands to an unbraced
"if (use_pcre16) <16-bit form>; else <8-bit form>" statement with no trailing
semicolon. It must therefore be used as a complete statement followed by ';',
never as an expression. The selection is made at run time from use_pcre16. */
235 #define PCHARS(lv, p, len, f) \
236 if (use_pcre16) \
237 PCHARS16(lv, p, len, f); \
238 else \
239 PCHARS8(lv, p, len, f)
240
241 #define PCHARSV(p, len, f) \
242 if (use_pcre16) \
243 PCHARSV16(p, len, f); \
244 else \
245 PCHARSV8(p, len, f)
246
247 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
248 if (use_pcre16) \
249 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
250 else \
251 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
252
253 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
254 offsets, size_offsets) \
255 if (use_pcre16) \
256 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
257 offsets, size_offsets); \
258 else \
259 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
260 offsets, size_offsets)
261
262 #define PCRE_STUDY(extra, re, options, error) \
263 if (use_pcre16) \
264 PCRE_STUDY16(extra, re, options, error); \
265 else \
266 PCRE_STUDY8(extra, re, options, error)
267
268 #define PCRE_FREE_STUDY(extra) \
269 if (use_pcre16) \
270 PCRE_FREE_STUDY16(extra); \
271 else \
272 PCRE_FREE_STUDY8(extra)
273
274 /* ----- Only 8-bit mode is supported ----- */
275
/* With a single library built in, the generic names are plain aliases for the
mode-specific macros and no run-time test is generated. */
276 #elif defined SUPPORT_PCRE8
277 #define PCHARS PCHARS8
278 #define PCHARSV PCHARSV8
279 #define PCRE_COMPILE PCRE_COMPILE8
280 #define PCRE_EXEC PCRE_EXEC8
281 #define PCRE_STUDY PCRE_STUDY8
282 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
283
284 /* ----- Only 16-bit mode is supported ----- */
285
286 #else
287 #define PCHARS PCHARS16
288 #define PCHARSV PCHARSV16
289 #define PCRE_COMPILE PCRE_COMPILE16
290 #define PCRE_EXEC PCRE_EXEC16
291 #define PCRE_STUDY PCRE_STUDY16
292 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
293 #endif
294
295 /* ----- End of mode-specific function call macros ----- */
296
297
298 /* Other parameters */
299
300 #ifndef CLOCKS_PER_SEC
301 #ifdef CLK_TCK
302 #define CLOCKS_PER_SEC CLK_TCK
303 #else
304 #define CLOCKS_PER_SEC 100
305 #endif
306 #endif
307
308 /* This is the default loop count for timing. */
309
310 #define LOOPREPEAT 500000
311
312 /* Static variables */
313
314 static FILE *outfile;
315 static int log_store = 0;
316 static int callout_count;
317 static int callout_extra;
318 static int callout_fail_count;
319 static int callout_fail_id;
320 static int debug_lengths;
321 static int first_callout;
322 static int locale_set = 0;
323 static int show_malloc;
324 static int use_utf8;
325 static size_t gotten_store;
326 static size_t first_gotten_store = 0;
327 static const unsigned char *last_callout_mark = NULL;
328
329 /* The buffers grow automatically if very long input lines are encountered. */
330
331 static int buffer_size = 50000;
332 static pcre_uint8 *buffer = NULL;
333 static pcre_uint8 *dbuffer = NULL;
334 static pcre_uint8 *pbuffer = NULL;
335
336 #ifdef SUPPORT_PCRE16
337 static int buffer16_size = 0;
338 static pcre_uint16 *buffer16 = NULL;
339 #endif
340
341 /* If we have 8-bit support, default use_pcre16 to false; if there is also
342 16-bit support, it can be changed by an option. If there is no 8-bit support,
343 there must be 16-bit support, so default it to 1. */
344
345 #ifdef SUPPORT_PCRE8
346 static int use_pcre16 = 0;
347 #else
348 static int use_pcre16 = 1;
349 #endif
350
351 /* Textual explanations for runtime error codes */
352
/* Table of messages for run-time error codes. A NULL entry marks a code that
has no text here (see the per-entry notes). NOTE(review): the index is
presumably the negated PCRE_ERROR_xxx value (NOMATCH at index 1) - confirm
against the code that reads this table. */
353 static const char *errtexts[] = {
354 NULL, /* 0 is no error */
355 NULL, /* NOMATCH is handled specially */
356 "NULL argument passed",
357 "bad option value",
358 "magic number missing",
359 "unknown opcode - pattern overwritten?",
360 "no more memory",
361 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
362 "match limit exceeded",
363 "callout error code",
364 NULL, /* BADUTF8 is handled specially */
365 "bad UTF-8 offset",
366 NULL, /* PARTIAL is handled specially */
367 "not used - internal error",
368 "internal error - pattern overwritten?",
369 "bad count value",
370 "item unsupported for DFA matching",
371 "backreference condition or recursion test not supported for DFA matching",
372 "match limit not supported for DFA matching",
373 "workspace size exceeded in DFA matching",
374 "too much recursion for DFA matching",
375 "recursion limit exceeded",
376 "not used - internal error",
377 "invalid combination of newline options",
378 "bad offset value",
379 NULL, /* SHORTUTF8 is handled specially */
380 "nested recursion at the same subject position",
381 "JIT stack limit reached",
382 "pattern compiled in wrong mode (8-bit/16-bit error)"
383 };
384
385
386 /*************************************************
387 * Alternate character tables *
388 *************************************************/
389
390 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
391 using the default tables of the library. However, the T option can be used to
392 select alternate sets of tables, for different kinds of testing. Note also that
393 the L (locale) option also adjusts the tables. */
394
395 /* This is the set of tables distributed as default with PCRE. It recognizes
396 only ASCII characters. */
397
398 static const pcre_uint8 tables0[] = {
399
400 /* This table is a lower casing table. */
401
402 0, 1, 2, 3, 4, 5, 6, 7,
403 8, 9, 10, 11, 12, 13, 14, 15,
404 16, 17, 18, 19, 20, 21, 22, 23,
405 24, 25, 26, 27, 28, 29, 30, 31,
406 32, 33, 34, 35, 36, 37, 38, 39,
407 40, 41, 42, 43, 44, 45, 46, 47,
408 48, 49, 50, 51, 52, 53, 54, 55,
409 56, 57, 58, 59, 60, 61, 62, 63,
410 64, 97, 98, 99,100,101,102,103,
411 104,105,106,107,108,109,110,111,
412 112,113,114,115,116,117,118,119,
413 120,121,122, 91, 92, 93, 94, 95,
414 96, 97, 98, 99,100,101,102,103,
415 104,105,106,107,108,109,110,111,
416 112,113,114,115,116,117,118,119,
417 120,121,122,123,124,125,126,127,
418 128,129,130,131,132,133,134,135,
419 136,137,138,139,140,141,142,143,
420 144,145,146,147,148,149,150,151,
421 152,153,154,155,156,157,158,159,
422 160,161,162,163,164,165,166,167,
423 168,169,170,171,172,173,174,175,
424 176,177,178,179,180,181,182,183,
425 184,185,186,187,188,189,190,191,
426 192,193,194,195,196,197,198,199,
427 200,201,202,203,204,205,206,207,
428 208,209,210,211,212,213,214,215,
429 216,217,218,219,220,221,222,223,
430 224,225,226,227,228,229,230,231,
431 232,233,234,235,236,237,238,239,
432 240,241,242,243,244,245,246,247,
433 248,249,250,251,252,253,254,255,
434
435 /* This table is a case flipping table. */
436
437 0, 1, 2, 3, 4, 5, 6, 7,
438 8, 9, 10, 11, 12, 13, 14, 15,
439 16, 17, 18, 19, 20, 21, 22, 23,
440 24, 25, 26, 27, 28, 29, 30, 31,
441 32, 33, 34, 35, 36, 37, 38, 39,
442 40, 41, 42, 43, 44, 45, 46, 47,
443 48, 49, 50, 51, 52, 53, 54, 55,
444 56, 57, 58, 59, 60, 61, 62, 63,
445 64, 97, 98, 99,100,101,102,103,
446 104,105,106,107,108,109,110,111,
447 112,113,114,115,116,117,118,119,
448 120,121,122, 91, 92, 93, 94, 95,
449 96, 65, 66, 67, 68, 69, 70, 71,
450 72, 73, 74, 75, 76, 77, 78, 79,
451 80, 81, 82, 83, 84, 85, 86, 87,
452 88, 89, 90,123,124,125,126,127,
453 128,129,130,131,132,133,134,135,
454 136,137,138,139,140,141,142,143,
455 144,145,146,147,148,149,150,151,
456 152,153,154,155,156,157,158,159,
457 160,161,162,163,164,165,166,167,
458 168,169,170,171,172,173,174,175,
459 176,177,178,179,180,181,182,183,
460 184,185,186,187,188,189,190,191,
461 192,193,194,195,196,197,198,199,
462 200,201,202,203,204,205,206,207,
463 208,209,210,211,212,213,214,215,
464 216,217,218,219,220,221,222,223,
465 224,225,226,227,228,229,230,231,
466 232,233,234,235,236,237,238,239,
467 240,241,242,243,244,245,246,247,
468 248,249,250,251,252,253,254,255,
469
470 /* This table contains bit maps for various character classes. Each map is 32
471 bytes long and the bits run from the least significant end of each byte. The
472 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
473 graph, print, punct, and cntrl. Other classes are built from combinations. */
474
475 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
476 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
477 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
478 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
479
480 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
481 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
482 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
483 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
484
485 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
486 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
487 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
488 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
489
490 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
491 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
492 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
493 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
494
495 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
496 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
497 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
498 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
499
500 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
501 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
502 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
503 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
504
505 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
506 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
507 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
508 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
509
510 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
511 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
512 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
513 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
514
515 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
516 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
517 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
518 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
519
520 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
521 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
522 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
523 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
524
525 /* This table identifies various classes of character by individual bits:
526 0x01 white space character
527 0x02 letter
528 0x04 decimal digit
529 0x08 hexadecimal digit
530 0x10 alphanumeric or '_'
531 0x80 regular expression metacharacter or binary zero
532 */
533
534 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
535 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
536 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
537 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
538 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
539 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
540 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
541 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
542 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
543 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
544 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
545 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
546 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
547 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
548 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
549 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
550 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
551 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
552 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
553 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
554 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
555 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
556 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
557 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
558 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
559 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
560 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
561 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
562 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
563 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
564 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
565 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
566
567 /* This is a set of tables that came originally from a Windows user. It seems to
568 be at least an approximation of ISO 8859. In particular, there are characters
569 greater than 128 that are marked as spaces, letters, etc. */
570
/* The section sizes below (256 + 256 + 320 + 256 entries) match the layout that
is documented for tables0; the section descriptions are taken from there. */
571 static const pcre_uint8 tables1[] = {
/* Lower casing table: 256 entries. Unlike tables0, values 192-222 (other than
215) also map to value+32. */
572 0,1,2,3,4,5,6,7,
573 8,9,10,11,12,13,14,15,
574 16,17,18,19,20,21,22,23,
575 24,25,26,27,28,29,30,31,
576 32,33,34,35,36,37,38,39,
577 40,41,42,43,44,45,46,47,
578 48,49,50,51,52,53,54,55,
579 56,57,58,59,60,61,62,63,
580 64,97,98,99,100,101,102,103,
581 104,105,106,107,108,109,110,111,
582 112,113,114,115,116,117,118,119,
583 120,121,122,91,92,93,94,95,
584 96,97,98,99,100,101,102,103,
585 104,105,106,107,108,109,110,111,
586 112,113,114,115,116,117,118,119,
587 120,121,122,123,124,125,126,127,
588 128,129,130,131,132,133,134,135,
589 136,137,138,139,140,141,142,143,
590 144,145,146,147,148,149,150,151,
591 152,153,154,155,156,157,158,159,
592 160,161,162,163,164,165,166,167,
593 168,169,170,171,172,173,174,175,
594 176,177,178,179,180,181,182,183,
595 184,185,186,187,188,189,190,191,
596 224,225,226,227,228,229,230,231,
597 232,233,234,235,236,237,238,239,
598 240,241,242,243,244,245,246,215,
599 248,249,250,251,252,253,254,223,
600 224,225,226,227,228,229,230,231,
601 232,233,234,235,236,237,238,239,
602 240,241,242,243,244,245,246,247,
603 248,249,250,251,252,253,254,255,
/* Case flipping table: 256 entries. */
604 0,1,2,3,4,5,6,7,
605 8,9,10,11,12,13,14,15,
606 16,17,18,19,20,21,22,23,
607 24,25,26,27,28,29,30,31,
608 32,33,34,35,36,37,38,39,
609 40,41,42,43,44,45,46,47,
610 48,49,50,51,52,53,54,55,
611 56,57,58,59,60,61,62,63,
612 64,97,98,99,100,101,102,103,
613 104,105,106,107,108,109,110,111,
614 112,113,114,115,116,117,118,119,
615 120,121,122,91,92,93,94,95,
616 96,65,66,67,68,69,70,71,
617 72,73,74,75,76,77,78,79,
618 80,81,82,83,84,85,86,87,
619 88,89,90,123,124,125,126,127,
620 128,129,130,131,132,133,134,135,
621 136,137,138,139,140,141,142,143,
622 144,145,146,147,148,149,150,151,
623 152,153,154,155,156,157,158,159,
624 160,161,162,163,164,165,166,167,
625 168,169,170,171,172,173,174,175,
626 176,177,178,179,180,181,182,183,
627 184,185,186,187,188,189,190,191,
628 224,225,226,227,228,229,230,231,
629 232,233,234,235,236,237,238,239,
630 240,241,242,243,244,245,246,215,
631 248,249,250,251,252,253,254,223,
632 192,193,194,195,196,197,198,199,
633 200,201,202,203,204,205,206,207,
634 208,209,210,211,212,213,214,247,
635 216,217,218,219,220,221,222,255,
/* Character class bit maps: ten maps of 32 bytes each (320 entries).
NOTE(review): presumably in the same class order as tables0 (space, xdigit,
digit, upper, lower, word, graph, print, punct, cntrl) - confirm. */
636 0,62,0,0,1,0,0,0,
637 0,0,0,0,0,0,0,0,
638 32,0,0,0,1,0,0,0,
639 0,0,0,0,0,0,0,0,
640 0,0,0,0,0,0,255,3,
641 126,0,0,0,126,0,0,0,
642 0,0,0,0,0,0,0,0,
643 0,0,0,0,0,0,0,0,
644 0,0,0,0,0,0,255,3,
645 0,0,0,0,0,0,0,0,
646 0,0,0,0,0,0,12,2,
647 0,0,0,0,0,0,0,0,
648 0,0,0,0,0,0,0,0,
649 254,255,255,7,0,0,0,0,
650 0,0,0,0,0,0,0,0,
651 255,255,127,127,0,0,0,0,
652 0,0,0,0,0,0,0,0,
653 0,0,0,0,254,255,255,7,
654 0,0,0,0,0,4,32,4,
655 0,0,0,128,255,255,127,255,
656 0,0,0,0,0,0,255,3,
657 254,255,255,135,254,255,255,7,
658 0,0,0,0,0,4,44,6,
659 255,255,127,255,255,255,127,255,
660 0,0,0,0,254,255,255,255,
661 255,255,255,255,255,255,255,127,
662 0,0,0,0,254,255,255,255,
663 255,255,255,255,255,255,255,255,
664 0,2,0,0,255,255,255,255,
665 255,255,255,255,255,255,255,127,
666 0,0,0,0,255,255,255,255,
667 255,255,255,255,255,255,255,255,
668 0,0,0,0,254,255,0,252,
669 1,0,0,248,1,0,0,120,
670 0,0,0,0,254,255,255,255,
671 0,0,128,0,0,0,128,0,
672 255,255,255,255,0,0,0,0,
673 0,0,0,0,0,0,0,128,
674 255,255,255,255,0,0,0,0,
675 0,0,0,0,0,0,0,0,
/* Character type table: one flag byte per character value (256 entries).
NOTE(review): the flag bits presumably have the meanings listed for tables0
(0x01 space, 0x02 letter, 0x04 digit, 0x08 hex digit, 0x10 word, 0x80 meta). */
676 128,0,0,0,0,0,0,0,
677 0,1,1,0,1,1,0,0,
678 0,0,0,0,0,0,0,0,
679 0,0,0,0,0,0,0,0,
680 1,0,0,0,128,0,0,0,
681 128,128,128,128,0,0,128,0,
682 28,28,28,28,28,28,28,28,
683 28,28,0,0,0,0,0,128,
684 0,26,26,26,26,26,26,18,
685 18,18,18,18,18,18,18,18,
686 18,18,18,18,18,18,18,18,
687 18,18,18,128,128,0,128,16,
688 0,26,26,26,26,26,26,18,
689 18,18,18,18,18,18,18,18,
690 18,18,18,18,18,18,18,18,
691 18,18,18,128,128,0,0,0,
692 0,0,0,0,0,1,0,0,
693 0,0,0,0,0,0,0,0,
694 0,0,0,0,0,0,0,0,
695 0,0,0,0,0,0,0,0,
696 1,0,0,0,0,0,0,0,
697 0,0,18,0,0,0,0,0,
698 0,0,20,20,0,18,0,0,
699 0,20,18,0,0,0,0,0,
700 18,18,18,18,18,18,18,18,
701 18,18,18,18,18,18,18,18,
702 18,18,18,18,18,18,18,0,
703 18,18,18,18,18,18,18,18,
704 18,18,18,18,18,18,18,18,
705 18,18,18,18,18,18,18,18,
706 18,18,18,18,18,18,18,0,
707 18,18,18,18,18,18,18,18
708 };
709
710
711
712
713 #ifndef HAVE_STRERROR
714 /*************************************************
715 * Provide strerror() for non-ANSI libraries *
716 *************************************************/
717
718 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
719 in their libraries, but can provide the same facility by this simple
720 alternative function. */
721
extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror(): look the error number up in sys_errlist[], or return
a fixed message when n lies outside 0 .. sys_nerr-1. */

char *
strerror(int n)
{
int in_range = (n >= 0 && n < sys_nerr);
return in_range ? sys_errlist[n] : "unknown error number";
}
731 #endif /* HAVE_STRERROR */
732
733
734 /*************************************************
735 * JIT memory callback *
736 *************************************************/
737
738 static pcre_jit_stack* jit_callback(void *arg)
739 {
740 return (pcre_jit_stack *)arg;
741 }
742
743
744 /*************************************************
745 * Convert UTF-8 string to value *
746 *************************************************/
747
748 /* This function takes one or more bytes that represents a UTF-8 character,
749 and returns the value of the character.
750
751 Argument:
752 utf8bytes a pointer to the byte vector
753 vptr a pointer to an int to receive the value
754
755 Returns: > 0 => the number of bytes consumed
756 -6 to 0 => malformed UTF-8 character at offset = (-return)
757 */
758
759 #if !defined NOUTF8
760
761 static int
762 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
763 {
764 int c = *utf8bytes++;
765 int d = c;
766 int i, j, s;
767
768 for (i = -1; i < 6; i++) /* i is number of additional bytes */
769 {
770 if ((d & 0x80) == 0) break;
771 d <<= 1;
772 }
773
774 if (i == -1) { *vptr = c; return 1; } /* ascii character */
775 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
776
777 /* i now has a value in the range 1-5 */
778
779 s = 6*i;
780 d = (c & utf8_table3[i]) << s;
781
782 for (j = 0; j < i; j++)
783 {
784 c = *utf8bytes++;
785 if ((c & 0xc0) != 0x80) return -(j+1);
786 s -= 6;
787 d |= (c & 0x3f) << s;
788 }
789
790 /* Check that encoding was the correct unique one */
791
792 for (j = 0; j < utf8_table1_size; j++)
793 if (d <= utf8_table1[j]) break;
794 if (j != i) return -(i+1);
795
796 /* Valid value */
797
798 *vptr = d;
799 return i+1;
800 }
801
802 #endif
803
804
805
806 /*************************************************
807 * Convert character value to UTF-8 *
808 *************************************************/
809
810 /* This function takes an integer value in the range 0 - 0x7fffffff
811 and encodes it as a UTF-8 character in 1 to 6 bytes.
812
813 Arguments:
814 cvalue the character value
815 utf8bytes pointer to buffer for result - at least 6 bytes long
816
817 Returns: number of bytes placed in the buffer
818 */
819
820 #if !defined NOUTF8
821
822 static int
823 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
824 {
825 register int i, j;
826 for (i = 0; i < utf8_table1_size; i++)
827 if (cvalue <= utf8_table1[i]) break;
828 utf8bytes += i;
829 for (j = i; j > 0; j--)
830 {
831 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
832 cvalue >>= 6;
833 }
834 *utf8bytes = utf8_table2[i] | cvalue;
835 return i + 1;
836 }
837
838 #endif
839
840
841
842 #ifdef SUPPORT_PCRE16
843 /*************************************************
844 * Convert a string to 16-bit *
845 *************************************************/
846
847 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
848 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
849 double, because a value up to 0xffff uses 1 to 3 bytes in UTF-8 and exactly 2
850 bytes in UTF-16, while a higher value uses 4 bytes in UTF-8 and 4 bytes (a
851 surrogate pair) in UTF-16. The result is always left in buffer16. */
852
853 static int
854 to16(pcre_uint8 *p, int utf, int len)
855 {
856 pcre_uint16 *pp;
857
858 if (buffer16_size < 2*len + 2)
859 {
860 if (buffer16 != NULL) free(buffer16);
861 buffer16_size = 2*len + 2;
862 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
863 if (buffer16 == NULL)
864 {
865 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
866 exit(1);
867 }
868 }
869
870 pp = buffer16;
871
872 if (!utf)
873 {
874 while (len-- > 0) *pp++ = *p++;
875 }
876
877 else
878 {
879 int c;
880 while (len > 0)
881 {
882 int chlen = utf82ord(p, &c);
883 p += chlen;
884 len -= chlen;
885 if (c < 0x10000) *pp++ = c; else
886 {
887 c -= 0x10000;
888 *pp++ = 0xD800 | (c >> 10);
889 *pp++ = 0xDC00 | (c & 0x3ff);
890 }
891 }
892 }
893
894 *pp = 0;
895 return pp - buffer16;
896 }
897 #endif
898
899
900 /*************************************************
901 * Read or extend an input line *
902 *************************************************/
903
904 /* Input lines are read into buffer, but both patterns and data lines can be
905 continued over multiple input lines. In addition, if the buffer fills up, we
906 want to automatically expand it so as to be able to handle extremely large
907 lines that are needed for certain stress tests. When the input buffer is
908 expanded, the other two buffers must also be expanded likewise, and the
909 contents of pbuffer, which are a copy of the input for callouts, must be
910 preserved (for when expansion happens for a data line). This is not the most
911 optimal way of handling this, but hey, this is just a test program!
912
913 Arguments:
914 f the file to read
915 start where in buffer to start (this *must* be within buffer)
916 prompt for stdin or readline()
917
918 Returns: pointer to the start of new data
919 could be a copy of start, or could be moved
920 NULL if no data read and EOF reached
921 */
922
923 static pcre_uint8 *
924 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
925 {
926 pcre_uint8 *here = start;
927
928 for (;;)
929 {
930 int rlen = (int)(buffer_size - (here - buffer));
931
932 if (rlen > 1000)
933 {
934 int dlen;
935
936 /* If libreadline support is required, use readline() to read a line if the
937 input is a terminal. Note that readline() removes the trailing newline, so
938 we must put it back again, to be compatible with fgets(). */
939
940 #ifdef SUPPORT_LIBREADLINE
941 if (isatty(fileno(f)))
942 {
943 size_t len;
944 char *s = readline(prompt);
945 if (s == NULL) return (here == start)? NULL : start;
946 len = strlen(s);
947 if (len > 0) add_history(s);
948 if (len > rlen - 1) len = rlen - 1;
949 memcpy(here, s, len);
950 here[len] = '\n';
951 here[len+1] = 0;
952 free(s);
953 }
954 else
955 #endif
956
957 /* Read the next line by normal means, prompting if the file is stdin. */
958
959 {
960 if (f == stdin) printf("%s", prompt);
961 if (fgets((char *)here, rlen, f) == NULL)
962 return (here == start)? NULL : start;
963 }
964
965 dlen = (int)strlen((char *)here);
966 if (dlen > 0 && here[dlen - 1] == '\n') return start;
967 here += dlen;
968 }
969
970 else
971 {
972 int new_buffer_size = 2*buffer_size;
973 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
974 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
975 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
976
977 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
978 {
979 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
980 exit(1);
981 }
982
983 memcpy(new_buffer, buffer, buffer_size);
984 memcpy(new_pbuffer, pbuffer, buffer_size);
985
986 buffer_size = new_buffer_size;
987
988 start = new_buffer + (start - buffer);
989 here = new_buffer + (here - buffer);
990
991 free(buffer);
992 free(dbuffer);
993 free(pbuffer);
994
995 buffer = new_buffer;
996 dbuffer = new_dbuffer;
997 pbuffer = new_pbuffer;
998 }
999 }
1000
1001 return NULL; /* Control never gets here */
1002 }
1003
1004
1005
1006 /*************************************************
1007 * Read number from string *
1008 *************************************************/
1009
1010 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1011 around with conditional compilation, just do the job by hand. It is only used
1012 for unpicking arguments, so just keep it simple.
1013
1014 Arguments:
1015 str string to be converted
1016 endptr where to put the end pointer
1017
1018 Returns: the converted value, as an int
1019 */
1020
1021 static int
1022 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1023 {
1024 int result = 0;
1025 while(*str != 0 && isspace(*str)) str++;
1026 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1027 *endptr = str;
1028 return(result);
1029 }
1030
1031
1032
1033 #ifdef SUPPORT_PCRE8
1034 /*************************************************
1035 * Print 8-bit character string *
1036 *************************************************/
1037
1038 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1039 If handed a NULL file, just counts chars without printing. */
1040
1041 static int pchars(pcre_uint8 *p, int length, FILE *f)
1042 {
1043 int c = 0;
1044 int yield = 0;
1045
1046 while (length-- > 0)
1047 {
1048 #if !defined NOUTF8
1049 if (use_utf8)
1050 {
1051 int rc = utf82ord(p, &c);
1052
1053 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1054 {
1055 length -= rc - 1;
1056 p += rc;
1057 if (PRINTOK(c))
1058 {
1059 if (f != NULL) fprintf(f, "%c", c);
1060 yield++;
1061 }
1062 else
1063 {
1064 int n = 4;
1065 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1066 yield += (n <= 0x000000ff)? 2 :
1067 (n <= 0x00000fff)? 3 :
1068 (n <= 0x0000ffff)? 4 :
1069 (n <= 0x000fffff)? 5 : 6;
1070 }
1071 continue;
1072 }
1073 }
1074 #endif
1075
1076 /* Not UTF-8, or malformed UTF-8 */
1077
1078 c = *p++;
1079 if (PRINTOK(c))
1080 {
1081 if (f != NULL) fprintf(f, "%c", c);
1082 yield++;
1083 }
1084 else
1085 {
1086 if (f != NULL) fprintf(f, "\\x%02x", c);
1087 yield += 4;
1088 }
1089 }
1090
1091 return yield;
1092 }
1093 #endif
1094
1095
1096
1097 #ifdef SUPPORT_PCRE16
1098 /*************************************************
1099 * Print 16-bit character string *
1100 *************************************************/
1101
1102 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1103 If handed a NULL file, just counts chars without printing. */
1104
1105 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1106 {
1107 int yield = 0;
1108
1109 while (length-- > 0)
1110 {
1111 int c = *p++ & 0xffff;
1112
1113 #if !defined NOUTF8
1114 if (use_utf8 && c >= 0xD800 && c < 0xDC00 && length > 0)
1115 {
1116 int d = *p & 0xffff;
1117 if (d >= 0xDC00 && d < 0xDFFF)
1118 {
1119 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1120 length--;
1121 p++;
1122 }
1123 }
1124 #endif
1125
1126 if (PRINTOK(c))
1127 {
1128 if (f != NULL) fprintf(f, "%c", c);
1129 yield++;
1130 }
1131 else
1132 {
1133 yield += 4;
1134 if (c < 0x100)
1135 {
1136 if (f != NULL) fprintf(f, "\\x%02x", c);
1137 }
1138 else
1139 {
1140 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1141 yield += (c <= 0x000000ff)? 2 :
1142 (c <= 0x00000fff)? 3 :
1143 (c <= 0x0000ffff)? 4 :
1144 (c <= 0x000fffff)? 5 : 6;
1145 }
1146 }
1147 }
1148
1149 return yield;
1150 }
1151 #endif
1152
1153
1154
1155 /*************************************************
1156 * Callout function *
1157 *************************************************/
1158
1159 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1160 the match. Yield zero unless more callouts than the fail count, or the callout
1161 data is not zero. */
1162
1163 static int callout(pcre_callout_block *cb)
1164 {
1165 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1166 int i, pre_start, post_start, subject_length;
1167
1168 if (callout_extra)
1169 {
1170 fprintf(f, "Callout %d: last capture = %d\n",
1171 cb->callout_number, cb->capture_last);
1172
1173 for (i = 0; i < cb->capture_top * 2; i += 2)
1174 {
1175 if (cb->offset_vector[i] < 0)
1176 fprintf(f, "%2d: <unset>\n", i/2);
1177 else
1178 {
1179 fprintf(f, "%2d: ", i/2);
1180 PCHARSV(cb->subject + cb->offset_vector[i],
1181 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1182 fprintf(f, "\n");
1183 }
1184 }
1185 }
1186
1187 /* Re-print the subject in canonical form, the first time or if giving full
1188 datails. On subsequent calls in the same match, we use pchars just to find the
1189 printed lengths of the substrings. */
1190
1191 if (f != NULL) fprintf(f, "--->");
1192
1193 PCHARS(pre_start, cb->subject, cb->start_match, f);
1194 PCHARS(post_start, cb->subject + cb->start_match,
1195 cb->current_position - cb->start_match, f);
1196
1197 PCHARS(subject_length, cb->subject, cb->subject_length, NULL);
1198
1199 PCHARSV(cb->subject + cb->current_position,
1200 cb->subject_length - cb->current_position, f);
1201
1202 if (f != NULL) fprintf(f, "\n");
1203
1204 /* Always print appropriate indicators, with callout number if not already
1205 shown. For automatic callouts, show the pattern offset. */
1206
1207 if (cb->callout_number == 255)
1208 {
1209 fprintf(outfile, "%+3d ", cb->pattern_position);
1210 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1211 }
1212 else
1213 {
1214 if (callout_extra) fprintf(outfile, " ");
1215 else fprintf(outfile, "%3d ", cb->callout_number);
1216 }
1217
1218 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1219 fprintf(outfile, "^");
1220
1221 if (post_start > 0)
1222 {
1223 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1224 fprintf(outfile, "^");
1225 }
1226
1227 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1228 fprintf(outfile, " ");
1229
1230 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1231 pbuffer + cb->pattern_position);
1232
1233 fprintf(outfile, "\n");
1234 first_callout = 0;
1235
1236 if (cb->mark != last_callout_mark)
1237 {
1238 fprintf(outfile, "Latest Mark: %s\n",
1239 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
1240 last_callout_mark = cb->mark;
1241 }
1242
1243 if (cb->callout_data != NULL)
1244 {
1245 int callout_data = *((int *)(cb->callout_data));
1246 if (callout_data != 0)
1247 {
1248 fprintf(outfile, "Callout data = %d\n", callout_data);
1249 return callout_data;
1250 }
1251 }
1252
1253 return (cb->callout_number != callout_fail_id)? 0 :
1254 (++callout_count >= callout_fail_count)? 1 : 0;
1255 }
1256
1257
1258 /*************************************************
1259 * Local malloc functions *
1260 *************************************************/
1261
1262 /* Alternative malloc function, to test functionality and save the size of a
1263 compiled re, which is the first store request that pcre_compile() makes. The
1264 show_malloc variable is set only during matching. */
1265
1266 static void *new_malloc(size_t size)
1267 {
1268 void *block = malloc(size);
1269 gotten_store = size;
1270 if (first_gotten_store == 0) first_gotten_store = size;
1271 if (show_malloc)
1272 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1273 return block;
1274 }
1275
1276 static void new_free(void *block)
1277 {
1278 if (show_malloc)
1279 fprintf(outfile, "free %p\n", block);
1280 free(block);
1281 }
1282
1283 /* For recursion malloc/free, to test stacking calls */
1284
1285 static void *stack_malloc(size_t size)
1286 {
1287 void *block = malloc(size);
1288 if (show_malloc)
1289 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1290 return block;
1291 }
1292
1293 static void stack_free(void *block)
1294 {
1295 if (show_malloc)
1296 fprintf(outfile, "stack_free %p\n", block);
1297 free(block);
1298 }
1299
1300
1301 /*************************************************
1302 * Call pcre_fullinfo() *
1303 *************************************************/
1304
1305 /* Get one piece of information from the pcre_fullinfo() function. When only
1306 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1307 value, but the code is defensive. */
1308
1309 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1310 {
1311 int rc;
1312
1313 if (use_pcre16)
1314 #ifdef SUPPORT_PCRE16
1315 rc = pcre16_fullinfo(re, study, option, ptr);
1316 #else
1317 rc = PCRE_ERROR_BADMODE;
1318 #endif
1319 else
1320 #ifdef SUPPORT_PCRE8
1321 rc = pcre_fullinfo(re, study, option, ptr);
1322 #else
1323 rc = PCRE_ERROR_BADMODE;
1324 #endif
1325
1326 if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1327 use_pcre16? "16" : "", option);
1328 }
1329
1330
1331
1332 /*************************************************
1333 * Byte flipping function *
1334 *************************************************/
1335
/* Reverses the byte order of a value. When n is 2, only the two low-order
bytes are swapped and returned; for any other n, the four low-order bytes are
reversed. Bits above those bytes do not appear in the result. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
  const unsigned long int byte0 = value & 0xffUL;
  const unsigned long int byte1 = (value >> 8) & 0xffUL;
  unsigned long int byte2, byte3;

  if (n == 2)
    {
    return (byte0 << 8) | byte1;
    }

  byte2 = (value >> 16) & 0xffUL;
  byte3 = (value >> 24) & 0xffUL;

  return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
1345
1346
1347
1348
1349 /*************************************************
1350 * Check match or recursion limit *
1351 *************************************************/
1352
1353 static int
1354 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1355 int start_offset, int options, int *use_offsets, int use_size_offsets,
1356 int flag, unsigned long int *limit, int errnumber, const char *msg)
1357 {
1358 int count;
1359 int min = 0;
1360 int mid = 64;
1361 int max = -1;
1362
1363 extra->flags |= flag;
1364
1365 for (;;)
1366 {
1367 *limit = mid;
1368
1369 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1370 use_offsets, use_size_offsets);
1371
1372 if (count == errnumber)
1373 {
1374 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1375 min = mid;
1376 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1377 }
1378
1379 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1380 count == PCRE_ERROR_PARTIAL)
1381 {
1382 if (mid == min + 1)
1383 {
1384 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1385 break;
1386 }
1387 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1388 max = mid;
1389 mid = (min + mid)/2;
1390 }
1391 else break; /* Some other error */
1392 }
1393
1394 extra->flags &= ~flag;
1395 return count;
1396 }
1397
1398
1399
1400 /*************************************************
1401 * Case-independent strncmp() function *
1402 *************************************************/
1403
1404 /*
1405 Arguments:
1406 s first string
1407 t second string
1408 n number of characters to compare
1409
1410 Returns: < 0, = 0, or > 0, according to the comparison
1411 */
1412
1413 static int
1414 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1415 {
1416 while (n--)
1417 {
1418 int c = tolower(*s++) - tolower(*t++);
1419 if (c) return c;
1420 }
1421 return 0;
1422 }
1423
1424
1425
1426 /*************************************************
1427 * Check newline indicator *
1428 *************************************************/
1429
1430 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1431 a message and return 0 if there is no match.
1432
1433 Arguments:
1434 p points after the leading '<'
1435 f file for error message
1436
1437 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1438 */
1439
1440 static int
1441 check_newline(pcre_uint8 *p, FILE *f)
1442 {
1443 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1444 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1445 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1446 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1447 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1448 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1449 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1450 fprintf(f, "Unknown newline type at: <%s\n", p);
1451 return 0;
1452 }
1453
1454
1455
1456 /*************************************************
1457 * Usage function *
1458 *************************************************/
1459
/* Writes the command-line summary to stdout. Lines for optional features
(readline, 16-bit, DFA, POSIX) are selected at compile time. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
  fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

  fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
  fputs(" -16 use 16-bit interface\n", stdout);
#endif

  fputs(" -b show compiled code (bytecode)\n"
        " -C show PCRE compile-time options and exit\n"
        " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -M find MATCH_LIMIT minimum for each subject\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif

  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s force each pattern to be studied at basic level\n"
        " -s+ force each pattern to be studied, using JIT if available\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1497
1498
1499
1500 /*************************************************
1501 * Main Program *
1502 *************************************************/
1503
1504 /* Read lines from named file or stdin and write to named file or stdout; lines
1505 consist of a regular expression, in delimiters and optionally followed by
1506 options, followed by a set of test data, terminated by an empty line. */
1507
1508 int main(int argc, char **argv)
1509 {
1510 FILE *infile = stdin;
1511 int options = 0;
1512 int study_options = 0;
1513 int default_find_match_limit = FALSE;
1514 int op = 1;
1515 int timeit = 0;
1516 int timeitm = 0;
1517 int showinfo = 0;
1518 int showstore = 0;
1519 int force_study = -1;
1520 int force_study_options = 0;
1521 int quiet = 0;
1522 int size_offsets = 45;
1523 int size_offsets_max;
1524 int *offsets = NULL;
1525 #if !defined NOPOSIX
1526 int posix = 0;
1527 #endif
1528 int debug = 0;
1529 int done = 0;
1530 int all_use_dfa = 0;
1531 int yield = 0;
1532 int stack_size;
1533
1534 pcre_jit_stack *jit_stack = NULL;
1535
1536 /* These vectors store, end-to-end, a list of captured substring names. Assume
1537 that 1024 is plenty long enough for the few names we'll be testing. */
1538
1539 pcre_uchar copynames[1024];
1540 pcre_uchar getnames[1024];
1541
1542 pcre_uchar *copynamesptr;
1543 pcre_uchar *getnamesptr;
1544
1545 /* Get buffers from malloc() so that valgrind will check their misuse when
1546 debugging. They grow automatically when very long lines are read. The 16-bit
1547 buffer (buffer16) is obtained only if needed. */
1548
1549 buffer = (pcre_uint8 *)malloc(buffer_size);
1550 dbuffer = (pcre_uint8 *)malloc(buffer_size);
1551 pbuffer = (pcre_uint8 *)malloc(buffer_size);
1552
1553 /* The outfile variable is static so that new_malloc can use it. */
1554
1555 outfile = stdout;
1556
1557 /* The following _setmode() stuff is some Windows magic that tells its runtime
1558 library to translate CRLF into a single LF character. At least, that's what
1559 I've been told: never having used Windows I take this all on trust. Originally
1560 it set 0x8000, but then I was advised that _O_BINARY was better. */
1561
1562 #if defined(_WIN32) || defined(WIN32)
1563 _setmode( _fileno( stdout ), _O_BINARY );
1564 #endif
1565
1566 /* Scan options */
1567
1568 while (argc > 1 && argv[op][0] == '-')
1569 {
1570 pcre_uint8 *endptr;
1571
1572 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1573 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1574 else if (strcmp(argv[op], "-s+") == 0)
1575 {
1576 force_study = 1;
1577 force_study_options = PCRE_STUDY_JIT_COMPILE;
1578 }
1579 #ifdef SUPPORT_PCRE16
1580 else if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
1581 #endif
1582
1583 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1584 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1585 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1586 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1587 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1588 #if !defined NODFA
1589 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1590 #endif
1591 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1592 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1593 *endptr == 0))
1594 {
1595 op++;
1596 argc--;
1597 }
1598 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1599 {
1600 int both = argv[op][2] == 0;
1601 int temp;
1602 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
1603 *endptr == 0))
1604 {
1605 timeitm = temp;
1606 op++;
1607 argc--;
1608 }
1609 else timeitm = LOOPREPEAT;
1610 if (both) timeit = timeitm;
1611 }
1612 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1613 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1614 *endptr == 0))
1615 {
1616 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1617 printf("PCRE: -S not supported on this OS\n");
1618 exit(1);
1619 #else
1620 int rc;
1621 struct rlimit rlim;
1622 getrlimit(RLIMIT_STACK, &rlim);
1623 rlim.rlim_cur = stack_size * 1024 * 1024;
1624 rc = setrlimit(RLIMIT_STACK, &rlim);
1625 if (rc != 0)
1626 {
1627 printf("PCRE: setrlimit() failed with error %d\n", rc);
1628 exit(1);
1629 }
1630 op++;
1631 argc--;
1632 #endif
1633 }
1634 #if !defined NOPOSIX
1635 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1636 #endif
1637 else if (strcmp(argv[op], "-C") == 0)
1638 {
1639 int rc;
1640 unsigned long int lrc;
1641 printf("PCRE version %s\n", pcre_version());
1642 printf("Compiled with\n");
1643
1644 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
1645 are set, either both UTFs are supported or both are not supported. */
1646
1647 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1648 printf(" 8-bit and 16-bit support\n");
1649 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1650 if (rc)
1651 printf(" UTF-8 and UTF-16 support\n");
1652 else
1653 printf(" No UTF-8 or UTF-16 support\n");
1654 #elif defined SUPPORT_PCRE8
1655 printf(" 8-bit support only\n");
1656 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1657 printf(" %sUTF-8 support\n", rc? "" : "No ");
1658 #else
1659 printf(" 16-bit support only\n");
1660 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1661 printf(" %sUTF-16 support\n", rc? "" : "No ");
1662 #endif
1663
1664 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1665 printf(" %sUnicode properties support\n", rc? "" : "No ");
1666 (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1667 if (rc)
1668 printf(" Just-in-time compiler support\n");
1669 else
1670 printf(" No just-in-time compiler support\n");
1671 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1672 /* Note that these values are always the ASCII values, even
1673 in EBCDIC environments. CR is 13 and NL is 10. */
1674 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1675 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1676 (rc == -2)? "ANYCRLF" :
1677 (rc == -1)? "ANY" : "???");
1678 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1679 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1680 "all Unicode newlines");
1681 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1682 printf(" Internal link size = %d\n", rc);
1683 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1684 printf(" POSIX malloc threshold = %d\n", rc);
1685 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1686 printf(" Default match limit = %ld\n", lrc);
1687 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1688 printf(" Default recursion depth limit = %ld\n", lrc);
1689 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1690 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1691 goto EXIT;
1692 }
1693 else if (strcmp(argv[op], "-help") == 0 ||
1694 strcmp(argv[op], "--help") == 0)
1695 {
1696 usage();
1697 goto EXIT;
1698 }
1699 else
1700 {
1701 printf("** Unknown or malformed option %s\n", argv[op]);
1702 usage();
1703 yield = 1;
1704 goto EXIT;
1705 }
1706 op++;
1707 argc--;
1708 }
1709
1710 /* Get the store for the offsets vector, and remember what it was */
1711
1712 size_offsets_max = size_offsets;
1713 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1714 if (offsets == NULL)
1715 {
1716 printf("** Failed to get %d bytes of memory for offsets vector\n",
1717 (int)(size_offsets_max * sizeof(int)));
1718 yield = 1;
1719 goto EXIT;
1720 }
1721
1722 /* Sort out the input and output files */
1723
1724 if (argc > 1)
1725 {
1726 infile = fopen(argv[op], INPUT_MODE);
1727 if (infile == NULL)
1728 {
1729 printf("** Failed to open %s\n", argv[op]);
1730 yield = 1;
1731 goto EXIT;
1732 }
1733 }
1734
1735 if (argc > 2)
1736 {
1737 outfile = fopen(argv[op+1], OUTPUT_MODE);
1738 if (outfile == NULL)
1739 {
1740 printf("** Failed to open %s\n", argv[op+1]);
1741 yield = 1;
1742 goto EXIT;
1743 }
1744 }
1745
1746 /* Set alternative malloc function */
1747
1748 #ifdef SUPPORT_PCRE8
1749 pcre_malloc = new_malloc;
1750 pcre_free = new_free;
1751 pcre_stack_malloc = stack_malloc;
1752 pcre_stack_free = stack_free;
1753 #endif
1754
1755 #ifdef SUPPORT_PCRE16
1756 pcre16_malloc = new_malloc;
1757 pcre16_free = new_free;
1758 pcre16_stack_malloc = stack_malloc;
1759 pcre16_stack_free = stack_free;
1760 #endif
1761
1762 /* Heading line unless quiet, then prompt for first regex if stdin */
1763
1764 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1765
1766 /* Main loop */
1767
1768 while (!done)
1769 {
1770 pcre *re = NULL;
1771 pcre_extra *extra = NULL;
1772
1773 #if !defined NOPOSIX /* There are still compilers that require no indent */
1774 regex_t preg;
1775 int do_posix = 0;
1776 #endif
1777
1778 const char *error;
1779 pcre_uint8 *markptr;
1780 pcre_uint8 *p, *pp, *ppp;
1781 pcre_uint8 *to_file = NULL;
1782 const pcre_uint8 *tables = NULL;
1783 unsigned long int true_size, true_study_size = 0;
1784 size_t size, regex_gotten_store;
1785 int do_allcaps = 0;
1786 int do_mark = 0;
1787 int do_study = 0;
1788 int no_force_study = 0;
1789 int do_debug = debug;
1790 int do_G = 0;
1791 int do_g = 0;
1792 int do_showinfo = showinfo;
1793 int do_showrest = 0;
1794 int do_showcaprest = 0;
1795 int do_flip = 0;
1796 int erroroffset, len, delimiter, poffset;
1797
1798 use_utf8 = 0;
1799 debug_lengths = 1;
1800
1801 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1802 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1803 fflush(outfile);
1804
1805 p = buffer;
1806 while (isspace(*p)) p++;
1807 if (*p == 0) continue;
1808
1809 /* See if the pattern is to be loaded pre-compiled from a file. */
1810
1811 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1812 {
1813 unsigned long int magic, get_options;
1814 pcre_uint8 sbuf[8];
1815 FILE *f;
1816
1817 p++;
1818 pp = p + (int)strlen((char *)p);
1819 while (isspace(pp[-1])) pp--;
1820 *pp = 0;
1821
1822 f = fopen((char *)p, "rb");
1823 if (f == NULL)
1824 {
1825 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1826 continue;
1827 }
1828
1829 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1830
1831 true_size =
1832 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1833 true_study_size =
1834 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1835
1836 re = (real_pcre *)new_malloc(true_size);
1837 regex_gotten_store = first_gotten_store;
1838
1839 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1840
1841 magic = ((real_pcre *)re)->magic_number;
1842 if (magic != MAGIC_NUMBER)
1843 {
1844 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1845 {
1846 do_flip = 1;
1847 }
1848 else
1849 {
1850 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1851 fclose(f);
1852 continue;
1853 }
1854 }
1855
1856 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1857 do_flip? " (byte-inverted)" : "", p);
1858
1859 /* Need to know if UTF-8 for printing data strings */
1860
1861 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1862 use_utf8 = (get_options & PCRE_UTF8) != 0;
1863
1864 /* Now see if there is any following study data. */
1865
1866 if (true_study_size != 0)
1867 {
1868 pcre_study_data *psd;
1869
1870 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1871 extra->flags = PCRE_EXTRA_STUDY_DATA;
1872
1873 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1874 extra->study_data = psd;
1875
1876 if (fread(psd, 1, true_study_size, f) != true_study_size)
1877 {
1878 FAIL_READ:
1879 fprintf(outfile, "Failed to read data from %s\n", p);
1880 if (extra != NULL)
1881 {
1882 PCRE_FREE_STUDY(extra);
1883 }
1884 if (re != NULL) new_free(re);
1885 fclose(f);
1886 continue;
1887 }
1888 fprintf(outfile, "Study data loaded from %s\n", p);
1889 do_study = 1; /* To get the data output if requested */
1890 }
1891 else fprintf(outfile, "No study data\n");
1892
1893 fclose(f);
1894 goto SHOW_INFO;
1895 }
1896
1897 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1898 the pattern; if is isn't complete, read more. */
1899
1900 delimiter = *p++;
1901
1902 if (isalnum(delimiter) || delimiter == '\\')
1903 {
1904 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1905 goto SKIP_DATA;
1906 }
1907
1908 pp = p;
1909 poffset = (int)(p - buffer);
1910
1911 for(;;)
1912 {
1913 while (*pp != 0)
1914 {
1915 if (*pp == '\\' && pp[1] != 0) pp++;
1916 else if (*pp == delimiter) break;
1917 pp++;
1918 }
1919 if (*pp != 0) break;
1920 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1921 {
1922 fprintf(outfile, "** Unexpected EOF\n");
1923 done = 1;
1924 goto CONTINUE;
1925 }
1926 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1927 }
1928
1929 /* The buffer may have moved while being extended; reset the start of data
1930 pointer to the correct relative point in the buffer. */
1931
1932 p = buffer + poffset;
1933
1934 /* If the first character after the delimiter is backslash, make
1935 the pattern end with backslash. This is purely to provide a way
1936 of testing for the error message when a pattern ends with backslash. */
1937
1938 if (pp[1] == '\\') *pp++ = '\\';
1939
1940 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1941 for callouts. */
1942
1943 *pp++ = 0;
1944 strcpy((char *)pbuffer, (char *)p);
1945
1946 /* Look for options after final delimiter */
1947
1948 options = 0;
1949 study_options = 0;
1950 log_store = showstore; /* default from command line */
1951
1952 while (*pp != 0)
1953 {
1954 switch (*pp++)
1955 {
1956 case 'f': options |= PCRE_FIRSTLINE; break;
1957 case 'g': do_g = 1; break;
1958 case 'i': options |= PCRE_CASELESS; break;
1959 case 'm': options |= PCRE_MULTILINE; break;
1960 case 's': options |= PCRE_DOTALL; break;
1961 case 'x': options |= PCRE_EXTENDED; break;
1962
1963 case '+':
1964 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1965 break;
1966
1967 case '=': do_allcaps = 1; break;
1968 case 'A': options |= PCRE_ANCHORED; break;
1969 case 'B': do_debug = 1; break;
1970 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1971 case 'D': do_debug = do_showinfo = 1; break;
1972 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1973 case 'F': do_flip = 1; break;
1974 case 'G': do_G = 1; break;
1975 case 'I': do_showinfo = 1; break;
1976 case 'J': options |= PCRE_DUPNAMES; break;
1977 case 'K': do_mark = 1; break;
1978 case 'M': log_store = 1; break;
1979 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1980
1981 #if !defined NOPOSIX
1982 case 'P': do_posix = 1; break;
1983 #endif
1984
1985 case 'S':
1986 if (do_study == 0)
1987 {
1988 do_study = 1;
1989 if (*pp == '+')
1990 {
1991 study_options |= PCRE_STUDY_JIT_COMPILE;
1992 pp++;
1993 }
1994 }
1995 else
1996 {
1997 do_study = 0;
1998 no_force_study = 1;
1999 }
2000 break;
2001
2002 case 'U': options |= PCRE_UNGREEDY; break;
2003 case 'W': options |= PCRE_UCP; break;
2004 case 'X': options |= PCRE_EXTRA; break;
2005 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2006 case 'Z': debug_lengths = 0; break;
2007 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
2008 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2009
2010 case 'T':
2011 switch (*pp++)
2012 {
2013 case '0': tables = tables0; break;
2014 case '1': tables = tables1; break;
2015
2016 case '\r':
2017 case '\n':
2018 case ' ':
2019 case 0:
2020 fprintf(outfile, "** Missing table number after /T\n");
2021 goto SKIP_DATA;
2022
2023 default:
2024 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2025 goto SKIP_DATA;
2026 }
2027 break;
2028
2029 case 'L':
2030 ppp = pp;
2031 /* The '\r' test here is so that it works on Windows. */
2032 /* The '0' test is just in case this is an unterminated line. */
2033 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2034 *ppp = 0;
2035 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2036 {
2037 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2038 goto SKIP_DATA;
2039 }
2040 locale_set = 1;
2041 tables = pcre_maketables();
2042 pp = ppp;
2043 break;
2044
2045 case '>':
2046 to_file = pp;
2047 while (*pp != 0) pp++;
2048 while (isspace(pp[-1])) pp--;
2049 *pp = 0;
2050 break;
2051
2052 case '<':
2053 {
2054 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2055 {
2056 options |= PCRE_JAVASCRIPT_COMPAT;
2057 pp += 3;
2058 }
2059 else
2060 {
2061 int x = check_newline(pp, outfile);
2062 if (x == 0) goto SKIP_DATA;
2063 options |= x;
2064 while (*pp++ != '>');
2065 }
2066 }
2067 break;
2068
2069 case '\r': /* So that it works in Windows */
2070 case '\n':
2071 case ' ':
2072 break;
2073
2074 default:
2075 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2076 goto SKIP_DATA;
2077 }
2078 }
2079
2080 /* Handle compiling via the POSIX interface, which doesn't support the
2081 timing, showing, or debugging options, nor the ability to pass over
2082 local character tables. Neither does it have 16-bit support. */
2083
2084 #if !defined NOPOSIX
2085 if (posix || do_posix)
2086 {
2087 int rc;
2088 int cflags = 0;
2089
2090 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2091 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2092 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2093 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2094 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2095 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2096 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2097
2098 first_gotten_store = 0;
2099 rc = regcomp(&preg, (char *)p, cflags);
2100
2101 /* Compilation failed; go back for another re, skipping to blank line
2102 if non-interactive. */
2103
2104 if (rc != 0)
2105 {
2106 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2107 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2108 goto SKIP_DATA;
2109 }
2110 }
2111
2112 /* Handle compiling via the native interface */
2113
2114 else
2115 #endif /* !defined NOPOSIX */
2116
2117 {
2118 unsigned long int get_options;
2119
2120 /* In 16-bit mode, convert the input. */
2121
2122 #ifdef SUPPORT_PCRE16
2123 if (use_pcre16)
2124 {
2125 (void)to16(p, options & PCRE_UTF8, (int)strlen((char *)p));
2126 p = (pcre_uint8 *)buffer16;
2127 }
2128 #endif
2129
2130 /* Compile many times when timing */
2131
2132 if (timeit > 0)
2133 {
2134 register int i;
2135 clock_t time_taken;
2136 clock_t start_time = clock();
2137 for (i = 0; i < timeit; i++)
2138 {
2139 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2140 if (re != NULL) free(re);
2141 }
2142 time_taken = clock() - start_time;
2143 fprintf(outfile, "Compile time %.4f milliseconds\n",
2144 (((double)time_taken * 1000.0) / (double)timeit) /
2145 (double)CLOCKS_PER_SEC);
2146 }
2147
2148 first_gotten_store = 0;
2149 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2150
2151 /* Compilation failed; go back for another re, skipping to blank line
2152 if non-interactive. */
2153
2154 if (re == NULL)
2155 {
2156 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2157 SKIP_DATA:
2158 if (infile != stdin)
2159 {
2160 for (;;)
2161 {
2162 if (extend_inputline(infile, buffer, NULL) == NULL)
2163 {
2164 done = 1;
2165 goto CONTINUE;
2166 }
2167 len = (int)strlen((char *)buffer);
2168 while (len > 0 && isspace(buffer[len-1])) len--;
2169 if (len == 0) break;
2170 }
2171 fprintf(outfile, "\n");
2172 }
2173 goto CONTINUE;
2174 }
2175
2176 /* Compilation succeeded. It is now possible to set the UTF-8 option from
2177 within the regex; check for this so that we know how to process the data
2178 lines. */
2179
2180 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2181 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
2182
2183 /* Extract the size for possible writing before possibly flipping it,
2184 and remember the store that was got. */
2185
2186 true_size = ((real_pcre *)re)->size;
2187 regex_gotten_store = first_gotten_store;
2188
2189 /* Output code size information if requested */
2190
2191 if (log_store)
2192 fprintf(outfile, "Memory allocation (code space): %d\n",
2193 (int)(first_gotten_store -
2194 sizeof(real_pcre) -
2195 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2196
2197 /* If -s or /S was present, study the regex to generate additional info to
2198 help with the matching, unless the pattern has the SS option, which
2199 suppresses the effect of /S (used for a few test patterns where studying is
2200 never sensible). */
2201
2202 if (do_study || (force_study >= 0 && !no_force_study))
2203 {
2204 if (timeit > 0)
2205 {
2206 register int i;
2207 clock_t time_taken;
2208 clock_t start_time = clock();
2209 for (i = 0; i < timeit; i++)
2210 {
2211 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2212 }
2213 time_taken = clock() - start_time;
2214 if (extra != NULL)
2215 {
2216 PCRE_FREE_STUDY(extra);
2217 }
2218 fprintf(outfile, " Study time %.4f milliseconds\n",
2219 (((double)time_taken * 1000.0) / (double)timeit) /
2220 (double)CLOCKS_PER_SEC);
2221 }
2222 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2223 if (error != NULL)
2224 fprintf(outfile, "Failed to study: %s\n", error);
2225 else if (extra != NULL)
2226 {
2227 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2228 if (log_store)
2229 {
2230 size_t jitsize;
2231 new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2232 if (jitsize != 0)
2233 fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
2234 }
2235 }
2236 }
2237
2238 /* If /K was present, we set up for handling MARK data. */
2239
2240 if (do_mark)
2241 {
2242 if (extra == NULL)
2243 {
2244 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2245 extra->flags = 0;
2246 }
2247 extra->mark = &markptr;
2248 extra->flags |= PCRE_EXTRA_MARK;
2249 }
2250
2251 /* If the 'F' option was present, we flip the bytes of all the integer
2252 fields in the regex data block and the study block. This is to make it
2253 possible to test PCRE's handling of byte-flipped patterns, e.g. those
2254 compiled on a different architecture. */
2255
2256 if (do_flip)
2257 {
2258 real_pcre *rre = (real_pcre *)re;
2259 rre->magic_number =
2260 byteflip(rre->magic_number, sizeof(rre->magic_number));
2261 rre->size = byteflip(rre->size, sizeof(rre->size));
2262 rre->options = byteflip(rre->options, sizeof(rre->options));
2263 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
2264 rre->top_bracket =
2265 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
2266 rre->top_backref =
2267 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
2268 rre->first_char =
2269 (pcre_uint16)byteflip(rre->first_char, sizeof(rre->first_char));
2270 rre->req_char =
2271 (pcre_uint16)byteflip(rre->req_char, sizeof(rre->req_char));
2272 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
2273 sizeof(rre->name_table_offset));
2274 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
2275 sizeof(rre->name_entry_size));
2276 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
2277 sizeof(rre->name_count));
2278
2279 if (extra != NULL)
2280 {
2281 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2282 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
2283 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
2284 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
2285 }
2286 }
2287
2288 /* Extract and display information from the compiled data if required. */
2289
2290 SHOW_INFO:
2291
2292 if (do_debug)
2293 {
2294 fprintf(outfile, "------------------------------------------------------------------\n");
2295 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2296 if (use_pcre16)
2297 pcre16_printint(re, outfile, debug_lengths);
2298 else
2299 pcre_printint(re, outfile, debug_lengths);
2300 #elif defined SUPPORT_PCRE8
2301 pcre_printint(re, outfile, debug_lengths);
2302 #else
2303 pcre16_printint(re, outfile, debug_lengths);
2304 #endif
2305 }
2306
2307 /* We already have the options in get_options (see above) */
2308
2309 if (do_showinfo)
2310 {
2311 unsigned long int all_options;
2312 #if !defined NOINFOCHECK
2313 int old_first_char, old_options, old_count;
2314 #endif
2315 int count, backrefmax, first_char, need_char, okpartial, jchanged,
2316 hascrorlf;
2317 int nameentrysize, namecount;
2318 const pcre_uchar *nametable;
2319
2320 new_info(re, NULL, PCRE_INFO_SIZE, &size);
2321 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2322 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
2323 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
2324 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
2325 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
2326 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
2327 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
2328 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
2329 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2330 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2331
2332 /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
2333 that it gives the same results as the new function. */
2334
2335 #if !defined NOINFOCHECK
2336 if (!use_pcre16)
2337 {
2338 old_count = pcre_info(re, &old_options, &old_first_char);
2339 if (count < 0) fprintf(outfile,
2340 "Error %d from pcre_info()\n", count);
2341 else
2342 {
2343 if (old_count != count) fprintf(outfile,
2344 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2345 old_count);
2346
2347 if (old_first_char != first_char) fprintf(outfile,
2348 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2349 first_char, old_first_char);
2350
2351 if (old_options != (int)get_options) fprintf(outfile,
2352 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2353 get_options, old_options);
2354 }
2355 }
2356 #endif
2357
2358 if (size != regex_gotten_store) fprintf(outfile,
2359 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2360 (int)size, (int)regex_gotten_store);
2361
2362 fprintf(outfile, "Capturing subpattern count = %d\n", count);
2363 if (backrefmax > 0)
2364 fprintf(outfile, "Max back reference = %d\n", backrefmax);
2365
2366 if (namecount > 0)
2367 {
2368 fprintf(outfile, "Named capturing subpatterns:\n");
2369 while (namecount-- > 0)
2370 {
2371 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
2372 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2373 GET2(nametable, 0));
2374 nametable += nameentrysize;
2375 }
2376 }
2377
2378 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2379 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2380
2381 all_options = ((real_pcre *)re)->options;
2382 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2383
2384 if (get_options == 0) fprintf(outfile, "No options\n");
2385 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2386 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2387 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2388 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2389 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2390 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2391 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2392 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2393 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2394 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2395 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2396 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2397 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2398 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2399 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2400 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2401 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2402 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2403
2404 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2405
2406 switch (get_options & PCRE_NEWLINE_BITS)
2407 {
2408 case PCRE_NEWLINE_CR:
2409 fprintf(outfile, "Forced newline sequence: CR\n");
2410 break;
2411
2412 case PCRE_NEWLINE_LF:
2413 fprintf(outfile, "Forced newline sequence: LF\n");
2414 break;
2415
2416 case PCRE_NEWLINE_CRLF:
2417 fprintf(outfile, "Forced newline sequence: CRLF\n");
2418 break;
2419
2420 case PCRE_NEWLINE_ANYCRLF:
2421 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2422 break;
2423
2424 case PCRE_NEWLINE_ANY:
2425 fprintf(outfile, "Forced newline sequence: ANY\n");
2426 break;
2427
2428 default:
2429 break;
2430 }
2431
2432 if (first_char == -1)
2433 {
2434 fprintf(outfile, "First char at start or follows newline\n");
2435 }
2436 else if (first_char < 0)
2437 {
2438 fprintf(outfile, "No first char\n");
2439 }
2440 else
2441 {
2442 const char *caseless =
2443 ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2444 "" : " (caseless)";
2445
2446 if (PRINTOK(first_char))
2447 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2448 else
2449 fprintf(outfile, "First char = %d%s\n", first_char, caseless);
2450 }
2451
2452 if (need_char < 0)
2453 {
2454 fprintf(outfile, "No need char\n");
2455 }
2456 else
2457 {
2458 const char *caseless =
2459 ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2460 "" : " (caseless)";
2461
2462 if (PRINTOK(need_char))
2463 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2464 else
2465 fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
2466 }
2467
2468 /* Don't output study size; at present it is in any case a fixed
2469 value, but it varies, depending on the computer architecture, and
2470 so messes up the test suite. (And with the /F option, it might be
2471 flipped.) If study was forced by an external -s, don't show this
2472 information unless -i or -d was also present. This means that, except
2473 when auto-callouts are involved, the output from runs with and without
2474 -s should be identical. */
2475
2476 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2477 {
2478 if (extra == NULL)
2479 fprintf(outfile, "Study returned NULL\n");
2480 else
2481 {
2482 pcre_uint8 *start_bits = NULL;
2483 int minlength;
2484
2485 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2486 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2487
2488 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2489 if (start_bits == NULL)
2490 fprintf(outfile, "No set of starting bytes\n");
2491 else
2492 {
2493 int i;
2494 int c = 24;
2495 fprintf(outfile, "Starting byte set: ");
2496 for (i = 0; i < 256; i++)
2497 {
2498 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2499 {
2500 if (c > 75)
2501 {
2502 fprintf(outfile, "\n ");
2503 c = 2;
2504 }
2505 if (PRINTOK(i) && i != ' ')
2506 {
2507 fprintf(outfile, "%c ", i);
2508 c += 2;
2509 }
2510 else
2511 {
2512 fprintf(outfile, "\\x%02x ", i);
2513 c += 5;
2514 }
2515 }
2516 }
2517 fprintf(outfile, "\n");
2518 }
2519 }
2520
2521 /* Show this only if the JIT was set by /S, not by -s. */
2522
2523 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2524 {
2525 int jit;
2526 new_info(re, extra, PCRE_INFO_JIT, &jit);
2527 if (jit)
2528 fprintf(outfile, "JIT study was successful\n");
2529 else
2530 #ifdef SUPPORT_JIT
2531 fprintf(outfile, "JIT study was not successful\n");
2532 #else
2533 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2534 #endif
2535 }
2536 }
2537 }
2538
2539 /* If the '>' option was present, we write out the regex to a file, and
2540 that is all. The first 8 bytes of the file are the regex length and then
2541 the study length, in big-endian order. */
2542
2543 if (to_file != NULL)
2544 {
2545 FILE *f = fopen((char *)to_file, "wb");
2546 if (f == NULL)
2547 {
2548 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2549 }
2550 else
2551 {
2552 pcre_uint8 sbuf[8];
2553 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2554 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2555 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
2556 sbuf[3] = (pcre_uint8)((true_size) & 255);
2557
2558 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2559 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2560 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
2561 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2562
2563 if (fwrite(sbuf, 1, 8, f) < 8 ||
2564 fwrite(re, 1, true_size, f) < true_size)
2565 {
2566 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2567 }
2568 else
2569 {
2570 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2571
2572 /* If there is study data, write it. */
2573
2574 if (extra != NULL)
2575 {
2576 if (fwrite(extra->study_data, 1, true_study_size, f) <
2577 true_study_size)
2578 {
2579 fprintf(outfile, "Write error on %s: %s\n", to_file,
2580 strerror(errno));
2581 }
2582 else fprintf(outfile, "Study data written to %s\n", to_file);
2583 }
2584 }
2585 fclose(f);
2586 }
2587
2588 new_free(re);
2589 if (extra != NULL)
2590 {
2591 PCRE_FREE_STUDY(extra);
2592 }
2593 if (locale_set)
2594 {
2595 new_free((void *)tables);
2596 setlocale(LC_CTYPE, "C");
2597 locale_set = 0;
2598 }
2599 continue; /* With next regex */
2600 }
2601 } /* End of non-POSIX compile */
2602
2603 /* Read data lines and test them */
2604
2605 for (;;)
2606 {
2607 pcre_uint8 *q;
2608 pcre_uint8 *bptr;
2609 int *use_offsets = offsets;
2610 int use_size_offsets = size_offsets;
2611 int callout_data = 0;
2612 int callout_data_set = 0;
2613 int count, c;
2614 int copystrings = 0;
2615 int find_match_limit = default_find_match_limit;
2616 int getstrings = 0;
2617 int getlist = 0;
2618 int gmatched = 0;
2619 int start_offset = 0;
2620 int start_offset_sign = 1;
2621 int g_notempty = 0;
2622 int use_dfa = 0;
2623
2624 options = 0;
2625
2626 *copynames = 0;
2627 *getnames = 0;
2628
2629 copynamesptr = copynames;
2630 getnamesptr = getnames;
2631
2632 pcre_callout = callout;
2633 first_callout = 1;
2634 last_callout_mark = NULL;
2635 callout_extra = 0;
2636 callout_count = 0;
2637 callout_fail_count = 999999;
2638 callout_fail_id = -1;
2639 show_malloc = 0;
2640
2641 if (extra != NULL) extra->flags &=
2642 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2643
2644 len = 0;
2645 for (;;)
2646 {
2647 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2648 {
2649 if (len > 0) /* Reached EOF without hitting a newline */
2650 {
2651 fprintf(outfile, "\n");
2652 break;
2653 }
2654 done = 1;
2655 goto CONTINUE;
2656 }
2657 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2658 len = (int)strlen((char *)buffer);
2659 if (buffer[len-1] == '\n') break;
2660 }
2661
2662 while (len > 0 && isspace(buffer[len-1])) len--;
2663 buffer[len] = 0;
2664 if (len == 0) break;
2665
2666 p = buffer;
2667 while (isspace(*p)) p++;
2668
2669 bptr = q = dbuffer;
2670 while ((c = *p++) != 0)
2671 {
2672 int i = 0;
2673 int n = 0;
2674
2675 if (c == '\\') switch ((c = *p++))
2676 {
2677 case 'a': c = 7; break;
2678 case 'b': c = '\b'; break;
2679 case 'e': c = 27; break;
2680 case 'f': c = '\f'; break;
2681 case 'n': c = '\n'; break;
2682 case 'r': c = '\r'; break;
2683 case 't': c = '\t'; break;
2684 case 'v': c = '\v'; break;
2685
2686 case '0': case '1': case '2': case '3':
2687 case '4': case '5': case '6': case '7':
2688 c -= '0';
2689 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2690 c = c * 8 + *p++ - '0';
2691
2692 #if !defined NOUTF8
2693 if (use_utf8 && c > 255)
2694 {
2695 pcre_uint8 buff8[8];
2696 int ii, utn;
2697 utn = ord2utf8(c, buff8);
2698 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2699 c = buff8[ii]; /* Last byte */
2700 }
2701 #endif
2702 break;
2703
2704 case 'x':
2705
2706 /* Handle \x{..} specially - new Perl thing for utf8 */
2707
2708 #if !defined NOUTF8
2709 if (*p == '{')
2710 {
2711 pcre_uint8 *pt = p;
2712 c = 0;
2713
2714 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2715 when isxdigit() is a macro that refers to its argument more than
2716 once. This is banned by the C Standard, but apparently happens in at
2717 least one MacOS environment. */
2718
2719 for (pt++; isxdigit(*pt); pt++)
2720 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2721 if (*pt == '}')
2722 {
2723 pcre_uint8 buff8[8];
2724 int ii, utn;
2725 if (use_utf8)
2726 {
2727 utn = ord2utf8(c, buff8);
2728 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2729 c = buff8[ii]; /* Last byte */
2730 }
2731 else
2732 {
2733 if (c > 255)
2734 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2735 "UTF-8 mode is not enabled.\n"
2736 "** Truncation will probably give the wrong result.\n", c);
2737 }
2738 p = pt + 1;
2739 break;
2740 }
2741 /* Not correct form; fall through */
2742 }
2743 #endif
2744
2745 /* Ordinary \x */
2746
2747 c = 0;
2748 while (i++ < 2 && isxdigit(*p))
2749 {
2750 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2751 p++;
2752 }
2753 break;
2754
2755 case 0: /* \ followed by EOF allows for an empty line */
2756 p--;
2757 continue;
2758
2759 case '>':
2760 if (*p == '-')
2761 {
2762 start_offset_sign = -1;
2763 p++;
2764 }
2765 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2766 start_offset *= start_offset_sign;
2767 continue;
2768
2769 case 'A': /* Option setting */
2770 options |= PCRE_ANCHORED;
2771 continue;
2772
2773 case 'B':
2774 options |= PCRE_NOTBOL;
2775 continue;
2776
2777 case 'C':
2778 if (isdigit(*p)) /* Set copy string */
2779 {
2780 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2781 copystrings |= 1 << n;
2782 }
2783 else if (isalnum(*p))
2784 {
2785 pcre_uchar *npp = copynamesptr;
2786 while (isalnum(*p)) *npp++ = *p++;
2787 *npp++ = 0;
2788 *npp = 0;
2789 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2790 if (n < 0)
2791 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2792 copynamesptr = npp;
2793 }
2794 else if (*p == '+')
2795 {
2796 callout_extra = 1;
2797 p++;
2798 }
2799 else if (*p == '-')
2800 {
2801 pcre_callout = NULL;
2802 p++;
2803 }
2804 else if (*p == '!')
2805 {
2806 callout_fail_id = 0;
2807 p++;
2808 while(isdigit(*p))
2809 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2810 callout_fail_count = 0;
2811 if (*p == '!')
2812 {
2813 p++;
2814 while(isdigit(*p))
2815 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2816 }
2817 }
2818 else if (*p == '*')
2819 {
2820 int sign = 1;
2821 callout_data = 0;
2822 if (*(++p) == '-') { sign = -1; p++; }
2823 while(isdigit(*p))
2824 callout_data = callout_data * 10 + *p++ - '0';
2825 callout_data *= sign;
2826 callout_data_set = 1;
2827 }
2828 continue;
2829
2830 #if !defined NODFA
2831 case 'D':
2832 #if !defined NOPOSIX
2833 if (posix || do_posix)
2834 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2835 else
2836 #endif
2837 use_dfa = 1;
2838 continue;
2839 #endif
2840
2841 #if !defined NODFA
2842 case 'F':
2843 options |= PCRE_DFA_SHORTEST;
2844 continue;
2845 #endif
2846
2847 case 'G':
2848 if (isdigit(*p))
2849 {
2850 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2851 getstrings |= 1 << n;
2852 }
2853 else if (isalnum(*p))
2854 {
2855 pcre_uchar *npp = getnamesptr;
2856 while (isalnum(*p)) *npp++ = *p++;
2857 *npp++ = 0;
2858 *npp = 0;
2859 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2860 if (n < 0)
2861 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2862 getnamesptr = npp;
2863 }
2864 continue;
2865
2866 case 'J':
2867 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2868 if (extra != NULL
2869 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2870 && extra->executable_jit != NULL)
2871 {
2872 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2873 jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2874 pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2875 }
2876 continue;
2877
2878 case 'L':
2879 getlist = 1;
2880 continue;
2881
2882 case 'M':
2883 find_match_limit = 1;
2884 continue;
2885
2886 case 'N':
2887 if ((options & PCRE_NOTEMPTY) != 0)
2888 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2889 else
2890 options |= PCRE_NOTEMPTY;
2891 continue;
2892
2893 case 'O':
2894 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2895 if (n > size_offsets_max)
2896 {
2897 size_offsets_max = n;
2898 free(offsets);
2899 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2900 if (offsets == NULL)
2901 {
2902 printf("** Failed to get %d bytes of memory for offsets vector\n",
2903 (int)(size_offsets_max * sizeof(int)));
2904 yield = 1;
2905 goto EXIT;
2906 }
2907 }
2908 use_size_offsets = n;
2909 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2910 continue;
2911
2912 case 'P':
2913 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2914 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2915 continue;
2916
2917 case 'Q':
2918 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2919 if (extra == NULL)
2920 {
2921 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2922 extra->flags = 0;
2923 }
2924 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2925 extra->match_limit_recursion = n;
2926 continue;
2927
2928 case 'q':
2929 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2930 if (extra == NULL)
2931 {
2932 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2933 extra->flags = 0;
2934 }
2935 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2936 extra->match_limit = n;
2937 continue;
2938
2939 #if !defined NODFA
2940 case 'R':
2941 options |= PCRE_DFA_RESTART;
2942 continue;
2943 #endif
2944
2945 case 'S':
2946 show_malloc = 1;
2947 continue;
2948
2949 case 'Y':
2950 options |= PCRE_NO_START_OPTIMIZE;
2951 continue;
2952
2953 case 'Z':
2954 options |= PCRE_NOTEOL;
2955 continue;
2956
2957 case '?':
2958 options |= PCRE_NO_UTF8_CHECK;
2959 continue;
2960
2961 case '<':
2962 {
2963 int x = check_newline(p, outfile);
2964 if (x == 0) goto NEXT_DATA;
2965 options |= x;
2966 while (*p++ != '>');
2967 }
2968 continue;
2969 }
2970 *q++ = c;
2971 }
2972 *q = 0;
2973 len = (int)(q - dbuffer);
2974
2975 /* Move the data to the end of the buffer so that a read over the end of
2976 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2977 we are using the POSIX interface, we must include the terminating zero. */
2978
2979 #if !defined NOPOSIX
2980 if (posix || do_posix)
2981 {
2982 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2983 bptr += buffer_size - len - 1;
2984 }
2985 else
2986 #endif
2987 {
2988 memmove(bptr + buffer_size - len, bptr, len);
2989 bptr += buffer_size - len;
2990 }
2991
2992 if ((all_use_dfa || use_dfa) && find_match_limit)
2993 {
2994 printf("**Match limit not relevant for DFA matching: ignored\n");
2995 find_match_limit = 0;
2996 }
2997
2998 /* Handle matching via the POSIX interface, which does not
2999 support timing or playing with the match limit or callout data. */
3000
3001 #if !defined NOPOSIX
3002 if (posix || do_posix)
3003 {
3004 int rc;
3005 int eflags = 0;
3006 regmatch_t *pmatch = NULL;
3007 if (use_size_offsets > 0)
3008 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3009 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3010 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3011 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3012
3013 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3014
3015 if (rc != 0)
3016 {
3017 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3018 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3019 }
3020 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3021 != 0)
3022 {
3023 fprintf(outfile, "Matched with REG_NOSUB\n");
3024 }
3025 else
3026 {
3027 size_t i;
3028 for (i = 0; i < (size_t)use_size_offsets; i++)
3029 {
3030 if (pmatch[i].rm_so >= 0)
3031 {
3032 fprintf(outfile, "%2d: ", (int)i);
3033 PCHARSV(dbuffer + pmatch[i].rm_so,
3034 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3035 fprintf(outfile, "\n");
3036 if (do_showcaprest || (i == 0 && do_showrest))
3037 {
3038 fprintf(outfile, "%2d+ ", (int)i);
3039 PCHARSV(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3040 outfile);
3041 fprintf(outfile, "\n");
3042 }
3043 }
3044 }
3045 }
3046 free(pmatch);
3047 goto NEXT_DATA;
3048 }
3049
3050 #endif /* !defined NOPOSIX */
3051
3052 /* Handle matching via the native interface - repeats for /g and /G */
3053
3054 #ifdef SUPPORT_PCRE16
3055 if (use_pcre16)
3056 {
3057 len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3058 bptr = (pcre_uint8 *)buffer16;
3059 }
3060 #endif
3061
3062 for (;; gmatched++) /* Loop for /g or /G */
3063 {
3064 markptr = NULL;
3065
3066 if (timeitm > 0)
3067 {
3068 register int i;
3069 clock_t time_taken;
3070 clock_t start_time = clock();
3071
3072 #if !defined NODFA
3073 if (all_use_dfa || use_dfa)
3074 {
3075 int workspace[1000];
3076 for (i = 0; i < timeitm; i++)
3077 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3078 options | g_notempty, use_offsets, use_size_offsets, workspace,
3079 sizeof(workspace)/sizeof(int));
3080 }
3081 else
3082 #endif
3083
3084 for (i = 0; i < timeitm; i++)
3085 {
3086 PCRE_EXEC(count, re, extra, bptr, len,
3087 start_offset, options | g_notempty, use_offsets, use_size_offsets);
3088 }
3089 time_taken = clock() - start_time;
3090 fprintf(outfile, "Execute time %.4f milliseconds\n",
3091 (((double)time_taken * 1000.0) / (double)timeitm) /
3092 (double)CLOCKS_PER_SEC);
3093 }
3094
3095 /* If find_match_limit is set, we want to do repeated matches with
3096 varying limits in order to find the minimum value for the match limit and
3097 for the recursion limit. The match limits are relevant only to the normal
3098 running of pcre_exec(), so disable the JIT optimization. This makes it
3099 possible to run the same set of tests with and without JIT externally
3100 requested. */
3101
3102 if (find_match_limit)
3103 {
3104 if (extra == NULL)
3105 {
3106 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3107 extra->flags = 0;
3108 }
3109 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3110
3111 (void)check_match_limit(re, extra, bptr, len, start_offset,
3112 options|g_notempty, use_offsets, use_size_offsets,
3113 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3114 PCRE_ERROR_MATCHLIMIT, "match()");
3115
3116 count = check_match_limit(re, extra, bptr, len, start_offset,
3117 options|g_notempty, use_offsets, use_size_offsets,
3118 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3119 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3120 }
3121
3122 /* If callout_data is set, use the interface with additional data */
3123
3124 else if (callout_data_set)
3125 {
3126 if (extra == NULL)
3127 {
3128 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3129 extra->flags = 0;
3130 }
3131 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3132 extra->callout_data = &callout_data;
3133 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3134 options | g_notempty, use_offsets, use_size_offsets);
3135 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3136 }
3137
3138 /* The normal case is just to do the match once, with the default
3139 value of match_limit. */
3140
3141 #if !defined NODFA
3142 else if (all_use_dfa || use_dfa)
3143 {
3144 int workspace[1000];
3145 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3146 options | g_notempty, use_offsets, use_size_offsets, workspace,
3147 sizeof(workspace)/sizeof(int));
3148 if (count == 0)
3149 {
3150 fprintf(outfile, "Matched, but too many subsidiary matches\n");
3151 count = use_size_offsets/2;
3152 }
3153 }
3154 #endif
3155
3156 else
3157 {
3158 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3159 options | g_notempty, use_offsets, use_size_offsets);
3160 if (count == 0)
3161 {
3162 fprintf(outfile, "Matched, but too many substrings\n");
3163 count = use_size_offsets/3;
3164 }
3165 }
3166
3167 /* Matched */
3168
3169 if (count >= 0)
3170 {
3171 int i, maxcount;
3172
3173 #if !defined NODFA
3174 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3175 #endif
3176 maxcount = use_size_offsets/3;
3177
3178 /* This is a check against a lunatic return value. */
3179
3180 if (count > maxcount)
3181 {
3182 fprintf(outfile,
3183 "** PCRE error: returned count %d is too big for offset size %d\n",
3184 count, use_size_offsets);
3185 count = use_size_offsets/3;
3186 if (do_g || do_G)
3187 {
3188 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3189 do_g = do_G = FALSE; /* Break g/G loop */
3190 }
3191 }
3192
3193 /* do_allcaps requests showing of all captures in the pattern, to check
3194 unset ones at the end. */
3195
3196 if (do_allcaps)
3197 {
3198 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
3199 count++; /* Allow for full match */
3200 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3201 }
3202
3203 /* Output the captured substrings */
3204
3205 for (i = 0; i < count * 2; i += 2)
3206 {
3207 if (use_offsets[i] < 0)
3208 {
3209 if (use_offsets[i] != -1)
3210 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3211 use_offsets[i], i);
3212 if (use_offsets[i+1] != -1)
3213 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3214 use_offsets[i+1], i+1);
3215 fprintf(outfile, "%2d: <unset>\n", i/2);
3216 }
3217 else
3218 {
3219 fprintf(outfile, "%2d: ", i/2);
3220 PCHARSV(bptr + use_offsets[i],
3221 use_offsets[i+1] - use_offsets[i], outfile);
3222 fprintf(outfile, "\n");
3223 if (do_showcaprest || (i == 0 && do_showrest))
3224 {
3225 fprintf(outfile, "%2d+ ", i/2);
3226 PCHARSV(bptr + use_offsets[i+1], len - use_offsets[i+1],
3227 outfile);
3228 fprintf(outfile, "\n");
3229 }
3230 }
3231 }
3232
3233 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
3234
3235 for (i = 0; i < 32; i++)
3236 {
3237 if ((copystrings & (1 << i)) != 0)
3238 {
3239 char copybuffer[256];
3240 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
3241 i, copybuffer, sizeof(copybuffer));
3242 if (rc < 0)
3243 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
3244 else
3245 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
3246 }
3247 }
3248
3249 for (copynamesptr = copynames;
3250 *copynamesptr != 0;
3251 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
3252 {
3253 char copybuffer[256];
3254 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
3255 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
3256 if (rc < 0)
3257 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
3258 else
3259 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
3260 }
3261
3262 for (i = 0; i < 32; i++)
3263 {
3264 if ((getstrings & (1 << i)) != 0)
3265 {
3266 const char *substring;
3267 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
3268 i, &substring);
3269 if (rc < 0)
3270 fprintf(outfile, "get substring %d failed %d\n", i, rc);
3271 else
3272 {
3273 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
3274 pcre_free_substring(substring);
3275 }
3276 }
3277 }
3278
3279 for (getnamesptr = getnames;
3280 *getnamesptr != 0;
3281 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
3282 {
3283 const char *substring;
3284 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
3285 count, (char *)getnamesptr, &substring);
3286 if (rc < 0)
3287 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
3288 else
3289 {
3290 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
3291 pcre_free_substring(substring);
3292 }
3293 }
3294
3295 if (getlist)
3296 {
3297 const char **stringlist;
3298 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
3299 &stringlist);
3300 if (rc < 0)
3301 fprintf(outfile, "get substring list failed %d\n", rc);
3302 else
3303 {
3304 for (i = 0; i < count; i++)
3305 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
3306 if (stringlist[i] != NULL)
3307 fprintf(outfile, "string list not terminated by NULL\n");
3308 pcre_free_substring_list(stringlist);
3309 }
3310 }
3311 }
3312
3313 /* There was a partial match */
3314
3315 else if (count == PCRE_ERROR_PARTIAL)
3316 {
3317 if (markptr == NULL) fprintf(outfile, "Partial match");
3318 else fprintf(outfile, "Partial match, mark=%s", markptr);
3319 if (use_size_offsets > 1)
3320 {
3321 fprintf(outfile, ": ");
3322 PCHARSV(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
3323 outfile);
3324 }
3325 fprintf(outfile, "\n");
3326 break; /* Out of the /g loop */
3327 }
3328
3329 /* Failed to match. If this is a /g or /G loop and we previously set
3330 g_notempty after a null match, this is not necessarily the end. We want
3331 to advance the start offset, and continue. We won't be at the end of the
3332 string - that was checked before setting g_notempty.
3333
3334 Complication arises in the case when the newline convention is "any",
3335 "crlf", or "anycrlf". If the previous match was at the end of a line
3336 terminated by CRLF, an advance of one character just passes the \r,
3337 whereas we should prefer the longer newline sequence, as does the code in
3338 pcre_exec(). Fudge the offset value to achieve this. We check for a
3339 newline setting in the pattern; if none was set, use pcre_config() to
3340 find the default.
3341
3342 Otherwise, in the case of UTF-8 matching, the advance must be one
3343 character, not one byte. */
3344
3345 else
3346 {
3347 if (g_notempty != 0)
3348 {
3349 int onechar = 1;
3350 unsigned int obits = ((real_pcre *)re)->options;
3351 use_offsets[0] = start_offset;
3352 if ((obits & PCRE_NEWLINE_BITS) == 0)
3353 {
3354 int d;
3355 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
3356 /* Note that these values are always the ASCII ones, even in
3357 EBCDIC environments. CR = 13, NL = 10. */
3358 obits = (d == 13)? PCRE_NEWLINE_CR :
3359 (d == 10)? PCRE_NEWLINE_LF :
3360 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3361 (d == -2)? PCRE_NEWLINE_ANYCRLF :
3362 (d == -1)? PCRE_NEWLINE_ANY : 0;
3363 }
3364 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3365 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3366 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3367 &&
3368 start_offset < len - 1 &&
3369 bptr[start_offset] == '\r' &&
3370 bptr[start_offset+1] == '\n')
3371 onechar++;
3372 else if (use_utf8)
3373 {
3374 while (start_offset + onechar < len)
3375 {
3376 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3377 onechar++;
3378 }
3379 }
3380 use_offsets[1] = start_offset + onechar;
3381 }
3382 else
3383 {
3384 switch(count)
3385 {
3386 case PCRE_ERROR_NOMATCH:
3387 if (gmatched == 0)
3388 {
3389 if (markptr == NULL) fprintf(outfile, "No match\n");
3390 else fprintf(outfile, "No match, mark = %s\n", markptr);
3391 }
3392 break;
3393
3394 case PCRE_ERROR_BADUTF8:
3395 case PCRE_ERROR_SHORTUTF8:
3396 fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3397 (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3398 if (use_size_offsets >= 2)
3399 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3400 use_offsets[1]);
3401 fprintf(outfile, "\n");
3402 break;
3403
3404 default:
3405 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3406 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3407 else
3408 fprintf(outfile, "Error %d (Unexpected value)\n", count);
3409 break;
3410 }
3411
3412 break; /* Out of the /g loop */
3413 }
3414 }
3415
3416 /* If not /g or /G we are done */
3417
3418 if (!do_g && !do_G) break;
3419
3420 /* If we have matched an empty string, first check to see if we are at
3421 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3422 Perl's /g option does. This turns out to be rather cunning. First we set
3423 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3424 same point. If this fails (picked up above) we advance to the next
3425 character. */
3426
3427 g_notempty = 0;
3428
3429 if (use_offsets[0] == use_offsets[1])
3430 {
3431 if (use_offsets[0] == len) break;
3432 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3433 }
3434
3435 /* For /g, update the start offset, leaving the rest alone */
3436
3437 if (do_g) start_offset = use_offsets[1];
3438
3439 /* For /G, update the pointer and length */
3440
3441 else
3442 {
3443 bptr += use_offsets[1];
3444 len -= use_offsets[1];
3445 }
3446 } /* End of loop for /g and /G */
3447
3448 NEXT_DATA: continue;
3449 } /* End of loop for data lines */
3450
3451 CONTINUE:
3452
3453 #if !defined NOPOSIX
3454 if (posix || do_posix) regfree(&preg);
3455 #endif
3456
3457 if (re != NULL) new_free(re);
3458 if (extra != NULL)
3459 {
3460 PCRE_FREE_STUDY(extra);
3461 }
3462 if (locale_set)
3463 {
3464 new_free((void *)tables);
3465 setlocale(LC_CTYPE, "C");
3466 locale_set = 0;
3467 }
3468 if (jit_stack != NULL)
3469 {
3470 pcre_jit_stack_free(jit_stack);
3471 jit_stack = NULL;
3472 }
3473 }
3474
3475 if (infile == stdin) fprintf(outfile, "\n");
3476
3477 EXIT:
3478
3479 if (infile != NULL && infile != stdin) fclose(infile);
3480 if (outfile != NULL && outfile != stdout) fclose(outfile);
3481
3482 free(buffer);
3483 free(dbuffer);
3484 free(pbuffer);
3485 free(offsets);
3486
3487 #ifdef SUPPORT_PCRE16
3488 if (buffer16 != NULL) free(buffer16);
3489 #endif
3490
3491 return yield;
3492 }
3493
3494 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12