/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 862 - (show annotations) (download)
Wed Jan 11 16:07:32 2012 UTC (2 years, 10 months ago) by ph10
File MIME type: text/plain
File size: 130222 byte(s)
Diagnose more than 8 hex digits in \x{...} in pcretest data lines.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49
50 #ifdef HAVE_CONFIG_H
51 #include "config.h"
52 #endif
53
54 #include <ctype.h>
55 #include <stdio.h>
56 #include <string.h>
57 #include <stdlib.h>
58 #include <time.h>
59 #include <locale.h>
60 #include <errno.h>
61
62 #ifdef SUPPORT_LIBREADLINE
63 #ifdef HAVE_UNISTD_H
64 #include <unistd.h>
65 #endif
66 #include <readline/readline.h>
67 #include <readline/history.h>
68 #endif
69
70
71 /* A number of things vary for Windows builds. Originally, pcretest opened its
72 input and output without "b"; then I was told that "b" was needed in some
73 environments, so it was added for release 5.0 to both the input and output. (It
74 makes no difference on Unix-like systems.) Later I was told that it is wrong
75 for the input on Windows. I've now abstracted the modes into two macros that
76 are set here, to make it easier to fiddle with them, and removed "b" from the
77 input mode under Windows. */
78
79 #if defined(_WIN32) || defined(WIN32)
80 #include <io.h> /* For _setmode() */
81 #include <fcntl.h> /* For _O_BINARY */
82 #define INPUT_MODE "r" /* No "b": binary mode is wrong for input on Windows */
83 #define OUTPUT_MODE "wb"
84
85 #ifndef isatty
86 #define isatty _isatty /* This is what Windows calls them, I'm told, */
87 #endif /* though in some environments they seem to */
88 /* be already defined, hence the #ifndefs. */
89 #ifndef fileno
90 #define fileno _fileno
91 #endif /* fileno */
92
93 /* A user sent this fix for Borland Builder 5 under Windows. */
94
95 #ifdef __BORLANDC__
96 #define _setmode(handle, mode) setmode(handle, mode)
97 #endif /* __BORLANDC__ */
98
99 /* Not Windows */
100
101 #else
102 #include <sys/time.h> /* These two includes are needed */
103 #include <sys/resource.h> /* for setrlimit(). */
104 #define INPUT_MODE "rb" /* "b" makes no difference on Unix-like systems */
105 #define OUTPUT_MODE "wb"
106 #endif /* _WIN32 || WIN32 */
107
108
109 /* We have to include pcre_internal.h because we need the internal info for
110 displaying the results of pcre_study() and we also need to know about the
111 internal macros, structures, and other internal data values; pcretest has
112 "inside information" compared to a program that strictly follows the PCRE API.
113
114 Although pcre_internal.h does itself include pcre.h, we explicitly include it
115 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
116 appropriately for an application, not for building PCRE. */
117
118 #include "pcre.h"
119
120 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
121 /* Configure internal macros to 16 bit mode. */
122 #define COMPILE_PCRE16
123 #endif
124
125 #include "pcre_internal.h"
126
127 /* The pcre_printint() function, which prints the internal form of a compiled
128 regex, is held in a separate file so that (a) it can be compiled in either
129 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
130 when that is compiled in debug mode. */
131
132 #ifdef SUPPORT_PCRE8
133 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
134 #endif
135 #ifdef SUPPORT_PCRE16
136 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
137 #endif
138
139 /* We need access to some of the data tables that PCRE uses. So as not to have
140 to keep two copies, we include the source file here, changing the names of the
141 external symbols to prevent clashes. */
142
143 #define PCRE_INCLUDED
144 #undef PRIV
145 #define PRIV(name) name
146
147 #include "pcre_tables.c"
148
149 /* The definition of the macro PRINTABLE, which determines whether to print an
150 output character as-is or as a hex value when showing compiled patterns, is
151 the same as in the printint.src file. We use it here in cases when the locale
152 has not been explicitly changed, so as to get consistent output from systems
153 that differ in their output from isprint() even in the "C" locale. */
154
/* PRINTABLE(c) is the locale-independent test: a fixed range of character
values, 32..126 normally and 64..254 when EBCDIC is defined. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK(c) uses isprint() when locale_set is non-zero and PRINTABLE()
otherwise. isprint() is defined only for arguments that are representable as
unsigned char (or EOF), so a value outside 0..255 - a 16-bit code unit, for
example - is reported as not printable instead of being handed to isprint().
The result is to be used as a truth value only. The argument may be evaluated
more than once. */

#define PRINTOK(c) \
  (locale_set? ((unsigned int)(c) <= 255u && isprint((int)(c))) : PRINTABLE(c))
162
163 /* Posix support is disabled in 16 bit only mode. */
164 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
165 #define NOPOSIX
166 #endif
167
168 /* It is possible to compile this test program without including support for
169 testing the POSIX interface, though this is not available via the standard
170 Makefile. */
171
172 #if !defined NOPOSIX
173 #include "pcreposix.h"
174 #endif
175
176 /* It is also possible, originally for the benefit of a version that was
177 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
178 NOUTF), or without the interface to the DFA matcher (NODFA). In fact, we
179 automatically cut out the UTF support if PCRE is built without it. */
180
181 #ifndef SUPPORT_UTF
182 #ifndef NOUTF
183 #define NOUTF
184 #endif
185 #endif
186
187 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
188 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
189 only from one place and is handled differently). I couldn't dream up any way of
190 using a single macro to do this in a generic way, because of the many different
191 argument requirements. We know that at least one of SUPPORT_PCRE8 and
192 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
193 use these in the definitions of generic macros.
194
195 **** Special note about the PCHARSxxx macros: the address of the string to be
196 printed is always given as two arguments: a base address followed by an offset.
197 The base address is cast to the correct data size for 8 or 16 bit data; the
198 offset is in units of this size. If the string were given as base+offset in one
199 argument, the casting might be incorrectly applied. */
200
201 #ifdef SUPPORT_PCRE8
202
/* Wrappers for the 8-bit library. Each one casts its string arguments to the
types used in the pcre_xxx() call it expands to; those that produce a result
assign it to the variable named by their first argument. Arguments that are
cast, and the offset added in PCHARS8/PCHARSV8, are parenthesized so that an
expression may be passed. The expansions are not terminated: the caller
supplies the semicolon. */

#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + (offset), len, f)

#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + (offset), len, f)

/* cn16 is accepted only so that the 8-bit and 16-bit forms take the same
argument list; it is not used here. */

#define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
  p = read_capture_name8(p, cn8, re)

#define STRLEN8(p) ((int)strlen((char *)(p)))

#define SET_PCRE_CALLOUT8(callout) \
  pcre_callout = callout

#define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
  pcre_assign_jit_stack(extra, callback, userdata)

#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  re = pcre_compile((char *)(pat), options, error, erroffset, tables)

#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre_copy_named_substring(re, (char *)(bptr), offsets, count, \
    (char *)(namesptr), cbuffer, size)

#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre_copy_substring((char *)(bptr), offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre_dfa_exec(re, extra, (char *)(bptr), len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre_exec(re, extra, (char *)(bptr), len, start_offset, options, \
    offsets, size_offsets)

#define PCRE_FREE_STUDY8(extra) \
  pcre_free_study(extra)

#define PCRE_FREE_SUBSTRING8(substring) \
  pcre_free_substring(substring)

#define PCRE_FREE_SUBSTRING_LIST8(listptr) \
  pcre_free_substring_list(listptr)

#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre_get_named_substring(re, (char *)(bptr), offsets, count, \
    (char *)(getnamesptr), subsptr)

/* The second parameter is the compiled pattern. It used to be spelt "rc"
while the expansion referred to "re", so the macro silently picked up whatever
variable called re happened to be in scope at the point of use. */

#define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
  n = pcre_get_stringnumber(re, (char *)(ptr))

#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre_get_substring((char *)(bptr), offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
  rc = pcre_get_substring_list((const char *)(bptr), offsets, count, listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
  rc = pcre_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT8(re, outfile, debug_lengths) \
  pcre_printint(re, outfile, debug_lengths)

#define PCRE_STUDY8(extra, re, options, error) \
  extra = pcre_study(re, options, error)

#define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
  pcre_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE8(stack) \
  pcre_jit_stack_free(stack)
278
279 #endif /* SUPPORT_PCRE8 */
280
281 /* -----------------------------------------------------------*/
282
283 #ifdef SUPPORT_PCRE16
284
/* Wrappers for the 16-bit library. The rest of the program holds patterns,
study data and strings in the 8-bit types, so each wrapper casts its arguments
to the pcre16 types on the way in and casts any returned pointer back on the
way out. The pointer casts are deliberately applied to the argument exactly as
written (not parenthesized): see the note about PCHARSxxx before the 8-bit
macros. The expansions are not terminated: the caller supplies the semicolon. */

#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + (offset), len, f)

#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + (offset), len, f)

/* cn8 is accepted only so that the 8-bit and 16-bit forms take the same
argument list; it is not used here. */

#define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
  p = read_capture_name16(p, cn16, re)

#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))

#define SET_PCRE_CALLOUT16(callout) \
  pcre16_callout = (int (*)(pcre16_callout_block *))callout

#define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
  pcre16_assign_jit_stack((pcre16_extra *)extra, \
    (pcre16_jit_callback)callback, userdata)

#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
    tables)

/* In the two copy macros the size argument is halved before it is passed on.
NOTE(review): presumably callers give the buffer size in bytes and the library
wants it in 16-bit units - confirm. The argument is parenthesized so that an
expression such as a + b is halved as a whole. */

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, (size)/2)

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_UCHAR16 *)cbuffer, (size)/2)

#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
    (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
    workspace, size_workspace)

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
    len, start_offset, options, offsets, size_offsets)

#define PCRE_FREE_STUDY16(extra) \
  pcre16_free_study((pcre16_extra *)extra)

#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)substring)

#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)listptr)

#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)

/* The second parameter is the compiled pattern. It used to be spelt "rc"
while the expansion referred to "re", so the macro silently picked up whatever
variable called re happened to be in scope at the point of use. The pattern is
now cast to pcre16 * in the same way as in the other wrappers. */

#define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)

#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SPTR16 *)(void*)subsptr)

#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16 **)(void*)listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
  rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
    tables)

#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  pcre16_printint(re, outfile, debug_lengths)

#define PCRE_STUDY16(extra, re, options, error) \
  extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)

#define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
  (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE16(stack) \
  pcre16_jit_stack_free((pcre16_jit_stack *)stack)
367
368 #endif /* SUPPORT_PCRE16 */
369
370
371 /* ----- Both modes are supported; a runtime test is needed, except for
372 pcre_config(), and the JIT stack functions, when it doesn't matter which
373 version is called. ----- */
374
375 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
376
/* Generic forms for use when both libraries are available. CHAR_SIZE, STRLEN,
PCRE_JIT_STACK_ALLOC and PCRE_MAKETABLES are expressions that select on
use_pcre16; PCRE_CONFIG is always the 8-bit function. Every other macro is a
statement that tests use_pcre16 and expands the 16-bit or the 8-bit wrapper.
Each of those is wrapped in do { } while (0) so that it is a single statement
wherever it is written (for example as the unbraced body of an if that has its
own else); it must be followed by a semicolon. */

#define CHAR_SIZE (use_pcre16? 2:1)

#define PCHARS(lv, p, offset, len, f) \
  do { \
    if (use_pcre16) \
      PCHARS16(lv, p, offset, len, f); \
    else \
      PCHARS8(lv, p, offset, len, f); \
  } while (0)

#define PCHARSV(p, offset, len, f) \
  do { \
    if (use_pcre16) \
      PCHARSV16(p, offset, len, f); \
    else \
      PCHARSV8(p, offset, len, f); \
  } while (0)

#define READ_CAPTURE_NAME(p, cn8, cn16, re) \
  do { \
    if (use_pcre16) \
      READ_CAPTURE_NAME16(p, cn8, cn16, re); \
    else \
      READ_CAPTURE_NAME8(p, cn8, cn16, re); \
  } while (0)

#define SET_PCRE_CALLOUT(callout) \
  do { \
    if (use_pcre16) \
      SET_PCRE_CALLOUT16(callout); \
    else \
      SET_PCRE_CALLOUT8(callout); \
  } while (0)

#define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  do { \
    if (use_pcre16) \
      PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
    else \
      PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata); \
  } while (0)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  do { \
    if (use_pcre16) \
      PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
    else \
      PCRE_COMPILE8(re, pat, options, error, erroffset, tables); \
  } while (0)

#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  do { \
    if (use_pcre16) \
      PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    else \
      PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
  } while (0)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  do { \
    if (use_pcre16) \
      PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
    else \
      PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size); \
  } while (0)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  do { \
    if (use_pcre16) \
      PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    else \
      PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
  } while (0)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do { \
    if (use_pcre16) \
      PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    else \
      PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
  } while (0)

#define PCRE_FREE_STUDY(extra) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_STUDY16(extra); \
    else \
      PCRE_FREE_STUDY8(extra); \
  } while (0)

#define PCRE_FREE_SUBSTRING(substring) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_SUBSTRING16(substring); \
    else \
      PCRE_FREE_SUBSTRING8(substring); \
  } while (0)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_SUBSTRING_LIST16(listptr); \
    else \
      PCRE_FREE_SUBSTRING_LIST8(listptr); \
  } while (0)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    else \
      PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
  } while (0)

/* The second parameter is the compiled pattern (it was previously spelt
"rc"); it is passed straight through to the mode-specific macro. */

#define PCRE_GET_STRINGNUMBER(n, re, ptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_STRINGNUMBER16(n, re, ptr); \
    else \
      PCRE_GET_STRINGNUMBER8(n, re, ptr); \
  } while (0)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
    else \
      PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr); \
  } while (0)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
    else \
      PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr); \
  } while (0)

/* This one yields a value, so it stays an expression. */

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  (use_pcre16 ? \
    PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
    :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))

#define PCRE_JIT_STACK_FREE(stack) \
  do { \
    if (use_pcre16) \
      PCRE_JIT_STACK_FREE16(stack); \
    else \
      PCRE_JIT_STACK_FREE8(stack); \
  } while (0)

#define PCRE_MAKETABLES \
  (use_pcre16? pcre16_maketables() : pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  do { \
    if (use_pcre16) \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
    else \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables); \
  } while (0)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  do { \
    if (use_pcre16) \
      PCRE_PRINTINT16(re, outfile, debug_lengths); \
    else \
      PCRE_PRINTINT8(re, outfile, debug_lengths); \
  } while (0)

#define PCRE_STUDY(extra, re, options, error) \
  do { \
    if (use_pcre16) \
      PCRE_STUDY16(extra, re, options, error); \
    else \
      PCRE_STUDY8(extra, re, options, error); \
  } while (0)
528
529 /* ----- Only 8-bit mode is supported: each generic name is an alias for its 8-bit form ----- */
530
531 #elif defined SUPPORT_PCRE8
532 #define CHAR_SIZE 1 /* Fixed value; no test of use_pcre16 */
533 #define PCHARS PCHARS8
534 #define PCHARSV PCHARSV8
535 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
536 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
537 #define STRLEN STRLEN8
538 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
539 #define PCRE_COMPILE PCRE_COMPILE8
540 #define PCRE_CONFIG pcre_config
541 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
542 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
543 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
544 #define PCRE_EXEC PCRE_EXEC8
545 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
546 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
547 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
548 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
549 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
550 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
551 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
552 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
553 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
554 #define PCRE_MAKETABLES pcre_maketables() /* Includes the call, unlike PCRE_CONFIG */
555 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
556 #define PCRE_PRINTINT PCRE_PRINTINT8
557 #define PCRE_STUDY PCRE_STUDY8
558
559 /* ----- Only 16-bit mode is supported: each generic name is an alias for its 16-bit form ----- */
560
561 #else
562 #define CHAR_SIZE 2 /* Fixed value; no test of use_pcre16 */
563 #define PCHARS PCHARS16
564 #define PCHARSV PCHARSV16
565 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
566 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
567 #define STRLEN STRLEN16
568 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
569 #define PCRE_COMPILE PCRE_COMPILE16
570 #define PCRE_CONFIG pcre16_config
571 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
572 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
573 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
574 #define PCRE_EXEC PCRE_EXEC16
575 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
576 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
577 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
578 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
579 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
580 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
581 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
582 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
583 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
584 #define PCRE_MAKETABLES pcre16_maketables() /* Includes the call, unlike PCRE_CONFIG */
585 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
586 #define PCRE_PRINTINT PCRE_PRINTINT16
587 #define PCRE_STUDY PCRE_STUDY16
588 #endif /* End of the both-modes / 8-bit only / 16-bit only selection */
589
590 /* ----- End of mode-specific function call macros ----- */
591
592
593 /* Other parameters */
594
595 #ifndef CLOCKS_PER_SEC /* Supply a value only if the headers did not */
596 #ifdef CLK_TCK
597 #define CLOCKS_PER_SEC CLK_TCK /* Use CLK_TCK when it is available */
598 #else
599 #define CLOCKS_PER_SEC 100 /* Last resort: a fixed value of 100 */
600 #endif /* CLK_TCK */
601 #endif /* CLOCKS_PER_SEC */
602
603 /* This is the default loop count for timing. */
604
605 #define LOOPREPEAT 500000
606
607 /* Static variables */
608
609 static FILE *outfile; /* NOTE(review): presumably the stream that test output is written to - confirm */
610 static int log_store = 0;
611 static int callout_count;
612 static int callout_extra;
613 static int callout_fail_count;
614 static int callout_fail_id;
615 static int debug_lengths;
616 static int first_callout;
617 static int locale_set = 0; /* Non-zero makes PRINTOK() use isprint() rather than PRINTABLE() */
618 static int show_malloc;
619 static int use_utf;
620 static size_t gotten_store;
621 static size_t first_gotten_store = 0;
622 static const unsigned char *last_callout_mark = NULL;
623
624 /* The buffers grow automatically if very long input lines are encountered. */
625
626 static int buffer_size = 50000;
627 static pcre_uint8 *buffer = NULL;
628 static pcre_uint8 *dbuffer = NULL;
629 static pcre_uint8 *pbuffer = NULL;
630
631 /* Another buffer is needed for translation to 16-bit character strings. It
632 will be obtained and extended as required. */
633
634 #ifdef SUPPORT_PCRE16
635 static int buffer16_size = 0;
636 static pcre_uint16 *buffer16 = NULL;
637
638 #ifdef SUPPORT_PCRE8
639
640 /* We need the table of operator lengths that is used for 16-bit compiling, in
641 order to swap bytes in a pattern for saving/reloading testing. Luckily, the
642 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
643 appropriately for the 16-bit world. Just as a safety check, make sure that
644 COMPILE_PCRE16 is *not* set. */
645
646 #ifdef COMPILE_PCRE16
647 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
648 #endif
649
650 #if LINK_SIZE == 2
651 #undef LINK_SIZE
652 #define LINK_SIZE 1
653 #elif LINK_SIZE == 3 || LINK_SIZE == 4
654 #undef LINK_SIZE
655 #define LINK_SIZE 2
656 #else
657 #error LINK_SIZE must be either 2, 3, or 4
658 #endif
659
660 #undef IMM2_SIZE
661 #define IMM2_SIZE 1
662
663 #endif /* SUPPORT_PCRE8 */
664
665 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
666 #endif /* SUPPORT_PCRE16 */
667
668 /* If we have 8-bit support, default use_pcre16 to false; if there is also
669 16-bit support, it can be changed by an option. If there is no 8-bit support,
670 there must be 16-bit support, so default it to 1. */
671
672 #ifdef SUPPORT_PCRE8
673 static int use_pcre16 = 0;
674 #else
675 static int use_pcre16 = 1;
676 #endif
677
/* Textual explanations for runtime error codes. A NULL entry marks a code
for which there is no fixed text in this table; the comment on each such entry
says why. Neither the strings nor the table are meant to be modified, so the
array elements are const as well as the characters they point to.
NOTE(review): presumably the table is indexed by the negated error code -
confirm against the code that reads it. */

static const char * const errtexts[] = {
  NULL,  /* 0 is no error */
  NULL,  /* NOMATCH is handled specially */
  "NULL argument passed",
  "bad option value",
  "magic number missing",
  "unknown opcode - pattern overwritten?",
  "no more memory",
  NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  "match limit exceeded",
  "callout error code",
  NULL,  /* BADUTF8/16 is handled specially */
  NULL,  /* BADUTF8/16 offset is handled specially */
  NULL,  /* PARTIAL is handled specially */
  "not used - internal error",
  "internal error - pattern overwritten?",
  "bad count value",
  "item unsupported for DFA matching",
  "backreference condition or recursion test not supported for DFA matching",
  "match limit not supported for DFA matching",
  "workspace size exceeded in DFA matching",
  "too much recursion for DFA matching",
  "recursion limit exceeded",
  "not used - internal error",
  "invalid combination of newline options",
  "bad offset value",
  NULL,  /* SHORTUTF8/16 is handled specially */
  "nested recursion at the same subject position",
  "JIT stack limit reached",
  "pattern compiled in wrong mode: 8-bit/16-bit error"
};
711
712
713 /*************************************************
714 * Alternate character tables *
715 *************************************************/
716
717 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
718 using the default tables of the library. However, the T option can be used to
719 select alternate sets of tables, for different kinds of testing. Note also that
720 the L (locale) option also adjusts the tables. */
721
722 /* This is the set of tables distributed as default with PCRE. It recognizes
723 only ASCII characters. */
724
725 static const pcre_uint8 tables0[] = {
726
727 /* This table is a lower casing table. */
728
729 0, 1, 2, 3, 4, 5, 6, 7,
730 8, 9, 10, 11, 12, 13, 14, 15,
731 16, 17, 18, 19, 20, 21, 22, 23,
732 24, 25, 26, 27, 28, 29, 30, 31,
733 32, 33, 34, 35, 36, 37, 38, 39,
734 40, 41, 42, 43, 44, 45, 46, 47,
735 48, 49, 50, 51, 52, 53, 54, 55,
736 56, 57, 58, 59, 60, 61, 62, 63,
737 64, 97, 98, 99,100,101,102,103,
738 104,105,106,107,108,109,110,111,
739 112,113,114,115,116,117,118,119,
740 120,121,122, 91, 92, 93, 94, 95,
741 96, 97, 98, 99,100,101,102,103,
742 104,105,106,107,108,109,110,111,
743 112,113,114,115,116,117,118,119,
744 120,121,122,123,124,125,126,127,
745 128,129,130,131,132,133,134,135,
746 136,137,138,139,140,141,142,143,
747 144,145,146,147,148,149,150,151,
748 152,153,154,155,156,157,158,159,
749 160,161,162,163,164,165,166,167,
750 168,169,170,171,172,173,174,175,
751 176,177,178,179,180,181,182,183,
752 184,185,186,187,188,189,190,191,
753 192,193,194,195,196,197,198,199,
754 200,201,202,203,204,205,206,207,
755 208,209,210,211,212,213,214,215,
756 216,217,218,219,220,221,222,223,
757 224,225,226,227,228,229,230,231,
758 232,233,234,235,236,237,238,239,
759 240,241,242,243,244,245,246,247,
760 248,249,250,251,252,253,254,255,
761
762 /* This table is a case flipping table. */
763
764 0, 1, 2, 3, 4, 5, 6, 7,
765 8, 9, 10, 11, 12, 13, 14, 15,
766 16, 17, 18, 19, 20, 21, 22, 23,
767 24, 25, 26, 27, 28, 29, 30, 31,
768 32, 33, 34, 35, 36, 37, 38, 39,
769 40, 41, 42, 43, 44, 45, 46, 47,
770 48, 49, 50, 51, 52, 53, 54, 55,
771 56, 57, 58, 59, 60, 61, 62, 63,
772 64, 97, 98, 99,100,101,102,103,
773 104,105,106,107,108,109,110,111,
774 112,113,114,115,116,117,118,119,
775 120,121,122, 91, 92, 93, 94, 95,
776 96, 65, 66, 67, 68, 69, 70, 71,
777 72, 73, 74, 75, 76, 77, 78, 79,
778 80, 81, 82, 83, 84, 85, 86, 87,
779 88, 89, 90,123,124,125,126,127,
780 128,129,130,131,132,133,134,135,
781 136,137,138,139,140,141,142,143,
782 144,145,146,147,148,149,150,151,
783 152,153,154,155,156,157,158,159,
784 160,161,162,163,164,165,166,167,
785 168,169,170,171,172,173,174,175,
786 176,177,178,179,180,181,182,183,
787 184,185,186,187,188,189,190,191,
788 192,193,194,195,196,197,198,199,
789 200,201,202,203,204,205,206,207,
790 208,209,210,211,212,213,214,215,
791 216,217,218,219,220,221,222,223,
792 224,225,226,227,228,229,230,231,
793 232,233,234,235,236,237,238,239,
794 240,241,242,243,244,245,246,247,
795 248,249,250,251,252,253,254,255,
796
797 /* This table contains bit maps for various character classes. Each map is 32
798 bytes long and the bits run from the least significant end of each byte. The
799 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
800 graph, print, punct, and cntrl. Other classes are built from combinations. */
801
802 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
803 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
804 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
805 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
806
807 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
808 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
809 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
810 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
811
812 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
813 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
814 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
815 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
816
817 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
818 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
819 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
820 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
821
822 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
823 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
824 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
825 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
826
827 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
828 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
829 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
830 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
831
832 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
833 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
834 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
835 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
836
837 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
838 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
839 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
840 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
841
842 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
843 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
844 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
846
847 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
848 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
849 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
851
852 /* This table identifies various classes of character by individual bits:
853 0x01 white space character
854 0x02 letter
855 0x04 decimal digit
856 0x08 hexadecimal digit
857 0x10 alphanumeric or '_'
858 0x80 regular expression metacharacter or binary zero
859 */
860
861 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
862 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
863 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
864 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
865 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
866 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
867 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
868 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
869 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
870 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
871 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
872 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
873 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
874 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
875 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
876 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
877 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
878 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
879 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
880 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
881 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
882 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
883 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
884 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
885 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
886 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
887 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
888 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
889 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
890 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
891 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
892 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
893
/* This is a set of tables that came originally from a Windows user. It seems to
be at least an approximation of ISO 8859. In particular, there are characters
greater than 128 that are marked as spaces, letters, etc.

The array holds 1088 bytes, laid out as four consecutive sub-tables: a 256-byte
lower-casing table, a 256-byte case-flipping table, 320 bytes of character
class bit maps (ten maps of 32 bytes each), and a 256-byte character type
table. */

static const pcre_uint8 tables1[] = {

/* Lower-casing table: entry n is the lower-case form of character n (for
example, entry 65 is 97). */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* Case-flipping table: entry n is character n with its case reversed (for
example, entry 65 is 97 and entry 97 is 65). */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Character class bit maps: ten maps of 32 bytes each, four rows per map.
Presumably these are in the same order as the maps of the preceding default
table (space, xdigit, digit, upper, lower, word, graph, print, punct, cntrl),
with bits running from the least significant end of each byte. */

0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Character type table: one byte of flag bits per character. Presumably the
bits have the same meanings as in the preceding default table (1 white space,
2 letter, 4 decimal digit, 8 hexadecimal digit, 16 alphanumeric or '_',
128 regular expression metacharacter or binary zero). */

128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
1036
1037
1038
1039
1040 #ifndef HAVE_STRERROR
1041 /*************************************************
1042 * Provide strerror() for non-ANSI libraries *
1043 *************************************************/
1044
1045 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1046 in their libraries, but can provide the same facility by this simple
1047 alternative function. */
1048
extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror(): index the system error table with the error number,
yielding a fixed message when the number lies outside the table. */

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
1058 #endif /* HAVE_STRERROR */
1059
1060
1061 /*************************************************
1062 * JIT memory callback *
1063 *************************************************/
1064
1065 static pcre_jit_stack* jit_callback(void *arg)
1066 {
1067 return (pcre_jit_stack *)arg;
1068 }
1069
1070
1071 #if !defined NOUTF || defined SUPPORT_PCRE16
1072 /*************************************************
1073 * Convert UTF-8 string to value *
1074 *************************************************/
1075
1076 /* This function takes one or more bytes that represents a UTF-8 character,
1077 and returns the value of the character.
1078
1079 Argument:
1080 utf8bytes a pointer to the byte vector
1081 vptr a pointer to an int to receive the value
1082
1083 Returns: > 0 => the number of bytes consumed
1084 -6 to 0 => malformed UTF-8 character at offset = (-return)
1085 */
1086
1087 static int
1088 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1089 {
1090 int c = *utf8bytes++;
1091 int d = c;
1092 int i, j, s;
1093
1094 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1095 {
1096 if ((d & 0x80) == 0) break;
1097 d <<= 1;
1098 }
1099
1100 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1101 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1102
1103 /* i now has a value in the range 1-5 */
1104
1105 s = 6*i;
1106 d = (c & utf8_table3[i]) << s;
1107
1108 for (j = 0; j < i; j++)
1109 {
1110 c = *utf8bytes++;
1111 if ((c & 0xc0) != 0x80) return -(j+1);
1112 s -= 6;
1113 d |= (c & 0x3f) << s;
1114 }
1115
1116 /* Check that encoding was the correct unique one */
1117
1118 for (j = 0; j < utf8_table1_size; j++)
1119 if (d <= utf8_table1[j]) break;
1120 if (j != i) return -(i+1);
1121
1122 /* Valid value */
1123
1124 *vptr = d;
1125 return i+1;
1126 }
1127 #endif /* NOUTF || SUPPORT_PCRE16 */
1128
1129
1130
1131 #if !defined NOUTF || defined SUPPORT_PCRE16
1132 /*************************************************
1133 * Convert character value to UTF-8 *
1134 *************************************************/
1135
1136 /* This function takes an integer value in the range 0 - 0x7fffffff
1137 and encodes it as a UTF-8 character in 0 to 6 bytes.
1138
1139 Arguments:
1140 cvalue the character value
1141 utf8bytes pointer to buffer for result - at least 6 bytes long
1142
1143 Returns: number of characters placed in the buffer
1144 */
1145
1146 static int
1147 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1148 {
1149 register int i, j;
1150 for (i = 0; i < utf8_table1_size; i++)
1151 if (cvalue <= utf8_table1[i]) break;
1152 utf8bytes += i;
1153 for (j = i; j > 0; j--)
1154 {
1155 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1156 cvalue >>= 6;
1157 }
1158 *utf8bytes = utf8_table2[i] | cvalue;
1159 return i + 1;
1160 }
1161 #endif
1162
1163
1164 #ifdef SUPPORT_PCRE16
1165 /*************************************************
1166 * Convert a string to 16-bit *
1167 *************************************************/
1168
1169 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1170 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1171 double, because up to 0xffff uses 1 to 3 bytes in UTF-8 and exactly 2 bytes
1172 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1173 result is always left in buffer16.
1174
1175 Note that this function does not object to surrogate values. This is
1176 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1177 for the purpose of testing that they are correctly faulted.
1178
1179 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1180 in UTF-8 so that values greater than 255 can be handled.
1181
1182 Arguments:
1183 data TRUE if converting a data line; FALSE for a regex
1184 p points to a byte string
1185 utf true if UTF-8 (to be converted to UTF-16)
1186 len number of bytes in the string (excluding trailing zero)
1187
1188 Returns: number of 16-bit data items used (excluding trailing zero)
1189 OR -1 if a UTF-8 string is malformed
1190 OR -2 if a value > 0x10ffff is encountered
1191 OR -3 if a value > 0xffff is encountered when not in UTF mode
1192 */
1193
1194 static int
1195 to16(int data, pcre_uint8 *p, int utf, int len)
1196 {
1197 pcre_uint16 *pp;
1198
1199 if (buffer16_size < 2*len + 2)
1200 {
1201 if (buffer16 != NULL) free(buffer16);
1202 buffer16_size = 2*len + 2;
1203 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1204 if (buffer16 == NULL)
1205 {
1206 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1207 exit(1);
1208 }
1209 }
1210
1211 pp = buffer16;
1212
1213 if (!utf && !data)
1214 {
1215 while (len-- > 0) *pp++ = *p++;
1216 }
1217
1218 else
1219 {
1220 int c = 0;
1221 while (len > 0)
1222 {
1223 int chlen = utf82ord(p, &c);
1224 if (chlen <= 0) return -1;
1225 if (c > 0x10ffff) return -2;
1226 p += chlen;
1227 len -= chlen;
1228 if (c < 0x10000) *pp++ = c; else
1229 {
1230 if (!utf) return -3;
1231 c -= 0x10000;
1232 *pp++ = 0xD800 | (c >> 10);
1233 *pp++ = 0xDC00 | (c & 0x3ff);
1234 }
1235 }
1236 }
1237
1238 *pp = 0;
1239 return pp - buffer16;
1240 }
1241 #endif
1242
1243
1244 /*************************************************
1245 * Read or extend an input line *
1246 *************************************************/
1247
1248 /* Input lines are read into buffer, but both patterns and data lines can be
1249 continued over multiple input lines. In addition, if the buffer fills up, we
1250 want to automatically expand it so as to be able to handle extremely large
1251 lines that are needed for certain stress tests. When the input buffer is
1252 expanded, the other two buffers must also be expanded likewise, and the
1253 contents of pbuffer, which are a copy of the input for callouts, must be
1254 preserved (for when expansion happens for a data line). This is not the most
1255 optimal way of handling this, but hey, this is just a test program!
1256
1257 Arguments:
1258 f the file to read
1259 start where in buffer to start (this *must* be within buffer)
1260 prompt for stdin or readline()
1261
1262 Returns: pointer to the start of new data
1263 could be a copy of start, or could be moved
1264 NULL if no data read and EOF reached
1265 */
1266
1267 static pcre_uint8 *
1268 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1269 {
1270 pcre_uint8 *here = start;
1271
1272 for (;;)
1273 {
1274 int rlen = (int)(buffer_size - (here - buffer));
1275
1276 if (rlen > 1000)
1277 {
1278 int dlen;
1279
1280 /* If libreadline support is required, use readline() to read a line if the
1281 input is a terminal. Note that readline() removes the trailing newline, so
1282 we must put it back again, to be compatible with fgets(). */
1283
1284 #ifdef SUPPORT_LIBREADLINE
1285 if (isatty(fileno(f)))
1286 {
1287 size_t len;
1288 char *s = readline(prompt);
1289 if (s == NULL) return (here == start)? NULL : start;
1290 len = strlen(s);
1291 if (len > 0) add_history(s);
1292 if (len > rlen - 1) len = rlen - 1;
1293 memcpy(here, s, len);
1294 here[len] = '\n';
1295 here[len+1] = 0;
1296 free(s);
1297 }
1298 else
1299 #endif
1300
1301 /* Read the next line by normal means, prompting if the file is stdin. */
1302
1303 {
1304 if (f == stdin) printf("%s", prompt);
1305 if (fgets((char *)here, rlen, f) == NULL)
1306 return (here == start)? NULL : start;
1307 }
1308
1309 dlen = (int)strlen((char *)here);
1310 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1311 here += dlen;
1312 }
1313
1314 else
1315 {
1316 int new_buffer_size = 2*buffer_size;
1317 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1318 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1319 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1320
1321 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1322 {
1323 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1324 exit(1);
1325 }
1326
1327 memcpy(new_buffer, buffer, buffer_size);
1328 memcpy(new_pbuffer, pbuffer, buffer_size);
1329
1330 buffer_size = new_buffer_size;
1331
1332 start = new_buffer + (start - buffer);
1333 here = new_buffer + (here - buffer);
1334
1335 free(buffer);
1336 free(dbuffer);
1337 free(pbuffer);
1338
1339 buffer = new_buffer;
1340 dbuffer = new_dbuffer;
1341 pbuffer = new_pbuffer;
1342 }
1343 }
1344
1345 return NULL; /* Control never gets here */
1346 }
1347
1348
1349
1350 /*************************************************
1351 * Read number from string *
1352 *************************************************/
1353
1354 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1355 around with conditional compilation, just do the job by hand. It is only used
1356 for unpicking arguments, so just keep it simple.
1357
1358 Arguments:
1359 str string to be converted
1360 endptr where to put the end pointer
1361
1362 Returns: the unsigned long
1363 */
1364
1365 static int
1366 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1367 {
1368 int result = 0;
1369 while(*str != 0 && isspace(*str)) str++;
1370 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1371 *endptr = str;
1372 return(result);
1373 }
1374
1375
1376
1377 /*************************************************
1378 * Print one character *
1379 *************************************************/
1380
1381 /* Print a single character either literally, or as a hex escape. */
1382
1383 static int pchar(int c, FILE *f)
1384 {
1385 if (PRINTOK(c))
1386 {
1387 if (f != NULL) fprintf(f, "%c", c);
1388 return 1;
1389 }
1390
1391 if (c < 0x100)
1392 {
1393 if (use_utf)
1394 {
1395 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1396 return 6;
1397 }
1398 else
1399 {
1400 if (f != NULL) fprintf(f, "\\x%02x", c);
1401 return 4;
1402 }
1403 }
1404
1405 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1406 return (c <= 0x000000ff)? 6 :
1407 (c <= 0x00000fff)? 7 :
1408 (c <= 0x0000ffff)? 8 :
1409 (c <= 0x000fffff)? 9 : 10;
1410 }
1411
1412
1413
1414 #ifdef SUPPORT_PCRE8
1415 /*************************************************
1416 * Print 8-bit character string *
1417 *************************************************/
1418
1419 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1420 If handed a NULL file, just counts chars without printing. */
1421
1422 static int pchars(pcre_uint8 *p, int length, FILE *f)
1423 {
1424 int c = 0;
1425 int yield = 0;
1426
1427 if (length < 0)
1428 length = strlen((char *)p);
1429
1430 while (length-- > 0)
1431 {
1432 #if !defined NOUTF
1433 if (use_utf)
1434 {
1435 int rc = utf82ord(p, &c);
1436 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1437 {
1438 length -= rc - 1;
1439 p += rc;
1440 yield += pchar(c, f);
1441 continue;
1442 }
1443 }
1444 #endif
1445 c = *p++;
1446 yield += pchar(c, f);
1447 }
1448
1449 return yield;
1450 }
1451 #endif
1452
1453
1454
1455 #ifdef SUPPORT_PCRE16
1456 /*************************************************
1457 * Find length of 0-terminated 16-bit string *
1458 *************************************************/
1459
1460 static int strlen16(PCRE_SPTR16 p)
1461 {
1462 int len = 0;
1463 while (*p++ != 0) len++;
1464 return len;
1465 }
1466 #endif /* SUPPORT_PCRE16 */
1467
1468
1469 #ifdef SUPPORT_PCRE16
1470 /*************************************************
1471 * Print 16-bit character string *
1472 *************************************************/
1473
1474 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1475 If handed a NULL file, just counts chars without printing. */
1476
1477 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1478 {
1479 int yield = 0;
1480
1481 if (length < 0)
1482 length = strlen16(p);
1483
1484 while (length-- > 0)
1485 {
1486 int c = *p++ & 0xffff;
1487 #if !defined NOUTF
1488 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1489 {
1490 int d = *p & 0xffff;
1491 if (d >= 0xDC00 && d < 0xDFFF)
1492 {
1493 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1494 length--;
1495 p++;
1496 }
1497 }
1498 #endif
1499 yield += pchar(c, f);
1500 }
1501
1502 return yield;
1503 }
1504 #endif /* SUPPORT_PCRE16 */
1505
1506
1507
1508 #ifdef SUPPORT_PCRE8
1509 /*************************************************
1510 * Read a capture name (8-bit) and check it *
1511 *************************************************/
1512
1513 static pcre_uint8 *
1514 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1515 {
1516 pcre_uint8 *npp = *pp;
1517 while (isalnum(*p)) *npp++ = *p++;
1518 *npp++ = 0;
1519 *npp = 0;
1520 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1521 {
1522 fprintf(outfile, "no parentheses with name \"");
1523 PCHARSV(*pp, 0, -1, outfile);
1524 fprintf(outfile, "\"\n");
1525 }
1526
1527 *pp = npp;
1528 return p;
1529 }
1530 #endif /* SUPPORT_PCRE8 */
1531
1532
1533
1534 #ifdef SUPPORT_PCRE16
1535 /*************************************************
1536 * Read a capture name (16-bit) and check it *
1537 *************************************************/
1538
1539 /* Note that the text being read is 8-bit. */
1540
1541 static pcre_uint8 *
1542 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1543 {
1544 pcre_uint16 *npp = *pp;
1545 while (isalnum(*p)) *npp++ = *p++;
1546 *npp++ = 0;
1547 *npp = 0;
1548 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1549 {
1550 fprintf(outfile, "no parentheses with name \"");
1551 PCHARSV(*pp, 0, -1, outfile);
1552 fprintf(outfile, "\"\n");
1553 }
1554 *pp = npp;
1555 return p;
1556 }
1557 #endif /* SUPPORT_PCRE16 */
1558
1559
1560
1561 /*************************************************
1562 * Callout function *
1563 *************************************************/
1564
/* Called from PCRE as a result of the (?C) item. We print out where we are in
the match. Yield zero unless more callouts than the fail count, or the callout
data is not zero.

Argument:
  cb          the callout block supplied by PCRE

Returns:      the callout data value if there is one and it is not zero;
              otherwise 1 if this callout's number is callout_fail_id and the
              incremented callout_count has reached callout_fail_count;
              otherwise 0
*/

static int callout(pcre_callout_block *cb)
{
/* f is outfile when first_callout or callout_extra is set, and NULL otherwise;
a NULL file makes the printing calls below only measure widths. */

FILE *f = (first_callout | callout_extra)? outfile : NULL;
int i, pre_start, post_start, subject_length;

/* With callout_extra set, list the capture slots as they stand; a slot whose
start offset is negative is shown as unset. */

if (callout_extra)
  {
  fprintf(f, "Callout %d: last capture = %d\n",
    cb->callout_number, cb->capture_last);

  for (i = 0; i < cb->capture_top * 2; i += 2)
    {
    if (cb->offset_vector[i] < 0)
      fprintf(f, "%2d: <unset>\n", i/2);
    else
      {
      fprintf(f, "%2d: ", i/2);
      PCHARSV(cb->subject, cb->offset_vector[i],
        cb->offset_vector[i+1] - cb->offset_vector[i], f);
      fprintf(f, "\n");
      }
    }
  }

/* Re-print the subject in canonical form, the first time or if giving full
details. On subsequent calls in the same match, we use pchars just to find the
printed lengths of the substrings. */

if (f != NULL) fprintf(f, "--->");

/* pre_start is the printed width of the subject before the match start, and
post_start the width from the match start to the current position. */

PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
PCHARS(post_start, cb->subject, cb->start_match,
  cb->current_position - cb->start_match, f);

/* Measure the whole subject without printing it. */

PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);

PCHARSV(cb->subject, cb->current_position,
  cb->subject_length - cb->current_position, f);

if (f != NULL) fprintf(f, "\n");

/* Always print appropriate indicators, with callout number if not already
shown. For automatic callouts, show the pattern offset. */

if (cb->callout_number == 255)
  {
  fprintf(outfile, "%+3d ", cb->pattern_position);
  if (cb->pattern_position > 99) fprintf(outfile, "\n ");
  }
else
  {
  if (callout_extra) fprintf(outfile, " ");
    else fprintf(outfile, "%3d ", cb->callout_number);
  }

/* Put a circumflex under the match start and, if the current position is
beyond it, another under the current position. */

for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
fprintf(outfile, "^");

if (post_start > 0)
  {
  for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
  fprintf(outfile, "^");
  }

/* Pad out past the end of the subject, then show the next pattern item, taken
from the copy of the pattern in pbuffer (one character when the item length is
given as zero). */

for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
  fprintf(outfile, " ");

fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
  pbuffer + cb->pattern_position);

fprintf(outfile, "\n");
first_callout = 0;

/* Report the mark only when its pointer differs from the one seen at the
previous callout. */

if (cb->mark != last_callout_mark)
  {
  if (cb->mark == NULL)
    fprintf(outfile, "Latest Mark: <unset>\n");
  else
    {
    fprintf(outfile, "Latest Mark: ");
    PCHARSV(cb->mark, 0, -1, outfile);
    putc('\n', outfile);
    }
  last_callout_mark = cb->mark;
  }

/* Callout data, when present, is read as an int; a non-zero value is reported
and becomes the yield. */

if (cb->callout_data != NULL)
  {
  int callout_data = *((int *)(cb->callout_data));
  if (callout_data != 0)
    {
    fprintf(outfile, "Callout data = %d\n", callout_data);
    return callout_data;
    }
  }

return (cb->callout_number != callout_fail_id)? 0 :
       (++callout_count >= callout_fail_count)? 1 : 0;
}
1668
1669
1670 /*************************************************
1671 * Local malloc functions *
1672 *************************************************/
1673
1674 /* Alternative malloc function, to test functionality and save the size of a
1675 compiled re, which is the first store request that pcre_compile() makes. The
1676 show_malloc variable is set only during matching. */
1677
1678 static void *new_malloc(size_t size)
1679 {
1680 void *block = malloc(size);
1681 gotten_store = size;
1682 if (first_gotten_store == 0) first_gotten_store = size;
1683 if (show_malloc)
1684 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1685 return block;
1686 }
1687
1688 static void new_free(void *block)
1689 {
1690 if (show_malloc)
1691 fprintf(outfile, "free %p\n", block);
1692 free(block);
1693 }
1694
1695 /* For recursion malloc/free, to test stacking calls */
1696
1697 static void *stack_malloc(size_t size)
1698 {
1699 void *block = malloc(size);
1700 if (show_malloc)
1701 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1702 return block;
1703 }
1704
1705 static void stack_free(void *block)
1706 {
1707 if (show_malloc)
1708 fprintf(outfile, "stack_free %p\n", block);
1709 free(block);
1710 }
1711
1712
1713 /*************************************************
1714 * Call pcre_fullinfo() *
1715 *************************************************/
1716
1717 /* Get one piece of information from the pcre_fullinfo() function. When only
1718 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1719 value, but the code is defensive.
1720
1721 Arguments:
1722 re compiled regex
1723 study study data
1724 option PCRE_INFO_xxx option
1725 ptr where to put the data
1726
1727 Returns: 0 when OK, < 0 on error
1728 */
1729
1730 static int
1731 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1732 {
1733 int rc;
1734
1735 if (use_pcre16)
1736 #ifdef SUPPORT_PCRE16
1737 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1738 #else
1739 rc = PCRE_ERROR_BADMODE;
1740 #endif
1741 else
1742 #ifdef SUPPORT_PCRE8
1743 rc = pcre_fullinfo(re, study, option, ptr);
1744 #else
1745 rc = PCRE_ERROR_BADMODE;
1746 #endif
1747
1748 if (rc < 0)
1749 {
1750 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1751 use_pcre16? "16" : "", option);
1752 if (rc == PCRE_ERROR_BADMODE)
1753 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1754 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1755 }
1756
1757 return rc;
1758 }
1759
1760
1761
1762 /*************************************************
1763 * Swap byte functions *
1764 *************************************************/
1765
1766 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1767 value, respectively.
1768
1769 Arguments:
1770 value any number
1771
1772 Returns: the byte swapped value
1773 */
1774
1775 static pcre_uint32
1776 swap_uint32(pcre_uint32 value)
1777 {
1778 return ((value & 0x000000ff) << 24) |
1779 ((value & 0x0000ff00) << 8) |
1780 ((value & 0x00ff0000) >> 8) |
1781 (value >> 24);
1782 }
1783
1784 static pcre_uint16
1785 swap_uint16(pcre_uint16 value)
1786 {
1787 return (value >> 8) | (value << 8);
1788 }
1789
1790
1791
1792 /*************************************************
1793 * Flip bytes in a compiled pattern *
1794 *************************************************/
1795
1796 /* This function is called if the 'F' option was present on a pattern that is
1797 to be written to a file. We flip the bytes of all the integer fields in the
1798 regex data block and the study block. In 16-bit mode this also flips relevant
1799 bytes in the pattern itself. This is to make it possible to test PCRE's
1800 ability to reload byte-flipped patterns, e.g. those compiled on a different
1801 architecture. */
1802
1803 static void
1804 regexflip(pcre *ere, pcre_extra *extra)
1805 {
1806 REAL_PCRE *re = (REAL_PCRE *)ere;
1807 #ifdef SUPPORT_PCRE16
1808 int op;
1809 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1810 int length = re->name_count * re->name_entry_size;
1811 #ifdef SUPPORT_UTF
1812 BOOL utf = (re->options & PCRE_UTF16) != 0;
1813 BOOL utf16_char = FALSE;
1814 #endif /* SUPPORT_UTF */
1815 #endif /* SUPPORT_PCRE16 */
1816
1817 /* Always flip the bytes in the main data block and study blocks. */
1818
1819 re->magic_number = REVERSED_MAGIC_NUMBER;
1820 re->size = swap_uint32(re->size);
1821 re->options = swap_uint32(re->options);
1822 re->flags = swap_uint16(re->flags);
1823 re->top_bracket = swap_uint16(re->top_bracket);
1824 re->top_backref = swap_uint16(re->top_backref);
1825 re->first_char = swap_uint16(re->first_char);
1826 re->req_char = swap_uint16(re->req_char);
1827 re->name_table_offset = swap_uint16(re->name_table_offset);
1828 re->name_entry_size = swap_uint16(re->name_entry_size);
1829 re->name_count = swap_uint16(re->name_count);
1830
1831 if (extra != NULL)
1832 {
1833 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1834 rsd->size = swap_uint32(rsd->size);
1835 rsd->flags = swap_uint32(rsd->flags);
1836 rsd->minlength = swap_uint32(rsd->minlength);
1837 }
1838
1839 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1840 in the name table, if present, and then in the pattern itself. */
1841
1842 #ifdef SUPPORT_PCRE16
1843 if (!use_pcre16) return;
1844
1845 while(TRUE)
1846 {
1847 /* Swap previous characters. */
1848 while (length-- > 0)
1849 {
1850 *ptr = swap_uint16(*ptr);
1851 ptr++;
1852 }
1853 #ifdef SUPPORT_UTF
1854 if (utf16_char)
1855 {
1856 if ((ptr[-1] & 0xfc00) == 0xd800)
1857 {
1858 /* We know that there is only one extra character in UTF-16. */
1859 *ptr = swap_uint16(*ptr);
1860 ptr++;
1861 }
1862 }
1863 utf16_char = FALSE;
1864 #endif /* SUPPORT_UTF */
1865
1866 /* Get next opcode. */
1867
1868 length = 0;
1869 op = *ptr;
1870 *ptr++ = swap_uint16(op);
1871
1872 switch (op)
1873 {
1874 case OP_END:
1875 return;
1876
1877 #ifdef SUPPORT_UTF
1878 case OP_CHAR:
1879 case OP_CHARI:
1880 case OP_NOT:
1881 case OP_NOTI:
1882 case OP_STAR:
1883 case OP_MINSTAR:
1884 case OP_PLUS:
1885 case OP_MINPLUS:
1886 case OP_QUERY:
1887 case OP_MINQUERY:
1888 case OP_UPTO:
1889 case OP_MINUPTO:
1890 case OP_EXACT:
1891 case OP_POSSTAR:
1892 case OP_POSPLUS:
1893 case OP_POSQUERY:
1894 case OP_POSUPTO:
1895 case OP_STARI:
1896 case OP_MINSTARI:
1897 case OP_PLUSI:
1898 case OP_MINPLUSI:
1899 case OP_QUERYI:
1900 case OP_MINQUERYI:
1901 case OP_UPTOI:
1902 case OP_MINUPTOI:
1903 case OP_EXACTI:
1904 case OP_POSSTARI:
1905 case OP_POSPLUSI:
1906 case OP_POSQUERYI:
1907 case OP_POSUPTOI:
1908 case OP_NOTSTAR:
1909 case OP_NOTMINSTAR:
1910 case OP_NOTPLUS:
1911 case OP_NOTMINPLUS:
1912 case OP_NOTQUERY:
1913 case OP_NOTMINQUERY:
1914 case OP_NOTUPTO:
1915 case OP_NOTMINUPTO:
1916 case OP_NOTEXACT:
1917 case OP_NOTPOSSTAR:
1918 case OP_NOTPOSPLUS:
1919 case OP_NOTPOSQUERY:
1920 case OP_NOTPOSUPTO:
1921 case OP_NOTSTARI:
1922 case OP_NOTMINSTARI:
1923 case OP_NOTPLUSI:
1924 case OP_NOTMINPLUSI:
1925 case OP_NOTQUERYI:
1926 case OP_NOTMINQUERYI:
1927 case OP_NOTUPTOI:
1928 case OP_NOTMINUPTOI:
1929 case OP_NOTEXACTI:
1930 case OP_NOTPOSSTARI:
1931 case OP_NOTPOSPLUSI:
1932 case OP_NOTPOSQUERYI:
1933 case OP_NOTPOSUPTOI:
1934 if (utf) utf16_char = TRUE;
1935 #endif
1936 /* Fall through. */
1937
1938 default:
1939 length = OP_lengths16[op] - 1;
1940 break;
1941
1942 case OP_CLASS:
1943 case OP_NCLASS:
1944 /* Skip the character bit map. */
1945 ptr += 32/sizeof(pcre_uint16);
1946 length = 0;
1947 break;
1948
1949 case OP_XCLASS:
1950 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1951 if (LINK_SIZE > 1)
1952 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1953 - (1 + LINK_SIZE + 1));
1954 else
1955 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1956
1957 /* Reverse the size of the XCLASS instance. */
1958 *ptr = swap_uint16(*ptr);
1959 ptr++;
1960 if (LINK_SIZE > 1)
1961 {
1962 *ptr = swap_uint16(*ptr);
1963 ptr++;
1964 }
1965
1966 op = *ptr;
1967 *ptr = swap_uint16(op);
1968 ptr++;
1969 if ((op & XCL_MAP) != 0)
1970 {
1971 /* Skip the character bit map. */
1972 ptr += 32/sizeof(pcre_uint16);
1973 length -= 32/sizeof(pcre_uint16);
1974 }
1975 break;
1976 }
1977 }
1978 /* Control should never reach here in 16 bit mode. */
1979 #endif /* SUPPORT_PCRE16 */
1980 }
1981
1982
1983
1984 /*************************************************
1985 * Check match or recursion limit *
1986 *************************************************/
1987
1988 static int
1989 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1990 int start_offset, int options, int *use_offsets, int use_size_offsets,
1991 int flag, unsigned long int *limit, int errnumber, const char *msg)
1992 {
1993 int count;
1994 int min = 0;
1995 int mid = 64;
1996 int max = -1;
1997
1998 extra->flags |= flag;
1999
2000 for (;;)
2001 {
2002 *limit = mid;
2003
2004 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2005 use_offsets, use_size_offsets);
2006
2007 if (count == errnumber)
2008 {
2009 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2010 min = mid;
2011 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2012 }
2013
2014 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2015 count == PCRE_ERROR_PARTIAL)
2016 {
2017 if (mid == min + 1)
2018 {
2019 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2020 break;
2021 }
2022 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2023 max = mid;
2024 mid = (min + mid)/2;
2025 }
2026 else break; /* Some other error */
2027 }
2028
2029 extra->flags &= ~flag;
2030 return count;
2031 }
2032
2033
2034
2035 /*************************************************
2036 * Case-independent strncmp() function *
2037 *************************************************/
2038
/* Characters are compared after conversion with tolower(). As with strncmp(),
the comparison ends at the first difference, at the end of either string, or
after n characters, whichever comes first, so nothing beyond a terminating zero
is ever examined.

Arguments:
  s      first string
  t      second string
  n      maximum number of characters to compare; if this is zero or
           negative, nothing is compared and the result is 0

Returns:   < 0, = 0, or > 0, according to the comparison
*/

static int
strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
{
while (n-- > 0)
  {
  int c = tolower(*s) - tolower(*t);
  if (c != 0) return c;

  /* The characters match; if that was the terminator, the strings are equal
  and there is nothing more that may be read. */

  if (*s == 0 || *t == 0) break;
  s++;
  t++;
  }
return 0;
}
2058
2059
2060
2061 /*************************************************
2062 * Check newline indicator *
2063 *************************************************/
2064
2065 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2066 a message and return 0 if there is no match.
2067
2068 Arguments:
2069 p points after the leading '<'
2070 f file for error message
2071
2072 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2073 */
2074
2075 static int
2076 check_newline(pcre_uint8 *p, FILE *f)
2077 {
2078 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2079 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2080 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2081 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2082 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2083 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2084 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2085 fprintf(f, "Unknown newline type at: <%s\n", p);
2086 return 0;
2087 }
2088
2089
2090
2091 /*************************************************
2092 * Usage function *
2093 *************************************************/
2094
/* Write the command line summary to stdout. The readline line depends on
SUPPORT_LIBREADLINE; the -16, -dfa and -p lines are included only when
SUPPORT_PCRE16 is defined, NODFA is not defined, and NOPOSIX is not defined,
respectively. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs(" -16 use the 16-bit library\n", stdout);
#endif

fputs(" -b show compiled code\n"
      " -C show PCRE compile-time options and exit\n"
      " -C arg show a specific compile-time option\n"
      " and exit with its value. The arg can be:\n", stdout);

/* The values that may follow -C */

fputs(" linksize internal link size [2, 3, 4]\n"
      " pcre8 8 bit library support enabled [0, 1]\n"
      " pcre16 16 bit library support enabled [0, 1]\n"
      " utf Unicode Transformation Format supported [0, 1]\n"
      " ucp Unicode Properties supported [0, 1]\n"
      " jit Just-in-time compiler supported [0, 1]\n"
      " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n", stdout);

fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n",
      stdout);

fputs(" -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n",
      stdout);
}
2141
2142
2143
2144 /*************************************************
2145 * Main Program *
2146 *************************************************/
2147
2148 /* Read lines from named file or stdin and write to named file or stdout; lines
2149 consist of a regular expression, in delimiters and optionally followed by
2150 options, followed by a set of test data, terminated by an empty line. */
2151
2152 int main(int argc, char **argv)
2153 {
2154 FILE *infile = stdin;
2155 const char *version;
2156 int options = 0;
2157 int study_options = 0;
2158 int default_find_match_limit = FALSE;
2159 int op = 1;
2160 int timeit = 0;
2161 int timeitm = 0;
2162 int showinfo = 0;
2163 int showstore = 0;
2164 int force_study = -1;
2165 int force_study_options = 0;
2166 int quiet = 0;
2167 int size_offsets = 45;
2168 int size_offsets_max;
2169 int *offsets = NULL;
2170 #if !defined NOPOSIX
2171 int posix = 0;
2172 #endif
2173 int debug = 0;
2174 int done = 0;
2175 int all_use_dfa = 0;
2176 int yield = 0;
2177 int stack_size;
2178
2179 pcre_jit_stack *jit_stack = NULL;
2180
2181 /* These vectors store, end-to-end, a list of zero-terminated captured
2182 substring names, each list itself being terminated by an empty name. Assume
2183 that 1024 is plenty long enough for the few names we'll be testing. It is
2184 easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2185 for the actual memory, to ensure alignment. By defining these variables always
2186 (whether or not 8-bit or 16-bit is supported), we avoid too much mess with
2187 #ifdefs in the code. */
2188
2189 pcre_uint16 copynames[1024];
2190 pcre_uint16 getnames[1024];
2191
2192 pcre_uint16 *cn16ptr;
2193 pcre_uint16 *gn16ptr;
2194
2195 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2196 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2197 pcre_uint8 *cn8ptr;
2198 pcre_uint8 *gn8ptr;
2199
2200 /* Get buffers from malloc() so that valgrind will check their misuse when
2201 debugging. They grow automatically when very long lines are read. The 16-bit
2202 buffer (buffer16) is obtained only if needed. */
2203
2204 buffer = (pcre_uint8 *)malloc(buffer_size);
2205 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2206 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2207
2208 /* The outfile variable is static so that new_malloc can use it. */
2209
2210 outfile = stdout;
2211
2212 /* The following _setmode() stuff is some Windows magic that tells its runtime
2213 library to translate CRLF into a single LF character. At least, that's what
2214 I've been told: never having used Windows I take this all on trust. Originally
2215 it set 0x8000, but then I was advised that _O_BINARY was better. */
2216
2217 #if defined(_WIN32) || defined(WIN32)
2218 _setmode( _fileno( stdout ), _O_BINARY );
2219 #endif
2220
2221 /* Get the version number: both pcre_version() and pcre16_version() give the
2222 same answer. We just need to ensure that we call one that is available. */
2223
2224 #ifdef SUPPORT_PCRE8
2225 version = pcre_version();
2226 #else
2227 version = pcre16_version();
2228 #endif
2229
2230 /* Scan options */
2231
2232 while (argc > 1 && argv[op][0] == '-')
2233 {
2234 pcre_uint8 *endptr;
2235
2236 if (strcmp(argv[op], "-m") == 0) showstore = 1;
2237 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2238 else if (strcmp(argv[op], "-s+") == 0)
2239 {
2240 force_study = 1;
2241 force_study_options = PCRE_STUDY_JIT_COMPILE;
2242 }
2243 else if (strcmp(argv[op], "-16") == 0)
2244 {
2245 #ifdef SUPPORT_PCRE16
2246 use_pcre16 = 1;
2247 #else
2248 printf("** This version of PCRE was built without 16-bit support\n");
2249 exit(1);
2250 #endif
2251 }
2252 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2253 else if (strcmp(argv[op], "-b") == 0) debug = 1;
2254 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2255 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2256 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2257 #if !defined NODFA
2258 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2259 #endif
2260 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2261 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2262 *endptr == 0))
2263 {
2264 op++;
2265 argc--;
2266 }
2267 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2268 {
2269 int both = argv[op][2] == 0;
2270 int temp;
2271 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2272 *endptr == 0))
2273 {
2274 timeitm = temp;
2275 op++;
2276 argc--;
2277 }
2278 else timeitm = LOOPREPEAT;
2279 if (both) timeit = timeitm;
2280 }
2281 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2282 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2283 *endptr == 0))
2284 {
2285 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2286 printf("PCRE: -S not supported on this OS\n");
2287 exit(1);
2288 #else
2289 int rc;
2290 struct rlimit rlim;
2291 getrlimit(RLIMIT_STACK, &rlim);
2292 rlim.rlim_cur = stack_size * 1024 * 1024;
2293 rc = setrlimit(RLIMIT_STACK, &rlim);
2294 if (rc != 0)
2295 {
2296 printf("PCRE: setrlimit() failed with error %d\n", rc);
2297 exit(1);
2298 }
2299 op++;
2300 argc--;
2301 #endif
2302 }
2303 #if !defined NOPOSIX
2304 else if (strcmp(argv[op], "-p") == 0) posix = 1;
2305 #endif
2306 else if (strcmp(argv[op], "-C") == 0)
2307 {
2308 int rc;
2309 unsigned long int lrc;
2310
2311 if (argc > 2)
2312 {
2313 if (strcmp(argv[op + 1], "linksize") == 0)
2314 {
2315 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2316 printf("%d\n", rc);
2317 yield = rc;
2318 goto EXIT;
2319 }
2320 if (strcmp(argv[op + 1], "pcre8") == 0)
2321 {
2322 #ifdef SUPPORT_PCRE8
2323 printf("1\n");
2324 yield = 1;
2325 #else
2326 printf("0\n");
2327 yield = 0;
2328 #endif
2329 goto EXIT;
2330 }
2331 if (strcmp(argv[op + 1], "pcre16") == 0)
2332 {
2333 #ifdef SUPPORT_PCRE16
2334 printf("1\n");
2335 yield = 1;
2336 #else
2337 printf("0\n");
2338 yield = 0;
2339 #endif
2340 goto EXIT;
2341 }
2342 if (strcmp(argv[op + 1], "utf") == 0)
2343 {
2344 #ifdef SUPPORT_PCRE8
2345 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2346 printf("%d\n", rc);
2347 yield = rc;
2348 #else
2349 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2350 printf("%d\n", rc);
2351 yield = rc;
2352 #endif
2353 goto EXIT;
2354 }
2355 if (strcmp(argv[op + 1], "ucp") == 0)
2356 {
2357 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2358 printf("%d\n", rc);
2359 yield = rc;
2360 goto EXIT;
2361 }
2362 if (strcmp(argv[op + 1], "jit") == 0)
2363 {
2364 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2365 printf("%d\n", rc);
2366 yield = rc;
2367 goto EXIT;
2368 }
2369 if (strcmp(argv[op + 1], "newline") == 0)
2370 {
2371 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2372 /* Note that these values are always the ASCII values, even
2373 in EBCDIC environments. CR is 13 and NL is 10. */
2374 printf("%s\n", (rc == 13)? "CR" :
2375 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2376 (rc == -2)? "ANYCRLF" :
2377 (rc == -1)? "ANY" : "???");
2378 goto EXIT;
2379 }
2380 printf("Unknown -C option: %s\n", argv[op + 1]);
2381 goto EXIT;
2382 }
2383
2384 printf("PCRE version %s\n", version);
2385 printf("Compiled with\n");
2386
2387 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2388 are set, either both UTFs are supported or both are not supported. */
2389
2390 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2391 printf(" 8-bit and 16-bit support\n");
2392 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2393 if (rc)
2394 printf(" UTF-8 and UTF-16 support\n");
2395 else
2396 printf(" No UTF-8 or UTF-16 support\n");
2397 #elif defined SUPPORT_PCRE8
2398 printf(" 8-bit support only\n");
2399 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2400 printf(" %sUTF-8 support\n", rc? "" : "No ");
2401 #else
2402 printf(" 16-bit support only\n");
2403 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2404 printf(" %sUTF-16 support\n", rc? "" : "No ");
2405 #endif
2406
2407 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2408 printf(" %sUnicode properties support\n", rc? "" : "No ");
2409 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2410 if (rc)
2411 printf(" Just-in-time compiler support\n");
2412 else
2413 printf(" No just-in-time compiler support\n");
2414 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2415 /* Note that these values are always the ASCII values, even
2416 in EBCDIC environments. CR is 13 and NL is 10. */
2417 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2418 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2419 (rc == -2)? "ANYCRLF" :
2420 (rc == -1)? "ANY" : "???");
2421 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2422 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2423 "all Unicode newlines");
2424 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2425 printf(" Internal link size = %d\n", rc);
2426 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2427 printf(" POSIX malloc threshold = %d\n", rc);
2428 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2429 printf(" Default match limit = %ld\n", lrc);
2430 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2431 printf(" Default recursion depth limit = %ld\n", lrc);
2432 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2433 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
2434 goto EXIT;
2435 }
2436 else if (strcmp(argv[op], "-help") == 0 ||
2437 strcmp(argv[op], "--help") == 0)
2438 {
2439 usage();
2440 goto EXIT;
2441 }
2442 else
2443 {
2444 printf("** Unknown or malformed option %s\n", argv[op]);
2445 usage();
2446 yield = 1;
2447 goto EXIT;
2448 }
2449 op++;
2450 argc--;
2451 }
2452
2453 /* Get the store for the offsets vector, and remember what it was */
2454
2455 size_offsets_max = size_offsets;
2456 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2457 if (offsets == NULL)
2458 {
2459 printf("** Failed to get %d bytes of memory for offsets vector\n",
2460 (int)(size_offsets_max * sizeof(int)));
2461 yield = 1;
2462 goto EXIT;
2463 }
2464
2465 /* Sort out the input and output files */
2466
2467 if (argc > 1)
2468 {
2469 infile = fopen(argv[op], INPUT_MODE);
2470 if (infile == NULL)
2471 {
2472 printf("** Failed to open %s\n", argv[op]);
2473 yield = 1;
2474 goto EXIT;
2475 }
2476 }
2477
2478 if (argc > 2)
2479 {
2480 outfile = fopen(argv[op+1], OUTPUT_MODE);
2481 if (outfile == NULL)
2482 {
2483 printf("** Failed to open %s\n", argv[op+1]);
2484 yield = 1;
2485 goto EXIT;
2486 }
2487 }
2488
2489 /* Set alternative malloc function */
2490
2491 #ifdef SUPPORT_PCRE8
2492 pcre_malloc = new_malloc;
2493 pcre_free = new_free;
2494 pcre_stack_malloc = stack_malloc;
2495 pcre_stack_free = stack_free;
2496 #endif
2497
2498 #ifdef SUPPORT_PCRE16
2499 pcre16_malloc = new_malloc;
2500 pcre16_free = new_free;
2501 pcre16_stack_malloc = stack_malloc;
2502 pcre16_stack_free = stack_free;
2503 #endif
2504
2505 /* Heading line unless quiet, then prompt for first regex if stdin */
2506
2507 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2508
2509 /* Main loop */
2510
2511 while (!done)
2512 {
2513 pcre *re = NULL;
2514 pcre_extra *extra = NULL;
2515
2516 #if !defined NOPOSIX /* There are still compilers that require no indent */
2517 regex_t preg;
2518 int do_posix = 0;
2519 #endif
2520
2521 const char *error;
2522 pcre_uint8 *markptr;
2523 pcre_uint8 *p, *pp, *ppp;
2524 pcre_uint8 *to_file = NULL;
2525 const pcre_uint8 *tables = NULL;
2526 unsigned long int get_options;
2527 unsigned long int true_size, true_study_size = 0;
2528 size_t size, regex_gotten_store;
2529 int do_allcaps = 0;
2530 int do_mark = 0;
2531 int do_study = 0;
2532 int no_force_study = 0;
2533 int do_debug = debug;
2534 int do_G = 0;
2535 int do_g = 0;
2536 int do_showinfo = showinfo;
2537 int do_showrest = 0;
2538 int do_showcaprest = 0;
2539 int do_flip = 0;
2540 int erroroffset, len, delimiter, poffset;
2541
2542 use_utf = 0;
2543 debug_lengths = 1;
2544
2545 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2546 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2547 fflush(outfile);
2548
2549 p = buffer;
2550 while (isspace(*p)) p++;
2551 if (*p == 0) continue;
2552
2553 /* See if the pattern is to be loaded pre-compiled from a file. */
2554
2555 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2556 {
2557 pcre_uint32 magic;
2558 pcre_uint8 sbuf[8];
2559 FILE *f;
2560
2561 p++;
2562 if (*p == '!')
2563 {
2564 do_debug = TRUE;
2565 do_showinfo = TRUE;
2566 p++;
2567 }
2568
2569 pp = p + (int)strlen((char *)p);
2570 while (isspace(pp[-1])) pp--;
2571 *pp = 0;
2572
2573 f = fopen((char *)p, "rb");
2574 if (f == NULL)
2575 {
2576 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2577 continue;
2578 }
2579
2580 first_gotten_store = 0;
2581 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2582
2583 true_size =
2584 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2585 true_study_size =
2586 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2587
2588 re = (pcre *)new_malloc(true_size);
2589 regex_gotten_store = first_gotten_store;
2590
2591 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2592
2593 magic = ((REAL_PCRE *)re)->magic_number;
2594 if (magic != MAGIC_NUMBER)
2595 {
2596 if (swap_uint32(magic) == MAGIC_NUMBER)
2597 {
2598 do_flip = 1;
2599 }
2600 else
2601 {
2602 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2603 fclose(f);
2604 continue;
2605 }
2606 }
2607
2608 /* We hide the byte-invert info for little and big endian tests. */
2609 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2610 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2611
2612 /* Now see if there is any following study data. */
2613
2614 if (true_study_size != 0)
2615 {
2616 pcre_study_data *psd;
2617
2618 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2619 extra->flags = PCRE_EXTRA_STUDY_DATA;
2620
2621 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2622 extra->study_data = psd;
2623
2624 if (fread(psd, 1, true_study_size, f) != true_study_size)
2625 {
2626 FAIL_READ:
2627 fprintf(outfile, "Failed to read data from %s\n", p);
2628 if (extra != NULL)
2629 {
2630 PCRE_FREE_STUDY(extra);
2631 }
2632 if (re != NULL) new_free(re);
2633 fclose(f);
2634 continue;
2635 }
2636 fprintf(outfile, "Study data loaded from %s\n", p);
2637 do_study = 1; /* To get the data output if requested */
2638 }
2639 else fprintf(outfile, "No study data\n");
2640
2641 /* Flip the necessary bytes. */
2642 if (do_flip)
2643 {
2644 int rc;
2645 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2646 if (rc == PCRE_ERROR_BADMODE)
2647 {
2648 /* Simulate the result of the function call below. */
2649 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2650 use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2651 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2652 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2653 continue;
2654 }
2655 }
2656
2657 /* Need to know if UTF-8 for printing data strings. */
2658
2659 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2660 use_utf = (get_options & PCRE_UTF8) != 0;
2661
2662 fclose(f);
2663 goto SHOW_INFO;
2664 }
2665
2666 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2667 the pattern; if it isn't complete, read more. */
2668
2669 delimiter = *p++;
2670
2671 if (isalnum(delimiter) || delimiter == '\\')
2672 {
2673 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2674 goto SKIP_DATA;
2675 }
2676
2677 pp = p;
2678 poffset = (int)(p - buffer);
2679
2680 for(;;)
2681 {
2682 while (*pp != 0)
2683 {
2684 if (*pp == '\\' && pp[1] != 0) pp++;
2685 else if (*pp == delimiter) break;
2686 pp++;
2687 }
2688 if (*pp != 0) break;
2689 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2690 {
2691 fprintf(outfile, "** Unexpected EOF\n");
2692 done = 1;
2693 goto CONTINUE;
2694 }
2695 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2696 }
2697
2698 /* The buffer may have moved while being extended; reset the start of data
2699 pointer to the correct relative point in the buffer. */
2700
2701 p = buffer + poffset;
2702
2703 /* If the first character after the delimiter is backslash, make
2704 the pattern end with backslash. This is purely to provide a way
2705 of testing for the error message when a pattern ends with backslash. */
2706
2707 if (pp[1] == '\\') *pp++ = '\\';
2708
2709 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2710 for callouts. */
2711
2712 *pp++ = 0;
2713 strcpy((char *)pbuffer, (char *)p);
2714
2715 /* Look for options after final delimiter */
2716
2717 options = 0;
2718 study_options = 0;
2719 log_store = showstore; /* default from command line */
2720
2721 while (*pp != 0)
2722 {
2723 switch (*pp++)
2724 {
2725 case 'f': options |= PCRE_FIRSTLINE; break;
2726 case 'g': do_g = 1; break;
2727 case 'i': options |= PCRE_CASELESS; break;
2728 case 'm': options |= PCRE_MULTILINE; break;
2729 case 's': options |= PCRE_DOTALL; break;
2730 case 'x': options |= PCRE_EXTENDED; break;
2731
2732 case '+':
2733 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2734 break;
2735
2736 case '=': do_allcaps = 1; break;
2737 case 'A': options |= PCRE_ANCHORED; break;
2738 case 'B': do_debug = 1; break;
2739 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2740 case 'D': do_debug = do_showinfo = 1; break;
2741 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2742 case 'F': do_flip = 1; break;
2743 case 'G': do_G = 1; break;
2744 case 'I': do_showinfo = 1; break;
2745 case 'J': options |= PCRE_DUPNAMES; break;
2746 case 'K': do_mark = 1; break;
2747 case 'M': log_store = 1; break;
2748 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2749
2750 #if !defined NOPOSIX
2751 case 'P': do_posix = 1; break;
2752 #endif
2753
2754 case 'S':
2755 if (do_study == 0)
2756 {
2757 do_study = 1;
2758 if (*pp == '+')
2759 {
2760 study_options |= PCRE_STUDY_JIT_COMPILE;
2761 pp++;
2762 }
2763 }
2764 else
2765 {
2766 do_study = 0;
2767 no_force_study = 1;
2768 }
2769 break;
2770
2771 case 'U': options |= PCRE_UNGREEDY; break;
2772 case 'W': options |= PCRE_UCP; break;
2773 case 'X': options |= PCRE_EXTRA; break;
2774 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2775 case 'Z': debug_lengths = 0; break;
2776 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2777 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2778
2779 case 'T':
2780 switch (*pp++)
2781 {
2782 case '0': tables = tables0; break;
2783 case '1': tables = tables1; break;
2784
2785 case '\r':
2786 case '\n':
2787 case ' ':
2788 case 0:
2789 fprintf(outfile, "** Missing table number after /T\n");
2790 goto SKIP_DATA;
2791
2792 default:
2793 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2794 goto SKIP_DATA;
2795 }
2796 break;
2797
2798 case 'L':
2799 ppp = pp;
2800 /* The '\r' test here is so that it works on Windows. */
2801 /* The '0' test is just in case this is an unterminated line. */
2802 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2803 *ppp = 0;
2804 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2805 {
2806 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2807 goto SKIP_DATA;
2808 }
2809 locale_set = 1;
2810 tables = PCRE_MAKETABLES;
2811 pp = ppp;
2812 break;
2813
2814 case '>':
2815 to_file = pp;
2816 while (*pp != 0) pp++;
2817 while (isspace(pp[-1])) pp--;
2818 *pp = 0;
2819 break;
2820
2821 case '<':
2822 {
2823 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2824 {
2825 options |= PCRE_JAVASCRIPT_COMPAT;
2826 pp += 3;
2827 }
2828 else
2829 {
2830 int x = check_newline(pp, outfile);
2831 if (x == 0) goto SKIP_DATA;
2832 options |= x;
2833 while (*pp++ != '>');
2834 }
2835 }
2836 break;
2837
2838 case '\r': /* So that it works in Windows */
2839 case '\n':
2840 case ' ':
2841 break;
2842
2843 default:
2844 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2845 goto SKIP_DATA;
2846 }
2847 }
2848
2849 /* Handle compiling via the POSIX interface, which doesn't support the
2850 timing, showing, or debugging options, nor the ability to pass over
2851 local character tables. Neither does it have 16-bit support. */
2852
2853 #if !defined NOPOSIX
2854 if (posix || do_posix)
2855 {
2856 int rc;
2857 int cflags = 0;
2858
2859 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2860 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2861 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2862 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2863 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2864 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2865 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2866
2867 first_gotten_store = 0;
2868 rc = regcomp(&preg, (char *)p, cflags);
2869
2870 /* Compilation failed; go back for another re, skipping to blank line
2871 if non-interactive. */
2872
2873 if (rc != 0)
2874 {
2875 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2876 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2877 goto SKIP_DATA;
2878 }
2879 }
2880
2881 /* Handle compiling via the native interface */
2882
2883 else
2884 #endif /* !defined NOPOSIX */
2885
2886 {
2887 /* In 16-bit mode, convert the input. */
2888
2889 #ifdef SUPPORT_PCRE16
2890 if (use_pcre16)
2891 {
2892 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2893 {
2894 case -1:
2895 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2896 "converted to UTF-16\n");
2897 goto SKIP_DATA;
2898
2899 case -2:
2900 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2901 "cannot be converted to UTF-16\n");
2902 goto SKIP_DATA;
2903
2904 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2905 fprintf(outfile, "**Failed: character value greater than 0xffff "
2906 "cannot be converted to 16-bit in non-UTF mode\n");
2907 goto SKIP_DATA;
2908
2909 default:
2910 break;
2911 }
2912 p = (pcre_uint8 *)buffer16;
2913 }
2914 #endif
2915
2916 /* Compile many times when timing */
2917
2918 if (timeit > 0)
2919 {
2920 register int i;
2921 clock_t time_taken;
2922 clock_t start_time = clock();
2923 for (i = 0; i < timeit; i++)
2924 {
2925 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2926 if (re != NULL) free(re);
2927 }
2928 time_taken = clock() - start_time;
2929 fprintf(outfile, "Compile time %.4f milliseconds\n",
2930 (((double)time_taken * 1000.0) / (double)timeit) /
2931 (double)CLOCKS_PER_SEC);
2932 }
2933
2934 first_gotten_store = 0;
2935 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2936
2937 /* Compilation failed; go back for another re, skipping to blank line
2938 if non-interactive. */
2939
2940 if (re == NULL)
2941 {
2942 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2943 SKIP_DATA:
2944 if (infile != stdin)
2945 {
2946 for (;;)
2947 {
2948 if (extend_inputline(infile, buffer, NULL) == NULL)
2949 {
2950 done = 1;
2951 goto CONTINUE;
2952 }
2953 len = (int)strlen((char *)buffer);
2954 while (len > 0 && isspace(buffer[len-1])) len--;
2955 if (len == 0) break;
2956 }
2957 fprintf(outfile, "\n");
2958 }
2959 goto CONTINUE;
2960 }
2961
2962 /* Compilation succeeded. It is now possible to set the UTF-8 option from
2963 within the regex; check for this so that we know how to process the data
2964 lines. */
2965
2966 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2967 goto SKIP_DATA;
2968 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2969
2970 /* Extract the size for possible writing before possibly flipping it,
2971 and remember the store that was got. */
2972
2973 true_size = ((REAL_PCRE *)re)->size;
2974 regex_gotten_store = first_gotten_store;
2975
2976 /* Output code size information if requested */
2977
2978 if (log_store)
2979 fprintf(outfile, "Memory allocation (code space): %d\n",
2980 (int)(first_gotten_store -
2981 sizeof(REAL_PCRE) -
2982 ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
2983
2984 /* If -s or /S was present, study the regex to generate additional info to
2985 help with the matching, unless the pattern has the SS option, which
2986 suppresses the effect of /S (used for a few test patterns where studying is
2987 never sensible). */
2988
2989 if (do_study || (force_study >= 0 && !no_force_study))
2990 {
2991 if (timeit > 0)
2992 {
2993 register int i;
2994 clock_t time_taken;
2995 clock_t start_time = clock();
2996 for (i = 0; i < timeit; i++)
2997 {
2998 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2999 }
3000 time_taken = clock() - start_time;
3001 if (extra != NULL)
3002 {
3003 PCRE_FREE_STUDY(extra);
3004 }
3005 fprintf(outfile, " Study time %.4f milliseconds\n",
3006 (((double)time_taken * 1000.0) / (double)timeit) /
3007 (double)CLOCKS_PER_SEC);
3008 }
3009 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3010 if (error != NULL)
3011 fprintf(outfile, "Failed to study: %s\n", error);
3012 else if (extra != NULL)
3013 {
3014 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3015 if (log_store)
3016 {
3017 size_t jitsize;
3018 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3019 jitsize != 0)
3020 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3021 }
3022 }
3023 }
3024
3025 /* If /K was present, we set up for handling MARK data. */
3026
3027 if (do_mark)
3028 {
3029 if (extra == NULL)
3030 {
3031 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3032 extra->flags = 0;
3033 }
3034 extra->mark = &markptr;
3035 extra->flags |= PCRE_EXTRA_MARK;
3036 }
3037
3038 /* Extract and display information from the compiled data if required. */
3039
3040 SHOW_INFO:
3041
3042 if (do_debug)
3043 {
3044 fprintf(outfile, "------------------------------------------------------------------\n");
3045 PCRE_PRINTINT(re, outfile, debug_lengths);
3046 }
3047
3048 /* We already have the options in get_options (see above) */
3049
3050 if (do_showinfo)
3051 {
3052 unsigned long int all_options;
3053 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3054 hascrorlf;
3055 int nameentrysize, namecount;
3056 const pcre_uint8 *nametable;
3057
3058 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3059 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3060 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3061 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3062 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3063 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3064 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3065 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3066 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3067 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3068 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3069 != 0)
3070 goto SKIP_DATA;
3071
3072 if (size != regex_gotten_store) fprintf(outfile,
3073 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3074 (int)size, (int)regex_gotten_store);
3075
3076 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3077 if (backrefmax > 0)
3078 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3079
3080 if (namecount > 0)
3081 {
3082 fprintf(outfile, "Named capturing subpatterns:\n");
3083 while (namecount-- > 0)
3084 {
3085 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3086 int imm2_size = use_pcre16 ? 1 : 2;
3087 #else
3088 int imm2_size = IMM2_SIZE;
3089 #endif
3090 int length = (int)STRLEN(nametable + imm2_size);
3091 fprintf(outfile, " ");
3092 PCHARSV(nametable, imm2_size, length, outfile);
3093 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3094 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3095 fprintf(outfile, "%3d\n", use_pcre16?
3096 (int)(((PCRE_SPTR16)nametable)[0])
3097 :((int)nametable[0] << 8) | (int)nametable[1]);
3098 nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3099 #else
3100 fprintf(outfile, "%3d\n", GET2(nametable, 0));
3101 #ifdef SUPPORT_PCRE8
3102 nametable += nameentrysize;
3103 #else
3104 nametable += nameentrysize * 2;
3105 #endif
3106 #endif
3107 }
3108 }
3109
3110 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3111 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3112
3113 all_options = ((REAL_PCRE *)re)->options;
3114 if (do_flip) all_options = swap_uint32(all_options);
3115
3116 if (get_options == 0) fprintf(outfile, "No options\n");
3117 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3118 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3119 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3120 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3121 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3122 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3123 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3124 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3125 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3126 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3127 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3128 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3129 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3130 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3131 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3132 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3133 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3134 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3135
3136 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3137
3138 switch (get_options & PCRE_NEWLINE_BITS)
3139 {
3140 case PCRE_NEWLINE_CR:
3141 fprintf(outfile, "Forced newline sequence: CR\n");
3142 break;
3143
3144 case PCRE_NEWLINE_LF:
3145 fprintf(outfile, "Forced newline sequence: LF\n");
3146 break;
3147
3148 case PCRE_NEWLINE_CRLF:
3149 fprintf(outfile, "Forced newline sequence: CRLF\n");
3150 break;
3151
3152 case PCRE_NEWLINE_ANYCRLF:
3153 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3154 break;
3155
3156 case PCRE_NEWLINE_ANY:
3157 fprintf(outfile, "Forced newline sequence: ANY\n");
3158 break;
3159
3160 default:
3161 break;
3162 }
3163
3164 if (first_char == -1)
3165 {
3166 fprintf(outfile, "First char at start or follows newline\n");
3167 }
3168 else if (first_char < 0)
3169 {
3170 fprintf(outfile, "No first char\n");
3171 }
3172 else
3173 {
3174 const char *caseless =
3175 ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3176 "" : " (caseless)";
3177
3178 if (PRINTOK(first_char))
3179 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3180 else
3181 {
3182 fprintf(outfile, "First char = ");
3183 pchar(first_char, outfile);
3184 fprintf(outfile, "%s\n", caseless);
3185 }
3186 }
3187
3188 if (need_char < 0)
3189 {
3190 fprintf(outfile, "No need char\n");
3191 }
3192 else
3193 {
3194 const char *caseless =
3195 ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3196 "" : " (caseless)";
3197
3198 if (PRINTOK(need_char))
3199 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3200 else
3201 {
3202 fprintf(outfile, "Need char = ");
3203 pchar(need_char, outfile);
3204 fprintf(outfile, "%s\n", caseless);
3205 }
3206 }
3207
3208 /* Don't output study size; at present it is in any case a fixed
3209 value, but it varies, depending on the computer architecture, and
3210 so messes up the test suite. (And with the /F option, it might be
3211 flipped.) If study was forced by an external -s, don't show this
3212 information unless -i or -d was also present. This means that, except
3213 when auto-callouts are involved, the output from runs with and without
3214 -s should be identical. */
3215
3216 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3217 {
3218 if (extra == NULL)
3219 fprintf(outfile, "Study returned NULL\n");
3220 else
3221 {
3222 pcre_uint8 *start_bits = NULL;
3223 int minlength;
3224
3225 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3226 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3227
3228 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3229 {
3230 if (start_bits == NULL)
3231 fprintf(outfile, "No set of starting bytes\n");
3232 else
3233 {
3234 int i;
3235 int c = 24;
3236 fprintf(outfile, "Starting byte set: ");
3237 for (i = 0; i < 256; i++)
3238 {
3239 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3240 {
3241 if (c > 75)
3242 {
3243 fprintf(outfile, "\n ");
3244 c = 2;
3245 }
3246 if (PRINTOK(i) && i != ' ')
3247 {
3248 fprintf(outfile, "%c ", i);
3249 c += 2;
3250 }
3251 else
3252 {
3253 fprintf(outfile, "\\x%02x ", i);
3254 c += 5;
3255 }
3256 }
3257 }
3258 fprintf(outfile, "\n");
3259 }
3260 }
3261 }
3262
3263 /* Show this only if the JIT was set by /S, not by -s. */
3264
3265 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3266 {
3267 int jit;
3268 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3269 {
3270 if (jit)
3271 fprintf(outfile, "JIT study was successful\n");
3272 else
3273 #ifdef SUPPORT_JIT
3274 fprintf(outfile, "JIT study was not successful\n");
3275 #else
3276 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3277 #endif
3278 }
3279 }
3280 }
3281 }
3282
3283 /* If the '>' option was present, we write out the regex to a file, and
3284 that is all. The first 8 bytes of the file are the regex length and then
3285 the study length, in big-endian order. */
3286
3287 if (to_file != NULL)
3288 {
3289 FILE *f = fopen((char *)to_file, "wb");
3290 if (f == NULL)
3291 {
3292 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3293 }
3294 else
3295 {
3296 pcre_uint8 sbuf[8];
3297
3298 if (do_flip) regexflip(re, extra);
3299 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3300 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3301 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3302 sbuf[3] = (pcre_uint8)((true_size) & 255);
3303 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3304 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3305 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3306 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3307
3308 if (fwrite(sbuf, 1, 8, f) < 8 ||
3309 fwrite(re, 1, true_size, f) < true_size)
3310 {
3311 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3312 }
3313 else
3314 {
3315 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3316
3317 /* If there is study data, write it. */
3318
3319 if (extra != NULL)
3320 {
3321 if (fwrite(extra->study_data, 1, true_study_size, f) <
3322 true_study_size)
3323 {
3324 fprintf(outfile, "Write error on %s: %s\n", to_file,
3325 strerror(errno));
3326 }
3327 else fprintf(outfile, "Study data written to %s\n", to_file);
3328 }
3329 }
3330 fclose(f);
3331 }
3332
3333 new_free(re);
3334 if (extra != NULL)
3335 {
3336 PCRE_FREE_STUDY(extra);
3337 }
3338 if (locale_set)
3339 {
3340 new_free((void *)tables);
3341 setlocale(LC_CTYPE, "C");
3342 locale_set = 0;
3343 }
3344 continue; /* With next regex */
3345 }
3346 } /* End of non-POSIX compile */
3347
3348 /* Read data lines and test them */
3349
3350 for (;;)
3351 {
3352 pcre_uint8 *q;
3353 pcre_uint8 *bptr;
3354 int *use_offsets = offsets;
3355 int use_size_offsets = size_offsets;
3356 int callout_data = 0;
3357 int callout_data_set = 0;
3358 int count, c;
3359 int copystrings = 0;
3360 int find_match_limit = default_find_match_limit;
3361 int getstrings = 0;
3362 int getlist = 0;
3363 int gmatched = 0;
3364 int start_offset = 0;
3365 int start_offset_sign = 1;
3366 int g_notempty = 0;
3367 int use_dfa = 0;
3368
3369 *copynames = 0;
3370 *getnames = 0;
3371
3372 cn16ptr = copynames;
3373 gn16ptr = getnames;
3374 cn8ptr = copynames8;
3375 gn8ptr = getnames8;
3376
3377 SET_PCRE_CALLOUT(callout);
3378 first_callout = 1;
3379 last_callout_mark = NULL;
3380 callout_extra = 0;
3381 callout_count = 0;
3382 callout_fail_count = 999999;
3383 callout_fail_id = -1;
3384 show_malloc = 0;
3385 options = 0;
3386
3387 if (extra != NULL) extra->flags &=
3388 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3389
3390 len = 0;
3391 for (;;)
3392 {
3393 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3394 {
3395 if (len > 0) /* Reached EOF without hitting a newline */
3396 {
3397 fprintf(outfile, "\n");
3398 break;
3399 }
3400 done = 1;
3401 goto CONTINUE;
3402 }
3403 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3404 len = (int)strlen((char *)buffer);
3405 if (buffer[len-1] == '\n') break;
3406 }
3407
3408 while (len > 0 && isspace(buffer[len-1])) len--;
3409 buffer[len] = 0;
3410 if (len == 0) break;
3411
3412 p = buffer;
3413 while (isspace(*p)) p++;
3414
3415 bptr = q = dbuffer;
3416 while ((c = *p++) != 0)
3417 {
3418 int i = 0;
3419 int n = 0;
3420
3421 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3422 In non-UTF mode, allow the value of the byte to fall through to later,
3423 where values greater than 127 are turned into UTF-8 when running in
3424 16-bit mode. */
3425
3426 if (c != '\\')
3427 {
3428 if (use_utf)
3429 {
3430 *q++ = c;
3431 continue;
3432 }
3433 }
3434
3435 /* Handle backslash escapes */
3436
3437 else switch ((c = *p++))
3438 {
3439 case 'a': c = 7; break;
3440 case 'b': c = '\b'; break;
3441 case 'e': c = 27; break;
3442 case 'f': c = '\f'; break;
3443 case 'n': c = '\n'; break;
3444 case 'r': c = '\r'; break;
3445 case 't': c = '\t'; break;
3446 case 'v': c = '\v'; break;
3447
3448 case '0': case '1': case '2': case '3':
3449 case '4': case '5': case '6': case '7':
3450 c -= '0';
3451 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3452 c = c * 8 + *p++ - '0';
3453 break;
3454
3455 case 'x':
3456 if (*p == '{')
3457 {
3458 pcre_uint8 *pt = p;
3459 c = 0;
3460
3461 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3462 when isxdigit() is a macro that refers to its argument more than
3463 once. This is banned by the C Standard, but apparently happens in at
3464 least one MacOS environment. */
3465
3466 for (pt++; isxdigit(*pt); pt++)
3467 {
3468 if (++i == 9)
3469 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3470 "using only the first eight.\n");
3471 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3472 }
3473 if (*pt == '}')
3474 {
3475 p = pt + 1;
3476 break;
3477 }
3478 /* Not correct form for \x{...}; fall through */
3479 }
3480
3481 /* \x without {} always defines just one byte in 8-bit mode. This
3482 allows UTF-8 characters to be constructed byte by byte, and also allows
3483 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3484 Otherwise, pass it down to later code so that it can be turned into
3485 UTF-8 when running in 16-bit mode. */
3486
3487 c = 0;
3488 while (i++ < 2 && isxdigit(*p))
3489 {
3490 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3491 p++;
3492 }
3493 if (use_utf)
3494 {
3495 *q++ = c;
3496 continue;
3497 }
3498 break;
3499
3500 case 0: /* \ followed by EOF allows for an empty line */
3501 p--;
3502 continue;
3503
3504 case '>':
3505 if (*p == '-')
3506 {
3507 start_offset_sign = -1;
3508 p++;
3509 }
3510 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3511 start_offset *= start_offset_sign;
3512 continue;
3513
3514 case 'A': /* Option setting */
3515 options |= PCRE_ANCHORED;
3516 continue;
3517
3518 case 'B':
3519 options |= PCRE_NOTBOL;
3520 continue;
3521
3522 case 'C':
3523 if (isdigit(*p)) /* Set copy string */
3524 {
3525 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3526 copystrings |= 1 << n;
3527 }
3528 else if (isalnum(*p))
3529 {
3530 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3531 }
3532 else if (*p == '+')
3533 {
3534 callout_extra = 1;
3535 p++;
3536 }
3537 else if (*p == '-')
3538 {
3539 SET_PCRE_CALLOUT(NULL);
3540 p++;
3541 }
3542 else if (*p == '!')
3543 {
3544 callout_fail_id = 0;
3545 p++;
3546 while(isdigit(*p))
3547 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3548 callout_fail_count = 0;
3549 if (*p == '!')
3550 {
3551 p++;
3552 while(isdigit(*p))
3553 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3554 }
3555 }
3556 else if (*p == '*')
3557 {
3558 int sign = 1;
3559 callout_data = 0;
3560 if (*(++p) == '-') { sign = -1; p++; }
3561 while(isdigit(*p))
3562 callout_data = callout_data * 10 + *p++ - '0';
3563 callout_data *= sign;
3564 callout_data_set = 1;
3565 }
3566 continue;
3567
3568 #if !defined NODFA
3569 case 'D':
3570 #if !defined NOPOSIX
3571 if (posix || do_posix)
3572 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3573 else
3574 #endif
3575 use_dfa = 1;
3576 continue;
3577 #endif
3578
3579 #if !defined NODFA
3580 case 'F':
3581 options |= PCRE_DFA_SHORTEST;
3582 continue;
3583 #endif
3584
3585 case 'G':
3586 if (isdigit(*p))
3587 {
3588 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3589 getstrings |= 1 << n;
3590 }
3591 else if (isalnum(*p))
3592 {
3593 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3594 }
3595 continue;
3596
3597 case 'J':
3598 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3599 if (extra != NULL
3600 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3601 && extra->executable_jit != NULL)
3602 {
3603 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3604 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3605 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3606 }
3607 continue;
3608
3609 case 'L':
3610 getlist = 1;
3611 continue;
3612
3613 case 'M':
3614 find_match_limit = 1;
3615 continue;
3616
3617 case 'N':
3618 if ((options & PCRE_NOTEMPTY) != 0)
3619 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3620 else
3621 options |= PCRE_NOTEMPTY;
3622 continue;
3623
3624 case 'O':
3625 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3626 if (n > size_offsets_max)
3627 {
3628 size_offsets_max = n;
3629 free(offsets);
3630 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3631 if (offsets == NULL)
3632 {
3633 printf("** Failed to get %d bytes of memory for offsets vector\n",
3634 (int)(size_offsets_max * sizeof(int)));
3635 yield = 1;
3636 goto EXIT;
3637 }
3638 }
3639 use_size_offsets = n;
3640 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3641 continue;
3642
3643 case 'P':
3644 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3645 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3646 continue;
3647
3648 case 'Q':
3649 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3650 if (extra == NULL)
3651 {
3652 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3653 extra->flags = 0;
3654 }
3655 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3656 extra->match_limit_recursion = n;
3657 continue;
3658
3659 case 'q':
3660 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3661 if (extra == NULL)
3662 {
3663 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3664 extra->flags = 0;
3665 }
3666 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3667 extra->match_limit = n;
3668 continue;
3669
3670 #if !defined NODFA
3671 case 'R':
3672 options |= PCRE_DFA_RESTART;
3673 continue;
3674 #endif
3675
3676 case 'S':
3677 show_malloc = 1;
3678 continue;
3679
3680 case 'Y':
3681 options |= PCRE_NO_START_OPTIMIZE;
3682 continue;
3683
3684 case 'Z':
3685 options |= PCRE_NOTEOL;
3686 continue;
3687
3688 case '?':
3689 options |= PCRE_NO_UTF8_CHECK;
3690 continue;
3691
3692 case '<':
3693 {
3694 int x = check_newline(p, outfile);
3695 if (x == 0) goto NEXT_DATA;
3696 options |= x;
3697 while (*p++ != '>');
3698 }
3699 continue;
3700 }
3701
3702 /* We now have a character value in c that may be greater than 255. In
3703 16-bit mode, we always convert characters to UTF-8 so that values greater
3704 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3705 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3706 mode must have come from \x{...} or octal constructs because values from
3707 \x.. get this far only in non-UTF mode. */
3708
3709 #if !defined NOUTF || defined SUPPORT_PCRE16
3710 if (use_pcre16 || use_utf)
3711 {
3712 pcre_uint8 buff8[8];
3713 int ii, utn;
3714 utn = ord2utf8(c, buff8);
3715 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3716 }
3717 else
3718 #endif
3719 {
3720 if (c > 255)
3721 {
3722 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3723 "and UTF-8 mode is not enabled.\n", c);
3724 fprintf(outfile, "** Truncation will probably give the wrong "
3725 "result.\n");
3726 }
3727 *q++ = c;
3728 }
3729 }
3730
3731 /* Reached end of subject string */
3732
3733 *q = 0;
3734 len = (int)(q - dbuffer);
3735
3736 /* Move the data to the end of the buffer so that a read over the end of
3737 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3738 we are using the POSIX interface, we must include the terminating zero. */
3739
3740 #if !defined NOPOSIX
3741 if (posix || do_posix)
3742 {
3743 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3744 bptr += buffer_size - len - 1;
3745 }
3746 else
3747 #endif
3748 {
3749 memmove(bptr + buffer_size - len, bptr, len);
3750 bptr += buffer_size - len;
3751 }
3752
3753 if ((all_use_dfa || use_dfa) && find_match_limit)
3754 {
3755 printf("**Match limit not relevant for DFA matching: ignored\n");
3756 find_match_limit = 0;
3757 }
3758
3759 /* Handle matching via the POSIX interface, which does not
3760 support timing or playing with the match limit or callout data. */
3761
3762 #if !defined NOPOSIX
3763 if (posix || do_posix)
3764 {
3765 int rc;
3766 int eflags = 0;
3767 regmatch_t *pmatch = NULL;
3768 if (use_size_offsets > 0)
3769 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3770 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3771 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3772 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3773
3774 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3775
3776 if (rc != 0)
3777 {
3778 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3779 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3780 }
3781 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3782 != 0)
3783 {
3784 fprintf(outfile, "Matched with REG_NOSUB\n");
3785 }
3786 else
3787 {
3788 size_t i;
3789 for (i = 0; i < (size_t)use_size_offsets; i++)
3790 {
3791 if (pmatch[i].rm_so >= 0)
3792 {
3793 fprintf(outfile, "%2d: ", (int)i);
3794 PCHARSV(dbuffer, pmatch[i].rm_so,
3795 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3796 fprintf(outfile, "\n");
3797 if (do_showcaprest || (i == 0 && do_showrest))
3798 {
3799 fprintf(outfile, "%2d+ ", (int)i);
3800 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3801 outfile);
3802 fprintf(outfile, "\n");
3803 }
3804 }
3805 }
3806 }
3807 free(pmatch);
3808 goto NEXT_DATA;
3809 }
3810
3811 #endif /* !defined NOPOSIX */
3812
3813 /* Handle matching via the native interface - repeats for /g and /G */
3814
3815 #ifdef SUPPORT_PCRE16
3816 if (use_pcre16)
3817 {
3818 len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3819 switch(len)
3820 {
3821 case -1:
3822 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3823 "converted to UTF-16\n");
3824 goto NEXT_DATA;
3825
3826 case -2:
3827 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3828 "cannot be converted to UTF-16\n");
3829 goto NEXT_DATA;
3830
3831 case -3:
3832 fprintf(outfile, "**Failed: character value greater than 0xffff "
3833 "cannot be converted to 16-bit in non-UTF mode\n");
3834 goto NEXT_DATA;
3835
3836 default:
3837 break;
3838 }
3839 bptr = (pcre_uint8 *)buffer16;
3840 }
3841 #endif
3842
3843 for (;; gmatched++) /* Loop for /g or /G */
3844 {
3845 markptr = NULL;
3846
3847 if (timeitm > 0)
3848 {
3849 register int i;
3850 clock_t time_taken;
3851 clock_t start_time = clock();
3852
3853 #if !defined NODFA
3854 if (all_use_dfa || use_dfa)
3855 {
3856 int workspace[1000];
3857 for (i = 0; i < timeitm; i++)
3858 {
3859 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3860 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3861 (sizeof(workspace)/sizeof(int)));
3862 }
3863 }
3864 else
3865 #endif
3866
3867 for (i = 0; i < timeitm; i++)
3868 {
3869 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3870 (options | g_notempty), use_offsets, use_size_offsets);
3871 }
3872 time_taken = clock() - start_time;
3873 fprintf(outfile, "Execute time %.4f milliseconds\n",
3874 (((double)time_taken * 1000.0) / (double)timeitm) /
3875 (double)CLOCKS_PER_SEC);
3876 }
3877
3878 /* If find_match_limit is set, we want to do repeated matches with
3879 varying limits in order to find the minimum value for the match limit and
3880 for the recursion limit. The match limits are relevant only to the normal
3881 running of pcre_exec(), so disable the JIT optimization. This makes it
3882 possible to run the same set of tests with and without JIT externally
3883 requested. */
3884
3885 if (find_match_limit)
3886 {
3887 if (extra == NULL)
3888 {
3889 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3890 extra->flags = 0;
3891 }
3892 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3893
3894 (void)check_match_limit(re, extra, bptr, len, start_offset,
3895 options|g_notempty, use_offsets, use_size_offsets,
3896 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3897 PCRE_ERROR_MATCHLIMIT, "match()");
3898
3899 count = check_match_limit(re, extra, bptr, len, start_offset,
3900 options|g_notempty, use_offsets, use_size_offsets,
3901 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3902 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3903 }
3904
3905 /* If callout_data is set, use the interface with additional data */
3906
3907 else if (callout_data_set)
3908 {
3909 if (extra == NULL)
3910 {
3911 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3912 extra->flags = 0;
3913 }
3914 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3915 extra->callout_data = &callout_data;
3916 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3917 options | g_notempty, use_offsets, use_size_offsets);
3918 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3919 }
3920
3921 /* The normal case is just to do the match once, with the default
3922 value of match_limit. */
3923
3924 #if !defined NODFA
3925 else if (all_use_dfa || use_dfa)
3926 {
3927 int workspace[1000];
3928 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3929 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3930 (sizeof(workspace)/sizeof(int)));
3931 if (count == 0)
3932 {
3933 fprintf(outfile, "Matched, but too many subsidiary matches\n");
3934 count = use_size_offsets/2;
3935 }
3936 }
3937 #endif
3938
3939 else
3940 {
3941 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3942 options | g_notempty, use_offsets, use_size_offsets);
3943 if (count == 0)
3944 {
3945 fprintf(outfile, "Matched, but too many substrings\n");
3946 count = use_size_offsets/3;
3947 }
3948 }
3949
3950 /* Matched */
3951
3952 if (count >= 0)
3953 {
3954 int i, maxcount;
3955 void *cnptr, *gnptr;
3956
3957 #if !defined NODFA
3958 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3959 #endif
3960 maxcount = use_size_offsets/3;
3961
3962 /* This is a check against a lunatic return value. */
3963
3964 if (count > maxcount)
3965 {
3966 fprintf(outfile,
3967 "** PCRE error: returned count %d is too big for offset size %d\n",
3968 count, use_size_offsets);
3969 count = use_size_offsets/3;
3970 if (do_g || do_G)
3971 {
3972 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3973 do_g = do_G = FALSE; /* Break g/G loop */
3974 }
3975 }
3976
3977 /* do_allcaps requests showing of all captures in the pattern, to check
3978 unset ones at the end. */
3979
3980 if (do_allcaps)
3981 {
3982 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
3983 goto SKIP_DATA;
3984 count++; /* Allow for full match */
3985 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3986 }
3987
3988 /* Output the captured substrings */
3989
3990 for (i = 0; i < count * 2; i += 2)
3991 {
3992 if (use_offsets[i] < 0)
3993 {
3994 if (use_offsets[i] != -1)
3995 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3996 use_offsets[i], i);
3997 if (use_offsets[i+1] != -1)
3998 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3999 use_offsets[i+1], i+1);
4000 fprintf(outfile, "%2d: <unset>\n", i/2);
4001 }
4002 else
4003 {
4004 fprintf(outfile, "%2d: ", i/2);
4005 PCHARSV(bptr, use_offsets[i],
4006 use_offsets[i+1] - use_offsets[i], outfile);
4007 fprintf(outfile, "\n");
4008 if (do_showcaprest || (i == 0 && do_showrest))
4009 {
4010 fprintf(outfile, "%2d+ ", i/2);
4011 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4012 outfile);
4013 fprintf(outfile, "\n");
4014 }
4015 }
4016 }
4017
4018 if (markptr != NULL)
4019 {
4020 fprintf(outfile, "MK: ");
4021 PCHARSV(markptr, 0, -1, outfile);
4022 fprintf(outfile, "\n");
4023 }
4024
4025 for (i = 0; i < 32; i++)
4026 {
4027 if ((copystrings & (1 << i)) != 0)
4028 {
4029 int rc;
4030 char copybuffer[256];
4031 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4032 copybuffer, sizeof(copybuffer));
4033 if (rc < 0)
4034 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4035 else
4036 {
4037 fprintf(outfile, "%2dC ", i);
4038 PCHARSV(copybuffer, 0, rc, outfile);
4039 fprintf(outfile, " (%d)\n", rc);
4040 }
4041 }
4042 }
4043
4044 cnptr = copynames;
4045 for (;;)
4046 {
4047 int rc;
4048 char copybuffer[256];
4049
4050 if (use_pcre16)
4051 {
4052 if (*(pcre_uint16 *)cnptr == 0) break;
4053 }
4054 else
4055 {
4056 if (*(pcre_uint8 *)cnptr == 0) break;
4057 }
4058
4059 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4060 cnptr, copybuffer, sizeof(copybuffer));
4061
4062 if (rc < 0)
4063 {
4064 fprintf(outfile, "copy substring ");
4065 PCHARSV(cnptr, 0, -1, outfile);
4066 fprintf(outfile, " failed %d\n", rc);
4067 }
4068 else
4069 {
4070 fprintf(outfile, " C ");
4071 PCHARSV(copybuffer, 0, rc, outfile);
4072 fprintf(outfile, " (%d) ", rc);
4073 PCHARSV(cnptr, 0, -1, outfile);
4074 putc('\n', outfile);
4075 }
4076
4077 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4078 }
4079
4080 for (i = 0; i < 32; i++)
4081 {
4082 if ((getstrings & (1 << i)) != 0)
4083 {
4084 int rc;
4085 const char *substring;
4086 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4087 if (rc < 0)
4088 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4089 else
4090 {
4091 fprintf(outfile, "%2dG ", i);
4092 PCHARSV(substring, 0, rc, outfile);
4093 fprintf(outfile, " (%d)\n", rc);
4094 PCRE_FREE_SUBSTRING(substring);
4095 }
4096 }
4097 }
4098
4099 gnptr = getnames;
4100 for (;;)
4101 {
4102 int rc;
4103 const char *substring;
4104
4105 if (use_pcre16)
4106 {
4107 if (*(pcre_uint16 *)gnptr == 0) break;
4108 }
4109 else
4110 {
4111 if (*(pcre_uint8 *)gnptr == 0) break;
4112 }
4113
4114 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4115 gnptr, &substring);
4116 if (rc < 0)
4117 {
4118 fprintf(outfile, "get substring ");
4119 PCHARSV(gnptr, 0, -1, outfile);
4120 fprintf(outfile, " failed %d\n", rc);
4121 }
4122 else
4123 {
4124 fprintf(outfile, " G ");
4125 PCHARSV(substring, 0, rc, outfile);
4126 fprintf(outfile, " (%d) ", rc);
4127 PCHARSV(gnptr, 0, -1, outfile);
4128 PCRE_FREE_SUBSTRING(substring);
4129 putc('\n', outfile);
4130 }
4131
4132 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4133 }
4134
4135 if (getlist)
4136 {
4137 int rc;
4138 const char **stringlist;
4139 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4140 if (rc < 0)
4141 fprintf(outfile, "get substring list failed %d\n", rc);
4142 else
4143 {
4144 for (i = 0; i < count; i++)
4145 {
4146 fprintf(outfile, "%2dL ", i);
4147 PCHARSV(stringlist[i], 0, -1, outfile);
4148 putc('\n', outfile);
4149 }
4150 if (stringlist[i] != NULL)
4151 fprintf(outfile, "string list not terminated by NULL\n");
4152 PCRE_FREE_SUBSTRING_LIST(stringlist);
4153 }
4154 }
4155 }
4156
4157 /* There was a partial match */
4158
4159 else if (count == PCRE_ERROR_PARTIAL)
4160 {
4161 if (markptr == NULL) fprintf(outfile, "Partial match");
4162 else
4163 {
4164 fprintf(outfile, "Partial match, mark=");
4165 PCHARSV(markptr, 0, -1, outfile);
4166 }
4167 if (use_size_offsets > 1)
4168 {
4169 fprintf(outfile, ": ");
4170 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4171 outfile);
4172 }
4173 fprintf(outfile, "\n");
4174 break; /* Out of the /g loop */
4175 }
4176
4177 /* Failed to match. If this is a /g or /G loop and we previously set
4178 g_notempty after a null match, this is not necessarily the end. We want
4179 to advance the start offset, and continue. We won't be at the end of the
4180 string - that was checked before setting g_notempty.
4181
4182 Complication arises in the case when the newline convention is "any",
4183 "crlf", or "anycrlf". If the previous match was at the end of a line
4184 terminated by CRLF, an advance of one character just passes the \r,
4185 whereas we should prefer the longer newline sequence, as does the code in
4186 pcre_exec(). Fudge the offset value to achieve this. We check for a
4187 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4188 find the default.
4189
4190 Otherwise, in the case of UTF-8 matching, the advance must be one
4191 character, not one byte. */
4192
4193 else
4194 {
4195 if (g_notempty != 0)
4196 {
4197 int onechar = 1;
4198 unsigned int obits = ((REAL_PCRE *)re)->options;
4199 use_offsets[0] = start_offset;
4200 if ((obits & PCRE_NEWLINE_BITS) == 0)
4201 {
4202 int d;
4203 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4204 /* Note that these values are always the ASCII ones, even in
4205 EBCDIC environments. CR = 13, NL = 10. */
4206 obits = (d == 13)? PCRE_NEWLINE_CR :
4207 (d == 10)? PCRE_NEWLINE_LF :
4208 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4209 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4210 (d == -1)? PCRE_NEWLINE_ANY : 0;
4211 }
4212 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4213 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4214 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4215 &&
4216 start_offset < len - 1 &&
4217 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4218 (use_pcre16?
4219 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4220 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4221 :
4222 bptr[start_offset] == '\r'
4223 && bptr[start_offset + 1] == '\n')
4224 #elif defined SUPPORT_PCRE16
4225 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4226 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4227 #else
4228 bptr[start_offset] == '\r'
4229 && bptr[start_offset + 1] == '\n'
4230 #endif
4231 )
4232 onechar++;
4233 else if (use_utf)
4234 {
4235 while (start_offset + onechar < len)
4236 {
4237 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4238 onechar++;
4239 }
4240 }
4241 use_offsets[1] = start_offset + onechar;
4242 }
4243 else
4244 {
4245 switch(count)
4246 {
4247 case PCRE_ERROR_NOMATCH:
4248 if (gmatched == 0)
4249 {
4250 if (markptr == NULL)
4251 {
4252 fprintf(outfile, "No match\n");
4253 }
4254 else
4255 {
4256 fprintf(outfile, "No match, mark = ");
4257 PCHARSV(markptr, 0, -1, outfile);
4258 putc('\n', outfile);
4259 }
4260 }
4261 break;
4262
4263 case PCRE_ERROR_BADUTF8:
4264 case PCRE_ERROR_SHORTUTF8:
4265 fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4266 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4267 use_pcre16? "16" : "8");
4268 if (use_size_offsets >= 2)
4269 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4270 use_offsets[1]);
4271 fprintf(outfile, "\n");
4272 break;
4273
4274 case PCRE_ERROR_BADUTF8_OFFSET:
4275 fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4276 use_pcre16? "16" : "8");
4277 break;
4278
4279 default:
4280 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
4281 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4282 else
4283 fprintf(outfile, "Error %d (Unexpected value)\n", count);
4284 break;
4285 }
4286
4287 break; /* Out of the /g loop */
4288 }
4289 }
4290
4291 /* If not /g or /G we are done */
4292
4293 if (!do_g && !do_G) break;
4294
4295 /* If we have matched an empty string, first check to see if we are at
4296 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4297 Perl's /g option does. This turns out to be rather cunning. First we set
4298 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4299 same point. If this fails (picked up above) we advance to the next
4300 character. */
4301
4302 g_notempty = 0;
4303
4304 if (use_offsets[0] == use_offsets[1])
4305 {
4306 if (use_offsets[0] == len) break;
4307 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4308 }
4309
4310 /* For /g, update the start offset, leaving the rest alone */
4311
4312 if (do_g) start_offset = use_offsets[1];
4313
4314 /* For /G, update the pointer and length */
4315
4316 else
4317 {
4318 bptr += use_offsets[1] * CHAR_SIZE;
4319 len -= use_offsets[1];
4320 }
4321 } /* End of loop for /g and /G */
4322
4323 NEXT_DATA: continue;
4324 } /* End of loop for data lines */
4325
4326 CONTINUE:
4327
4328 #if !defined NOPOSIX
4329 if (posix || do_posix) regfree(&preg);
4330 #endif
4331
4332 if (re != NULL) new_free(re);
4333 if (extra != NULL)
4334 {
4335 PCRE_FREE_STUDY(extra);
4336 }
4337 if (locale_set)
4338 {
4339 new_free((void *)tables);
4340 setlocale(LC_CTYPE, "C");
4341 locale_set = 0;
4342 }
4343 if (jit_stack != NULL)
4344 {
4345 PCRE_JIT_STACK_FREE(jit_stack);
4346 jit_stack = NULL;
4347 }
4348 }
4349
4350 if (infile == stdin) fprintf(outfile, "\n");
4351
4352 EXIT:
4353
4354 if (infile != NULL && infile != stdin) fclose(infile);
4355 if (outfile != NULL && outfile != stdout) fclose(outfile);
4356
4357 free(buffer);
4358 free(dbuffer);
4359 free(pbuffer);
4360 free(offsets);
4361
4362 #ifdef SUPPORT_PCRE16
4363 if (buffer16 != NULL) free(buffer16);
4364 #endif
4365
4366 return yield;
4367 }
4368
4369 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12