/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 895 - (show annotations) (download)
Fri Jan 20 12:12:03 2012 UTC (2 years, 8 months ago) by ph10
File MIME type: text/plain
File size: 130528 byte(s)
Modified, and made non-default the stack size info experimental code.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49
50 #ifdef HAVE_CONFIG_H
51 #include "config.h"
52 #endif
53
54 #include <ctype.h>
55 #include <stdio.h>
56 #include <string.h>
57 #include <stdlib.h>
58 #include <time.h>
59 #include <locale.h>
60 #include <errno.h>
61
62 #ifdef SUPPORT_LIBREADLINE
63 #ifdef HAVE_UNISTD_H
64 #include <unistd.h>
65 #endif
66 #include <readline/readline.h>
67 #include <readline/history.h>
68 #endif
69
70
71 /* A number of things vary for Windows builds. Originally, pcretest opened its
72 input and output without "b"; then I was told that "b" was needed in some
73 environments, so it was added for release 5.0 to both the input and output. (It
74 makes no difference on Unix-like systems.) Later I was told that it is wrong
75 for the input on Windows. I've now abstracted the modes into two macros that
76 are set here, to make it easier to fiddle with them, and removed "b" from the
77 input mode under Windows. */
78
79 #if defined(_WIN32) || defined(WIN32)
80 #include <io.h> /* For _setmode() */
81 #include <fcntl.h> /* For _O_BINARY */
82 #define INPUT_MODE "r"
83 #define OUTPUT_MODE "wb"
84
85 #ifndef isatty
86 #define isatty _isatty /* This is what Windows calls them, I'm told, */
87 #endif /* though in some environments they seem to */
88 /* be already defined, hence the #ifndefs. */
89 #ifndef fileno
90 #define fileno _fileno
91 #endif
92
93 /* A user sent this fix for Borland Builder 5 under Windows. */
94
95 #ifdef __BORLANDC__
96 #define _setmode(handle, mode) setmode(handle, mode)
97 #endif
98
99 /* Not Windows */
100
101 #else
102 #include <sys/time.h> /* These two includes are needed */
103 #include <sys/resource.h> /* for setrlimit(). */
104 #define INPUT_MODE "rb"
105 #define OUTPUT_MODE "wb"
106 #endif
107
108
109 /* We have to include pcre_internal.h because we need the internal info for
110 displaying the results of pcre_study() and we also need to know about the
111 internal macros, structures, and other internal data values; pcretest has
112 "inside information" compared to a program that strictly follows the PCRE API.
113
114 Although pcre_internal.h does itself include pcre.h, we explicitly include it
115 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
116 appropriately for an application, not for building PCRE. */
117
118 #include "pcre.h"
119
120 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
121 /* Configure internal macros to 16 bit mode. */
122 #define COMPILE_PCRE16
123 #endif
124
125 #include "pcre_internal.h"
126
127 /* The pcre_printint() function, which prints the internal form of a compiled
128 regex, is held in a separate file so that (a) it can be compiled in either
129 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
130 when that is compiled in debug mode. */
131
132 #ifdef SUPPORT_PCRE8
133 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
134 #endif
135 #ifdef SUPPORT_PCRE16
136 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
137 #endif
138
139 /* We need access to some of the data tables that PCRE uses. So as not to have
140 to keep two copies, we include the source file here, changing the names of the
141 external symbols to prevent clashes. */
142
143 #define PCRE_INCLUDED
144 #undef PRIV
145 #define PRIV(name) name
146
147 #include "pcre_tables.c"
148
149 /* The definition of the macro PRINTABLE, which determines whether to print an
150 output character as-is or as a hex value when showing compiled patterns, is
151 the same as in the printint.src file. We use it here in cases when the locale
152 has not been explicitly changed, so as to get consistent output from systems
153 that differ in their output from isprint() even in the "C" locale. */
154
155 #ifdef EBCDIC
156 #define PRINTABLE(c) ((c) >= 64 && (c) < 255) /* EBCDIC build: values 64..254 are shown as-is */
157 #else
158 #define PRINTABLE(c) ((c) >= 32 && (c) < 127) /* non-EBCDIC build: values 32..126 are shown as-is */
159 #endif
160 /* PRINTOK(c) uses isprint() when locale_set is non-zero and the fixed PRINTABLE() range otherwise. PRINTABLE() evaluates (c) twice, so the argument should have no side effects. NOTE(review): isprint() requires a value representable as unsigned char (or EOF); confirm that no caller passes a wider 16-bit value while a locale is set. */
161 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
162
163 /* Posix support is disabled in 16 bit only mode. */
164 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
165 #define NOPOSIX
166 #endif
167
168 /* It is possible to compile this test program without including support for
169 testing the POSIX interface, though this is not available via the standard
170 Makefile. */
171
172 #if !defined NOPOSIX
173 #include "pcreposix.h"
174 #endif
175
176 /* It is also possible, originally for the benefit of a version that was
177 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
178 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
179 automatically cut out the UTF support if PCRE is built without it. */
180
181 #ifndef SUPPORT_UTF
182 #ifndef NOUTF
183 #define NOUTF
184 #endif
185 #endif
186
187 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
188 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
189 only from one place and is handled differently). I couldn't dream up any way of
190 using a single macro to do this in a generic way, because of the many different
191 argument requirements. We know that at least one of SUPPORT_PCRE8 and
192 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
193 use these in the definitions of generic macros.
194
195 **** Special note about the PCHARSxxx macros: the address of the string to be
196 printed is always given as two arguments: a base address followed by an offset.
197 The base address is cast to the correct data size for 8 or 16 bit data; the
198 offset is in units of this size. If the string were given as base+offset in one
199 argument, the casting might be incorrectly applied. */
200
201 #ifdef SUPPORT_PCRE8
202 /* 8-bit wrappers. Each macro forwards to the 8-bit library function (or local helper) named in its expansion. Expansions of the form "x = ..." assign the call's result to that macro argument and carry no trailing semicolon, so they must be used as statements. */
203 #define PCHARS8(lv, p, offset, len, f) \
204 lv = pchars((pcre_uint8 *)(p) + offset, len, f)
205 /* As PCHARS8, but the value returned by pchars() is discarded. */
206 #define PCHARSV8(p, offset, len, f) \
207 (void)pchars((pcre_uint8 *)(p) + offset, len, f)
208 /* cn16 is accepted so that the argument list matches READ_CAPTURE_NAME16; it is not used here. */
209 #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
210 p = read_capture_name8(p, cn8, re)
211 /* strlen() result narrowed to int. */
212 #define STRLEN8(p) ((int)strlen((char *)p))
213 /* Stores the callout function in the library's pcre_callout variable. */
214 #define SET_PCRE_CALLOUT8(callout) \
215 pcre_callout = callout
216
217 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
218 pcre_assign_jit_stack(extra, callback, userdata)
219 /* The pattern is cast to char * before being handed to pcre_compile(); the result is assigned to re. */
220 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
221 re = pcre_compile((char *)pat, options, error, erroffset, tables)
222 /* In the copy/get wrappers below, subject and name pointers are cast to char * and the return code is assigned to rc. */
223 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
224 namesptr, cbuffer, size) \
225 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
226 (char *)namesptr, cbuffer, size)
227
228 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
229 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
230 /* The two matching wrappers assign the value returned by the matcher to count. */
231 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
232 offsets, size_offsets, workspace, size_workspace) \
233 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
234 offsets, size_offsets, workspace, size_workspace)
235
236 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
237 offsets, size_offsets) \
238 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
239 offsets, size_offsets)
240 /* The three release wrappers below are plain calls with no assignment. */
241 #define PCRE_FREE_STUDY8(extra) \
242 pcre_free_study(extra)
243
244 #define PCRE_FREE_SUBSTRING8(substring) \
245 pcre_free_substring(substring)
246
247 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
248 pcre_free_substring_list(listptr)
249
250 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
251 getnamesptr, subsptr) \
252 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
253 (char *)getnamesptr, subsptr)
254 /* NOTE(review): the rc parameter is not used in this expansion; re is taken from the scope in which the macro is invoked. Confirm that this is intended. */
255 #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
256 n = pcre_get_stringnumber(re, (char *)ptr)
257
258 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
259 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
260 /* Unlike its siblings, this wrapper casts the subject to const char *. */
261 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
262 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
263
264 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
265 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
266 /* Calls the pcre_printint() function declared earlier in this file. */
267 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
268 pcre_printint(re, outfile, debug_lengths)
269 /* The value returned by pcre_study() is assigned to extra. */
270 #define PCRE_STUDY8(extra, re, options, error) \
271 extra = pcre_study(re, options, error)
272 /* The JIT stack wrappers are plain calls; PCRE_JIT_STACK_ALLOC8 is an expression whose value is the call's result. */
273 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
274 pcre_jit_stack_alloc(startsize, maxsize)
275
276 #define PCRE_JIT_STACK_FREE8(stack) \
277 pcre_jit_stack_free(stack)
278
279 #endif /* SUPPORT_PCRE8 */
280
281 /* -----------------------------------------------------------*/
282
283 #ifdef SUPPORT_PCRE16
284 /* 16-bit wrappers. They take the same arguments as the 8-bit wrappers and cast them to the 16-bit types (pcre16 *, pcre16_extra *, PCRE_SPTR16, ...) at the call. Expansions of the form "x = ..." assign the call's result to that macro argument and carry no trailing semicolon, so they must be used as statements. */
285 #define PCHARS16(lv, p, offset, len, f) \
286 lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
287 /* As PCHARS16, but the value returned by pchars16() is discarded. */
288 #define PCHARSV16(p, offset, len, f) \
289 (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
290 /* cn8 is accepted so that the argument list matches READ_CAPTURE_NAME8; it is not used here. */
291 #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
292 p = read_capture_name16(p, cn16, re)
293 /* strlen16() result narrowed to int. */
294 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
295 /* The callout function pointer is cast to the 16-bit callout signature before being stored in pcre16_callout. */
296 #define SET_PCRE_CALLOUT16(callout) \
297 pcre16_callout = (int (*)(pcre16_callout_block *))callout
298
299 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
300 pcre16_assign_jit_stack((pcre16_extra *)extra, \
301 (pcre16_jit_callback)callback, userdata)
302 /* The value returned by pcre16_compile() is cast to pcre * before being assigned to re. */
303 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
304 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
305 tables)
306 /* The two copy wrappers pass size/2 to the library. NOTE(review): presumably size is a byte count being converted to a count of 16-bit units - confirm at the call sites. */
307 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
308 namesptr, cbuffer, size) \
309 rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
310 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
311
312 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
313 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
314 (PCRE_UCHAR16 *)cbuffer, size/2)
315 /* The two matching wrappers assign the value returned by the matcher to count. */
316 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
317 offsets, size_offsets, workspace, size_workspace) \
318 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
319 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
320 workspace, size_workspace)
321
322 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
323 offsets, size_offsets) \
324 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
325 len, start_offset, options, offsets, size_offsets)
326 /* The three release wrappers below are plain calls with no assignment. */
327 #define PCRE_FREE_STUDY16(extra) \
328 pcre16_free_study((pcre16_extra *)extra)
329
330 #define PCRE_FREE_SUBSTRING16(substring) \
331 pcre16_free_substring((PCRE_SPTR16)substring)
332
333 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
334 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
335 /* The output pointer goes through an intermediate (void*) cast. NOTE(review): presumably this is to silence pointer-conversion warnings - confirm. */
336 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
337 getnamesptr, subsptr) \
338 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
339 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
340 /* Calls pcre16_get_stringnumber() and assigns its result to n. The compiled pattern is converted to pcre16 * at the call, as in every other 16-bit wrapper that passes a pattern. NOTE(review): the rc parameter is not used in this expansion; re is taken from the scope in which the macro is invoked. Confirm that this is intended. */
341 #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
342 n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)
343 /* Remaining 16-bit wrappers. Output pointers in the two get wrappers go through an intermediate (void*) cast before being cast to the 16-bit pointer type. */
344 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
345 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
346 (PCRE_SPTR16 *)(void*)subsptr)
347
348 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
349 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
350 (PCRE_SPTR16 **)(void*)listptr)
351
352 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
353 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
354 tables)
355 /* re is passed to pcre16_printint() without a cast; the prototype earlier in this file declares its first parameter as pcre *. */
356 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
357 pcre16_printint(re, outfile, debug_lengths)
358 /* The value returned by pcre16_study() is cast to pcre_extra * before being assigned to extra. */
359 #define PCRE_STUDY16(extra, re, options, error) \
360 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
361 /* PCRE_JIT_STACK_ALLOC16 is an expression whose value is the call's result cast to pcre_jit_stack *. */
362 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
363 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
364
365 #define PCRE_JIT_STACK_FREE16(stack) \
366 pcre16_jit_stack_free((pcre16_jit_stack *)stack)
367
368 #endif /* SUPPORT_PCRE16 */
369
370
371 /* ----- Both modes are supported; a runtime test is needed, except for
372 pcre_config(), and the JIT stack functions, when it doesn't matter which
373 version is called. ----- */
374
375 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
376 /* Each generic macro below selects the 16-bit or the 8-bit wrapper at run time by testing use_pcre16. Macros written as if/else expand to a complete if/else statement whose last branch has no trailing semicolon, so they must be used as statements followed by ';'. CHAR_SIZE, STRLEN, PCRE_JIT_STACK_ALLOC and PCRE_MAKETABLES are conditional expressions and yield a value. */
377 #define CHAR_SIZE (use_pcre16? 2:1)
378
379 #define PCHARS(lv, p, offset, len, f) \
380 if (use_pcre16) \
381 PCHARS16(lv, p, offset, len, f); \
382 else \
383 PCHARS8(lv, p, offset, len, f)
384
385 #define PCHARSV(p, offset, len, f) \
386 if (use_pcre16) \
387 PCHARSV16(p, offset, len, f); \
388 else \
389 PCHARSV8(p, offset, len, f)
390 /* Both name buffers are passed; each mode-specific wrapper uses only its own one. */
391 #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
392 if (use_pcre16) \
393 READ_CAPTURE_NAME16(p, cn8, cn16, re); \
394 else \
395 READ_CAPTURE_NAME8(p, cn8, cn16, re)
396
397 #define SET_PCRE_CALLOUT(callout) \
398 if (use_pcre16) \
399 SET_PCRE_CALLOUT16(callout); \
400 else \
401 SET_PCRE_CALLOUT8(callout)
402 /* Expression form: usable wherever an int value is needed. */
403 #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
404
405 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
406 if (use_pcre16) \
407 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
408 else \
409 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
410
411 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
412 if (use_pcre16) \
413 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
414 else \
415 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
416 /* No run-time test here: the 8-bit pcre_config is used in both modes (the section header comment says it does not matter which version is called). */
417 #define PCRE_CONFIG pcre_config
418
419 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
420 namesptr, cbuffer, size) \
421 if (use_pcre16) \
422 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
423 namesptr, cbuffer, size); \
424 else \
425 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
426 namesptr, cbuffer, size)
427
428 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
429 if (use_pcre16) \
430 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
431 else \
432 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
433
434 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
435 offsets, size_offsets, workspace, size_workspace) \
436 if (use_pcre16) \
437 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
438 offsets, size_offsets, workspace, size_workspace); \
439 else \
440 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
441 offsets, size_offsets, workspace, size_workspace)
442
443 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
444 offsets, size_offsets) \
445 if (use_pcre16) \
446 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
447 offsets, size_offsets); \
448 else \
449 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
450 offsets, size_offsets)
451
452 #define PCRE_FREE_STUDY(extra) \
453 if (use_pcre16) \
454 PCRE_FREE_STUDY16(extra); \
455 else \
456 PCRE_FREE_STUDY8(extra)
457
458 #define PCRE_FREE_SUBSTRING(substring) \
459 if (use_pcre16) \
460 PCRE_FREE_SUBSTRING16(substring); \
461 else \
462 PCRE_FREE_SUBSTRING8(substring)
463
464 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
465 if (use_pcre16) \
466 PCRE_FREE_SUBSTRING_LIST16(listptr); \
467 else \
468 PCRE_FREE_SUBSTRING_LIST8(listptr)
469
470 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
471 getnamesptr, subsptr) \
472 if (use_pcre16) \
473 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
474 getnamesptr, subsptr); \
475 else \
476 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
477 getnamesptr, subsptr)
478 /* NOTE(review): both mode-specific wrappers ignore the rc argument and use the identifier re from the invoking scope - confirm. */
479 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
480 if (use_pcre16) \
481 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
482 else \
483 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
484
485 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
486 if (use_pcre16) \
487 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
488 else \
489 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
490
491 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
492 if (use_pcre16) \
493 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
494 else \
495 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
496 /* Expression form: yields the pcre_jit_stack * produced by whichever allocator is selected. */
497 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
498 (use_pcre16 ? \
499 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
500 :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
501
502 #define PCRE_JIT_STACK_FREE(stack) \
503 if (use_pcre16) \
504 PCRE_JIT_STACK_FREE16(stack); \
505 else \
506 PCRE_JIT_STACK_FREE8(stack)
507 /* Expression form that already includes the call parentheses, so it is written without "()" at the point of use. */
508 #define PCRE_MAKETABLES \
509 (use_pcre16? pcre16_maketables() : pcre_maketables())
510
511 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
512 if (use_pcre16) \
513 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
514 else \
515 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
516
517 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
518 if (use_pcre16) \
519 PCRE_PRINTINT16(re, outfile, debug_lengths); \
520 else \
521 PCRE_PRINTINT8(re, outfile, debug_lengths)
522
523 #define PCRE_STUDY(extra, re, options, error) \
524 if (use_pcre16) \
525 PCRE_STUDY16(extra, re, options, error); \
526 else \
527 PCRE_STUDY8(extra, re, options, error)
528
529 /* ----- Only 8-bit mode is supported ----- */
530
531 #elif defined SUPPORT_PCRE8 /* only the 8-bit library: every generic name is a direct alias of its ...8 wrapper */
532 #define CHAR_SIZE 1 /* constant here; a run-time expression when both modes are built */
533 #define PCHARS PCHARS8
534 #define PCHARSV PCHARSV8
535 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
536 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
537 #define STRLEN STRLEN8
538 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
539 #define PCRE_COMPILE PCRE_COMPILE8
540 #define PCRE_CONFIG pcre_config /* names the library function itself, not a wrapper macro */
541 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
542 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
543 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
544 #define PCRE_EXEC PCRE_EXEC8
545 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
546 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
547 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
548 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
549 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
550 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
551 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
552 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
553 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
554 #define PCRE_MAKETABLES pcre_maketables() /* expands to a call, so it is used without "()" */
555 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
556 #define PCRE_PRINTINT PCRE_PRINTINT8
557 #define PCRE_STUDY PCRE_STUDY8
558
559 /* ----- Only 16-bit mode is supported ----- */
560
561 #else /* only the 16-bit library: every generic name is a direct alias of its ...16 wrapper */
562 #define CHAR_SIZE 2 /* constant here; a run-time expression when both modes are built */
563 #define PCHARS PCHARS16
564 #define PCHARSV PCHARSV16
565 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
566 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
567 #define STRLEN STRLEN16
568 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
569 #define PCRE_COMPILE PCRE_COMPILE16
570 #define PCRE_CONFIG pcre16_config /* the 16-bit function is named here, unlike in the other two configurations */
571 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
572 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
573 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
574 #define PCRE_EXEC PCRE_EXEC16
575 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
576 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
577 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
578 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
579 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
580 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
581 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
582 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
583 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
584 #define PCRE_MAKETABLES pcre16_maketables() /* expands to a call, so it is used without "()" */
585 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
586 #define PCRE_PRINTINT PCRE_PRINTINT16
587 #define PCRE_STUDY PCRE_STUDY16
588 #endif
589
590 /* ----- End of mode-specific function call macros ----- */
591
592
593 /* Other parameters */
594
595 #ifndef CLOCKS_PER_SEC /* fallback for environments where the earlier includes did not define it */
596 #ifdef CLK_TCK
597 #define CLOCKS_PER_SEC CLK_TCK /* use CLK_TCK when it is available */
598 #else
599 #define CLOCKS_PER_SEC 100 /* last resort: a fixed value of 100 */
600 #endif
601 #endif
602
603 /* This is the default loop count for timing. */
604
605 #define LOOPREPEAT 500000
606
607 /* Static variables. All of them have static storage duration, so the ones without an explicit initializer start out as zero. */
608
609 static FILE *outfile;
610 static int log_store = 0;
611 static int callout_count;
612 static int callout_extra;
613 static int callout_fail_count;
614 static int callout_fail_id;
615 static int debug_lengths;
616 static int first_callout;
617 static int locale_set = 0; /* tested by PRINTOK(): non-zero selects isprint() */
618 static int show_malloc;
619 static int use_utf;
620 static size_t gotten_store;
621 static size_t first_gotten_store = 0;
622 static const unsigned char *last_callout_mark = NULL;
623
624 /* The buffers grow automatically if very long input lines are encountered. */
625
626 static int buffer_size = 50000; /* starting value; NOTE(review): presumably the size in bytes of each buffer below - confirm at the allocation site */
627 static pcre_uint8 *buffer = NULL; /* the three buffers start unallocated; they are set up outside this section */
628 static pcre_uint8 *dbuffer = NULL;
629 static pcre_uint8 *pbuffer = NULL;
630
631 /* Another buffer is needed for translation to 16-bit character strings. It will be
632 obtained and extended as required. */
633
634 #ifdef SUPPORT_PCRE16
635 static int buffer16_size = 0; /* starts at 0: nothing is allocated yet */
636 static pcre_uint16 *buffer16 = NULL;
637
638 #ifdef SUPPORT_PCRE8
639
640 /* We need the table of operator lengths that is used for 16-bit compiling, in
641 order to swap bytes in a pattern for saving/reloading testing. Luckily, the
642 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
643 appropriately for the 16-bit world. Just as a safety check, make sure that
644 COMPILE_PCRE16 is *not* set. */
645
646 #ifdef COMPILE_PCRE16
647 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
648 #endif
649 /* Remap LINK_SIZE for the 16-bit table: 2 becomes 1, 3 or 4 becomes 2; any other value stops the build. */
650 #if LINK_SIZE == 2
651 #undef LINK_SIZE
652 #define LINK_SIZE 1
653 #elif LINK_SIZE == 3 || LINK_SIZE == 4
654 #undef LINK_SIZE
655 #define LINK_SIZE 2
656 #else
657 #error LINK_SIZE must be either 2, 3, or 4
658 #endif
659 /* IMM2_SIZE is likewise forced to 1 before OP_LENGTHS is expanded below. */
660 #undef IMM2_SIZE
661 #define IMM2_SIZE 1
662
663 #endif /* SUPPORT_PCRE8 */
664 /* OP_LENGTHS is expanded here with whatever LINK_SIZE and IMM2_SIZE values are in effect at this point. */
665 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
666 #endif /* SUPPORT_PCRE16 */
667
668 /* If we have 8-bit support, default use_pcre16 to false; if there is also
669 16-bit support, it can be changed by an option. If there is no 8-bit support,
670 there must be 16-bit support, so default it to 1. */
671
672 #ifdef SUPPORT_PCRE8
673 static int use_pcre16 = 0; /* 8-bit support is built in: start in 8-bit mode */
674 #else
675 static int use_pcre16 = 1; /* no 8-bit support: 16-bit mode from the start */
676 #endif
677
678 /* Textual explanations for runtime error codes */
679
680 static const char *errtexts[] = { /* NULL entries have no generic text (see the comment on each). NOTE(review): presumably indexed by the negated library error code - confirm at the use site */
681 NULL, /* 0 is no error */
682 NULL, /* NOMATCH is handled specially */
683 "NULL argument passed",
684 "bad option value",
685 "magic number missing",
686 "unknown opcode - pattern overwritten?",
687 "no more memory",
688 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
689 "match limit exceeded",
690 "callout error code",
691 NULL, /* BADUTF8/16 is handled specially */
692 NULL, /* BADUTF8/16 offset is handled specially */
693 NULL, /* PARTIAL is handled specially */
694 "not used - internal error",
695 "internal error - pattern overwritten?",
696 "bad count value",
697 "item unsupported for DFA matching",
698 "backreference condition or recursion test not supported for DFA matching",
699 "match limit not supported for DFA matching",
700 "workspace size exceeded in DFA matching",
701 "too much recursion for DFA matching",
702 "recursion limit exceeded",
703 "not used - internal error",
704 "invalid combination of newline options",
705 "bad offset value",
706 NULL, /* SHORTUTF8/16 is handled specially */
707 "nested recursion at the same subject position",
708 "JIT stack limit reached",
709 "pattern compiled in wrong mode: 8-bit/16-bit error"
710 };
711
712
713 /*************************************************
714 * Alternate character tables *
715 *************************************************/
716
717 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
718 using the default tables of the library. However, the T option can be used to
719 select alternate sets of tables, for different kinds of testing. Note also that
720 the L (locale) option also adjusts the tables. */
721
722 /* This is the set of tables distributed as default with PCRE. It recognizes
723 only ASCII characters. */
724
725 static const pcre_uint8 tables0[] = {
726
727 /* This table is a lower casing table. */
728
729 0, 1, 2, 3, 4, 5, 6, 7,
730 8, 9, 10, 11, 12, 13, 14, 15,
731 16, 17, 18, 19, 20, 21, 22, 23,
732 24, 25, 26, 27, 28, 29, 30, 31,
733 32, 33, 34, 35, 36, 37, 38, 39,
734 40, 41, 42, 43, 44, 45, 46, 47,
735 48, 49, 50, 51, 52, 53, 54, 55,
736 56, 57, 58, 59, 60, 61, 62, 63,
737 64, 97, 98, 99,100,101,102,103,
738 104,105,106,107,108,109,110,111,
739 112,113,114,115,116,117,118,119,
740 120,121,122, 91, 92, 93, 94, 95,
741 96, 97, 98, 99,100,101,102,103,
742 104,105,106,107,108,109,110,111,
743 112,113,114,115,116,117,118,119,
744 120,121,122,123,124,125,126,127,
745 128,129,130,131,132,133,134,135,
746 136,137,138,139,140,141,142,143,
747 144,145,146,147,148,149,150,151,
748 152,153,154,155,156,157,158,159,
749 160,161,162,163,164,165,166,167,
750 168,169,170,171,172,173,174,175,
751 176,177,178,179,180,181,182,183,
752 184,185,186,187,188,189,190,191,
753 192,193,194,195,196,197,198,199,
754 200,201,202,203,204,205,206,207,
755 208,209,210,211,212,213,214,215,
756 216,217,218,219,220,221,222,223,
757 224,225,226,227,228,229,230,231,
758 232,233,234,235,236,237,238,239,
759 240,241,242,243,244,245,246,247,
760 248,249,250,251,252,253,254,255,
761
762 /* This table is a case flipping table. */
763
764 0, 1, 2, 3, 4, 5, 6, 7,
765 8, 9, 10, 11, 12, 13, 14, 15,
766 16, 17, 18, 19, 20, 21, 22, 23,
767 24, 25, 26, 27, 28, 29, 30, 31,
768 32, 33, 34, 35, 36, 37, 38, 39,
769 40, 41, 42, 43, 44, 45, 46, 47,
770 48, 49, 50, 51, 52, 53, 54, 55,
771 56, 57, 58, 59, 60, 61, 62, 63,
772 64, 97, 98, 99,100,101,102,103,
773 104,105,106,107,108,109,110,111,
774 112,113,114,115,116,117,118,119,
775 120,121,122, 91, 92, 93, 94, 95,
776 96, 65, 66, 67, 68, 69, 70, 71,
777 72, 73, 74, 75, 76, 77, 78, 79,
778 80, 81, 82, 83, 84, 85, 86, 87,
779 88, 89, 90,123,124,125,126,127,
780 128,129,130,131,132,133,134,135,
781 136,137,138,139,140,141,142,143,
782 144,145,146,147,148,149,150,151,
783 152,153,154,155,156,157,158,159,
784 160,161,162,163,164,165,166,167,
785 168,169,170,171,172,173,174,175,
786 176,177,178,179,180,181,182,183,
787 184,185,186,187,188,189,190,191,
788 192,193,194,195,196,197,198,199,
789 200,201,202,203,204,205,206,207,
790 208,209,210,211,212,213,214,215,
791 216,217,218,219,220,221,222,223,
792 224,225,226,227,228,229,230,231,
793 232,233,234,235,236,237,238,239,
794 240,241,242,243,244,245,246,247,
795 248,249,250,251,252,253,254,255,
796
797 /* This table contains bit maps for various character classes. Each map is 32
798 bytes long and the bits run from the least significant end of each byte. The
799 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
800 graph, print, punct, and cntrl. Other classes are built from combinations. */
801
802 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
803 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
804 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
805 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
806
807 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
808 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
809 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
810 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
811
812 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
813 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
814 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
815 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
816
817 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
818 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
819 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
820 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
821
822 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
823 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
824 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
825 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
826
827 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
828 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
829 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
830 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
831
832 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
833 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
834 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
835 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
836
837 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
838 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
839 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
840 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
841
842 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
843 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
844 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
846
847 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
848 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
849 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
851
852 /* This table identifies various classes of character by individual bits:
853 0x01 white space character
854 0x02 letter
855 0x04 decimal digit
856 0x08 hexadecimal digit
857 0x10 alphanumeric or '_'
858 0x80 regular expression metacharacter or binary zero
859 */
860
861 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
862 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
863 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
864 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
865 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
866 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
867 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
868 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
869 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
870 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
871 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
872 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
873 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
874 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
875 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
876 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
877 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
878 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
879 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
880 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
881 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
882 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
883 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
884 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
885 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
886 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
887 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
888 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
889 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
890 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
891 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
892 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
893
894 /* This is a set of tables that came originally from a Windows user. It seems to
895 be at least an approximation of ISO 8859. In particular, there are characters
896 greater than 128 that are marked as spaces, letters, etc. */
897
898 static const pcre_uint8 tables1[] = {
/* Entries 0-255: a per-character mapping that is the identity except that
65-90 map to 97-122, 192-214 map to 224-246 and 216-222 map to 248-254, i.e.
it appears to be a lower-casing table. */
899 0,1,2,3,4,5,6,7,
900 8,9,10,11,12,13,14,15,
901 16,17,18,19,20,21,22,23,
902 24,25,26,27,28,29,30,31,
903 32,33,34,35,36,37,38,39,
904 40,41,42,43,44,45,46,47,
905 48,49,50,51,52,53,54,55,
906 56,57,58,59,60,61,62,63,
907 64,97,98,99,100,101,102,103,
908 104,105,106,107,108,109,110,111,
909 112,113,114,115,116,117,118,119,
910 120,121,122,91,92,93,94,95,
911 96,97,98,99,100,101,102,103,
912 104,105,106,107,108,109,110,111,
913 112,113,114,115,116,117,118,119,
914 120,121,122,123,124,125,126,127,
915 128,129,130,131,132,133,134,135,
916 136,137,138,139,140,141,142,143,
917 144,145,146,147,148,149,150,151,
918 152,153,154,155,156,157,158,159,
919 160,161,162,163,164,165,166,167,
920 168,169,170,171,172,173,174,175,
921 176,177,178,179,180,181,182,183,
922 184,185,186,187,188,189,190,191,
923 224,225,226,227,228,229,230,231,
924 232,233,234,235,236,237,238,239,
925 240,241,242,243,244,245,246,215,
926 248,249,250,251,252,253,254,223,
927 224,225,226,227,228,229,230,231,
928 232,233,234,235,236,237,238,239,
929 240,241,242,243,244,245,246,247,
930 248,249,250,251,252,253,254,255,
/* Entries 256-511: a second per-character mapping in which 65-90 and 97-122
are exchanged, as are 192-214 with 224-246 and 216-222 with 248-254; all other
values map to themselves. It appears to be a case-flipping table. */
931 0,1,2,3,4,5,6,7,
932 8,9,10,11,12,13,14,15,
933 16,17,18,19,20,21,22,23,
934 24,25,26,27,28,29,30,31,
935 32,33,34,35,36,37,38,39,
936 40,41,42,43,44,45,46,47,
937 48,49,50,51,52,53,54,55,
938 56,57,58,59,60,61,62,63,
939 64,97,98,99,100,101,102,103,
940 104,105,106,107,108,109,110,111,
941 112,113,114,115,116,117,118,119,
942 120,121,122,91,92,93,94,95,
943 96,65,66,67,68,69,70,71,
944 72,73,74,75,76,77,78,79,
945 80,81,82,83,84,85,86,87,
946 88,89,90,123,124,125,126,127,
947 128,129,130,131,132,133,134,135,
948 136,137,138,139,140,141,142,143,
949 144,145,146,147,148,149,150,151,
950 152,153,154,155,156,157,158,159,
951 160,161,162,163,164,165,166,167,
952 168,169,170,171,172,173,174,175,
953 176,177,178,179,180,181,182,183,
954 184,185,186,187,188,189,190,191,
955 224,225,226,227,228,229,230,231,
956 232,233,234,235,236,237,238,239,
957 240,241,242,243,244,245,246,215,
958 248,249,250,251,252,253,254,223,
959 192,193,194,195,196,197,198,199,
960 200,201,202,203,204,205,206,207,
961 208,209,210,211,212,213,214,247,
962 216,217,218,219,220,221,222,255,
/* Entries 512-831: 320 bytes, i.e. ten 32-byte blocks written here as four
lines each. NOTE(review): presumably these are the character class bit maps in
the same order as the preceding table in this file (space, xdigit, digit,
upper, lower, word, graph, print, punct, cntrl) - inferred from the parallel
layout, please confirm. */
963 0,62,0,0,1,0,0,0,
964 0,0,0,0,0,0,0,0,
965 32,0,0,0,1,0,0,0,
966 0,0,0,0,0,0,0,0,
967 0,0,0,0,0,0,255,3,
968 126,0,0,0,126,0,0,0,
969 0,0,0,0,0,0,0,0,
970 0,0,0,0,0,0,0,0,
971 0,0,0,0,0,0,255,3,
972 0,0,0,0,0,0,0,0,
973 0,0,0,0,0,0,12,2,
974 0,0,0,0,0,0,0,0,
975 0,0,0,0,0,0,0,0,
976 254,255,255,7,0,0,0,0,
977 0,0,0,0,0,0,0,0,
978 255,255,127,127,0,0,0,0,
979 0,0,0,0,0,0,0,0,
980 0,0,0,0,254,255,255,7,
981 0,0,0,0,0,4,32,4,
982 0,0,0,128,255,255,127,255,
983 0,0,0,0,0,0,255,3,
984 254,255,255,135,254,255,255,7,
985 0,0,0,0,0,4,44,6,
986 255,255,127,255,255,255,127,255,
987 0,0,0,0,254,255,255,255,
988 255,255,255,255,255,255,255,127,
989 0,0,0,0,254,255,255,255,
990 255,255,255,255,255,255,255,255,
991 0,2,0,0,255,255,255,255,
992 255,255,255,255,255,255,255,127,
993 0,0,0,0,255,255,255,255,
994 255,255,255,255,255,255,255,255,
995 0,0,0,0,254,255,0,252,
996 1,0,0,248,1,0,0,120,
997 0,0,0,0,254,255,255,255,
998 0,0,128,0,0,0,128,0,
999 255,255,255,255,0,0,0,0,
1000 0,0,0,0,0,0,0,128,
1001 255,255,255,255,0,0,0,0,
1002 0,0,0,0,0,0,0,0,
/* Entries 832-1087: one flag byte per character value 0-255. NOTE(review):
presumably these use the same bit assignments as the character-type table
earlier in this file (0x01 white space, 0x02 letter, 0x04 decimal digit, 0x08
hexadecimal digit, 0x10 alphanumeric or '_', 0x80 metacharacter or binary
zero) - please confirm. Unlike that table, some values above 127 are
non-zero here. */
1003 128,0,0,0,0,0,0,0,
1004 0,1,1,0,1,1,0,0,
1005 0,0,0,0,0,0,0,0,
1006 0,0,0,0,0,0,0,0,
1007 1,0,0,0,128,0,0,0,
1008 128,128,128,128,0,0,128,0,
1009 28,28,28,28,28,28,28,28,
1010 28,28,0,0,0,0,0,128,
1011 0,26,26,26,26,26,26,18,
1012 18,18,18,18,18,18,18,18,
1013 18,18,18,18,18,18,18,18,
1014 18,18,18,128,128,0,128,16,
1015 0,26,26,26,26,26,26,18,
1016 18,18,18,18,18,18,18,18,
1017 18,18,18,18,18,18,18,18,
1018 18,18,18,128,128,0,0,0,
1019 0,0,0,0,0,1,0,0,
1020 0,0,0,0,0,0,0,0,
1021 0,0,0,0,0,0,0,0,
1022 0,0,0,0,0,0,0,0,
1023 1,0,0,0,0,0,0,0,
1024 0,0,18,0,0,0,0,0,
1025 0,0,20,20,0,18,0,0,
1026 0,20,18,0,0,0,0,0,
1027 18,18,18,18,18,18,18,18,
1028 18,18,18,18,18,18,18,18,
1029 18,18,18,18,18,18,18,0,
1030 18,18,18,18,18,18,18,18,
1031 18,18,18,18,18,18,18,18,
1032 18,18,18,18,18,18,18,18,
1033 18,18,18,18,18,18,18,0,
1034 18,18,18,18,18,18,18,18
1035 };
1036
1037
1038
1039
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for C libraries that lack strerror() (SunOS4 is one example). The
message is looked up in the sys_errlist[] table, which has sys_nerr entries.

Argument:
  n           an error number

Returns:      the table entry for n, or a fixed string if n is out of range
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1059
1060
1061 /*************************************************
1062 * JIT memory callback *
1063 *************************************************/
1064
1065 static pcre_jit_stack* jit_callback(void *arg)
1066 {
1067 return (pcre_jit_stack *)arg;
1068 }
1069
1070
1071 #if !defined NOUTF || defined SUPPORT_PCRE16
1072 /*************************************************
1073 * Convert UTF-8 string to value *
1074 *************************************************/
1075
1076 /* This function takes one or more bytes that represents a UTF-8 character,
1077 and returns the value of the character.
1078
1079 Argument:
1080 utf8bytes a pointer to the byte vector
1081 vptr a pointer to an int to receive the value
1082
1083 Returns: > 0 => the number of bytes consumed
1084 -6 to 0 => malformed UTF-8 character at offset = (-return)
1085 */
1086
1087 static int
1088 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1089 {
1090 int c = *utf8bytes++;
1091 int d = c;
1092 int i, j, s;
1093
1094 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1095 {
1096 if ((d & 0x80) == 0) break;
1097 d <<= 1;
1098 }
1099
1100 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1101 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1102
1103 /* i now has a value in the range 1-5 */
1104
1105 s = 6*i;
1106 d = (c & utf8_table3[i]) << s;
1107
1108 for (j = 0; j < i; j++)
1109 {
1110 c = *utf8bytes++;
1111 if ((c & 0xc0) != 0x80) return -(j+1);
1112 s -= 6;
1113 d |= (c & 0x3f) << s;
1114 }
1115
1116 /* Check that encoding was the correct unique one */
1117
1118 for (j = 0; j < utf8_table1_size; j++)
1119 if (d <= utf8_table1[j]) break;
1120 if (j != i) return -(i+1);
1121
1122 /* Valid value */
1123
1124 *vptr = d;
1125 return i+1;
1126 }
1127 #endif /* NOUTF || SUPPORT_PCRE16 */
1128
1129
1130
1131 #if !defined NOUTF || defined SUPPORT_PCRE16
1132 /*************************************************
1133 * Convert character value to UTF-8 *
1134 *************************************************/
1135
1136 /* This function takes an integer value in the range 0 - 0x7fffffff
1137 and encodes it as a UTF-8 character in 0 to 6 bytes.
1138
1139 Arguments:
1140 cvalue the character value
1141 utf8bytes pointer to buffer for result - at least 6 bytes long
1142
1143 Returns: number of characters placed in the buffer
1144 */
1145
1146 static int
1147 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1148 {
1149 register int i, j;
1150 for (i = 0; i < utf8_table1_size; i++)
1151 if (cvalue <= utf8_table1[i]) break;
1152 utf8bytes += i;
1153 for (j = i; j > 0; j--)
1154 {
1155 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1156 cvalue >>= 6;
1157 }
1158 *utf8bytes = utf8_table2[i] | cvalue;
1159 return i + 1;
1160 }
1161 #endif
1162
1163
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* The converted string is always left in buffer16, which is enlarged first if
2*len + 2 bytes will not fit. That is enough in every case: in non-UTF mode
each byte becomes one 16-bit unit, and a UTF-8 character never needs more
16-bit units than it has bytes (values up to 0xffff use one unit, higher values
use 4 bytes in UTF-8 and two units in UTF-16).

Surrogate values are deliberately not faulted here, so that invalid UTF-16
strings can be constructed for testing.

Patterns are either plain ASCII or UTF-8; data lines are always treated as
UTF-8 so that values greater than 255 can be given.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;

if (buffer16_size < 2*len + 2)
  {
  free(buffer16);                      /* free(NULL) is a no-op */
  buffer16_size = 2*len + 2;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

/* A non-UTF pattern is widened byte by byte. */

if (!utf && !data)
  {
  for (; len > 0; len--) *out++ = *p++;
  *out = 0;
  return out - buffer16;
  }

/* Everything else is decoded as UTF-8, one character at a time. */

while (len > 0)
  {
  int c = 0;
  int used = utf82ord(p, &c);

  if (used <= 0) return -1;
  if (c > 0x10ffff) return -2;

  p += used;
  len -= used;

  if (c >= 0x10000)
    {
    if (!utf) return -3;
    c -= 0x10000;
    *out++ = 0xD800 | (c >> 10);       /* high surrogate */
    *out++ = 0xDC00 | (c & 0x3ff);     /* low surrogate */
    }
  else *out++ = c;
  }

*out = 0;
return out - buffer16;
}
#endif
1242
1243
1244 /*************************************************
1245 * Read or extend an input line *
1246 *************************************************/
1247
1248 /* Input lines are read into buffer, but both patterns and data lines can be
1249 continued over multiple input lines. In addition, if the buffer fills up, we
1250 want to automatically expand it so as to be able to handle extremely large
1251 lines that are needed for certain stress tests. When the input buffer is
1252 expanded, the other two buffers must also be expanded likewise, and the
1253 contents of pbuffer, which are a copy of the input for callouts, must be
1254 preserved (for when expansion happens for a data line). This is not the most
1255 optimal way of handling this, but hey, this is just a test program!
1256
1257 Arguments:
1258 f the file to read
1259 start where in buffer to start (this *must* be within buffer)
1260 prompt for stdin or readline()
1261
1262 Returns: pointer to the start of new data
1263 could be a copy of start, or could be moved
1264 NULL if no data read and EOF reached
1265 */
1266
1267 static pcre_uint8 *
1268 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1269 {
1270 pcre_uint8 *here = start;
1271
1272 for (;;)
1273 {
1274 int rlen = (int)(buffer_size - (here - buffer));
1275
1276 if (rlen > 1000)
1277 {
1278 int dlen;
1279
1280 /* If libreadline support is required, use readline() to read a line if the
1281 input is a terminal. Note that readline() removes the trailing newline, so
1282 we must put it back again, to be compatible with fgets(). */
1283
1284 #ifdef SUPPORT_LIBREADLINE
1285 if (isatty(fileno(f)))
1286 {
1287 size_t len;
1288 char *s = readline(prompt);
1289 if (s == NULL) return (here == start)? NULL : start;
1290 len = strlen(s);
1291 if (len > 0) add_history(s);
1292 if (len > rlen - 1) len = rlen - 1;
1293 memcpy(here, s, len);
1294 here[len] = '\n';
1295 here[len+1] = 0;
1296 free(s);
1297 }
1298 else
1299 #endif
1300
1301 /* Read the next line by normal means, prompting if the file is stdin. */
1302
1303 {
1304 if (f == stdin) printf("%s", prompt);
1305 if (fgets((char *)here, rlen, f) == NULL)
1306 return (here == start)? NULL : start;
1307 }
1308
1309 dlen = (int)strlen((char *)here);
1310 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1311 here += dlen;
1312 }
1313
1314 else
1315 {
1316 int new_buffer_size = 2*buffer_size;
1317 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1318 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1319 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1320
1321 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1322 {
1323 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1324 exit(1);
1325 }
1326
1327 memcpy(new_buffer, buffer, buffer_size);
1328 memcpy(new_pbuffer, pbuffer, buffer_size);
1329
1330 buffer_size = new_buffer_size;
1331
1332 start = new_buffer + (start - buffer);
1333 here = new_buffer + (here - buffer);
1334
1335 free(buffer);
1336 free(dbuffer);
1337 free(pbuffer);
1338
1339 buffer = new_buffer;
1340 dbuffer = new_dbuffer;
1341 pbuffer = new_pbuffer;
1342 }
1343 }
1344
1345 return NULL; /* Control never gets here */
1346 }
1347
1348
1349
1350 /*************************************************
1351 * Read number from string *
1352 *************************************************/
1353
1354 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1355 around with conditional compilation, just do the job by hand. It is only used
1356 for unpicking arguments, so just keep it simple.
1357
1358 Arguments:
1359 str string to be converted
1360 endptr where to put the end pointer
1361
1362 Returns: the unsigned long
1363 */
1364
1365 static int
1366 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1367 {
1368 int result = 0;
1369 while(*str != 0 && isspace(*str)) str++;
1370 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1371 *endptr = str;
1372 return(result);
1373 }
1374
1375
1376
1377 /*************************************************
1378 * Print one character *
1379 *************************************************/
1380
1381 /* Print a single character either literally, or as a hex escape. */
1382
1383 static int pchar(int c, FILE *f)
1384 {
1385 if (PRINTOK(c))
1386 {
1387 if (f != NULL) fprintf(f, "%c", c);
1388 return 1;
1389 }
1390
1391 if (c < 0x100)
1392 {
1393 if (use_utf)
1394 {
1395 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1396 return 6;
1397 }
1398 else
1399 {
1400 if (f != NULL) fprintf(f, "\\x%02x", c);
1401 return 4;
1402 }
1403 }
1404
1405 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1406 return (c <= 0x000000ff)? 6 :
1407 (c <= 0x00000fff)? 7 :
1408 (c <= 0x0000ffff)? 8 :
1409 (c <= 0x000fffff)? 9 : 10;
1410 }
1411
1412
1413
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
If handed a NULL file, just counts chars without printing. A negative length
means that the string is zero-terminated. In UTF mode, a byte sequence that
does not decode, or that would run past the end of the string, is printed one
byte at a time. */

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int total = 0;

if (length < 0)
  length = strlen((char *)p);

while (length > 0)
  {
  int c = *p;              /* default: treat the next byte on its own */
  int used = 1;

#if !defined NOUTF
  if (use_utf)
    {
    int v = 0;
    int rc = utf82ord(p, &v);
    if (rc > 0 && rc <= length)    /* Mustn't run over the end */
      {
      c = v;
      used = rc;
      }
    }
#endif

  p += used;
  length -= used;
  total += pchar(c, f);
  }

return total;
}
#endif
1452
1453
1454
#ifdef SUPPORT_PCRE16
/*************************************************
*    Find length of 0-terminated 16-bit string   *
*************************************************/

/* Returns the number of 16-bit units that precede the terminating zero. */

static int strlen16(PCRE_SPTR16 p)
{
PCRE_SPTR16 end = p;
while (*end != 0) end++;
return (int)(end - p);
}
#endif  /* SUPPORT_PCRE16 */
1467
1468
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
If handed a NULL file, just counts chars without printing.

Arguments:
  p           the 16-bit string
  length      number of 16-bit units, or negative if zero-terminated
  f           the output file, or NULL to count only

Returns:      the total of the values returned by pchar() for each character
*/

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int yield = 0;

if (length < 0)
  length = strlen16(p);

while (length-- > 0)
  {
  int c = *p++ & 0xffff;
#if !defined NOUTF
  /* In UTF mode, a high surrogate (0xD800-0xDBFF) that is followed by a low
  surrogate is combined with it into a single code point. */

  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
    int d = *p & 0xffff;

    /* The low surrogate range is 0xDC00-0xDFFF inclusive; the upper bound
    must not exclude 0xDFFF itself. */

    if (d >= 0xDC00 && d <= 0xDFFF)
      {
      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
      length--;
      p++;
      }
    }
#endif
  yield += pchar(c, f);
  }

return yield;
}
#endif  /* SUPPORT_PCRE16 */
1505
1506
1507
#ifdef SUPPORT_PCRE8
/*************************************************
*     Read a capture name (8-bit) and check it   *
*************************************************/

/* Copies the run of alphanumeric characters at p into the buffer at *pp,
followed by two zero bytes, and reports on outfile if the compiled pattern has
no group of that name. On return, *pp points at the second of the two zeros.

Arguments:
  p           points to the name in the input text
  pp          points to the pointer to the output buffer; updated
  re          the compiled pattern

Returns:      pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
{
pcre_uint8 *out = *pp;

for (; isalnum(*p); p++) *out++ = *p;
out[0] = 0;
out[1] = 0;
out++;

if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(*pp, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE8 */
1531
1532
1533
#ifdef SUPPORT_PCRE16
/*************************************************
*    Read a capture name (16-bit) and check it   *
*************************************************/

/* Note that the text being read is 8-bit. Each alphanumeric byte at p is
widened into the 16-bit buffer at *pp and followed by two zero units; a message
is written to outfile if the compiled pattern has no group of that name. On
return, *pp points at the second of the two zeros.

Arguments:
  p           points to the name in the (8-bit) input text
  pp          points to the pointer to the 16-bit output buffer; updated
  re          the compiled pattern

Returns:      pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
{
pcre_uint16 *out = *pp;

for (; isalnum(*p); p++) *out++ = *p;
out[0] = 0;
out[1] = 0;
out++;

if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(*pp, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE16 */
1558
1559
1560
1561 /*************************************************
1562 * Callout function *
1563 *************************************************/
1564
1565 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1566 the match. Yield zero unless more callouts than the fail count, or the callout
1567 data is not zero. */
1568
1569 static int callout(pcre_callout_block *cb)
1570 {
1571 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1572 int i, pre_start, post_start, subject_length;
1573
1574 if (callout_extra)
1575 {
1576 fprintf(f, "Callout %d: last capture = %d\n",
1577 cb->callout_number, cb->capture_last);
1578
1579 for (i = 0; i < cb->capture_top * 2; i += 2)
1580 {
1581 if (cb->offset_vector[i] < 0)
1582 fprintf(f, "%2d: <unset>\n", i/2);
1583 else
1584 {
1585 fprintf(f, "%2d: ", i/2);
1586 PCHARSV(cb->subject, cb->offset_vector[i],
1587 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1588 fprintf(f, "\n");
1589 }
1590 }
1591 }
1592
1593 /* Re-print the subject in canonical form, the first time or if giving full
1594 datails. On subsequent calls in the same match, we use pchars just to find the
1595 printed lengths of the substrings. */
1596
1597 if (f != NULL) fprintf(f, "--->");
1598
1599 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1600 PCHARS(post_start, cb->subject, cb->start_match,
1601 cb->current_position - cb->start_match, f);
1602
1603 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1604
1605 PCHARSV(cb->subject, cb->current_position,
1606 cb->subject_length - cb->current_position, f);
1607
1608 if (f != NULL) fprintf(f, "\n");
1609
1610 /* Always print appropriate indicators, with callout number if not already
1611 shown. For automatic callouts, show the pattern offset. */
1612
1613 if (cb->callout_number == 255)
1614 {
1615 fprintf(outfile, "%+3d ", cb->pattern_position);
1616 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1617 }
1618 else
1619 {
1620 if (callout_extra) fprintf(outfile, " ");
1621 else fprintf(outfile, "%3d ", cb->callout_number);
1622 }
1623
1624 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1625 fprintf(outfile, "^");
1626
1627 if (post_start > 0)
1628 {
1629 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1630 fprintf(outfile, "^");
1631 }
1632
1633 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1634 fprintf(outfile, " ");
1635
1636 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1637 pbuffer + cb->pattern_position);
1638
1639 fprintf(outfile, "\n");
1640 first_callout = 0;
1641
1642 if (cb->mark != last_callout_mark)
1643 {
1644 if (cb->mark == NULL)
1645 fprintf(outfile, "Latest Mark: <unset>\n");
1646 else
1647 {
1648 fprintf(outfile, "Latest Mark: ");
1649 PCHARSV(cb->mark, 0, -1, outfile);
1650 putc('\n', outfile);
1651 }
1652 last_callout_mark = cb->mark;
1653 }
1654
1655 if (cb->callout_data != NULL)
1656 {
1657 int callout_data = *((int *)(cb->callout_data));
1658 if (callout_data != 0)
1659 {
1660 fprintf(outfile, "Callout data = %d\n", callout_data);
1661 return callout_data;
1662 }
1663 }
1664
1665 return (cb->callout_number != callout_fail_id)? 0 :
1666 (++callout_count >= callout_fail_count)? 1 : 0;
1667 }
1668
1669
1670 /*************************************************
1671 * Local malloc functions *
1672 *************************************************/
1673
1674 /* Alternative malloc function, to test functionality and save the size of a
1675 compiled re, which is the first store request that pcre_compile() makes. The
1676 show_malloc variable is set only during matching. */
1677
1678 static void *new_malloc(size_t size)
1679 {
1680 void *block = malloc(size);
1681 gotten_store = size;
1682 if (first_gotten_store == 0) first_gotten_store = size;
1683 if (show_malloc)
1684 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1685 return block;
1686 }
1687
1688 static void new_free(void *block)
1689 {
1690 if (show_malloc)
1691 fprintf(outfile, "free %p\n", block);
1692 free(block);
1693 }
1694
1695 /* For recursion malloc/free, to test stacking calls */
1696
1697 static void *stack_malloc(size_t size)
1698 {
1699 void *block = malloc(size);
1700 if (show_malloc)
1701 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1702 return block;
1703 }
1704
1705 static void stack_free(void *block)
1706 {
1707 if (show_malloc)
1708 fprintf(outfile, "stack_free %p\n", block);
1709 free(block);
1710 }
1711
1712
1713 /*************************************************
1714 * Call pcre_fullinfo() *
1715 *************************************************/
1716
1717 /* Get one piece of information from the pcre_fullinfo() function. When only
1718 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1719 value, but the code is defensive.
1720
1721 Arguments:
1722 re compiled regex
1723 study study data
1724 option PCRE_INFO_xxx option
1725 ptr where to put the data
1726
1727 Returns: 0 when OK, < 0 on error
1728 */
1729
1730 static int
1731 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1732 {
1733 int rc;
1734
1735 if (use_pcre16)
1736 #ifdef SUPPORT_PCRE16
1737 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1738 #else
1739 rc = PCRE_ERROR_BADMODE;
1740 #endif
1741 else
1742 #ifdef SUPPORT_PCRE8
1743 rc = pcre_fullinfo(re, study, option, ptr);
1744 #else
1745 rc = PCRE_ERROR_BADMODE;
1746 #endif
1747
1748 if (rc < 0)
1749 {
1750 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1751 use_pcre16? "16" : "", option);
1752 if (rc == PCRE_ERROR_BADMODE)
1753 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1754 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1755 }
1756
1757 return rc;
1758 }
1759
1760
1761
1762 /*************************************************
1763 * Swap byte functions *
1764 *************************************************/
1765
1766 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1767 value, respectively.
1768
1769 Arguments:
1770 value any number
1771
1772 Returns: the byte swapped value
1773 */
1774
1775 static pcre_uint32
1776 swap_uint32(pcre_uint32 value)
1777 {
1778 return ((value & 0x000000ff) << 24) |
1779 ((value & 0x0000ff00) << 8) |
1780 ((value & 0x00ff0000) >> 8) |
1781 (value >> 24);
1782 }
1783
1784 static pcre_uint16
1785 swap_uint16(pcre_uint16 value)
1786 {
1787 return (value >> 8) | (value << 8);
1788 }
1789
1790
1791
1792 /*************************************************
1793 * Flip bytes in a compiled pattern *
1794 *************************************************/
1795
1796 /* This function is called if the 'F' option was present on a pattern that is
1797 to be written to a file. We flip the bytes of all the integer fields in the
1798 regex data block and the study block. In 16-bit mode this also flips relevant
1799 bytes in the pattern itself. This is to make it possible to test PCRE's
1800 ability to reload byte-flipped patterns, e.g. those compiled on a different
1801 architecture. */
1802
1803 static void
1804 regexflip(pcre *ere, pcre_extra *extra)
1805 {
1806 REAL_PCRE *re = (REAL_PCRE *)ere;
1807 #ifdef SUPPORT_PCRE16
1808 int op;
1809 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1810 int length = re->name_count * re->name_entry_size;
1811 #ifdef SUPPORT_UTF
1812 BOOL utf = (re->options & PCRE_UTF16) != 0;
1813 BOOL utf16_char = FALSE;
1814 #endif /* SUPPORT_UTF */
1815 #endif /* SUPPORT_PCRE16 */
1816
1817 /* Always flip the bytes in the main data block and study blocks. */
1818
1819 re->magic_number = REVERSED_MAGIC_NUMBER;
1820 re->size = swap_uint32(re->size);
1821 re->options = swap_uint32(re->options);
1822 re->flags = swap_uint16(re->flags);
1823 re->top_bracket = swap_uint16(re->top_bracket);
1824 re->top_backref = swap_uint16(re->top_backref);
1825 re->first_char = swap_uint16(re->first_char);
1826 re->req_char = swap_uint16(re->req_char);
1827 re->name_table_offset = swap_uint16(re->name_table_offset);
1828 re->name_entry_size = swap_uint16(re->name_entry_size);
1829 re->name_count = swap_uint16(re->name_count);
1830
1831 if (extra != NULL)
1832 {
1833 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1834 rsd->size = swap_uint32(rsd->size);
1835 rsd->flags = swap_uint32(rsd->flags);
1836 rsd->minlength = swap_uint32(rsd->minlength);
1837 }
1838
1839 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1840 in the name table, if present, and then in the pattern itself. */
1841
1842 #ifdef SUPPORT_PCRE16
1843 if (!use_pcre16) return;
1844
1845 while(TRUE)
1846 {
1847 /* Swap previous characters. */
1848 while (length-- > 0)
1849 {
1850 *ptr = swap_uint16(*ptr);
1851 ptr++;
1852 }
1853 #ifdef SUPPORT_UTF
1854 if (utf16_char)
1855 {
1856 if ((ptr[-1] & 0xfc00) == 0xd800)
1857 {
1858 /* We know that there is only one extra character in UTF-16. */
1859 *ptr = swap_uint16(*ptr);
1860 ptr++;
1861 }
1862 }
1863 utf16_char = FALSE;
1864 #endif /* SUPPORT_UTF */
1865
1866 /* Get next opcode. */
1867
1868 length = 0;
1869 op = *ptr;
1870 *ptr++ = swap_uint16(op);
1871
1872 switch (op)
1873 {
1874 case OP_END:
1875 return;
1876
1877 #ifdef SUPPORT_UTF
1878 case OP_CHAR:
1879 case OP_CHARI:
1880 case OP_NOT:
1881 case OP_NOTI:
1882 case OP_STAR:
1883 case OP_MINSTAR:
1884 case OP_PLUS:
1885 case OP_MINPLUS:
1886 case OP_QUERY:
1887 case OP_MINQUERY:
1888 case OP_UPTO:
1889 case OP_MINUPTO:
1890 case OP_EXACT:
1891 case OP_POSSTAR:
1892 case OP_POSPLUS:
1893 case OP_POSQUERY:
1894 case OP_POSUPTO:
1895 case OP_STARI:
1896 case OP_MINSTARI:
1897 case OP_PLUSI:
1898 case OP_MINPLUSI:
1899 case OP_QUERYI:
1900 case OP_MINQUERYI:
1901 case OP_UPTOI:
1902 case OP_MINUPTOI:
1903 case OP_EXACTI:
1904 case OP_POSSTARI:
1905 case OP_POSPLUSI:
1906 case OP_POSQUERYI:
1907 case OP_POSUPTOI:
1908 case OP_NOTSTAR:
1909 case OP_NOTMINSTAR:
1910 case OP_NOTPLUS:
1911 case OP_NOTMINPLUS:
1912 case OP_NOTQUERY:
1913 case OP_NOTMINQUERY:
1914 case OP_NOTUPTO:
1915 case OP_NOTMINUPTO:
1916 case OP_NOTEXACT:
1917 case OP_NOTPOSSTAR:
1918 case OP_NOTPOSPLUS:
1919 case OP_NOTPOSQUERY:
1920 case OP_NOTPOSUPTO:
1921 case OP_NOTSTARI:
1922 case OP_NOTMINSTARI:
1923 case OP_NOTPLUSI:
1924 case OP_NOTMINPLUSI:
1925 case OP_NOTQUERYI:
1926 case OP_NOTMINQUERYI:
1927 case OP_NOTUPTOI:
1928 case OP_NOTMINUPTOI:
1929 case OP_NOTEXACTI:
1930 case OP_NOTPOSSTARI:
1931 case OP_NOTPOSPLUSI:
1932 case OP_NOTPOSQUERYI:
1933 case OP_NOTPOSUPTOI:
1934 if (utf) utf16_char = TRUE;
1935 #endif
1936 /* Fall through. */
1937
1938 default:
1939 length = OP_lengths16[op] - 1;
1940 break;
1941
1942 case OP_CLASS:
1943 case OP_NCLASS:
1944 /* Skip the character bit map. */
1945 ptr += 32/sizeof(pcre_uint16);
1946 length = 0;
1947 break;
1948
1949 case OP_XCLASS:
1950 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1951 if (LINK_SIZE > 1)
1952 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1953 - (1 + LINK_SIZE + 1));
1954 else
1955 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1956
1957 /* Reverse the size of the XCLASS instance. */
1958 *ptr = swap_uint16(*ptr);
1959 ptr++;
1960 if (LINK_SIZE > 1)
1961 {
1962 *ptr = swap_uint16(*ptr);
1963 ptr++;
1964 }
1965
1966 op = *ptr;
1967 *ptr = swap_uint16(op);
1968 ptr++;
1969 if ((op & XCL_MAP) != 0)
1970 {
1971 /* Skip the character bit map. */
1972 ptr += 32/sizeof(pcre_uint16);
1973 length -= 32/sizeof(pcre_uint16);
1974 }
1975 break;
1976 }
1977 }
1978 /* Control should never reach here in 16 bit mode. */
1979 #endif /* SUPPORT_PCRE16 */
1980 }
1981
1982
1983
1984 /*************************************************
1985 * Check match or recursion limit *
1986 *************************************************/
1987
1988 static int
1989 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1990 int start_offset, int options, int *use_offsets, int use_size_offsets,
1991 int flag, unsigned long int *limit, int errnumber, const char *msg)
1992 {
1993 int count;
1994 int min = 0;
1995 int mid = 64;
1996 int max = -1;
1997
1998 extra->flags |= flag;
1999
2000 for (;;)
2001 {
2002 *limit = mid;
2003
2004 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2005 use_offsets, use_size_offsets);
2006
2007 if (count == errnumber)
2008 {
2009 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2010 min = mid;
2011 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2012 }
2013
2014 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2015 count == PCRE_ERROR_PARTIAL)
2016 {
2017 if (mid == min + 1)
2018 {
2019 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2020 break;
2021 }
2022 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2023 max = mid;
2024 mid = (min + mid)/2;
2025 }
2026 else break; /* Some other error */
2027 }
2028
2029 extra->flags &= ~flag;
2030 return count;
2031 }
2032
2033
2034
2035 /*************************************************
2036 * Case-independent strncmp() function *
2037 *************************************************/
2038
2039 /*
2040 Arguments:
2041 s first string
2042 t second string
2043 n number of characters to compare
2044
2045 Returns: < 0, = 0, or > 0, according to the comparison
2046 */
2047
2048 static int
2049 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2050 {
2051 while (n--)
2052 {
2053 int c = tolower(*s++) - tolower(*t++);
2054 if (c) return c;
2055 }
2056 return 0;
2057 }
2058
2059
2060
2061 /*************************************************
2062 * Check newline indicator *
2063 *************************************************/
2064
2065 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2066 a message and return 0 if there is no match.
2067
2068 Arguments:
2069 p points after the leading '<'
2070 f file for error message
2071
2072 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2073 */
2074
2075 static int
2076 check_newline(pcre_uint8 *p, FILE *f)
2077 {
2078 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2079 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2080 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2081 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2082 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2083 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2084 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2085 fprintf(f, "Unknown newline type at: <%s\n", p);
2086 return 0;
2087 }
2088
2089
2090
2091 /*************************************************
2092 * Usage function *
2093 *************************************************/
2094
/* Write the command-line synopsis and the list of options to stdout.

Some lines depend on the build configuration: the readline note follows
SUPPORT_LIBREADLINE, the -16 entry appears only when SUPPORT_PCRE16 is
defined, and the -dfa and -p entries are omitted when NODFA or NOPOSIX,
respectively, is defined. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
  fputs("If input is a terminal, readline() is used to read from it.\n",
        stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

  fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
  fputs(" -16 use the 16-bit library\n", stdout);
#endif

  fputs(" -b show compiled code\n"
        " -C show PCRE compile-time options and exit\n"
        " -C arg show a specific compile-time option\n"
        " and exit with its value. The arg can be:\n", stdout);

  /* Keywords accepted after -C. */

  fputs(" linksize internal link size [2, 3, 4]\n"
        " pcre8 8 bit library support enabled [0, 1]\n"
        " pcre16 16 bit library support enabled [0, 1]\n"
        " utf Unicode Transformation Format supported [0, 1]\n"
        " ucp Unicode Properties supported [0, 1]\n"
        " jit Just-in-time compiler supported [0, 1]\n"
        " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n", stdout);

  fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -M find MATCH_LIMIT minimum for each subject\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif

  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s force each pattern to be studied at basic level\n"
        " -s+ force each pattern to be studied, using JIT if available\n",
        stdout);

  fputs(" -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n",
        stdout);
}
2141
2142
2143
2144 /*************************************************
2145 * Main Program *
2146 *************************************************/
2147
2148 /* Read lines from named file or stdin and write to named file or stdout; lines
2149 consist of a regular expression, in delimiters and optionally followed by
2150 options, followed by a set of test data, terminated by an empty line. */
2151
2152 int main(int argc, char **argv)
2153 {
2154 FILE *infile = stdin;
2155 const char *version;
2156 int options = 0;
2157 int study_options = 0;
2158 int default_find_match_limit = FALSE;
2159 int op = 1;
2160 int timeit = 0;
2161 int timeitm = 0;
2162 int showinfo = 0;
2163 int showstore = 0;
2164 int force_study = -1;
2165 int force_study_options = 0;
2166 int quiet = 0;
2167 int size_offsets = 45;
2168 int size_offsets_max;
2169 int *offsets = NULL;
2170 #if !defined NOPOSIX
2171 int posix = 0;
2172 #endif
2173 int debug = 0;
2174 int done = 0;
2175 int all_use_dfa = 0;
2176 int yield = 0;
2177 int stack_size;
2178
2179 pcre_jit_stack *jit_stack = NULL;
2180
2181 /* These vectors store, end-to-end, a list of zero-terminated captured
2182 substring names, each list itself being terminated by an empty name. Assume
2183 that 1024 is plenty long enough for the few names we'll be testing. It is
2184 easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2185 for the actual memory, to ensure alignment. */
2186
2187 pcre_uint16 copynames[1024];
2188 pcre_uint16 getnames[1024];
2189
2190 #ifdef SUPPORT_PCRE16
2191 pcre_uint16 *cn16ptr;
2192 pcre_uint16 *gn16ptr;
2193 #endif
2194
2195 #ifdef SUPPORT_PCRE8
2196 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2197 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2198 pcre_uint8 *cn8ptr;
2199 pcre_uint8 *gn8ptr;
2200 #endif
2201
2202 /* Get buffers from malloc() so that valgrind will check their misuse when
2203 debugging. They grow automatically when very long lines are read. The 16-bit
2204 buffer (buffer16) is obtained only if needed. */
2205
2206 buffer = (pcre_uint8 *)malloc(buffer_size);
2207 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2208 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2209
2210 /* The outfile variable is static so that new_malloc can use it. */
2211
2212 outfile = stdout;
2213
2214 /* The following _setmode() stuff is some Windows magic that tells its runtime
2215 library to translate CRLF into a single LF character. At least, that's what
2216 I've been told: never having used Windows I take this all on trust. Originally
2217 it set 0x8000, but then I was advised that _O_BINARY was better. */
2218
2219 #if defined(_WIN32) || defined(WIN32)
2220 _setmode( _fileno( stdout ), _O_BINARY );
2221 #endif
2222
2223 /* Get the version number: both pcre_version() and pcre16_version() give the
2224 same answer. We just need to ensure that we call one that is available. */
2225
2226 #ifdef SUPPORT_PCRE8
2227 version = pcre_version();
2228 #else
2229 version = pcre16_version();
2230 #endif
2231
2232 /* Scan options */
2233
2234 while (argc > 1 && argv[op][0] == '-')
2235 {
2236 pcre_uint8 *endptr;
2237
2238 if (strcmp(argv[op], "-m") == 0) showstore = 1;
2239 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2240 else if (strcmp(argv[op], "-s+") == 0)
2241 {
2242 force_study = 1;
2243 force_study_options = PCRE_STUDY_JIT_COMPILE;
2244 }
2245 else if (strcmp(argv[op], "-16") == 0)
2246 {
2247 #ifdef SUPPORT_PCRE16
2248 use_pcre16 = 1;
2249 #else
2250 printf("** This version of PCRE was built without 16-bit support\n");
2251 exit(1);
2252 #endif
2253 }
2254 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2255 else if (strcmp(argv[op], "-b") == 0) debug = 1;
2256 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2257 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2258 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2259 #if !defined NODFA
2260 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2261 #endif
2262 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2263 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2264 *endptr == 0))
2265 {
2266 op++;
2267 argc--;
2268 }
2269 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2270 {
2271 int both = argv[op][2] == 0;
2272 int temp;
2273 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2274 *endptr == 0))
2275 {
2276 timeitm = temp;
2277 op++;
2278 argc--;
2279 }
2280 else timeitm = LOOPREPEAT;
2281 if (both) timeit = timeitm;
2282 }
2283 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2284 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2285 *endptr == 0))
2286 {
2287 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2288 printf("PCRE: -S not supported on this OS\n");
2289 exit(1);
2290 #else
2291 int rc;
2292 struct rlimit rlim;
2293 getrlimit(RLIMIT_STACK, &rlim);
2294 rlim.rlim_cur = stack_size * 1024 * 1024;
2295 rc = setrlimit(RLIMIT_STACK, &rlim);
2296 if (rc != 0)
2297 {
2298 printf("PCRE: setrlimit() failed with error %d\n", rc);
2299 exit(1);
2300 }
2301 op++;
2302 argc--;
2303 #endif
2304 }
2305 #if !defined NOPOSIX
2306 else if (strcmp(argv[op], "-p") == 0) posix = 1;
2307 #endif
2308 else if (strcmp(argv[op], "-C") == 0)
2309 {
2310 int rc;
2311 unsigned long int lrc;
2312
2313 if (argc > 2)
2314 {
2315 if (strcmp(argv[op + 1], "linksize") == 0)
2316 {
2317 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2318 printf("%d\n", rc);
2319 yield = rc;
2320 goto EXIT;
2321 }
2322 if (strcmp(argv[op + 1], "pcre8") == 0)
2323 {
2324 #ifdef SUPPORT_PCRE8
2325 printf("1\n");
2326 yield = 1;
2327 #else
2328 printf("0\n");
2329 yield = 0;
2330 #endif
2331 goto EXIT;
2332 }
2333 if (strcmp(argv[op + 1], "pcre16") == 0)
2334 {
2335 #ifdef SUPPORT_PCRE16
2336 printf("1\n");
2337 yield = 1;
2338 #else
2339 printf("0\n");
2340 yield = 0;
2341 #endif
2342 goto EXIT;
2343 }
2344 if (strcmp(argv[op + 1], "utf") == 0)
2345 {
2346 #ifdef SUPPORT_PCRE8
2347 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2348 printf("%d\n", rc);
2349 yield = rc;
2350 #else
2351 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2352 printf("%d\n", rc);
2353 yield = rc;
2354 #endif
2355 goto EXIT;
2356 }
2357 if (strcmp(argv[op + 1], "ucp") == 0)
2358 {
2359 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2360 printf("%d\n", rc);
2361 yield = rc;
2362 goto EXIT;
2363 }
2364 if (strcmp(argv[op + 1], "jit") == 0)
2365 {
2366 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2367 printf("%d\n", rc);
2368 yield = rc;
2369 goto EXIT;
2370 }
2371 if (strcmp(argv[op + 1], "newline") == 0)
2372 {
2373 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2374 /* Note that these values are always the ASCII values, even
2375 in EBCDIC environments. CR is 13 and NL is 10. */
2376 printf("%s\n", (rc == 13)? "CR" :
2377 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2378 (rc == -2)? "ANYCRLF" :
2379 (rc == -1)? "ANY" : "???");
2380 goto EXIT;
2381 }
2382 printf("Unknown -C option: %s\n", argv[op + 1]);
2383 goto EXIT;
2384 }
2385
2386 printf("PCRE version %s\n", version);
2387 printf("Compiled with\n");
2388
2389 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2390 are set, either both UTFs are supported or both are not supported. */
2391
2392 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2393 printf(" 8-bit and 16-bit support\n");
2394 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2395 if (rc)
2396 printf(" UTF-8 and UTF-16 support\n");
2397 else
2398 printf(" No UTF-8 or UTF-16 support\n");
2399 #elif defined SUPPORT_PCRE8
2400 printf(" 8-bit support only\n");
2401 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2402 printf(" %sUTF-8 support\n", rc? "" : "No ");
2403 #else
2404 printf(" 16-bit support only\n");
2405 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2406 printf(" %sUTF-16 support\n", rc? "" : "No ");
2407 #endif
2408
2409 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2410 printf(" %sUnicode properties support\n", rc? "" : "No ");
2411 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2412 if (rc)
2413 {
2414 const char *arch;
2415 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, &arch);
2416 printf(" Just-in-time compiler support: %s\n", arch);
2417 }
2418 else
2419 printf(" No just-in-time compiler support\n");
2420 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2421 /* Note that these values are always the ASCII values, even
2422 in EBCDIC environments. CR is 13 and NL is 10. */
2423 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2424 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2425 (rc == -2)? "ANYCRLF" :
2426 (rc == -1)? "ANY" : "???");
2427 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2428 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2429 "all Unicode newlines");
2430 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2431 printf(" Internal link size = %d\n", rc);
2432 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2433 printf(" POSIX malloc threshold = %d\n", rc);
2434 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2435 printf(" Default match limit = %ld\n", lrc);
2436 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2437 printf(" Default recursion depth limit = %ld\n", lrc);
2438 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2439 printf(" Match recursion uses %s", rc? "stack" : "heap");
2440 if (showstore)
2441 {
2442 PCRE_EXEC(stack_size, NULL, NULL, NULL, -1, -1, 0, NULL, 0);
2443 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2444 }
2445 printf("\n");
2446 goto EXIT;
2447 }
2448 else if (strcmp(argv[op], "-help") == 0 ||
2449 strcmp(argv[op], "--help") == 0)
2450 {
2451 usage();
2452 goto EXIT;
2453 }
2454 else
2455 {
2456 printf("** Unknown or malformed option %s\n", argv[op]);
2457 usage();
2458 yield = 1;
2459 goto EXIT;
2460 }
2461 op++;
2462 argc--;
2463 }
2464
2465 /* Get the store for the offsets vector, and remember what it was */
2466
2467 size_offsets_max = size_offsets;
2468 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2469 if (offsets == NULL)
2470 {
2471 printf("** Failed to get %d bytes of memory for offsets vector\n",
2472 (int)(size_offsets_max * sizeof(int)));
2473 yield = 1;
2474 goto EXIT;
2475 }
2476
2477 /* Sort out the input and output files */
2478
2479 if (argc > 1)
2480 {
2481 infile = fopen(argv[op], INPUT_MODE);
2482 if (infile == NULL)
2483 {
2484 printf("** Failed to open %s\n", argv[op]);
2485 yield = 1;
2486 goto EXIT;
2487 }
2488 }
2489
2490 if (argc > 2)
2491 {
2492 outfile = fopen(argv[op+1], OUTPUT_MODE);
2493 if (outfile == NULL)
2494 {
2495 printf("** Failed to open %s\n", argv[op+1]);
2496 yield = 1;
2497 goto EXIT;
2498 }
2499 }
2500
2501 /* Set alternative malloc function */
2502
2503 #ifdef SUPPORT_PCRE8
2504 pcre_malloc = new_malloc;
2505 pcre_free = new_free;
2506 pcre_stack_malloc = stack_malloc;
2507 pcre_stack_free = stack_free;
2508 #endif
2509
2510 #ifdef SUPPORT_PCRE16
2511 pcre16_malloc = new_malloc;
2512 pcre16_free = new_free;
2513 pcre16_stack_malloc = stack_malloc;
2514 pcre16_stack_free = stack_free;
2515 #endif
2516
2517 /* Heading line unless quiet, then prompt for first regex if stdin */
2518
2519 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2520
2521 /* Main loop */
2522
2523 while (!done)
2524 {
2525 pcre *re = NULL;
2526 pcre_extra *extra = NULL;
2527
2528 #if !defined NOPOSIX /* There are still compilers that require no indent */
2529 regex_t preg;
2530 int do_posix = 0;
2531 #endif
2532
2533 const char *error;
2534 pcre_uint8 *markptr;
2535 pcre_uint8 *p, *pp, *ppp;
2536 pcre_uint8 *to_file = NULL;
2537 const pcre_uint8 *tables = NULL;
2538 unsigned long int get_options;
2539 unsigned long int true_size, true_study_size = 0;
2540 size_t size, regex_gotten_store;
2541 int do_allcaps = 0;
2542 int do_mark = 0;
2543 int do_study = 0;
2544 int no_force_study = 0;
2545 int do_debug = debug;
2546 int do_G = 0;
2547 int do_g = 0;
2548 int do_showinfo = showinfo;
2549 int do_showrest = 0;
2550 int do_showcaprest = 0;
2551 int do_flip = 0;
2552 int erroroffset, len, delimiter, poffset;
2553
2554 use_utf = 0;
2555 debug_lengths = 1;
2556
2557 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2558 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2559 fflush(outfile);
2560
2561 p = buffer;
2562 while (isspace(*p)) p++;
2563 if (*p == 0) continue;
2564
2565 /* See if the pattern is to be loaded pre-compiled from a file. */
2566
2567 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2568 {
2569 pcre_uint32 magic;
2570 pcre_uint8 sbuf[8];
2571 FILE *f;
2572
2573 p++;
2574 if (*p == '!')
2575 {
2576 do_debug = TRUE;
2577 do_showinfo = TRUE;
2578 p++;
2579 }
2580
2581 pp = p + (int)strlen((char *)p);
2582 while (isspace(pp[-1])) pp--;
2583 *pp = 0;
2584
2585 f = fopen((char *)p, "rb");
2586 if (f == NULL)
2587 {
2588 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2589 continue;
2590 }
2591
2592 first_gotten_store = 0;
2593 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2594
2595 true_size =
2596 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2597 true_study_size =
2598 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2599
2600 re = (pcre *)new_malloc(true_size);
2601 regex_gotten_store = first_gotten_store;
2602
2603 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2604
2605 magic = ((REAL_PCRE *)re)->magic_number;
2606 if (magic != MAGIC_NUMBER)
2607 {
2608 if (swap_uint32(magic) == MAGIC_NUMBER)
2609 {
2610 do_flip = 1;
2611 }
2612 else
2613 {
2614 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2615 fclose(f);
2616 continue;
2617 }
2618 }
2619
2620 /* We hide the byte-invert info for little and big endian tests. */
2621 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2622 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2623
2624 /* Now see if there is any following study data. */
2625
2626 if (true_study_size != 0)
2627 {
2628 pcre_study_data *psd;
2629
2630 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2631 extra->flags = PCRE_EXTRA_STUDY_DATA;
2632
2633 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2634 extra->study_data = psd;
2635
2636 if (fread(psd, 1, true_study_size, f) != true_study_size)
2637 {
2638 FAIL_READ:
2639 fprintf(outfile, "Failed to read data from %s\n", p);
2640 if (extra != NULL)
2641 {
2642 PCRE_FREE_STUDY(extra);
2643 }
2644 if (re != NULL) new_free(re);
2645 fclose(f);
2646 continue;
2647 }
2648 fprintf(outfile, "Study data loaded from %s\n", p);
2649 do_study = 1; /* To get the data output if requested */
2650 }
2651 else fprintf(outfile, "No study data\n");
2652
2653 /* Flip the necessary bytes. */
2654 if (do_flip)
2655 {
2656 int rc;
2657 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2658 if (rc == PCRE_ERROR_BADMODE)
2659 {
2660 /* Simulate the result of the function call below. */
2661 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2662 use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2663 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2664 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2665 continue;
2666 }
2667 }
2668
2669 /* Need to know if UTF-8 for printing data strings. */
2670
2671 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2672 use_utf = (get_options & PCRE_UTF8) != 0;
2673
2674 fclose(f);
2675 goto SHOW_INFO;
2676 }
2677
2678 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2679 the pattern; if it isn't complete, read more. */
2680
2681 delimiter = *p++;
2682
2683 if (isalnum(delimiter) || delimiter == '\\')
2684 {
2685 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2686 goto SKIP_DATA;
2687 }
2688
2689 pp = p;
2690 poffset = (int)(p - buffer);
2691
2692 for(;;)
2693 {
2694 while (*pp != 0)
2695 {
2696 if (*pp == '\\' && pp[1] != 0) pp++;
2697 else if (*pp == delimiter) break;
2698 pp++;
2699 }
2700 if (*pp != 0) break;
2701 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2702 {
2703 fprintf(outfile, "** Unexpected EOF\n");
2704 done = 1;
2705 goto CONTINUE;
2706 }
2707 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2708 }
2709
2710 /* The buffer may have moved while being extended; reset the start of data
2711 pointer to the correct relative point in the buffer. */
2712
2713 p = buffer + poffset;
2714
2715 /* If the first character after the delimiter is backslash, make
2716 the pattern end with backslash. This is purely to provide a way
2717 of testing for the error message when a pattern ends with backslash. */
2718
2719 if (pp[1] == '\\') *pp++ = '\\';
2720
2721 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2722 for callouts. */
2723
2724 *pp++ = 0;
2725 strcpy((char *)pbuffer, (char *)p);
2726
2727 /* Look for options after final delimiter */
2728
2729 options = 0;
2730 study_options = 0;
2731 log_store = showstore; /* default from command line */
2732
2733 while (*pp != 0)
2734 {
2735 switch (*pp++)
2736 {
2737 case 'f': options |= PCRE_FIRSTLINE; break;
2738 case 'g': do_g = 1; break;
2739 case 'i': options |= PCRE_CASELESS; break;
2740 case 'm': options |= PCRE_MULTILINE; break;
2741 case 's': options |= PCRE_DOTALL; break;
2742 case 'x': options |= PCRE_EXTENDED; break;
2743
2744 case '+':
2745 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2746 break;
2747
2748 case '=': do_allcaps = 1; break;
2749 case 'A': options |= PCRE_ANCHORED; break;
2750 case 'B': do_debug = 1; break;
2751 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2752 case 'D': do_debug = do_showinfo = 1; break;
2753 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2754 case 'F': do_flip = 1; break;
2755 case 'G': do_G = 1; break;
2756 case 'I': do_showinfo = 1; break;
2757 case 'J': options |= PCRE_DUPNAMES; break;
2758 case 'K': do_mark = 1; break;
2759 case 'M': log_store = 1; break;
2760 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2761
2762 #if !defined NOPOSIX
2763 case 'P': do_posix = 1; break;
2764 #endif
2765
2766 case 'S':
2767 if (do_study == 0)
2768 {
2769 do_study = 1;
2770 if (*pp == '+')
2771 {
2772 study_options |= PCRE_STUDY_JIT_COMPILE;
2773 pp++;
2774 }
2775 }
2776 else
2777 {
2778 do_study = 0;
2779 no_force_study = 1;
2780 }
2781 break;
2782
2783 case 'U': options |= PCRE_UNGREEDY; break;
2784 case 'W': options |= PCRE_UCP; break;
2785 case 'X': options |= PCRE_EXTRA; break;
2786 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2787 case 'Z': debug_lengths = 0; break;
2788 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2789 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2790
2791 case 'T':
2792 switch (*pp++)
2793 {
2794 case '0': tables = tables0; break;
2795 case '1': tables = tables1; break;
2796
2797 case '\r':
2798 case '\n':
2799 case ' ':
2800 case 0:
2801 fprintf(outfile, "** Missing table number after /T\n");
2802 goto SKIP_DATA;
2803
2804 default:
2805 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2806 goto SKIP_DATA;
2807 }
2808 break;
2809
2810 case 'L':
2811 ppp = pp;
2812 /* The '\r' test here is so that it works on Windows. */
2813 /* The '0' test is just in case this is an unterminated line. */
2814 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2815 *ppp = 0;
2816 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2817 {
2818 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2819 goto SKIP_DATA;
2820 }
2821 locale_set = 1;
2822 tables = PCRE_MAKETABLES;
2823 pp = ppp;
2824 break;
2825
2826 case '>':
2827 to_file = pp;
2828 while (*pp != 0) pp++;
2829 while (isspace(pp[-1])) pp--;
2830 *pp = 0;
2831 break;
2832
2833 case '<':
2834 {
2835 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2836 {
2837 options |= PCRE_JAVASCRIPT_COMPAT;
2838 pp += 3;
2839 }
2840 else
2841 {
2842 int x = check_newline(pp, outfile);
2843 if (x == 0) goto SKIP_DATA;
2844 options |= x;
2845 while (*pp++ != '>');
2846 }
2847 }
2848 break;
2849
2850 case '\r': /* So that it works in Windows */
2851 case '\n':
2852 case ' ':
2853 break;
2854
2855 default:
2856 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2857 goto SKIP_DATA;
2858 }
2859 }
2860
2861 /* Handle compiling via the POSIX interface, which doesn't support the
2862 timing, showing, or debugging options, nor the ability to pass over
2863 local character tables. Neither does it have 16-bit support. */
2864
2865 #if !defined NOPOSIX
2866 if (posix || do_posix)
2867 {
2868 int rc;
2869 int cflags = 0;
2870
2871 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2872 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2873 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2874 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2875 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2876 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2877 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2878
2879 first_gotten_store = 0;
2880 rc = regcomp(&preg, (char *)p, cflags);
2881
2882 /* Compilation failed; go back for another re, skipping to blank line
2883 if non-interactive. */
2884
2885 if (rc != 0)
2886 {
2887 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2888 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2889 goto SKIP_DATA;
2890 }
2891 }
2892
2893 /* Handle compiling via the native interface */
2894
2895 else
2896 #endif /* !defined NOPOSIX */
2897
2898 {
2899 /* In 16-bit mode, convert the input. */
2900
2901 #ifdef SUPPORT_PCRE16
2902 if (use_pcre16)
2903 {
2904 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2905 {
2906 case -1:
2907 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2908 "converted to UTF-16\n");
2909 goto SKIP_DATA;
2910
2911 case -2:
2912 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2913 "cannot be converted to UTF-16\n");
2914 goto SKIP_DATA;
2915
2916 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2917 fprintf(outfile, "**Failed: character value greater than 0xffff "
2918 "cannot be converted to 16-bit in non-UTF mode\n");
2919 goto SKIP_DATA;
2920
2921 default:
2922 break;
2923 }
2924 p = (pcre_uint8 *)buffer16;
2925 }
2926 #endif
2927
2928 /* Compile many times when timing */
2929
2930 if (timeit > 0)
2931 {
2932 register int i;
2933 clock_t time_taken;
2934 clock_t start_time = clock();
2935 for (i = 0; i < timeit; i++)
2936 {
2937 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2938 if (re != NULL) free(re);
2939 }
2940 time_taken = clock() - start_time;
2941 fprintf(outfile, "Compile time %.4f milliseconds\n",
2942 (((double)time_taken * 1000.0) / (double)timeit) /
2943 (double)CLOCKS_PER_SEC);
2944 }
2945
2946 first_gotten_store = 0;
2947 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2948
2949 /* Compilation failed; go back for another re, skipping to blank line
2950 if non-interactive. */
2951
2952 if (re == NULL)
2953 {
2954 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2955 SKIP_DATA:
2956 if (infile != stdin)
2957 {
2958 for (;;)
2959 {
2960 if (extend_inputline(infile, buffer, NULL) == NULL)
2961 {
2962 done = 1;
2963 goto CONTINUE;
2964 }
2965 len = (int)strlen((char *)buffer);
2966 while (len > 0 && isspace(buffer[len-1])) len--;
2967 if (len == 0) break;
2968 }
2969 fprintf(outfile, "\n");
2970 }
2971 goto CONTINUE;
2972 }
2973
2974 /* Compilation succeeded. It is now possible to set the UTF-8 option from
2975 within the regex; check for this so that we know how to process the data
2976 lines. */
2977
2978 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2979 goto SKIP_DATA;
2980 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2981
2982 /* Extract the size for possible writing before possibly flipping it,
2983 and remember the store that was got. */
2984
2985 true_size = ((REAL_PCRE *)re)->size;
2986 regex_gotten_store = first_gotten_store;
2987
2988 /* Output code size information if requested */
2989
2990 if (log_store)
2991 fprintf(outfile, "Memory allocation (code space): %d\n",
2992 (int)(first_gotten_store -
2993 sizeof(REAL_PCRE) -
2994 ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
2995
2996 /* If -s or /S was present, study the regex to generate additional info to
2997 help with the matching, unless the pattern has the SS option, which
2998 suppresses the effect of /S (used for a few test patterns where studying is
2999 never sensible). */
3000
3001 if (do_study || (force_study >= 0 && !no_force_study))
3002 {
3003 if (timeit > 0)
3004 {
3005 register int i;
3006 clock_t time_taken;
3007 clock_t start_time = clock();
3008 for (i = 0; i < timeit; i++)
3009 {
3010 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3011 }
3012 time_taken = clock() - start_time;
3013 if (extra != NULL)
3014 {
3015 PCRE_FREE_STUDY(extra);
3016 }
3017 fprintf(outfile, " Study time %.4f milliseconds\n",
3018 (((double)time_taken * 1000.0) / (double)timeit) /
3019 (double)CLOCKS_PER_SEC);
3020 }
3021 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3022 if (error != NULL)
3023 fprintf(outfile, "Failed to study: %s\n", error);
3024 else if (extra != NULL)
3025 {
3026 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3027 if (log_store)
3028 {
3029 size_t jitsize;
3030 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3031 jitsize != 0)
3032 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3033 }
3034 }
3035 }
3036
3037 /* If /K was present, we set up for handling MARK data. */
3038
3039 if (do_mark)
3040 {
3041 if (extra == NULL)
3042 {
3043 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3044 extra->flags = 0;
3045 }
3046 extra->mark = &markptr;
3047 extra->flags |= PCRE_EXTRA_MARK;
3048 }
3049
3050 /* Extract and display information from the compiled data if required. */
3051
3052 SHOW_INFO:
3053
3054 if (do_debug)
3055 {
3056 fprintf(outfile, "------------------------------------------------------------------\n");
3057 PCRE_PRINTINT(re, outfile, debug_lengths);
3058 }
3059
3060 /* We already have the options in get_options (see above) */
3061
3062 if (do_showinfo)
3063 {
3064 unsigned long int all_options;
3065 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3066 hascrorlf;
3067 int nameentrysize, namecount;
3068 const pcre_uint8 *nametable;
3069
3070 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3071 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3072 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3073 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3074 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3075 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3076 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3077 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3078 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3079 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3080 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3081 != 0)
3082 goto SKIP_DATA;
3083
3084 if (size != regex_gotten_store) fprintf(outfile,
3085 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3086 (int)size, (int)regex_gotten_store);
3087
3088 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3089 if (backrefmax > 0)
3090 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3091
3092 if (namecount > 0)
3093 {
3094 fprintf(outfile, "Named capturing subpatterns:\n");
3095 while (namecount-- > 0)
3096 {
3097 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3098 int imm2_size = use_pcre16 ? 1 : 2;
3099 #else
3100 int imm2_size = IMM2_SIZE;
3101 #endif
3102 int length = (int)STRLEN(nametable + imm2_size);
3103 fprintf(outfile, " ");
3104 PCHARSV(nametable, imm2_size, length, outfile);
3105 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3106 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3107 fprintf(outfile, "%3d\n", use_pcre16?
3108 (int)(((PCRE_SPTR16)nametable)[0])
3109 :((int)nametable[0] << 8) | (int)nametable[1]);
3110 nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3111 #else
3112 fprintf(outfile, "%3d\n", GET2(nametable, 0));
3113 #ifdef SUPPORT_PCRE8
3114 nametable += nameentrysize;
3115 #else
3116 nametable += nameentrysize * 2;
3117 #endif
3118 #endif
3119 }
3120 }
3121
3122 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3123 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3124
3125 all_options = ((REAL_PCRE *)re)->options;
3126 if (do_flip) all_options = swap_uint32(all_options);
3127
3128 if (get_options == 0) fprintf(outfile, "No options\n");
3129 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3130 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3131 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3132 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3133 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3134 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3135 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3136 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3137 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3138 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3139 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3140 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3141 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3142 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3143 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3144 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3145 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3146 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3147
3148 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3149
3150 switch (get_options & PCRE_NEWLINE_BITS)
3151 {
3152 case PCRE_NEWLINE_CR:
3153 fprintf(outfile, "Forced newline sequence: CR\n");
3154 break;
3155
3156 case PCRE_NEWLINE_LF:
3157 fprintf(outfile, "Forced newline sequence: LF\n");
3158 break;
3159
3160 case PCRE_NEWLINE_CRLF:
3161 fprintf(outfile, "Forced newline sequence: CRLF\n");
3162 break;
3163
3164 case PCRE_NEWLINE_ANYCRLF:
3165 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3166 break;
3167
3168 case PCRE_NEWLINE_ANY:
3169 fprintf(outfile, "Forced newline sequence: ANY\n");
3170 break;
3171
3172 default:
3173 break;
3174 }
3175
3176 if (first_char == -1)
3177 {
3178 fprintf(outfile, "First char at start or follows newline\n");
3179 }
3180 else if (first_char < 0)
3181 {
3182 fprintf(outfile, "No first char\n");
3183 }
3184 else
3185 {
3186 const char *caseless =
3187 ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3188 "" : " (caseless)";
3189
3190 if (PRINTOK(first_char))
3191 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3192 else
3193 {
3194 fprintf(outfile, "First char = ");
3195 pchar(first_char, outfile);
3196 fprintf(outfile, "%s\n", caseless);
3197 }
3198 }
3199
3200 if (need_char < 0)
3201 {
3202 fprintf(outfile, "No need char\n");
3203 }
3204 else
3205 {
3206 const char *caseless =
3207 ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3208 "" : " (caseless)";
3209
3210 if (PRINTOK(need_char))
3211 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3212 else
3213 {
3214 fprintf(outfile, "Need char = ");
3215 pchar(need_char, outfile);
3216 fprintf(outfile, "%s\n", caseless);
3217 }
3218 }
3219
3220 /* Don't output study size; at present it is in any case a fixed
3221 value, but it varies, depending on the computer architecture, and
3222 so messes up the test suite. (And with the /F option, it might be
3223 flipped.) If study was forced by an external -s, don't show this
3224 information unless -i or -d was also present. This means that, except
3225 when auto-callouts are involved, the output from runs with and without
3226 -s should be identical. */
3227
3228 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3229 {
3230 if (extra == NULL)
3231 fprintf(outfile, "Study returned NULL\n");
3232 else
3233 {
3234 pcre_uint8 *start_bits = NULL;
3235 int minlength;
3236
3237 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3238 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3239
3240 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3241 {
3242 if (start_bits == NULL)
3243 fprintf(outfile, "No set of starting bytes\n");
3244 else
3245 {
3246 int i;
3247 int c = 24;
3248 fprintf(outfile, "Starting byte set: ");
3249 for (i = 0; i < 256; i++)
3250 {
3251 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3252 {
3253 if (c > 75)
3254 {
3255 fprintf(outfile, "\n ");
3256 c = 2;
3257 }
3258 if (PRINTOK(i) && i != ' ')
3259 {
3260 fprintf(outfile, "%c ", i);
3261 c += 2;
3262 }
3263 else
3264 {
3265 fprintf(outfile, "\\x%02x ", i);
3266 c += 5;
3267 }
3268 }
3269 }
3270 fprintf(outfile, "\n");
3271 }
3272 }
3273 }
3274
3275 /* Show this only if the JIT was set by /S, not by -s. */
3276
3277 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3278 {
3279 int jit;
3280 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3281 {
3282 if (jit)
3283 fprintf(outfile, "JIT study was successful\n");
3284 else
3285 #ifdef SUPPORT_JIT
3286 fprintf(outfile, "JIT study was not successful\n");
3287 #else
3288 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3289 #endif
3290 }
3291 }
3292 }
3293 }
3294
3295 /* If the '>' option was present, we write out the regex to a file, and
3296 that is all. The file starts with the regex length and then the study length
3297 (4 bytes each, big-endian), followed by the regex and any study data. */
3298
3299 if (to_file != NULL)
3300 {
3301 FILE *f = fopen((char *)to_file, "wb");
3302 if (f == NULL)
3303 {
3304 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3305 }
3306 else
3307 {
3308 pcre_uint8 sbuf[8];
3309
3310 if (do_flip) regexflip(re, extra);
3311 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3312 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3313 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3314 sbuf[3] = (pcre_uint8)((true_size) & 255);
3315 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3316 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3317 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3318 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3319
3320 if (fwrite(sbuf, 1, 8, f) < 8 ||
3321 fwrite(re, 1, true_size, f) < true_size)
3322 {
3323 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3324 }
3325 else
3326 {
3327 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3328
3329 /* If there is study data, write it. */
3330
3331 if (extra != NULL)
3332 {
3333 if (fwrite(extra->study_data, 1, true_study_size, f) <
3334 true_study_size)
3335 {
3336 fprintf(outfile, "Write error on %s: %s\n", to_file,
3337 strerror(errno));
3338 }
3339 else fprintf(outfile, "Study data written to %s\n", to_file);
3340 }
3341 }
3342 fclose(f);
3343 }
3344
3345 new_free(re);
3346 if (extra != NULL)
3347 {
3348 PCRE_FREE_STUDY(extra);
3349 }
3350 if (locale_set)
3351 {
3352 new_free((void *)tables);
3353 setlocale(LC_CTYPE, "C");
3354 locale_set = 0;
3355 }
3356 continue; /* With next regex */
3357 }
3358 } /* End of non-POSIX compile */
3359
3360 /* Read data lines and test them */
3361
3362 for (;;)
3363 {
3364 pcre_uint8 *q;
3365 pcre_uint8 *bptr;
3366 int *use_offsets = offsets;
3367 int use_size_offsets = size_offsets;
3368 int callout_data = 0;
3369 int callout_data_set = 0;
3370 int count, c;
3371 int copystrings = 0;
3372 int find_match_limit = default_find_match_limit;
3373 int getstrings = 0;
3374 int getlist = 0;
3375 int gmatched = 0;
3376 int start_offset = 0;
3377 int start_offset_sign = 1;
3378 int g_notempty = 0;
3379 int use_dfa = 0;
3380
3381 *copynames = 0;
3382 *getnames = 0;
3383
3384 #ifdef SUPPORT_PCRE16
3385 cn16ptr = copynames;
3386 gn16ptr = getnames;
3387 #endif
3388 #ifdef SUPPORT_PCRE8
3389 cn8ptr = copynames8;
3390 gn8ptr = getnames8;
3391 #endif
3392
3393 SET_PCRE_CALLOUT(callout);
3394 first_callout = 1;
3395 last_callout_mark = NULL;
3396 callout_extra = 0;
3397 callout_count = 0;
3398 callout_fail_count = 999999;
3399 callout_fail_id = -1;
3400 show_malloc = 0;
3401 options = 0;
3402
3403 if (extra != NULL) extra->flags &=
3404 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3405
3406 len = 0;
3407 for (;;)
3408 {
3409 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3410 {
3411 if (len > 0) /* Reached EOF without hitting a newline */
3412 {
3413 fprintf(outfile, "\n");
3414 break;
3415 }
3416 done = 1;
3417 goto CONTINUE;
3418 }
3419 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3420 len = (int)strlen((char *)buffer);
3421 if (buffer[len-1] == '\n') break;
3422 }
3423
3424 while (len > 0 && isspace(buffer[len-1])) len--;
3425 buffer[len] = 0;
3426 if (len == 0) break;
3427
3428 p = buffer;
3429 while (isspace(*p)) p++;
3430
3431 bptr = q = dbuffer;
3432 while ((c = *p++) != 0)
3433 {
3434 int i = 0;
3435 int n = 0;
3436
3437 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3438 In non-UTF mode, allow the value of the byte to fall through to later,
3439 where values greater than 127 are turned into UTF-8 when running in
3440 16-bit mode. */
3441
3442 if (c != '\\')
3443 {
3444 if (use_utf)
3445 {
3446 *q++ = c;
3447 continue;
3448 }
3449 }
3450
3451 /* Handle backslash escapes */
3452
3453 else switch ((c = *p++))
3454 {
3455 case 'a': c = 7; break;
3456 case 'b': c = '\b'; break;
3457 case 'e': c = 27; break;
3458 case 'f': c = '\f'; break;
3459 case 'n': c = '\n'; break;
3460 case 'r': c = '\r'; break;
3461 case 't': c = '\t'; break;
3462 case 'v': c = '\v'; break;
3463
3464 case '0': case '1': case '2': case '3':
3465 case '4': case '5': case '6': case '7':
3466 c -= '0';
3467 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3468 c = c * 8 + *p++ - '0';
3469 break;
3470
3471 case 'x':
3472 if (*p == '{')
3473 {
3474 pcre_uint8 *pt = p;
3475 c = 0;
3476
3477 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3478 when isxdigit() is a macro that refers to its argument more than
3479 once. This is banned by the C Standard, but apparently happens in at
3480 least one MacOS environment. */
3481
3482 for (pt++; isxdigit(*pt); pt++)
3483 {
3484 if (++i == 9)
3485 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3486 "using only the first eight.\n");
3487 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3488 }
3489 if (*pt == '}')
3490 {
3491 p = pt + 1;
3492 break;
3493 }
3494 /* Not correct form for \x{...}; fall through */
3495 }
3496
3497 /* \x without {} always defines just one byte in 8-bit mode. This
3498 allows UTF-8 characters to be constructed byte by byte, and also allows
3499 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3500 Otherwise, pass it down to later code so that it can be turned into
3501 UTF-8 when running in 16-bit mode. */
3502
3503 c = 0;
3504 while (i++ < 2 && isxdigit(*p))
3505 {
3506 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3507 p++;
3508 }
3509 if (use_utf)
3510 {
3511 *q++ = c;
3512 continue;
3513 }
3514 break;
3515
3516 case 0: /* \ followed by EOF allows for an empty line */
3517 p--;
3518 continue;
3519
3520 case '>':
3521 if (*p == '-')
3522 {
3523 start_offset_sign = -1;
3524 p++;
3525 }
3526 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3527 start_offset *= start_offset_sign;
3528 continue;
3529
3530 case 'A': /* Option setting */
3531 options |= PCRE_ANCHORED;
3532 continue;
3533
3534 case 'B':
3535 options |= PCRE_NOTBOL;
3536 continue;
3537
3538 case 'C':
3539 if (isdigit(*p)) /* Set copy string */
3540 {
3541 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3542 copystrings |= 1 << n;
3543 }
3544 else if (isalnum(*p))
3545 {
3546 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3547 }
3548 else if (*p == '+')
3549 {
3550 callout_extra = 1;
3551 p++;
3552 }
3553 else if (*p == '-')
3554 {
3555 SET_PCRE_CALLOUT(NULL);
3556 p++;
3557 }
3558 else if (*p == '!')
3559 {
3560 callout_fail_id = 0;
3561 p++;
3562 while(isdigit(*p))
3563 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3564 callout_fail_count = 0;
3565 if (*p == '!')
3566 {
3567 p++;
3568 while(isdigit(*p))
3569 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3570 }
3571 }
3572 else if (*p == '*')
3573 {
3574 int sign = 1;
3575 callout_data = 0;
3576 if (*(++p) == '-') { sign = -1; p++; }
3577 while(isdigit(*p))
3578 callout_data = callout_data * 10 + *p++ - '0';
3579 callout_data *= sign;
3580 callout_data_set = 1;
3581 }
3582 continue;
3583
3584 #if !defined NODFA
3585 case 'D':
3586 #if !defined NOPOSIX
3587 if (posix || do_posix)
3588 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3589 else
3590 #endif
3591 use_dfa = 1;
3592 continue;
3593 #endif
3594
3595 #if !defined NODFA
3596 case 'F':
3597 options |= PCRE_DFA_SHORTEST;
3598 continue;
3599 #endif
3600
3601 case 'G':
3602 if (isdigit(*p))
3603 {
3604 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3605 getstrings |= 1 << n;
3606 }
3607 else if (isalnum(*p))
3608 {
3609 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3610 }
3611 continue;
3612
3613 case 'J':
3614 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3615 if (extra != NULL
3616 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3617 && extra->executable_jit != NULL)
3618 {
3619 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3620 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3621 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3622 }
3623 continue;
3624
3625 case 'L':
3626 getlist = 1;
3627 continue;
3628
3629 case 'M':
3630 find_match_limit = 1;
3631 continue;
3632
3633 case 'N':
3634 if ((options & PCRE_NOTEMPTY) != 0)
3635 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3636 else
3637 options |= PCRE_NOTEMPTY;
3638 continue;
3639
3640 case 'O':
3641 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3642 if (n > size_offsets_max)
3643 {
3644 size_offsets_max = n;
3645 free(offsets);
3646 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3647 if (offsets == NULL)
3648 {
3649 printf("** Failed to get %d bytes of memory for offsets vector\n",
3650 (int)(size_offsets_max * sizeof(int)));
3651 yield = 1;
3652 goto EXIT;
3653 }
3654 }
3655 use_size_offsets = n;
3656 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3657 continue;
3658
3659 case 'P':
3660 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3661 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3662 continue;
3663
3664 case 'Q':
3665 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3666 if (extra == NULL)
3667 {
3668 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3669 extra->flags = 0;
3670 }
3671 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3672 extra->match_limit_recursion = n;
3673 continue;
3674
3675 case 'q':
3676 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3677 if (extra == NULL)
3678 {
3679 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3680 extra->flags = 0;
3681 }
3682 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3683 extra->match_limit = n;
3684 continue;
3685
3686 #if !defined NODFA
3687 case 'R':
3688 options |= PCRE_DFA_RESTART;
3689 continue;
3690 #endif
3691
3692 case 'S':
3693 show_malloc = 1;
3694 continue;
3695
3696 case 'Y':
3697 options |= PCRE_NO_START_OPTIMIZE;
3698 continue;
3699
3700 case 'Z':
3701 options |= PCRE_NOTEOL;
3702 continue;
3703
3704 case '?':
3705 options |= PCRE_NO_UTF8_CHECK;
3706 continue;
3707
3708 case '<':
3709 {
3710 int x = check_newline(p, outfile);
3711 if (x == 0) goto NEXT_DATA;
3712 options |= x;
3713 while (*p++ != '>');
3714 }
3715 continue;
3716 }
3717
3718 /* We now have a character value in c that may be greater than 255. In
3719 16-bit mode, we always convert characters to UTF-8 so that values greater
3720 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3721 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3722 mode must have come from \x{...} or octal constructs because values from
3723 \x.. get this far only in non-UTF mode. */
3724
3725 #if !defined NOUTF || defined SUPPORT_PCRE16
3726 if (use_pcre16 || use_utf)
3727 {
3728 pcre_uint8 buff8[8];
3729 int ii, utn;
3730 utn = ord2utf8(c, buff8);
3731 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3732 }
3733 else
3734 #endif
3735 {
3736 if (c > 255)
3737 {
3738 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3739 "and UTF-8 mode is not enabled.\n", c);
3740 fprintf(outfile, "** Truncation will probably give the wrong "
3741 "result.\n");
3742 }
3743 *q++ = c;
3744 }
3745 }
3746
3747 /* Reached end of subject string */
3748
3749 *q = 0;
3750 len = (int)(q - dbuffer);
3751
3752 /* Move the data to the end of the buffer so that a read over the end of
3753 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3754 we are using the POSIX interface, we must include the terminating zero. */
3755
3756 #if !defined NOPOSIX
3757 if (posix || do_posix)
3758 {
3759 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3760 bptr += buffer_size - len - 1;
3761 }
3762 else
3763 #endif
3764 {
3765 memmove(bptr + buffer_size - len, bptr, len);
3766 bptr += buffer_size - len;
3767 }
3768
3769 if ((all_use_dfa || use_dfa) && find_match_limit)
3770 {
3771 printf("**Match limit not relevant for DFA matching: ignored\n");
3772 find_match_limit = 0;
3773 }
3774
3775 /* Handle matching via the POSIX interface, which does not
3776 support timing or playing with the match limit or callout data. */
3777
3778 #if !defined NOPOSIX
3779 if (posix || do_posix)
3780 {
3781 int rc;
3782 int eflags = 0;
3783 regmatch_t *pmatch = NULL;
3784 if (use_size_offsets > 0)
3785 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3786 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3787 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3788 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3789
3790 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3791
3792 if (rc != 0)
3793 {
3794 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3795 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3796 }
3797 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3798 != 0)
3799 {
3800 fprintf(outfile, "Matched with REG_NOSUB\n");
3801 }
3802 else
3803 {
3804 size_t i;
3805 for (i = 0; i < (size_t)use_size_offsets; i++)
3806 {
3807 if (pmatch[i].rm_so >= 0)
3808 {
3809 fprintf(outfile, "%2d: ", (int)i);
3810 PCHARSV(dbuffer, pmatch[i].rm_so,
3811 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3812 fprintf(outfile, "\n");
3813 if (do_showcaprest || (i == 0 && do_showrest))
3814 {
3815 fprintf(outfile, "%2d+ ", (int)i);
3816 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3817 outfile);
3818 fprintf(outfile, "\n");
3819 }
3820 }
3821 }
3822 }
3823 free(pmatch);
3824 goto NEXT_DATA;
3825 }
3826
3827 #endif /* !defined NOPOSIX */
3828
3829 /* Handle matching via the native interface - repeats for /g and /G */
3830
3831 #ifdef SUPPORT_PCRE16
3832 if (use_pcre16)
3833 {
3834 len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3835 switch(len)
3836 {
3837 case -1:
3838 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3839 "converted to UTF-16\n");
3840 goto NEXT_DATA;
3841
3842 case -2:
3843 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3844 "cannot be converted to UTF-16\n");
3845 goto NEXT_DATA;
3846
3847 case -3:
3848 fprintf(outfile, "**Failed: character value greater than 0xffff "
3849 "cannot be converted to 16-bit in non-UTF mode\n");
3850 goto NEXT_DATA;
3851
3852 default:
3853 break;
3854 }
3855 bptr = (pcre_uint8 *)buffer16;
3856 }
3857 #endif
3858
3859 for (;; gmatched++) /* Loop for /g or /G */
3860 {
3861 markptr = NULL;
3862
3863 if (timeitm > 0)
3864 {
3865 register int i;
3866 clock_t time_taken;
3867 clock_t start_time = clock();
3868
3869 #if !defined NODFA
3870 if (all_use_dfa || use_dfa)
3871 {
3872 int workspace[1000];
3873 for (i = 0; i < timeitm; i++)
3874 {
3875 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3876 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3877 (sizeof(workspace)/sizeof(int)));
3878 }
3879 }
3880 else
3881 #endif
3882
3883 for (i = 0; i < timeitm; i++)
3884 {
3885 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3886 (options | g_notempty), use_offsets, use_size_offsets);
3887 }
3888 time_taken = clock() - start_time;
3889 fprintf(outfile, "Execute time %.4f milliseconds\n",
3890 (((double)time_taken * 1000.0) / (double)timeitm) /
3891 (double)CLOCKS_PER_SEC);
3892 }
3893
3894 /* If find_match_limit is set, we want to do repeated matches with
3895 varying limits in order to find the minimum value for the match limit and
3896 for the recursion limit. The match limits are relevant only to the normal
3897 running of pcre_exec(), so disable the JIT optimization. This makes it
3898 possible to run the same set of tests with and without JIT externally
3899 requested. */
3900
3901 if (find_match_limit)
3902 {
3903 if (extra == NULL)
3904 {
3905 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3906 extra->flags = 0;
3907 }
3908 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3909
3910 (void)check_match_limit(re, extra, bptr, len, start_offset,
3911 options|g_notempty, use_offsets, use_size_offsets,
3912 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3913 PCRE_ERROR_MATCHLIMIT, "match()");
3914
3915 count = check_match_limit(re, extra, bptr, len, start_offset,
3916 options|g_notempty, use_offsets, use_size_offsets,
3917 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3918 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3919 }
3920
3921 /* If callout_data is set, use the interface with additional data */
3922
3923 else if (callout_data_set)
3924 {
3925 if (extra == NULL)
3926 {
3927 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3928 extra->flags = 0;
3929 }
3930 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3931 extra->callout_data = &callout_data;
3932 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3933 options | g_notempty, use_offsets, use_size_offsets);
3934 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3935 }
3936
3937 /* The normal case is just to do the match once, with the default
3938 value of match_limit. */
3939
3940 #if !defined NODFA
3941 else if (all_use_dfa || use_dfa)
3942 {
3943 int workspace[1000];
3944 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3945 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3946 (sizeof(workspace)/sizeof(int)));
3947 if (count == 0)
3948 {
3949 fprintf(outfile, "Matched, but too many subsidiary matches\n");
3950 count = use_size_offsets/2;
3951 }
3952 }
3953 #endif
3954
3955 else
3956 {
3957 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3958 options | g_notempty, use_offsets, use_size_offsets);
3959 if (count == 0)
3960 {
3961 fprintf(outfile, "Matched, but too many substrings\n");
3962 count = use_size_offsets/3;
3963 }
3964 }
3965
3966 /* Matched */
3967
3968 if (count >= 0)
3969 {
3970 int i, maxcount;
3971 void *cnptr, *gnptr;
3972
3973 #if !defined NODFA
3974 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3975 #endif
3976 maxcount = use_size_offsets/3;
3977
3978 /* This is a check against a lunatic return value. */
3979
3980 if (count > maxcount)
3981 {
3982 fprintf(outfile,
3983 "** PCRE error: returned count %d is too big for offset size %d\n",
3984 count, use_size_offsets);
3985 count = use_size_offsets/3;
3986 if (do_g || do_G)
3987 {
3988 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3989 do_g = do_G = FALSE; /* Break g/G loop */
3990 }
3991 }
3992
3993 /* do_allcaps requests showing of all captures in the pattern, to check
3994 unset ones at the end. */
3995
3996 if (do_allcaps)
3997 {
3998 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
3999 goto SKIP_DATA;
4000 count++; /* Allow for full match */
4001 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4002 }
4003
4004 /* Output the captured substrings */
4005
4006 for (i = 0; i < count * 2; i += 2)
4007 {
4008 if (use_offsets[i] < 0)
4009 {
4010 if (use_offsets[i] != -1)
4011 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4012 use_offsets[i], i);
4013 if (use_offsets[i+1] != -1)
4014 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4015 use_offsets[i+1], i+1);
4016 fprintf(outfile, "%2d: <unset>\n", i/2);
4017 }
4018 else
4019 {
4020 fprintf(outfile, "%2d: ", i/2);
4021 PCHARSV(bptr, use_offsets[i],
4022 use_offsets[i+1] - use_offsets[i], outfile);
4023 fprintf(outfile, "\n");
4024 if (do_showcaprest || (i == 0 && do_showrest))
4025 {
4026 fprintf(outfile, "%2d+ ", i/2);
4027 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4028 outfile);
4029 fprintf(outfile, "\n");
4030 }
4031 }
4032 }
4033
4034 if (markptr != NULL)
4035 {
4036 fprintf(outfile, "MK: ");
4037 PCHARSV(markptr, 0, -1, outfile);
4038 fprintf(outfile, "\n");
4039 }
4040
4041 for (i = 0; i < 32; i++)
4042 {
4043 if ((copystrings & (1 << i)) != 0)
4044 {
4045 int rc;
4046 char copybuffer[256];
4047 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4048 copybuffer, sizeof(copybuffer));
4049 if (rc < 0)
4050 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4051 else
4052 {
4053 fprintf(outfile, "%2dC ", i);
4054 PCHARSV(copybuffer, 0, rc, outfile);
4055 fprintf(outfile, " (%d)\n", rc);
4056 }
4057 }
4058 }
4059
4060 cnptr = copynames;
4061 for (;;)
4062 {
4063 int rc;
4064 char copybuffer[256];
4065
4066 if (use_pcre16)
4067 {
4068 if (*(pcre_uint16 *)cnptr == 0) break;
4069 }
4070 else
4071 {
4072 if (*(pcre_uint8 *)cnptr == 0) break;
4073 }
4074
4075 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4076 cnptr, copybuffer, sizeof(copybuffer));
4077
4078 if (rc < 0)
4079 {
4080 fprintf(outfile, "copy substring ");
4081 PCHARSV(cnptr, 0, -1, outfile);
4082 fprintf(outfile, " failed %d\n", rc);
4083 }
4084 else
4085 {
4086 fprintf(outfile, " C ");
4087 PCHARSV(copybuffer, 0, rc, outfile);
4088 fprintf(outfile, " (%d) ", rc);
4089 PCHARSV(cnptr, 0, -1, outfile);
4090 putc('\n', outfile);
4091 }
4092
4093 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4094 }
4095
4096 for (i = 0; i < 32; i++)
4097 {
4098 if ((getstrings & (1 << i)) != 0)
4099 {
4100 int rc;
4101 const char *substring;
4102 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4103 if (rc < 0)
4104 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4105 else
4106 {
4107 fprintf(outfile, "%2dG ", i);
4108 PCHARSV(substring, 0, rc, outfile);
4109 fprintf(outfile, " (%d)\n", rc);
4110 PCRE_FREE_SUBSTRING(substring);
4111 }
4112 }
4113 }
4114
4115 gnptr = getnames;
4116 for (;;)
4117 {
4118 int rc;
4119 const char *substring;
4120
4121 if (use_pcre16)
4122 {
4123 if (*(pcre_uint16 *)gnptr == 0) break;
4124 }
4125 else
4126 {
4127 if (*(pcre_uint8 *)gnptr == 0) break;
4128 }
4129
4130 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4131 gnptr, &substring);
4132 if (rc < 0)
4133 {
4134 fprintf(outfile, "get substring ");
4135 PCHARSV(gnptr, 0, -1, outfile);
4136 fprintf(outfile, " failed %d\n", rc);
4137 }
4138 else
4139 {
4140 fprintf(outfile, " G ");
4141 PCHARSV(substring, 0, rc, outfile);
4142 fprintf(outfile, " (%d) ", rc);
4143 PCHARSV(gnptr, 0, -1, outfile);
4144 PCRE_FREE_SUBSTRING(substring);
4145 putc('\n', outfile);
4146 }
4147
4148 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4149 }
4150
4151 if (getlist)
4152 {
4153 int rc;
4154 const char **stringlist;
4155 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4156 if (rc < 0)
4157 fprintf(outfile, "get substring list failed %d\n", rc);
4158 else
4159 {
4160 for (i = 0; i < count; i++)
4161 {
4162 fprintf(outfile, "%2dL ", i);
4163 PCHARSV(stringlist[i], 0, -1, outfile);
4164 putc('\n', outfile);
4165 }
4166 if (stringlist[i] != NULL)
4167 fprintf(outfile, "string list not terminated by NULL\n");
4168 PCRE_FREE_SUBSTRING_LIST(stringlist);
4169 }
4170 }
4171 }
4172
4173 /* There was a partial match */
4174
4175 else if (count == PCRE_ERROR_PARTIAL)
4176 {
4177 if (markptr == NULL) fprintf(outfile, "Partial match");
4178 else
4179 {
4180 fprintf(outfile, "Partial match, mark=");
4181 PCHARSV(markptr, 0, -1, outfile);
4182 }
4183 if (use_size_offsets > 1)
4184 {
4185 fprintf(outfile, ": ");
4186 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4187 outfile);
4188 }
4189 fprintf(outfile, "\n");
4190 break; /* Out of the /g loop */
4191 }
4192
4193 /* Failed to match. If this is a /g or /G loop and we previously set
4194 g_notempty after a null match, this is not necessarily the end. We want
4195 to advance the start offset, and continue. We won't be at the end of the
4196 string - that was checked before setting g_notempty.
4197
4198 Complication arises in the case when the newline convention is "any",
4199 "crlf", or "anycrlf". If the previous match was at the end of a line
4200 terminated by CRLF, an advance of one character just passes the \r,
4201 whereas we should prefer the longer newline sequence, as does the code in
4202 pcre_exec(). Fudge the offset value to achieve this. We check for a
4203 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4204 find the default.
4205
4206 Otherwise, in the case of UTF-8 matching, the advance must be one
4207 character, not one byte. */
4208
4209 else
4210 {
4211 if (g_notempty != 0)
4212 {
4213 int onechar = 1;
4214 unsigned int obits = ((REAL_PCRE *)re)->options;
4215 use_offsets[0] = start_offset;
4216 if ((obits & PCRE_NEWLINE_BITS) == 0)
4217 {
4218 int d;
4219 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4220 /* Note that these values are always the ASCII ones, even in
4221 EBCDIC environments. CR = 13, NL = 10. */
4222 obits = (d == 13)? PCRE_NEWLINE_CR :
4223 (d == 10)? PCRE_NEWLINE_LF :
4224 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4225 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4226 (d == -1)? PCRE_NEWLINE_ANY : 0;
4227 }
4228 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4229 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4230 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4231 &&
4232 start_offset < len - 1 &&
4233 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4234 (use_pcre16?
4235 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4236 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4237 :
4238 bptr[start_offset] == '\r'
4239 && bptr[start_offset + 1] == '\n')
4240 #elif defined SUPPORT_PCRE16
4241 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4242 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4243 #else
4244 bptr[start_offset] == '\r'
4245 && bptr[start_offset + 1] == '\n'
4246 #endif
4247 )
4248 onechar++;
4249 else if (use_utf)
4250 {
4251 while (start_offset + onechar < len)
4252 {
4253 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4254 onechar++;
4255 }
4256 }
4257 use_offsets[1] = start_offset + onechar;
4258 }
4259 else
4260 {
4261 switch(count)
4262 {
4263 case PCRE_ERROR_NOMATCH:
4264 if (gmatched == 0)
4265 {
4266 if (markptr == NULL)
4267 {
4268 fprintf(outfile, "No match\n");
4269 }
4270 else
4271 {
4272 fprintf(outfile, "No match, mark = ");
4273 PCHARSV(markptr, 0, -1, outfile);
4274 putc('\n', outfile);
4275 }
4276 }
4277 break;
4278
4279 case PCRE_ERROR_BADUTF8:
4280 case PCRE_ERROR_SHORTUTF8:
4281 fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4282 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4283 use_pcre16? "16" : "8");
4284 if (use_size_offsets >= 2)
4285 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4286 use_offsets[1]);
4287 fprintf(outfile, "\n");
4288 break;
4289
4290 case PCRE_ERROR_BADUTF8_OFFSET:
4291 fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4292 use_pcre16? "16" : "8");
4293 break;
4294
4295 default:
4296 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
4297 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4298 else
4299 fprintf(outfile, "Error %d (Unexpected value)\n", count);
4300 break;
4301 }
4302
4303 break; /* Out of the /g loop */
4304 }
4305 }
4306
4307 /* If not /g or /G we are done */
4308
4309 if (!do_g && !do_G) break;
4310
4311 /* If we have matched an empty string, first check to see if we are at
4312 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4313 Perl's /g option does. This turns out to be rather cunning. First we set
4314 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4315 same point. If this fails (picked up above) we advance to the next
4316 character. */
4317
4318 g_notempty = 0;
4319
4320 if (use_offsets[0] == use_offsets[1])
4321 {
4322 if (use_offsets[0] == len) break;
4323 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4324 }
4325
4326 /* For /g, update the start offset, leaving the rest alone */
4327
4328 if (do_g) start_offset = use_offsets[1];
4329
4330 /* For /G, update the pointer and length */
4331
4332 else
4333 {
4334 bptr += use_offsets[1] * CHAR_SIZE;
4335 len -= use_offsets[1];
4336 }
4337 } /* End of loop for /g and /G */
4338
4339 NEXT_DATA: continue;
4340 } /* End of loop for data lines */
4341
4342 CONTINUE:
4343
4344 #if !defined NOPOSIX
4345 if (posix || do_posix) regfree(&preg);
4346 #endif
4347
4348 if (re != NULL) new_free(re);
4349 if (extra != NULL)
4350 {
4351 PCRE_FREE_STUDY(extra);
4352 }
4353 if (locale_set)
4354 {
4355 new_free((void *)tables);
4356 setlocale(LC_CTYPE, "C");
4357 locale_set = 0;
4358 }
4359 if (jit_stack != NULL)
4360 {
4361 PCRE_JIT_STACK_FREE(jit_stack);
4362 jit_stack = NULL;
4363 }
4364 }
4365
4366 if (infile == stdin) fprintf(outfile, "\n");
4367
4368 EXIT:
4369
4370 if (infile != NULL && infile != stdin) fclose(infile);
4371 if (outfile != NULL && outfile != stdout) fclose(outfile);
4372
4373 free(buffer);
4374 free(dbuffer);
4375 free(pbuffer);
4376 free(offsets);
4377
4378 #ifdef SUPPORT_PCRE16
4379 if (buffer16 != NULL) free(buffer16);
4380 #endif
4381
4382 return yield;
4383 }
4384
4385 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12