/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1027 - (show annotations) (download)
Mon Sep 3 14:01:38 2012 UTC (2 years ago) by ph10
File MIME type: text/plain
File size: 134614 byte(s)
Small patches for the z/OS port that don't affect other OS.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
/* Platform-dependent settings: the fopen() modes used for pcretest's input and
output files, together with the headers and name shims each platform needs. */

#if !defined(_WIN32) && !defined(WIN32)

/* Not Windows */

#include <sys/time.h>          /* Both of these headers are required */
#include <sys/resource.h>      /* for setrlimit(). */

#ifdef NATIVE_ZOS              /* z/OS uses non-binary I/O */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "w"
#else
#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"
#endif

#else

/* Windows: input is opened without "b", output with it. */

#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

/* Windows spells these names with a leading underscore, though in some
environments the plain names are already defined, hence the guards. */

#if !defined(isatty)
#define isatty _isatty
#endif

#if !defined(fileno)
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#if defined(__BORLANDC__)
#define _setmode(handle, mode) setmode(handle, mode)
#endif

#endif
123
/* In this file PRIV(x) expands to x unchanged. */
#define PRIV(sym) sym
125
126 /* We have to include pcre_internal.h because we need the internal info for
127 displaying the results of pcre_study() and we also need to know about the
128 internal macros, structures, and other internal data values; pcretest has
129 "inside information" compared to a program that strictly follows the PCRE API.
130
131 Although pcre_internal.h does itself include pcre.h, we explicitly include it
132 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133 appropriately for an application, not for building PCRE. */
134
135 #include "pcre.h"
136
/* When only the 16-bit library is supported, configure the internal macros for
16-bit mode. */
#ifndef SUPPORT_PCRE8
#ifdef SUPPORT_PCRE16
#define COMPILE_PCRE16
#endif
#endif
141
142 #include "pcre_internal.h"
143
144 /* The pcre_printint() function, which prints the internal form of a compiled
145 regex, is held in a separate file so that (a) it can be compiled in either
146 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
147 when that is compiled in debug mode. */
148
/* Prototypes for the pattern printers: one per supported character width.
Both take the compiled pattern, the output stream, and a flag selecting whether
lengths are printed. */

#if defined(SUPPORT_PCRE8)
void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif

#if defined(SUPPORT_PCRE16)
void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
155
156 /* We need access to some of the data tables that PCRE uses. So as not to have
157 to keep two copies, we include the source file here, changing the names of the
158 external symbols to prevent clashes. */
159
160 #define PCRE_INCLUDED
161
162 #include "pcre_tables.c"
163
/* PRINTABLE decides whether an output character is shown as-is or as a hex
value when compiled patterns are displayed; its definition is the same as the
one in the printint.src file. It is used here whenever the locale has not been
explicitly changed, so that output is consistent across systems whose isprint()
results differ even in the "C" locale. */

#ifndef EBCDIC
#define PRINTABLE(c) ((c) >= 32 && (c) <= 126)
#else
#define PRINTABLE(c) ((c) >= 64 && (c) <= 254)
#endif

/* isprint() is consulted only when locale_set is non-zero. */

#define PRINTOK(c) (locale_set ? isprint(c) : PRINTABLE(c))
177
/* POSIX support is disabled when only the 16-bit library is supported, so
NOPOSIX is forced in that configuration unless it is already defined. */
#if !defined NOPOSIX
#if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
#define NOPOSIX
#endif
#endif
182
183 /* It is possible to compile this test program without including support for
184 testing the POSIX interface, though this is not available via the standard
185 Makefile. */
186
187 #if !defined NOPOSIX
188 #include "pcreposix.h"
189 #endif
190
/* pcretest can also be built without support for UTF-8 or UTF-16 (define
NOUTF) or without the interface to the DFA matcher (define NODFA); this was
originally for the benefit of a version that was imported into Exim. UTF
support is cut out automatically if PCRE itself is built without it. */

#if !defined SUPPORT_UTF && !defined NOUTF
#define NOUTF
#endif
201
202 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
203 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
204 only from one place and is handled differently). I couldn't dream up any way of
205 using a single macro to do this in a generic way, because of the many different
206 argument requirements. We know that at least one of SUPPORT_PCRE8 and
207 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
208 use these in the definitions of generic macros.
209
210 **** Special note about the PCHARSxxx macros: the address of the string to be
211 printed is always given as two arguments: a base address followed by an offset.
212 The base address is cast to the correct data size for 8 or 16 bit data; the
213 offset is in units of this size. If the string were given as base+offset in one
214 argument, the casting might be incorrectly applied. */
215
216 #ifdef SUPPORT_PCRE8
217
/* 8-bit mode wrappers: one macro per pcre_xxx() library call, plus helpers for
printing and measuring 8-bit strings. String arguments are cast to the types
the 8-bit API expects. A wrapper that produces a result assigns it to its first
argument (lv, p, re, rc, count, n or extra); PCRE_JIT_STACK_ALLOC8 and STRLEN8
are plain expressions instead. */

/* The base pointer is cast before the offset is added, so the offset is
counted in 8-bit units. */
#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + offset, len, f)

#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + offset, len, f)

/* cn16 is accepted but not used by the 8-bit form. */
#define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
  p = read_capture_name8(p, cn8, re)

#define STRLEN8(p) ((int)strlen((char *)p))

#define SET_PCRE_CALLOUT8(callout) \
  pcre_callout = callout

#define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
  pcre_assign_jit_stack(extra, callback, userdata)

#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  re = pcre_compile((char *)pat, options, error, erroffset, tables)

#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
    (char *)namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets)

#define PCRE_FREE_STUDY8(extra) \
  pcre_free_study(extra)

#define PCRE_FREE_SUBSTRING8(substring) \
  pcre_free_substring(substring)

#define PCRE_FREE_SUBSTRING_LIST8(listptr) \
  pcre_free_substring_list(listptr)

#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
    (char *)getnamesptr, subsptr)

/* The second parameter is the compiled pattern. It used to be spelled "rc"
while the expansion referred to "re", so the macro ignored its argument and
picked up whatever "re" happened to be in scope at the call site. */
#define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)

#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
  rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
  rc = pcre_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT8(re, outfile, debug_lengths) \
  pcre_printint(re, outfile, debug_lengths)

#define PCRE_STUDY8(extra, re, options, error) \
  extra = pcre_study(re, options, error)

#define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
  pcre_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE8(stack) \
  pcre_jit_stack_free(stack)
293
294 #endif /* SUPPORT_PCRE8 */
295
296 /* -----------------------------------------------------------*/
297
298 #ifdef SUPPORT_PCRE16
299
/* 16-bit mode wrappers: one macro per pcre16_xxx() library call, plus helpers
for printing and measuring 16-bit strings. The rest of pcretest holds patterns,
study data and strings in the 8-bit types, so each wrapper casts its arguments
to the 16-bit types on the way in and casts results back on the way out. A
wrapper that produces a result assigns it to its first argument;
PCRE_JIT_STACK_ALLOC16 and STRLEN16 are plain expressions instead. */

/* The base pointer is cast before the offset is added, so the offset is
counted in 16-bit units. */
#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)

#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)

/* cn8 is accepted but not used by the 16-bit form. */
#define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
  p = read_capture_name16(p, cn16, re)

#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))

#define SET_PCRE_CALLOUT16(callout) \
  pcre16_callout = (int (*)(pcre16_callout_block *))callout

#define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
  pcre16_assign_jit_stack((pcre16_extra *)extra, \
    (pcre16_jit_callback)callback, userdata)

#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
    tables)

/* size is halved before being passed on as the 16-bit buffer size. */
#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_UCHAR16 *)cbuffer, size/2)

#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
    (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
    workspace, size_workspace)

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
    len, start_offset, options, offsets, size_offsets)

#define PCRE_FREE_STUDY16(extra) \
  pcre16_free_study((pcre16_extra *)extra)

#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)substring)

#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)listptr)

#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)

/* The second parameter is the compiled pattern. It used to be spelled "rc"
while the expansion referred to "re", so the macro ignored its argument and
picked up whatever "re" happened to be in scope at the call site. The pattern
is now also cast to pcre16 *, as in the other wrappers here. */
#define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)

#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SPTR16 *)(void*)subsptr)

#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16 **)(void*)listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
  rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
    tables)

#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  pcre16_printint(re, outfile, debug_lengths)

#define PCRE_STUDY16(extra, re, options, error) \
  extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)

#define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
  (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE16(stack) \
  pcre16_jit_stack_free((pcre16_jit_stack *)stack)
382
383 #endif /* SUPPORT_PCRE16 */
384
385
386 /* ----- Both modes are supported; a runtime test is needed, except for
387 pcre_config(), and the JIT stack functions, when it doesn't matter which
388 version is called. ----- */
389
390 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
391
/* Run-time dispatch between the 8-bit and 16-bit wrappers, selected by
use_pcre16.

The statement-like macros are wrapped in do { ... } while (0) so that each one
behaves as a single statement: a call written as the unbraced body of an outer
"if" no longer expands to a nested if/if/else, which compilers flag as an
ambiguous (dangling) else. Call sites still supply the trailing semicolon.
CHAR_SIZE, STRLEN, PCRE_JIT_STACK_ALLOC and PCRE_MAKETABLES are expressions and
PCRE_CONFIG is a plain function name, so they are left as they were. */

#define CHAR_SIZE (use_pcre16? 2:1)

#define PCHARS(lv, p, offset, len, f) \
  do { \
    if (use_pcre16) \
      PCHARS16(lv, p, offset, len, f); \
    else \
      PCHARS8(lv, p, offset, len, f); \
  } while (0)

#define PCHARSV(p, offset, len, f) \
  do { \
    if (use_pcre16) \
      PCHARSV16(p, offset, len, f); \
    else \
      PCHARSV8(p, offset, len, f); \
  } while (0)

#define READ_CAPTURE_NAME(p, cn8, cn16, re) \
  do { \
    if (use_pcre16) \
      READ_CAPTURE_NAME16(p, cn8, cn16, re); \
    else \
      READ_CAPTURE_NAME8(p, cn8, cn16, re); \
  } while (0)

#define SET_PCRE_CALLOUT(callout) \
  do { \
    if (use_pcre16) \
      SET_PCRE_CALLOUT16(callout); \
    else \
      SET_PCRE_CALLOUT8(callout); \
  } while (0)

#define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  do { \
    if (use_pcre16) \
      PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
    else \
      PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata); \
  } while (0)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  do { \
    if (use_pcre16) \
      PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
    else \
      PCRE_COMPILE8(re, pat, options, error, erroffset, tables); \
  } while (0)

/* For pcre_config() it does not matter which version is called. */
#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  do { \
    if (use_pcre16) \
      PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    else \
      PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
  } while (0)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  do { \
    if (use_pcre16) \
      PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
    else \
      PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size); \
  } while (0)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  do { \
    if (use_pcre16) \
      PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    else \
      PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
  } while (0)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do { \
    if (use_pcre16) \
      PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    else \
      PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
  } while (0)

#define PCRE_FREE_STUDY(extra) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_STUDY16(extra); \
    else \
      PCRE_FREE_STUDY8(extra); \
  } while (0)

#define PCRE_FREE_SUBSTRING(substring) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_SUBSTRING16(substring); \
    else \
      PCRE_FREE_SUBSTRING8(substring); \
  } while (0)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_SUBSTRING_LIST16(listptr); \
    else \
      PCRE_FREE_SUBSTRING_LIST8(listptr); \
  } while (0)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    else \
      PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
  } while (0)

/* The second argument is forwarded unchanged to the mode-specific macro. */
#define PCRE_GET_STRINGNUMBER(n, re, ptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_STRINGNUMBER16(n, re, ptr); \
    else \
      PCRE_GET_STRINGNUMBER8(n, re, ptr); \
  } while (0)

#define PCRE_GET_SUBSTRING(rc, bptr, offsets, count, i, subsptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr); \
    else \
      PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr); \
  } while (0)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
    else \
      PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr); \
  } while (0)

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  (use_pcre16 ? \
     PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
    :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))

#define PCRE_JIT_STACK_FREE(stack) \
  do { \
    if (use_pcre16) \
      PCRE_JIT_STACK_FREE16(stack); \
    else \
      PCRE_JIT_STACK_FREE8(stack); \
  } while (0)

#define PCRE_MAKETABLES \
  (use_pcre16? pcre16_maketables() : pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  do { \
    if (use_pcre16) \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
    else \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables); \
  } while (0)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  do { \
    if (use_pcre16) \
      PCRE_PRINTINT16(re, outfile, debug_lengths); \
    else \
      PCRE_PRINTINT8(re, outfile, debug_lengths); \
  } while (0)

#define PCRE_STUDY(extra, re, options, error) \
  do { \
    if (use_pcre16) \
      PCRE_STUDY16(extra, re, options, error); \
    else \
      PCRE_STUDY8(extra, re, options, error); \
  } while (0)
543
544 /* ----- Only 8-bit mode is supported ----- */
545
546 #elif defined SUPPORT_PCRE8
/* With only the 8-bit library available, every generic name is a direct alias
for its 8-bit counterpart and no run-time test is needed. */

/* Character size and string helpers */
#define CHAR_SIZE                        1
#define PCHARS                           PCHARS8
#define PCHARSV                          PCHARSV8
#define READ_CAPTURE_NAME                READ_CAPTURE_NAME8
#define SET_PCRE_CALLOUT                 SET_PCRE_CALLOUT8
#define STRLEN                           STRLEN8

/* Library calls */
#define PCRE_ASSIGN_JIT_STACK            PCRE_ASSIGN_JIT_STACK8
#define PCRE_COMPILE                     PCRE_COMPILE8
#define PCRE_CONFIG                      pcre_config
#define PCRE_COPY_NAMED_SUBSTRING        PCRE_COPY_NAMED_SUBSTRING8
#define PCRE_COPY_SUBSTRING              PCRE_COPY_SUBSTRING8
#define PCRE_DFA_EXEC                    PCRE_DFA_EXEC8
#define PCRE_EXEC                        PCRE_EXEC8
#define PCRE_FREE_STUDY                  PCRE_FREE_STUDY8
#define PCRE_FREE_SUBSTRING              PCRE_FREE_SUBSTRING8
#define PCRE_FREE_SUBSTRING_LIST         PCRE_FREE_SUBSTRING_LIST8
#define PCRE_GET_NAMED_SUBSTRING         PCRE_GET_NAMED_SUBSTRING8
#define PCRE_GET_STRINGNUMBER            PCRE_GET_STRINGNUMBER8
#define PCRE_GET_SUBSTRING               PCRE_GET_SUBSTRING8
#define PCRE_GET_SUBSTRING_LIST          PCRE_GET_SUBSTRING_LIST8
#define PCRE_JIT_STACK_ALLOC             PCRE_JIT_STACK_ALLOC8
#define PCRE_JIT_STACK_FREE              PCRE_JIT_STACK_FREE8
#define PCRE_MAKETABLES                  pcre_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER  PCRE_PATTERN_TO_HOST_BYTE_ORDER8
#define PCRE_PRINTINT                    PCRE_PRINTINT8
#define PCRE_STUDY                       PCRE_STUDY8
573
574 /* ----- Only 16-bit mode is supported ----- */
575
576 #else
/* With only the 16-bit library available, every generic name is a direct alias
for its 16-bit counterpart and no run-time test is needed. */

/* Character size and string helpers */
#define CHAR_SIZE                        2
#define PCHARS                           PCHARS16
#define PCHARSV                          PCHARSV16
#define READ_CAPTURE_NAME                READ_CAPTURE_NAME16
#define SET_PCRE_CALLOUT                 SET_PCRE_CALLOUT16
#define STRLEN                           STRLEN16

/* Library calls */
#define PCRE_ASSIGN_JIT_STACK            PCRE_ASSIGN_JIT_STACK16
#define PCRE_COMPILE                     PCRE_COMPILE16
#define PCRE_CONFIG                      pcre16_config
#define PCRE_COPY_NAMED_SUBSTRING        PCRE_COPY_NAMED_SUBSTRING16
#define PCRE_COPY_SUBSTRING              PCRE_COPY_SUBSTRING16
#define PCRE_DFA_EXEC                    PCRE_DFA_EXEC16
#define PCRE_EXEC                        PCRE_EXEC16
#define PCRE_FREE_STUDY                  PCRE_FREE_STUDY16
#define PCRE_FREE_SUBSTRING              PCRE_FREE_SUBSTRING16
#define PCRE_FREE_SUBSTRING_LIST         PCRE_FREE_SUBSTRING_LIST16
#define PCRE_GET_NAMED_SUBSTRING         PCRE_GET_NAMED_SUBSTRING16
#define PCRE_GET_STRINGNUMBER            PCRE_GET_STRINGNUMBER16
#define PCRE_GET_SUBSTRING               PCRE_GET_SUBSTRING16
#define PCRE_GET_SUBSTRING_LIST          PCRE_GET_SUBSTRING_LIST16
#define PCRE_JIT_STACK_ALLOC             PCRE_JIT_STACK_ALLOC16
#define PCRE_JIT_STACK_FREE              PCRE_JIT_STACK_FREE16
#define PCRE_MAKETABLES                  pcre16_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER  PCRE_PATTERN_TO_HOST_BYTE_ORDER16
#define PCRE_PRINTINT                    PCRE_PRINTINT16
#define PCRE_STUDY                       PCRE_STUDY16
603 #endif
604
605 /* ----- End of mode-specific function call macros ----- */
606
607
/* Other parameters */

/* Fall back to CLK_TCK, or failing that to 100, when the C library does not
supply CLOCKS_PER_SEC. */

#if !defined(CLOCKS_PER_SEC)
#if defined(CLK_TCK)
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif

/* Only defined when DFA support has not been cut out with NODFA. */

#ifndef NODFA
#define DFA_WS_DIMENSION 1000
#endif

/* This is the default loop count for timing. */

#define LOOPREPEAT 500000
625
/* Static variables. Every object here starts out as zero or NULL; the
initialisers are written out explicitly, which is the same value C gives an
uninitialised static object. */

static FILE *outfile = NULL;

static int log_store = 0;
static int callout_count = 0;
static int callout_extra = 0;
static int callout_fail_count = 0;
static int callout_fail_id = 0;
static int debug_lengths = 0;
static int first_callout = 0;
static int jit_was_used = 0;
static int locale_set = 0;
static int show_malloc = 0;
static int use_utf = 0;

static size_t gotten_store = 0;
static size_t first_gotten_store = 0;

static const unsigned char *last_callout_mark = NULL;
643
644 /* The buffers grow automatically if very long input lines are encountered. */
645
646 static int buffer_size = 50000;
647 static pcre_uint8 *buffer = NULL;
648 static pcre_uint8 *dbuffer = NULL;
649 static pcre_uint8 *pbuffer = NULL;
650
/* Another buffer is needed for translation to 16-bit character strings. It
will be obtained and extended as required. */

#if defined(SUPPORT_PCRE16)
static pcre_uint16 *buffer16 = NULL;
static int buffer16_size = 0;

#if defined(SUPPORT_PCRE8)

/* The table of operator lengths used for 16-bit compiling is needed in order
to swap bytes in a pattern when testing saving and reloading. Luckily the data
is defined as a macro, but LINK_SIZE and IMM2_SIZE must first be adjusted to
their 16-bit values, because this file is otherwise compiled with the 8-bit
settings. As a safety check, insist that COMPILE_PCRE16 is *not* set. */

#if defined(COMPILE_PCRE16)
#error COMPILE_PCRE16 must not be set when compiling pcretest.c
#endif

#if LINK_SIZE == 3 || LINK_SIZE == 4
#undef LINK_SIZE
#define LINK_SIZE 2
#elif LINK_SIZE == 2
#undef LINK_SIZE
#define LINK_SIZE 1
#else
#error LINK_SIZE must be either 2, 3, or 4
#endif

#undef IMM2_SIZE
#define IMM2_SIZE 1

#endif /* SUPPORT_PCRE8 */

/* OP_LENGTHS is expanded here, after any adjustment made above. */
static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
#endif /* SUPPORT_PCRE16 */
687
/* Default mode selector. Without 8-bit support there must be 16-bit support,
so 16-bit mode is the default; otherwise the default is 8-bit mode, which an
option can change when 16-bit support is also present. */

#ifndef SUPPORT_PCRE8
static int use_pcre16 = 1;
#else
static int use_pcre16 = 0;
#endif
697
698 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
699
700 static int jit_study_bits[] =
701 {
702 PCRE_STUDY_JIT_COMPILE,
703 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
704 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
705 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
706 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
707 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
708 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
709 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
710 };
711
712 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
713 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
714
/* Textual explanations for runtime error codes. A NULL entry marks a code
that has no text here because it is handled specially or is never returned by
the matching functions. The number in each comment is the entry's index. */

static const char *errtexts[] = {
  /*  0 */ NULL,  /* 0 is no error */
  /*  1 */ NULL,  /* NOMATCH is handled specially */
  /*  2 */ "NULL argument passed",
  /*  3 */ "bad option value",
  /*  4 */ "magic number missing",
  /*  5 */ "unknown opcode - pattern overwritten?",
  /*  6 */ "no more memory",
  /*  7 */ NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  /*  8 */ "match limit exceeded",
  /*  9 */ "callout error code",
  /* 10 */ NULL,  /* BADUTF8/16 is handled specially */
  /* 11 */ NULL,  /* BADUTF8/16 offset is handled specially */
  /* 12 */ NULL,  /* PARTIAL is handled specially */
  /* 13 */ "not used - internal error",
  /* 14 */ "internal error - pattern overwritten?",
  /* 15 */ "bad count value",
  /* 16 */ "item unsupported for DFA matching",
  /* 17 */ "backreference condition or recursion test not supported for DFA matching",
  /* 18 */ "match limit not supported for DFA matching",
  /* 19 */ "workspace size exceeded in DFA matching",
  /* 20 */ "too much recursion for DFA matching",
  /* 21 */ "recursion limit exceeded",
  /* 22 */ "not used - internal error",
  /* 23 */ "invalid combination of newline options",
  /* 24 */ "bad offset value",
  /* 25 */ NULL,  /* SHORTUTF8/16 is handled specially */
  /* 26 */ "nested recursion at the same subject position",
  /* 27 */ "JIT stack limit reached",
  /* 28 */ "pattern compiled in wrong mode: 8-bit/16-bit error",
  /* 29 */ "pattern compiled with other endianness",
  /* 30 */ "invalid data in workspace for DFA restart"
};
750
751
752 /*************************************************
753 * Alternate character tables *
754 *************************************************/
755
756 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
757 using the default tables of the library. However, the T option can be used to
758 select alternate sets of tables, for different kinds of testing. Note also that
759 the L (locale) option also adjusts the tables. */
760
761 /* This is the set of tables distributed as default with PCRE. It recognizes
762 only ASCII characters. */
763
764 static const pcre_uint8 tables0[] = {
765
766 /* This table is a lower casing table. */
767
768 0, 1, 2, 3, 4, 5, 6, 7,
769 8, 9, 10, 11, 12, 13, 14, 15,
770 16, 17, 18, 19, 20, 21, 22, 23,
771 24, 25, 26, 27, 28, 29, 30, 31,
772 32, 33, 34, 35, 36, 37, 38, 39,
773 40, 41, 42, 43, 44, 45, 46, 47,
774 48, 49, 50, 51, 52, 53, 54, 55,
775 56, 57, 58, 59, 60, 61, 62, 63,
776 64, 97, 98, 99,100,101,102,103,
777 104,105,106,107,108,109,110,111,
778 112,113,114,115,116,117,118,119,
779 120,121,122, 91, 92, 93, 94, 95,
780 96, 97, 98, 99,100,101,102,103,
781 104,105,106,107,108,109,110,111,
782 112,113,114,115,116,117,118,119,
783 120,121,122,123,124,125,126,127,
784 128,129,130,131,132,133,134,135,
785 136,137,138,139,140,141,142,143,
786 144,145,146,147,148,149,150,151,
787 152,153,154,155,156,157,158,159,
788 160,161,162,163,164,165,166,167,
789 168,169,170,171,172,173,174,175,
790 176,177,178,179,180,181,182,183,
791 184,185,186,187,188,189,190,191,
792 192,193,194,195,196,197,198,199,
793 200,201,202,203,204,205,206,207,
794 208,209,210,211,212,213,214,215,
795 216,217,218,219,220,221,222,223,
796 224,225,226,227,228,229,230,231,
797 232,233,234,235,236,237,238,239,
798 240,241,242,243,244,245,246,247,
799 248,249,250,251,252,253,254,255,
800
801 /* This table is a case flipping table. */
802
803 0, 1, 2, 3, 4, 5, 6, 7,
804 8, 9, 10, 11, 12, 13, 14, 15,
805 16, 17, 18, 19, 20, 21, 22, 23,
806 24, 25, 26, 27, 28, 29, 30, 31,
807 32, 33, 34, 35, 36, 37, 38, 39,
808 40, 41, 42, 43, 44, 45, 46, 47,
809 48, 49, 50, 51, 52, 53, 54, 55,
810 56, 57, 58, 59, 60, 61, 62, 63,
811 64, 97, 98, 99,100,101,102,103,
812 104,105,106,107,108,109,110,111,
813 112,113,114,115,116,117,118,119,
814 120,121,122, 91, 92, 93, 94, 95,
815 96, 65, 66, 67, 68, 69, 70, 71,
816 72, 73, 74, 75, 76, 77, 78, 79,
817 80, 81, 82, 83, 84, 85, 86, 87,
818 88, 89, 90,123,124,125,126,127,
819 128,129,130,131,132,133,134,135,
820 136,137,138,139,140,141,142,143,
821 144,145,146,147,148,149,150,151,
822 152,153,154,155,156,157,158,159,
823 160,161,162,163,164,165,166,167,
824 168,169,170,171,172,173,174,175,
825 176,177,178,179,180,181,182,183,
826 184,185,186,187,188,189,190,191,
827 192,193,194,195,196,197,198,199,
828 200,201,202,203,204,205,206,207,
829 208,209,210,211,212,213,214,215,
830 216,217,218,219,220,221,222,223,
831 224,225,226,227,228,229,230,231,
832 232,233,234,235,236,237,238,239,
833 240,241,242,243,244,245,246,247,
834 248,249,250,251,252,253,254,255,
835
836 /* This table contains bit maps for various character classes. Each map is 32
837 bytes long and the bits run from the least significant end of each byte. The
838 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
839 graph, print, punct, and cntrl. Other classes are built from combinations. */
840
841 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
842 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
843 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
844 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845
846 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
847 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
848 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850
851 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
852 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
853 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
854 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
855
856 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
857 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
858 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
859 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
860
861 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
862 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
863 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
864 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
865
866 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
867 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
868 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
869 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
870
871 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
872 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
873 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
874 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
875
876 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
877 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
878 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
879 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
880
881 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
882 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
883 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
884 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
885
886 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
887 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
888 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
889 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
890
891 /* This table identifies various classes of character by individual bits:
892 0x01 white space character
893 0x02 letter
894 0x04 decimal digit
895 0x08 hexadecimal digit
896 0x10 alphanumeric or '_'
897 0x80 regular expression metacharacter or binary zero
898 */
899
900 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
901 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
902 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
903 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
904 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
905 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
906 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
907 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
908 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
909 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
910 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
911 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
912 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
913 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
914 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
915 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
916 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
917 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
918 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
919 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
920 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
921 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
922 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
923 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
924 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
925 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
926 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
927 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
928 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
929 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
930 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
931 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
932
933 /* This is a set of tables that came originally from a Windows user. It seems to
934 be at least an approximation of ISO 8859. In particular, there are characters
935 greater than 128 that are marked as spaces, letters, etc. */
936
937 static const pcre_uint8 tables1[] = {
938 0,1,2,3,4,5,6,7, /* Lower-casing table starts here: 256 entries */
939 8,9,10,11,12,13,14,15,
940 16,17,18,19,20,21,22,23,
941 24,25,26,27,28,29,30,31,
942 32,33,34,35,36,37,38,39,
943 40,41,42,43,44,45,46,47,
944 48,49,50,51,52,53,54,55,
945 56,57,58,59,60,61,62,63,
946 64,97,98,99,100,101,102,103,
947 104,105,106,107,108,109,110,111,
948 112,113,114,115,116,117,118,119,
949 120,121,122,91,92,93,94,95,
950 96,97,98,99,100,101,102,103,
951 104,105,106,107,108,109,110,111,
952 112,113,114,115,116,117,118,119,
953 120,121,122,123,124,125,126,127,
954 128,129,130,131,132,133,134,135,
955 136,137,138,139,140,141,142,143,
956 144,145,146,147,148,149,150,151,
957 152,153,154,155,156,157,158,159,
958 160,161,162,163,164,165,166,167,
959 168,169,170,171,172,173,174,175,
960 176,177,178,179,180,181,182,183,
961 184,185,186,187,188,189,190,191,
962 224,225,226,227,228,229,230,231,
963 232,233,234,235,236,237,238,239,
964 240,241,242,243,244,245,246,215,
965 248,249,250,251,252,253,254,223,
966 224,225,226,227,228,229,230,231,
967 232,233,234,235,236,237,238,239,
968 240,241,242,243,244,245,246,247,
969 248,249,250,251,252,253,254,255,
970 0,1,2,3,4,5,6,7, /* Case-flipping table starts here: 256 entries */
971 8,9,10,11,12,13,14,15,
972 16,17,18,19,20,21,22,23,
973 24,25,26,27,28,29,30,31,
974 32,33,34,35,36,37,38,39,
975 40,41,42,43,44,45,46,47,
976 48,49,50,51,52,53,54,55,
977 56,57,58,59,60,61,62,63,
978 64,97,98,99,100,101,102,103,
979 104,105,106,107,108,109,110,111,
980 112,113,114,115,116,117,118,119,
981 120,121,122,91,92,93,94,95,
982 96,65,66,67,68,69,70,71,
983 72,73,74,75,76,77,78,79,
984 80,81,82,83,84,85,86,87,
985 88,89,90,123,124,125,126,127,
986 128,129,130,131,132,133,134,135,
987 136,137,138,139,140,141,142,143,
988 144,145,146,147,148,149,150,151,
989 152,153,154,155,156,157,158,159,
990 160,161,162,163,164,165,166,167,
991 168,169,170,171,172,173,174,175,
992 176,177,178,179,180,181,182,183,
993 184,185,186,187,188,189,190,191,
994 224,225,226,227,228,229,230,231,
995 232,233,234,235,236,237,238,239,
996 240,241,242,243,244,245,246,215,
997 248,249,250,251,252,253,254,223,
998 192,193,194,195,196,197,198,199,
999 200,201,202,203,204,205,206,207,
1000 208,209,210,211,212,213,214,247,
1001 216,217,218,219,220,221,222,255,
1002 0,62,0,0,1,0,0,0, /* Character class bit maps start here: 10 maps of 32 bytes each, least significant bit first */
1003 0,0,0,0,0,0,0,0,
1004 32,0,0,0,1,0,0,0,
1005 0,0,0,0,0,0,0,0,
1006 0,0,0,0,0,0,255,3,
1007 126,0,0,0,126,0,0,0,
1008 0,0,0,0,0,0,0,0,
1009 0,0,0,0,0,0,0,0,
1010 0,0,0,0,0,0,255,3,
1011 0,0,0,0,0,0,0,0,
1012 0,0,0,0,0,0,12,2,
1013 0,0,0,0,0,0,0,0,
1014 0,0,0,0,0,0,0,0,
1015 254,255,255,7,0,0,0,0,
1016 0,0,0,0,0,0,0,0,
1017 255,255,127,127,0,0,0,0,
1018 0,0,0,0,0,0,0,0,
1019 0,0,0,0,254,255,255,7,
1020 0,0,0,0,0,4,32,4,
1021 0,0,0,128,255,255,127,255,
1022 0,0,0,0,0,0,255,3,
1023 254,255,255,135,254,255,255,7,
1024 0,0,0,0,0,4,44,6,
1025 255,255,127,255,255,255,127,255,
1026 0,0,0,0,254,255,255,255,
1027 255,255,255,255,255,255,255,127,
1028 0,0,0,0,254,255,255,255,
1029 255,255,255,255,255,255,255,255,
1030 0,2,0,0,255,255,255,255,
1031 255,255,255,255,255,255,255,127,
1032 0,0,0,0,255,255,255,255,
1033 255,255,255,255,255,255,255,255,
1034 0,0,0,0,254,255,0,252,
1035 1,0,0,248,1,0,0,120,
1036 0,0,0,0,254,255,255,255,
1037 0,0,128,0,0,0,128,0,
1038 255,255,255,255,0,0,0,0,
1039 0,0,0,0,0,0,0,128,
1040 255,255,255,255,0,0,0,0,
1041 0,0,0,0,0,0,0,0,
1042 128,0,0,0,0,0,0,0, /* Character type flag table starts here: 256 entries, one flag byte per character */
1043 0,1,1,0,1,1,0,0,
1044 0,0,0,0,0,0,0,0,
1045 0,0,0,0,0,0,0,0,
1046 1,0,0,0,128,0,0,0,
1047 128,128,128,128,0,0,128,0,
1048 28,28,28,28,28,28,28,28,
1049 28,28,0,0,0,0,0,128,
1050 0,26,26,26,26,26,26,18,
1051 18,18,18,18,18,18,18,18,
1052 18,18,18,18,18,18,18,18,
1053 18,18,18,128,128,0,128,16,
1054 0,26,26,26,26,26,26,18,
1055 18,18,18,18,18,18,18,18,
1056 18,18,18,18,18,18,18,18,
1057 18,18,18,128,128,0,0,0,
1058 0,0,0,0,0,1,0,0,
1059 0,0,0,0,0,0,0,0,
1060 0,0,0,0,0,0,0,0,
1061 0,0,0,0,0,0,0,0,
1062 1,0,0,0,0,0,0,0,
1063 0,0,18,0,0,0,0,0,
1064 0,0,20,20,0,18,0,0,
1065 0,20,18,0,0,0,0,0,
1066 18,18,18,18,18,18,18,18,
1067 18,18,18,18,18,18,18,18,
1068 18,18,18,18,18,18,18,0,
1069 18,18,18,18,18,18,18,18,
1070 18,18,18,18,18,18,18,18,
1071 18,18,18,18,18,18,18,18,
1072 18,18,18,18,18,18,18,0,
1073 18,18,18,18,18,18,18,18
1074 };
1075
1076
1077
1078
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* A few elderly systems (SunOS4 is one example) ship a C library that lacks
strerror(). The sys_errlist[] vector supplies the same text there, so this
fallback indexes it after checking that 0 <= n < sys_nerr.

Argument:   n   an error number
Returns:    the matching sys_errlist[] entry, or a fixed "unknown" string
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1098
1099
1100 /*************************************************
1101 * JIT memory callback *
1102 *************************************************/
1103
1104 static pcre_jit_stack* jit_callback(void *arg)
1105 {
1106 jit_was_used = TRUE;
1107 return (pcre_jit_stack *)arg;
1108 }
1109
1110
1111 #if !defined NOUTF || defined SUPPORT_PCRE16
1112 /*************************************************
1113 * Convert UTF-8 string to value *
1114 *************************************************/
1115
1116 /* This function takes one or more bytes that represents a UTF-8 character,
1117 and returns the value of the character.
1118
1119 Argument:
1120 utf8bytes a pointer to the byte vector
1121 vptr a pointer to an int to receive the value
1122
1123 Returns: > 0 => the number of bytes consumed
1124 -6 to 0 => malformed UTF-8 character at offset = (-return)
1125 */
1126
1127 static int
1128 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1129 {
1130 int c = *utf8bytes++;
1131 int d = c;
1132 int i, j, s;
1133
1134 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1135 {
1136 if ((d & 0x80) == 0) break;
1137 d <<= 1;
1138 }
1139
1140 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1141 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1142
1143 /* i now has a value in the range 1-5 */
1144
1145 s = 6*i;
1146 d = (c & utf8_table3[i]) << s;
1147
1148 for (j = 0; j < i; j++)
1149 {
1150 c = *utf8bytes++;
1151 if ((c & 0xc0) != 0x80) return -(j+1);
1152 s -= 6;
1153 d |= (c & 0x3f) << s;
1154 }
1155
1156 /* Check that encoding was the correct unique one */
1157
1158 for (j = 0; j < utf8_table1_size; j++)
1159 if (d <= utf8_table1[j]) break;
1160 if (j != i) return -(i+1);
1161
1162 /* Valid value */
1163
1164 *vptr = d;
1165 return i+1;
1166 }
1167 #endif /* NOUTF || SUPPORT_PCRE16 */
1168
1169
1170
1171 #if !defined NOUTF || defined SUPPORT_PCRE16
1172 /*************************************************
1173 * Convert character value to UTF-8 *
1174 *************************************************/
1175
1176 /* This function takes an integer value in the range 0 - 0x7fffffff
1177 and encodes it as a UTF-8 character in 0 to 6 bytes.
1178
1179 Arguments:
1180 cvalue the character value
1181 utf8bytes pointer to buffer for result - at least 6 bytes long
1182
1183 Returns: number of characters placed in the buffer
1184 */
1185
1186 static int
1187 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1188 {
1189 register int i, j;
1190 for (i = 0; i < utf8_table1_size; i++)
1191 if (cvalue <= utf8_table1[i]) break;
1192 utf8bytes += i;
1193 for (j = i; j > 0; j--)
1194 {
1195 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1196 cvalue >>= 6;
1197 }
1198 *utf8bytes = utf8_table2[i] | cvalue;
1199 return i + 1;
1200 }
1201 #endif
1202
1203
#ifdef SUPPORT_PCRE16
/*************************************************
*           Convert a string to 16-bit           *
*************************************************/

/* The converted string is always left in buffer16, which is replaced by a
larger block when it is too small. Twice the 8-bit size (plus a terminator) is
always enough: in non-UTF mode each byte becomes one 16-bit unit, and in UTF
mode a character of up to 3 bytes (values up to 0xffff) becomes one unit while
a 4-byte character becomes two.

Surrogate values are deliberately not faulted here, so that invalid UTF-16
strings can be constructed for testing that they are correctly diagnosed.

Patterns to be converted are either plain ASCII or UTF-8; data lines are always
in UTF-8 so that values greater than 255 can be handled.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;
int wanted = 2*len + 2;

if (buffer16_size < wanted)
  {
  if (buffer16 != NULL) free(buffer16);
  buffer16_size = wanted;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

if (utf || data)
  {
  /* The input is UTF-8: decode one character at a time. */

  int value = 0;
  int remaining = len;

  while (remaining > 0)
    {
    int used = utf82ord(p, &value);
    if (used <= 0) return -1;
    if (value > 0x10ffff) return -2;
    p += used;
    remaining -= used;

    if (value >= 0x10000)
      {
      /* Needs a surrogate pair, which exists only in UTF mode. */
      if (!utf) return -3;
      value -= 0x10000;
      *out++ = 0xD800 | (value >> 10);
      *out++ = 0xDC00 | (value & 0x3ff);
      }
    else *out++ = value;
    }
  }
else
  {
  /* A non-UTF pattern: widen each byte. */

  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return out - buffer16;
}
#endif
1282
1283
1284 /*************************************************
1285 * Read or extend an input line *
1286 *************************************************/
1287
1288 /* Input lines are read into buffer, but both patterns and data lines can be
1289 continued over multiple input lines. In addition, if the buffer fills up, we
1290 want to automatically expand it so as to be able to handle extremely large
1291 lines that are needed for certain stress tests. When the input buffer is
1292 expanded, the other two buffers must also be expanded likewise, and the
1293 contents of pbuffer, which are a copy of the input for callouts, must be
1294 preserved (for when expansion happens for a data line). This is not the most
1295 optimal way of handling this, but hey, this is just a test program!
1296
1297 Arguments:
1298 f the file to read
1299 start where in buffer to start (this *must* be within buffer)
1300 prompt for stdin or readline()
1301
1302 Returns: pointer to the start of new data
1303 could be a copy of start, or could be moved
1304 NULL if no data read and EOF reached
1305 */
1306
1307 static pcre_uint8 *
1308 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1309 {
1310 pcre_uint8 *here = start;
1311
1312 for (;;)
1313 {
1314 size_t rlen = (size_t)(buffer_size - (here - buffer));
1315
1316 if (rlen > 1000)
1317 {
1318 int dlen;
1319
1320 /* If libreadline or libedit support is required, use readline() to read a
1321 line if the input is a terminal. Note that readline() removes the trailing
1322 newline, so we must put it back again, to be compatible with fgets(). */
1323
1324 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1325 if (isatty(fileno(f)))
1326 {
1327 size_t len;
1328 char *s = readline(prompt);
1329 if (s == NULL) return (here == start)? NULL : start;
1330 len = strlen(s);
1331 if (len > 0) add_history(s);
1332 if (len > rlen - 1) len = rlen - 1;
1333 memcpy(here, s, len);
1334 here[len] = '\n';
1335 here[len+1] = 0;
1336 free(s);
1337 }
1338 else
1339 #endif
1340
1341 /* Read the next line by normal means, prompting if the file is stdin. */
1342
1343 {
1344 if (f == stdin) printf("%s", prompt);
1345 if (fgets((char *)here, rlen, f) == NULL)
1346 return (here == start)? NULL : start;
1347 }
1348
1349 dlen = (int)strlen((char *)here);
1350 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1351 here += dlen;
1352 }
1353
1354 else
1355 {
1356 int new_buffer_size = 2*buffer_size;
1357 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1358 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1359 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1360
1361 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1362 {
1363 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1364 exit(1);
1365 }
1366
1367 memcpy(new_buffer, buffer, buffer_size);
1368 memcpy(new_pbuffer, pbuffer, buffer_size);
1369
1370 buffer_size = new_buffer_size;
1371
1372 start = new_buffer + (start - buffer);
1373 here = new_buffer + (here - buffer);
1374
1375 free(buffer);
1376 free(dbuffer);
1377 free(pbuffer);
1378
1379 buffer = new_buffer;
1380 dbuffer = new_dbuffer;
1381 pbuffer = new_pbuffer;
1382 }
1383 }
1384
1385 return NULL; /* Control never gets here */
1386 }
1387
1388
1389
1390 /*************************************************
1391 * Read number from string *
1392 *************************************************/
1393
1394 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1395 around with conditional compilation, just do the job by hand. It is only used
1396 for unpicking arguments, so just keep it simple.
1397
1398 Arguments:
1399 str string to be converted
1400 endptr where to put the end pointer
1401
1402 Returns: the unsigned long
1403 */
1404
1405 static int
1406 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1407 {
1408 int result = 0;
1409 while(*str != 0 && isspace(*str)) str++;
1410 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1411 *endptr = str;
1412 return(result);
1413 }
1414
1415
1416
1417 /*************************************************
1418 * Print one character *
1419 *************************************************/
1420
1421 /* Print a single character either literally, or as a hex escape. */
1422
1423 static int pchar(int c, FILE *f)
1424 {
1425 if (PRINTOK(c))
1426 {
1427 if (f != NULL) fprintf(f, "%c", c);
1428 return 1;
1429 }
1430
1431 if (c < 0x100)
1432 {
1433 if (use_utf)
1434 {
1435 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1436 return 6;
1437 }
1438 else
1439 {
1440 if (f != NULL) fprintf(f, "\\x%02x", c);
1441 return 4;
1442 }
1443 }
1444
1445 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1446 return (c <= 0x000000ff)? 6 :
1447 (c <= 0x00000fff)? 7 :
1448 (c <= 0x0000ffff)? 8 :
1449 (c <= 0x000fffff)? 9 : 10;
1450 }
1451
1452
1453
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Print a byte string one character at a time by means of pchar(). When
use_utf is set, each UTF-8 sequence that decodes correctly and lies wholly
within the string is printed as one character; any other byte is printed on
its own. If handed a NULL file, just counts chars without printing.

Arguments:
  p          the string
  length     its length in bytes, or negative for a zero-terminated string
  f          the output file, or NULL

Returns:     the sum of the widths reported by pchar()
*/

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int total = 0;
int ch = 0;
int remaining = (length < 0)? (int)strlen((char *)p) : length;

while (remaining > 0)
  {
  int used = 0;

#if !defined NOUTF
  if (use_utf)
    {
    int rc = utf82ord(p, &ch);
    if (rc > 0 && rc <= remaining) used = rc;   /* Mustn't run over the end */
    }
#endif

  if (used == 0)      /* not decoded as UTF-8: take one raw byte */
    {
    ch = *p;
    used = 1;
    }

  p += used;
  remaining -= used;
  total += pchar(ch, f);
  }

return total;
}
#endif
1492
1493
1494
#ifdef SUPPORT_PCRE16
/*************************************************
*    Find length of 0-terminated 16-bit string   *
*************************************************/

/* Argument:  p   points to a zero-terminated string of 16-bit units
Returns:      the number of units before the terminating zero
*/

static int strlen16(PCRE_SPTR16 p)
{
PCRE_SPTR16 end = p;
while (*end != 0) end++;
return (int)(end - p);
}
#endif  /* SUPPORT_PCRE16 */
1507
1508
#ifdef SUPPORT_PCRE16
/*************************************************
*         Print 16-bit character string          *
*************************************************/

/* Print a string of 16-bit units one character at a time by means of pchar().
When use_utf is set, a high surrogate (0xD800-0xDBFF) that is immediately
followed, within the string, by a low surrogate (0xDC00-0xDFFF) is combined
with it and printed as one character. If handed a NULL file, just counts chars
without printing.

Arguments:
  p          the string
  length     its length in 16-bit units, or negative if zero-terminated
  f          the output file, or NULL

Returns:     the sum of the widths reported by pchar()
*/

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int yield = 0;

if (length < 0)
  length = strlen16(p);

while (length-- > 0)
  {
  int c = *p++ & 0xffff;
#if !defined NOUTF
  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
    int d = *p & 0xffff;

    /* The low surrogate range includes its upper bound 0xDFFF. */

    if (d >= 0xDC00 && d <= 0xDFFF)
      {
      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
      length--;
      p++;
      }
    }
#endif
  yield += pchar(c, f);
  }

return yield;
}
#endif  /* SUPPORT_PCRE16 */
1545
1546
1547
#ifdef SUPPORT_PCRE8
/*************************************************
*     Read a capture name (8-bit) and check it   *
*************************************************/

/* Copy the run of alphanumeric characters at p to the position given by *pp,
following it with two zero bytes. If pcre_get_stringnumber() does not find the
name in the compiled pattern, a message is written to outfile; the name is
stored in either case.

Arguments:
  p          points to the name in the input text
  pp         points to the pointer to where the name is stored; it is
               advanced to point at the second of the two zeros
  re         the compiled pattern

Returns:     pointer to the first character after the name
*/

static pcre_uint8 *
read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
{
pcre_uint8 *name = *pp;
pcre_uint8 *out = name;

for (; isalnum(*p); p++) *out++ = *p;
*out++ = 0;
*out = 0;

if (pcre_get_stringnumber(re, (char *)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE8 */
1571
1572
1573
#ifdef SUPPORT_PCRE16
/*************************************************
*    Read a capture name (16-bit) and check it   *
*************************************************/

/* Note that the text being read is 8-bit. Each alphanumeric character at p is
widened into a 16-bit unit stored at the position given by *pp, and the name
is followed by two zero units. If pcre16_get_stringnumber() does not find the
name in the compiled pattern, a message is written to outfile; the name is
stored in either case.

Arguments:
  p          points to the name in the (8-bit) input text
  pp         points to the pointer to where the name is stored; it is
               advanced to point at the second of the two zeros
  re         the compiled pattern

Returns:     pointer to the first character after the name
*/

static pcre_uint8 *
read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
{
pcre_uint16 *name = *pp;
pcre_uint16 *out = name;

for (; isalnum(*p); p++) *out++ = *p;
*out++ = 0;
*out = 0;

if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE16 */
1598
1599
1600
1601 /*************************************************
1602 * Callout function *
1603 *************************************************/
1604
1605 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1606 the match. Yield zero unless more callouts than the fail count, or the callout
1607 data is not zero. */
1608
1609 static int callout(pcre_callout_block *cb)
1610 {
1611 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1612 int i, pre_start, post_start, subject_length;
1613
1614 if (callout_extra)
1615 {
1616 fprintf(f, "Callout %d: last capture = %d\n",
1617 cb->callout_number, cb->capture_last);
1618
1619 for (i = 0; i < cb->capture_top * 2; i += 2)
1620 {
1621 if (cb->offset_vector[i] < 0)
1622 fprintf(f, "%2d: <unset>\n", i/2);
1623 else
1624 {
1625 fprintf(f, "%2d: ", i/2);
1626 PCHARSV(cb->subject, cb->offset_vector[i],
1627 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1628 fprintf(f, "\n");
1629 }
1630 }
1631 }
1632
1633 /* Re-print the subject in canonical form, the first time or if giving full
1634 datails. On subsequent calls in the same match, we use pchars just to find the
1635 printed lengths of the substrings. */
1636
1637 if (f != NULL) fprintf(f, "--->");
1638
1639 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1640 PCHARS(post_start, cb->subject, cb->start_match,
1641 cb->current_position - cb->start_match, f);
1642
1643 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1644
1645 PCHARSV(cb->subject, cb->current_position,
1646 cb->subject_length - cb->current_position, f);
1647
1648 if (f != NULL) fprintf(f, "\n");
1649
1650 /* Always print appropriate indicators, with callout number if not already
1651 shown. For automatic callouts, show the pattern offset. */
1652
1653 if (cb->callout_number == 255)
1654 {
1655 fprintf(outfile, "%+3d ", cb->pattern_position);
1656 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1657 }
1658 else
1659 {
1660 if (callout_extra) fprintf(outfile, " ");
1661 else fprintf(outfile, "%3d ", cb->callout_number);
1662 }
1663
1664 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1665 fprintf(outfile, "^");
1666
1667 if (post_start > 0)
1668 {
1669 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1670 fprintf(outfile, "^");
1671 }
1672
1673 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1674 fprintf(outfile, " ");
1675
1676 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1677 pbuffer + cb->pattern_position);
1678
1679 fprintf(outfile, "\n");
1680 first_callout = 0;
1681
1682 if (cb->mark != last_callout_mark)
1683 {
1684 if (cb->mark == NULL)
1685 fprintf(outfile, "Latest Mark: <unset>\n");
1686 else
1687 {
1688 fprintf(outfile, "Latest Mark: ");
1689 PCHARSV(cb->mark, 0, -1, outfile);
1690 putc('\n', outfile);
1691 }
1692 last_callout_mark = cb->mark;
1693 }
1694
1695 if (cb->callout_data != NULL)
1696 {
1697 int callout_data = *((int *)(cb->callout_data));
1698 if (callout_data != 0)
1699 {
1700 fprintf(outfile, "Callout data = %d\n", callout_data);
1701 return callout_data;
1702 }
1703 }
1704
1705 return (cb->callout_number != callout_fail_id)? 0 :
1706 (++callout_count >= callout_fail_count)? 1 : 0;
1707 }
1708
1709
1710 /*************************************************
1711 * Local malloc functions *
1712 *************************************************/
1713
1714 /* Alternative malloc function, to test functionality and save the size of a
1715 compiled re, which is the first store request that pcre_compile() makes. The
1716 show_malloc variable is set only during matching. */
1717
1718 static void *new_malloc(size_t size)
1719 {
1720 void *block = malloc(size);
1721 gotten_store = size;
1722 if (first_gotten_store == 0) first_gotten_store = size;
1723 if (show_malloc)
1724 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1725 return block;
1726 }
1727
1728 static void new_free(void *block)
1729 {
1730 if (show_malloc)
1731 fprintf(outfile, "free %p\n", block);
1732 free(block);
1733 }
1734
1735 /* For recursion malloc/free, to test stacking calls */
1736
1737 static void *stack_malloc(size_t size)
1738 {
1739 void *block = malloc(size);
1740 if (show_malloc)
1741 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1742 return block;
1743 }
1744
1745 static void stack_free(void *block)
1746 {
1747 if (show_malloc)
1748 fprintf(outfile, "stack_free %p\n", block);
1749 free(block);
1750 }
1751
1752
1753 /*************************************************
1754 * Call pcre_fullinfo() *
1755 *************************************************/
1756
1757 /* Get one piece of information from the pcre_fullinfo() function. When only
1758 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1759 value, but the code is defensive.
1760
1761 Arguments:
1762 re compiled regex
1763 study study data
1764 option PCRE_INFO_xxx option
1765 ptr where to put the data
1766
1767 Returns: 0 when OK, < 0 on error
1768 */
1769
1770 static int
1771 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1772 {
1773 int rc;
1774
1775 if (use_pcre16)
1776 #ifdef SUPPORT_PCRE16
1777 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1778 #else
1779 rc = PCRE_ERROR_BADMODE;
1780 #endif
1781 else
1782 #ifdef SUPPORT_PCRE8
1783 rc = pcre_fullinfo(re, study, option, ptr);
1784 #else
1785 rc = PCRE_ERROR_BADMODE;
1786 #endif
1787
1788 if (rc < 0)
1789 {
1790 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1791 use_pcre16? "16" : "", option);
1792 if (rc == PCRE_ERROR_BADMODE)
1793 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1794 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1795 }
1796
1797 return rc;
1798 }
1799
1800
1801
1802 /*************************************************
1803 * Swap byte functions *
1804 *************************************************/
1805
1806 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1807 value, respectively.
1808
1809 Arguments:
1810 value any number
1811
1812 Returns: the byte swapped value
1813 */
1814
1815 static pcre_uint32
1816 swap_uint32(pcre_uint32 value)
1817 {
1818 return ((value & 0x000000ff) << 24) |
1819 ((value & 0x0000ff00) << 8) |
1820 ((value & 0x00ff0000) >> 8) |
1821 (value >> 24);
1822 }
1823
1824 static pcre_uint16
1825 swap_uint16(pcre_uint16 value)
1826 {
1827 return (value >> 8) | (value << 8);
1828 }
1829
1830
1831
1832 /*************************************************
1833 * Flip bytes in a compiled pattern *
1834 *************************************************/
1835
1836 /* This function is called if the 'F' option was present on a pattern that is
1837 to be written to a file. We flip the bytes of all the integer fields in the
1838 regex data block and the study block. In 16-bit mode this also flips relevant
1839 bytes in the pattern itself. This is to make it possible to test PCRE's
1840 ability to reload byte-flipped patterns, e.g. those compiled on a different
1841 architecture. */
1842
1843 static void
1844 regexflip(pcre *ere, pcre_extra *extra)
1845 {
1846 REAL_PCRE *re = (REAL_PCRE *)ere;
1847 #ifdef SUPPORT_PCRE16
1848 int op;
1849 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1850 int length = re->name_count * re->name_entry_size;
1851 #ifdef SUPPORT_UTF
1852 BOOL utf = (re->options & PCRE_UTF16) != 0;
1853 BOOL utf16_char = FALSE;
1854 #endif /* SUPPORT_UTF */
1855 #endif /* SUPPORT_PCRE16 */
1856
1857 /* Always flip the bytes in the main data block and study blocks. */
1858
1859 re->magic_number = REVERSED_MAGIC_NUMBER;
1860 re->size = swap_uint32(re->size);
1861 re->options = swap_uint32(re->options);
1862 re->flags = swap_uint16(re->flags);
1863 re->top_bracket = swap_uint16(re->top_bracket);
1864 re->top_backref = swap_uint16(re->top_backref);
1865 re->first_char = swap_uint16(re->first_char);
1866 re->req_char = swap_uint16(re->req_char);
1867 re->name_table_offset = swap_uint16(re->name_table_offset);
1868 re->name_entry_size = swap_uint16(re->name_entry_size);
1869 re->name_count = swap_uint16(re->name_count);
1870
1871 if (extra != NULL)
1872 {
1873 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1874 rsd->size = swap_uint32(rsd->size);
1875 rsd->flags = swap_uint32(rsd->flags);
1876 rsd->minlength = swap_uint32(rsd->minlength);
1877 }
1878
1879 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1880 in the name table, if present, and then in the pattern itself. */
1881
1882 #ifdef SUPPORT_PCRE16
1883 if (!use_pcre16) return;
1884
1885 while(TRUE)
1886 {
1887 /* Swap previous characters. */
1888 while (length-- > 0)
1889 {
1890 *ptr = swap_uint16(*ptr);
1891 ptr++;
1892 }
1893 #ifdef SUPPORT_UTF
1894 if (utf16_char)
1895 {
1896 if ((ptr[-1] & 0xfc00) == 0xd800)
1897 {
1898 /* We know that there is only one extra character in UTF-16. */
1899 *ptr = swap_uint16(*ptr);
1900 ptr++;
1901 }
1902 }
1903 utf16_char = FALSE;
1904 #endif /* SUPPORT_UTF */
1905
1906 /* Get next opcode. */
1907
1908 length = 0;
1909 op = *ptr;
1910 *ptr++ = swap_uint16(op);
1911
1912 switch (op)
1913 {
1914 case OP_END:
1915 return;
1916
1917 #ifdef SUPPORT_UTF
1918 case OP_CHAR:
1919 case OP_CHARI:
1920 case OP_NOT:
1921 case OP_NOTI:
1922 case OP_STAR:
1923 case OP_MINSTAR:
1924 case OP_PLUS:
1925 case OP_MINPLUS:
1926 case OP_QUERY:
1927 case OP_MINQUERY:
1928 case OP_UPTO:
1929 case OP_MINUPTO:
1930 case OP_EXACT:
1931 case OP_POSSTAR:
1932 case OP_POSPLUS:
1933 case OP_POSQUERY:
1934 case OP_POSUPTO:
1935 case OP_STARI:
1936 case OP_MINSTARI:
1937 case OP_PLUSI:
1938 case OP_MINPLUSI:
1939 case OP_QUERYI:
1940 case OP_MINQUERYI:
1941 case OP_UPTOI:
1942 case OP_MINUPTOI:
1943 case OP_EXACTI:
1944 case OP_POSSTARI:
1945 case OP_POSPLUSI:
1946 case OP_POSQUERYI:
1947 case OP_POSUPTOI:
1948 case OP_NOTSTAR:
1949 case OP_NOTMINSTAR:
1950 case OP_NOTPLUS:
1951 case OP_NOTMINPLUS:
1952 case OP_NOTQUERY:
1953 case OP_NOTMINQUERY:
1954 case OP_NOTUPTO:
1955 case OP_NOTMINUPTO:
1956 case OP_NOTEXACT:
1957 case OP_NOTPOSSTAR:
1958 case OP_NOTPOSPLUS:
1959 case OP_NOTPOSQUERY:
1960 case OP_NOTPOSUPTO:
1961 case OP_NOTSTARI:
1962 case OP_NOTMINSTARI:
1963 case OP_NOTPLUSI:
1964 case OP_NOTMINPLUSI:
1965 case OP_NOTQUERYI:
1966 case OP_NOTMINQUERYI:
1967 case OP_NOTUPTOI:
1968 case OP_NOTMINUPTOI:
1969 case OP_NOTEXACTI:
1970 case OP_NOTPOSSTARI:
1971 case OP_NOTPOSPLUSI:
1972 case OP_NOTPOSQUERYI:
1973 case OP_NOTPOSUPTOI:
1974 if (utf) utf16_char = TRUE;
1975 #endif
1976 /* Fall through. */
1977
1978 default:
1979 length = OP_lengths16[op] - 1;
1980 break;
1981
1982 case OP_CLASS:
1983 case OP_NCLASS:
1984 /* Skip the character bit map. */
1985 ptr += 32/sizeof(pcre_uint16);
1986 length = 0;
1987 break;
1988
1989 case OP_XCLASS:
1990 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1991 if (LINK_SIZE > 1)
1992 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1993 - (1 + LINK_SIZE + 1));
1994 else
1995 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1996
1997 /* Reverse the size of the XCLASS instance. */
1998 *ptr = swap_uint16(*ptr);
1999 ptr++;
2000 if (LINK_SIZE > 1)
2001 {
2002 *ptr = swap_uint16(*ptr);
2003 ptr++;
2004 }
2005
2006 op = *ptr;
2007 *ptr = swap_uint16(op);
2008 ptr++;
2009 if ((op & XCL_MAP) != 0)
2010 {
2011 /* Skip the character bit map. */
2012 ptr += 32/sizeof(pcre_uint16);
2013 length -= 32/sizeof(pcre_uint16);
2014 }
2015 break;
2016 }
2017 }
2018 /* Control should never reach here in 16 bit mode. */
2019 #endif /* SUPPORT_PCRE16 */
2020 }
2021
2022
2023
2024 /*************************************************
2025 * Check match or recursion limit *
2026 *************************************************/
2027
2028 static int
2029 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2030 int start_offset, int options, int *use_offsets, int use_size_offsets,
2031 int flag, unsigned long int *limit, int errnumber, const char *msg)
2032 {
2033 int count;
2034 int min = 0;
2035 int mid = 64;
2036 int max = -1;
2037
2038 extra->flags |= flag;
2039
2040 for (;;)
2041 {
2042 *limit = mid;
2043
2044 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2045 use_offsets, use_size_offsets);
2046
2047 if (count == errnumber)
2048 {
2049 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2050 min = mid;
2051 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2052 }
2053
2054 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2055 count == PCRE_ERROR_PARTIAL)
2056 {
2057 if (mid == min + 1)
2058 {
2059 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2060 break;
2061 }
2062 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2063 max = mid;
2064 mid = (min + mid)/2;
2065 }
2066 else break; /* Some other error */
2067 }
2068
2069 extra->flags &= ~flag;
2070 return count;
2071 }
2072
2073
2074
2075 /*************************************************
2076 * Case-independent strncmp() function *
2077 *************************************************/
2078
2079 /*
2080 Arguments:
2081 s first string
2082 t second string
2083 n number of characters to compare
2084
2085 Returns: < 0, = 0, or > 0, according to the comparison
2086 */
2087
2088 static int
2089 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2090 {
2091 while (n--)
2092 {
2093 int c = tolower(*s++) - tolower(*t++);
2094 if (c) return c;
2095 }
2096 return 0;
2097 }
2098
2099
2100
2101 /*************************************************
2102 * Check newline indicator *
2103 *************************************************/
2104
2105 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2106 a message and return 0 if there is no match.
2107
2108 Arguments:
2109 p points after the leading '<'
2110 f file for error message
2111
2112 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2113 */
2114
2115 static int
2116 check_newline(pcre_uint8 *p, FILE *f)
2117 {
2118 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2119 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2120 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2121 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2122 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2123 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2124 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2125 fprintf(f, "Unknown newline type at: <%s\n", p);
2126 return 0;
2127 }
2128
2129
2130
/*************************************************
*                Usage function                  *
*************************************************/

/* Write the command-line help text to stdout. Lines for features that can be
left out of the build (readline/libedit, 16-bit library, DFA matching, POSIX
interface) are selected by the same preprocessor tests that control those
features. There are no arguments and no return value. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs(" -16 use the 16-bit library\n", stdout);
#endif

fputs(" -b show compiled code\n"
      " -C show PCRE compile-time options and exit\n"
      " -C arg show a specific compile-time option\n"
      " and exit with its value. The arg can be:\n", stdout);

/* Keywords accepted after -C */

fputs(" linksize internal link size [2, 3, 4]\n"
      " pcre8 8 bit library support enabled [0, 1]\n"
      " pcre16 16 bit library support enabled [0, 1]\n"
      " utf Unicode Transformation Format supported [0, 1]\n"
      " ucp Unicode Properties supported [0, 1]\n"
      " jit Just-in-time compiler supported [0, 1]\n"
      " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n", stdout);

fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n", stdout);

fputs(" -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n"
      " -s++ ditto, verifying when JIT was actually used\n"
      " -s+n force each pattern to be studied, using JIT if available,\n"
      " where 1 <= n <= 7 selects JIT options\n"
      " -s++n ditto, verifying when JIT was actually used\n", stdout);

fputs(" -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
2185
2186
2187
2188 /*************************************************
2189 * Main Program *
2190 *************************************************/
2191
2192 /* Read lines from named file or stdin and write to named file or stdout; lines
2193 consist of a regular expression, in delimiters and optionally followed by
2194 options, followed by a set of test data, terminated by an empty line. */
2195
2196 int main(int argc, char **argv)
2197 {
2198 FILE *infile = stdin;
2199 const char *version;
2200 int options = 0;
2201 int study_options = 0;
2202 int default_find_match_limit = FALSE;
2203 int op = 1;
2204 int timeit = 0;
2205 int timeitm = 0;
2206 int showinfo = 0;
2207 int showstore = 0;
2208 int force_study = -1;
2209 int force_study_options = 0;
2210 int quiet = 0;
2211 int size_offsets = 45;
2212 int size_offsets_max;
2213 int *offsets = NULL;
2214 int debug = 0;
2215 int done = 0;
2216 int all_use_dfa = 0;
2217 int verify_jit = 0;
2218 int yield = 0;
2219 int stack_size;
2220
2221 #if !defined NOPOSIX
2222 int posix = 0;
2223 #endif
2224 #if !defined NODFA
2225 int *dfa_workspace = NULL;
2226 #endif
2227
2228 pcre_jit_stack *jit_stack = NULL;
2229
2230 /* These vectors store, end-to-end, a list of zero-terminated captured
2231 substring names, each list itself being terminated by an empty name. Assume
2232 that 1024 is plenty long enough for the few names we'll be testing. It is
2233 easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2234 for the actual memory, to ensure alignment. */
2235
2236 pcre_uint16 copynames[1024];
2237 pcre_uint16 getnames[1024];
2238
2239 #ifdef SUPPORT_PCRE16
2240 pcre_uint16 *cn16ptr;
2241 pcre_uint16 *gn16ptr;
2242 #endif
2243
2244 #ifdef SUPPORT_PCRE8
2245 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2246 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2247 pcre_uint8 *cn8ptr;
2248 pcre_uint8 *gn8ptr;
2249 #endif
2250
2251 /* Get buffers from malloc() so that valgrind will check their misuse when
2252 debugging. They grow automatically when very long lines are read. The 16-bit
2253 buffer (buffer16) is obtained only if needed. */
2254
2255 buffer = (pcre_uint8 *)malloc(buffer_size);
2256 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2257 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2258
2259 /* The outfile variable is static so that new_malloc can use it. */
2260
2261 outfile = stdout;
2262
2263 /* The following _setmode() stuff is some Windows magic that tells its runtime
2264 library to translate CRLF into a single LF character. At least, that's what
2265 I've been told: never having used Windows I take this all on trust. Originally
2266 it set 0x8000, but then I was advised that _O_BINARY was better. */
2267
2268 #if defined(_WIN32) || defined(WIN32)
2269 _setmode( _fileno( stdout ), _O_BINARY );
2270 #endif
2271
2272 /* Get the version number: both pcre_version() and pcre16_version() give the
2273 same answer. We just need to ensure that we call one that is available. */
2274
2275 #ifdef SUPPORT_PCRE8
2276 version = pcre_version();
2277 #else
2278 version = pcre16_version();
2279 #endif
2280
2281 /* Scan options */
2282
2283 while (argc > 1 && argv[op][0] == '-')
2284 {
2285 pcre_uint8 *endptr;
2286 char *arg = argv[op];
2287
2288 if (strcmp(arg, "-m") == 0) showstore = 1;
2289 else if (strcmp(arg, "-s") == 0) force_study = 0;
2290
2291 else if (strncmp(arg, "-s+", 3) == 0)
2292 {
2293 arg += 3;
2294 if (*arg == '+') { arg++; verify_jit = TRUE; }
2295 force_study = 1;
2296 if (*arg == 0)
2297 force_study_options = jit_study_bits[6];
2298 else if (*arg >= '1' && *arg <= '7')
2299 force_study_options = jit_study_bits[*arg - '1'];
2300 else goto BAD_ARG;
2301 }
2302 else if (strcmp(arg, "-16") == 0)
2303 {
2304 #ifdef SUPPORT_PCRE16
2305 use_pcre16 = 1;
2306 #else
2307 printf("** This version of PCRE was built without 16-bit support\n");
2308 exit(1);
2309 #endif
2310 }
2311 else if (strcmp(arg, "-q") == 0) quiet = 1;
2312 else if (strcmp(arg, "-b") == 0) debug = 1;
2313 else if (strcmp(arg, "-i") == 0) showinfo = 1;
2314 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2315 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2316 #if !defined NODFA
2317 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2318 #endif
2319 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2320 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2321 *endptr == 0))
2322 {
2323 op++;
2324 argc--;
2325 }
2326 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2327 {
2328 int both = arg[2] == 0;
2329 int temp;
2330 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2331 *endptr == 0))
2332 {
2333 timeitm = temp;
2334 op++;
2335 argc--;
2336 }
2337 else timeitm = LOOPREPEAT;
2338 if (both) timeit = timeitm;
2339 }
2340 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2341 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2342 *endptr == 0))
2343 {
2344 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
2345 printf("PCRE: -S not supported on this OS\n");
2346 exit(1);
2347 #else
2348 int rc;
2349 struct rlimit rlim;
2350 getrlimit(RLIMIT_STACK, &rlim);
2351 rlim.rlim_cur = stack_size * 1024 * 1024;
2352 rc = setrlimit(RLIMIT_STACK, &rlim);
2353 if (rc != 0)
2354 {
2355 printf("PCRE: setrlimit() failed with error %d\n", rc);
2356 exit(1);
2357 }
2358 op++;
2359 argc--;
2360 #endif
2361 }
2362 #if !defined NOPOSIX
2363 else if (strcmp(arg, "-p") == 0) posix = 1;
2364 #endif
2365 else if (strcmp(arg, "-C") == 0)
2366 {
2367 int rc;
2368 unsigned long int lrc;
2369
2370 if (argc > 2)
2371 {
2372 if (strcmp(argv[op + 1], "linksize") == 0)
2373 {
2374 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2375 printf("%d\n", rc);
2376 yield = rc;
2377 goto EXIT;
2378 }
2379 if (strcmp(argv[op + 1], "pcre8") == 0)
2380 {
2381 #ifdef SUPPORT_PCRE8
2382 printf("1\n");
2383 yield = 1;
2384 #else
2385 printf("0\n");
2386 yield = 0;
2387 #endif
2388 goto EXIT;
2389 }
2390 if (strcmp(argv[op + 1], "pcre16") == 0)
2391 {
2392 #ifdef SUPPORT_PCRE16
2393 printf("1\n");
2394 yield = 1;
2395 #else
2396 printf("0\n");
2397 yield = 0;
2398 #endif
2399 goto EXIT;
2400 }
2401 if (strcmp(argv[op + 1], "utf") == 0)
2402 {
2403 #ifdef SUPPORT_PCRE8
2404 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2405 printf("%d\n", rc);
2406 yield = rc;
2407 #else
2408 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2409 printf("%d\n", rc);
2410 yield = rc;
2411 #endif
2412 goto EXIT;
2413 }
2414 if (strcmp(argv[op + 1], "ucp") == 0)
2415 {
2416 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2417 printf("%d\n", rc);
2418 yield = rc;
2419 goto EXIT;
2420 }
2421 if (strcmp(argv[op + 1], "jit") == 0)
2422 {
2423 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2424 printf("%d\n", rc);
2425 yield = rc;
2426 goto EXIT;
2427 }
2428 if (strcmp(argv[op + 1], "newline") == 0)
2429 {
2430 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2431 /* Note that these values are always the ASCII values, even
2432 in EBCDIC environments. CR is 13 and NL is 10. */
2433 printf("%s\n", (rc == 13)? "CR" :
2434 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2435 (rc == -2)? "ANYCRLF" :
2436 (rc == -1)? "ANY" : "???");
2437 goto EXIT;
2438 }
2439 printf("Unknown -C option: %s\n", argv[op + 1]);
2440 goto EXIT;
2441 }
2442
2443 printf("PCRE version %s\n", version);
2444 printf("Compiled with\n");
2445
2446 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2447 are set, either both UTFs are supported or both are not supported. */
2448
2449 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2450 printf(" 8-bit and 16-bit support\n");
2451 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2452 if (rc)
2453 printf(" UTF-8 and UTF-16 support\n");
2454 else
2455 printf(" No UTF-8 or UTF-16 support\n");
2456 #elif defined SUPPORT_PCRE8
2457 printf(" 8-bit support only\n");
2458 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2459 printf(" %sUTF-8 support\n", rc? "" : "No ");
2460 #else
2461 printf(" 16-bit support only\n");
2462 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2463 printf(" %sUTF-16 support\n", rc? "" : "No ");
2464 #endif
2465
2466 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2467 printf(" %sUnicode properties support\n", rc? "" : "No ");
2468 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2469 if (rc)
2470 {
2471 const char *arch;
2472 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2473 printf(" Just-in-time compiler support: %s\n", arch);
2474 }
2475 else
2476 printf(" No just-in-time compiler support\n");
2477 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2478 /* Note that these values are always the ASCII values, even
2479 in EBCDIC environments. CR is 13 and NL is 10. */
2480 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2481 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2482 (rc == -2)? "ANYCRLF" :
2483 (rc == -1)? "ANY" : "???");
2484 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2485 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2486 "all Unicode newlines");
2487 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2488 printf(" Internal link size = %d\n", rc);
2489 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2490 printf(" POSIX malloc threshold = %d\n", rc);
2491 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2492 printf(" Default match limit = %ld\n", lrc);
2493 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2494 printf(" Default recursion depth limit = %ld\n", lrc);
2495 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2496 printf(" Match recursion uses %s", rc? "stack" : "heap");
2497 if (showstore)
2498 {
2499 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2500 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2501 }
2502 printf("\n");
2503 goto EXIT;
2504 }
2505 else if (strcmp(arg, "-help") == 0 ||
2506 strcmp(arg, "--help") == 0)
2507 {
2508 usage();
2509 goto EXIT;
2510 }
2511 else
2512 {
2513 BAD_ARG:
2514 printf("** Unknown or malformed option %s\n", arg);
2515 usage();
2516 yield = 1;
2517 goto EXIT;
2518 }
2519 op++;
2520 argc--;
2521 }
2522
2523 /* Get the store for the offsets vector, and remember what it was */
2524
2525 size_offsets_max = size_offsets;
2526 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2527 if (offsets == NULL)
2528 {
2529 printf("** Failed to get %d bytes of memory for offsets vector\n",
2530 (int)(size_offsets_max * sizeof(int)));
2531 yield = 1;
2532 goto EXIT;
2533 }
2534
2535 /* Sort out the input and output files */
2536
2537 if (argc > 1)
2538 {
2539 infile = fopen(argv[op], INPUT_MODE);
2540 if (infile == NULL)
2541 {
2542 printf("** Failed to open %s\n", argv[op]);
2543 yield = 1;
2544 goto EXIT;
2545 }
2546 }
2547
2548 if (argc > 2)
2549 {
2550 outfile = fopen(argv[op+1], OUTPUT_MODE);
2551 if (outfile == NULL)
2552 {
2553 printf("** Failed to open %s\n", argv[op+1]);
2554 yield = 1;
2555 goto EXIT;
2556 }
2557 }
2558
2559 /* Set alternative malloc function */
2560
2561 #ifdef SUPPORT_PCRE8
2562 pcre_malloc = new_malloc;
2563 pcre_free = new_free;
2564 pcre_stack_malloc = stack_malloc;
2565 pcre_stack_free = stack_free;
2566 #endif
2567
2568 #ifdef SUPPORT_PCRE16
2569 pcre16_malloc = new_malloc;
2570 pcre16_free = new_free;
2571 pcre16_stack_malloc = stack_malloc;
2572 pcre16_stack_free = stack_free;
2573 #endif
2574
2575 /* Heading line unless quiet, then prompt for first regex if stdin */
2576
2577 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2578
2579 /* Main loop */
2580
2581 while (!done)
2582 {
2583 pcre *re = NULL;
2584 pcre_extra *extra = NULL;
2585
2586 #if !defined NOPOSIX /* There are still compilers that require no indent */
2587 regex_t preg;
2588 int do_posix = 0;
2589 #endif
2590
2591 const char *error;
2592 pcre_uint8 *markptr;
2593 pcre_uint8 *p, *pp, *ppp;
2594 pcre_uint8 *to_file = NULL;
2595 const pcre_uint8 *tables = NULL;
2596 unsigned long int get_options;
2597 unsigned long int true_size, true_study_size = 0;
2598 size_t size, regex_gotten_store;
2599 int do_allcaps = 0;
2600 int do_mark = 0;
2601 int do_study = 0;
2602 int no_force_study = 0;
2603 int do_debug = debug;
2604 int do_G = 0;
2605 int do_g = 0;
2606 int do_showinfo = showinfo;
2607 int do_showrest = 0;
2608 int do_showcaprest = 0;
2609 int do_flip = 0;
2610 int erroroffset, len, delimiter, poffset;
2611
2612 #if !defined NODFA
2613 int dfa_matched = 0;
2614 #endif
2615
2616 use_utf = 0;
2617 debug_lengths = 1;
2618
2619 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2620 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2621 fflush(outfile);
2622
2623 p = buffer;
2624 while (isspace(*p)) p++;
2625 if (*p == 0) continue;
2626
2627 /* See if the pattern is to be loaded pre-compiled from a file. */
2628
2629 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2630 {
2631 pcre_uint32 magic;
2632 pcre_uint8 sbuf[8];
2633 FILE *f;
2634
2635 p++;
2636 if (*p == '!')
2637 {
2638 do_debug = TRUE;
2639 do_showinfo = TRUE;
2640 p++;
2641 }
2642
2643 pp = p + (int)strlen((char *)p);
2644 while (isspace(pp[-1])) pp--;
2645 *pp = 0;
2646
2647 f = fopen((char *)p, "rb");
2648 if (f == NULL)
2649 {
2650 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2651 continue;
2652 }
2653
2654 first_gotten_store = 0;
2655 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2656
2657 true_size =
2658 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2659 true_study_size =
2660 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2661
2662 re = (pcre *)new_malloc(true_size);
2663 if (re == NULL)
2664 {
2665 printf("** Failed to get %d bytes of memory for pcre object\n",
2666 (int)true_size);
2667 yield = 1;
2668 goto EXIT;
2669 }
2670 regex_gotten_store = first_gotten_store;
2671
2672 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2673
2674 magic = ((REAL_PCRE *)re)->magic_number;
2675 if (magic != MAGIC_NUMBER)
2676 {
2677 if (swap_uint32(magic) == MAGIC_NUMBER)
2678 {
2679 do_flip = 1;
2680 }
2681 else
2682 {
2683 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2684 new_free(re);
2685 fclose(f);
2686 continue;
2687 }
2688 }
2689
2690 /* We hide the byte-invert info for little and big endian tests. */
2691 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2692 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2693
2694 /* Now see if there is any following study data. */
2695
2696 if (true_study_size != 0)
2697 {
2698 pcre_study_data *psd;
2699
2700 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2701 extra->flags = PCRE_EXTRA_STUDY_DATA;
2702
2703 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2704 extra->study_data = psd;
2705
2706 if (fread(psd, 1, true_study_size, f) != true_study_size)
2707 {
2708 FAIL_READ:
2709 fprintf(outfile, "Failed to read data from %s\n", p);
2710 if (extra != NULL)
2711 {
2712 PCRE_FREE_STUDY(extra);
2713 }
2714 new_free(re);
2715 fclose(f);
2716 continue;
2717 }
2718 fprintf(outfile, "Study data loaded from %s\n", p);
2719 do_study = 1; /* To get the data output if requested */
2720 }
2721 else fprintf(outfile, "No study data\n");
2722
2723 /* Flip the necessary bytes. */
2724 if (do_flip)
2725 {
2726 int rc;
2727 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2728 if (rc == PCRE_ERROR_BADMODE)
2729 {
2730 /* Simulate the result of the function call below. */
2731 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2732 use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2733 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2734 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2735 new_free(re);
2736 fclose(f);
2737 continue;
2738 }
2739 }
2740
2741 /* Need to know if UTF-8 for printing data strings. */
2742
2743 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2744 {
2745 new_free(re);
2746 fclose(f);
2747 continue;
2748 }
2749 use_utf = (get_options & PCRE_UTF8) != 0;
2750
2751 fclose(f);
2752 goto SHOW_INFO;
2753 }
2754
2755 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2756 the pattern; if it isn't complete, read more. */
2757
2758 delimiter = *p++;
2759
2760 if (isalnum(delimiter) || delimiter == '\\')
2761 {
2762 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2763 goto SKIP_DATA;
2764 }
2765
2766 pp = p;
2767 poffset = (int)(p - buffer);
2768
2769 for(;;)
2770 {
2771 while (*pp != 0)
2772 {
2773 if (*pp == '\\' && pp[1] != 0) pp++;
2774 else if (*pp == delimiter) break;
2775 pp++;
2776 }
2777 if (*pp != 0) break;
2778 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2779 {
2780 fprintf(outfile, "** Unexpected EOF\n");
2781 done = 1;
2782 goto CONTINUE;
2783 }
2784 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2785 }
2786
2787 /* The buffer may have moved while being extended; reset the start of data
2788 pointer to the correct relative point in the buffer. */
2789
2790 p = buffer + poffset;
2791
2792 /* If the first character after the delimiter is backslash, make
2793 the pattern end with backslash. This is purely to provide a way
2794 of testing for the error message when a pattern ends with backslash. */
2795
2796 if (pp[1] == '\\') *pp++ = '\\';
2797
2798 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2799 for callouts. */
2800
2801 *pp++ = 0;
2802 strcpy((char *)pbuffer, (char *)p);
2803
2804 /* Look for options after final delimiter */
2805
2806 options = 0;
2807 study_options = force_study_options;
2808 log_store = showstore; /* default from command line */
2809
2810 while (*pp != 0)
2811 {
2812 switch (*pp++)
2813 {
2814 case 'f': options |= PCRE_FIRSTLINE; break;
2815 case 'g': do_g = 1; break;
2816 case 'i': options |= PCRE_CASELESS; break;
2817 case 'm': options |= PCRE_MULTILINE; break;
2818 case 's': options |= PCRE_DOTALL; break;
2819 case 'x': options |= PCRE_EXTENDED; break;
2820
2821 case '+':
2822 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2823 break;
2824
2825 case '=': do_allcaps = 1; break;
2826 case 'A': options |= PCRE_ANCHORED; break;
2827 case 'B': do_debug = 1; break;
2828 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2829 case 'D': do_debug = do_showinfo = 1; break;
2830 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2831 case 'F': do_flip = 1; break;
2832 case 'G': do_G = 1; break;
2833 case 'I': do_showinfo = 1; break;
2834 case 'J': options |= PCRE_DUPNAMES; break;
2835 case 'K': do_mark = 1; break;
2836 case 'M': log_store = 1; break;
2837 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2838
2839 #if !defined NOPOSIX
2840 case 'P': do_posix = 1; break;
2841 #endif
2842
2843 case 'S':
2844 do_study = 1;
2845 for (;;)
2846 {
2847 switch (*pp++)
2848 {
2849 case 'S':
2850 do_study = 0;
2851 no_force_study = 1;
2852 break;
2853
2854 case '!':
2855 study_options |= PCRE_STUDY_EXTRA_NEEDED;
2856 break;
2857
2858 case '+':
2859 if (*pp == '+')
2860 {
2861 verify_jit = TRUE;
2862 pp++;
2863 }
2864 if (*pp >= '1' && *pp <= '7')
2865 study_options |= jit_study_bits[*pp++ - '1'];
2866 else
2867 study_options |= jit_study_bits[6];
2868 break;
2869
2870 case '-':
2871 study_options &= ~PCRE_STUDY_ALLJIT;
2872 break;
2873
2874 default:
2875 pp--;
2876 goto ENDLOOP;
2877 }
2878 }
2879 ENDLOOP:
2880 break;
2881
2882 case 'U': options |= PCRE_UNGREEDY; break;
2883 case 'W': options |= PCRE_UCP; break;
2884 case 'X': options |= PCRE_EXTRA; break;
2885 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2886 case 'Z': debug_lengths = 0; break;
2887 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2888 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2889
2890 case 'T':
2891 switch (*pp++)
2892 {
2893 case '0': tables = tables0; break;
2894 case '1': tables = tables1; break;
2895
2896 case '\r':
2897 case '\n':
2898 case ' ':
2899 case 0:
2900 fprintf(outfile, "** Missing table number after /T\n");
2901 goto SKIP_DATA;
2902
2903 default:
2904 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2905 goto SKIP_DATA;
2906 }
2907 break;
2908
2909 case 'L':
2910 ppp = pp;
2911 /* The '\r' test here is so that it works on Windows. */
2912 /* The '0' test is just in case this is an unterminated line. */
2913 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2914 *ppp = 0;
2915 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2916 {
2917 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2918 goto SKIP_DATA;
2919 }
2920 locale_set = 1;
2921 tables = PCRE_MAKETABLES;
2922 pp = ppp;
2923 break;
2924
2925 case '>':
2926 to_file = pp;
2927 while (*pp != 0) pp++;
2928 while (isspace(pp[-1])) pp--;
2929 *pp = 0;
2930 break;
2931
2932 case '<':
2933 {
2934 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2935 {
2936 options |= PCRE_JAVASCRIPT_COMPAT;
2937 pp += 3;
2938 }
2939 else
2940 {
2941 int x = check_newline(pp, outfile);
2942 if (x == 0) goto SKIP_DATA;
2943 options |= x;
2944 while (*pp++ != '>');
2945 }
2946 }
2947 break;
2948
2949 case '\r': /* So that it works in Windows */
2950 case '\n':
2951 case ' ':
2952 break;
2953
2954 default:
2955 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2956 goto SKIP_DATA;
2957 }
2958 }
2959
2960 /* Handle compiling via the POSIX interface, which doesn't support the
2961 timing, showing, or debugging options, nor the ability to pass over
2962 local character tables. Neither does it have 16-bit support. */
2963
2964 #if !defined NOPOSIX
2965 if (posix || do_posix)
2966 {
2967 int rc;
2968 int cflags = 0;
2969
2970 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2971 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2972 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2973 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2974 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2975 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2976 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2977
2978 first_gotten_store = 0;
2979 rc = regcomp(&preg, (char *)p, cflags);
2980
2981 /* Compilation failed; go back for another re, skipping to blank line
2982 if non-interactive. */
2983
2984 if (rc != 0)
2985 {
2986 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2987 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2988 goto SKIP_DATA;
2989 }
2990 }
2991
2992 /* Handle compiling via the native interface */
2993
2994 else
2995 #endif /* !defined NOPOSIX */
2996
2997 {
2998 /* In 16-bit mode, convert the input. */
2999
3000 #ifdef SUPPORT_PCRE16
3001 if (use_pcre16)
3002 {
3003 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3004 {
3005 case -1:
3006 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3007 "converted to UTF-16\n");
3008 goto SKIP_DATA;
3009
3010 case -2:
3011 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3012 "cannot be converted to UTF-16\n");
3013 goto SKIP_DATA;
3014
3015 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3016 fprintf(outfile, "**Failed: character value greater than 0xffff "
3017 "cannot be converted to 16-bit in non-UTF mode\n");
3018 goto SKIP_DATA;
3019
3020 default:
3021 break;
3022 }
3023 p = (pcre_uint8 *)buffer16;
3024 }
3025 #endif
3026
3027 /* Compile many times when timing */
3028
3029 if (timeit > 0)
3030 {
3031 register int i;
3032 clock_t time_taken;
3033 clock_t start_time = clock();
3034 for (i = 0; i < timeit; i++)
3035 {
3036 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3037 if (re != NULL) free(re);
3038 }
3039 time_taken = clock() - start_time;
3040 fprintf(outfile, "Compile time %.4f milliseconds\n",
3041 (((double)time_taken * 1000.0) / (double)timeit) /
3042 (double)CLOCKS_PER_SEC);
3043 }
3044
3045 first_gotten_store = 0;
3046 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3047
3048 /* Compilation failed; go back for another re, skipping to blank line
3049 if non-interactive. */
3050
3051 if (re == NULL)
3052 {
3053 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3054 SKIP_DATA:
3055 if (infile != stdin)
3056 {
3057 for (;;)
3058 {
3059 if (extend_inputline(infile, buffer, NULL) == NULL)
3060 {
3061 done = 1;
3062 goto CONTINUE;
3063 }
3064 len = (int)strlen((char *)buffer);
3065 while (len > 0 && isspace(buffer[len-1])) len--;
3066 if (len == 0) break;
3067 }
3068 fprintf(outfile, "\n");
3069 }
3070 goto CONTINUE;
3071 }
3072
3073 /* Compilation succeeded. It is now possible to set the UTF-8 option from
3074 within the regex; check for this so that we know how to process the data
3075 lines. */
3076
3077 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3078 goto SKIP_DATA;
3079 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3080
3081 /* Extract the size for possible writing before possibly flipping it,
3082 and remember the amount of store that was obtained. */
3083
3084 true_size = ((REAL_PCRE *)re)->size;
3085 regex_gotten_store = first_gotten_store;
3086
3087 /* Output code size information if requested */
3088
3089 if (log_store)
3090 fprintf(outfile, "Memory allocation (code space): %d\n",
3091 (int)(first_gotten_store -
3092 sizeof(REAL_PCRE) -
3093 ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3094
3095 /* If -s or /S was present, study the regex to generate additional info to
3096 help with the matching, unless the pattern has the SS option, which
3097 suppresses the effect of /S (used for a few test patterns where studying is
3098 never sensible). */
3099
3100 if (do_study || (force_study >= 0 && !no_force_study))
3101 {
3102 if (timeit > 0)
3103 {
3104 register int i;
3105 clock_t time_taken;
3106 clock_t start_time = clock();
3107 for (i = 0; i < timeit; i++)
3108 {
3109 PCRE_STUDY(extra, re, study_options, &error);
3110 }
3111 time_taken = clock() - start_time;
3112 if (extra != NULL)
3113 {
3114 PCRE_FREE_STUDY(extra);
3115 }
3116 fprintf(outfile, " Study time %.4f milliseconds\n",
3117 (((double)time_taken * 1000.0) / (double)timeit) /
3118 (double)CLOCKS_PER_SEC);
3119 }
3120 PCRE_STUDY(extra, re, study_options, &error);
3121 if (error != NULL)
3122 fprintf(outfile, "Failed to study: %s\n", error);
3123 else if (extra != NULL)
3124 {
3125 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3126 if (log_store)
3127 {
3128 size_t jitsize;
3129 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3130 jitsize != 0)
3131 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3132 }
3133 }
3134 }
3135
3136 /* If /K was present, we set up for handling MARK data. */
3137
3138 if (do_mark)
3139 {
3140 if (extra == NULL)
3141 {
3142 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3143 extra->flags = 0;
3144 }
3145 extra->mark = &markptr;
3146 extra->flags |= PCRE_EXTRA_MARK;
3147 }
3148
3149 /* Extract and display information from the compiled data if required. */
3150
3151 SHOW_INFO:
3152
3153 if (do_debug)
3154 {
3155 fprintf(outfile, "------------------------------------------------------------------\n");
3156 PCRE_PRINTINT(re, outfile, debug_lengths);
3157 }
3158
3159 /* We already have the options in get_options (see above) */
3160
3161 if (do_showinfo)
3162 {
3163 unsigned long int all_options;
3164 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3165 hascrorlf, maxlookbehind;
3166 int nameentrysize, namecount;
3167 const pcre_uint8 *nametable;
3168
3169 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3170 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3171 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3172 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3173 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3174 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3175 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3176 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3177 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3178 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3179 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3180 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3181 != 0)
3182 goto SKIP_DATA;
3183
3184 if (size != regex_gotten_store) fprintf(outfile,
3185 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3186 (int)size, (int)regex_gotten_store);
3187
3188 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3189 if (backrefmax > 0)
3190 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3191
3192 if (namecount > 0)
3193 {
3194 fprintf(outfile, "Named capturing subpatterns:\n");
3195 while (namecount-- > 0)
3196 {
3197 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3198 int imm2_size = use_pcre16 ? 1 : 2;
3199 #else
3200 int imm2_size = IMM2_SIZE;
3201 #endif
3202 int length = (int)STRLEN(nametable + imm2_size);
3203 fprintf(outfile, " ");
3204 PCHARSV(nametable, imm2_size, length, outfile);
3205 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3206 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3207 fprintf(outfile, "%3d\n", use_pcre16?
3208 (int)(((PCRE_SPTR16)nametable)[0])
3209 :((int)nametable[0] << 8) | (int)nametable[1]);
3210 nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3211 #else
3212 fprintf(outfile, "%3d\n", GET2(nametable, 0));
3213 #ifdef SUPPORT_PCRE8
3214 nametable += nameentrysize;
3215 #else
3216 nametable += nameentrysize * 2;
3217 #endif
3218 #endif
3219 }
3220 }
3221
3222 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3223 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3224
3225 all_options = ((REAL_PCRE *)re)->options;
3226 if (do_flip) all_options = swap_uint32(all_options);
3227
3228 if (get_options == 0) fprintf(outfile, "No options\n");
3229 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3230 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3231 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3232 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3233 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3234 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3235 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3236 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3237 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3238 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3239 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3240 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3241 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3242 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3243 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3244 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3245 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3246 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3247
3248 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3249
3250 switch (get_options & PCRE_NEWLINE_BITS)
3251 {
3252 case PCRE_NEWLINE_CR:
3253 fprintf(outfile, "Forced newline sequence: CR\n");
3254 break;
3255
3256 case PCRE_NEWLINE_LF:
3257 fprintf(outfile, "Forced newline sequence: LF\n");
3258 break;
3259
3260 case PCRE_NEWLINE_CRLF:
3261 fprintf(outfile, "Forced newline sequence: CRLF\n");
3262 break;
3263
3264 case PCRE_NEWLINE_ANYCRLF:
3265 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3266 break;
3267
3268 case PCRE_NEWLINE_ANY:
3269 fprintf(outfile, "Forced newline sequence: ANY\n");
3270 break;
3271
3272 default:
3273 break;
3274 }
3275
3276 if (first_char == -1)
3277 {
3278 fprintf(outfile, "First char at start or follows newline\n");
3279 }
3280 else if (first_char < 0)
3281 {
3282 fprintf(outfile, "No first char\n");
3283 }
3284 else
3285 {
3286 const char *caseless =
3287 ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3288 "" : " (caseless)";
3289
3290 if (PRINTOK(first_char))
3291 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3292 else
3293 {
3294 fprintf(outfile, "First char = ");
3295 pchar(first_char, outfile);
3296 fprintf(outfile, "%s\n", caseless);
3297 }
3298 }
3299
3300 if (need_char < 0)
3301 {
3302 fprintf(outfile, "No need char\n");
3303 }
3304 else
3305 {
3306 const char *caseless =
3307 ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3308 "" : " (caseless)";
3309
3310 if (PRINTOK(need_char))
3311 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3312 else
3313 {
3314 fprintf(outfile, "Need char = ");
3315 pchar(need_char, outfile);
3316 fprintf(outfile, "%s\n", caseless);
3317 }
3318 }
3319
3320 if (maxlookbehind > 0)
3321 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3322
3323 /* Don't output study size; at present it is in any case a fixed
3324 value, but it varies, depending on the computer architecture, and
3325 so messes up the test suite. (And with the /F option, it might be
3326 flipped.) If study was forced by an external -s, don't show this
3327 information unless -i or -d was also present. This means that, except
3328 when auto-callouts are involved, the output from runs with and without
3329 -s should be identical. */
3330
3331 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3332 {
3333 if (extra == NULL)
3334 fprintf(outfile, "Study returned NULL\n");
3335 else
3336 {
3337 pcre_uint8 *start_bits = NULL;
3338 int minlength;
3339
3340 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3341 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3342
3343 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3344 {
3345 if (start_bits == NULL)
3346 fprintf(outfile, "No set of starting bytes\n");
3347 else
3348 {
3349 int i;
3350 int c = 24;
3351 fprintf(outfile, "Starting byte set: ");
3352 for (i = 0; i < 256; i++)
3353 {
3354 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3355 {
3356 if (c > 75)
3357 {
3358 fprintf(outfile, "\n ");
3359 c = 2;
3360 }
3361 if (PRINTOK(i) && i != ' ')
3362 {
3363 fprintf(outfile, "%c ", i);
3364 c += 2;
3365 }
3366 else
3367 {
3368 fprintf(outfile, "\\x%02x ", i);
3369 c += 5;
3370 }
3371 }
3372 }
3373 fprintf(outfile, "\n");
3374 }
3375 }
3376 }
3377
3378 /* Show this only if the JIT was set by /S, not by -s. */
3379
3380 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
3381 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
3382 {
3383 int jit;
3384 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3385 {
3386 if (jit)
3387 fprintf(outfile, "JIT study was successful\n");
3388 else
3389 #ifdef SUPPORT_JIT
3390 fprintf(outfile, "JIT study was not successful\n");
3391 #else
3392 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3393 #endif
3394 }
3395 }
3396 }
3397 }
3398
3399 /* If the '>' option was present, we write out the regex to a file, and
3400 that is all. The first 8 bytes of the file are the regex length and then
3401 the study length, in big-endian order. */
3402
3403 if (to_file != NULL)
3404 {
3405 FILE *f = fopen((char *)to_file, "wb");
3406 if (f == NULL)
3407 {
3408 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3409 }
3410 else
3411 {
3412 pcre_uint8 sbuf[8];
3413
3414 if (do_flip) regexflip(re, extra);
3415 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3416 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3417 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3418 sbuf[3] = (pcre_uint8)((true_size) & 255);
3419 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3420 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3421 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3422 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3423
3424 if (fwrite(sbuf, 1, 8, f) < 8 ||
3425 fwrite(re, 1, true_size, f) < true_size)
3426 {
3427 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3428 }
3429 else
3430 {
3431 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3432
3433 /* If there is study data, write it. */
3434
3435 if (extra != NULL)
3436 {
3437 if (fwrite(extra->study_data, 1, true_study_size, f) <
3438 true_study_size)
3439 {
3440 fprintf(outfile, "Write error on %s: %s\n", to_file,
3441 strerror(errno));
3442 }
3443 else fprintf(outfile, "Study data written to %s\n", to_file);
3444 }
3445 }
3446 fclose(f);
3447 }
3448
3449 new_free(re);
3450 if (extra != NULL)
3451 {
3452 PCRE_FREE_STUDY(extra);
3453 }
3454 if (locale_set)
3455 {
3456 new_free((void *)tables);
3457 setlocale(LC_CTYPE, "C");
3458 locale_set = 0;
3459 }
3460 continue; /* With next regex */
3461 }
3462 } /* End of non-POSIX compile */
3463
3464 /* Read data lines and test them */
3465
3466 for (;;)
3467 {
3468 pcre_uint8 *q;
3469 pcre_uint8 *bptr;
3470 int *use_offsets = offsets;
3471 int use_size_offsets = size_offsets;
3472 int callout_data = 0;
3473 int callout_data_set = 0;
3474 int count, c;
3475 int copystrings = 0;
3476 int find_match_limit = default_find_match_limit;
3477 int getstrings = 0;
3478 int getlist = 0;
3479 int gmatched = 0;
3480 int start_offset = 0;
3481 int start_offset_sign = 1;
3482 int g_notempty = 0;
3483 int use_dfa = 0;
3484
3485 *copynames = 0;
3486 *getnames = 0;
3487
3488 #ifdef SUPPORT_PCRE16
3489 cn16ptr = copynames;
3490 gn16ptr = getnames;
3491 #endif
3492 #ifdef SUPPORT_PCRE8
3493 cn8ptr = copynames8;
3494 gn8ptr = getnames8;
3495 #endif
3496
3497 SET_PCRE_CALLOUT(callout);
3498 first_callout = 1;
3499 last_callout_mark = NULL;
3500 callout_extra = 0;
3501 callout_count = 0;
3502 callout_fail_count = 999999;
3503 callout_fail_id = -1;
3504 show_malloc = 0;
3505 options = 0;
3506
3507 if (extra != NULL) extra->flags &=
3508 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3509
3510 len = 0;
3511 for (;;)
3512 {
3513 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3514 {
3515 if (len > 0) /* Reached EOF without hitting a newline */
3516 {
3517 fprintf(outfile, "\n");
3518 break;
3519 }
3520 done = 1;
3521 goto CONTINUE;
3522 }
3523 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3524 len = (int)strlen((char *)buffer);
3525 if (buffer[len-1] == '\n') break;
3526 }
3527
3528 while (len > 0 && isspace(buffer[len-1])) len--;
3529 buffer[len] = 0;
3530 if (len == 0) break;
3531
3532 p = buffer;
3533 while (isspace(*p)) p++;
3534
3535 bptr = q = dbuffer;
3536 while ((c = *p++) != 0)
3537 {
3538 int i = 0;
3539 int n = 0;
3540
3541 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3542 In non-UTF mode, allow the value of the byte to fall through to later,
3543 where values greater than 127 are turned into UTF-8 when running in
3544 16-bit mode. */
3545
3546 if (c != '\\')
3547 {
3548 if (use_utf)
3549 {
3550 *q++ = c;
3551 continue;
3552 }
3553 }
3554
3555 /* Handle backslash escapes */
3556
3557 else switch ((c = *p++))
3558 {
3559 case 'a': c = 7; break;
3560 case 'b': c = '\b'; break;
3561 case 'e': c = 27; break;
3562 case 'f': c = '\f'; break;
3563 case 'n': c = '\n'; break;
3564 case 'r': c = '\r'; break;
3565 case 't': c = '\t'; break;
3566 case 'v': c = '\v'; break;
3567
3568 case '0': case '1': case '2': case '3':
3569 case '4': case '5': case '6': case '7':
3570 c -= '0';
3571 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3572 c = c * 8 + *p++ - '0';
3573 break;
3574
3575 case 'x':
3576 if (*p == '{')
3577 {
3578 pcre_uint8 *pt = p;
3579 c = 0;
3580
3581 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3582 when isxdigit() is a macro that refers to its argument more than
3583 once. This is banned by the C Standard, but apparently happens in at
3584 least one MacOS environment. */
3585
3586 for (pt++; isxdigit(*pt); pt++)
3587 {
3588 if (++i == 9)
3589 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3590 "using only the first eight.\n");
3591 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3592 }
3593 if (*pt == '}')
3594 {
3595 p = pt + 1;
3596 break;
3597 }
3598 /* Not correct form for \x{...}; fall through */
3599 }
3600
3601 /* \x without {} always defines just one byte in 8-bit mode. This
3602 allows UTF-8 characters to be constructed byte by byte, and also allows
3603 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3604 Otherwise, pass it down to later code so that it can be turned into
3605 UTF-8 when running in 16-bit mode. */
3606
3607 c = 0;
3608 while (i++ < 2 && isxdigit(*p))
3609 {
3610 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3611 p++;
3612 }
3613 if (use_utf)
3614 {
3615 *q++ = c;
3616 continue;
3617 }
3618 break;
3619
3620 case 0: /* \ followed by EOF allows for an empty line */
3621 p--;
3622 continue;
3623
3624 case '>':
3625 if (*p == '-')
3626 {
3627 start_offset_sign = -1;
3628 p++;
3629 }
3630 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3631 start_offset *= start_offset_sign;
3632 continue;
3633
3634 case 'A': /* Option setting */
3635 options |= PCRE_ANCHORED;
3636 continue;
3637
3638 case 'B':
3639 options |= PCRE_NOTBOL;
3640 continue;
3641
3642 case 'C':
3643 if (isdigit(*p)) /* Set copy string */
3644 {
3645 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3646 copystrings |= 1 << n;
3647 }
3648 else if (isalnum(*p))
3649 {
3650 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3651 }
3652 else if (*p == '+')
3653 {
3654 callout_extra = 1;
3655 p++;
3656 }
3657 else if (*p == '-')
3658 {
3659 SET_PCRE_CALLOUT(NULL);
3660 p++;
3661 }
3662 else if (*p == '!')
3663 {
3664 callout_fail_id = 0;
3665 p++;
3666 while(isdigit(*p))
3667 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3668 callout_fail_count = 0;
3669 if (*p == '!')
3670 {
3671 p++;
3672 while(isdigit(*p))
3673 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3674 }
3675 }
3676 else if (*p == '*')
3677 {
3678 int sign = 1;
3679 callout_data = 0;
3680 if (*(++p) == '-') { sign = -1; p++; }
3681 while(isdigit(*p))
3682 callout_data = callout_data * 10 + *p++ - '0';
3683 callout_data *= sign;
3684 callout_data_set = 1;
3685 }
3686 continue;
3687
3688 #if !defined NODFA
3689 case 'D':
3690 #if !defined NOPOSIX
3691 if (posix || do_posix)
3692 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3693 else
3694 #endif
3695 use_dfa = 1;
3696 continue;
3697 #endif
3698
3699 #if !defined NODFA
3700 case 'F':
3701 options |= PCRE_DFA_SHORTEST;
3702 continue;
3703 #endif
3704
3705 case 'G':
3706 if (isdigit(*p))
3707 {
3708 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3709 getstrings |= 1 << n;
3710 }
3711 else if (isalnum(*p))
3712 {
3713 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3714 }
3715 continue;
3716
3717 case 'J':
3718 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3719 if (extra != NULL
3720 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3721 && extra->executable_jit != NULL)
3722 {
3723 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3724 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3725 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3726 }
3727 continue;
3728
3729 case 'L':
3730 getlist = 1;
3731 continue;
3732
3733 case 'M':
3734 find_match_limit = 1;
3735 continue;
3736
3737 case 'N':
3738 if ((options & PCRE_NOTEMPTY) != 0)
3739 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3740 else
3741 options |= PCRE_NOTEMPTY;
3742 continue;
3743
3744 case 'O':
3745 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3746 if (n > size_offsets_max)
3747 {
3748 size_offsets_max = n;
3749 free(offsets);
3750 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3751 if (offsets == NULL)
3752 {
3753 printf("** Failed to get %d bytes of memory for offsets vector\n",
3754 (int)(size_offsets_max * sizeof(int)));
3755 yield = 1;
3756 goto EXIT;
3757 }
3758 }
3759 use_size_offsets = n;
3760 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3761 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
3762 continue;
3763
3764 case 'P':
3765 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3766 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3767 continue;
3768
3769 case 'Q':
3770 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3771 if (extra == NULL)
3772 {
3773 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3774 extra->flags = 0;
3775 }
3776 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3777 extra->match_limit_recursion = n;
3778 continue;
3779
3780 case 'q':
3781 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3782 if (extra == NULL)
3783 {
3784 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3785 extra->flags = 0;
3786 }
3787 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3788 extra->match_limit = n;
3789 continue;
3790
3791 #if !defined NODFA
3792 case 'R':
3793 options |= PCRE_DFA_RESTART;
3794 continue;
3795 #endif
3796
3797 case 'S':
3798 show_malloc = 1;
3799 continue;
3800
3801 case 'Y':
3802 options |= PCRE_NO_START_OPTIMIZE;
3803 continue;
3804
3805 case 'Z':
3806 options |= PCRE_NOTEOL;
3807 continue;
3808
3809 case '?':
3810 options |= PCRE_NO_UTF8_CHECK;
3811 continue;
3812
3813 case '<':
3814 {
3815 int x = check_newline(p, outfile);
3816 if (x == 0) goto NEXT_DATA;
3817 options |= x;
3818 while (*p++ != '>');
3819 }
3820 continue;
3821 }
3822
3823 /* We now have a character value in c that may be greater than 255. In
3824 16-bit mode, we always convert characters to UTF-8 so that values greater
3825 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3826 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3827 mode must have come from \x{...} or octal constructs because values from
3828 \x.. get this far only in non-UTF mode. */
3829
3830 #if !defined NOUTF || defined SUPPORT_PCRE16
3831 if (use_pcre16 || use_utf)
3832 {
3833 pcre_uint8 buff8[8];
3834 int ii, utn;
3835 utn = ord2utf8(c, buff8);
3836 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3837 }
3838 else
3839 #endif
3840 {
3841 if (c > 255)
3842 {
3843 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3844 "and UTF-8 mode is not enabled.\n", c);
3845 fprintf(outfile, "** Truncation will probably give the wrong "
3846 "result.\n");
3847 }
3848 *q++ = c;
3849 }
3850 }
3851
3852 /* Reached end of subject string */
3853
3854 *q = 0;
3855 len = (int)(q - dbuffer);
3856
3857 /* Move the data to the end of the buffer so that a read over the end of
3858 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3859 we are using the POSIX interface, we must include the terminating zero. */
3860
3861 #if !defined NOPOSIX
3862 if (posix || do_posix)
3863 {
3864 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3865 bptr += buffer_size - len - 1;
3866 }
3867 else
3868 #endif
3869 {
3870 memmove(bptr + buffer_size - len, bptr, len);
3871 bptr += buffer_size - len;
3872 }
3873
3874 if ((all_use_dfa || use_dfa) && find_match_limit)
3875 {
3876 printf("**Match limit not relevant for DFA matching: ignored\n");
3877 find_match_limit = 0;
3878 }
3879
3880 /* Handle matching via the POSIX interface, which does not
3881 support timing or playing with the match limit or callout data. */
3882
3883 #if !defined NOPOSIX
3884 if (posix || do_posix)
3885 {
3886 int rc;
3887 int eflags = 0;
3888 regmatch_t *pmatch = NULL;
3889 if (use_size_offsets > 0)
3890 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3891 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3892 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3893 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3894
3895 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3896
3897 if (rc != 0)
3898 {
3899 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3900 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3901 }
3902 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3903 != 0)
3904 {
3905 fprintf(outfile, "Matched with REG_NOSUB\n");
3906 }
3907 else
3908 {
3909 size_t i;
3910 for (i = 0; i < (size_t)use_size_offsets; i++)
3911 {
3912 if (pmatch[i].rm_so >= 0)
3913 {
3914 fprintf(outfile, "%2d: ", (int)i);
3915 PCHARSV(dbuffer, pmatch[i].rm_so,
3916 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3917 fprintf(outfile, "\n");
3918 if (do_showcaprest || (i == 0 && do_showrest))
3919 {
3920 fprintf(outfile, "%2d+ ", (int)i);
3921 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3922 outfile);
3923 fprintf(outfile, "\n");
3924 }
3925 }
3926 }
3927 }
3928 free(pmatch);
3929 goto NEXT_DATA;
3930 }
3931
3932 #endif /* !defined NOPOSIX */
3933
3934 /* Handle matching via the native interface - repeats for /g and /G */
3935
3936 #ifdef SUPPORT_PCRE16
3937 if (use_pcre16)
3938 {
3939 len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3940 switch(len)
3941 {
3942 case -1:
3943 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3944 "converted to UTF-16\n");
3945 goto NEXT_DATA;
3946
3947 case -2:
3948 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3949 "cannot be converted to UTF-16\n");
3950 goto NEXT_DATA;
3951
3952 case -3:
3953 fprintf(outfile, "**Failed: character value greater than 0xffff "
3954 "cannot be converted to 16-bit in non-UTF mode\n");
3955 goto NEXT_DATA;
3956
3957 default:
3958 break;
3959 }
3960 bptr = (pcre_uint8 *)buffer16;
3961 }
3962 #endif
3963
3964 /* Ensure that there is a JIT callback if we want to verify that JIT was
3965 actually used. If jit_stack == NULL, no stack has yet been assigned. */
3966
3967 if (verify_jit && jit_stack == NULL && extra != NULL)
3968 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
3969
3970 for (;; gmatched++) /* Loop for /g or /G */
3971 {
3972 markptr = NULL;
3973 jit_was_used = FALSE;
3974
3975 if (timeitm > 0)
3976 {
3977 register int i;
3978 clock_t time_taken;
3979 clock_t start_time = clock();
3980
3981 #if !defined NODFA
3982 if (all_use_dfa || use_dfa)
3983 {
3984 if ((options & PCRE_DFA_RESTART) != 0)
3985 {
3986 fprintf(outfile, "Timing DFA restarts is not supported\n");
3987 break;
3988 }
3989 if (dfa_workspace == NULL)
3990 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
3991 for (i = 0; i < timeitm; i++)
3992 {
3993 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3994 (options | g_notempty), use_offsets, use_size_offsets,
3995 dfa_workspace, DFA_WS_DIMENSION);
3996 }
3997 }
3998 else
3999 #endif
4000
4001 for (i = 0; i < timeitm; i++)
4002 {
4003 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4004 (options | g_notempty), use_offsets, use_size_offsets);
4005 }
4006 time_taken = clock() - start_time;
4007 fprintf(outfile, "Execute time %.4f milliseconds\n",
4008 (((double)time_taken * 1000.0) / (double)timeitm) /
4009 (double)CLOCKS_PER_SEC);
4010 }
4011
4012 /* If find_match_limit is set, we want to do repeated matches with
4013 varying limits in order to find the minimum value for the match limit and
4014 for the recursion limit. The match limits are relevant only to the normal
4015 running of pcre_exec(), so disable the JIT optimization. This makes it
4016 possible to run the same set of tests with and without JIT externally
4017 requested. */
4018
4019 if (find_match_limit)
4020 {
4021 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
4022 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4023 extra->flags = 0;
4024
4025 (void)check_match_limit(re, extra, bptr, len, start_offset,
4026 options|g_notempty, use_offsets, use_size_offsets,
4027 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
4028 PCRE_ERROR_MATCHLIMIT, "match()");
4029
4030 count = check_match_limit(re, extra, bptr, len, start_offset,
4031 options|g_notempty, use_offsets, use_size_offsets,
4032 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
4033 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
4034 }
4035
4036 /* If callout_data is set, use the interface with additional data */
4037
4038 else if (callout_data_set)
4039 {
4040 if (extra == NULL)
4041 {
4042 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4043 extra->flags = 0;
4044 }
4045 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4046 extra->callout_data = &callout_data;
4047 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4048 options | g_notempty, use_offsets, use_size_offsets);
4049 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
4050 }
4051
4052 /* The normal case is just to do the match once, with the default
4053 value of match_limit. */
4054
4055 #if !defined NODFA
4056 else if (all_use_dfa || use_dfa)
4057 {
4058 if (dfa_workspace == NULL)
4059 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4060 if (dfa_matched++ == 0)
4061 dfa_workspace[0] = -1; /* To catch bad restart */
4062 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4063 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
4064 DFA_WS_DIMENSION);
4065 if (count == 0)
4066 {
4067 fprintf(outfile, "Matched, but too many subsidiary matches\n");
4068 count = use_size_offsets/2;
4069 }
4070 }
4071 #endif
4072
4073 else
4074 {
4075 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4076 options | g_notempty, use_offsets, use_size_offsets);
4077 if (count == 0)
4078 {
4079 fprintf(outfile, "Matched, but too many substrings\n");
4080 count = use_size_offsets/3;
4081 }
4082 }
4083
4084 /* Matched */
4085
4086 if (count >= 0)
4087 {
4088 int i, maxcount;
4089 void *cnptr, *gnptr;
4090
4091 #if !defined NODFA
4092 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
4093 #endif
4094 maxcount = use_size_offsets/3;
4095
4096 /* This is a check against a lunatic return value. */
4097
4098 if (count > maxcount)
4099 {
4100 fprintf(outfile,
4101 "** PCRE error: returned count %d is too big for offset size %d\n",
4102 count, use_size_offsets);
4103 count = use_size_offsets/3;
4104 if (do_g || do_G)
4105 {
4106 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
4107 do_g = do_G = FALSE; /* Break g/G loop */
4108 }
4109 }
4110
4111 /* do_allcaps requests showing of all captures in the pattern, to check
4112 unset ones at the end. */
4113
4114 if (do_allcaps)
4115 {
4116 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4117 goto SKIP_DATA;
4118 count++; /* Allow for full match */
4119 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4120 }
4121
4122 /* Output the captured substrings */
4123
4124 for (i = 0; i < count * 2; i += 2)
4125 {
4126 if (use_offsets[i] < 0)
4127 {
4128 if (use_offsets[i] != -1)
4129 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4130 use_offsets[i], i);
4131 if (use_offsets[i+1] != -1)
4132 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4133 use_offsets[i+1], i+1);
4134 fprintf(outfile, "%2d: <unset>\n", i/2);
4135 }
4136 else
4137 {
4138 fprintf(outfile, "%2d: ", i/2);
4139 PCHARSV(bptr, use_offsets[i],
4140 use_offsets[i+1] - use_offsets[i], outfile);
4141 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4142 fprintf(outfile, "\n");
4143 if (do_showcaprest || (i == 0 && do_showrest))
4144 {
4145 fprintf(outfile, "%2d+ ", i/2);
4146 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4147 outfile);
4148 fprintf(outfile, "\n");
4149 }
4150 }
4151 }
4152
4153 if (markptr != NULL)
4154 {
4155 fprintf(outfile, "MK: ");
4156 PCHARSV(markptr, 0, -1, outfile);
4157 fprintf(outfile, "\n");
4158 }
4159
4160 for (i = 0; i < 32; i++)
4161 {
4162 if ((copystrings & (1 << i)) != 0)
4163 {
4164 int rc;
4165 char copybuffer[256];
4166 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4167 copybuffer, sizeof(copybuffer));
4168 if (rc < 0)
4169 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4170 else
4171 {
4172 fprintf(outfile, "%2dC ", i);
4173 PCHARSV(copybuffer, 0, rc, outfile);
4174 fprintf(outfile, " (%d)\n", rc);
4175 }
4176 }
4177 }
4178
4179 cnptr = copynames;
4180 for (;;)
4181 {
4182 int rc;
4183 char copybuffer[256];
4184
4185 if (use_pcre16)
4186 {
4187 if (*(pcre_uint16 *)cnptr == 0) break;
4188 }
4189 else
4190 {
4191 if (*(pcre_uint8 *)cnptr == 0) break;
4192 }
4193
4194 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4195 cnptr, copybuffer, sizeof(copybuffer));
4196
4197 if (rc < 0)
4198 {
4199 fprintf(outfile, "copy substring ");
4200 PCHARSV(cnptr, 0, -1, outfile);
4201 fprintf(outfile, " failed %d\n", rc);
4202 }
4203 else
4204 {
4205 fprintf(outfile, " C ");
4206 PCHARSV(copybuffer, 0, rc, outfile);
4207 fprintf(outfile, " (%d) ", rc);
4208 PCHARSV(cnptr, 0, -1, outfile);
4209 putc('\n', outfile);
4210 }
4211
4212 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4213 }
4214
4215 for (i = 0; i < 32; i++)
4216 {
4217 if ((getstrings & (1 << i)) != 0)
4218 {
4219 int rc;
4220 const char *substring;
4221 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4222 if (rc < 0)
4223 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4224 else
4225 {
4226 fprintf(outfile, "%2dG ", i);
4227 PCHARSV(substring, 0, rc, outfile);
4228 fprintf(outfile, " (%d)\n", rc);
4229 PCRE_FREE_SUBSTRING(substring);
4230 }
4231 }
4232 }
4233
4234 gnptr = getnames;
4235 for (;;)
4236 {
4237 int rc;
4238 const char *substring;
4239
4240 if (use_pcre16)
4241 {
4242 if (*(pcre_uint16 *)gnptr == 0) break;
4243 }
4244 else
4245 {
4246 if (*(pcre_uint8 *)gnptr == 0) break;
4247 }
4248
4249 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4250 gnptr, &substring);
4251 if (rc < 0)
4252 {
4253 fprintf(outfile, "get substring ");
4254 PCHARSV(gnptr, 0, -1, outfile);
4255 fprintf(outfile, " failed %d\n", rc);
4256 }
4257 else
4258 {
4259 fprintf(outfile, " G ");
4260 PCHARSV(substring, 0, rc, outfile);
4261 fprintf(outfile, " (%d) ", rc);
4262 PCHARSV(gnptr, 0, -1, outfile);
4263 PCRE_FREE_SUBSTRING(substring);
4264 putc('\n', outfile);
4265 }
4266
4267 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4268 }
4269
4270 if (getlist)
4271 {
4272 int rc;
4273 const char **stringlist;
4274 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4275 if (rc < 0)
4276 fprintf(outfile, "get substring list failed %d\n", rc);
4277 else
4278 {
4279 for (i = 0; i < count; i++)
4280 {
4281 fprintf(outfile, "%2dL ", i);
4282 PCHARSV(stringlist[i], 0, -1, outfile);
4283 putc('\n', outfile);
4284 }
4285 if (stringlist[i] != NULL)
4286 fprintf(outfile, "string list not terminated by NULL\n");
4287 PCRE_FREE_SUBSTRING_LIST(stringlist);
4288 }
4289 }
4290 }
4291
4292 /* There was a partial match */
4293
4294 else if (count == PCRE_ERROR_PARTIAL)
4295 {
4296 if (markptr == NULL) fprintf(outfile, "Partial match");
4297 else
4298 {
4299 fprintf(outfile, "Partial match, mark=");
4300 PCHARSV(markptr, 0, -1, outfile);
4301 }
4302 if (use_size_offsets > 1)
4303 {
4304 fprintf(outfile, ": ");
4305 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4306 outfile);
4307 }
4308 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4309 fprintf(outfile, "\n");
4310 break; /* Out of the /g loop */
4311 }
4312
4313 /* Failed to match. If this is a /g or /G loop and we previously set
4314 g_notempty after a null match, this is not necessarily the end. We want
4315 to advance the start offset, and continue. We won't be at the end of the
4316 string - that was checked before setting g_notempty.
4317
4318 Complication arises in the case when the newline convention is "any",
4319 "crlf", or "anycrlf". If the previous match was at the end of a line
4320 terminated by CRLF, an advance of one character just passes the \r,
4321 whereas we should prefer the longer newline sequence, as does the code in
4322 pcre_exec(). Fudge the offset value to achieve this. We check for a
4323 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4324 find the default.
4325
4326 Otherwise, in the case of UTF-8 matching, the advance must be one
4327 character, not one byte. */
4328
4329 else
4330 {
4331 if (g_notempty != 0)
4332 {
4333 int onechar = 1;
4334 unsigned int obits = ((REAL_PCRE *)re)->options;
4335 use_offsets[0] = start_offset;
4336 if ((obits & PCRE_NEWLINE_BITS) == 0)
4337 {
4338 int d;
4339 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4340 /* Note that these values are always the ASCII ones, even in
4341 EBCDIC environments. CR = 13, NL = 10. */
4342 obits = (d == 13)? PCRE_NEWLINE_CR :
4343 (d == 10)? PCRE_NEWLINE_LF :
4344 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4345 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4346 (d == -1)? PCRE_NEWLINE_ANY : 0;
4347 }
4348 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4349 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4350 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4351 &&
4352 start_offset < len - 1 &&
4353 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4354 (use_pcre16?
4355 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4356 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4357 :
4358 bptr[start_offset] == '\r'
4359 && bptr[start_offset + 1] == '\n')
4360 #elif defined SUPPORT_PCRE16
4361 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4362 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4363 #else
4364 bptr[start_offset] == '\r'
4365 && bptr[start_offset + 1] == '\n'
4366 #endif
4367 )
4368 onechar++;
4369 else if (use_utf)
4370 {
4371 while (start_offset + onechar < len)
4372 {
4373 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4374 onechar++;
4375 }
4376 }
4377 use_offsets[1] = start_offset + onechar;
4378 }
4379 else
4380 {
4381 switch(count)
4382 {
4383 case PCRE_ERROR_NOMATCH:
4384 if (gmatched == 0)
4385 {
4386 if (markptr == NULL)
4387 {
4388 fprintf(outfile, "No match");
4389 }
4390 else
4391 {
4392 fprintf(outfile, "No match, mark = ");
4393 PCHARSV(markptr, 0, -1, outfile);
4394 }
4395 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4396 putc('\n', outfile);
4397 }
4398 break;
4399
4400 case PCRE_ERROR_BADUTF8:
4401 case PCRE_ERROR_SHORTUTF8:
4402 fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4403 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4404 use_pcre16? "16" : "8");
4405 if (use_size_offsets >= 2)
4406 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4407 use_offsets[1]);
4408 fprintf(outfile, "\n");
4409 break;
4410
4411 case PCRE_ERROR_BADUTF8_OFFSET:
4412 fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4413 use_pcre16? "16" : "8");
4414 break;
4415
4416 default:
4417 if (count < 0 &&
4418 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
4419 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4420 else
4421 fprintf(outfile, "Error %d (Unexpected value)\n", count);
4422 break;
4423 }
4424
4425 break; /* Out of the /g loop */
4426 }
4427 }
4428
4429 /* If not /g or /G we are done */
4430
4431 if (!do_g && !do_G) break;
4432
4433 /* If we have matched an empty string, first check to see if we are at
4434 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4435 Perl's /g option does. This turns out to be rather cunning. First we set
4436 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4437 same point. If this fails (picked up above) we advance to the next
4438 character. */
4439
4440 g_notempty = 0;
4441
4442 if (use_offsets[0] == use_offsets[1])
4443 {
4444 if (use_offsets[0] == len) break;
4445 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4446 }
4447
4448 /* For /g, update the start offset, leaving the rest alone */
4449
4450 if (do_g) start_offset = use_offsets[1];
4451
4452 /* For /G, update the pointer and length */
4453
4454 else
4455 {
4456 bptr += use_offsets[1] * CHAR_SIZE;
4457 len -= use_offsets[1];
4458 }
4459 } /* End of loop for /g and /G */
4460
4461 NEXT_DATA: continue;
4462 } /* End of loop for data lines */
4463
4464 CONTINUE:
4465
4466 #if !defined NOPOSIX
4467 if (posix || do_posix) regfree(&preg);
4468 #endif
4469
4470 if (re != NULL) new_free(re);
4471 if (extra != NULL)
4472 {
4473 PCRE_FREE_STUDY(extra);
4474 }
4475 if (locale_set)
4476 {
4477 new_free((void *)tables);
4478 setlocale(LC_CTYPE, "C");
4479 locale_set = 0;
4480 }
4481 if (jit_stack != NULL)
4482 {
4483 PCRE_JIT_STACK_FREE(jit_stack);
4484 jit_stack = NULL;
4485 }
4486 }
4487
4488 if (infile == stdin) fprintf(outfile, "\n");
4489
4490 EXIT:
4491
4492 if (infile != NULL && infile != stdin) fclose(infile);
4493 if (outfile != NULL && outfile != stdout) fclose(outfile);
4494
4495 free(buffer);
4496 free(dbuffer);
4497 free(pbuffer);
4498 free(offsets);
4499
4500 #ifdef SUPPORT_PCRE16
4501 if (buffer16 != NULL) free(buffer16);
4502 #endif
4503
4504 #if !defined NODFA
4505 if (dfa_workspace != NULL)
4506 free(dfa_workspace);
4507 #endif
4508
4509 return yield;
4510 }
4511
4512 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12