/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 936 - (show annotations) (download)
Sat Feb 25 17:02:23 2012 UTC (2 years, 6 months ago) by ph10
File MIME type: text/plain
File size: 133060 byte(s)
Add support for linking pcretest with libedit instead of libreadline.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49
50 #ifdef HAVE_CONFIG_H
51 #include "config.h"
52 #endif
53
54 #include <ctype.h>
55 #include <stdio.h>
56 #include <string.h>
57 #include <stdlib.h>
58 #include <time.h>
59 #include <locale.h>
60 #include <errno.h>
61
62 /* Both libreadline and libedit are optionally supported. The user-supplied
63 original patch uses readline/readline.h for libedit, but in at least one system
64 it is installed as editline/readline.h, so the configuration code now looks for
65 that first, falling back to readline/readline.h. */
66
67 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
68 #ifdef HAVE_UNISTD_H
69 #include <unistd.h>
70 #endif
71 #if defined(SUPPORT_LIBREADLINE)
72 #include <readline/readline.h>
73 #include <readline/history.h>
74 #else
75 #if defined(HAVE_EDITLINE_READLINE_H)
76 #include <editline/readline.h>
77 #else
78 #include <readline/readline.h>
79 #endif
80 #endif
81 #endif
82
/* A number of things vary for Windows builds. Originally, pcretest opened its
input and output without "b"; then I was told that "b" was needed in some
environments, so it was added for release 5.0 to both the input and output. (It
makes no difference on Unix-like systems.) Later I was told that it is wrong
for the input on Windows. I've now abstracted the modes into two macros that
are set here, to make it easier to fiddle with them, and removed "b" from the
input mode under Windows.

INPUT_MODE and OUTPUT_MODE are string literals; both branches define both. */

#if defined(_WIN32) || defined(WIN32)
#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

#ifndef isatty
#define isatty _isatty         /* This is what Windows calls them, I'm told, */
#endif                         /* though in some environments they seem to */
                               /* be already defined, hence the #ifndefs. */
#ifndef fileno
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#ifdef __BORLANDC__
#define _setmode(handle, mode) setmode(handle, mode)
#endif

/* Not Windows */

#else
#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */
#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"
#endif
119
/* PRIV(name) expands to its argument unchanged.
NOTE(review): presumably this replaces the library-internal symbol prefixing
for the sources included into this file - confirm against pcre_internal.h. */

#define PRIV(name) name
121
122 /* We have to include pcre_internal.h because we need the internal info for
123 displaying the results of pcre_study() and we also need to know about the
124 internal macros, structures, and other internal data values; pcretest has
125 "inside information" compared to a program that strictly follows the PCRE API.
126
127 Although pcre_internal.h does itself include pcre.h, we explicitly include it
128 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
129 appropriately for an application, not for building PCRE. */
130
131 #include "pcre.h"
132
133 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
134 /* Configure internal macros to 16 bit mode. */
135 #define COMPILE_PCRE16
136 #endif
137
138 #include "pcre_internal.h"
139
/* The pcre_printint() function, which prints the internal form of a compiled
regex, is held in a separate file so that (a) it can be compiled in either
8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
when that is compiled in debug mode. Only the prototypes are given here; one
is declared for each library width that is enabled. */

#ifdef SUPPORT_PCRE8
void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
#ifdef SUPPORT_PCRE16
void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
151
152 /* We need access to some of the data tables that PCRE uses. So as not to have
153 to keep two copies, we include the source file here, changing the names of the
154 external symbols to prevent clashes. */
155
156 #define PCRE_INCLUDED
157
158 #include "pcre_tables.c"
159
/* The definition of the macro PRINTABLE, which determines whether to print an
output character as-is or as a hex value when showing compiled patterns, is
the same as in the printint.src file. We use it here in cases when the locale
has not been explicitly changed, so as to get consistent output from systems
that differ in their output from isprint() even in the "C" locale. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK(c) is non-zero when c should be output as-is. When locale_set is
non-zero the decision is delegated to isprint(); otherwise PRINTABLE is used.
isprint() is only defined for arguments representable as unsigned char (or
EOF), so any value outside 0..255 is treated as not printable instead of being
passed through. Note that c is evaluated more than once. */

#define PRINTOK(c) \
  (locale_set? ((c) >= 0 && (c) <= 255 && isprint(c)) : PRINTABLE(c))
173
/* Posix support is disabled in 16 bit only mode: NOPOSIX is forced on when
only SUPPORT_PCRE16 is defined and the builder has not already set it. */

#if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
#define NOPOSIX
#endif
178
179 /* It is possible to compile this test program without including support for
180 testing the POSIX interface, though this is not available via the standard
181 Makefile. */
182
183 #if !defined NOPOSIX
184 #include "pcreposix.h"
185 #endif
186
/* It is also possible, originally for the benefit of a version that was
imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
automatically cut out the UTF support if PCRE is built without it. */

#if !defined SUPPORT_UTF && !defined NOUTF
#define NOUTF
#endif
197
198 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
199 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
200 only from one place and is handled differently). I couldn't dream up any way of
201 using a single macro to do this in a generic way, because of the many different
202 argument requirements. We know that at least one of SUPPORT_PCRE8 and
203 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
204 use these in the definitions of generic macros.
205
206 **** Special note about the PCHARSxxx macros: the address of the string to be
207 printed is always given as two arguments: a base address followed by an offset.
208 The base address is cast to the correct data size for 8 or 16 bit data; the
209 offset is in units of this size. If the string were given as base+offset in one
210 argument, the casting might be incorrectly applied. */
211
212 #ifdef SUPPORT_PCRE8
213
/* 8-bit wrappers. Each macro forwards to the correspondingly named pcre_xxx()
function or local helper, casting string arguments to the pointer type used by
the 8-bit call. Macros whose first parameter is a result variable (lv, rc,
count, re, extra, n, p) expand to an assignment to that variable, so they must
be used as statements. */

/* The offset is parenthesised so that an expression argument is added to the
cast base pointer as a whole. */

#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + (offset), len, f)

#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + (offset), len, f)

/* cn16 is accepted for signature parity with the 16-bit form and is unused. */

#define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
  p = read_capture_name8(p, cn8, re)

#define STRLEN8(p) ((int)strlen((char *)(p)))

#define SET_PCRE_CALLOUT8(callout) \
  pcre_callout = callout

#define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
  pcre_assign_jit_stack(extra, callback, userdata)

#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  re = pcre_compile((char *)pat, options, error, erroffset, tables)

#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
    (char *)namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets)

#define PCRE_FREE_STUDY8(extra) \
  pcre_free_study(extra)

#define PCRE_FREE_SUBSTRING8(substring) \
  pcre_free_substring(substring)

#define PCRE_FREE_SUBSTRING_LIST8(listptr) \
  pcre_free_substring_list(listptr)

#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
    (char *)getnamesptr, subsptr)

/* NOTE(review): the second parameter is named rc, but the expansion uses the
identifier re taken from the invoking scope, so the argument passed in that
position is ignored. Confirm which was intended before changing it. */

#define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)

#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
  rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
  rc = pcre_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT8(re, outfile, debug_lengths) \
  pcre_printint(re, outfile, debug_lengths)

#define PCRE_STUDY8(extra, re, options, error) \
  extra = pcre_study(re, options, error)

#define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
  pcre_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE8(stack) \
  pcre_jit_stack_free(stack)
289
290 #endif /* SUPPORT_PCRE8 */
291
292 /* -----------------------------------------------------------*/
293
294 #ifdef SUPPORT_PCRE16
295
/* 16-bit wrappers. Each macro forwards to the correspondingly named
pcre16_xxx() function or local helper, casting the generic (8-bit typed)
arguments to the 16-bit pointer types and casting results back where the
generic code stores them in 8-bit typed variables. Macros whose first
parameter is a result variable expand to an assignment to it, so they must be
used as statements.

The pointer arguments are deliberately cast without extra parentheses around
the argument: callers pass plain base pointers, and parenthesising here would
change the unit of any pointer arithmetic inside an argument. */

#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + (offset), len, f)

#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + (offset), len, f)

/* cn8 is accepted for signature parity with the 8-bit form and is unused. */

#define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
  p = read_capture_name16(p, cn16, re)

#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))

#define SET_PCRE_CALLOUT16(callout) \
  pcre16_callout = (int (*)(pcre16_callout_block *))callout

#define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
  pcre16_assign_jit_stack((pcre16_extra *)extra, \
    (pcre16_jit_callback)callback, userdata)

#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
    tables)

/* The size argument is halved before being passed on; it is parenthesised so
that an expression argument is halved as a whole. */

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, (size)/2)

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_UCHAR16 *)cbuffer, (size)/2)

#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
    (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
    workspace, size_workspace)

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
    len, start_offset, options, offsets, size_offsets)

#define PCRE_FREE_STUDY16(extra) \
  pcre16_free_study((pcre16_extra *)extra)

#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)substring)

#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)listptr)

#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)

/* NOTE(review): the second parameter is named rc, but the expansion uses the
identifier re taken from the invoking scope, so the argument passed in that
position is ignored. Confirm which was intended before changing it. */

#define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
  n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)

#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SPTR16 *)(void*)subsptr)

#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16 **)(void*)listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
  rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
    tables)

#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  pcre16_printint(re, outfile, debug_lengths)

#define PCRE_STUDY16(extra, re, options, error) \
  extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)

#define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
  (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE16(stack) \
  pcre16_jit_stack_free((pcre16_jit_stack *)stack)
378
379 #endif /* SUPPORT_PCRE16 */
380
381
/* Generic call macros. Three mutually exclusive configurations follow:
both widths supported (dispatch at run time on use_pcre16), 8-bit only, and
16-bit only (plain aliases for the width-specific macros).

----- Both modes are supported; a runtime test is needed, except for
pcre_config(), and the JIT stack functions, when it doesn't matter which
version is called. -----

The statement-form macros below expand to a complete if/else whose final
branch has no terminating semicolon; the invocation must supply it. */

#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16

#define CHAR_SIZE (use_pcre16? 2:1)

#define PCHARS(lv, p, offset, len, f) \
  if (use_pcre16) \
    PCHARS16(lv, p, offset, len, f); \
  else \
    PCHARS8(lv, p, offset, len, f)

#define PCHARSV(p, offset, len, f) \
  if (use_pcre16) \
    PCHARSV16(p, offset, len, f); \
  else \
    PCHARSV8(p, offset, len, f)

#define READ_CAPTURE_NAME(p, cn8, cn16, re) \
  if (use_pcre16) \
    READ_CAPTURE_NAME16(p, cn8, cn16, re); \
  else \
    READ_CAPTURE_NAME8(p, cn8, cn16, re)

#define SET_PCRE_CALLOUT(callout) \
  if (use_pcre16) \
    SET_PCRE_CALLOUT16(callout); \
  else \
    SET_PCRE_CALLOUT8(callout)

#define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  if (use_pcre16) \
    PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
  else \
    PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  if (use_pcre16) \
    PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
  else \
    PCRE_COMPILE8(re, pat, options, error, erroffset, tables)

#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  if (use_pcre16) \
    PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size); \
  else \
    PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  if (use_pcre16) \
    PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
  else \
    PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  if (use_pcre16) \
    PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace); \
  else \
    PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  if (use_pcre16) \
    PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets); \
  else \
    PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets)

#define PCRE_FREE_STUDY(extra) \
  if (use_pcre16) \
    PCRE_FREE_STUDY16(extra); \
  else \
    PCRE_FREE_STUDY8(extra)

#define PCRE_FREE_SUBSTRING(substring) \
  if (use_pcre16) \
    PCRE_FREE_SUBSTRING16(substring); \
  else \
    PCRE_FREE_SUBSTRING8(substring)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  if (use_pcre16) \
    PCRE_FREE_SUBSTRING_LIST16(listptr); \
  else \
    PCRE_FREE_SUBSTRING_LIST8(listptr)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  if (use_pcre16) \
    PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr); \
  else \
    PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr)

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  if (use_pcre16) \
    PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
  else \
    PCRE_GET_STRINGNUMBER8(n, rc, ptr)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  if (use_pcre16) \
    PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
  else \
    PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  if (use_pcre16) \
    PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
  else \
    PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  (use_pcre16 ? \
     PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
    :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))

#define PCRE_JIT_STACK_FREE(stack) \
  if (use_pcre16) \
    PCRE_JIT_STACK_FREE16(stack); \
  else \
    PCRE_JIT_STACK_FREE8(stack)

#define PCRE_MAKETABLES \
  (use_pcre16? pcre16_maketables() : pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  if (use_pcre16) \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
  else \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  if (use_pcre16) \
    PCRE_PRINTINT16(re, outfile, debug_lengths); \
  else \
    PCRE_PRINTINT8(re, outfile, debug_lengths)

#define PCRE_STUDY(extra, re, options, error) \
  if (use_pcre16) \
    PCRE_STUDY16(extra, re, options, error); \
  else \
    PCRE_STUDY8(extra, re, options, error)

/* ----- Only 8-bit mode is supported ----- */

#elif defined SUPPORT_PCRE8
#define CHAR_SIZE                        1
#define PCHARS                           PCHARS8
#define PCHARSV                          PCHARSV8
#define READ_CAPTURE_NAME                READ_CAPTURE_NAME8
#define SET_PCRE_CALLOUT                 SET_PCRE_CALLOUT8
#define STRLEN                           STRLEN8
#define PCRE_ASSIGN_JIT_STACK            PCRE_ASSIGN_JIT_STACK8
#define PCRE_COMPILE                     PCRE_COMPILE8
#define PCRE_CONFIG                      pcre_config
#define PCRE_COPY_NAMED_SUBSTRING        PCRE_COPY_NAMED_SUBSTRING8
#define PCRE_COPY_SUBSTRING              PCRE_COPY_SUBSTRING8
#define PCRE_DFA_EXEC                    PCRE_DFA_EXEC8
#define PCRE_EXEC                        PCRE_EXEC8
#define PCRE_FREE_STUDY                  PCRE_FREE_STUDY8
#define PCRE_FREE_SUBSTRING              PCRE_FREE_SUBSTRING8
#define PCRE_FREE_SUBSTRING_LIST         PCRE_FREE_SUBSTRING_LIST8
#define PCRE_GET_NAMED_SUBSTRING         PCRE_GET_NAMED_SUBSTRING8
#define PCRE_GET_STRINGNUMBER            PCRE_GET_STRINGNUMBER8
#define PCRE_GET_SUBSTRING               PCRE_GET_SUBSTRING8
#define PCRE_GET_SUBSTRING_LIST          PCRE_GET_SUBSTRING_LIST8
#define PCRE_JIT_STACK_ALLOC             PCRE_JIT_STACK_ALLOC8
#define PCRE_JIT_STACK_FREE              PCRE_JIT_STACK_FREE8
#define PCRE_MAKETABLES                  pcre_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER  PCRE_PATTERN_TO_HOST_BYTE_ORDER8
#define PCRE_PRINTINT                    PCRE_PRINTINT8
#define PCRE_STUDY                       PCRE_STUDY8

/* ----- Only 16-bit mode is supported ----- */

#else
#define CHAR_SIZE                        2
#define PCHARS                           PCHARS16
#define PCHARSV                          PCHARSV16
#define READ_CAPTURE_NAME                READ_CAPTURE_NAME16
#define SET_PCRE_CALLOUT                 SET_PCRE_CALLOUT16
#define STRLEN                           STRLEN16
#define PCRE_ASSIGN_JIT_STACK            PCRE_ASSIGN_JIT_STACK16
#define PCRE_COMPILE                     PCRE_COMPILE16
#define PCRE_CONFIG                      pcre16_config
#define PCRE_COPY_NAMED_SUBSTRING        PCRE_COPY_NAMED_SUBSTRING16
#define PCRE_COPY_SUBSTRING              PCRE_COPY_SUBSTRING16
#define PCRE_DFA_EXEC                    PCRE_DFA_EXEC16
#define PCRE_EXEC                        PCRE_EXEC16
#define PCRE_FREE_STUDY                  PCRE_FREE_STUDY16
#define PCRE_FREE_SUBSTRING              PCRE_FREE_SUBSTRING16
#define PCRE_FREE_SUBSTRING_LIST         PCRE_FREE_SUBSTRING_LIST16
#define PCRE_GET_NAMED_SUBSTRING         PCRE_GET_NAMED_SUBSTRING16
#define PCRE_GET_STRINGNUMBER            PCRE_GET_STRINGNUMBER16
#define PCRE_GET_SUBSTRING               PCRE_GET_SUBSTRING16
#define PCRE_GET_SUBSTRING_LIST          PCRE_GET_SUBSTRING_LIST16
#define PCRE_JIT_STACK_ALLOC             PCRE_JIT_STACK_ALLOC16
#define PCRE_JIT_STACK_FREE              PCRE_JIT_STACK_FREE16
#define PCRE_MAKETABLES                  pcre16_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER  PCRE_PATTERN_TO_HOST_BYTE_ORDER16
#define PCRE_PRINTINT                    PCRE_PRINTINT16
#define PCRE_STUDY                       PCRE_STUDY16
#endif
600
601 /* ----- End of mode-specific function call macros ----- */
602
603
/* Other parameters */

/* Provide CLOCKS_PER_SEC when <time.h> does not: use CLK_TCK if that exists,
otherwise fall back to 100. */

#ifndef CLOCKS_PER_SEC
#ifdef CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif

/* This is the default loop count for timing. */

#define LOOPREPEAT 500000
617
/* Static variables. Those without an explicit initializer start at zero (or
NULL) because they have static storage duration. */

static FILE *outfile;
static int log_store = 0;
static int callout_count;
static int callout_extra;
static int callout_fail_count;
static int callout_fail_id;
static int debug_lengths;
static int first_callout;
static int jit_was_used;
static int locale_set = 0;
static int show_malloc;
static int use_utf;
static size_t gotten_store;
static size_t first_gotten_store = 0;
static const unsigned char *last_callout_mark = NULL;
635
636 /* The buffers grow automatically if very long input lines are encountered. */
637
638 static int buffer_size = 50000;
639 static pcre_uint8 *buffer = NULL;
640 static pcre_uint8 *dbuffer = NULL;
641 static pcre_uint8 *pbuffer = NULL;
642
/* Another buffer is needed for translation to 16-bit character strings. It
will be obtained and extended as required. */

#ifdef SUPPORT_PCRE16
static int buffer16_size = 0;
static pcre_uint16 *buffer16 = NULL;

#ifdef SUPPORT_PCRE8

/* We need the table of operator lengths that is used for 16-bit compiling, in
order to swap bytes in a pattern for saving/reloading testing. Luckily, the
data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
appropriately for the 16-bit world. Just as a safety check, make sure that
COMPILE_PCRE16 is *not* set. */

#ifdef COMPILE_PCRE16
#error COMPILE_PCRE16 must not be set when compiling pcretest.c
#endif

/* Remap LINK_SIZE: 2 becomes 1, and 3 or 4 becomes 2. */

#if LINK_SIZE == 2
#undef LINK_SIZE
#define LINK_SIZE 1
#elif LINK_SIZE == 3 || LINK_SIZE == 4
#undef LINK_SIZE
#define LINK_SIZE 2
#else
#error LINK_SIZE must be either 2, 3, or 4
#endif

#undef IMM2_SIZE
#define IMM2_SIZE 1

#endif /* SUPPORT_PCRE8 */

/* Expanded after the adjustments above, so OP_LENGTHS sees the remapped
LINK_SIZE and IMM2_SIZE when both widths are supported. */

static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
#endif /* SUPPORT_PCRE16 */
679
/* If we have 8-bit support, default use_pcre16 to false; if there is also
16-bit support, it can be changed by an option. If there is no 8-bit support,
there must be 16-bit support, so default it to 1. */

#ifdef SUPPORT_PCRE8
static int use_pcre16 = 0;
#else
static int use_pcre16 = 1;
#endif
689
690 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
691
692 static int jit_study_bits[] =
693 {
694 PCRE_STUDY_JIT_COMPILE,
695 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
696 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
697 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
698 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
699 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
700 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
701 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
702 };
703
/* Textual explanations for runtime error codes. Entries that are NULL have no
text here because, as the per-entry comments say, they are either not errors,
never returned, or handled specially. NOTE(review): presumably the table is
indexed by the negated PCRE error code - confirm at the point of use. */

static const char *errtexts[] = {
  NULL,  /* 0 is no error */
  NULL,  /* NOMATCH is handled specially */
  "NULL argument passed",
  "bad option value",
  "magic number missing",
  "unknown opcode - pattern overwritten?",
  "no more memory",
  NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  "match limit exceeded",
  "callout error code",
  NULL,  /* BADUTF8/16 is handled specially */
  NULL,  /* BADUTF8/16 offset is handled specially */
  NULL,  /* PARTIAL is handled specially */
  "not used - internal error",
  "internal error - pattern overwritten?",
  "bad count value",
  "item unsupported for DFA matching",
  "backreference condition or recursion test not supported for DFA matching",
  "match limit not supported for DFA matching",
  "workspace size exceeded in DFA matching",
  "too much recursion for DFA matching",
  "recursion limit exceeded",
  "not used - internal error",
  "invalid combination of newline options",
  "bad offset value",
  NULL,  /* SHORTUTF8/16 is handled specially */
  "nested recursion at the same subject position",
  "JIT stack limit reached",
  "pattern compiled in wrong mode: 8-bit/16-bit error"
};
737
738
739 /*************************************************
740 * Alternate character tables *
741 *************************************************/
742
743 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
744 using the default tables of the library. However, the T option can be used to
745 select alternate sets of tables, for different kinds of testing. Note also that
746 the L (locale) option also adjusts the tables. */
747
748 /* This is the set of tables distributed as default with PCRE. It recognizes
749 only ASCII characters. */
750
751 static const pcre_uint8 tables0[] = {
752
753 /* This table is a lower casing table. */
754
755 0, 1, 2, 3, 4, 5, 6, 7,
756 8, 9, 10, 11, 12, 13, 14, 15,
757 16, 17, 18, 19, 20, 21, 22, 23,
758 24, 25, 26, 27, 28, 29, 30, 31,
759 32, 33, 34, 35, 36, 37, 38, 39,
760 40, 41, 42, 43, 44, 45, 46, 47,
761 48, 49, 50, 51, 52, 53, 54, 55,
762 56, 57, 58, 59, 60, 61, 62, 63,
763 64, 97, 98, 99,100,101,102,103,
764 104,105,106,107,108,109,110,111,
765 112,113,114,115,116,117,118,119,
766 120,121,122, 91, 92, 93, 94, 95,
767 96, 97, 98, 99,100,101,102,103,
768 104,105,106,107,108,109,110,111,
769 112,113,114,115,116,117,118,119,
770 120,121,122,123,124,125,126,127,
771 128,129,130,131,132,133,134,135,
772 136,137,138,139,140,141,142,143,
773 144,145,146,147,148,149,150,151,
774 152,153,154,155,156,157,158,159,
775 160,161,162,163,164,165,166,167,
776 168,169,170,171,172,173,174,175,
777 176,177,178,179,180,181,182,183,
778 184,185,186,187,188,189,190,191,
779 192,193,194,195,196,197,198,199,
780 200,201,202,203,204,205,206,207,
781 208,209,210,211,212,213,214,215,
782 216,217,218,219,220,221,222,223,
783 224,225,226,227,228,229,230,231,
784 232,233,234,235,236,237,238,239,
785 240,241,242,243,244,245,246,247,
786 248,249,250,251,252,253,254,255,
787
788 /* This table is a case flipping table. */
789
790 0, 1, 2, 3, 4, 5, 6, 7,
791 8, 9, 10, 11, 12, 13, 14, 15,
792 16, 17, 18, 19, 20, 21, 22, 23,
793 24, 25, 26, 27, 28, 29, 30, 31,
794 32, 33, 34, 35, 36, 37, 38, 39,
795 40, 41, 42, 43, 44, 45, 46, 47,
796 48, 49, 50, 51, 52, 53, 54, 55,
797 56, 57, 58, 59, 60, 61, 62, 63,
798 64, 97, 98, 99,100,101,102,103,
799 104,105,106,107,108,109,110,111,
800 112,113,114,115,116,117,118,119,
801 120,121,122, 91, 92, 93, 94, 95,
802 96, 65, 66, 67, 68, 69, 70, 71,
803 72, 73, 74, 75, 76, 77, 78, 79,
804 80, 81, 82, 83, 84, 85, 86, 87,
805 88, 89, 90,123,124,125,126,127,
806 128,129,130,131,132,133,134,135,
807 136,137,138,139,140,141,142,143,
808 144,145,146,147,148,149,150,151,
809 152,153,154,155,156,157,158,159,
810 160,161,162,163,164,165,166,167,
811 168,169,170,171,172,173,174,175,
812 176,177,178,179,180,181,182,183,
813 184,185,186,187,188,189,190,191,
814 192,193,194,195,196,197,198,199,
815 200,201,202,203,204,205,206,207,
816 208,209,210,211,212,213,214,215,
817 216,217,218,219,220,221,222,223,
818 224,225,226,227,228,229,230,231,
819 232,233,234,235,236,237,238,239,
820 240,241,242,243,244,245,246,247,
821 248,249,250,251,252,253,254,255,
822
823 /* This table contains bit maps for various character classes. Each map is 32
824 bytes long and the bits run from the least significant end of each byte. The
825 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
826 graph, print, punct, and cntrl. Other classes are built from combinations. */
827
828 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
829 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
830 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
831 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
832
833 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
834 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
835 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
836 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
837
838 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
839 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
840 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
841 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
842
843 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
844 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
845 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
846 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
847
848 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
850 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
851 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
852
853 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
854 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
855 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
856 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
857
858 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
859 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
860 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
861 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
862
863 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
864 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
865 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
866 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
867
868 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
869 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
870 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
871 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
872
873 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
874 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
875 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
876 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
877
878 /* This table identifies various classes of character by individual bits:
879 0x01 white space character
880 0x02 letter
881 0x04 decimal digit
882 0x08 hexadecimal digit
883 0x10 alphanumeric or '_'
884 0x80 regular expression metacharacter or binary zero
885 */
886
887 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
888 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
889 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
890 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
891 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
892 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
893 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
894 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
895 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
896 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
897 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
898 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
899 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
900 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
901 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
902 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
903 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
904 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
905 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
906 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
907 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
908 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
909 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
910 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
911 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
912 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
913 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
914 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
915 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
916 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
917 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
918 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
919
920 /* This is a set of tables that came originally from a Windows user. It seems to
921 be at least an approximation of ISO 8859. In particular, there are characters
922 greater than 128 that are marked as spaces, letters, etc. */
923
/* tables1 is one flat byte array of 1088 entries. The data falls into four
consecutive sections: 256 + 256 + 320 + 256 bytes. The section notes below are
read off the values themselves. */
924 static const pcre_uint8 tables1[] = {
/* Section 1 (256 bytes): lower-casing map. Entry N is the lower-case
equivalent of character N: 65-90 map to 97-122, and 192-222 map to 224-254
except that 215 is left unchanged. Every other entry maps to itself. */
925 0,1,2,3,4,5,6,7,
926 8,9,10,11,12,13,14,15,
927 16,17,18,19,20,21,22,23,
928 24,25,26,27,28,29,30,31,
929 32,33,34,35,36,37,38,39,
930 40,41,42,43,44,45,46,47,
931 48,49,50,51,52,53,54,55,
932 56,57,58,59,60,61,62,63,
933 64,97,98,99,100,101,102,103,
934 104,105,106,107,108,109,110,111,
935 112,113,114,115,116,117,118,119,
936 120,121,122,91,92,93,94,95,
937 96,97,98,99,100,101,102,103,
938 104,105,106,107,108,109,110,111,
939 112,113,114,115,116,117,118,119,
940 120,121,122,123,124,125,126,127,
941 128,129,130,131,132,133,134,135,
942 136,137,138,139,140,141,142,143,
943 144,145,146,147,148,149,150,151,
944 152,153,154,155,156,157,158,159,
945 160,161,162,163,164,165,166,167,
946 168,169,170,171,172,173,174,175,
947 176,177,178,179,180,181,182,183,
948 184,185,186,187,188,189,190,191,
949 224,225,226,227,228,229,230,231,
950 232,233,234,235,236,237,238,239,
951 240,241,242,243,244,245,246,215,
952 248,249,250,251,252,253,254,223,
953 224,225,226,227,228,229,230,231,
954 232,233,234,235,236,237,238,239,
955 240,241,242,243,244,245,246,247,
956 248,249,250,251,252,253,254,255,
/* Section 2 (256 bytes): case-flipping map. Entry N is character N with its
case swapped: 65-90 <-> 97-122 and 192-222 <-> 224-254, with 215 and 247 left
unchanged. Every other entry maps to itself. */
957 0,1,2,3,4,5,6,7,
958 8,9,10,11,12,13,14,15,
959 16,17,18,19,20,21,22,23,
960 24,25,26,27,28,29,30,31,
961 32,33,34,35,36,37,38,39,
962 40,41,42,43,44,45,46,47,
963 48,49,50,51,52,53,54,55,
964 56,57,58,59,60,61,62,63,
965 64,97,98,99,100,101,102,103,
966 104,105,106,107,108,109,110,111,
967 112,113,114,115,116,117,118,119,
968 120,121,122,91,92,93,94,95,
969 96,65,66,67,68,69,70,71,
970 72,73,74,75,76,77,78,79,
971 80,81,82,83,84,85,86,87,
972 88,89,90,123,124,125,126,127,
973 128,129,130,131,132,133,134,135,
974 136,137,138,139,140,141,142,143,
975 144,145,146,147,148,149,150,151,
976 152,153,154,155,156,157,158,159,
977 160,161,162,163,164,165,166,167,
978 168,169,170,171,172,173,174,175,
979 176,177,178,179,180,181,182,183,
980 184,185,186,187,188,189,190,191,
981 224,225,226,227,228,229,230,231,
982 232,233,234,235,236,237,238,239,
983 240,241,242,243,244,245,246,215,
984 248,249,250,251,252,253,254,223,
985 192,193,194,195,196,197,198,199,
986 200,201,202,203,204,205,206,207,
987 208,209,210,211,212,213,214,247,
988 216,217,218,219,220,221,222,255,
/* Section 3 (320 bytes): ten 32-byte character-class bit maps, four source
lines per map. NOTE(review): presumably in the same order as the class maps of
the preceding table in this file (space, xdigit, digit, upper, lower, word,
graph, print, punct, cntrl) - confirm against the code that indexes them. */
989 0,62,0,0,1,0,0,0,
990 0,0,0,0,0,0,0,0,
991 32,0,0,0,1,0,0,0,
992 0,0,0,0,0,0,0,0,
993 0,0,0,0,0,0,255,3,
994 126,0,0,0,126,0,0,0,
995 0,0,0,0,0,0,0,0,
996 0,0,0,0,0,0,0,0,
997 0,0,0,0,0,0,255,3,
998 0,0,0,0,0,0,0,0,
999 0,0,0,0,0,0,12,2,
1000 0,0,0,0,0,0,0,0,
1001 0,0,0,0,0,0,0,0,
1002 254,255,255,7,0,0,0,0,
1003 0,0,0,0,0,0,0,0,
1004 255,255,127,127,0,0,0,0,
1005 0,0,0,0,0,0,0,0,
1006 0,0,0,0,254,255,255,7,
1007 0,0,0,0,0,4,32,4,
1008 0,0,0,128,255,255,127,255,
1009 0,0,0,0,0,0,255,3,
1010 254,255,255,135,254,255,255,7,
1011 0,0,0,0,0,4,44,6,
1012 255,255,127,255,255,255,127,255,
1013 0,0,0,0,254,255,255,255,
1014 255,255,255,255,255,255,255,127,
1015 0,0,0,0,254,255,255,255,
1016 255,255,255,255,255,255,255,255,
1017 0,2,0,0,255,255,255,255,
1018 255,255,255,255,255,255,255,127,
1019 0,0,0,0,255,255,255,255,
1020 255,255,255,255,255,255,255,255,
1021 0,0,0,0,254,255,0,252,
1022 1,0,0,248,1,0,0,120,
1023 0,0,0,0,254,255,255,255,
1024 0,0,128,0,0,0,128,0,
1025 255,255,255,255,0,0,0,0,
1026 0,0,0,0,0,0,0,128,
1027 255,255,255,255,0,0,0,0,
1028 0,0,0,0,0,0,0,0,
/* Section 4 (256 bytes): one flag byte per character. NOTE(review): the values
are consistent with the bit meanings documented for the preceding table
(1 = white space, 2 = letter, 4 = decimal digit, 8 = hexadecimal digit,
16 = alphanumeric or '_', 128 = metacharacter or binary zero) - confirm.
Unlike that table, some characters above 127 carry flags here. */
1029 128,0,0,0,0,0,0,0,
1030 0,1,1,0,1,1,0,0,
1031 0,0,0,0,0,0,0,0,
1032 0,0,0,0,0,0,0,0,
1033 1,0,0,0,128,0,0,0,
1034 128,128,128,128,0,0,128,0,
1035 28,28,28,28,28,28,28,28,
1036 28,28,0,0,0,0,0,128,
1037 0,26,26,26,26,26,26,18,
1038 18,18,18,18,18,18,18,18,
1039 18,18,18,18,18,18,18,18,
1040 18,18,18,128,128,0,128,16,
1041 0,26,26,26,26,26,26,18,
1042 18,18,18,18,18,18,18,18,
1043 18,18,18,18,18,18,18,18,
1044 18,18,18,128,128,0,0,0,
1045 0,0,0,0,0,1,0,0,
1046 0,0,0,0,0,0,0,0,
1047 0,0,0,0,0,0,0,0,
1048 0,0,0,0,0,0,0,0,
1049 1,0,0,0,0,0,0,0,
1050 0,0,18,0,0,0,0,0,
1051 0,0,20,20,0,18,0,0,
1052 0,20,18,0,0,0,0,0,
1053 18,18,18,18,18,18,18,18,
1054 18,18,18,18,18,18,18,18,
1055 18,18,18,18,18,18,18,0,
1056 18,18,18,18,18,18,18,18,
1057 18,18,18,18,18,18,18,18,
1058 18,18,18,18,18,18,18,18,
1059 18,18,18,18,18,18,18,0,
1060 18,18,18,18,18,18,18,18
1061 };
1062
1063
1064
1065
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Replacement for C libraries that have no strerror() of their own (SunOS4 is
one example). The message is looked up in the sys_errlist[] table, which the
system declares as holding sys_nerr entries.

Argument:
  n         the error number

Returns:    the message for n, or a fixed "unknown" text when n lies outside
            the table
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1085
1086
1087 /*************************************************
1088 * JIT memory callback *
1089 *************************************************/
1090
1091 static pcre_jit_stack* jit_callback(void *arg)
1092 {
1093 jit_was_used = TRUE;
1094 return (pcre_jit_stack *)arg;
1095 }
1096
1097
1098 #if !defined NOUTF || defined SUPPORT_PCRE16
1099 /*************************************************
1100 * Convert UTF-8 string to value *
1101 *************************************************/
1102
1103 /* This function takes one or more bytes that represents a UTF-8 character,
1104 and returns the value of the character.
1105
1106 Argument:
1107 utf8bytes a pointer to the byte vector
1108 vptr a pointer to an int to receive the value
1109
1110 Returns: > 0 => the number of bytes consumed
1111 -6 to 0 => malformed UTF-8 character at offset = (-return)
1112 */
1113
1114 static int
1115 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1116 {
1117 int c = *utf8bytes++;
1118 int d = c;
1119 int i, j, s;
1120
1121 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1122 {
1123 if ((d & 0x80) == 0) break;
1124 d <<= 1;
1125 }
1126
1127 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1128 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1129
1130 /* i now has a value in the range 1-5 */
1131
1132 s = 6*i;
1133 d = (c & utf8_table3[i]) << s;
1134
1135 for (j = 0; j < i; j++)
1136 {
1137 c = *utf8bytes++;
1138 if ((c & 0xc0) != 0x80) return -(j+1);
1139 s -= 6;
1140 d |= (c & 0x3f) << s;
1141 }
1142
1143 /* Check that encoding was the correct unique one */
1144
1145 for (j = 0; j < utf8_table1_size; j++)
1146 if (d <= utf8_table1[j]) break;
1147 if (j != i) return -(i+1);
1148
1149 /* Valid value */
1150
1151 *vptr = d;
1152 return i+1;
1153 }
1154 #endif /* NOUTF || SUPPORT_PCRE16 */
1155
1156
1157
1158 #if !defined NOUTF || defined SUPPORT_PCRE16
1159 /*************************************************
1160 * Convert character value to UTF-8 *
1161 *************************************************/
1162
1163 /* This function takes an integer value in the range 0 - 0x7fffffff
1164 and encodes it as a UTF-8 character in 0 to 6 bytes.
1165
1166 Arguments:
1167 cvalue the character value
1168 utf8bytes pointer to buffer for result - at least 6 bytes long
1169
1170 Returns: number of characters placed in the buffer
1171 */
1172
1173 static int
1174 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1175 {
1176 register int i, j;
1177 for (i = 0; i < utf8_table1_size; i++)
1178 if (cvalue <= utf8_table1[i]) break;
1179 utf8bytes += i;
1180 for (j = i; j > 0; j--)
1181 {
1182 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1183 cvalue >>= 6;
1184 }
1185 *utf8bytes = utf8_table2[i] | cvalue;
1186 return i + 1;
1187 }
1188 #endif
1189
1190
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* Convert a byte string to 16-bit units, leaving the zero-terminated result in
buffer16, which is re-allocated here when it is too small. The buffer is sized
at 2*len + 2 bytes, that is, one 16-bit unit for every input byte plus a
terminator. That is always enough: a character of up to three UTF-8 bytes
becomes one unit and a longer one becomes two.

This function does not object to surrogate values. That is deliberate: it
makes it possible to build invalid UTF-16 strings in order to test that they
are correctly faulted.

Patterns are either plain ASCII or UTF-8; data lines are always UTF-8 so that
values greater than 255 can be given.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;

if (buffer16_size < 2*len + 2)
  {
  if (buffer16 != NULL) free(buffer16);
  buffer16_size = 2*len + 2;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

if (utf || data)
  {
  /* The input is UTF-8: decode it one character at a time. */

  int c = 0;
  while (len > 0)
    {
    int used = utf82ord(p, &c);
    if (used <= 0) return -1;
    if (c > 0x10ffff) return -2;
    p += used;
    len -= used;

    if (c >= 0x10000)
      {
      /* Needs a surrogate pair, which is possible only in UTF mode. */
      if (!utf) return -3;
      c -= 0x10000;
      *out++ = 0xD800 | (c >> 10);
      *out++ = 0xDC00 | (c & 0x3ff);
      }
    else *out++ = c;
    }
  }

else
  {
  /* A non-UTF pattern: each byte simply widens to one unit. */

  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return out - buffer16;
}
#endif
1269
1270
1271 /*************************************************
1272 * Read or extend an input line *
1273 *************************************************/
1274
1275 /* Input lines are read into buffer, but both patterns and data lines can be
1276 continued over multiple input lines. In addition, if the buffer fills up, we
1277 want to automatically expand it so as to be able to handle extremely large
1278 lines that are needed for certain stress tests. When the input buffer is
1279 expanded, the other two buffers must also be expanded likewise, and the
1280 contents of pbuffer, which are a copy of the input for callouts, must be
1281 preserved (for when expansion happens for a data line). This is not the most
1282 optimal way of handling this, but hey, this is just a test program!
1283
1284 Arguments:
1285 f the file to read
1286 start where in buffer to start (this *must* be within buffer)
1287 prompt for stdin or readline()
1288
1289 Returns: pointer to the start of new data
1290 could be a copy of start, or could be moved
1291 NULL if no data read and EOF reached
1292 */
1293
1294 static pcre_uint8 *
1295 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1296 {
1297 pcre_uint8 *here = start;
1298
1299 for (;;)
1300 {
1301 size_t rlen = (size_t)(buffer_size - (here - buffer));
1302
1303 if (rlen > 1000)
1304 {
1305 int dlen;
1306
1307 /* If libreadline or libedit support is required, use readline() to read a
1308 line if the input is a terminal. Note that readline() removes the trailing
1309 newline, so we must put it back again, to be compatible with fgets(). */
1310
1311 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1312 if (isatty(fileno(f)))
1313 {
1314 size_t len;
1315 char *s = readline(prompt);
1316 if (s == NULL) return (here == start)? NULL : start;
1317 len = strlen(s);
1318 if (len > 0) add_history(s);
1319 if (len > rlen - 1) len = rlen - 1;
1320 memcpy(here, s, len);
1321 here[len] = '\n';
1322 here[len+1] = 0;
1323 free(s);
1324 }
1325 else
1326 #endif
1327
1328 /* Read the next line by normal means, prompting if the file is stdin. */
1329
1330 {
1331 if (f == stdin) printf("%s", prompt);
1332 if (fgets((char *)here, rlen, f) == NULL)
1333 return (here == start)? NULL : start;
1334 }
1335
1336 dlen = (int)strlen((char *)here);
1337 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1338 here += dlen;
1339 }
1340
1341 else
1342 {
1343 int new_buffer_size = 2*buffer_size;
1344 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1345 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1346 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1347
1348 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1349 {
1350 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1351 exit(1);
1352 }
1353
1354 memcpy(new_buffer, buffer, buffer_size);
1355 memcpy(new_pbuffer, pbuffer, buffer_size);
1356
1357 buffer_size = new_buffer_size;
1358
1359 start = new_buffer + (start - buffer);
1360 here = new_buffer + (here - buffer);
1361
1362 free(buffer);
1363 free(dbuffer);
1364 free(pbuffer);
1365
1366 buffer = new_buffer;
1367 dbuffer = new_dbuffer;
1368 pbuffer = new_pbuffer;
1369 }
1370 }
1371
1372 return NULL; /* Control never gets here */
1373 }
1374
1375
1376
1377 /*************************************************
1378 * Read number from string *
1379 *************************************************/
1380
1381 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1382 around with conditional compilation, just do the job by hand. It is only used
1383 for unpicking arguments, so just keep it simple.
1384
1385 Arguments:
1386 str string to be converted
1387 endptr where to put the end pointer
1388
1389 Returns: the unsigned long
1390 */
1391
1392 static int
1393 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1394 {
1395 int result = 0;
1396 while(*str != 0 && isspace(*str)) str++;
1397 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1398 *endptr = str;
1399 return(result);
1400 }
1401
1402
1403
1404 /*************************************************
1405 * Print one character *
1406 *************************************************/
1407
1408 /* Print a single character either literally, or as a hex escape. */
1409
1410 static int pchar(int c, FILE *f)
1411 {
1412 if (PRINTOK(c))
1413 {
1414 if (f != NULL) fprintf(f, "%c", c);
1415 return 1;
1416 }
1417
1418 if (c < 0x100)
1419 {
1420 if (use_utf)
1421 {
1422 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1423 return 6;
1424 }
1425 else
1426 {
1427 if (f != NULL) fprintf(f, "\\x%02x", c);
1428 return 4;
1429 }
1430 }
1431
1432 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1433 return (c <= 0x000000ff)? 6 :
1434 (c <= 0x00000fff)? 7 :
1435 (c <= 0x0000ffff)? 8 :
1436 (c <= 0x000fffff)? 9 : 10;
1437 }
1438
1439
1440
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Print a string of bytes by passing it to pchar() one character at a time.
When use_utf is set, each valid UTF-8 sequence that fits within the remaining
length is decoded and printed as one character; anything else is printed byte
by byte.

Arguments:
  p          the string
  length     its length in bytes, or negative if it is zero-terminated
  f          the file to print on, or NULL to just count without printing

Returns:     the number of characters printed (the sum of pchar()'s results)
*/

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int c = 0;
int total = 0;

if (length < 0) length = strlen((char *)p);

while (length > 0)
  {
  int used = 0;     /* Bytes taken by this character; 0 => not decoded yet */

#if !defined NOUTF
  if (use_utf)
    {
    int rc = utf82ord(p, &c);
    if (rc > 0 && rc <= length) used = rc;   /* Mustn't run over the end */
    }
#endif

  if (used == 0)
    {
    c = *p;
    used = 1;
    }

  p += used;
  length -= used;
  total += pchar(c, f);
  }

return total;
}
#endif
1479
1480
1481
#ifdef SUPPORT_PCRE16
/*************************************************
*    Find length of 0-terminated 16-bit string   *
*************************************************/

/* Count the 16-bit units that precede the terminating zero unit.

Argument:
  p          the zero-terminated 16-bit string

Returns:     its length in 16-bit units, excluding the terminator
*/

static int strlen16(PCRE_SPTR16 p)
{
PCRE_SPTR16 end = p;
while (*end != 0) end++;
return (int)(end - p);
}
#endif  /* SUPPORT_PCRE16 */
1494
1495
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Print a string of 16-bit units by passing it to pchar() one character at a
time. When use_utf is set, a high surrogate (0xD800-0xDBFF) that is followed
within the string by a low surrogate (0xDC00-0xDFFF) is combined with it and
printed as a single character; an unpaired surrogate is printed as it stands.

Arguments:
  p          the string
  length     its length in 16-bit units, or negative if it is zero-terminated
  f          the file to print on, or NULL to just count without printing

Returns:     the number of characters printed (the sum of pchar()'s results)
*/

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int yield = 0;

if (length < 0)
  length = strlen16(p);

while (length-- > 0)
  {
  int c = *p++ & 0xffff;
#if !defined NOUTF
  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
    int d = *p & 0xffff;

    /* The low surrogate range includes 0xDFFF itself. */

    if (d >= 0xDC00 && d <= 0xDFFF)
      {
      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
      length--;
      p++;
      }
    }
#endif
  yield += pchar(c, f);
  }

return yield;
}
#endif  /* SUPPORT_PCRE16 */
1532
1533
1534
#ifdef SUPPORT_PCRE8
/*************************************************
*     Read a capture name (8-bit) and check it   *
*************************************************/

/* Copy a run of alphanumeric characters from p to the place that *pp points
to, and follow it with two zero bytes. If the compiled pattern has no group of
that name, a message is written to outfile; the name is left in place either
way.

Arguments:
  p          points to the start of the name in the input
  pp         points to the output pointer; on return it has been moved past
               the name and its first terminating zero
  re         the compiled pattern

Returns:     pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
{
pcre_uint8 *name = *pp;
pcre_uint8 *out = name;

for (; isalnum(*p); p++) *out++ = *p;
out[0] = 0;
out[1] = 0;
out++;

if (pcre_get_stringnumber(re, (char *)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE8 */
1558
1559
1560
#ifdef SUPPORT_PCRE16
/*************************************************
*    Read a capture name (16-bit) and check it   *
*************************************************/

/* The text being read is 8-bit; each alphanumeric byte is widened to a 16-bit
unit as it is copied to the place that *pp points to, and the name is followed
by two zero units. If the compiled pattern has no group of that name, a
message is written to outfile; the name is left in place either way.

Arguments:
  p          points to the start of the name in the (8-bit) input
  pp         points to the 16-bit output pointer; on return it has been moved
               past the name and its first terminating zero
  re         the compiled pattern

Returns:     pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
{
pcre_uint16 *name = *pp;
pcre_uint16 *out = name;

for (; isalnum(*p); p++) *out++ = *p;
out[0] = 0;
out[1] = 0;
out++;

if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE16 */
1585
1586
1587
1588 /*************************************************
1589 * Callout function *
1590 *************************************************/
1591
1592 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1593 the match. Yield zero unless more callouts than the fail count, or the callout
1594 data is not zero. */
1595
1596 static int callout(pcre_callout_block *cb)
1597 {
1598 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1599 int i, pre_start, post_start, subject_length;
1600
1601 if (callout_extra)
1602 {
1603 fprintf(f, "Callout %d: last capture = %d\n",
1604 cb->callout_number, cb->capture_last);
1605
1606 for (i = 0; i < cb->capture_top * 2; i += 2)
1607 {
1608 if (cb->offset_vector[i] < 0)
1609 fprintf(f, "%2d: <unset>\n", i/2);
1610 else
1611 {
1612 fprintf(f, "%2d: ", i/2);
1613 PCHARSV(cb->subject, cb->offset_vector[i],
1614 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1615 fprintf(f, "\n");
1616 }
1617 }
1618 }
1619
1620 /* Re-print the subject in canonical form, the first time or if giving full
1621 datails. On subsequent calls in the same match, we use pchars just to find the
1622 printed lengths of the substrings. */
1623
1624 if (f != NULL) fprintf(f, "--->");
1625
1626 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1627 PCHARS(post_start, cb->subject, cb->start_match,
1628 cb->current_position - cb->start_match, f);
1629
1630 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1631
1632 PCHARSV(cb->subject, cb->current_position,
1633 cb->subject_length - cb->current_position, f);
1634
1635 if (f != NULL) fprintf(f, "\n");
1636
1637 /* Always print appropriate indicators, with callout number if not already
1638 shown. For automatic callouts, show the pattern offset. */
1639
1640 if (cb->callout_number == 255)
1641 {
1642 fprintf(outfile, "%+3d ", cb->pattern_position);
1643 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1644 }
1645 else
1646 {
1647 if (callout_extra) fprintf(outfile, " ");
1648 else fprintf(outfile, "%3d ", cb->callout_number);
1649 }
1650
1651 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1652 fprintf(outfile, "^");
1653
1654 if (post_start > 0)
1655 {
1656 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1657 fprintf(outfile, "^");
1658 }
1659
1660 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1661 fprintf(outfile, " ");
1662
1663 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1664 pbuffer + cb->pattern_position);
1665
1666 fprintf(outfile, "\n");
1667 first_callout = 0;
1668
1669 if (cb->mark != last_callout_mark)
1670 {
1671 if (cb->mark == NULL)
1672 fprintf(outfile, "Latest Mark: <unset>\n");
1673 else
1674 {
1675 fprintf(outfile, "Latest Mark: ");
1676 PCHARSV(cb->mark, 0, -1, outfile);
1677 putc('\n', outfile);
1678 }
1679 last_callout_mark = cb->mark;
1680 }
1681
1682 if (cb->callout_data != NULL)
1683 {
1684 int callout_data = *((int *)(cb->callout_data));
1685 if (callout_data != 0)
1686 {
1687 fprintf(outfile, "Callout data = %d\n", callout_data);
1688 return callout_data;
1689 }
1690 }
1691
1692 return (cb->callout_number != callout_fail_id)? 0 :
1693 (++callout_count >= callout_fail_count)? 1 : 0;
1694 }
1695
1696
1697 /*************************************************
1698 * Local malloc functions *
1699 *************************************************/
1700
1701 /* Alternative malloc function, to test functionality and save the size of a
1702 compiled re, which is the first store request that pcre_compile() makes. The
1703 show_malloc variable is set only during matching. */
1704
1705 static void *new_malloc(size_t size)
1706 {
1707 void *block = malloc(size);
1708 gotten_store = size;
1709 if (first_gotten_store == 0) first_gotten_store = size;
1710 if (show_malloc)
1711 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1712 return block;
1713 }
1714
1715 static void new_free(void *block)
1716 {
1717 if (show_malloc)
1718 fprintf(outfile, "free %p\n", block);
1719 free(block);
1720 }
1721
1722 /* For recursion malloc/free, to test stacking calls */
1723
1724 static void *stack_malloc(size_t size)
1725 {
1726 void *block = malloc(size);
1727 if (show_malloc)
1728 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1729 return block;
1730 }
1731
1732 static void stack_free(void *block)
1733 {
1734 if (show_malloc)
1735 fprintf(outfile, "stack_free %p\n", block);
1736 free(block);
1737 }
1738
1739
1740 /*************************************************
1741 * Call pcre_fullinfo() *
1742 *************************************************/
1743
1744 /* Get one piece of information from the pcre_fullinfo() function. When only
1745 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1746 value, but the code is defensive.
1747
1748 Arguments:
1749 re compiled regex
1750 study study data
1751 option PCRE_INFO_xxx option
1752 ptr where to put the data
1753
1754 Returns: 0 when OK, < 0 on error
1755 */
1756
1757 static int
1758 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1759 {
1760 int rc;
1761
1762 if (use_pcre16)
1763 #ifdef SUPPORT_PCRE16
1764 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1765 #else
1766 rc = PCRE_ERROR_BADMODE;
1767 #endif
1768 else
1769 #ifdef SUPPORT_PCRE8
1770 rc = pcre_fullinfo(re, study, option, ptr);
1771 #else
1772 rc = PCRE_ERROR_BADMODE;
1773 #endif
1774
1775 if (rc < 0)
1776 {
1777 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1778 use_pcre16? "16" : "", option);
1779 if (rc == PCRE_ERROR_BADMODE)
1780 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1781 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1782 }
1783
1784 return rc;
1785 }
1786
1787
1788
1789 /*************************************************
1790 * Swap byte functions *
1791 *************************************************/
1792
1793 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1794 value, respectively.
1795
1796 Arguments:
1797 value any number
1798
1799 Returns: the byte swapped value
1800 */
1801
1802 static pcre_uint32
1803 swap_uint32(pcre_uint32 value)
1804 {
1805 return ((value & 0x000000ff) << 24) |
1806 ((value & 0x0000ff00) << 8) |
1807 ((value & 0x00ff0000) >> 8) |
1808 (value >> 24);
1809 }
1810
1811 static pcre_uint16
1812 swap_uint16(pcre_uint16 value)
1813 {
1814 return (value >> 8) | (value << 8);
1815 }
1816
1817
1818
1819 /*************************************************
1820 * Flip bytes in a compiled pattern *
1821 *************************************************/
1822
1823 /* This function is called if the 'F' option was present on a pattern that is
1824 to be written to a file. We flip the bytes of all the integer fields in the
1825 regex data block and the study block. In 16-bit mode this also flips relevant
1826 bytes in the pattern itself. This is to make it possible to test PCRE's
1827 ability to reload byte-flipped patterns, e.g. those compiled on a different
1828 architecture. */
1829
1830 static void
1831 regexflip(pcre *ere, pcre_extra *extra)
1832 {
1833 REAL_PCRE *re = (REAL_PCRE *)ere;
1834 #ifdef SUPPORT_PCRE16
1835 int op;
1836 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1837 int length = re->name_count * re->name_entry_size;
1838 #ifdef SUPPORT_UTF
1839 BOOL utf = (re->options & PCRE_UTF16) != 0;
1840 BOOL utf16_char = FALSE;
1841 #endif /* SUPPORT_UTF */
1842 #endif /* SUPPORT_PCRE16 */
1843
1844 /* Always flip the bytes in the main data block and study blocks. */
1845
1846 re->magic_number = REVERSED_MAGIC_NUMBER;
1847 re->size = swap_uint32(re->size);
1848 re->options = swap_uint32(re->options);
1849 re->flags = swap_uint16(re->flags);
1850 re->top_bracket = swap_uint16(re->top_bracket);
1851 re->top_backref = swap_uint16(re->top_backref);
1852 re->first_char = swap_uint16(re->first_char);
1853 re->req_char = swap_uint16(re->req_char);
1854 re->name_table_offset = swap_uint16(re->name_table_offset);
1855 re->name_entry_size = swap_uint16(re->name_entry_size);
1856 re->name_count = swap_uint16(re->name_count);
1857
1858 if (extra != NULL)
1859 {
1860 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1861 rsd->size = swap_uint32(rsd->size);
1862 rsd->flags = swap_uint32(rsd->flags);
1863 rsd->minlength = swap_uint32(rsd->minlength);
1864 }
1865
1866 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1867 in the name table, if present, and then in the pattern itself. */
1868
1869 #ifdef SUPPORT_PCRE16
1870 if (!use_pcre16) return;
1871
1872 while(TRUE)
1873 {
1874 /* Swap previous characters. */
1875 while (length-- > 0)
1876 {
1877 *ptr = swap_uint16(*ptr);
1878 ptr++;
1879 }
1880 #ifdef SUPPORT_UTF
1881 if (utf16_char)
1882 {
1883 if ((ptr[-1] & 0xfc00) == 0xd800)
1884 {
1885 /* We know that there is only one extra character in UTF-16. */
1886 *ptr = swap_uint16(*ptr);
1887 ptr++;
1888 }
1889 }
1890 utf16_char = FALSE;
1891 #endif /* SUPPORT_UTF */
1892
1893 /* Get next opcode. */
1894
1895 length = 0;
1896 op = *ptr;
1897 *ptr++ = swap_uint16(op);
1898
1899 switch (op)
1900 {
1901 case OP_END:
1902 return;
1903
1904 #ifdef SUPPORT_UTF
1905 case OP_CHAR:
1906 case OP_CHARI:
1907 case OP_NOT:
1908 case OP_NOTI:
1909 case OP_STAR:
1910 case OP_MINSTAR:
1911 case OP_PLUS:
1912 case OP_MINPLUS:
1913 case OP_QUERY:
1914 case OP_MINQUERY:
1915 case OP_UPTO:
1916 case OP_MINUPTO:
1917 case OP_EXACT:
1918 case OP_POSSTAR:
1919 case OP_POSPLUS:
1920 case OP_POSQUERY:
1921 case OP_POSUPTO:
1922 case OP_STARI:
1923 case OP_MINSTARI:
1924 case OP_PLUSI:
1925 case OP_MINPLUSI:
1926 case OP_QUERYI:
1927 case OP_MINQUERYI:
1928 case OP_UPTOI:
1929 case OP_MINUPTOI:
1930 case OP_EXACTI:
1931 case OP_POSSTARI:
1932 case OP_POSPLUSI:
1933 case OP_POSQUERYI:
1934 case OP_POSUPTOI:
1935 case OP_NOTSTAR:
1936 case OP_NOTMINSTAR:
1937 case OP_NOTPLUS:
1938 case OP_NOTMINPLUS:
1939 case OP_NOTQUERY:
1940 case OP_NOTMINQUERY:
1941 case OP_NOTUPTO:
1942 case OP_NOTMINUPTO:
1943 case OP_NOTEXACT:
1944 case OP_NOTPOSSTAR:
1945 case OP_NOTPOSPLUS:
1946 case OP_NOTPOSQUERY:
1947 case OP_NOTPOSUPTO:
1948 case OP_NOTSTARI:
1949 case OP_NOTMINSTARI:
1950 case OP_NOTPLUSI:
1951 case OP_NOTMINPLUSI:
1952 case OP_NOTQUERYI:
1953 case OP_NOTMINQUERYI:
1954 case OP_NOTUPTOI:
1955 case OP_NOTMINUPTOI:
1956 case OP_NOTEXACTI:
1957 case OP_NOTPOSSTARI:
1958 case OP_NOTPOSPLUSI:
1959 case OP_NOTPOSQUERYI:
1960 case OP_NOTPOSUPTOI:
1961 if (utf) utf16_char = TRUE;
1962 #endif
1963 /* Fall through. */
1964
1965 default:
1966 length = OP_lengths16[op] - 1;
1967 break;
1968
1969 case OP_CLASS:
1970 case OP_NCLASS:
1971 /* Skip the character bit map. */
1972 ptr += 32/sizeof(pcre_uint16);
1973 length = 0;
1974 break;
1975
1976 case OP_XCLASS:
1977 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1978 if (LINK_SIZE > 1)
1979 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1980 - (1 + LINK_SIZE + 1));
1981 else
1982 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1983
1984 /* Reverse the size of the XCLASS instance. */
1985 *ptr = swap_uint16(*ptr);
1986 ptr++;
1987 if (LINK_SIZE > 1)
1988 {
1989 *ptr = swap_uint16(*ptr);
1990 ptr++;
1991 }
1992
1993 op = *ptr;
1994 *ptr = swap_uint16(op);
1995 ptr++;
1996 if ((op & XCL_MAP) != 0)
1997 {
1998 /* Skip the character bit map. */
1999 ptr += 32/sizeof(pcre_uint16);
2000 length -= 32/sizeof(pcre_uint16);
2001 }
2002 break;
2003 }
2004 }
2005 /* Control should never reach here in 16 bit mode. */
2006 #endif /* SUPPORT_PCRE16 */
2007 }
2008
2009
2010
2011 /*************************************************
2012 * Check match or recursion limit *
2013 *************************************************/
2014
2015 static int
2016 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2017 int start_offset, int options, int *use_offsets, int use_size_offsets,
2018 int flag, unsigned long int *limit, int errnumber, const char *msg)
2019 {
2020 int count;
2021 int min = 0;
2022 int mid = 64;
2023 int max = -1;
2024
2025 extra->flags |= flag;
2026
2027 for (;;)
2028 {
2029 *limit = mid;
2030
2031 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2032 use_offsets, use_size_offsets);
2033
2034 if (count == errnumber)
2035 {
2036 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2037 min = mid;
2038 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2039 }
2040
2041 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2042 count == PCRE_ERROR_PARTIAL)
2043 {
2044 if (mid == min + 1)
2045 {
2046 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2047 break;
2048 }
2049 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2050 max = mid;
2051 mid = (min + mid)/2;
2052 }
2053 else break; /* Some other error */
2054 }
2055
2056 extra->flags &= ~flag;
2057 return count;
2058 }
2059
2060
2061
2062 /*************************************************
2063 * Case-independent strncmp() function *
2064 *************************************************/
2065
2066 /*
2067 Arguments:
2068 s first string
2069 t second string
2070 n number of characters to compare
2071
2072 Returns: < 0, = 0, or > 0, according to the comparison
2073 */
2074
2075 static int
2076 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2077 {
2078 while (n--)
2079 {
2080 int c = tolower(*s++) - tolower(*t++);
2081 if (c) return c;
2082 }
2083 return 0;
2084 }
2085
2086
2087
2088 /*************************************************
2089 * Check newline indicator *
2090 *************************************************/
2091
2092 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2093 a message and return 0 if there is no match.
2094
2095 Arguments:
2096 p points after the leading '<'
2097 f file for error message
2098
2099 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2100 */
2101
2102 static int
2103 check_newline(pcre_uint8 *p, FILE *f)
2104 {
2105 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2106 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2107 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2108 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2109 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2110 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2111 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2112 fprintf(f, "Unknown newline type at: <%s\n", p);
2113 return 0;
2114 }
2115
2116
2117
/*************************************************
*                Usage function                  *
*************************************************/

/* Writes the command-line help text to stdout. Which lines appear depends on
the build: the readline note is selected by SUPPORT_LIBREADLINE or
SUPPORT_LIBEDIT, and the -16, -dfa and -p entries are present only when
SUPPORT_PCRE16 is defined, NODFA is not defined, and NOPOSIX is not defined,
respectively. The text is held as a table of separate pieces so that no
single string literal becomes very long. */

static void
usage(void)
{
static const char *const text[] = {
  "Usage: pcretest [options] [<input file> [<output file>]]\n\n",
  "Input and output default to stdin and stdout.\n",
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
  "If input is a terminal, readline() is used to read from it.\n",
#else
  "This version of pcretest is not linked with readline().\n",
#endif
  "\nOptions:\n",
#ifdef SUPPORT_PCRE16
  " -16 use the 16-bit library\n",
#endif
  " -b show compiled code\n",
  " -C show PCRE compile-time options and exit\n",
  " -C arg show a specific compile-time option\n",
  " and exit with its value. The arg can be:\n",
  " linksize internal link size [2, 3, 4]\n",
  " pcre8 8 bit library support enabled [0, 1]\n",
  " pcre16 16 bit library support enabled [0, 1]\n",
  " utf Unicode Transformation Format supported [0, 1]\n",
  " ucp Unicode Properties supported [0, 1]\n",
  " jit Just-in-time compiler supported [0, 1]\n",
  " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n",
  " -d debug: show compiled code and information (-b and -i)\n",
#if !defined NODFA
  " -dfa force DFA matching for all subjects\n",
#endif
  " -help show usage information\n",
  " -i show information about compiled patterns\n",
  " -M find MATCH_LIMIT minimum for each subject\n",
  " -m output memory used information\n",
  " -o <n> set size of offsets vector to <n>\n",
#if !defined NOPOSIX
  " -p use POSIX interface\n",
#endif
  " -q quiet: do not output PCRE version number at start\n",
  " -S <n> set stack size to <n> megabytes\n",
  " -s force each pattern to be studied at basic level\n",
  " -s+ force each pattern to be studied, using JIT if available\n",
  " -s++ ditto, verifying when JIT was actually used\n",
  " -s+n force each pattern to be studied, using JIT if available,\n",
  " where 1 <= n <= 7 selects JIT options\n",
  " -s++n ditto, verifying when JIT was actually used\n",
  " -t time compilation and execution\n",
  " -t <n> time compilation and execution, repeating <n> times\n",
  " -tm time execution (matching) only\n",
  " -tm <n> time execution (matching) only, repeating <n> times\n"
};
unsigned int i;

for (i = 0; i < sizeof(text)/sizeof(text[0]); i++)
  fputs(text[i], stdout);
}
2172
2173
2174
2175 /*************************************************
2176 * Main Program *
2177 *************************************************/
2178
2179 /* Read lines from named file or stdin and write to named file or stdout; lines
2180 consist of a regular expression, in delimiters and optionally followed by
2181 options, followed by a set of test data, terminated by an empty line. */
2182
2183 int main(int argc, char **argv)
2184 {
2185 FILE *infile = stdin;
2186 const char *version;
2187 int options = 0;
2188 int study_options = 0;
2189 int default_find_match_limit = FALSE;
2190 int op = 1;
2191 int timeit = 0;
2192 int timeitm = 0;
2193 int showinfo = 0;
2194 int showstore = 0;
2195 int force_study = -1;
2196 int force_study_options = 0;
2197 int quiet = 0;
2198 int size_offsets = 45;
2199 int size_offsets_max;
2200 int *offsets = NULL;
2201 #if !defined NOPOSIX
2202 int posix = 0;
2203 #endif
2204 int debug = 0;
2205 int done = 0;
2206 int all_use_dfa = 0;
2207 int verify_jit = 0;
2208 int yield = 0;
2209 int stack_size;
2210
2211 pcre_jit_stack *jit_stack = NULL;
2212
2213 /* These vectors store, end-to-end, a list of zero-terminated captured
2214 substring names, each list itself being terminated by an empty name. Assume
2215 that 1024 is plenty long enough for the few names we'll be testing. It is
2216 easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2217 for the actual memory, to ensure alignment. */
2218
2219 pcre_uint16 copynames[1024];
2220 pcre_uint16 getnames[1024];
2221
2222 #ifdef SUPPORT_PCRE16
2223 pcre_uint16 *cn16ptr;
2224 pcre_uint16 *gn16ptr;
2225 #endif
2226
2227 #ifdef SUPPORT_PCRE8
2228 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2229 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2230 pcre_uint8 *cn8ptr;
2231 pcre_uint8 *gn8ptr;
2232 #endif
2233
2234 /* Get buffers from malloc() so that valgrind will check their misuse when
2235 debugging. They grow automatically when very long lines are read. The 16-bit
2236 buffer (buffer16) is obtained only if needed. */
2237
2238 buffer = (pcre_uint8 *)malloc(buffer_size);
2239 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2240 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2241
2242 /* The outfile variable is static so that new_malloc can use it. */
2243
2244 outfile = stdout;
2245
2246 /* The following _setmode() stuff is some Windows magic that tells its runtime
2247 library to translate CRLF into a single LF character. At least, that's what
2248 I've been told: never having used Windows I take this all on trust. Originally
2249 it set 0x8000, but then I was advised that _O_BINARY was better. */
2250
2251 #if defined(_WIN32) || defined(WIN32)
2252 _setmode( _fileno( stdout ), _O_BINARY );
2253 #endif
2254
2255 /* Get the version number: both pcre_version() and pcre16_version() give the
2256 same answer. We just need to ensure that we call one that is available. */
2257
2258 #ifdef SUPPORT_PCRE8
2259 version = pcre_version();
2260 #else
2261 version = pcre16_version();
2262 #endif
2263
2264 /* Scan options */
2265
2266 while (argc > 1 && argv[op][0] == '-')
2267 {
2268 pcre_uint8 *endptr;
2269 char *arg = argv[op];
2270
2271 if (strcmp(arg, "-m") == 0) showstore = 1;
2272 else if (strcmp(arg, "-s") == 0) force_study = 0;
2273
2274 else if (strncmp(arg, "-s+", 3) == 0)
2275 {
2276 arg += 3;
2277 if (*arg == '+') { arg++; verify_jit = TRUE; }
2278 force_study = 1;
2279 if (*arg == 0)
2280 force_study_options = jit_study_bits[6];
2281 else if (*arg >= '1' && *arg <= '7')
2282 force_study_options = jit_study_bits[*arg - '1'];
2283 else goto BAD_ARG;
2284 }
2285 else if (strcmp(arg, "-16") == 0)
2286 {
2287 #ifdef SUPPORT_PCRE16
2288 use_pcre16 = 1;
2289 #else
2290 printf("** This version of PCRE was built without 16-bit support\n");
2291 exit(1);
2292 #endif
2293 }
2294 else if (strcmp(arg, "-q") == 0) quiet = 1;
2295 else if (strcmp(arg, "-b") == 0) debug = 1;
2296 else if (strcmp(arg, "-i") == 0) showinfo = 1;
2297 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2298 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2299 #if !defined NODFA
2300 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2301 #endif
2302 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2303 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2304 *endptr == 0))
2305 {
2306 op++;
2307 argc--;
2308 }
2309 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2310 {
2311 int both = arg[2] == 0;
2312 int temp;
2313 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2314 *endptr == 0))
2315 {
2316 timeitm = temp;
2317 op++;
2318 argc--;
2319 }
2320 else timeitm = LOOPREPEAT;
2321 if (both) timeit = timeitm;
2322 }
2323 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2324 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2325 *endptr == 0))
2326 {
2327 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2328 printf("PCRE: -S not supported on this OS\n");
2329 exit(1);
2330 #else
2331 int rc;
2332 struct rlimit rlim;
2333 getrlimit(RLIMIT_STACK, &rlim);
2334 rlim.rlim_cur = stack_size * 1024 * 1024;
2335 rc = setrlimit(RLIMIT_STACK, &rlim);
2336 if (rc != 0)
2337 {
2338 printf("PCRE: setrlimit() failed with error %d\n", rc);
2339 exit(1);
2340 }
2341 op++;
2342 argc--;
2343 #endif
2344 }
2345 #if !defined NOPOSIX
2346 else if (strcmp(arg, "-p") == 0) posix = 1;
2347 #endif
2348 else if (strcmp(arg, "-C") == 0)
2349 {
2350 int rc;
2351 unsigned long int lrc;
2352
2353 if (argc > 2)
2354 {
2355 if (strcmp(argv[op + 1], "linksize") == 0)
2356 {
2357 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2358 printf("%d\n", rc);
2359 yield = rc;
2360 goto EXIT;
2361 }
2362 if (strcmp(argv[op + 1], "pcre8") == 0)
2363 {
2364 #ifdef SUPPORT_PCRE8
2365 printf("1\n");
2366 yield = 1;
2367 #else
2368 printf("0\n");
2369 yield = 0;
2370 #endif
2371 goto EXIT;
2372 }
2373 if (strcmp(argv[op + 1], "pcre16") == 0)
2374 {
2375 #ifdef SUPPORT_PCRE16
2376 printf("1\n");
2377 yield = 1;
2378 #else
2379 printf("0\n");
2380 yield = 0;
2381 #endif
2382 goto EXIT;
2383 }
2384 if (strcmp(argv[op + 1], "utf") == 0)
2385 {
2386 #ifdef SUPPORT_PCRE8
2387 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2388 printf("%d\n", rc);
2389 yield = rc;
2390 #else
2391 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2392 printf("%d\n", rc);
2393 yield = rc;
2394 #endif
2395 goto EXIT;
2396 }
2397 if (strcmp(argv[op + 1], "ucp") == 0)
2398 {
2399 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2400 printf("%d\n", rc);
2401 yield = rc;
2402 goto EXIT;
2403 }
2404 if (strcmp(argv[op + 1], "jit") == 0)
2405 {
2406 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2407 printf("%d\n", rc);
2408 yield = rc;
2409 goto EXIT;
2410 }
2411 if (strcmp(argv[op + 1], "newline") == 0)
2412 {
2413 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2414 /* Note that these values are always the ASCII values, even
2415 in EBCDIC environments. CR is 13 and NL is 10. */
2416 printf("%s\n", (rc == 13)? "CR" :
2417 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2418 (rc == -2)? "ANYCRLF" :
2419 (rc == -1)? "ANY" : "???");
2420 goto EXIT;
2421 }
2422 printf("Unknown -C option: %s\n", argv[op + 1]);
2423 goto EXIT;
2424 }
2425
2426 printf("PCRE version %s\n", version);
2427 printf("Compiled with\n");
2428
2429 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2430 are set, either both UTFs are supported or both are not supported. */
2431
2432 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2433 printf(" 8-bit and 16-bit support\n");
2434 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2435 if (rc)
2436 printf(" UTF-8 and UTF-16 support\n");
2437 else
2438 printf(" No UTF-8 or UTF-16 support\n");
2439 #elif defined SUPPORT_PCRE8
2440 printf(" 8-bit support only\n");
2441 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2442 printf(" %sUTF-8 support\n", rc? "" : "No ");
2443 #else
2444 printf(" 16-bit support only\n");
2445 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2446 printf(" %sUTF-16 support\n", rc? "" : "No ");
2447 #endif
2448
2449 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2450 printf(" %sUnicode properties support\n", rc? "" : "No ");
2451 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2452 if (rc)
2453 {
2454 const char *arch;
2455 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2456 printf(" Just-in-time compiler support: %s\n", arch);
2457 }
2458 else
2459 printf(" No just-in-time compiler support\n");
2460 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2461 /* Note that these values are always the ASCII values, even
2462 in EBCDIC environments. CR is 13 and NL is 10. */
2463 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2464 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2465 (rc == -2)? "ANYCRLF" :
2466 (rc == -1)? "ANY" : "???");
2467 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2468 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2469 "all Unicode newlines");
2470 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2471 printf(" Internal link size = %d\n", rc);
2472 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2473 printf(" POSIX malloc threshold = %d\n", rc);
2474 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2475 printf(" Default match limit = %ld\n", lrc);
2476 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2477 printf(" Default recursion depth limit = %ld\n", lrc);
2478 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2479 printf(" Match recursion uses %s", rc? "stack" : "heap");
2480 if (showstore)
2481 {
2482 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2483 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2484 }
2485 printf("\n");
2486 goto EXIT;
2487 }
2488 else if (strcmp(arg, "-help") == 0 ||
2489 strcmp(arg, "--help") == 0)
2490 {
2491 usage();
2492 goto EXIT;
2493 }
2494 else
2495 {
2496 BAD_ARG:
2497 printf("** Unknown or malformed option %s\n", arg);
2498 usage();
2499 yield = 1;
2500 goto EXIT;
2501 }
2502 op++;
2503 argc--;
2504 }
2505
2506 /* Get the store for the offsets vector, and remember what it was */
2507
2508 size_offsets_max = size_offsets;
2509 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2510 if (offsets == NULL)
2511 {
2512 printf("** Failed to get %d bytes of memory for offsets vector\n",
2513 (int)(size_offsets_max * sizeof(int)));
2514 yield = 1;
2515 goto EXIT;
2516 }
2517
2518 /* Sort out the input and output files */
2519
2520 if (argc > 1)
2521 {
2522 infile = fopen(argv[op], INPUT_MODE);
2523 if (infile == NULL)
2524 {
2525 printf("** Failed to open %s\n", argv[op]);
2526 yield = 1;
2527 goto EXIT;
2528 }
2529 }
2530
2531 if (argc > 2)
2532 {
2533 outfile = fopen(argv[op+1], OUTPUT_MODE);
2534 if (outfile == NULL)
2535 {
2536 printf("** Failed to open %s\n", argv[op+1]);
2537 yield = 1;
2538 goto EXIT;
2539 }
2540 }
2541
2542 /* Set alternative malloc function */
2543
2544 #ifdef SUPPORT_PCRE8
2545 pcre_malloc = new_malloc;
2546 pcre_free = new_free;
2547 pcre_stack_malloc = stack_malloc;
2548 pcre_stack_free = stack_free;
2549 #endif
2550
2551 #ifdef SUPPORT_PCRE16
2552 pcre16_malloc = new_malloc;
2553 pcre16_free = new_free;
2554 pcre16_stack_malloc = stack_malloc;
2555 pcre16_stack_free = stack_free;
2556 #endif
2557
2558 /* Heading line unless quiet, then prompt for first regex if stdin */
2559
2560 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2561
2562 /* Main loop */
2563
2564 while (!done)
2565 {
2566 pcre *re = NULL;
2567 pcre_extra *extra = NULL;
2568
2569 #if !defined NOPOSIX /* There are still compilers that require no indent */
2570 regex_t preg;
2571 int do_posix = 0;
2572 #endif
2573
2574 const char *error;
2575 pcre_uint8 *markptr;
2576 pcre_uint8 *p, *pp, *ppp;
2577 pcre_uint8 *to_file = NULL;
2578 const pcre_uint8 *tables = NULL;
2579 unsigned long int get_options;
2580 unsigned long int true_size, true_study_size = 0;
2581 size_t size, regex_gotten_store;
2582 int do_allcaps = 0;
2583 int do_mark = 0;
2584 int do_study = 0;
2585 int no_force_study = 0;
2586 int do_debug = debug;
2587 int do_G = 0;
2588 int do_g = 0;
2589 int do_showinfo = showinfo;
2590 int do_showrest = 0;
2591 int do_showcaprest = 0;
2592 int do_flip = 0;
2593 int erroroffset, len, delimiter, poffset;
2594
2595 use_utf = 0;
2596 debug_lengths = 1;
2597
2598 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2599 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2600 fflush(outfile);
2601
2602 p = buffer;
2603 while (isspace(*p)) p++;
2604 if (*p == 0) continue;
2605
2606 /* See if the pattern is to be loaded pre-compiled from a file. */
2607
2608 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2609 {
2610 pcre_uint32 magic;
2611 pcre_uint8 sbuf[8];
2612 FILE *f;
2613
2614 p++;
2615 if (*p == '!')
2616 {
2617 do_debug = TRUE;
2618 do_showinfo = TRUE;
2619 p++;
2620 }
2621
2622 pp = p + (int)strlen((char *)p);
2623 while (isspace(pp[-1])) pp--;
2624 *pp = 0;
2625
2626 f = fopen((char *)p, "rb");
2627 if (f == NULL)
2628 {
2629 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2630 continue;
2631 }
2632
2633 first_gotten_store = 0;
2634 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2635
2636 true_size =
2637 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2638 true_study_size =
2639 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2640
2641 re = (pcre *)new_malloc(true_size);
2642 regex_gotten_store = first_gotten_store;
2643
2644 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2645
2646 magic = ((REAL_PCRE *)re)->magic_number;
2647 if (magic != MAGIC_NUMBER)
2648 {
2649 if (swap_uint32(magic) == MAGIC_NUMBER)
2650 {
2651 do_flip = 1;
2652 }
2653 else
2654 {
2655 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2656 fclose(f);
2657 continue;
2658 }
2659 }
2660
2661 /* We hide the byte-invert info for little and big endian tests. */
2662 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2663 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2664
2665 /* Now see if there is any following study data. */
2666
2667 if (true_study_size != 0)
2668 {
2669 pcre_study_data *psd;
2670
2671 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2672 extra->flags = PCRE_EXTRA_STUDY_DATA;
2673
2674 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2675 extra->study_data = psd;
2676
2677 if (fread(psd, 1, true_study_size, f) != true_study_size)
2678 {
2679 FAIL_READ:
2680 fprintf(outfile, "Failed to read data from %s\n", p);
2681 if (extra != NULL)
2682 {
2683 PCRE_FREE_STUDY(extra);
2684 }
2685 if (re != NULL) new_free(re);
2686 fclose(f);
2687 continue;
2688 }
2689 fprintf(outfile, "Study data loaded from %s\n", p);
2690 do_study = 1; /* To get the data output if requested */
2691 }
2692 else fprintf(outfile, "No study data\n");
2693
2694 /* Flip the necessary bytes. */
2695 if (do_flip)
2696 {
2697 int rc;
2698 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2699 if (rc == PCRE_ERROR_BADMODE)
2700 {
2701 /* Simulate the result of the function call below. */
2702 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2703 use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2704 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2705 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2706 continue;
2707 }
2708 }
2709
2710 /* Need to know if UTF-8 for printing data strings. */
2711
2712 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2713 use_utf = (get_options & PCRE_UTF8) != 0;
2714
2715 fclose(f);
2716 goto SHOW_INFO;
2717 }
2718
2719 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2720 the pattern; if it isn't complete, read more. */
2721
2722 delimiter = *p++;
2723
2724 if (isalnum(delimiter) || delimiter == '\\')
2725 {
2726 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2727 goto SKIP_DATA;
2728 }
2729
2730 pp = p;
2731 poffset = (int)(p - buffer);
2732
2733 for(;;)
2734 {
2735 while (*pp != 0)
2736 {
2737 if (*pp == '\\' && pp[1] != 0) pp++;
2738 else if (*pp == delimiter) break;
2739 pp++;
2740 }
2741 if (*pp != 0) break;
2742 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2743 {
2744 fprintf(outfile, "** Unexpected EOF\n");
2745 done = 1;
2746 goto CONTINUE;
2747 }
2748 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2749 }
2750
2751 /* The buffer may have moved while being extended; reset the start of data
2752 pointer to the correct relative point in the buffer. */
2753
2754 p = buffer + poffset;
2755
2756 /* If the first character after the delimiter is backslash, make
2757 the pattern end with backslash. This is purely to provide a way
2758 of testing for the error message when a pattern ends with backslash. */
2759
2760 if (pp[1] == '\\') *pp++ = '\\';
2761
2762 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2763 for callouts. */
2764
2765 *pp++ = 0;
2766 strcpy((char *)pbuffer, (char *)p);
2767
2768 /* Look for options after final delimiter */
2769
2770 options = 0;
2771 study_options = 0;
2772 log_store = showstore; /* default from command line */
2773
2774 while (*pp != 0)
2775 {
2776 switch (*pp++)
2777 {
2778 case 'f': options |= PCRE_FIRSTLINE; break;
2779 case 'g': do_g = 1; break;
2780 case 'i': options |= PCRE_CASELESS; break;
2781 case 'm': options |= PCRE_MULTILINE; break;
2782 case 's': options |= PCRE_DOTALL; break;
2783 case 'x': options |= PCRE_EXTENDED; break;
2784
2785 case '+':
2786 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2787 break;
2788
2789 case '=': do_allcaps = 1; break;
2790 case 'A': options |= PCRE_ANCHORED; break;
2791 case 'B': do_debug = 1; break;
2792 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2793 case 'D': do_debug = do_showinfo = 1; break;
2794 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2795 case 'F': do_flip = 1; break;
2796 case 'G': do_G = 1; break;
2797 case 'I': do_showinfo = 1; break;
2798 case 'J': options |= PCRE_DUPNAMES; break;
2799 case 'K': do_mark = 1; break;
2800 case 'M': log_store = 1; break;
2801 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2802
2803 #if !defined NOPOSIX
2804 case 'P': do_posix = 1; break;
2805 #endif
2806
2807 case 'S':
2808 if (do_study == 0)
2809 {
2810 do_study = 1;
2811 if (*pp == '+')
2812 {
2813 if (*(++pp) == '+')
2814 {
2815 verify_jit = TRUE;
2816 pp++;
2817 }
2818 if (*pp >= '1' && *pp <= '7')
2819 study_options |= jit_study_bits[*pp++ - '1'];
2820 else
2821 study_options |= jit_study_bits[6];
2822 }
2823 }
2824 else
2825 {
2826 do_study = 0;
2827 no_force_study = 1;
2828 }
2829 break;
2830
2831 case 'U': options |= PCRE_UNGREEDY; break;
2832 case 'W': options |= PCRE_UCP; break;
2833 case 'X': options |= PCRE_EXTRA; break;
2834 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2835 case 'Z': debug_lengths = 0; break;
2836 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2837 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2838
2839 case 'T':
2840 switch (*pp++)
2841 {
2842 case '0': tables = tables0; break;
2843 case '1': tables = tables1; break;
2844
2845 case '\r':
2846 case '\n':
2847 case ' ':
2848 case 0:
2849 fprintf(outfile, "** Missing table number after /T\n");
2850 goto SKIP_DATA;
2851
2852 default:
2853 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2854 goto SKIP_DATA;
2855 }
2856 break;
2857
2858 case 'L':
2859 ppp = pp;
2860 /* The '\r' test here is so that it works on Windows. */
2861 /* The '0' test is just in case this is an unterminated line. */
2862 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2863 *ppp = 0;
2864 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2865 {
2866 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2867 goto SKIP_DATA;
2868 }
2869 locale_set = 1;
2870 tables = PCRE_MAKETABLES;
2871 pp = ppp;
2872 break;
2873
2874 case '>':
2875 to_file = pp;
2876 while (*pp != 0) pp++;
2877 while (isspace(pp[-1])) pp--;
2878 *pp = 0;
2879 break;
2880
2881 case '<':
2882 {
2883 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2884 {
2885 options |= PCRE_JAVASCRIPT_COMPAT;
2886 pp += 3;
2887 }
2888 else
2889 {
2890 int x = check_newline(pp, outfile);
2891 if (x == 0) goto SKIP_DATA;
2892 options |= x;
2893 while (*pp++ != '>');
2894 }
2895 }
2896 break;
2897
2898 case '\r': /* So that it works in Windows */
2899 case '\n':
2900 case ' ':
2901 break;
2902
2903 default:
2904 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2905 goto SKIP_DATA;
2906 }
2907 }
2908
2909 /* Handle compiling via the POSIX interface, which doesn't support the
2910 timing, showing, or debugging options, nor the ability to pass over
2911 local character tables. Neither does it have 16-bit support. */
2912
2913 #if !defined NOPOSIX
2914 if (posix || do_posix)
2915 {
2916 int rc;
2917 int cflags = 0;
2918
2919 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2920 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2921 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2922 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2923 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2924 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2925 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2926
2927 first_gotten_store = 0;
2928 rc = regcomp(&preg, (char *)p, cflags);
2929
2930 /* Compilation failed; go back for another re, skipping to blank line
2931 if non-interactive. */
2932
2933 if (rc != 0)
2934 {
2935 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2936 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2937 goto SKIP_DATA;
2938 }
2939 }
2940
2941 /* Handle compiling via the native interface */
2942
2943 else
2944 #endif /* !defined NOPOSIX */
2945
2946 {
2947 /* In 16-bit mode, convert the input. */
2948
2949 #ifdef SUPPORT_PCRE16
2950 if (use_pcre16)
2951 {
2952 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2953 {
2954 case -1:
2955 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2956 "converted to UTF-16\n");
2957 goto SKIP_DATA;
2958
2959 case -2:
2960 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2961 "cannot be converted to UTF-16\n");
2962 goto SKIP_DATA;
2963
2964 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2965 fprintf(outfile, "**Failed: character value greater than 0xffff "
2966 "cannot be converted to 16-bit in non-UTF mode\n");
2967 goto SKIP_DATA;
2968
2969 default:
2970 break;
2971 }
2972 p = (pcre_uint8 *)buffer16;
2973 }
2974 #endif
2975
2976 /* Compile many times when timing */
2977
2978 if (timeit > 0)
2979 {
2980 register int i;
2981 clock_t time_taken;
2982 clock_t start_time = clock();
2983 for (i = 0; i < timeit; i++)
2984 {
2985 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2986 if (re != NULL) free(re);
2987 }
2988 time_taken = clock() - start_time;
2989 fprintf(outfile, "Compile time %.4f milliseconds\n",
2990 (((double)time_taken * 1000.0) / (double)timeit) /
2991 (double)CLOCKS_PER_SEC);
2992 }
2993
2994 first_gotten_store = 0;
2995 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2996
2997 /* Compilation failed; go back for another re, skipping to blank line
2998 if non-interactive. */
2999
3000 if (re == NULL)
3001 {
3002 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3003 SKIP_DATA:
3004 if (infile != stdin)
3005 {
3006 for (;;)
3007 {
3008 if (extend_inputline(infile, buffer, NULL) == NULL)
3009 {
3010 done = 1;
3011 goto CONTINUE;
3012 }
3013 len = (int)strlen((char *)buffer);
3014 while (len > 0 && isspace(buffer[len-1])) len--;
3015 if (len == 0) break;
3016 }
3017 fprintf(outfile, "\n");
3018 }
3019 goto CONTINUE;
3020 }
3021
3022 /* Compilation succeeded. It is now possible to set the UTF-8 option from
3023 within the regex; check for this so that we know how to process the data
3024 lines. */
3025
3026 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3027 goto SKIP_DATA;
3028 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3029
3030 /* Extract the size for possible writing before possibly flipping it,
3031 and remember the store that was got. */
3032
3033 true_size = ((REAL_PCRE *)re)->size;
3034 regex_gotten_store = first_gotten_store;
3035
3036 /* Output code size information if requested */
3037
3038 if (log_store)
3039 fprintf(outfile, "Memory allocation (code space): %d\n",
3040 (int)(first_gotten_store -
3041 sizeof(REAL_PCRE) -
3042 ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3043
3044 /* If -s or /S was present, study the regex to generate additional info to
3045 help with the matching, unless the pattern has the SS option, which
3046 suppresses the effect of /S (used for a few test patterns where studying is
3047 never sensible). */
3048
3049 if (do_study || (force_study >= 0 && !no_force_study))
3050 {
3051 if (timeit > 0)
3052 {
3053 register int i;
3054 clock_t time_taken;
3055 clock_t start_time = clock();
3056 for (i = 0; i < timeit; i++)
3057 {
3058 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3059 }
3060 time_taken = clock() - start_time;
3061 if (extra != NULL)
3062 {
3063 PCRE_FREE_STUDY(extra);
3064 }
3065 fprintf(outfile, " Study time %.4f milliseconds\n",
3066 (((double)time_taken * 1000.0) / (double)timeit) /
3067 (double)CLOCKS_PER_SEC);
3068 }
3069 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3070 if (error != NULL)
3071 fprintf(outfile, "Failed to study: %s\n", error);
3072 else if (extra != NULL)
3073 {
3074 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3075 if (log_store)
3076 {
3077 size_t jitsize;
3078 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3079 jitsize != 0)
3080 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3081 }
3082 }
3083 }
3084
3085 /* If /K was present, we set up for handling MARK data. */
3086
3087 if (do_mark)
3088 {
3089 if (extra == NULL)
3090 {
3091 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3092 extra->flags = 0;
3093 }
3094 extra->mark = &markptr;
3095 extra->flags |= PCRE_EXTRA_MARK;
3096 }
3097
3098 /* Extract and display information from the compiled data if required. */
3099
3100 SHOW_INFO:
3101
3102 if (do_debug)
3103 {
3104 fprintf(outfile, "------------------------------------------------------------------\n");
3105 PCRE_PRINTINT(re, outfile, debug_lengths);
3106 }
3107
3108 /* We already have the options in get_options (see above) */
3109
3110 if (do_showinfo)
3111 {
3112 unsigned long int all_options;
3113 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3114 hascrorlf, maxlookbehind;
3115 int nameentrysize, namecount;
3116 const pcre_uint8 *nametable;
3117
3118 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3119 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3120 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3121 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3122 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3123 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3124 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3125 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3126 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3127 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3128 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3129 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3130 != 0)
3131 goto SKIP_DATA;
3132
3133 if (size != regex_gotten_store) fprintf(outfile,
3134 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3135 (int)size, (int)regex_gotten_store);
3136
3137 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3138 if (backrefmax > 0)
3139 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3140
3141 if (namecount > 0)
3142 {
3143 fprintf(outfile, "Named capturing subpatterns:\n");
3144 while (namecount-- > 0)
3145 {
3146 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3147 int imm2_size = use_pcre16 ? 1 : 2;
3148 #else
3149 int imm2_size = IMM2_SIZE;
3150 #endif
3151 int length = (int)STRLEN(nametable + imm2_size);
3152 fprintf(outfile, " ");
3153 PCHARSV(nametable, imm2_size, length, outfile);
3154 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3155 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3156 fprintf(outfile, "%3d\n", use_pcre16?
3157 (int)(((PCRE_SPTR16)nametable)[0])
3158 :((int)nametable[0] << 8) | (int)nametable[1]);
3159 nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3160 #else
3161 fprintf(outfile, "%3d\n", GET2(nametable, 0));
3162 #ifdef SUPPORT_PCRE8
3163 nametable += nameentrysize;
3164 #else
3165 nametable += nameentrysize * 2;
3166 #endif
3167 #endif
3168 }
3169 }
3170
3171 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3172 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3173
3174 all_options = ((REAL_PCRE *)re)->options;
3175 if (do_flip) all_options = swap_uint32(all_options);
3176
3177 if (get_options == 0) fprintf(outfile, "No options\n");
3178 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3179 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3180 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3181 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3182 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3183 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3184 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3185 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3186 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3187 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3188 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3189 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3190 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3191 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3192 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3193 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3194 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3195 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3196
3197 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3198
3199 switch (get_options & PCRE_NEWLINE_BITS)
3200 {
3201 case PCRE_NEWLINE_CR:
3202 fprintf(outfile, "Forced newline sequence: CR\n");
3203 break;
3204
3205 case PCRE_NEWLINE_LF:
3206 fprintf(outfile, "Forced newline sequence: LF\n");
3207 break;
3208
3209 case PCRE_NEWLINE_CRLF:
3210 fprintf(outfile, "Forced newline sequence: CRLF\n");
3211 break;
3212
3213 case PCRE_NEWLINE_ANYCRLF:
3214 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3215 break;
3216
3217 case PCRE_NEWLINE_ANY:
3218 fprintf(outfile, "Forced newline sequence: ANY\n");
3219 break;
3220
3221 default:
3222 break;
3223 }
3224
3225 if (first_char == -1)
3226 {
3227 fprintf(outfile, "First char at start or follows newline\n");
3228 }
3229 else if (first_char < 0)
3230 {
3231 fprintf(outfile, "No first char\n");
3232 }
3233 else
3234 {
3235 const char *caseless =
3236 ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3237 "" : " (caseless)";
3238
3239 if (PRINTOK(first_char))
3240 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3241 else
3242 {
3243 fprintf(outfile, "First char = ");
3244 pchar(first_char, outfile);
3245 fprintf(outfile, "%s\n", caseless);
3246 }
3247 }
3248
3249 if (need_char < 0)
3250 {
3251 fprintf(outfile, "No need char\n");
3252 }
3253 else
3254 {
3255 const char *caseless =
3256 ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3257 "" : " (caseless)";
3258
3259 if (PRINTOK(need_char))
3260 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3261 else
3262 {
3263 fprintf(outfile, "Need char = ");
3264 pchar(need_char, outfile);
3265 fprintf(outfile, "%s\n", caseless);
3266 }
3267 }
3268
3269 if (maxlookbehind > 0)
3270 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3271
3272 /* Don't output study size; at present it is in any case a fixed
3273 value, but it varies, depending on the computer architecture, and
3274 so messes up the test suite. (And with the /F option, it might be
3275 flipped.) If study was forced by an external -s, don't show this
3276 information unless -i or -d was also present. This means that, except
3277 when auto-callouts are involved, the output from runs with and without
3278 -s should be identical. */
3279
3280 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3281 {
3282 if (extra == NULL)
3283 fprintf(outfile, "Study returned NULL\n");
3284 else
3285 {
3286 pcre_uint8 *start_bits = NULL;
3287 int minlength;
3288
3289 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3290 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3291
3292 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3293 {
3294 if (start_bits == NULL)
3295 fprintf(outfile, "No set of starting bytes\n");
3296 else
3297 {
3298 int i;
3299 int c = 24;
3300 fprintf(outfile, "Starting byte set: ");
3301 for (i = 0; i < 256; i++)
3302 {
3303 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3304 {
3305 if (c > 75)
3306 {
3307 fprintf(outfile, "\n ");
3308 c = 2;
3309 }
3310 if (PRINTOK(i) && i != ' ')
3311 {
3312 fprintf(outfile, "%c ", i);
3313 c += 2;
3314 }
3315 else
3316 {
3317 fprintf(outfile, "\\x%02x ", i);
3318 c += 5;
3319 }
3320 }
3321 }
3322 fprintf(outfile, "\n");
3323 }
3324 }
3325 }
3326
3327 /* Show this only if the JIT was set by /S, not by -s. */
3328
3329 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3330 {
3331 int jit;
3332 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3333 {
3334 if (jit)
3335 fprintf(outfile, "JIT study was successful\n");
3336 else
3337 #ifdef SUPPORT_JIT
3338 fprintf(outfile, "JIT study was not successful\n");
3339 #else
3340 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3341 #endif
3342 }
3343 }
3344 }
3345 }
3346
3347 /* If the '>' option was present, we write out the regex to a file, and
3348 that is all. The first 8 bytes of the file are the regex length and then
3349 the study length, in big-endian order. */
3350
3351 if (to_file != NULL)
3352 {
3353 FILE *f = fopen((char *)to_file, "wb");
3354 if (f == NULL)
3355 {
3356 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3357 }
3358 else
3359 {
3360 pcre_uint8 sbuf[8];
3361
3362 if (do_flip) regexflip(re, extra);
3363 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3364 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3365 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3366 sbuf[3] = (pcre_uint8)((true_size) & 255);
3367 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3368 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3369 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3370 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3371
3372 if (fwrite(sbuf, 1, 8, f) < 8 ||
3373 fwrite(re, 1, true_size, f) < true_size)
3374 {
3375 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3376 }
3377 else
3378 {
3379 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3380
3381 /* If there is study data, write it. */
3382
3383 if (extra != NULL)
3384 {
3385 if (fwrite(extra->study_data, 1, true_study_size, f) <
3386 true_study_size)
3387 {
3388 fprintf(outfile, "Write error on %s: %s\n", to_file,
3389 strerror(errno));
3390 }
3391 else fprintf(outfile, "Study data written to %s\n", to_file);
3392 }
3393 }
3394 fclose(f);
3395 }
3396
3397 new_free(re);
3398 if (extra != NULL)
3399 {
3400 PCRE_FREE_STUDY(extra);
3401 }
3402 if (locale_set)
3403 {
3404 new_free((void *)tables);
3405 setlocale(LC_CTYPE, "C");
3406 locale_set = 0;
3407 }
3408 continue; /* With next regex */
3409 }
3410 } /* End of non-POSIX compile */
3411
3412 /* Read data lines and test them */
3413
3414 for (;;)
3415 {
3416 pcre_uint8 *q;
3417 pcre_uint8 *bptr;
3418 int *use_offsets = offsets;
3419 int use_size_offsets = size_offsets;
3420 int callout_data = 0;
3421 int callout_data_set = 0;
3422 int count, c;
3423 int copystrings = 0;
3424 int find_match_limit = default_find_match_limit;
3425 int getstrings = 0;
3426 int getlist = 0;
3427 int gmatched = 0;
3428 int start_offset = 0;
3429 int start_offset_sign = 1;
3430 int g_notempty = 0;
3431 int use_dfa = 0;
3432
3433 *copynames = 0;
3434 *getnames = 0;
3435
3436 #ifdef SUPPORT_PCRE16
3437 cn16ptr = copynames;
3438 gn16ptr = getnames;
3439 #endif
3440 #ifdef SUPPORT_PCRE8
3441 cn8ptr = copynames8;
3442 gn8ptr = getnames8;
3443 #endif
3444
3445 SET_PCRE_CALLOUT(callout);
3446 first_callout = 1;
3447 last_callout_mark = NULL;
3448 callout_extra = 0;
3449 callout_count = 0;
3450 callout_fail_count = 999999;
3451 callout_fail_id = -1;
3452 show_malloc = 0;
3453 options = 0;
3454
3455 if (extra != NULL) extra->flags &=
3456 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3457
3458 len = 0;
3459 for (;;)
3460 {
3461 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3462 {
3463 if (len > 0) /* Reached EOF without hitting a newline */
3464 {
3465 fprintf(outfile, "\n");
3466 break;
3467 }
3468 done = 1;
3469 goto CONTINUE;
3470 }
3471 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3472 len = (int)strlen((char *)buffer);
3473 if (buffer[len-1] == '\n') break;
3474 }
3475
3476 while (len > 0 && isspace(buffer[len-1])) len--;
3477 buffer[len] = 0;
3478 if (len == 0) break;
3479
3480 p = buffer;
3481 while (isspace(*p)) p++;
3482
3483 bptr = q = dbuffer;
3484 while ((c = *p++) != 0)
3485 {
3486 int i = 0;
3487 int n = 0;
3488
3489 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3490 In non-UTF mode, allow the value of the byte to fall through to later,
3491 where values greater than 127 are turned into UTF-8 when running in
3492 16-bit mode. */
3493
3494 if (c != '\\')
3495 {
3496 if (use_utf)
3497 {
3498 *q++ = c;
3499 continue;
3500 }
3501 }
3502
3503 /* Handle backslash escapes */
3504
3505 else switch ((c = *p++))
3506 {
3507 case 'a': c = 7; break;
3508 case 'b': c = '\b'; break;
3509 case 'e': c = 27; break;
3510 case 'f': c = '\f'; break;
3511 case 'n': c = '\n'; break;
3512 case 'r': c = '\r'; break;
3513 case 't': c = '\t'; break;
3514 case 'v': c = '\v'; break;
3515
3516 case '0': case '1': case '2': case '3':
3517 case '4': case '5': case '6': case '7':
3518 c -= '0';
3519 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3520 c = c * 8 + *p++ - '0';
3521 break;
3522
3523 case 'x':
3524 if (*p == '{')
3525 {
3526 pcre_uint8 *pt = p;
3527 c = 0;
3528
3529 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3530 when isxdigit() is a macro that refers to its argument more than
3531 once. This is banned by the C Standard, but apparently happens in at
3532 least one MacOS environment. */
3533
3534 for (pt++; isxdigit(*pt); pt++)
3535 {
3536 if (++i == 9)
3537 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3538 "using only the first eight.\n");
3539 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3540 }
3541 if (*pt == '}')
3542 {
3543 p = pt + 1;
3544 break;
3545 }
3546 /* Not correct form for \x{...}; fall through */
3547 }
3548
3549 /* \x without {} always defines just one byte in 8-bit mode. This
3550 allows UTF-8 characters to be constructed byte by byte, and also allows
3551 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3552 Otherwise, pass it down to later code so that it can be turned into
3553 UTF-8 when running in 16-bit mode. */
3554
3555 c = 0;
3556 while (i++ < 2 && isxdigit(*p))
3557 {
3558 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3559 p++;
3560 }
3561 if (use_utf)
3562 {
3563 *q++ = c;
3564 continue;
3565 }
3566 break;
3567
3568 case 0: /* \ followed by EOF allows for an empty line */
3569 p--;
3570 continue;
3571
3572 case '>':
3573 if (*p == '-')
3574 {
3575 start_offset_sign = -1;
3576 p++;
3577 }
3578 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3579 start_offset *= start_offset_sign;
3580 continue;
3581
3582 case 'A': /* Option setting */
3583 options |= PCRE_ANCHORED;
3584 continue;
3585
3586 case 'B':
3587 options |= PCRE_NOTBOL;
3588 continue;
3589
3590 case 'C':
3591 if (isdigit(*p)) /* Set copy string */
3592 {
3593 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3594 copystrings |= 1 << n;
3595 }
3596 else if (isalnum(*p))
3597 {
3598 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3599 }
3600 else if (*p == '+')
3601 {
3602 callout_extra = 1;
3603 p++;
3604 }
3605 else if (*p == '-')
3606 {
3607 SET_PCRE_CALLOUT(NULL);
3608 p++;
3609 }
3610 else if (*p == '!')
3611 {
3612 callout_fail_id = 0;
3613 p++;
3614 while(isdigit(*p))
3615 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3616 callout_fail_count = 0;
3617 if (*p == '!')
3618 {
3619 p++;
3620 while(isdigit(*p))
3621 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3622 }
3623 }
3624 else if (*p == '*')
3625 {
3626 int sign = 1;
3627 callout_data = 0;
3628 if (*(++p) == '-') { sign = -1; p++; }
3629 while(isdigit(*p))
3630 callout_data = callout_data * 10 + *p++ - '0';
3631 callout_data *= sign;
3632 callout_data_set = 1;
3633 }
3634 continue;
3635
3636 #if !defined NODFA
3637 case 'D':
3638 #if !defined NOPOSIX
3639 if (posix || do_posix)
3640 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3641 else
3642 #endif
3643 use_dfa = 1;
3644 continue;
3645 #endif
3646
3647 #if !defined NODFA
3648 case 'F':
3649 options |= PCRE_DFA_SHORTEST;
3650 continue;
3651 #endif
3652
3653 case 'G':
3654 if (isdigit(*p))
3655 {
3656 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3657 getstrings |= 1 << n;
3658 }
3659 else if (isalnum(*p))
3660 {
3661 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3662 }
3663 continue;
3664
3665 case 'J':
3666 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3667 if (extra != NULL
3668 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3669 && extra->executable_jit != NULL)
3670 {
3671 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3672 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3673 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3674 }
3675 continue;
3676
3677 case 'L':
3678 getlist = 1;
3679 continue;
3680
3681 case 'M':
3682 find_match_limit = 1;
3683 continue;
3684
3685 case 'N':
3686 if ((options & PCRE_NOTEMPTY) != 0)
3687 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3688 else
3689 options |= PCRE_NOTEMPTY;
3690 continue;
3691
3692 case 'O':
3693 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3694 if (n > size_offsets_max)
3695 {
3696 size_offsets_max = n;
3697 free(offsets);
3698 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3699 if (offsets == NULL)
3700 {
3701 printf("** Failed to get %d bytes of memory for offsets vector\n",
3702 (int)(size_offsets_max * sizeof(int)));
3703 yield = 1;
3704 goto EXIT;
3705 }
3706 }
3707 use_size_offsets = n;
3708 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3709 continue;
3710
3711 case 'P':
3712 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3713 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3714 continue;
3715
3716 case 'Q':
3717 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3718 if (extra == NULL)
3719 {
3720 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3721 extra->flags = 0;
3722 }
3723 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3724 extra->match_limit_recursion = n;
3725 continue;
3726
3727 case 'q':
3728 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3729 if (extra == NULL)
3730 {
3731 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3732 extra->flags = 0;
3733 }
3734 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3735 extra->match_limit = n;
3736 continue;
3737
3738 #if !defined NODFA
3739 case 'R':
3740 options |= PCRE_DFA_RESTART;
3741 continue;
3742 #endif
3743
3744 case 'S':
3745 show_malloc = 1;
3746 continue;
3747
3748 case 'Y':
3749 options |= PCRE_NO_START_OPTIMIZE;
3750 continue;
3751
3752 case 'Z':
3753 options |= PCRE_NOTEOL;
3754 continue;
3755
3756 case '?':
3757 options |= PCRE_NO_UTF8_CHECK;
3758 continue;
3759
3760 case '<':
3761 {
3762 int x = check_newline(p, outfile);
3763 if (x == 0) goto NEXT_DATA;
3764 options |= x;
3765 while (*p++ != '>');
3766 }
3767 continue;
3768 }
3769
3770 /* We now have a character value in c that may be greater than 255. In
3771 16-bit mode, we always convert characters to UTF-8 so that values greater
3772 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3773 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3774 mode must have come from \x{...} or octal constructs because values from
3775 \x.. get this far only in non-UTF mode. */
3776
3777 #if !defined NOUTF || defined SUPPORT_PCRE16
3778 if (use_pcre16 || use_utf)
3779 {
3780 pcre_uint8 buff8[8];
3781 int ii, utn;
3782 utn = ord2utf8(c, buff8);
3783 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3784 }
3785 else
3786 #endif
3787 {
3788 if (c > 255)
3789 {
3790 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3791 "and UTF-8 mode is not enabled.\n", c);
3792 fprintf(outfile, "** Truncation will probably give the wrong "
3793 "result.\n");
3794 }
3795 *q++ = c;
3796 }
3797 }
3798
3799 /* Reached end of subject string */
3800
3801 *q = 0;
3802 len = (int)(q - dbuffer);
3803
3804 /* Move the data to the end of the buffer so that a read over the end of
3805 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3806 we are using the POSIX interface, we must include the terminating zero. */
3807
3808 #if !defined NOPOSIX
3809 if (posix || do_posix)
3810 {
3811 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3812 bptr += buffer_size - len - 1;
3813 }
3814 else
3815 #endif
3816 {
3817 memmove(bptr + buffer_size - len, bptr, len);
3818 bptr += buffer_size - len;
3819 }
3820
3821 if ((all_use_dfa || use_dfa) && find_match_limit)
3822 {
3823 printf("**Match limit not relevant for DFA matching: ignored\n");
3824 find_match_limit = 0;
3825 }
3826
3827 /* Handle matching via the POSIX interface, which does not
3828 support timing or playing with the match limit or callout data. */
3829
3830 #if !defined NOPOSIX
3831 if (posix || do_posix)
3832 {
3833 int rc;
3834 int eflags = 0;
3835 regmatch_t *pmatch = NULL;
3836 if (use_size_offsets > 0)
3837 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3838 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3839 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3840 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3841
3842 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3843
3844 if (rc != 0)
3845 {
3846 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3847 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3848 }
3849 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3850 != 0)
3851 {
3852 fprintf(outfile, "Matched with REG_NOSUB\n");
3853 }
3854 else
3855 {
3856 size_t i;
3857 for (i = 0; i < (size_t)use_size_offsets; i++)
3858 {
3859 if (pmatch[i].rm_so >= 0)
3860 {
3861 fprintf(outfile, "%2d: ", (int)i);
3862 PCHARSV(dbuffer, pmatch[i].rm_so,
3863 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3864 fprintf(outfile, "\n");
3865 if (do_showcaprest || (i == 0 && do_showrest))
3866 {
3867 fprintf(outfile, "%2d+ ", (int)i);
3868 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3869 outfile);
3870 fprintf(outfile, "\n");
3871 }
3872 }
3873 }
3874 }
3875 free(pmatch);
3876 goto NEXT_DATA;
3877 }
3878
3879 #endif /* !defined NOPOSIX */
3880
3881 /* Handle matching via the native interface - repeats for /g and /G */
3882
3883 #ifdef SUPPORT_PCRE16
3884 if (use_pcre16)
3885 {
3886 len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3887 switch(len)
3888 {
3889 case -1:
3890 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3891 "converted to UTF-16\n");
3892 goto NEXT_DATA;
3893
3894 case -2:
3895 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3896 "cannot be converted to UTF-16\n");
3897 goto NEXT_DATA;
3898
3899 case -3:
3900 fprintf(outfile, "**Failed: character value greater than 0xffff "
3901 "cannot be converted to 16-bit in non-UTF mode\n");
3902 goto NEXT_DATA;
3903
3904 default:
3905 break;
3906 }
3907 bptr = (pcre_uint8 *)buffer16;
3908 }
3909 #endif
3910
3911 /* Ensure that there is a JIT callback if we want to verify that JIT was
3912 actually used. If jit_stack == NULL, no stack has yet been assigned. */
3913
3914 if (verify_jit && jit_stack == NULL && extra != NULL)
3915 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
3916
3917 for (;; gmatched++) /* Loop for /g or /G */
3918 {
3919 markptr = NULL;
3920 jit_was_used = FALSE;
3921
3922 if (timeitm > 0)
3923 {
3924 register int i;
3925 clock_t time_taken;
3926 clock_t start_time = clock();
3927
3928 #if !defined NODFA
3929 if (all_use_dfa || use_dfa)
3930 {
3931 int workspace[1000];
3932 for (i = 0; i < timeitm; i++)
3933 {
3934 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3935 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3936 (sizeof(workspace)/sizeof(int)));
3937 }
3938 }
3939 else
3940 #endif
3941
3942 for (i = 0; i < timeitm; i++)
3943 {
3944 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3945 (options | g_notempty), use_offsets, use_size_offsets);
3946 }
3947 time_taken = clock() - start_time;
3948 fprintf(outfile, "Execute time %.4f milliseconds\n",
3949 (((double)time_taken * 1000.0) / (double)timeitm) /
3950 (double)CLOCKS_PER_SEC);
3951 }
3952
3953 /* If find_match_limit is set, we want to do repeated matches with
3954 varying limits in order to find the minimum value for the match limit and
3955 for the recursion limit. The match limits are relevant only to the normal
3956 running of pcre_exec(), so disable the JIT optimization. This makes it
3957 possible to run the same set of tests with and without JIT externally
3958 requested. */
3959
3960 if (find_match_limit)
3961 {
3962 if (extra == NULL)
3963 {
3964 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3965 extra->flags = 0;
3966 }
3967 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3968
3969 (void)check_match_limit(re, extra, bptr, len, start_offset,
3970 options|g_notempty, use_offsets, use_size_offsets,
3971 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3972 PCRE_ERROR_MATCHLIMIT, "match()");
3973
3974 count = check_match_limit(re, extra, bptr, len, start_offset,
3975 options|g_notempty, use_offsets, use_size_offsets,
3976 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3977 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3978 }
3979
3980 /* If callout_data is set, use the interface with additional data */
3981
3982 else if (callout_data_set)
3983 {
3984 if (extra == NULL)
3985 {
3986 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3987 extra->flags = 0;
3988 }
3989 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3990 extra->callout_data = &callout_data;
3991 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3992 options | g_notempty, use_offsets, use_size_offsets);
3993 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3994 }
3995
3996 /* The normal case is just to do the match once, with the default
3997 value of match_limit. */
3998
3999 #if !defined NODFA
4000 else if (all_use_dfa || use_dfa)
4001 {
4002 int workspace[1000];
4003 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4004 (options | g_notempty), use_offsets, use_size_offsets, workspace,
4005 (sizeof(workspace)/sizeof(int)));
4006 if (count == 0)
4007 {
4008 fprintf(outfile, "Matched, but too many subsidiary matches\n");
4009 count = use_size_offsets/2;
4010 }
4011 }
4012 #endif
4013
4014 else
4015 {
4016 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4017 options | g_notempty, use_offsets, use_size_offsets);
4018 if (count == 0)
4019 {
4020 fprintf(outfile, "Matched, but too many substrings\n");
4021 count = use_size_offsets/3;
4022 }
4023 }
4024
4025 /* Matched */
4026
4027 if (count >= 0)
4028 {
4029 int i, maxcount;
4030 void *cnptr, *gnptr;
4031
4032 #if !defined NODFA
4033 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
4034 #endif
4035 maxcount = use_size_offsets/3;
4036
4037 /* This is a check against a lunatic return value. */
4038
4039 if (count > maxcount)
4040 {
4041 fprintf(outfile,
4042 "** PCRE error: returned count %d is too big for offset size %d\n",
4043 count, use_size_offsets);
4044 count = use_size_offsets/3;
4045 if (do_g || do_G)
4046 {
4047 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
4048 do_g = do_G = FALSE; /* Break g/G loop */
4049 }
4050 }
4051
4052 /* do_allcaps requests showing of all captures in the pattern, to check
4053 unset ones at the end. */
4054
4055 if (do_allcaps)
4056 {
4057 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4058 goto SKIP_DATA;
4059 count++; /* Allow for full match */
4060 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4061 }
4062
4063 /* Output the captured substrings */
4064
4065 for (i = 0; i < count * 2; i += 2)
4066 {
4067 if (use_offsets[i] < 0)
4068 {
4069 if (use_offsets[i] != -1)
4070 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4071 use_offsets[i], i);
4072 if (use_offsets[i+1] != -1)
4073 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4074 use_offsets[i+1], i+1);
4075 fprintf(outfile, "%2d: <unset>\n", i/2);
4076 }
4077 else
4078 {
4079 fprintf(outfile, "%2d: ", i/2);
4080 PCHARSV(bptr, use_offsets[i],
4081 use_offsets[i+1] - use_offsets[i], outfile);
4082 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4083 fprintf(outfile, "\n");
4084 if (do_showcaprest || (i == 0 && do_showrest))
4085 {
4086 fprintf(outfile, "%2d+ ", i/2);
4087 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4088 outfile);
4089 fprintf(outfile, "\n");
4090 }
4091 }
4092 }
4093
4094 if (markptr != NULL)
4095 {
4096 fprintf(outfile, "MK: ");
4097 PCHARSV(markptr, 0, -1, outfile);
4098 fprintf(outfile, "\n");
4099 }
4100
4101 for (i = 0; i < 32; i++)
4102 {
4103 if ((copystrings & (1 << i)) != 0)
4104 {
4105 int rc;
4106 char copybuffer[256];
4107 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4108 copybuffer, sizeof(copybuffer));
4109 if (rc < 0)
4110 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4111 else
4112 {
4113 fprintf(outfile, "%2dC ", i);
4114 PCHARSV(copybuffer, 0, rc, outfile);
4115 fprintf(outfile, " (%d)\n", rc);
4116 }
4117 }
4118 }
4119
4120 cnptr = copynames;
4121 for (;;)
4122 {
4123 int rc;
4124 char copybuffer[256];
4125
4126 if (use_pcre16)
4127 {
4128 if (*(pcre_uint16 *)cnptr == 0) break;
4129 }
4130 else
4131 {
4132 if (*(pcre_uint8 *)cnptr == 0) break;
4133 }
4134
4135 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4136 cnptr, copybuffer, sizeof(copybuffer));
4137
4138 if (rc < 0)
4139 {
4140 fprintf(outfile, "copy substring ");
4141 PCHARSV(cnptr, 0, -1, outfile);
4142 fprintf(outfile, " failed %d\n", rc);
4143 }
4144 else
4145 {
4146 fprintf(outfile, " C ");
4147 PCHARSV(copybuffer, 0, rc, outfile);
4148 fprintf(outfile, " (%d) ", rc);
4149 PCHARSV(cnptr, 0, -1, outfile);
4150 putc('\n', outfile);
4151 }
4152
4153 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4154 }
4155
4156 for (i = 0; i < 32; i++)
4157 {
4158 if ((getstrings & (1 << i)) != 0)
4159 {
4160 int rc;
4161 const char *substring;
4162 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4163 if (rc < 0)
4164 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4165 else
4166 {
4167 fprintf(outfile, "%2dG ", i);
4168 PCHARSV(substring, 0, rc, outfile);
4169 fprintf(outfile, " (%d)\n", rc);
4170 PCRE_FREE_SUBSTRING(substring);
4171 }
4172 }
4173 }
4174
4175 gnptr = getnames;
4176 for (;;)
4177 {
4178 int rc;
4179 const char *substring;
4180
4181 if (use_pcre16)
4182 {
4183 if (*(pcre_uint16 *)gnptr == 0) break;
4184 }
4185 else
4186 {
4187 if (*(pcre_uint8 *)gnptr == 0) break;
4188 }
4189
4190 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4191 gnptr, &substring);
4192 if (rc < 0)
4193 {
4194 fprintf(outfile, "get substring ");
4195 PCHARSV(gnptr, 0, -1, outfile);
4196 fprintf(outfile, " failed %d\n", rc);
4197 }
4198 else
4199 {
4200 fprintf(outfile, " G ");
4201 PCHARSV(substring, 0, rc, outfile);
4202 fprintf(outfile, " (%d) ", rc);
4203 PCHARSV(gnptr, 0, -1, outfile);
4204 PCRE_FREE_SUBSTRING(substring);
4205 putc('\n', outfile);
4206 }
4207
4208 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4209 }
4210
4211 if (getlist)
4212 {
4213 int rc;
4214 const char **stringlist;
4215 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4216 if (rc < 0)
4217 fprintf(outfile, "get substring list failed %d\n", rc);
4218 else
4219 {
4220 for (i = 0; i < count; i++)
4221 {
4222 fprintf(outfile, "%2dL ", i);
4223 PCHARSV(stringlist[i], 0, -1, outfile);
4224 putc('\n', outfile);
4225 }
4226 if (stringlist[i] != NULL)
4227 fprintf(outfile, "string list not terminated by NULL\n");
4228 PCRE_FREE_SUBSTRING_LIST(stringlist);
4229 }
4230 }
4231 }
4232
4233 /* There was a partial match */
4234
4235 else if (count == PCRE_ERROR_PARTIAL)
4236 {
4237 if (markptr == NULL) fprintf(outfile, "Partial match");
4238 else
4239 {
4240 fprintf(outfile, "Partial match, mark=");
4241 PCHARSV(markptr, 0, -1, outfile);
4242 }
4243 if (use_size_offsets > 1)
4244 {
4245 fprintf(outfile, ": ");
4246 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4247 outfile);
4248 }
4249 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4250 fprintf(outfile, "\n");
4251 break; /* Out of the /g loop */
4252 }
4253
4254 /* Failed to match. If this is a /g or /G loop and we previously set
4255 g_notempty after a null match, this is not necessarily the end. We want
4256 to advance the start offset, and continue. We won't be at the end of the
4257 string - that was checked before setting g_notempty.
4258
4259 Complication arises in the case when the newline convention is "any",
4260 "crlf", or "anycrlf". If the previous match was at the end of a line
4261 terminated by CRLF, an advance of one character just passes the \r,
4262 whereas we should prefer the longer newline sequence, as does the code in
4263 pcre_exec(). Fudge the offset value to achieve this. We check for a
4264 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4265 find the default.
4266
4267 Otherwise, in the case of UTF-8 matching, the advance must be one
4268 character, not one byte. */
4269
4270 else
4271 {
4272 if (g_notempty != 0)
4273 {
4274 int onechar = 1;
4275 unsigned int obits = ((REAL_PCRE *)re)->options;
4276 use_offsets[0] = start_offset;
4277 if ((obits & PCRE_NEWLINE_BITS) == 0)
4278 {
4279 int d;
4280 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4281 /* Note that these values are always the ASCII ones, even in
4282 EBCDIC environments. CR = 13, NL = 10. */
4283 obits = (d == 13)? PCRE_NEWLINE_CR :
4284 (d == 10)? PCRE_NEWLINE_LF :
4285 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4286 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4287 (d == -1)? PCRE_NEWLINE_ANY : 0;
4288 }
4289 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4290 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4291 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4292 &&
4293 start_offset < len - 1 &&
4294 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4295 (use_pcre16?
4296 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4297 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4298 :
4299 bptr[start_offset] == '\r'
4300 && bptr[start_offset + 1] == '\n')
4301 #elif defined SUPPORT_PCRE16
4302 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4303 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4304 #else
4305 bptr[start_offset] == '\r'
4306 && bptr[start_offset + 1] == '\n'
4307 #endif
4308 )
4309 onechar++;
4310 else if (use_utf)
4311 {
4312 while (start_offset + onechar < len)
4313 {
4314 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4315 onechar++;
4316 }
4317 }
4318 use_offsets[1] = start_offset + onechar;
4319 }
4320 else
4321 {
4322 switch(count)
4323 {
4324 case PCRE_ERROR_NOMATCH:
4325 if (gmatched == 0)
4326 {
4327 if (markptr == NULL)
4328 {
4329 fprintf(outfile, "No match");
4330 }
4331 else
4332 {
4333 fprintf(outfile, "No match, mark = ");
4334 PCHARSV(markptr, 0, -1, outfile);
4335 }
4336 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4337 putc('\n', outfile);
4338 }
4339 break;
4340
4341 case PCRE_ERROR_BADUTF8:
4342 case PCRE_ERROR_SHORTUTF8:
4343 fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4344 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4345 use_pcre16? "16" : "8");
4346 if (use_size_offsets >= 2)
4347 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4348 use_offsets[1]);
4349 fprintf(outfile, "\n");
4350 break;
4351
4352 case PCRE_ERROR_BADUTF8_OFFSET:
4353 fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4354 use_pcre16? "16" : "8");
4355 break;
4356
4357 default:
4358 if (count < 0 &&
4359 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
4360 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4361 else
4362 fprintf(outfile, "Error %d (Unexpected value)\n", count);
4363 break;
4364 }
4365
4366 break; /* Out of the /g loop */
4367 }
4368 }
4369
4370 /* If not /g or /G we are done */
4371
4372 if (!do_g && !do_G) break;
4373
4374 /* If we have matched an empty string, first check to see if we are at
4375 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4376 Perl's /g option does. This turns out to be rather cunning. First we set
4377 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4378 same point. If this fails (picked up above) we advance to the next
4379 character. */
4380
4381 g_notempty = 0;
4382
4383 if (use_offsets[0] == use_offsets[1])
4384 {
4385 if (use_offsets[0] == len) break;
4386 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4387 }
4388
4389 /* For /g, update the start offset, leaving the rest alone */
4390
4391 if (do_g) start_offset = use_offsets[1];
4392
4393 /* For /G, update the pointer and length */
4394
4395 else
4396 {
4397 bptr += use_offsets[1] * CHAR_SIZE;
4398 len -= use_offsets[1];
4399 }
4400 } /* End of loop for /g and /G */
4401
4402 NEXT_DATA: continue;
4403 } /* End of loop for data lines */
4404
4405 CONTINUE:
4406
4407 #if !defined NOPOSIX
4408 if (posix || do_posix) regfree(&preg);
4409 #endif
4410
4411 if (re != NULL) new_free(re);
4412 if (extra != NULL)
4413 {
4414 PCRE_FREE_STUDY(extra);
4415 }
4416 if (locale_set)
4417 {
4418 new_free((void *)tables);
4419 setlocale(LC_CTYPE, "C");
4420 locale_set = 0;
4421 }
4422 if (jit_stack != NULL)
4423 {
4424 PCRE_JIT_STACK_FREE(jit_stack);
4425 jit_stack = NULL;
4426 }
4427 }
4428
4429 if (infile == stdin) fprintf(outfile, "\n");
4430
4431 EXIT:
4432
4433 if (infile != NULL && infile != stdin) fclose(infile);
4434 if (outfile != NULL && outfile != stdout) fclose(outfile);
4435
4436 free(buffer);
4437 free(dbuffer);
4438 free(pbuffer);
4439 free(offsets);
4440
4441 #ifdef SUPPORT_PCRE16
4442 if (buffer16 != NULL) free(buffer16);
4443 #endif
4444
4445 return yield;
4446 }
4447
4448 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12