/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 841 - (show annotations) (download)
Sat Dec 31 07:04:43 2011 UTC (2 years, 3 months ago) by zherczeg
File MIME type: text/plain
File size: 128846 byte(s)
use unsigned short for 16 bit strings and a minor fix
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49
50 #ifdef HAVE_CONFIG_H
51 #include "config.h"
52 #endif
53
54 #include <ctype.h>
55 #include <stdio.h>
56 #include <string.h>
57 #include <stdlib.h>
58 #include <time.h>
59 #include <locale.h>
60 #include <errno.h>
61
62 #ifdef SUPPORT_LIBREADLINE
63 #ifdef HAVE_UNISTD_H
64 #include <unistd.h>
65 #endif
66 #include <readline/readline.h>
67 #include <readline/history.h>
68 #endif
69
70
71 /* A number of things vary for Windows builds. Originally, pcretest opened its
72 input and output without "b"; then I was told that "b" was needed in some
73 environments, so it was added for release 5.0 to both the input and output. (It
74 makes no difference on Unix-like systems.) Later I was told that it is wrong
75 for the input on Windows. I've now abstracted the modes into two macros that
76 are set here, to make it easier to fiddle with them, and removed "b" from the
77 input mode under Windows. */
78
79 #if defined(_WIN32) || defined(WIN32)
80 #include <io.h> /* For _setmode() */
81 #include <fcntl.h> /* For _O_BINARY */
82 #define INPUT_MODE "r"
83 #define OUTPUT_MODE "wb"
84
85 #ifndef isatty
86 #define isatty _isatty /* This is what Windows calls them, I'm told, */
87 #endif /* though in some environments they seem to */
88 /* be already defined, hence the #ifndefs. */
89 #ifndef fileno
90 #define fileno _fileno
91 #endif
92
93 /* A user sent this fix for Borland Builder 5 under Windows. */
94
95 #ifdef __BORLANDC__
96 #define _setmode(handle, mode) setmode(handle, mode)
97 #endif
98
99 /* Not Windows */
100
101 #else
102 #include <sys/time.h> /* These two includes are needed */
103 #include <sys/resource.h> /* for setrlimit(). */
104 #define INPUT_MODE "rb"
105 #define OUTPUT_MODE "wb"
106 #endif
107
108
109 /* We have to include pcre_internal.h because we need the internal info for
110 displaying the results of pcre_study() and we also need to know about the
111 internal macros, structures, and other internal data values; pcretest has
112 "inside information" compared to a program that strictly follows the PCRE API.
113
114 Although pcre_internal.h does itself include pcre.h, we explicitly include it
115 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
116 appropriately for an application, not for building PCRE. */
117
118 #include "pcre.h"
119
120 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
121 /* Configure internal macros to 16 bit mode. */
122 #define COMPILE_PCRE16
123 #endif
124
125 #include "pcre_internal.h"
126
127 /* The pcre_printint() function, which prints the internal form of a compiled
128 regex, is held in a separate file so that (a) it can be compiled in either
129 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
130 when that is compiled in debug mode. */
131
132 #ifdef SUPPORT_PCRE8
133 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
134 #endif
135 #ifdef SUPPORT_PCRE16
136 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
137 #endif
138
139 /* We need access to some of the data tables that PCRE uses. So as not to have
140 to keep two copies, we include the source file here, changing the names of the
141 external symbols to prevent clashes. */
142
143 #define PCRE_INCLUDED
144 #undef PRIV
145 #define PRIV(name) name
146
147 #include "pcre_tables.c"
148
149 /* The definition of the macro PRINTABLE, which determines whether to print an
150 output character as-is or as a hex value when showing compiled patterns, is
151 the same as in the printint.src file. We use it here in cases when the locale
152 has not been explicitly changed, so as to get consistent output from systems
153 that differ in their output from isprint() even in the "C" locale. */
154
155 #ifdef EBCDIC
156 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
157 #else
158 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
159 #endif
160 /* PRINTOK(c) is non-zero when c should be output as-is. isprint() is only defined for EOF and values representable as unsigned char, so when a locale is in use any value outside 0..255 is reported as not printable instead of being passed to isprint(). Without a locale the fixed PRINTABLE range is used. */
161 #define PRINTOK(c) (locale_set? ((unsigned int)(c) < 256u && isprint(c)) : PRINTABLE(c))
162
163 /* Posix support is disabled in 16 bit only mode. */
164 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
165 #define NOPOSIX
166 #endif
167
168 /* It is possible to compile this test program without including support for
169 testing the POSIX interface, though this is not available via the standard
170 Makefile. */
171
172 #if !defined NOPOSIX
173 #include "pcreposix.h"
174 #endif
175
176 /* It is also possible, originally for the benefit of a version that was
177 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
178 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
179 automatically cut out the UTF support if PCRE is built without it. */
180
181 #ifndef SUPPORT_UTF
182 #ifndef NOUTF
183 #define NOUTF
184 #endif
185 #endif
186
187 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
188 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
189 only from one place and is handled differently). I couldn't dream up any way of
190 using a single macro to do this in a generic way, because of the many different
191 argument requirements. We know that at least one of SUPPORT_PCRE8 and
192 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
193 use these in the definitions of generic macros.
194
195 **** Special note about the PCHARSxxx macros: the address of the string to be
196 printed is always given as two arguments: a base address followed by an offset.
197 The base address is cast to the correct data size for 8 or 16 bit data; the
198 offset is in units of this size. If the string were given as base+offset in one
199 argument, the casting might be incorrectly applied. */
200
201 #ifdef SUPPORT_PCRE8
202 /* 8-bit wrappers. STRLEN8 is an expression; the other macros are statements without a trailing semicolon, so the caller supplies it. Macros that keep a result assign it to their first argument (SET_PCRE_CALLOUT8 instead sets pcre_callout). Subject, pattern and name arguments handed to the library are cast to (const) char *. */
203 #define PCHARS8(lv, p, offset, len, f) \
204 lv = pchars((pcre_uint8 *)(p) + offset, len, f)
205
206 #define PCHARSV8(p, offset, len, f) \
207 (void)pchars((pcre_uint8 *)(p) + offset, len, f)
208
209 #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
210 p = read_capture_name8(p, cn8, re)
211
212 #define SET_PCRE_CALLOUT8(callout) \
213 pcre_callout = callout
214
215 #define STRLEN8(p) ((int)strlen((char *)p))
216
217
218 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
219 re = pcre_compile((char *)pat, options, error, erroffset, tables)
220
221 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
222 namesptr, cbuffer, size) \
223 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
224 (char *)namesptr, cbuffer, size)
225
226 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
227 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
228
229 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
230 offsets, size_offsets, workspace, size_workspace) \
231 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
232 offsets, size_offsets, workspace, size_workspace)
233
234 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
235 offsets, size_offsets) \
236 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
237 offsets, size_offsets)
238
239 #define PCRE_FREE_STUDY8(extra) \
240 pcre_free_study(extra)
241
242 #define PCRE_FREE_SUBSTRING8(substring) \
243 pcre_free_substring(substring)
244
245 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
246 pcre_free_substring_list(listptr)
247
248 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
249 getnamesptr, subsptr) \
250 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
251 (char *)getnamesptr, subsptr)
252 /* n receives the number of the named group; the compiled pattern is the second macro argument, so the expansion does not depend on a variable called re being in scope at the call site. */
253 #define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
254 n = pcre_get_stringnumber(re, (char *)ptr)
255
256 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
257 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
258
259 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
260 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
261
262 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
263 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
264
265 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
266 pcre_printint(re, outfile, debug_lengths)
267
268 #define PCRE_STUDY8(extra, re, options, error) \
269 extra = pcre_study(re, options, error)
270
271 #endif /* SUPPORT_PCRE8 */
272
273 /* -----------------------------------------------------------*/
274
275 #ifdef SUPPORT_PCRE16
276 /* 16-bit wrappers, parallel to the 8-bit set. STRLEN16 is an expression; the other macros are statements without a trailing semicolon. Macros that keep a result assign it to their first argument (SET_PCRE_CALLOUT16 instead sets pcre16_callout). String arguments are cast to the 16-bit pointer types the pcre16_ functions take. */
277 #define PCHARS16(lv, p, offset, len, f) \
278 lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
279
280 #define PCHARSV16(p, offset, len, f) \
281 (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
282
283 #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
284 p = read_capture_name16(p, cn16, re)
285
286 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
287
288 #define SET_PCRE_CALLOUT16(callout) \
289 pcre16_callout = callout
290
291
292 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
293 re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
294 /* In the two copy macros the buffer size is halved before being passed on - presumably callers give it in bytes and the library expects 16-bit units (TODO confirm). The argument is parenthesised so that an expression such as a + b is halved as a whole. */
295 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
296 namesptr, cbuffer, size) \
297 rc = pcre16_copy_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
298 (PCRE_SPTR16)namesptr, (PCRE_SCHAR16 *)cbuffer, (size)/2)
299
300 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
301 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
302 (PCRE_SCHAR16 *)cbuffer, (size)/2)
303
304 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
305 offsets, size_offsets, workspace, size_workspace) \
306 count = pcre16_dfa_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
307 options, offsets, size_offsets, workspace, size_workspace)
308
309 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
310 offsets, size_offsets) \
311 count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
312 options, offsets, size_offsets)
313
314 #define PCRE_FREE_STUDY16(extra) \
315 pcre16_free_study(extra)
316
317 #define PCRE_FREE_SUBSTRING16(substring) \
318 pcre16_free_substring((PCRE_SPTR16)substring)
319
320 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
321 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
322
323 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
324 getnamesptr, subsptr) \
325 rc = pcre16_get_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
326 (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
327 /* n receives the number of the named group; the compiled pattern is the second macro argument, so the expansion does not depend on a variable called re being in scope at the call site. */
328 #define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
329 n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
330
331 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
332 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
333 (PCRE_SPTR16 *)(void*)subsptr)
334
335 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
336 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
337 (PCRE_SPTR16 **)(void*)listptr)
338
339 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
340 rc = pcre16_pattern_to_host_byte_order(re, extra, tables)
341
342 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
343 pcre16_printint(re, outfile, debug_lengths)
344
345 #define PCRE_STUDY16(extra, re, options, error) \
346 extra = pcre16_study(re, options, error)
347
348 #endif /* SUPPORT_PCRE16 */
349
350
351 /* ----- Both modes are supported; a runtime test is needed, except for
352 pcre_config(), and the JIT stack functions, when it doesn't matter which
353 version is called. ----- */
354
355 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
356 /* Each statement-style macro below expands to a complete if/else on use_pcre16 with no trailing semicolon, so it must be used as a whole statement followed by ';'. CHAR_SIZE, STRLEN and PCRE_MAKETABLES are expressions instead. */
357 #define CHAR_SIZE (use_pcre16? 2:1)
358
359 #define PCHARS(lv, p, offset, len, f) \
360 if (use_pcre16) \
361 PCHARS16(lv, p, offset, len, f); \
362 else \
363 PCHARS8(lv, p, offset, len, f)
364
365 #define PCHARSV(p, offset, len, f) \
366 if (use_pcre16) \
367 PCHARSV16(p, offset, len, f); \
368 else \
369 PCHARSV8(p, offset, len, f)
370
371 #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
372 if (use_pcre16) \
373 READ_CAPTURE_NAME16(p, cn8, cn16, re); \
374 else \
375 READ_CAPTURE_NAME8(p, cn8, cn16, re)
376
377 #define SET_PCRE_CALLOUT(callout) \
378 if (use_pcre16) \
379 SET_PCRE_CALLOUT16(callout); \
380 else \
381 SET_PCRE_CALLOUT8(callout)
382
383 #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
384 /* Not dispatched on use_pcre16: this name always refers to the 8-bit function. */
385 #define PCRE_ASSIGN_JIT_STACK pcre_assign_jit_stack
386
387 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
388 if (use_pcre16) \
389 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
390 else \
391 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
392 /* Not dispatched on use_pcre16: this name always refers to the 8-bit function. */
393 #define PCRE_CONFIG pcre_config
394
395 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
396 namesptr, cbuffer, size) \
397 if (use_pcre16) \
398 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
399 namesptr, cbuffer, size); \
400 else \
401 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
402 namesptr, cbuffer, size)
403
404 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
405 if (use_pcre16) \
406 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
407 else \
408 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
409
410 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
411 offsets, size_offsets, workspace, size_workspace) \
412 if (use_pcre16) \
413 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
414 offsets, size_offsets, workspace, size_workspace); \
415 else \
416 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
417 offsets, size_offsets, workspace, size_workspace)
418
419 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
420 offsets, size_offsets) \
421 if (use_pcre16) \
422 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
423 offsets, size_offsets); \
424 else \
425 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
426 offsets, size_offsets)
427
428 #define PCRE_FREE_STUDY(extra) \
429 if (use_pcre16) \
430 PCRE_FREE_STUDY16(extra); \
431 else \
432 PCRE_FREE_STUDY8(extra)
433
434 #define PCRE_FREE_SUBSTRING(substring) \
435 if (use_pcre16) \
436 PCRE_FREE_SUBSTRING16(substring); \
437 else \
438 PCRE_FREE_SUBSTRING8(substring)
439
440 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
441 if (use_pcre16) \
442 PCRE_FREE_SUBSTRING_LIST16(listptr); \
443 else \
444 PCRE_FREE_SUBSTRING_LIST8(listptr)
445
446 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
447 getnamesptr, subsptr) \
448 if (use_pcre16) \
449 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
450 getnamesptr, subsptr); \
451 else \
452 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
453 getnamesptr, subsptr)
454
455 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
456 if (use_pcre16) \
457 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
458 else \
459 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
460
461 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
462 if (use_pcre16) \
463 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
464 else \
465 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
466
467 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
468 if (use_pcre16) \
469 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
470 else \
471 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
472 /* Not dispatched on use_pcre16: these names always refer to the 8-bit functions. */
473 #define PCRE_JIT_STACK_ALLOC pcre_jit_stack_alloc
474 #define PCRE_JIT_STACK_FREE pcre_jit_stack_free
475
476 #define PCRE_MAKETABLES \
477 (use_pcre16? pcre16_maketables() : pcre_maketables())
478
479 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
480 if (use_pcre16) \
481 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
482 else \
483 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
484
485 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
486 if (use_pcre16) \
487 PCRE_PRINTINT16(re, outfile, debug_lengths); \
488 else \
489 PCRE_PRINTINT8(re, outfile, debug_lengths)
490
491 #define PCRE_STUDY(extra, re, options, error) \
492 if (use_pcre16) \
493 PCRE_STUDY16(extra, re, options, error); \
494 else \
495 PCRE_STUDY8(extra, re, options, error)
496
497 /* ----- Only 8-bit mode is supported ----- */
498 /* No runtime test is needed: every generic name is a direct alias for its 8-bit counterpart. */
499 #elif defined SUPPORT_PCRE8
500 #define CHAR_SIZE 1
501 #define PCHARS PCHARS8
502 #define PCHARSV PCHARSV8
503 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
504 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
505 #define STRLEN STRLEN8
506 #define PCRE_ASSIGN_JIT_STACK pcre_assign_jit_stack
507 #define PCRE_COMPILE PCRE_COMPILE8
508 #define PCRE_CONFIG pcre_config
509 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
510 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
511 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
512 #define PCRE_EXEC PCRE_EXEC8
513 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
514 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
515 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
516 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
517 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
518 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
519 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
520 #define PCRE_JIT_STACK_ALLOC pcre_jit_stack_alloc
521 #define PCRE_JIT_STACK_FREE pcre_jit_stack_free
522 #define PCRE_MAKETABLES pcre_maketables()
523 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
524 #define PCRE_PRINTINT PCRE_PRINTINT8
525 #define PCRE_STUDY PCRE_STUDY8
526
527 /* ----- Only 16-bit mode is supported ----- */
528 /* Direct aliases for the 16-bit wrappers; here the config and JIT stack names map to the pcre16_ functions. */
529 #else
530 #define CHAR_SIZE 2
531 #define PCHARS PCHARS16
532 #define PCHARSV PCHARSV16
533 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
534 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
535 #define STRLEN STRLEN16
536 #define PCRE_ASSIGN_JIT_STACK pcre16_assign_jit_stack
537 #define PCRE_COMPILE PCRE_COMPILE16
538 #define PCRE_CONFIG pcre16_config
539 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
540 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
541 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
542 #define PCRE_EXEC PCRE_EXEC16
543 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
544 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
545 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
546 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
547 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
548 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
549 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
550 #define PCRE_JIT_STACK_ALLOC pcre16_jit_stack_alloc
551 #define PCRE_JIT_STACK_FREE pcre16_jit_stack_free
552 #define PCRE_MAKETABLES pcre16_maketables()
553 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
554 #define PCRE_PRINTINT PCRE_PRINTINT16
555 #define PCRE_STUDY PCRE_STUDY16
556 #endif
557
558 /* ----- End of mode-specific function call macros ----- */
559
560
561 /* Other parameters */
562 /* Supply CLOCKS_PER_SEC when the headers do not: use CLK_TCK if that is defined, otherwise 100. */
563 #ifndef CLOCKS_PER_SEC
564 #ifdef CLK_TCK
565 #define CLOCKS_PER_SEC CLK_TCK
566 #else
567 #define CLOCKS_PER_SEC 100
568 #endif
569 #endif
570
571 /* This is the default loop count for timing. */
572
573 #define LOOPREPEAT 500000
574
575 /* Static variables */
576 /* All of these have static storage duration, so the ones declared without an initialiser start out as zero. */
577 static FILE *outfile;
578 static int log_store = 0;
579 static int callout_count;
580 static int callout_extra;
581 static int callout_fail_count;
582 static int callout_fail_id;
583 static int debug_lengths;
584 static int first_callout;
585 static int locale_set = 0;
586 static int show_malloc;
587 static int use_utf;
588 static size_t gotten_store;
589 static size_t first_gotten_store = 0;
590 static const unsigned char *last_callout_mark = NULL;
591
592 /* The buffers grow automatically if very long input lines are encountered. */
593 /* buffer_size holds the starting size; the three pointers start as NULL, so the buffers are presumably allocated elsewhere before first use (confirm at the allocation site). */
594 static int buffer_size = 50000;
595 static pcre_uint8 *buffer = NULL;
596 static pcre_uint8 *dbuffer = NULL;
597 static pcre_uint8 *pbuffer = NULL;
598
599 /* Another buffer is needed for translation to 16-bit character strings. It will
600 be obtained and extended as required. */
601
602 #ifdef SUPPORT_PCRE16
603 static int buffer16_size = 0;
604 static pcre_uint16 *buffer16 = NULL;
605
606 #ifdef SUPPORT_PCRE8
607
608 /* We need the table of operator lengths that is used for 16-bit compiling, in
609 order to swap bytes in a pattern for saving/reloading testing. Luckily, the
610 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
611 appropriately for the 16-bit world. Just as a safety check, make sure that
612 COMPILE_PCRE16 is *not* set. */
613
614 #ifdef COMPILE_PCRE16
615 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
616 #endif
617 /* Replace the current LINK_SIZE of 2, 3 or 4 with 1, 2 or 2 respectively; any other value stops the build. */
618 #if LINK_SIZE == 2
619 #undef LINK_SIZE
620 #define LINK_SIZE 1
621 #elif LINK_SIZE == 3 || LINK_SIZE == 4
622 #undef LINK_SIZE
623 #define LINK_SIZE 2
624 #else
625 #error LINK_SIZE must be either 2, 3, or 4
626 #endif
627 /* IMM2_SIZE is likewise forced to 1 before OP_LENGTHS is expanded below. */
628 #undef IMM2_SIZE
629 #define IMM2_SIZE 1
630
631 #endif /* SUPPORT_PCRE8 */
632 /* OP_LENGTHS is expanded here, i.e. after the redefinitions above whenever both widths are built. */
633 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
634 #endif /* SUPPORT_PCRE16 */
635
636 /* If we have 8-bit support, default use_pcre16 to false (0); if there is also
637 16-bit support, it can be changed by an option. If there is no 8-bit support,
638 there must be 16-bit support, so default it to true (1). */
639
640 #ifdef SUPPORT_PCRE8
641 static int use_pcre16 = 0;
642 #else /* no 8-bit support */
643 static int use_pcre16 = 1;
644 #endif
645
646 /* Textual explanations for runtime error codes. The notes on entries 0 and 1 suggest the table is indexed by the negated error code (confirm at the lookup site); NULL marks codes that are handled specially or never returned. The number in each comment is the entry's index. */
647
648 static const char *errtexts[] = {
649 NULL, /* 0 is no error */
650 NULL, /* 1: NOMATCH is handled specially */
651 "NULL argument passed", /* 2 */
652 "bad option value", /* 3 */
653 "magic number missing", /* 4 */
654 "unknown opcode - pattern overwritten?", /* 5 */
655 "no more memory", /* 6 */
656 NULL, /* 7: never returned by pcre_exec() or pcre_dfa_exec() */
657 "match limit exceeded", /* 8 */
658 "callout error code", /* 9 */
659 NULL, /* 10: BADUTF8/16 is handled specially */
660 NULL, /* 11: BADUTF8/16 offset is handled specially */
661 NULL, /* 12: PARTIAL is handled specially */
662 "not used - internal error", /* 13 */
663 "internal error - pattern overwritten?", /* 14 */
664 "bad count value", /* 15 */
665 "item unsupported for DFA matching", /* 16 */
666 "backreference condition or recursion test not supported for DFA matching", /* 17 */
667 "match limit not supported for DFA matching", /* 18 */
668 "workspace size exceeded in DFA matching", /* 19 */
669 "too much recursion for DFA matching", /* 20 */
670 "recursion limit exceeded", /* 21 */
671 "not used - internal error", /* 22 */
672 "invalid combination of newline options", /* 23 */
673 "bad offset value", /* 24 */
674 NULL, /* 25: SHORTUTF8/16 is handled specially */
675 "nested recursion at the same subject position", /* 26 */
676 "JIT stack limit reached", /* 27 */
677 "pattern compiled in wrong mode: 8-bit/16-bit error" /* 28 */
678 };
679
680
681 /*************************************************
682 * Alternate character tables *
683 *************************************************/
684
685 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
686 using the default tables of the library. However, the T option can be used to
687 select alternate sets of tables, for different kinds of testing. Note also that
688 the L (locale) option also adjusts the tables. */
689
690 /* This is the set of tables distributed as default with PCRE. It recognizes
691 only ASCII characters. */
692
693 static const pcre_uint8 tables0[] = {
694
695 /* This table is a lower casing table: 256 entries in which 65-90 map to 97-122 and every other value maps to itself. */
696
697 0, 1, 2, 3, 4, 5, 6, 7,
698 8, 9, 10, 11, 12, 13, 14, 15,
699 16, 17, 18, 19, 20, 21, 22, 23,
700 24, 25, 26, 27, 28, 29, 30, 31,
701 32, 33, 34, 35, 36, 37, 38, 39,
702 40, 41, 42, 43, 44, 45, 46, 47,
703 48, 49, 50, 51, 52, 53, 54, 55,
704 56, 57, 58, 59, 60, 61, 62, 63,
705 64, 97, 98, 99,100,101,102,103,
706 104,105,106,107,108,109,110,111,
707 112,113,114,115,116,117,118,119,
708 120,121,122, 91, 92, 93, 94, 95,
709 96, 97, 98, 99,100,101,102,103,
710 104,105,106,107,108,109,110,111,
711 112,113,114,115,116,117,118,119,
712 120,121,122,123,124,125,126,127,
713 128,129,130,131,132,133,134,135,
714 136,137,138,139,140,141,142,143,
715 144,145,146,147,148,149,150,151,
716 152,153,154,155,156,157,158,159,
717 160,161,162,163,164,165,166,167,
718 168,169,170,171,172,173,174,175,
719 176,177,178,179,180,181,182,183,
720 184,185,186,187,188,189,190,191,
721 192,193,194,195,196,197,198,199,
722 200,201,202,203,204,205,206,207,
723 208,209,210,211,212,213,214,215,
724 216,217,218,219,220,221,222,223,
725 224,225,226,227,228,229,230,231,
726 232,233,234,235,236,237,238,239,
727 240,241,242,243,244,245,246,247,
728 248,249,250,251,252,253,254,255,
729
730 /* This table is a case flipping table: 65-90 and 97-122 are swapped with each other and every other value maps to itself. */
731
732 0, 1, 2, 3, 4, 5, 6, 7,
733 8, 9, 10, 11, 12, 13, 14, 15,
734 16, 17, 18, 19, 20, 21, 22, 23,
735 24, 25, 26, 27, 28, 29, 30, 31,
736 32, 33, 34, 35, 36, 37, 38, 39,
737 40, 41, 42, 43, 44, 45, 46, 47,
738 48, 49, 50, 51, 52, 53, 54, 55,
739 56, 57, 58, 59, 60, 61, 62, 63,
740 64, 97, 98, 99,100,101,102,103,
741 104,105,106,107,108,109,110,111,
742 112,113,114,115,116,117,118,119,
743 120,121,122, 91, 92, 93, 94, 95,
744 96, 65, 66, 67, 68, 69, 70, 71,
745 72, 73, 74, 75, 76, 77, 78, 79,
746 80, 81, 82, 83, 84, 85, 86, 87,
747 88, 89, 90,123,124,125,126,127,
748 128,129,130,131,132,133,134,135,
749 136,137,138,139,140,141,142,143,
750 144,145,146,147,148,149,150,151,
751 152,153,154,155,156,157,158,159,
752 160,161,162,163,164,165,166,167,
753 168,169,170,171,172,173,174,175,
754 176,177,178,179,180,181,182,183,
755 184,185,186,187,188,189,190,191,
756 192,193,194,195,196,197,198,199,
757 200,201,202,203,204,205,206,207,
758 208,209,210,211,212,213,214,215,
759 216,217,218,219,220,221,222,223,
760 224,225,226,227,228,229,230,231,
761 232,233,234,235,236,237,238,239,
762 240,241,242,243,244,245,246,247,
763 248,249,250,251,252,253,254,255,
764
765 /* This table contains bit maps for various character classes. Each map is 32
766 bytes long and the bits run from the least significant end of each byte. The
767 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
768 graph, print, punct, and cntrl. Other classes are built from combinations. */
769 /* space: 9-13 and 32 */
770 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
771 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
772 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
773 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
774 /* xdigit: 48-57, 65-70, 97-102 */
775 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
776 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
777 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
778 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
779 /* digit: 48-57 */
780 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
781 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
782 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
783 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
784 /* upper: 65-90 */
785 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
786 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
787 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
788 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
789 /* lower: 97-122 */
790 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
791 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
792 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
793 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
794 /* word: 48-57, 65-90, 95, 97-122 */
795 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
796 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
797 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
798 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
799 /* graph: 33-126 */
800 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
801 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
802 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
803 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
804 /* print: 32-126 */
805 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
806 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
807 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
808 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
809 /* punct: 33-47, 58-64, 91-96, 123-126 */
810 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
811 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
812 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
813 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
814 /* cntrl: 0-31 and 127 */
815 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
816 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
817 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
818 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
819
820 /* This table identifies various classes of character by individual bits:
821 0x01 white space character
822 0x02 letter
823 0x04 decimal digit
824 0x08 hexadecimal digit
825 0x10 alphanumeric or '_'
826 0x80 regular expression metacharacter or binary zero
827 */
828
829 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
830 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
831 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
832 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
833 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
834 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
835 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
836 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
837 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
838 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
839 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
840 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
841 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
842 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
843 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
844 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
845 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
846 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
847 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
848 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
849 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
850 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
851 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
852 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
853 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
854 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
855 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
856 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
857 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
858 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
859 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
860 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
861
862 /* This is a set of tables that came originally from a Windows user. It seems to
863 be at least an approximation of ISO 8859. In particular, there are characters
864 greater than 128 that are marked as spaces, letters, etc. */
865
866 static const pcre_uint8 tables1[] = {
/* First 256 entries: each byte value maps to itself, except that 65-90 map to
97-122 and 192-222 (other than 215) map to 224-254, i.e. a lower-casing map. */
867 0,1,2,3,4,5,6,7,
868 8,9,10,11,12,13,14,15,
869 16,17,18,19,20,21,22,23,
870 24,25,26,27,28,29,30,31,
871 32,33,34,35,36,37,38,39,
872 40,41,42,43,44,45,46,47,
873 48,49,50,51,52,53,54,55,
874 56,57,58,59,60,61,62,63,
875 64,97,98,99,100,101,102,103,
876 104,105,106,107,108,109,110,111,
877 112,113,114,115,116,117,118,119,
878 120,121,122,91,92,93,94,95,
879 96,97,98,99,100,101,102,103,
880 104,105,106,107,108,109,110,111,
881 112,113,114,115,116,117,118,119,
882 120,121,122,123,124,125,126,127,
883 128,129,130,131,132,133,134,135,
884 136,137,138,139,140,141,142,143,
885 144,145,146,147,148,149,150,151,
886 152,153,154,155,156,157,158,159,
887 160,161,162,163,164,165,166,167,
888 168,169,170,171,172,173,174,175,
889 176,177,178,179,180,181,182,183,
890 184,185,186,187,188,189,190,191,
891 224,225,226,227,228,229,230,231,
892 232,233,234,235,236,237,238,239,
893 240,241,242,243,244,245,246,215,
894 248,249,250,251,252,253,254,223,
895 224,225,226,227,228,229,230,231,
896 232,233,234,235,236,237,238,239,
897 240,241,242,243,244,245,246,247,
898 248,249,250,251,252,253,254,255,
/* Next 256 entries: as above, but the mapping also runs the other way (97-122
map to 65-90, and 224-254 other than 247 map to 192-222), i.e. a case-flipping
map. */
899 0,1,2,3,4,5,6,7,
900 8,9,10,11,12,13,14,15,
901 16,17,18,19,20,21,22,23,
902 24,25,26,27,28,29,30,31,
903 32,33,34,35,36,37,38,39,
904 40,41,42,43,44,45,46,47,
905 48,49,50,51,52,53,54,55,
906 56,57,58,59,60,61,62,63,
907 64,97,98,99,100,101,102,103,
908 104,105,106,107,108,109,110,111,
909 112,113,114,115,116,117,118,119,
910 120,121,122,91,92,93,94,95,
911 96,65,66,67,68,69,70,71,
912 72,73,74,75,76,77,78,79,
913 80,81,82,83,84,85,86,87,
914 88,89,90,123,124,125,126,127,
915 128,129,130,131,132,133,134,135,
916 136,137,138,139,140,141,142,143,
917 144,145,146,147,148,149,150,151,
918 152,153,154,155,156,157,158,159,
919 160,161,162,163,164,165,166,167,
920 168,169,170,171,172,173,174,175,
921 176,177,178,179,180,181,182,183,
922 184,185,186,187,188,189,190,191,
923 224,225,226,227,228,229,230,231,
924 232,233,234,235,236,237,238,239,
925 240,241,242,243,244,245,246,215,
926 248,249,250,251,252,253,254,223,
927 192,193,194,195,196,197,198,199,
928 200,201,202,203,204,205,206,207,
929 208,209,210,211,212,213,214,247,
930 216,217,218,219,220,221,222,255,
/* Next 320 entries: presumably ten 32-byte character-class bitmaps, one bit
per character value - TODO confirm against the PCRE character-table layout. */
931 0,62,0,0,1,0,0,0,
932 0,0,0,0,0,0,0,0,
933 32,0,0,0,1,0,0,0,
934 0,0,0,0,0,0,0,0,
935 0,0,0,0,0,0,255,3,
936 126,0,0,0,126,0,0,0,
937 0,0,0,0,0,0,0,0,
938 0,0,0,0,0,0,0,0,
939 0,0,0,0,0,0,255,3,
940 0,0,0,0,0,0,0,0,
941 0,0,0,0,0,0,12,2,
942 0,0,0,0,0,0,0,0,
943 0,0,0,0,0,0,0,0,
944 254,255,255,7,0,0,0,0,
945 0,0,0,0,0,0,0,0,
946 255,255,127,127,0,0,0,0,
947 0,0,0,0,0,0,0,0,
948 0,0,0,0,254,255,255,7,
949 0,0,0,0,0,4,32,4,
950 0,0,0,128,255,255,127,255,
951 0,0,0,0,0,0,255,3,
952 254,255,255,135,254,255,255,7,
953 0,0,0,0,0,4,44,6,
954 255,255,127,255,255,255,127,255,
955 0,0,0,0,254,255,255,255,
956 255,255,255,255,255,255,255,127,
957 0,0,0,0,254,255,255,255,
958 255,255,255,255,255,255,255,255,
959 0,2,0,0,255,255,255,255,
960 255,255,255,255,255,255,255,127,
961 0,0,0,0,255,255,255,255,
962 255,255,255,255,255,255,255,255,
963 0,0,0,0,254,255,0,252,
964 1,0,0,248,1,0,0,120,
965 0,0,0,0,254,255,255,255,
966 0,0,128,0,0,0,128,0,
967 255,255,255,255,0,0,0,0,
968 0,0,0,0,0,0,0,128,
969 255,255,255,255,0,0,0,0,
970 0,0,0,0,0,0,0,0,
/* Final 256 entries: one value per character. The values appear to use the
class bits listed in the comment inside the preceding table (1 = white space,
2 = letter, 4 = decimal digit, 8 = hexadecimal digit, 16 = alphanumeric or '_',
128 = metacharacter or binary zero); e.g. the digits 48-57 carry 28 = 4+8+16.
Unlike the preceding table, some entries above 127 are non-zero. */
971 128,0,0,0,0,0,0,0,
972 0,1,1,0,1,1,0,0,
973 0,0,0,0,0,0,0,0,
974 0,0,0,0,0,0,0,0,
975 1,0,0,0,128,0,0,0,
976 128,128,128,128,0,0,128,0,
977 28,28,28,28,28,28,28,28,
978 28,28,0,0,0,0,0,128,
979 0,26,26,26,26,26,26,18,
980 18,18,18,18,18,18,18,18,
981 18,18,18,18,18,18,18,18,
982 18,18,18,128,128,0,128,16,
983 0,26,26,26,26,26,26,18,
984 18,18,18,18,18,18,18,18,
985 18,18,18,18,18,18,18,18,
986 18,18,18,128,128,0,0,0,
987 0,0,0,0,0,1,0,0,
988 0,0,0,0,0,0,0,0,
989 0,0,0,0,0,0,0,0,
990 0,0,0,0,0,0,0,0,
991 1,0,0,0,0,0,0,0,
992 0,0,18,0,0,0,0,0,
993 0,0,20,20,0,18,0,0,
994 0,20,18,0,0,0,0,0,
995 18,18,18,18,18,18,18,18,
996 18,18,18,18,18,18,18,18,
997 18,18,18,18,18,18,18,0,
998 18,18,18,18,18,18,18,18,
999 18,18,18,18,18,18,18,18,
1000 18,18,18,18,18,18,18,18,
1001 18,18,18,18,18,18,18,0,
1002 18,18,18,18,18,18,18,18
1003 };
1004
1005
1006
1007
1008 #ifndef HAVE_STRERROR
1009 /*************************************************
1010 * Provide strerror() for non-ANSI libraries *
1011 *************************************************/
1012
1013 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1014 in their libraries, but can provide the same facility by this simple
1015 alternative function. */
1016
extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
/* Error numbers inside the system table yield the system's message; anything
else yields a fixed fallback string. */
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
1026 #endif /* HAVE_STRERROR */
1027
1028
1029 /*************************************************
1030 * JIT memory callback *
1031 *************************************************/
1032
1033 static pcre_jit_stack* jit_callback(void *arg)
1034 {
1035 return (pcre_jit_stack *)arg;
1036 }
1037
1038
1039 #if !defined NOUTF || defined SUPPORT_PCRE16
1040 /*************************************************
1041 * Convert UTF-8 string to value *
1042 *************************************************/
1043
1044 /* This function takes one or more bytes that represents a UTF-8 character,
1045 and returns the value of the character.
1046
1047 Argument:
1048 utf8bytes a pointer to the byte vector
1049 vptr a pointer to an int to receive the value
1050
1051 Returns: > 0 => the number of bytes consumed
1052 -6 to 0 => malformed UTF-8 character at offset = (-return)
1053 */
1054
1055 static int
1056 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1057 {
1058 int c = *utf8bytes++;
1059 int d = c;
1060 int i, j, s;
1061
1062 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1063 {
1064 if ((d & 0x80) == 0) break;
1065 d <<= 1;
1066 }
1067
1068 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1069 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1070
1071 /* i now has a value in the range 1-5 */
1072
1073 s = 6*i;
1074 d = (c & utf8_table3[i]) << s;
1075
1076 for (j = 0; j < i; j++)
1077 {
1078 c = *utf8bytes++;
1079 if ((c & 0xc0) != 0x80) return -(j+1);
1080 s -= 6;
1081 d |= (c & 0x3f) << s;
1082 }
1083
1084 /* Check that encoding was the correct unique one */
1085
1086 for (j = 0; j < utf8_table1_size; j++)
1087 if (d <= utf8_table1[j]) break;
1088 if (j != i) return -(i+1);
1089
1090 /* Valid value */
1091
1092 *vptr = d;
1093 return i+1;
1094 }
1095 #endif /* NOUTF || SUPPORT_PCRE16 */
1096
1097
1098
1099 #if !defined NOUTF || defined SUPPORT_PCRE16
1100 /*************************************************
1101 * Convert character value to UTF-8 *
1102 *************************************************/
1103
1104 /* This function takes an integer value in the range 0 - 0x7fffffff
1105 and encodes it as a UTF-8 character in 1 to 6 bytes.
1106
1107 Arguments:
1108 cvalue the character value
1109 utf8bytes pointer to buffer for result - at least 6 bytes long
1110
1111 Returns: number of bytes placed in the buffer
1112 */
1113
1114 static int
1115 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1116 {
1117 register int i, j;
1118 for (i = 0; i < utf8_table1_size; i++)
1119 if (cvalue <= utf8_table1[i]) break;
1120 utf8bytes += i;
1121 for (j = i; j > 0; j--)
1122 {
1123 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1124 cvalue >>= 6;
1125 }
1126 *utf8bytes = utf8_table2[i] | cvalue;
1127 return i + 1;
1128 }
1129 #endif /* NOUTF || SUPPORT_PCRE16 */
1130
1131
1132
1133 #ifdef SUPPORT_PCRE16
1134 /*************************************************
1135 * Convert a string to 16-bit *
1136 *************************************************/
1137
1138 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1139 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1140 double, because a value up to 0xffff uses 1 to 3 bytes in UTF-8 and exactly 2
1141 bytes in UTF-16, while higher values use 4 bytes in both encodings. The
1142 result is always left in buffer16.
1143
1144 Note that this function does not object to surrogate values. This is
1145 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1146 for the purpose of testing that they are correctly faulted.
1147
1148 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1149 in UTF-8 so that values greater than 255 can be handled.
1150
1151 Arguments:
1152 data TRUE if converting a data line; FALSE for a regex
1153 p points to a byte string
1154 utf true if UTF-8 (to be converted to UTF-16)
1155 len number of bytes in the string (excluding trailing zero)
1156
1157 Returns: number of 16-bit data items used (excluding trailing zero)
1158 OR -1 if a UTF-8 string is malformed
1159 OR -2 if a value > 0x10ffff is encountered
1160 OR -3 if a value > 0xffff is encountered when not in UTF mode
1161 */
1162
1163 static int
1164 to16(int data, pcre_uint8 *p, int utf, int len)
1165 {
1166 pcre_uint16 *pp;
1167
1168 if (buffer16_size < 2*len + 2)
1169 {
1170 if (buffer16 != NULL) free(buffer16);
1171 buffer16_size = 2*len + 2;
1172 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1173 if (buffer16 == NULL)
1174 {
1175 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1176 exit(1);
1177 }
1178 }
1179
1180 pp = buffer16;
1181
1182 if (!utf && !data)
1183 {
1184 while (len-- > 0) *pp++ = *p++;
1185 }
1186
1187 else
1188 {
1189 int c = 0;
1190 while (len > 0)
1191 {
1192 int chlen = utf82ord(p, &c);
1193 if (chlen <= 0) return -1;
1194 if (c > 0x10ffff) return -2;
1195 p += chlen;
1196 len -= chlen;
1197 if (c < 0x10000) *pp++ = c; else
1198 {
1199 if (!utf) return -3;
1200 c -= 0x10000;
1201 *pp++ = 0xD800 | (c >> 10);
1202 *pp++ = 0xDC00 | (c & 0x3ff);
1203 }
1204 }
1205 }
1206
1207 *pp = 0;
1208 return pp - buffer16;
1209 }
1210 #endif
1211
1212
1213 /*************************************************
1214 * Read or extend an input line *
1215 *************************************************/
1216
1217 /* Input lines are read into buffer, but both patterns and data lines can be
1218 continued over multiple input lines. In addition, if the buffer fills up, we
1219 want to automatically expand it so as to be able to handle extremely large
1220 lines that are needed for certain stress tests. When the input buffer is
1221 expanded, the other two buffers must also be expanded likewise, and the
1222 contents of pbuffer, which are a copy of the input for callouts, must be
1223 preserved (for when expansion happens for a data line). This is not the most
1224 optimal way of handling this, but hey, this is just a test program!
1225
1226 Arguments:
1227 f the file to read
1228 start where in buffer to start (this *must* be within buffer)
1229 prompt for stdin or readline()
1230
1231 Returns: pointer to the start of new data
1232 could be a copy of start, or could be moved
1233 NULL if no data read and EOF reached
1234 */
1235
1236 static pcre_uint8 *
1237 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1238 {
1239 pcre_uint8 *here = start;
1240
1241 for (;;)
1242 {
1243 int rlen = (int)(buffer_size - (here - buffer));
1244
1245 if (rlen > 1000)
1246 {
1247 int dlen;
1248
1249 /* If libreadline support is required, use readline() to read a line if the
1250 input is a terminal. Note that readline() removes the trailing newline, so
1251 we must put it back again, to be compatible with fgets(). */
1252
1253 #ifdef SUPPORT_LIBREADLINE
1254 if (isatty(fileno(f)))
1255 {
1256 size_t len;
1257 char *s = readline(prompt);
1258 if (s == NULL) return (here == start)? NULL : start;
1259 len = strlen(s);
1260 if (len > 0) add_history(s);
1261 if (len > rlen - 1) len = rlen - 1;
1262 memcpy(here, s, len);
1263 here[len] = '\n';
1264 here[len+1] = 0;
1265 free(s);
1266 }
1267 else
1268 #endif
1269
1270 /* Read the next line by normal means, prompting if the file is stdin. */
1271
1272 {
1273 if (f == stdin) printf("%s", prompt);
1274 if (fgets((char *)here, rlen, f) == NULL)
1275 return (here == start)? NULL : start;
1276 }
1277
1278 dlen = (int)strlen((char *)here);
1279 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1280 here += dlen;
1281 }
1282
1283 else
1284 {
1285 int new_buffer_size = 2*buffer_size;
1286 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1287 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1288 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1289
1290 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1291 {
1292 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1293 exit(1);
1294 }
1295
1296 memcpy(new_buffer, buffer, buffer_size);
1297 memcpy(new_pbuffer, pbuffer, buffer_size);
1298
1299 buffer_size = new_buffer_size;
1300
1301 start = new_buffer + (start - buffer);
1302 here = new_buffer + (here - buffer);
1303
1304 free(buffer);
1305 free(dbuffer);
1306 free(pbuffer);
1307
1308 buffer = new_buffer;
1309 dbuffer = new_dbuffer;
1310 pbuffer = new_pbuffer;
1311 }
1312 }
1313
1314 return NULL; /* Control never gets here */
1315 }
1316
1317
1318
1319 /*************************************************
1320 * Read number from string *
1321 *************************************************/
1322
1323 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1324 around with conditional compilation, just do the job by hand. It is only used
1325 for unpicking arguments, so just keep it simple.
1326
1327 Arguments:
1328 str string to be converted
1329 endptr where to put the end pointer
1330
1331 Returns: the value, as an int
1332 */
1333
1334 static int
1335 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1336 {
1337 int result = 0;
1338 while(*str != 0 && isspace(*str)) str++;
1339 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1340 *endptr = str;
1341 return(result);
1342 }
1343
1344
1345
1346 /*************************************************
1347 * Print one character *
1348 *************************************************/
1349
1350 /* Print a single character either literally, or as a hex escape. */
1351
1352 static int pchar(int c, FILE *f)
1353 {
1354 if (PRINTOK(c))
1355 {
1356 if (f != NULL) fprintf(f, "%c", c);
1357 return 1;
1358 }
1359
1360 if (c < 0x100)
1361 {
1362 if (use_utf)
1363 {
1364 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1365 return 6;
1366 }
1367 else
1368 {
1369 if (f != NULL) fprintf(f, "\\x%02x", c);
1370 return 4;
1371 }
1372 }
1373
1374 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1375 return (c <= 0x000000ff)? 6 :
1376 (c <= 0x00000fff)? 7 :
1377 (c <= 0x0000ffff)? 8 :
1378 (c <= 0x000fffff)? 9 : 10;
1379 }
1380
1381
1382
1383 #ifdef SUPPORT_PCRE8
1384 /*************************************************
1385 * Print 8-bit character string *
1386 *************************************************/
1387
1388 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1389 If handed a NULL file, just counts chars without printing. */
1390
1391 static int pchars(pcre_uint8 *p, int length, FILE *f)
1392 {
1393 int c = 0;
1394 int yield = 0;
1395
1396 if (length < 0)
1397 length = strlen((char *)p);
1398
1399 while (length-- > 0)
1400 {
1401 #if !defined NOUTF
1402 if (use_utf)
1403 {
1404 int rc = utf82ord(p, &c);
1405 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1406 {
1407 length -= rc - 1;
1408 p += rc;
1409 yield += pchar(c, f);
1410 continue;
1411 }
1412 }
1413 #endif
1414 c = *p++;
1415 yield += pchar(c, f);
1416 }
1417
1418 return yield;
1419 }
1420 #endif
1421
1422
1423
1424 #ifdef SUPPORT_PCRE16
1425 /*************************************************
1426 * Find length of 0-terminated 16-bit string *
1427 *************************************************/
1428
1429 static int strlen16(PCRE_SPTR16 p)
1430 {
1431 int len = 0;
1432 while (*p++ != 0) len++;
1433 return len;
1434 }
1435 #endif /* SUPPORT_PCRE16 */
1436
1437
1438 #ifdef SUPPORT_PCRE16
1439 /*************************************************
1440 * Print 16-bit character string *
1441 *************************************************/
1442
1443 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1444 If handed a NULL file, just counts chars without printing. */
1445
1446 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1447 {
1448 int yield = 0;
1449
1450 if (length < 0)
1451 length = strlen16(p);
1452
1453 while (length-- > 0)
1454 {
1455 int c = *p++ & 0xffff;
1456 #if !defined NOUTF
1457 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1458 {
1459 int d = *p & 0xffff;
1460 if (d >= 0xDC00 && d < 0xDFFF)
1461 {
1462 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1463 length--;
1464 p++;
1465 }
1466 }
1467 #endif
1468 yield += pchar(c, f);
1469 }
1470
1471 return yield;
1472 }
1473 #endif /* SUPPORT_PCRE16 */
1474
1475
1476
1477 #ifdef SUPPORT_PCRE8
1478 /*************************************************
1479 * Read a capture name (8-bit) and check it *
1480 *************************************************/
1481
1482 static pcre_uint8 *
1483 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1484 {
1485 pcre_uint8 *npp = *pp;
1486 while (isalnum(*p)) *npp++ = *p++;
1487 *npp++ = 0;
1488 *npp = 0;
1489 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1490 {
1491 fprintf(outfile, "no parentheses with name \"");
1492 PCHARSV(*pp, 0, -1, outfile);
1493 fprintf(outfile, "\"\n");
1494 }
1495
1496 *pp = npp;
1497 return p;
1498 }
1499 #endif /* SUPPORT_PCRE8 */
1500
1501
1502
1503 #ifdef SUPPORT_PCRE16
1504 /*************************************************
1505 * Read a capture name (16-bit) and check it *
1506 *************************************************/
1507
1508 /* Note that the text being read is 8-bit. */
1509
1510 static pcre_uint8 *
1511 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1512 {
1513 pcre_uint16 *npp = *pp;
1514 while (isalnum(*p)) *npp++ = *p++;
1515 *npp++ = 0;
1516 *npp = 0;
1517 if (pcre16_get_stringnumber(re, (PCRE_SPTR16)(*pp)) < 0)
1518 {
1519 fprintf(outfile, "no parentheses with name \"");
1520 PCHARSV(*pp, 0, -1, outfile);
1521 fprintf(outfile, "\"\n");
1522 }
1523 *pp = npp;
1524 return p;
1525 }
1526 #endif /* SUPPORT_PCRE16 */
1527
1528
1529
1530 /*************************************************
1531 * Callout function *
1532 *************************************************/
1533
1534 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1535 the match. Yield zero unless more callouts than the fail count, or the callout
1536 data is not zero. */
1537
1538 static int callout(pcre_callout_block *cb)
1539 {
1540 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1541 int i, pre_start, post_start, subject_length;
1542
1543 if (callout_extra)
1544 {
1545 fprintf(f, "Callout %d: last capture = %d\n",
1546 cb->callout_number, cb->capture_last);
1547
1548 for (i = 0; i < cb->capture_top * 2; i += 2)
1549 {
1550 if (cb->offset_vector[i] < 0)
1551 fprintf(f, "%2d: <unset>\n", i/2);
1552 else
1553 {
1554 fprintf(f, "%2d: ", i/2);
1555 PCHARSV(cb->subject, cb->offset_vector[i],
1556 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1557 fprintf(f, "\n");
1558 }
1559 }
1560 }
1561
1562 /* Re-print the subject in canonical form, the first time or if giving full
1563 datails. On subsequent calls in the same match, we use pchars just to find the
1564 printed lengths of the substrings. */
1565
1566 if (f != NULL) fprintf(f, "--->");
1567
1568 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1569 PCHARS(post_start, cb->subject, cb->start_match,
1570 cb->current_position - cb->start_match, f);
1571
1572 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1573
1574 PCHARSV(cb->subject, cb->current_position,
1575 cb->subject_length - cb->current_position, f);
1576
1577 if (f != NULL) fprintf(f, "\n");
1578
1579 /* Always print appropriate indicators, with callout number if not already
1580 shown. For automatic callouts, show the pattern offset. */
1581
1582 if (cb->callout_number == 255)
1583 {
1584 fprintf(outfile, "%+3d ", cb->pattern_position);
1585 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1586 }
1587 else
1588 {
1589 if (callout_extra) fprintf(outfile, " ");
1590 else fprintf(outfile, "%3d ", cb->callout_number);
1591 }
1592
1593 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1594 fprintf(outfile, "^");
1595
1596 if (post_start > 0)
1597 {
1598 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1599 fprintf(outfile, "^");
1600 }
1601
1602 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1603 fprintf(outfile, " ");
1604
1605 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1606 pbuffer + cb->pattern_position);
1607
1608 fprintf(outfile, "\n");
1609 first_callout = 0;
1610
1611 if (cb->mark != last_callout_mark)
1612 {
1613 if (cb->mark == NULL)
1614 fprintf(outfile, "Latest Mark: <unset>\n");
1615 else
1616 {
1617 fprintf(outfile, "Latest Mark: ");
1618 PCHARSV(cb->mark, 0, -1, outfile);
1619 putc('\n', outfile);
1620 }
1621 last_callout_mark = cb->mark;
1622 }
1623
1624 if (cb->callout_data != NULL)
1625 {
1626 int callout_data = *((int *)(cb->callout_data));
1627 if (callout_data != 0)
1628 {
1629 fprintf(outfile, "Callout data = %d\n", callout_data);
1630 return callout_data;
1631 }
1632 }
1633
1634 return (cb->callout_number != callout_fail_id)? 0 :
1635 (++callout_count >= callout_fail_count)? 1 : 0;
1636 }
1637
1638
1639 /*************************************************
1640 * Local malloc functions *
1641 *************************************************/
1642
1643 /* Alternative malloc function, to test functionality and save the size of a
1644 compiled re, which is the first store request that pcre_compile() makes. The
1645 show_malloc variable is set only during matching. */
1646
1647 static void *new_malloc(size_t size)
1648 {
1649 void *block = malloc(size);
1650 gotten_store = size;
1651 if (first_gotten_store == 0) first_gotten_store = size;
1652 if (show_malloc)
1653 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1654 return block;
1655 }
1656
1657 static void new_free(void *block)
1658 {
1659 if (show_malloc)
1660 fprintf(outfile, "free %p\n", block);
1661 free(block);
1662 }
1663
1664 /* For recursion malloc/free, to test stacking calls */
1665
1666 static void *stack_malloc(size_t size)
1667 {
1668 void *block = malloc(size);
1669 if (show_malloc)
1670 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1671 return block;
1672 }
1673
1674 static void stack_free(void *block)
1675 {
1676 if (show_malloc)
1677 fprintf(outfile, "stack_free %p\n", block);
1678 free(block);
1679 }
1680
1681
1682 /*************************************************
1683 * Call pcre_fullinfo() *
1684 *************************************************/
1685
1686 /* Get one piece of information from the pcre_fullinfo() function. When only
1687 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1688 value, but the code is defensive.
1689
1690 Arguments:
1691 re compiled regex
1692 study study data
1693 option PCRE_INFO_xxx option
1694 ptr where to put the data
1695
1696 Returns: 0 when OK, < 0 on error
1697 */
1698
1699 static int
1700 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1701 {
1702 int rc;
1703
1704 if (use_pcre16)
1705 #ifdef SUPPORT_PCRE16
1706 rc = pcre16_fullinfo(re, study, option, ptr);
1707 #else
1708 rc = PCRE_ERROR_BADMODE;
1709 #endif
1710 else
1711 #ifdef SUPPORT_PCRE8
1712 rc = pcre_fullinfo(re, study, option, ptr);
1713 #else
1714 rc = PCRE_ERROR_BADMODE;
1715 #endif
1716
1717 if (rc < 0)
1718 {
1719 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1720 use_pcre16? "16" : "", option);
1721 if (rc == PCRE_ERROR_BADMODE)
1722 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1723 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1724 }
1725
1726 return rc;
1727 }
1728
1729
1730
1731 /*************************************************
1732 * Swap byte functions *
1733 *************************************************/
1734
1735 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1736 value, respectively.
1737
1738 Arguments:
1739 value any number
1740
1741 Returns: the byte swapped value
1742 */
1743
1744 static pcre_uint32
1745 swap_uint32(pcre_uint32 value)
1746 {
1747 return ((value & 0x000000ff) << 24) |
1748 ((value & 0x0000ff00) << 8) |
1749 ((value & 0x00ff0000) >> 8) |
1750 (value >> 24);
1751 }
1752
1753 static pcre_uint16
1754 swap_uint16(pcre_uint16 value)
1755 {
1756 return (value >> 8) | (value << 8);
1757 }
1758
1759
1760
1761 /*************************************************
1762 * Flip bytes in a compiled pattern *
1763 *************************************************/
1764
1765 /* This function is called if the 'F' option was present on a pattern that is
1766 to be written to a file. We flip the bytes of all the integer fields in the
1767 regex data block and the study block. In 16-bit mode this also flips relevant
1768 bytes in the pattern itself. This is to make it possible to test PCRE's
1769 ability to reload byte-flipped patterns, e.g. those compiled on a different
1770 architecture. */
1771
1772 static void
1773 regexflip(pcre *ere, pcre_extra *extra)
1774 {
1775 real_pcre *re = (real_pcre *)ere;
1776 #ifdef SUPPORT_PCRE16
1777 int op;
1778 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1779 int length = re->name_count * re->name_entry_size;
1780 #ifdef SUPPORT_UTF
1781 BOOL utf = (re->options & PCRE_UTF16) != 0;
1782 BOOL utf16_char = FALSE;
1783 #endif /* SUPPORT_UTF */
1784 #endif /* SUPPORT_PCRE16 */
1785
1786 /* Always flip the bytes in the main data block and study blocks. */
1787
1788 re->magic_number = REVERSED_MAGIC_NUMBER;
1789 re->size = swap_uint32(re->size);
1790 re->options = swap_uint32(re->options);
1791 re->flags = swap_uint16(re->flags);
1792 re->top_bracket = swap_uint16(re->top_bracket);
1793 re->top_backref = swap_uint16(re->top_backref);
1794 re->first_char = swap_uint16(re->first_char);
1795 re->req_char = swap_uint16(re->req_char);
1796 re->name_table_offset = swap_uint16(re->name_table_offset);
1797 re->name_entry_size = swap_uint16(re->name_entry_size);
1798 re->name_count = swap_uint16(re->name_count);
1799
1800 if (extra != NULL)
1801 {
1802 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1803 rsd->size = swap_uint32(rsd->size);
1804 rsd->flags = swap_uint32(rsd->flags);
1805 rsd->minlength = swap_uint32(rsd->minlength);
1806 }
1807
1808 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1809 in the name table, if present, and then in the pattern itself. */
1810
1811 #ifdef SUPPORT_PCRE16
1812 if (!use_pcre16) return;
1813
1814 while(TRUE)
1815 {
1816 /* Swap previous characters. */
1817 while (length-- > 0)
1818 {
1819 *ptr = swap_uint16(*ptr);
1820 ptr++;
1821 }
1822 #ifdef SUPPORT_UTF
1823 if (utf16_char)
1824 {
1825 if ((ptr[-1] & 0xfc00) == 0xd800)
1826 {
1827 /* We know that there is only one extra character in UTF-16. */
1828 *ptr = swap_uint16(*ptr);
1829 ptr++;
1830 }
1831 }
1832 utf16_char = FALSE;
1833 #endif /* SUPPORT_UTF */
1834
1835 /* Get next opcode. */
1836
1837 length = 0;
1838 op = *ptr;
1839 *ptr++ = swap_uint16(op);
1840
1841 switch (op)
1842 {
1843 case OP_END:
1844 return;
1845
1846 #ifdef SUPPORT_UTF
1847 case OP_CHAR:
1848 case OP_CHARI:
1849 case OP_NOT:
1850 case OP_NOTI:
1851 case OP_STAR:
1852 case OP_MINSTAR:
1853 case OP_PLUS:
1854 case OP_MINPLUS:
1855 case OP_QUERY:
1856 case OP_MINQUERY:
1857 case OP_UPTO:
1858 case OP_MINUPTO:
1859 case OP_EXACT:
1860 case OP_POSSTAR:
1861 case OP_POSPLUS:
1862 case OP_POSQUERY:
1863 case OP_POSUPTO:
1864 case OP_STARI:
1865 case OP_MINSTARI:
1866 case OP_PLUSI:
1867 case OP_MINPLUSI:
1868 case OP_QUERYI:
1869 case OP_MINQUERYI:
1870 case OP_UPTOI:
1871 case OP_MINUPTOI:
1872 case OP_EXACTI:
1873 case OP_POSSTARI:
1874 case OP_POSPLUSI:
1875 case OP_POSQUERYI:
1876 case OP_POSUPTOI:
1877 case OP_NOTSTAR:
1878 case OP_NOTMINSTAR:
1879 case OP_NOTPLUS:
1880 case OP_NOTMINPLUS:
1881 case OP_NOTQUERY:
1882 case OP_NOTMINQUERY:
1883 case OP_NOTUPTO:
1884 case OP_NOTMINUPTO:
1885 case OP_NOTEXACT:
1886 case OP_NOTPOSSTAR:
1887 case OP_NOTPOSPLUS:
1888 case OP_NOTPOSQUERY:
1889 case OP_NOTPOSUPTO:
1890 case OP_NOTSTARI:
1891 case OP_NOTMINSTARI:
1892 case OP_NOTPLUSI:
1893 case OP_NOTMINPLUSI:
1894 case OP_NOTQUERYI:
1895 case OP_NOTMINQUERYI:
1896 case OP_NOTUPTOI:
1897 case OP_NOTMINUPTOI:
1898 case OP_NOTEXACTI:
1899 case OP_NOTPOSSTARI:
1900 case OP_NOTPOSPLUSI:
1901 case OP_NOTPOSQUERYI:
1902 case OP_NOTPOSUPTOI:
1903 if (utf) utf16_char = TRUE;
1904 #endif
1905 /* Fall through. */
1906
1907 default:
1908 length = OP_lengths16[op] - 1;
1909 break;
1910
1911 case OP_CLASS:
1912 case OP_NCLASS:
1913 /* Skip the character bit map. */
1914 ptr += 32/sizeof(pcre_uint16);
1915 length = 0;
1916 break;
1917
1918 case OP_XCLASS:
1919 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1920 if (LINK_SIZE > 1)
1921 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1922 - (1 + LINK_SIZE + 1));
1923 else
1924 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1925
1926 /* Reverse the size of the XCLASS instance. */
1927 *ptr = swap_uint16(*ptr);
1928 ptr++;
1929 if (LINK_SIZE > 1)
1930 {
1931 *ptr = swap_uint16(*ptr);
1932 ptr++;
1933 }
1934
1935 op = *ptr;
1936 *ptr = swap_uint16(op);
1937 ptr++;
1938 if ((op & XCL_MAP) != 0)
1939 {
1940 /* Skip the character bit map. */
1941 ptr += 32/sizeof(pcre_uint16);
1942 length -= 32/sizeof(pcre_uint16);
1943 }
1944 break;
1945 }
1946 }
1947 /* Control should never reach here in 16 bit mode. */
1948 #endif /* SUPPORT_PCRE16 */
1949 }
1950
1951
1952
1953 /*************************************************
1954 * Check match or recursion limit *
1955 *************************************************/
1956
1957 static int
1958 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1959 int start_offset, int options, int *use_offsets, int use_size_offsets,
1960 int flag, unsigned long int *limit, int errnumber, const char *msg)
1961 {
1962 int count;
1963 int min = 0;
1964 int mid = 64;
1965 int max = -1;
1966
1967 extra->flags |= flag;
1968
1969 for (;;)
1970 {
1971 *limit = mid;
1972
1973 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1974 use_offsets, use_size_offsets);
1975
1976 if (count == errnumber)
1977 {
1978 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1979 min = mid;
1980 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1981 }
1982
1983 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1984 count == PCRE_ERROR_PARTIAL)
1985 {
1986 if (mid == min + 1)
1987 {
1988 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1989 break;
1990 }
1991 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1992 max = mid;
1993 mid = (min + mid)/2;
1994 }
1995 else break; /* Some other error */
1996 }
1997
1998 extra->flags &= ~flag;
1999 return count;
2000 }
2001
2002
2003
2004 /*************************************************
2005 * Case-independent strncmp() function *
2006 *************************************************/
2007
2008 /*
2009 Arguments:
2010 s first string
2011 t second string
2012 n number of characters to compare
2013
2014 Returns: < 0, = 0, or > 0, according to the comparison
2015 */
2016
2017 static int
2018 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2019 {
2020 while (n--)
2021 {
2022 int c = tolower(*s++) - tolower(*t++);
2023 if (c) return c;
2024 }
2025 return 0;
2026 }
2027
2028
2029
2030 /*************************************************
2031 * Check newline indicator *
2032 *************************************************/
2033
2034 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2035 a message and return 0 if there is no match.
2036
2037 Arguments:
2038 p points after the leading '<'
2039 f file for error message
2040
2041 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2042 */
2043
2044 static int
2045 check_newline(pcre_uint8 *p, FILE *f)
2046 {
2047 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2048 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2049 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2050 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2051 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2052 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2053 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2054 fprintf(f, "Unknown newline type at: <%s\n", p);
2055 return 0;
2056 }
2057
2058
2059
/*************************************************
*                Usage function                  *
*************************************************/

/* Write the command-line help text to stdout. Which lines appear depends on
SUPPORT_LIBREADLINE, SUPPORT_PCRE16, NODFA and NOPOSIX at compile time. */

static void
usage(void)
{
  /* The help text, one output line per entry, in the order it is printed. */
  static const char *const text[] = {
    "Usage: pcretest [options] [<input file> [<output file>]]\n\n",
    "Input and output default to stdin and stdout.\n",
#ifdef SUPPORT_LIBREADLINE
    "If input is a terminal, readline() is used to read from it.\n",
#else
    "This version of pcretest is not linked with readline().\n",
#endif
    "\nOptions:\n",
#ifdef SUPPORT_PCRE16
    " -16 use 16-bit interface\n",
#endif
    " -b show compiled code (bytecode)\n",
    " -C show PCRE compile-time options and exit\n",
    " -C arg show a specific compile-time option\n",
    " and exit with its value. The arg can be:\n",
    " linksize internal link size [2, 3, 4]\n",
    " pcre8 8 bit library support enabled [0, 1]\n",
    " pcre16 16 bit library support enabled [0, 1]\n",
    " utf Unicode Transformation Format supported [0, 1]\n",
    " ucp Unicode Properties supported [0, 1]\n",
    " jit Just-in-time compiler supported [0, 1]\n",
    " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n",
    " -d debug: show compiled code and information (-b and -i)\n",
#if !defined NODFA
    " -dfa force DFA matching for all subjects\n",
#endif
    " -help show usage information\n",
    " -i show information about compiled patterns\n",
    " -M find MATCH_LIMIT minimum for each subject\n",
    " -m output memory used information\n",
    " -o <n> set size of offsets vector to <n>\n",
#if !defined NOPOSIX
    " -p use POSIX interface\n",
#endif
    " -q quiet: do not output PCRE version number at start\n",
    " -S <n> set stack size to <n> megabytes\n",
    " -s force each pattern to be studied at basic level\n",
    " -s+ force each pattern to be studied, using JIT if available\n",
    " -t time compilation and execution\n",
    " -t <n> time compilation and execution, repeating <n> times\n",
    " -tm time execution (matching) only\n",
    " -tm <n> time execution (matching) only, repeating <n> times\n"
  };
  unsigned int i;

  for (i = 0; i < sizeof(text)/sizeof(text[0]); i++)
    printf("%s", text[i]);
}
2110
2111
2112
2113 /*************************************************
2114 * Main Program *
2115 *************************************************/
2116
2117 /* Read lines from named file or stdin and write to named file or stdout; lines
2118 consist of a regular expression, in delimiters and optionally followed by
2119 options, followed by a set of test data, terminated by an empty line. */
2120
2121 int main(int argc, char **argv)
2122 {
2123 FILE *infile = stdin;
2124 const char *version;
2125 int options = 0;
2126 int study_options = 0;
2127 int default_find_match_limit = FALSE;
2128 int op = 1;
2129 int timeit = 0;
2130 int timeitm = 0;
2131 int showinfo = 0;
2132 int showstore = 0;
2133 int force_study = -1;
2134 int force_study_options = 0;
2135 int quiet = 0;
2136 int size_offsets = 45;
2137 int size_offsets_max;
2138 int *offsets = NULL;
2139 #if !defined NOPOSIX
2140 int posix = 0;
2141 #endif
2142 int debug = 0;
2143 int done = 0;
2144 int all_use_dfa = 0;
2145 int yield = 0;
2146 int stack_size;
2147
2148 pcre_jit_stack *jit_stack = NULL;
2149
2150 /* These vectors store, end-to-end, a list of zero-terminated captured
2151 substring names, each list itself being terminated by an empty name. Assume
2152 that 1024 is plenty long enough for the few names we'll be testing. It is
2153 easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2154 for the actual memory, to ensure alignment. By defining these variables always
2155 (whether or not 8-bit or 16-bit is supported), we avoid too much mess with
2156 #ifdefs in the code. */
2157
2158 pcre_uint16 copynames[1024];
2159 pcre_uint16 getnames[1024];
2160
2161 pcre_uint16 *cn16ptr;
2162 pcre_uint16 *gn16ptr;
2163
2164 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2165 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2166 pcre_uint8 *cn8ptr;
2167 pcre_uint8 *gn8ptr;
2168
2169 /* Get buffers from malloc() so that valgrind will check their misuse when
2170 debugging. They grow automatically when very long lines are read. The 16-bit
2171 buffer (buffer16) is obtained only if needed. */
2172
2173 buffer = (pcre_uint8 *)malloc(buffer_size);
2174 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2175 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2176
2177 /* The outfile variable is static so that new_malloc can use it. */
2178
2179 outfile = stdout;
2180
2181 /* The following _setmode() stuff is some Windows magic that tells its runtime
2182 library to translate CRLF into a single LF character. At least, that's what
2183 I've been told: never having used Windows I take this all on trust. Originally
2184 it set 0x8000, but then I was advised that _O_BINARY was better. */
2185
2186 #if defined(_WIN32) || defined(WIN32)
2187 _setmode( _fileno( stdout ), _O_BINARY );
2188 #endif
2189
2190 /* Get the version number: both pcre_version() and pcre16_version() give the
2191 same answer. We just need to ensure that we call one that is available. */
2192
2193 #ifdef SUPPORT_PCRE8
2194 version = pcre_version();
2195 #else
2196 version = pcre16_version();
2197 #endif
2198
2199 /* Scan options */
2200
2201 while (argc > 1 && argv[op][0] == '-')
2202 {
2203 pcre_uint8 *endptr;
2204
2205 if (strcmp(argv[op], "-m") == 0) showstore = 1;
2206 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2207 else if (strcmp(argv[op], "-s+") == 0)
2208 {
2209 force_study = 1;
2210 force_study_options = PCRE_STUDY_JIT_COMPILE;
2211 }
2212 else if (strcmp(argv[op], "-16") == 0)
2213 {
2214 #ifdef SUPPORT_PCRE16
2215 use_pcre16 = 1;
2216 #else
2217 printf("** This version of PCRE was built without 16-bit support\n");
2218 exit(1);
2219 #endif
2220 }
2221 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2222 else if (strcmp(argv[op], "-b") == 0) debug = 1;
2223 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2224 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2225 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2226 #if !defined NODFA
2227 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2228 #endif
2229 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2230 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2231 *endptr == 0))
2232 {
2233 op++;
2234 argc--;
2235 }
2236 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2237 {
2238 int both = argv[op][2] == 0;
2239 int temp;
2240 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2241 *endptr == 0))
2242 {
2243 timeitm = temp;
2244 op++;
2245 argc--;
2246 }
2247 else timeitm = LOOPREPEAT;
2248 if (both) timeit = timeitm;
2249 }
2250 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2251 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2252 *endptr == 0))
2253 {
2254 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2255 printf("PCRE: -S not supported on this OS\n");
2256 exit(1);
2257 #else
2258 int rc;
2259 struct rlimit rlim;
2260 getrlimit(RLIMIT_STACK, &rlim);
2261 rlim.rlim_cur = stack_size * 1024 * 1024;
2262 rc = setrlimit(RLIMIT_STACK, &rlim);
2263 if (rc != 0)
2264 {
2265 printf("PCRE: setrlimit() failed with error %d\n", rc);
2266 exit(1);
2267 }
2268 op++;
2269 argc--;
2270 #endif
2271 }
2272 #if !defined NOPOSIX
2273 else if (strcmp(argv[op], "-p") == 0) posix = 1;
2274 #endif
2275 else if (strcmp(argv[op], "-C") == 0)
2276 {
2277 int rc;
2278 unsigned long int lrc;
2279
2280 if (argc > 2)
2281 {
2282 if (strcmp(argv[op + 1], "linksize") == 0)
2283 {
2284 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2285 printf("%d\n", rc);
2286 yield = rc;
2287 goto EXIT;
2288 }
2289 if (strcmp(argv[op + 1], "pcre8") == 0)
2290 {
2291 #ifdef SUPPORT_PCRE8
2292 printf("1\n");
2293 yield = 1;
2294 #else
2295 printf("0\n");
2296 yield = 0;
2297 #endif
2298 goto EXIT;
2299 }
2300 if (strcmp(argv[op + 1], "pcre16") == 0)
2301 {
2302 #ifdef SUPPORT_PCRE16
2303 printf("1\n");
2304 yield = 1;
2305 #else
2306 printf("0\n");
2307 yield = 0;
2308 #endif
2309 goto EXIT;
2310 }
2311 if (strcmp(argv[op + 1], "utf") == 0)
2312 {
2313 #ifdef SUPPORT_PCRE8
2314 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2315 printf("%d\n", rc);
2316 yield = rc;
2317 #else
2318 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2319 printf("%d\n", rc);
2320 yield = rc;
2321 #endif
2322 goto EXIT;
2323 }
2324 if (strcmp(argv[op + 1], "ucp") == 0)
2325 {
2326 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2327 printf("%d\n", rc);
2328 yield = rc;
2329 goto EXIT;
2330 }
2331 if (strcmp(argv[op + 1], "jit") == 0)
2332 {
2333 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2334 printf("%d\n", rc);
2335 yield = rc;
2336 goto EXIT;
2337 }
2338 if (strcmp(argv[op + 1], "newline") == 0)
2339 {
2340 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2341 /* Note that these values are always the ASCII values, even
2342 in EBCDIC environments. CR is 13 and NL is 10. */
2343 printf("%s\n", (rc == 13)? "CR" :
2344 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2345 (rc == -2)? "ANYCRLF" :
2346 (rc == -1)? "ANY" : "???");
2347 goto EXIT;
2348 }
2349 printf("Unknown -C option: %s\n", argv[op + 1]);
2350 goto EXIT;
2351 }
2352
2353 printf("PCRE version %s\n", version);
2354 printf("Compiled with\n");
2355
2356 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2357 are set, either both UTFs are supported or both are not supported. */
2358
2359 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2360 printf(" 8-bit and 16-bit support\n");
2361 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2362 if (rc)
2363 printf(" UTF-8 and UTF-16 support\n");
2364 else
2365 printf(" No UTF-8 or UTF-16 support\n");
2366 #elif defined SUPPORT_PCRE8
2367 printf(" 8-bit support only\n");
2368 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2369 printf(" %sUTF-8 support\n", rc? "" : "No ");
2370 #else
2371 printf(" 16-bit support only\n");
2372 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2373 printf(" %sUTF-16 support\n", rc? "" : "No ");
2374 #endif
2375
2376 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2377 printf(" %sUnicode properties support\n", rc? "" : "No ");
2378 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2379 if (rc)
2380 printf(" Just-in-time compiler support\n");
2381 else
2382 printf(" No just-in-time compiler support\n");
2383 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2384 /* Note that these values are always the ASCII values, even
2385 in EBCDIC environments. CR is 13 and NL is 10. */
2386 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2387 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2388 (rc == -2)? "ANYCRLF" :
2389 (rc == -1)? "ANY" : "???");
2390 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2391 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2392 "all Unicode newlines");
2393 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2394 printf(" Internal link size = %d\n", rc);
2395 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2396 printf(" POSIX malloc threshold = %d\n", rc);
2397 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2398 printf(" Default match limit = %ld\n", lrc);
2399 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2400 printf(" Default recursion depth limit = %ld\n", lrc);
2401 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2402 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
2403 goto EXIT;
2404 }
2405 else if (strcmp(argv[op], "-help") == 0 ||
2406 strcmp(argv[op], "--help") == 0)
2407 {
2408 usage();
2409 goto EXIT;
2410 }
2411 else
2412 {
2413 printf("** Unknown or malformed option %s\n", argv[op]);
2414 usage();
2415 yield = 1;
2416 goto EXIT;
2417 }
2418 op++;
2419 argc--;
2420 }
2421
2422 /* Get the store for the offsets vector, and remember what it was */
2423
2424 size_offsets_max = size_offsets;
2425 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2426 if (offsets == NULL)
2427 {
2428 printf("** Failed to get %d bytes of memory for offsets vector\n",
2429 (int)(size_offsets_max * sizeof(int)));
2430 yield = 1;
2431 goto EXIT;
2432 }
2433
2434 /* Sort out the input and output files */
2435
2436 if (argc > 1)
2437 {
2438 infile = fopen(argv[op], INPUT_MODE);
2439 if (infile == NULL)
2440 {
2441 printf("** Failed to open %s\n", argv[op]);
2442 yield = 1;
2443 goto EXIT;
2444 }
2445 }
2446
2447 if (argc > 2)
2448 {
2449 outfile = fopen(argv[op+1], OUTPUT_MODE);
2450 if (outfile == NULL)
2451 {
2452 printf("** Failed to open %s\n", argv[op+1]);
2453 yield = 1;
2454 goto EXIT;
2455 }
2456 }
2457
2458 /* Set alternative malloc function */
2459
2460 #ifdef SUPPORT_PCRE8
2461 pcre_malloc = new_malloc;
2462 pcre_free = new_free;
2463 pcre_stack_malloc = stack_malloc;
2464 pcre_stack_free = stack_free;
2465 #endif
2466
2467 #ifdef SUPPORT_PCRE16
2468 pcre16_malloc = new_malloc;
2469 pcre16_free = new_free;
2470 pcre16_stack_malloc = stack_malloc;
2471 pcre16_stack_free = stack_free;
2472 #endif
2473
2474 /* Heading line unless quiet, then prompt for first regex if stdin */
2475
2476 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2477
2478 /* Main loop */
2479
2480 while (!done)
2481 {
2482 pcre *re = NULL;
2483 pcre_extra *extra = NULL;
2484
2485 #if !defined NOPOSIX /* There are still compilers that require no indent */
2486 regex_t preg;
2487 int do_posix = 0;
2488 #endif
2489
2490 const char *error;
2491 pcre_uint8 *markptr;
2492 pcre_uint8 *p, *pp, *ppp;
2493 pcre_uint8 *to_file = NULL;
2494 const pcre_uint8 *tables = NULL;
2495 pcre_uint32 get_options;
2496 unsigned long int true_size, true_study_size = 0;
2497 size_t size, regex_gotten_store;
2498 int do_allcaps = 0;
2499 int do_mark = 0;
2500 int do_study = 0;
2501 int no_force_study = 0;
2502 int do_debug = debug;
2503 int do_G = 0;
2504 int do_g = 0;
2505 int do_showinfo = showinfo;
2506 int do_showrest = 0;
2507 int do_showcaprest = 0;
2508 int do_flip = 0;
2509 int erroroffset, len, delimiter, poffset;
2510
2511 use_utf = 0;
2512 debug_lengths = 1;
2513
2514 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2515 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2516 fflush(outfile);
2517
2518 p = buffer;
2519 while (isspace(*p)) p++;
2520 if (*p == 0) continue;
2521
2522 /* See if the pattern is to be loaded pre-compiled from a file. */
2523
2524 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2525 {
2526 pcre_uint32 magic;
2527 pcre_uint8 sbuf[8];
2528 FILE *f;
2529
2530 p++;
2531 if (*p == '!')
2532 {
2533 do_debug = TRUE;
2534 do_showinfo = TRUE;
2535 p++;
2536 }
2537
2538 pp = p + (int)strlen((char *)p);
2539 while (isspace(pp[-1])) pp--;
2540 *pp = 0;
2541
2542 f = fopen((char *)p, "rb");
2543 if (f == NULL)
2544 {
2545 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2546 continue;
2547 }
2548
2549 first_gotten_store = 0;
2550 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2551
2552 true_size =
2553 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2554 true_study_size =
2555 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2556
2557 re = (real_pcre *)new_malloc(true_size);
2558 regex_gotten_store = first_gotten_store;
2559
2560 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2561
2562 magic = ((real_pcre *)re)->magic_number;
2563 if (magic != MAGIC_NUMBER)
2564 {
2565 if (swap_uint32(magic) == MAGIC_NUMBER)
2566 {
2567 do_flip = 1;
2568 }
2569 else
2570 {
2571 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2572 fclose(f);
2573 continue;
2574 }
2575 }
2576
2577 /* We hide the byte-invert info for little and big endian tests. */
2578 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2579 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2580
2581 /* Now see if there is any following study data. */
2582
2583 if (true_study_size != 0)
2584 {
2585 pcre_study_data *psd;
2586
2587 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2588 extra->flags = PCRE_EXTRA_STUDY_DATA;
2589
2590 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2591 extra->study_data = psd;
2592
2593 if (fread(psd, 1, true_study_size, f) != true_study_size)
2594 {
2595 FAIL_READ:
2596 fprintf(outfile, "Failed to read data from %s\n", p);
2597 if (extra != NULL)
2598 {
2599 PCRE_FREE_STUDY(extra);
2600 }
2601 if (re != NULL) new_free(re);
2602 fclose(f);
2603 continue;
2604 }
2605 fprintf(outfile, "Study data loaded from %s\n", p);
2606 do_study = 1; /* To get the data output if requested */
2607 }
2608 else fprintf(outfile, "No study data\n");
2609
2610 /* Flip the necessary bytes. */
2611 if (do_flip)
2612 {
2613 int rc;
2614 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2615 if (rc == PCRE_ERROR_BADMODE)
2616 {
2617 /* Simulate the result of the function call below. */
2618 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2619 use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2620 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2621 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2622 continue;
2623 }
2624 }
2625
2626 /* Need to know if UTF-8 for printing data strings. */
2627
2628 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2629 use_utf = (get_options & PCRE_UTF8) != 0;
2630
2631 fclose(f);
2632 goto SHOW_INFO;
2633 }
2634
2635 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2636 the pattern; if it isn't complete, read more. */
2637
2638 delimiter = *p++;
2639
2640 if (isalnum(delimiter) || delimiter == '\\')
2641 {
2642 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2643 goto SKIP_DATA;
2644 }
2645
2646 pp = p;
2647 poffset = (int)(p - buffer);
2648
2649 for(;;)
2650 {
2651 while (*pp != 0)
2652 {
2653 if (*pp == '\\' && pp[1] != 0) pp++;
2654 else if (*pp == delimiter) break;
2655 pp++;
2656 }
2657 if (*pp != 0) break;
2658 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2659 {
2660 fprintf(outfile, "** Unexpected EOF\n");
2661 done = 1;
2662 goto CONTINUE;
2663 }
2664 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2665 }
2666
2667 /* The buffer may have moved while being extended; reset the start of data
2668 pointer to the correct relative point in the buffer. */
2669
2670 p = buffer + poffset;
2671
2672 /* If the first character after the delimiter is backslash, make
2673 the pattern end with backslash. This is purely to provide a way
2674 of testing for the error message when a pattern ends with backslash. */
2675
2676 if (pp[1] == '\\') *pp++ = '\\';
2677
2678 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2679 for callouts. */
2680
2681 *pp++ = 0;
2682 strcpy((char *)pbuffer, (char *)p);
2683
2684 /* Look for options after final delimiter */
2685
2686 options = 0;
2687 study_options = 0;
2688 log_store = showstore; /* default from command line */
2689
2690 while (*pp != 0)
2691 {
2692 switch (*pp++)
2693 {
2694 case 'f': options |= PCRE_FIRSTLINE; break;
2695 case 'g': do_g = 1; break;
2696 case 'i': options |= PCRE_CASELESS; break;
2697 case 'm': options |= PCRE_MULTILINE; break;
2698 case 's': options |= PCRE_DOTALL; break;
2699 case 'x': options |= PCRE_EXTENDED; break;
2700
2701 case '+':
2702 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2703 break;
2704
2705 case '=': do_allcaps = 1; break;
2706 case 'A': options |= PCRE_ANCHORED; break;
2707 case 'B': do_debug = 1; break;
2708 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2709 case 'D': do_debug = do_showinfo = 1; break;
2710 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2711 case 'F': do_flip = 1; break;
2712 case 'G': do_G = 1; break;
2713 case 'I': do_showinfo = 1; break;
2714 case 'J': options |= PCRE_DUPNAMES; break;
2715 case 'K': do_mark = 1; break;
2716 case 'M': log_store = 1; break;
2717 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2718
2719 #if !defined NOPOSIX
2720 case 'P': do_posix = 1; break;
2721 #endif
2722
2723 case 'S':
2724 if (do_study == 0)
2725 {
2726 do_study = 1;
2727 if (*pp == '+')
2728 {
2729 study_options |= PCRE_STUDY_JIT_COMPILE;
2730 pp++;
2731 }
2732 }
2733 else
2734 {
2735 do_study = 0;
2736 no_force_study = 1;
2737 }
2738 break;
2739
2740 case 'U': options |= PCRE_UNGREEDY; break;
2741 case 'W': options |= PCRE_UCP; break;
2742 case 'X': options |= PCRE_EXTRA; break;
2743 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2744 case 'Z': debug_lengths = 0; break;
2745 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2746 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2747
2748 case 'T':
2749 switch (*pp++)
2750 {
2751 case '0': tables = tables0; break;
2752 case '1': tables = tables1; break;
2753
2754 case '\r':
2755 case '\n':
2756 case ' ':
2757 case 0:
2758 fprintf(outfile, "** Missing table number after /T\n");
2759 goto SKIP_DATA;
2760
2761 default:
2762 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2763 goto SKIP_DATA;
2764 }
2765 break;
2766
2767 case 'L':
2768 ppp = pp;
2769 /* The '\r' test here is so that it works on Windows. */
2770 /* The '0' test is just in case this is an unterminated line. */
2771 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2772 *ppp = 0;
2773 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2774 {
2775 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2776 goto SKIP_DATA;
2777 }
2778 locale_set = 1;
2779 tables = PCRE_MAKETABLES;
2780 pp = ppp;
2781 break;
2782
2783 case '>':
2784 to_file = pp;
2785 while (*pp != 0) pp++;
2786 while (isspace(pp[-1])) pp--;
2787 *pp = 0;
2788 break;
2789
2790 case '<':
2791 {
2792 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2793 {
2794 options |= PCRE_JAVASCRIPT_COMPAT;
2795 pp += 3;
2796 }
2797 else
2798 {
2799 int x = check_newline(pp, outfile);
2800 if (x == 0) goto SKIP_DATA;
2801 options |= x;
2802 while (*pp++ != '>');
2803 }
2804 }
2805 break;
2806
2807 case '\r': /* So that it works in Windows */
2808 case '\n':
2809 case ' ':
2810 break;
2811
2812 default:
2813 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2814 goto SKIP_DATA;
2815 }
2816 }
2817
2818 /* Handle compiling via the POSIX interface, which doesn't support the
2819 timing, showing, or debugging options, nor the ability to pass over
2820 local character tables. Neither does it have 16-bit support. */
2821
2822 #if !defined NOPOSIX
2823 if (posix || do_posix)
2824 {
2825 int rc;
2826 int cflags = 0;
2827
2828 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2829 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2830 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2831 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2832 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2833 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2834 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2835
2836 first_gotten_store = 0;
2837 rc = regcomp(&preg, (char *)p, cflags);
2838
2839 /* Compilation failed; go back for another re, skipping to blank line
2840 if non-interactive. */
2841
2842 if (rc != 0)
2843 {
2844 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2845 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2846 goto SKIP_DATA;
2847 }
2848 }
2849
2850 /* Handle compiling via the native interface */
2851
2852 else
2853 #endif /* !defined NOPOSIX */
2854
2855 {
2856 /* In 16-bit mode, convert the input. */
2857
2858 #ifdef SUPPORT_PCRE16
2859 if (use_pcre16)
2860 {
2861 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2862 {
2863 case -1:
2864 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2865 "converted to UTF-16\n");
2866 goto SKIP_DATA;
2867
2868 case -2:
2869 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2870 "cannot be converted to UTF-16\n");
2871 goto SKIP_DATA;
2872
2873 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2874 fprintf(outfile, "**Failed: character value greater than 0xffff "
2875 "cannot be converted to 16-bit in non-UTF mode\n");
2876 goto SKIP_DATA;
2877
2878 default:
2879 break;
2880 }
2881 p = (pcre_uint8 *)buffer16;
2882 }
2883 #endif
2884
2885 /* Compile many times when timing */
2886
2887 if (timeit > 0)
2888 {
2889 register int i;
2890 clock_t time_taken;
2891 clock_t start_time = clock();
2892 for (i = 0; i < timeit; i++)
2893 {
2894 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2895 if (re != NULL) free(re);
2896 }
2897 time_taken = clock() - start_time;
2898 fprintf(outfile, "Compile time %.4f milliseconds\n",
2899 (((double)time_taken * 1000.0) / (double)timeit) /
2900 (double)CLOCKS_PER_SEC);
2901 }
2902
2903 first_gotten_store = 0;
2904 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2905
2906 /* Compilation failed; go back for another re, skipping to blank line
2907 if non-interactive. */
2908
2909 if (re == NULL)
2910 {
2911 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2912 SKIP_DATA:
2913 if (infile != stdin)
2914 {
2915 for (;;)
2916 {
2917 if (extend_inputline(infile, buffer, NULL) == NULL)
2918 {
2919 done = 1;
2920 goto CONTINUE;
2921 }
2922 len = (int)strlen((char *)buffer);
2923 while (len > 0 && isspace(buffer[len-1])) len--;
2924 if (len == 0) break;
2925 }
2926 fprintf(outfile, "\n");
2927 }
2928 goto CONTINUE;
2929 }
2930
2931 /* Compilation succeeded. It is now possible to set the UTF-8 option from
2932 within the regex; check for this so that we know how to process the data
2933 lines. */
2934
2935 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2936 goto SKIP_DATA;
2937 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2938
2939 /* Extract the size for possible writing before possibly flipping it,
2940 and remember the store that was got. */
2941
2942 true_size = ((real_pcre *)re)->size;
2943 regex_gotten_store = first_gotten_store;
2944
2945 /* Output code size information if requested */
2946
2947 if (log_store)
2948 fprintf(outfile, "Memory allocation (code space): %d\n",
2949 (int)(first_gotten_store -
2950 sizeof(real_pcre) -
2951 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2952
2953 /* If -s or /S was present, study the regex to generate additional info to
2954 help with the matching, unless the pattern has the SS option, which
2955 suppresses the effect of /S (used for a few test patterns where studying is
2956 never sensible). */
2957
2958 if (do_study || (force_study >= 0 && !no_force_study))
2959 {
2960 if (timeit > 0)
2961 {
2962 register int i;
2963 clock_t time_taken;
2964 clock_t start_time = clock();
2965 for (i = 0; i < timeit; i++)
2966 {
2967 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2968 }
2969 time_taken = clock() - start_time;
2970 if (extra != NULL)
2971 {
2972 PCRE_FREE_STUDY(extra);
2973 }
2974 fprintf(outfile, " Study time %.4f milliseconds\n",
2975 (((double)time_taken * 1000.0) / (double)timeit) /
2976 (double)CLOCKS_PER_SEC);
2977 }
2978 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2979 if (error != NULL)
2980 fprintf(outfile, "Failed to study: %s\n", error);
2981 else if (extra != NULL)
2982 {
2983 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2984 if (log_store)
2985 {
2986 size_t jitsize;
2987 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
2988 jitsize != 0)
2989 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
2990 }
2991 }
2992 }
2993
2994 /* If /K was present, we set up for handling MARK data. */
2995
2996 if (do_mark)
2997 {
2998 if (extra == NULL)
2999 {
3000 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3001 extra->flags = 0;
3002 }
3003 extra->mark = &markptr;
3004 extra->flags |= PCRE_EXTRA_MARK;
3005 }
3006
3007 /* Extract and display information from the compiled data if required. */
3008
3009 SHOW_INFO:
3010
3011 if (do_debug)
3012 {
3013 fprintf(outfile, "------------------------------------------------------------------\n");
3014 PCRE_PRINTINT(re, outfile, debug_lengths);
3015 }
3016
3017 /* We already have the options in get_options (see above) */
3018
3019 if (do_showinfo)
3020 {
3021 unsigned long int all_options;
3022 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3023 hascrorlf;
3024 int nameentrysize, namecount;
3025 const pcre_uint8 *nametable;
3026
3027 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3028 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3029 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3030 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3031 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3032 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3033 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3034 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3035 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3036 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3037 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3038 != 0)
3039 goto SKIP_DATA;
3040
3041 if (size != regex_gotten_store) fprintf(outfile,
3042 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3043 (int)size, (int)regex_gotten_store);
3044
3045 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3046 if (backrefmax > 0)
3047 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3048
3049 if (namecount > 0)
3050 {
3051 fprintf(outfile, "Named capturing subpatterns:\n");
3052 while (namecount-- > 0)
3053 {
3054 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3055 int imm2_size = use_pcre16 ? 1 : 2;
3056 #else
3057 int imm2_size = IMM2_SIZE;
3058 #endif
3059 int length = (int)STRLEN(nametable + imm2_size);
3060 fprintf(outfile, " ");
3061 PCHARSV(nametable, imm2_size, length, outfile);
3062 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3063 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3064 fprintf(outfile, "%3d\n", use_pcre16?
3065 (int)(((PCRE_SPTR16)nametable)[0])
3066 :((int)nametable[0] << 8) | (int)nametable[1]);
3067 nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3068 #else
3069 fprintf(outfile, "%3d\n", GET2(nametable, 0));
3070 #ifdef SUPPORT_PCRE8
3071 nametable += nameentrysize;
3072 #else
3073 nametable += nameentrysize * 2;
3074 #endif
3075 #endif
3076 }
3077 }
3078
3079 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3080 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3081
3082 all_options = ((real_pcre *)re)->options;
3083 if (do_flip) all_options = swap_uint32(all_options);
3084
3085 if (get_options == 0) fprintf(outfile, "No options\n");
3086 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3087 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3088 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3089 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3090 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3091 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3092 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3093 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3094 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3095 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3096 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3097 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3098 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3099 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3100 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3101 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3102 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3103 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3104
3105 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3106
3107 switch (get_options & PCRE_NEWLINE_BITS)
3108 {
3109 case PCRE_NEWLINE_CR:
3110 fprintf(outfile, "Forced newline sequence: CR\n");
3111 break;
3112
3113 case PCRE_NEWLINE_LF:
3114 fprintf(outfile, "Forced newline sequence: LF\n");
3115 break;
3116
3117 case PCRE_NEWLINE_CRLF:
3118 fprintf(outfile, "Forced newline sequence: CRLF\n");
3119 break;
3120
3121 case PCRE_NEWLINE_ANYCRLF:
3122 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3123 break;
3124
3125 case PCRE_NEWLINE_ANY:
3126 fprintf(outfile, "Forced newline sequence: ANY\n");
3127 break;
3128
3129 default:
3130 break;
3131 }
3132
3133 if (first_char == -1)
3134 {
3135 fprintf(outfile, "First char at start or follows newline\n");
3136 }
3137 else if (first_char < 0)
3138 {
3139 fprintf(outfile, "No first char\n");
3140 }
3141 else
3142 {
3143 const char *caseless =
3144 ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3145 "" : " (caseless)";
3146
3147 if (PRINTOK(first_char))
3148 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3149 else
3150 {
3151 fprintf(outfile, "First char = ");
3152 pchar(first_char, outfile);
3153 fprintf(outfile, "%s\n", caseless);
3154 }
3155 }
3156
3157 if (need_char < 0)
3158 {
3159 fprintf(outfile, "No need char\n");
3160 }
3161 else
3162 {
3163 const char *caseless =
3164 ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3165 "" : " (caseless)";
3166
3167 if (PRINTOK(need_char))
3168 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3169 else
3170 {
3171 fprintf(outfile, "Need char = ");
3172 pchar(need_char, outfile);
3173 fprintf(outfile, "%s\n", caseless);
3174 }
3175 }
3176
3177 /* Don't output study size; at present it is in any case a fixed
3178 value, but it varies, depending on the computer architecture, and
3179 so messes up the test suite. (And with the /F option, it might be
3180 flipped.) If study was forced by an external -s, don't show this
3181 information unless -i or -d was also present. This means that, except
3182 when auto-callouts are involved, the output from runs with and without
3183 -s should be identical. */
3184
3185 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3186 {
3187 if (extra == NULL)
3188 fprintf(outfile, "Study returned NULL\n");
3189 else
3190 {
3191 pcre_uint8 *start_bits = NULL;
3192 int minlength;
3193
3194 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3195 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3196
3197 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3198 {
3199 if (start_bits == NULL)
3200 fprintf(outfile, "No set of starting bytes\n");
3201 else
3202 {
3203 int i;
3204 int c = 24;
3205 fprintf(outfile, "Starting byte set: ");
3206 for (i = 0; i < 256; i++)
3207 {
3208 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3209 {
3210 if (c > 75)
3211 {
3212 fprintf(outfile, "\n ");
3213 c = 2;
3214 }
3215 if (PRINTOK(i) && i != ' ')
3216 {
3217 fprintf(outfile, "%c ", i);
3218 c += 2;
3219 }
3220 else
3221 {
3222 fprintf(outfile, "\\x%02x ", i);
3223 c += 5;
3224 }
3225 }
3226 }
3227 fprintf(outfile, "\n");
3228 }
3229 }
3230 }
3231
3232 /* Show this only if the JIT was set by /S, not by -s. */
3233
3234 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3235 {
3236 int jit;
3237 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3238 {
3239 if (jit)
3240 fprintf(outfile, "JIT study was successful\n");
3241 else
3242 #ifdef SUPPORT_JIT
3243 fprintf(outfile, "JIT study was not successful\n");
3244 #else
3245 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3246 #endif
3247 }
3248 }
3249 }
3250 }
3251
3252 /* If the '>' option was present, we write out the regex to a file, and
3253 that is all. The first 8 bytes of the file are the regex length and then
3254 the study length, in big-endian order. */
3255
3256 if (to_file != NULL)
3257 {
3258 FILE *f = fopen((char *)to_file, "wb");
3259 if (f == NULL)
3260 {
3261 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3262 }
3263 else
3264 {
3265 pcre_uint8 sbuf[8];
3266
3267 if (do_flip) regexflip(re, extra);
3268 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3269 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3270 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3271 sbuf[3] = (pcre_uint8)((true_size) & 255);
3272 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3273 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3274 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3275 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3276
3277 if (fwrite(sbuf, 1, 8, f) < 8 ||
3278 fwrite(re, 1, true_size, f) < true_size)
3279 {
3280 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3281 }
3282 else
3283 {
3284 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3285
3286 /* If there is study data, write it. */
3287
3288 if (extra != NULL)
3289 {
3290 if (fwrite(extra->study_data, 1, true_study_size, f) <
3291 true_study_size)
3292 {
3293 fprintf(outfile, "Write error on %s: %s\n", to_file,
3294 strerror(errno));
3295 }
3296 else fprintf(outfile, "Study data written to %s\n", to_file);
3297 }
3298 }
3299 fclose(f);
3300 }
3301
3302 new_free(re);
3303 if (extra != NULL)
3304 {
3305 PCRE_FREE_STUDY(extra);
3306 }
3307 if (locale_set)
3308 {
3309 new_free((void *)tables);
3310 setlocale(LC_CTYPE, "C");
3311 locale_set = 0;
3312 }
3313 continue; /* With next regex */
3314 }
3315 } /* End of non-POSIX compile */
3316
3317 /* Read data lines and test them */
3318
3319 for (;;)
3320 {
3321 pcre_uint8 *q;
3322 pcre_uint8 *bptr;
3323 int *use_offsets = offsets;
3324 int use_size_offsets = size_offsets;
3325 int callout_data = 0;
3326 int callout_data_set = 0;
3327 int count, c;
3328 int copystrings = 0;
3329 int find_match_limit = default_find_match_limit;
3330 int getstrings = 0;
3331 int getlist = 0;
3332 int gmatched = 0;
3333 int start_offset = 0;
3334 int start_offset_sign = 1;
3335 int g_notempty = 0;
3336 int use_dfa = 0;
3337
3338 *copynames = 0;
3339 *getnames = 0;
3340
3341 cn16ptr = copynames;
3342 gn16ptr = getnames;
3343 cn8ptr = copynames8;
3344 gn8ptr = getnames8;
3345
3346 SET_PCRE_CALLOUT(callout);
3347 first_callout = 1;
3348 last_callout_mark = NULL;
3349 callout_extra = 0;
3350 callout_count = 0;
3351 callout_fail_count = 999999;
3352 callout_fail_id = -1;
3353 show_malloc = 0;
3354 options = 0;
3355
3356 if (extra != NULL) extra->flags &=
3357 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3358
3359 len = 0;
3360 for (;;)
3361 {
3362 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3363 {
3364 if (len > 0) /* Reached EOF without hitting a newline */
3365 {
3366 fprintf(outfile, "\n");
3367 break;
3368 }
3369 done = 1;
3370 goto CONTINUE;
3371 }
3372 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3373 len = (int)strlen((char *)buffer);
3374 if (buffer[len-1] == '\n') break;
3375 }
3376
3377 while (len > 0 && isspace(buffer[len-1])) len--;
3378 buffer[len] = 0;
3379 if (len == 0) break;
3380
3381 p = buffer;
3382 while (isspace(*p)) p++;
3383
3384 bptr = q = dbuffer;
3385 while ((c = *p++) != 0)
3386 {
3387 int i = 0;
3388 int n = 0;
3389
3390 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3391 In non-UTF mode, allow the value of the byte to fall through to later,
3392 where values greater than 127 are turned into UTF-8 when running in
3393 16-bit mode. */
3394
3395 if (c != '\\')
3396 {
3397 if (use_utf)
3398 {
3399 *q++ = c;
3400 continue;
3401 }
3402 }
3403
3404 /* Handle backslash escapes */
3405
3406 else switch ((c = *p++))
3407 {
3408 case 'a': c = 7; break;
3409 case 'b': c = '\b'; break;
3410 case 'e': c = 27; break;
3411 case 'f': c = '\f'; break;
3412 case 'n': c = '\n'; break;
3413 case 'r': c = '\r'; break;
3414 case 't': c = '\t'; break;
3415 case 'v': c = '\v'; break;
3416
3417 case '0': case '1': case '2': case '3':
3418 case '4': case '5': case '6': case '7':
3419 c -= '0';
3420 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3421 c = c * 8 + *p++ - '0';
3422 break;
3423
3424 case 'x':
3425 if (*p == '{')
3426 {
3427 pcre_uint8 *pt = p;
3428 c = 0;
3429
3430 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3431 when isxdigit() is a macro that refers to its argument more than
3432 once. This is banned by the C Standard, but apparently happens in at
3433 least one MacOS environment. */
3434
3435 for (pt++; isxdigit(*pt); pt++)
3436 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3437 if (*pt == '}')
3438 {
3439 p = pt + 1;
3440 break;
3441 }
3442 /* Not correct form for \x{...}; fall through */
3443 }
3444
3445 /* \x without {} always defines just one byte in 8-bit mode. This
3446 allows UTF-8 characters to be constructed byte by byte, and also allows
3447 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3448 Otherwise, pass it down to later code so that it can be turned into
3449 UTF-8 when running in 16-bit mode. */
3450
3451 c = 0;
3452 while (i++ < 2 && isxdigit(*p))
3453 {
3454 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3455 p++;
3456 }
3457 if (use_utf)
3458 {
3459 *q++ = c;
3460 continue;
3461 }
3462 break;
3463
3464 case 0: /* \ followed by EOF allows for an empty line */
3465 p--;
3466 continue;
3467
3468 case '>':
3469 if (*p == '-')
3470 {
3471 start_offset_sign = -1;
3472 p++;
3473 }
3474 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3475 start_offset *= start_offset_sign;
3476 continue;
3477
3478 case 'A': /* Option setting */
3479 options |= PCRE_ANCHORED;
3480 continue;
3481
3482 case 'B':
3483 options |= PCRE_NOTBOL;
3484 continue;
3485
3486 case 'C':
3487 if (isdigit(*p)) /* Set copy string */
3488 {
3489 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3490 copystrings |= 1 << n;
3491 }
3492 else if (isalnum(*p))
3493 {
3494 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3495 }
3496 else if (*p == '+')
3497 {
3498 callout_extra = 1;
3499 p++;
3500 }
3501 else if (*p == '-')
3502 {
3503 SET_PCRE_CALLOUT(NULL);
3504 p++;
3505 }
3506 else if (*p == '!')
3507 {
3508 callout_fail_id = 0;
3509 p++;
3510 while(isdigit(*p))
3511 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3512 callout_fail_count = 0;
3513 if (*p == '!')
3514 {
3515 p++;
3516 while(isdigit(*p))
3517 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3518 }
3519 }
3520 else if (*p == '*')
3521 {
3522 int sign = 1;
3523 callout_data = 0;
3524 if (*(++p) == '-') { sign = -1; p++; }
3525 while(isdigit(*p))
3526 callout_data = callout_data * 10 + *p++ - '0';
3527 callout_data *= sign;
3528 callout_data_set = 1;
3529 }
3530 continue;
3531
3532 #if !defined NODFA
3533 case 'D':
3534 #if !defined NOPOSIX
3535 if (posix || do_posix)
3536 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3537 else
3538 #endif
3539 use_dfa = 1;
3540 continue;
3541 #endif
3542
3543 #if !defined NODFA
3544 case 'F':
3545 options |= PCRE_DFA_SHORTEST;
3546 continue;
3547 #endif
3548
3549 case 'G':
3550 if (isdigit(*p))
3551 {
3552 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3553 getstrings |= 1 << n;
3554 }
3555 else if (isalnum(*p))
3556 {
3557 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3558 }
3559 continue;
3560
3561 case 'J':
3562 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3563 if (extra != NULL
3564 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3565 && extra->executable_jit != NULL)
3566 {
3567 if (jit_stack != NULL) PCRE_JIT_STACK_FREE(jit_stack);
3568 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3569 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3570 }
3571 continue;
3572
3573 case 'L':
3574 getlist = 1;
3575 continue;
3576
3577 case 'M':
3578 find_match_limit = 1;
3579 continue;
3580
3581 case 'N':
3582 if ((options & PCRE_NOTEMPTY) != 0)
3583 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3584 else
3585 options |= PCRE_NOTEMPTY;
3586 continue;
3587
3588 case 'O':
3589 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3590 if (n > size_offsets_max)
3591 {
3592 size_offsets_max = n;
3593 free(offsets);
3594 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3595 if (offsets == NULL)
3596 {
3597 printf("** Failed to get %d bytes of memory for offsets vector\n",
3598 (int)(size_offsets_max * sizeof(int)));
3599 yield = 1;
3600 goto EXIT;
3601 }
3602 }
3603 use_size_offsets = n;
3604 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3605 continue;
3606
3607 case 'P':
3608 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3609 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3610 continue;
3611
3612 case 'Q':
3613 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3614 if (extra == NULL)
3615 {
3616 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3617 extra->flags = 0;
3618 }
3619 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3620 extra->match_limit_recursion = n;
3621 continue;
3622
3623 case 'q':
3624 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3625 if (extra == NULL)
3626 {
3627 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3628 extra->flags = 0;
3629 }
3630 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3631 extra->match_limit = n;
3632 continue;
3633
3634 #if !defined NODFA
3635 case 'R':
3636 options |= PCRE_DFA_RESTART;
3637 continue;
3638 #endif
3639
3640 case 'S':
3641 show_malloc = 1;
3642 continue;
3643
3644 case 'Y':
3645 options |= PCRE_NO_START_OPTIMIZE;
3646 continue;
3647
3648 case 'Z':
3649 options |= PCRE_NOTEOL;
3650 continue;
3651
3652 case '?':
3653 options |= PCRE_NO_UTF8_CHECK;
3654 continue;
3655
3656 case '<':
3657 {
3658 int x = check_newline(p, outfile);
3659 if (x == 0) goto NEXT_DATA;
3660 options |= x;
3661 while (*p++ != '>');
3662 }
3663 continue;
3664 }
3665
3666 /* We now have a character value in c that may be greater than 255. In
3667 16-bit mode, we always convert characters to UTF-8 so that values greater
3668 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3669 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3670 mode must have come from \x{...} or octal constructs because values from
3671 \x.. get this far only in non-UTF mode. */
3672
3673 if (use_pcre16 || use_utf)
3674 {
3675 pcre_uint8 buff8[8];
3676 int ii, utn;
3677 utn = ord2utf8(c, buff8);
3678 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3679 }
3680 else
3681 {
3682 if (c > 255)
3683 {
3684 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3685 "and UTF-8 mode is not enabled.\n", c);
3686 fprintf(outfile, "** Truncation will probably give the wrong "
3687 "result.\n");
3688 }
3689 *q++ = c;
3690 }
3691 }
3692
3693 /* Reached end of subject string */
3694
3695 *q = 0;
3696 len = (int)(q - dbuffer);
3697
3698 /* Move the data to the end of the buffer so that a read over the end of
3699 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3700 we are using the POSIX interface, we must include the terminating zero. */
3701
3702 #if !defined NOPOSIX
3703 if (posix || do_posix)
3704 {
3705 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3706 bptr += buffer_size - len - 1;
3707 }
3708 else
3709 #endif
3710 {
3711 memmove(bptr + buffer_size - len, bptr, len);
3712 bptr += buffer_size - len;
3713 }
3714
3715 if ((all_use_dfa || use_dfa) && find_match_limit)
3716 {
3717 printf("**Match limit not relevant for DFA matching: ignored\n");
3718 find_match_limit = 0;
3719 }
3720
3721 /* Handle matching via the POSIX interface, which does not
3722 support timing or playing with the match limit or callout data. */
3723
3724 #if !defined NOPOSIX
3725 if (posix || do_posix)
3726 {
3727 int rc;
3728 int eflags = 0;
3729 regmatch_t *pmatch = NULL;
3730 if (use_size_offsets > 0)
3731 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3732 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3733 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3734 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3735
3736 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3737
3738 if (rc != 0)
3739 {
3740 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3741 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3742 }
3743 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3744 != 0)
3745 {
3746 fprintf(outfile, "Matched with REG_NOSUB\n");
3747 }
3748 else
3749 {
3750 size_t i;
3751 for (i = 0; i < (size_t)use_size_offsets; i++)
3752 {
3753 if (pmatch[i].rm_so >= 0)
3754 {
3755 fprintf(outfile, "%2d: ", (int)i);
3756 PCHARSV(dbuffer, pmatch[i].rm_so,
3757 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3758 fprintf(outfile, "\n");
3759 if (do_showcaprest || (i == 0 && do_showrest))
3760 {
3761 fprintf(outfile, "%2d+ ", (int)i);
3762 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3763 outfile);
3764 fprintf(outfile, "\n");
3765 }
3766 }
3767 }
3768 }
3769 free(pmatch);
3770 goto NEXT_DATA;
3771 }
3772
3773 #endif /* !defined NOPOSIX */
3774
3775 /* Handle matching via the native interface - repeats for /g and /G */
3776
3777 #ifdef SUPPORT_PCRE16
3778 if (use_pcre16)
3779 {
3780 len = to16(TRUE, bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3781 switch(len)
3782 {
3783 case -1:
3784 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3785 "converted to UTF-16\n");
3786 goto NEXT_DATA;
3787
3788 case -2:
3789 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3790 "cannot be converted to UTF-16\n");
3791 goto NEXT_DATA;
3792
3793 case -3:
3794 fprintf(outfile, "**Failed: character value greater than 0xffff "
3795 "cannot be converted to 16-bit in non-UTF mode\n");
3796 goto NEXT_DATA;
3797
3798 default:
3799 break;
3800 }
3801 bptr = (pcre_uint8 *)buffer16;
3802 }
3803 #endif
3804
3805 for (;; gmatched++) /* Loop for /g or /G */
3806 {
3807 markptr = NULL;
3808
3809 if (timeitm > 0)
3810 {
3811 register int i;
3812 clock_t time_taken;
3813 clock_t start_time = clock();
3814
3815 #if !defined NODFA
3816 if (all_use_dfa || use_dfa)
3817 {
3818 int workspace[1000];
3819 for (i = 0; i < timeitm; i++)
3820 {
3821 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3822 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3823 (sizeof(workspace)/sizeof(int)));
3824 }
3825 }
3826 else
3827 #endif
3828
3829 for (i = 0; i < timeitm; i++)
3830 {
3831 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3832 (options | g_notempty), use_offsets, use_size_offsets);
3833 }
3834 time_taken = clock() - start_time;
3835 fprintf(outfile, "Execute time %.4f milliseconds\n",
3836 (((double)time_taken * 1000.0) / (double)timeitm) /
3837 (double)CLOCKS_PER_SEC);
3838 }
3839
3840 /* If find_match_limit is set, we want to do repeated matches with
3841 varying limits in order to find the minimum value for the match limit and
3842 for the recursion limit. The match limits are relevant only to the normal
3843 running of pcre_exec(), so disable the JIT optimization. This makes it
3844 possible to run the same set of tests with and without JIT externally
3845 requested. */
3846
3847 if (find_match_limit)
3848 {
3849 if (extra == NULL)
3850 {
3851 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3852 extra->flags = 0;
3853 }
3854 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3855
3856 (void)check_match_limit(re, extra, bptr, len, start_offset,
3857 options|g_notempty, use_offsets, use_size_offsets,
3858 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3859 PCRE_ERROR_MATCHLIMIT, "match()");
3860
3861 count = check_match_limit(re, extra, bptr, len, start_offset,
3862 options|g_notempty, use_offsets, use_size_offsets,
3863 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3864 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3865 }
3866
3867 /* If callout_data is set, use the interface with additional data */
3868
3869 else if (callout_data_set)
3870 {
3871 if (extra == NULL)
3872 {
3873 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3874 extra->flags = 0;
3875 }
3876 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3877 extra->callout_data = &callout_data;
3878 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3879 options | g_notempty, use_offsets, use_size_offsets);
3880 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3881 }
3882
3883 /* The normal case is just to do the match once, with the default
3884 value of match_limit. */
3885
3886 #if !defined NODFA
3887 else if (all_use_dfa || use_dfa)
3888 {
3889 int workspace[1000];
3890 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3891 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3892 (sizeof(workspace)/sizeof(int)));
3893 if (count == 0)
3894 {
3895 fprintf(outfile, "Matched, but too many subsidiary matches\n");
3896 count = use_size_offsets/2;
3897 }
3898 }
3899 #endif
3900
3901 else
3902 {
3903 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3904 options | g_notempty, use_offsets, use_size_offsets);
3905 if (count == 0)
3906 {
3907 fprintf(outfile, "Matched, but too many substrings\n");
3908 count = use_size_offsets/3;
3909 }
3910 }
3911
3912 /* Matched */
3913
3914 if (count >= 0)
3915 {
3916 int i, maxcount;
3917 void *cnptr, *gnptr;
3918
3919 #if !defined NODFA
3920 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3921 #endif
3922 maxcount = use_size_offsets/3;
3923
3924 /* This is a check against a lunatic return value. */
3925
3926 if (count > maxcount)
3927 {
3928 fprintf(outfile,
3929 "** PCRE error: returned count %d is too big for offset size %d\n",
3930 count, use_size_offsets);
3931 count = use_size_offsets/3;
3932 if (do_g || do_G)
3933 {
3934 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3935 do_g = do_G = FALSE; /* Break g/G loop */
3936 }
3937 }
3938
3939 /* do_allcaps requests showing of all captures in the pattern, to check
3940 unset ones at the end. */
3941
3942 if (do_allcaps)
3943 {
3944 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
3945 goto SKIP_DATA;
3946 count++; /* Allow for full match */
3947 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3948 }
3949
3950 /* Output the captured substrings */
3951
3952 for (i = 0; i < count * 2; i += 2)
3953 {
3954 if (use_offsets[i] < 0)
3955 {
3956 if (use_offsets[i] != -1)
3957 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3958 use_offsets[i], i);
3959 if (use_offsets[i+1] != -1)
3960 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3961 use_offsets[i+1], i+1);
3962 fprintf(outfile, "%2d: <unset>\n", i/2);
3963 }
3964 else
3965 {
3966 fprintf(outfile, "%2d: ", i/2);
3967 PCHARSV(bptr, use_offsets[i],
3968 use_offsets[i+1] - use_offsets[i], outfile);
3969 fprintf(outfile, "\n");
3970 if (do_showcaprest || (i == 0 && do_showrest))
3971 {
3972 fprintf(outfile, "%2d+ ", i/2);
3973 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
3974 outfile);
3975 fprintf(outfile, "\n");
3976 }
3977 }
3978 }
3979
3980 if (markptr != NULL)
3981 {
3982 fprintf(outfile, "MK: ");
3983 PCHARSV(markptr, 0, -1, outfile);
3984 fprintf(outfile, "\n");
3985 }
3986
3987 for (i = 0; i < 32; i++)
3988 {
3989 if ((copystrings & (1 << i)) != 0)
3990 {
3991 int rc;
3992 char copybuffer[256];
3993 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
3994 copybuffer, sizeof(copybuffer));
3995 if (rc < 0)
3996 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
3997 else
3998 {
3999 fprintf(outfile, "%2dC ", i);
4000 PCHARSV(copybuffer, 0, rc, outfile);
4001 fprintf(outfile, " (%d)\n", rc);
4002 }
4003 }
4004 }
4005
4006 cnptr = copynames;
4007 for (;;)
4008 {
4009 int rc;
4010 char copybuffer[256];
4011
4012 if (use_pcre16)
4013 {
4014 if (*(pcre_uint16 *)cnptr == 0) break;
4015 }
4016 else
4017 {
4018 if (*(pcre_uint8 *)cnptr == 0) break;
4019 }
4020
4021 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4022 cnptr, copybuffer, sizeof(copybuffer));
4023
4024 if (rc < 0)
4025 {
4026 fprintf(outfile, "copy substring ");
4027 PCHARSV(cnptr, 0, -1, outfile);
4028 fprintf(outfile, " failed %d\n", rc);
4029 }
4030 else
4031 {
4032 fprintf(outfile, " C ");
4033 PCHARSV(copybuffer, 0, rc, outfile);
4034 fprintf(outfile, " (%d) ", rc);
4035 PCHARSV(cnptr, 0, -1, outfile);
4036 putc('\n', outfile);
4037 }
4038
4039 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4040 }
4041
4042 for (i = 0; i < 32; i++)
4043 {
4044 if ((getstrings & (1 << i)) != 0)
4045 {
4046 int rc;
4047 const char *substring;
4048 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4049 if (rc < 0)
4050 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4051 else
4052 {
4053 fprintf(outfile, "%2dG ", i);
4054 PCHARSV(substring, 0, rc, outfile);
4055 fprintf(outfile, " (%d)\n", rc);
4056 PCRE_FREE_SUBSTRING(substring);
4057 }
4058 }
4059 }
4060
4061 gnptr = getnames;
4062 for (;;)
4063 {
4064 int rc;
4065 const char *substring;
4066
4067 if (use_pcre16)
4068 {
4069 if (*(pcre_uint16 *)gnptr == 0) break;
4070 }
4071 else
4072 {
4073 if (*(pcre_uint8 *)gnptr == 0) break;
4074 }
4075
4076 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4077 gnptr, &substring);
4078 if (rc < 0)
4079 {
4080 fprintf(outfile, "get substring ");
4081 PCHARSV(gnptr, 0, -1, outfile);
4082 fprintf(outfile, " failed %d\n", rc);
4083 }
4084 else
4085 {
4086 fprintf(outfile, " G ");
4087 PCHARSV(substring, 0, rc, outfile);
4088 fprintf(outfile, " (%d) ", rc);
4089 PCHARSV(gnptr, 0, -1, outfile);
4090 PCRE_FREE_SUBSTRING(substring);
4091 putc('\n', outfile);
4092 }
4093
4094 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4095 }
4096
4097 if (getlist)
4098 {
4099 int rc;
4100 const char **stringlist;
4101 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4102 if (rc < 0)
4103 fprintf(outfile, "get substring list failed %d\n", rc);
4104 else
4105 {
4106 for (i = 0; i < count; i++)
4107 {
4108 fprintf(outfile, "%2dL ", i);
4109 PCHARSV(stringlist[i], 0, -1, outfile);
4110 putc('\n', outfile);
4111 }
4112 if (stringlist[i] != NULL)
4113 fprintf(outfile, "string list not terminated by NULL\n");
4114 PCRE_FREE_SUBSTRING_LIST(stringlist);
4115 }
4116 }
4117 }
4118
4119 /* There was a partial match */
4120
4121 else if (count == PCRE_ERROR_PARTIAL)
4122 {
4123 if (markptr == NULL) fprintf(outfile, "Partial match");
4124 else
4125 {
4126 fprintf(outfile, "Partial match, mark=");
4127 PCHARSV(markptr, 0, -1, outfile);
4128 }
4129 if (use_size_offsets > 1)
4130 {
4131 fprintf(outfile, ": ");
4132 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4133 outfile);
4134 }
4135 fprintf(outfile, "\n");
4136 break; /* Out of the /g loop */
4137 }
4138
4139 /* Failed to match. If this is a /g or /G loop and we previously set
4140 g_notempty after a null match, this is not necessarily the end. We want
4141 to advance the start offset, and continue. We won't be at the end of the
4142 string - that was checked before setting g_notempty.
4143
4144 Complication arises in the case when the newline convention is "any",
4145 "crlf", or "anycrlf". If the previous match was at the end of a line
4146 terminated by CRLF, an advance of one character just passes the \r,
4147 whereas we should prefer the longer newline sequence, as does the code in
4148 pcre_exec(). Fudge the offset value to achieve this. We check for a
4149 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4150 find the default.
4151
4152 Otherwise, in the case of UTF-8 matching, the advance must be one
4153 character, not one byte. */
4154
4155 else
4156 {
4157 if (g_notempty != 0)
4158 {
4159 int onechar = 1;
4160 unsigned int obits = ((real_pcre *)re)->options;
4161 use_offsets[0] = start_offset;
4162 if ((obits & PCRE_NEWLINE_BITS) == 0)
4163 {
4164 int d;
4165 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4166 /* Note that these values are always the ASCII ones, even in
4167 EBCDIC environments. CR = 13, NL = 10. */
4168 obits = (d == 13)? PCRE_NEWLINE_CR :
4169 (d == 10)? PCRE_NEWLINE_LF :
4170 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4171 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4172 (d == -1)? PCRE_NEWLINE_ANY : 0;
4173 }
4174 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4175 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4176 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4177 &&
4178 start_offset < len - 1 &&
4179 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4180 (use_pcre16?
4181 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4182 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4183 :
4184 bptr[start_offset] == '\r'
4185 && bptr[start_offset + 1] == '\n')
4186 #elif defined SUPPORT_PCRE16
4187 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4188 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4189 #else
4190 bptr[start_offset] == '\r'
4191 && bptr[start_offset + 1] == '\n'
4192 #endif
4193 )
4194 onechar++;
4195 else if (use_utf)
4196 {
4197 while (start_offset + onechar < len)
4198 {
4199 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4200 onechar++;
4201 }
4202 }
4203 use_offsets[1] = start_offset + onechar;
4204 }
4205 else
4206 {
4207 switch(count)
4208 {
4209 case PCRE_ERROR_NOMATCH:
4210 if (gmatched == 0)
4211 {
4212 if (markptr == NULL)
4213 {
4214 fprintf(outfile, "No match\n");
4215 }
4216 else
4217 {
4218 fprintf(outfile, "No match, mark = ");
4219 PCHARSV(markptr, 0, -1, outfile);
4220 putc('\n', outfile);
4221 }
4222 }
4223 break;
4224
4225 case PCRE_ERROR_BADUTF8:
4226 case PCRE_ERROR_SHORTUTF8:
4227 fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4228 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4229 use_pcre16? "16" : "8");
4230 if (use_size_offsets >= 2)
4231 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4232 use_offsets[1]);
4233 fprintf(outfile, "\n");
4234 break;
4235
4236 case PCRE_ERROR_BADUTF8_OFFSET:
4237 fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4238 use_pcre16? "16" : "8");
4239 break;
4240
4241 default:
4242 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
4243 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4244 else
4245 fprintf(outfile, "Error %d (Unexpected value)\n", count);
4246 break;
4247 }
4248
4249 break; /* Out of the /g loop */
4250 }
4251 }
4252
4253 /* If not /g or /G we are done */
4254
4255 if (!do_g && !do_G) break;
4256
4257 /* If we have matched an empty string, first check to see if we are at
4258 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4259 Perl's /g option does. This turns out to be rather cunning. First we set
4260 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4261 same point. If this fails (picked up above) we advance to the next
4262 character. */
4263
4264 g_notempty = 0;
4265
4266 if (use_offsets[0] == use_offsets[1])
4267 {
4268 if (use_offsets[0] == len) break;
4269 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4270 }
4271
4272 /* For /g, update the start offset, leaving the rest alone */
4273
4274 if (do_g) start_offset = use_offsets[1];
4275
4276 /* For /G, update the pointer and length */
4277
4278 else
4279 {
4280 bptr += use_offsets[1] * CHAR_SIZE;
4281 len -= use_offsets[1];
4282 }
4283 } /* End of loop for /g and /G */
4284
4285 NEXT_DATA: continue;
4286 } /* End of loop for data lines */
4287
4288 CONTINUE:
4289
4290 #if !defined NOPOSIX
4291 if (posix || do_posix) regfree(&preg);
4292 #endif
4293
4294 if (re != NULL) new_free(re);
4295 if (extra != NULL)
4296 {
4297 PCRE_FREE_STUDY(extra);
4298 }
4299 if (locale_set)
4300 {
4301 new_free((void *)tables);
4302 setlocale(LC_CTYPE, "C");
4303 locale_set = 0;
4304 }
4305 if (jit_stack != NULL)
4306 {
4307 PCRE_JIT_STACK_FREE(jit_stack);
4308 jit_stack = NULL;
4309 }
4310 }
4311
4312 if (infile == stdin) fprintf(outfile, "\n");
4313
4314 EXIT:
4315
4316 if (infile != NULL && infile != stdin) fclose(infile);
4317 if (outfile != NULL && outfile != stdout) fclose(outfile);
4318
4319 free(buffer);
4320 free(dbuffer);
4321 free(pbuffer);
4322 free(offsets);
4323
4324 #ifdef SUPPORT_PCRE16
4325 if (buffer16 != NULL) free(buffer16);
4326 #endif
4327
4328 return yield;
4329 }
4330
4331 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12