/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 893 - (show annotations) (download)
Thu Jan 19 17:15:11 2012 UTC (2 years, 7 months ago) by ph10
File MIME type: text/plain
File size: 130426 byte(s)
Experimental stack size determination.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49
50 #ifdef HAVE_CONFIG_H
51 #include "config.h"
52 #endif
53
54 #include <ctype.h>
55 #include <stdio.h>
56 #include <string.h>
57 #include <stdlib.h>
58 #include <time.h>
59 #include <locale.h>
60 #include <errno.h>
61
62 #ifdef SUPPORT_LIBREADLINE
63 #ifdef HAVE_UNISTD_H
64 #include <unistd.h>
65 #endif
66 #include <readline/readline.h>
67 #include <readline/history.h>
68 #endif
69
70
71 /* A number of things vary for Windows builds. Originally, pcretest opened its
72 input and output without "b"; then I was told that "b" was needed in some
73 environments, so it was added for release 5.0 to both the input and output. (It
74 makes no difference on Unix-like systems.) Later I was told that it is wrong
75 for the input on Windows. I've now abstracted the modes into two macros that
76 are set here, to make it easier to fiddle with them, and removed "b" from the
77 input mode under Windows. */
78
/* Platform section: selects the fopen() mode strings INPUT_MODE and
OUTPUT_MODE and pulls in the platform-specific headers. */
79 #if defined(_WIN32) || defined(WIN32)
80 #include <io.h> /* For _setmode() */
81 #include <fcntl.h> /* For _O_BINARY */
/* On Windows the input mode has no "b"; the output mode keeps it. */
82 #define INPUT_MODE "r"
83 #define OUTPUT_MODE "wb"
84
85 #ifndef isatty
86 #define isatty _isatty /* This is what Windows calls them, I'm told, */
87 #endif /* though in some environments they seem to */
88 /* be already defined, hence the #ifndefs. */
89 #ifndef fileno
90 #define fileno _fileno
91 #endif
92
93 /* A user sent this fix for Borland Builder 5 under Windows. */
94
/* Maps _setmode() onto setmode() when __BORLANDC__ is defined. */
95 #ifdef __BORLANDC__
96 #define _setmode(handle, mode) setmode(handle, mode)
97 #endif
98
99 /* Not Windows */
100
101 #else
102 #include <sys/time.h> /* These two includes are needed */
103 #include <sys/resource.h> /* for setrlimit(). */
/* Everywhere else both input and output are opened with "b". */
104 #define INPUT_MODE "rb"
105 #define OUTPUT_MODE "wb"
106 #endif
107
108
109 /* We have to include pcre_internal.h because we need the internal info for
110 displaying the results of pcre_study() and we also need to know about the
111 internal macros, structures, and other internal data values; pcretest has
112 "inside information" compared to a program that strictly follows the PCRE API.
113
114 Although pcre_internal.h does itself include pcre.h, we explicitly include it
115 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
116 appropriately for an application, not for building PCRE. */
117
118 #include "pcre.h"
119
120 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
121 /* Configure internal macros to 16 bit mode. */
122 #define COMPILE_PCRE16
123 #endif
124
125 #include "pcre_internal.h"
126
127 /* The pcre_printint() function, which prints the internal form of a compiled
128 regex, is held in a separate file so that (a) it can be compiled in either
129 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
130 when that is compiled in debug mode. */
131
132 #ifdef SUPPORT_PCRE8
133 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
134 #endif
135 #ifdef SUPPORT_PCRE16
136 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
137 #endif
138
139 /* We need access to some of the data tables that PCRE uses. So as not to have
140 to keep two copies, we include the source file here, changing the names of the
141 external symbols to prevent clashes. */
142
143 #define PCRE_INCLUDED
144 #undef PRIV
145 #define PRIV(name) name
146
147 #include "pcre_tables.c"
148
149 /* The definition of the macro PRINTABLE, which determines whether to print an
150 output character as-is or as a hex value when showing compiled patterns, is
151 the same as in the printint.src file. We use it here in cases when the locale
152 has not been explicitly changed, so as to get consistent output from systems
153 that differ in their output from isprint() even in the "C" locale. */
154
/* PRINTABLE(c) is the locale-independent test for whether a character value
can be output as-is: 64..254 when EBCDIC is defined, otherwise 32..126. Note
that the argument is evaluated twice. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK(c) yields non-zero if c may be output as-is. When locale_set is
non-zero the decision is delegated to isprint(); otherwise PRINTABLE() is used.
isprint() has undefined behaviour for arguments that are not representable as
unsigned char (or equal to EOF), so values outside 0..255 (negative values,
wide code points) are reported as not printable instead of being passed on.
The argument may be evaluated more than once. */

#define PRINTOK(c) \
  (locale_set ? ((unsigned int)(c) <= 255u && isprint((int)(c))) : PRINTABLE(c))
162
163 /* Posix support is disabled in 16 bit only mode. */
164 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
165 #define NOPOSIX
166 #endif
167
168 /* It is possible to compile this test program without including support for
169 testing the POSIX interface, though this is not available via the standard
170 Makefile. */
171
172 #if !defined NOPOSIX
173 #include "pcreposix.h"
174 #endif
175
176 /* It is also possible, originally for the benefit of a version that was
177 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
178 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
179 automatically cut out the UTF support if PCRE is built without it. */
180
181 #ifndef SUPPORT_UTF
182 #ifndef NOUTF
183 #define NOUTF
184 #endif
185 #endif
186
187 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
188 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
189 only from one place and is handled differently). I couldn't dream up any way of
190 using a single macro to do this in a generic way, because of the many different
191 argument requirements. We know that at least one of SUPPORT_PCRE8 and
192 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
193 use these in the definitions of generic macros.
194
195 **** Special note about the PCHARSxxx macros: the address of the string to be
196 printed is always given as two arguments: a base address followed by an offset.
197 The base address is cast to the correct data size for 8 or 16 bit data; the
198 offset is in units of this size. If the string were given as base+offset in one
199 argument, the casting might be incorrectly applied. */
200
/* 8-bit wrappers. Each macro forwards to the pcre_xxx() function of the same
name (or to pchars() / read_capture_name8(), which are not defined in this
part of the file), casting string arguments to (char *) or (pcre_uint8 *).
Macros whose first parameter is lv, p, rc, re, n, count or extra expand to an
assignment to that argument. No expansion ends with a semicolon; the caller
supplies it. */
201 #ifdef SUPPORT_PCRE8
202
203 #define PCHARS8(lv, p, offset, len, f) \
204 lv = pchars((pcre_uint8 *)(p) + offset, len, f)
205
206 #define PCHARSV8(p, offset, len, f) \
207 (void)pchars((pcre_uint8 *)(p) + offset, len, f)
208
/* cn16 is accepted only so that the 8-bit and 16-bit forms share a signature;
it is not used in this expansion. */
209 #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
210 p = read_capture_name8(p, cn8, re)
211
212 #define STRLEN8(p) ((int)strlen((char *)p))
213
/* Assigns directly to the pcre_callout variable. */
214 #define SET_PCRE_CALLOUT8(callout) \
215 pcre_callout = callout
216
217 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
218 pcre_assign_jit_stack(extra, callback, userdata)
219
220 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
221 re = pcre_compile((char *)pat, options, error, erroffset, tables)
222
223 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
224 namesptr, cbuffer, size) \
225 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
226 (char *)namesptr, cbuffer, size)
227
228 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
229 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
230
231 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
232 offsets, size_offsets, workspace, size_workspace) \
233 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
234 offsets, size_offsets, workspace, size_workspace)
235
236 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
237 offsets, size_offsets) \
238 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
239 offsets, size_offsets)
240
241 #define PCRE_FREE_STUDY8(extra) \
242 pcre_free_study(extra)
243
244 #define PCRE_FREE_SUBSTRING8(substring) \
245 pcre_free_substring(substring)
246
247 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
248 pcre_free_substring_list(listptr)
249
250 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
251 getnamesptr, subsptr) \
252 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
253 (char *)getnamesptr, subsptr)
254
/* NOTE(review): the expansion refers to `re`, which is not a parameter of this
macro (the second parameter is named `rc` and is never used). It therefore
depends on a variable called `re` being in scope wherever the macro is
invoked - confirm against the call sites before renaming anything. */
255 #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
256 n = pcre_get_stringnumber(re, (char *)ptr)
257
258 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
259 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
260
261 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
262 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
263
264 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
265 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
266
267 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
268 pcre_printint(re, outfile, debug_lengths)
269
270 #define PCRE_STUDY8(extra, re, options, error) \
271 extra = pcre_study(re, options, error)
272
/* Unlike most of the macros above, this one is an expression that yields the
value returned by pcre_jit_stack_alloc(). */
273 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
274 pcre_jit_stack_alloc(startsize, maxsize)
275
276 #define PCRE_JIT_STACK_FREE8(stack) \
277 pcre_jit_stack_free(stack)
278
279 #endif /* SUPPORT_PCRE8 */
280
281 /* -----------------------------------------------------------*/
282
/* 16-bit wrappers. These mirror the 8-bit macros parameter for parameter, but
call the pcre16_xxx() functions (or pchars16() / read_capture_name16() /
strlen16(), which are not defined in this part of the file). Arguments are
cast from the 8-bit types used by the rest of the program to their 16-bit
counterparts, and results are cast back where the macro assigns them. No
expansion ends with a semicolon; the caller supplies it. */
283 #ifdef SUPPORT_PCRE16
284
285 #define PCHARS16(lv, p, offset, len, f) \
286 lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
287
288 #define PCHARSV16(p, offset, len, f) \
289 (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
290
/* cn8 is accepted only so that the 8-bit and 16-bit forms share a signature;
it is not used in this expansion. */
291 #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
292 p = read_capture_name16(p, cn16, re)
293
294 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
295
/* The callout function pointer is cast to the 16-bit callout signature before
being stored in pcre16_callout. */
296 #define SET_PCRE_CALLOUT16(callout) \
297 pcre16_callout = (int (*)(pcre16_callout_block *))callout
298
299 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
300 pcre16_assign_jit_stack((pcre16_extra *)extra, \
301 (pcre16_jit_callback)callback, userdata)
302
303 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
304 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
305 tables)
306
/* In the two copy macros `size` is halved before being passed on - presumably
converting a byte count into a count of 16-bit units; confirm against the
callers. */
307 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
308 namesptr, cbuffer, size) \
309 rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
310 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
311
312 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
313 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
314 (PCRE_UCHAR16 *)cbuffer, size/2)
315
316 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
317 offsets, size_offsets, workspace, size_workspace) \
318 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
319 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
320 workspace, size_workspace)
321
322 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
323 offsets, size_offsets) \
324 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
325 len, start_offset, options, offsets, size_offsets)
326
327 #define PCRE_FREE_STUDY16(extra) \
328 pcre16_free_study((pcre16_extra *)extra)
329
330 #define PCRE_FREE_SUBSTRING16(substring) \
331 pcre16_free_substring((PCRE_SPTR16)substring)
332
333 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
334 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
335
/* The output pointer arguments of the "get" macros go through (void*) before
being cast to the 16-bit pointer type. */
336 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
337 getnamesptr, subsptr) \
338 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
339 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
340
/* NOTE(review): the expansion refers to `re`, which is not a parameter of this
macro (the second parameter is named `rc` and is never used), so a variable
called `re` must be in scope at the call site. Unlike the other 16-bit macros,
`re` is also passed without a (pcre16 *) cast - confirm this is intended. */
341 #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
342 n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
343
344 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
345 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
346 (PCRE_SPTR16 *)(void*)subsptr)
347
348 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
349 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
350 (PCRE_SPTR16 **)(void*)listptr)
351
352 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
353 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
354 tables)
355
/* `re` is passed uncast; the pcre16_printint() prototype earlier in this file
takes a `pcre *`. */
356 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
357 pcre16_printint(re, outfile, debug_lengths)
358
359 #define PCRE_STUDY16(extra, re, options, error) \
360 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
361
/* An expression: yields the allocated stack cast to (pcre_jit_stack *). */
362 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
363 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
364
365 #define PCRE_JIT_STACK_FREE16(stack) \
366 pcre16_jit_stack_free((pcre16_jit_stack *)stack)
367
368 #endif /* SUPPORT_PCRE16 */
369
370
371 /* ----- Both modes are supported; a runtime test is needed, except for
372 pcre_config(), and the JIT stack functions, when it doesn't matter which
373 version is called. ----- */
374
/* Generic (mode-independent) macros. Three mutually exclusive definitions
follow: both modes supported (dispatch at run time on use_pcre16), 8-bit only,
and 16-bit only (the last two are plain aliases for the xxx8 / xxx16 macros).

In the two-mode case most macros expand to an unbraced

  if (use_pcre16) <16-bit form>; else <8-bit form>

with no trailing semicolon, so they are only usable as statements and the
caller's semicolon terminates the else branch. CHAR_SIZE, STRLEN,
PCRE_JIT_STACK_ALLOC and PCRE_MAKETABLES are conditional expressions instead
and can be used where a value is needed. */
375 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
376
/* Number of bytes per code unit in the mode currently selected. */
377 #define CHAR_SIZE (use_pcre16? 2:1)
378
379 #define PCHARS(lv, p, offset, len, f) \
380 if (use_pcre16) \
381 PCHARS16(lv, p, offset, len, f); \
382 else \
383 PCHARS8(lv, p, offset, len, f)
384
385 #define PCHARSV(p, offset, len, f) \
386 if (use_pcre16) \
387 PCHARSV16(p, offset, len, f); \
388 else \
389 PCHARSV8(p, offset, len, f)
390
391 #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
392 if (use_pcre16) \
393 READ_CAPTURE_NAME16(p, cn8, cn16, re); \
394 else \
395 READ_CAPTURE_NAME8(p, cn8, cn16, re)
396
397 #define SET_PCRE_CALLOUT(callout) \
398 if (use_pcre16) \
399 SET_PCRE_CALLOUT16(callout); \
400 else \
401 SET_PCRE_CALLOUT8(callout)
402
403 #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
404
405 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
406 if (use_pcre16) \
407 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
408 else \
409 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
410
411 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
412 if (use_pcre16) \
413 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
414 else \
415 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
416
/* No run-time test here: the 8-bit pcre_config() is always used when both
modes are available. */
417 #define PCRE_CONFIG pcre_config
418
419 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
420 namesptr, cbuffer, size) \
421 if (use_pcre16) \
422 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
423 namesptr, cbuffer, size); \
424 else \
425 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
426 namesptr, cbuffer, size)
427
428 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
429 if (use_pcre16) \
430 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
431 else \
432 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
433
434 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
435 offsets, size_offsets, workspace, size_workspace) \
436 if (use_pcre16) \
437 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
438 offsets, size_offsets, workspace, size_workspace); \
439 else \
440 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
441 offsets, size_offsets, workspace, size_workspace)
442
443 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
444 offsets, size_offsets) \
445 if (use_pcre16) \
446 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
447 offsets, size_offsets); \
448 else \
449 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
450 offsets, size_offsets)
451
452 #define PCRE_FREE_STUDY(extra) \
453 if (use_pcre16) \
454 PCRE_FREE_STUDY16(extra); \
455 else \
456 PCRE_FREE_STUDY8(extra)
457
458 #define PCRE_FREE_SUBSTRING(substring) \
459 if (use_pcre16) \
460 PCRE_FREE_SUBSTRING16(substring); \
461 else \
462 PCRE_FREE_SUBSTRING8(substring)
463
464 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
465 if (use_pcre16) \
466 PCRE_FREE_SUBSTRING_LIST16(listptr); \
467 else \
468 PCRE_FREE_SUBSTRING_LIST8(listptr)
469
470 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
471 getnamesptr, subsptr) \
472 if (use_pcre16) \
473 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
474 getnamesptr, subsptr); \
475 else \
476 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
477 getnamesptr, subsptr)
478
/* NOTE(review): both underlying macros ignore their `rc` argument and use a
variable named `re` taken from the scope of the call site. */
479 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
480 if (use_pcre16) \
481 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
482 else \
483 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
484
485 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
486 if (use_pcre16) \
487 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
488 else \
489 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
490
491 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
492 if (use_pcre16) \
493 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
494 else \
495 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
496
/* Expression form, so the result can be assigned by the caller. */
497 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
498 (use_pcre16 ? \
499 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
500 :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
501
502 #define PCRE_JIT_STACK_FREE(stack) \
503 if (use_pcre16) \
504 PCRE_JIT_STACK_FREE16(stack); \
505 else \
506 PCRE_JIT_STACK_FREE8(stack)
507
/* Expression form; note that, unlike PCRE_CONFIG, the call parentheses are
part of the expansion. */
508 #define PCRE_MAKETABLES \
509 (use_pcre16? pcre16_maketables() : pcre_maketables())
510
511 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
512 if (use_pcre16) \
513 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
514 else \
515 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
516
517 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
518 if (use_pcre16) \
519 PCRE_PRINTINT16(re, outfile, debug_lengths); \
520 else \
521 PCRE_PRINTINT8(re, outfile, debug_lengths)
522
523 #define PCRE_STUDY(extra, re, options, error) \
524 if (use_pcre16) \
525 PCRE_STUDY16(extra, re, options, error); \
526 else \
527 PCRE_STUDY8(extra, re, options, error)
528
529 /* ----- Only 8-bit mode is supported ----- */
530
/* Each generic name is a direct alias for its xxx8 counterpart. */
531 #elif defined SUPPORT_PCRE8
532 #define CHAR_SIZE 1
533 #define PCHARS PCHARS8
534 #define PCHARSV PCHARSV8
535 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
536 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
537 #define STRLEN STRLEN8
538 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
539 #define PCRE_COMPILE PCRE_COMPILE8
540 #define PCRE_CONFIG pcre_config
541 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
542 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
543 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
544 #define PCRE_EXEC PCRE_EXEC8
545 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
546 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
547 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
548 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
549 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
550 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
551 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
552 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
553 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
554 #define PCRE_MAKETABLES pcre_maketables()
555 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
556 #define PCRE_PRINTINT PCRE_PRINTINT8
557 #define PCRE_STUDY PCRE_STUDY8
558
559 /* ----- Only 16-bit mode is supported ----- */
560
/* Each generic name is a direct alias for its xxx16 counterpart; this is the
only case in which PCRE_CONFIG maps to pcre16_config. */
561 #else
562 #define CHAR_SIZE 2
563 #define PCHARS PCHARS16
564 #define PCHARSV PCHARSV16
565 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
566 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
567 #define STRLEN STRLEN16
568 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
569 #define PCRE_COMPILE PCRE_COMPILE16
570 #define PCRE_CONFIG pcre16_config
571 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
572 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
573 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
574 #define PCRE_EXEC PCRE_EXEC16
575 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
576 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
577 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
578 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
579 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
580 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
581 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
582 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
583 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
584 #define PCRE_MAKETABLES pcre16_maketables()
585 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
586 #define PCRE_PRINTINT PCRE_PRINTINT16
587 #define PCRE_STUDY PCRE_STUDY16
588 #endif
589
590 /* ----- End of mode-specific function call macros ----- */
591
592
593 /* Other parameters */
594
/* Fallback for environments where CLOCKS_PER_SEC has not been defined by the
headers included above: use CLK_TCK if that is defined, otherwise 100. */
595 #ifndef CLOCKS_PER_SEC
596 #ifdef CLK_TCK
597 #define CLOCKS_PER_SEC CLK_TCK
598 #else
599 #define CLOCKS_PER_SEC 100
600 #endif
601 #endif
602
603 /* This is the default loop count for timing. */
604
605 #define LOOPREPEAT 500000
606
607 /* Static variables */
608
/* File-scope state shared by the rest of the program. Variables without an
explicit initializer start at zero / NULL, as for any static object. The code
that reads and writes most of these is outside this part of the file, so only
what is visible here is described. */
609 static FILE *outfile;
610 static int log_store = 0;
611 static int callout_count;
612 static int callout_extra;
613 static int callout_fail_count;
614 static int callout_fail_id;
615 static int debug_lengths;
616 static int first_callout;
/* Non-zero makes PRINTOK() use isprint() instead of PRINTABLE(). */
617 static int locale_set = 0;
618 static int show_malloc;
619 static int use_utf;
620 static size_t gotten_store;
621 static size_t first_gotten_store = 0;
622 static const unsigned char *last_callout_mark = NULL;
623
624 /* The buffers grow automatically if very long input lines are encountered. */
625
/* Initial value of buffer_size; the three buffers start unallocated.
NOTE(review): presumably all three are allocated with buffer_size bytes
elsewhere in the file - confirm against the allocation code. */
626 static int buffer_size = 50000;
627 static pcre_uint8 *buffer = NULL;
628 static pcre_uint8 *dbuffer = NULL;
629 static pcre_uint8 *pbuffer = NULL;
630
631 /* Another buffer is needed for translation to 16-bit character strings. It
632 will be obtained and extended as required. */
633
/* 16-bit working buffer plus the 16-bit opcode length table. The statement
order matters: LINK_SIZE and IMM2_SIZE must be redefined before OP_LENGTHS is
expanded in the OP_lengths16 initializer. */
634 #ifdef SUPPORT_PCRE16
/* Starts empty (size 0, NULL pointer). */
635 static int buffer16_size = 0;
636 static pcre_uint16 *buffer16 = NULL;
637
638 #ifdef SUPPORT_PCRE8
639
640 /* We need the table of operator lengths that is used for 16-bit compiling, in
641 order to swap bytes in a pattern for saving/reloading testing. Luckily, the
642 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
643 appropriately for the 16-bit world. Just as a safety check, make sure that
644 COMPILE_PCRE16 is *not* set. */
645
646 #ifdef COMPILE_PCRE16
647 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
648 #endif
649
/* Remap LINK_SIZE: 2 becomes 1, and 3 or 4 becomes 2; any other value is a
build error. NOTE(review): this looks like a conversion from a size in bytes
to a size in 16-bit units - confirm against pcre_internal.h. */
650 #if LINK_SIZE == 2
651 #undef LINK_SIZE
652 #define LINK_SIZE 1
653 #elif LINK_SIZE == 3 || LINK_SIZE == 4
654 #undef LINK_SIZE
655 #define LINK_SIZE 2
656 #else
657 #error LINK_SIZE must be either 2, 3, or 4
658 #endif
659
/* IMM2_SIZE is forced to 1 for the same table expansion. */
660 #undef IMM2_SIZE
661 #define IMM2_SIZE 1
662
663 #endif /* SUPPORT_PCRE8 */
664
/* Built from the OP_LENGTHS macro using whatever LINK_SIZE / IMM2_SIZE are in
effect at this point. */
665 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
666 #endif /* SUPPORT_PCRE16 */
667
668 /* If we have 8-bit support, default use_pcre16 to false; if there is also
669 16-bit support, it can be changed by an option. If there is no 8-bit support,
670 there must be 16-bit support, so default it to 1. */
671
/* Run-time mode selector tested by the generic macros: non-zero selects the
16-bit library calls. */
672 #ifdef SUPPORT_PCRE8
673 static int use_pcre16 = 0; /* 8-bit support present: start in 8-bit mode */
674 #else
675 static int use_pcre16 = 1; /* no 8-bit support: 16-bit mode from the start */
676 #endif
677
678 /* Textual explanations for runtime error codes */
679
/* Table of messages for runtime error codes, 29 entries (indices 0..28). A
NULL entry marks a code that has no text here, for the reason given beside it.
NOTE(review): the table is presumably indexed by the negated PCRE error code
(the entry at index 1 is labelled NOMATCH) - confirm against the code that
reads it. The trailing numbers are the array indices. */
680 static const char *errtexts[] = {
681 NULL, /* 0 is no error */
682 NULL, /* NOMATCH is handled specially */
683 "NULL argument passed", /* 2 */
684 "bad option value", /* 3 */
685 "magic number missing", /* 4 */
686 "unknown opcode - pattern overwritten?", /* 5 */
687 "no more memory", /* 6 */
688 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
689 "match limit exceeded", /* 8 */
690 "callout error code", /* 9 */
691 NULL, /* BADUTF8/16 is handled specially */
692 NULL, /* BADUTF8/16 offset is handled specially */
693 NULL, /* PARTIAL is handled specially */
694 "not used - internal error", /* 13 */
695 "internal error - pattern overwritten?", /* 14 */
696 "bad count value", /* 15 */
697 "item unsupported for DFA matching", /* 16 */
698 "backreference condition or recursion test not supported for DFA matching", /* 17 */
699 "match limit not supported for DFA matching", /* 18 */
700 "workspace size exceeded in DFA matching", /* 19 */
701 "too much recursion for DFA matching", /* 20 */
702 "recursion limit exceeded", /* 21 */
703 "not used - internal error", /* 22 */
704 "invalid combination of newline options", /* 23 */
705 "bad offset value", /* 24 */
706 NULL, /* SHORTUTF8/16 is handled specially */
707 "nested recursion at the same subject position", /* 26 */
708 "JIT stack limit reached", /* 27 */
709 "pattern compiled in wrong mode: 8-bit/16-bit error" /* 28 */
710 };
711
712
713 /*************************************************
714 * Alternate character tables *
715 *************************************************/
716
717 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
718 using the default tables of the library. However, the T option can be used to
719 select alternate sets of tables, for different kinds of testing. Note also that
720 the L (locale) option also adjusts the tables. */
721
722 /* This is the set of tables distributed as default with PCRE. It recognizes
723 only ASCII characters. */
724
725 static const pcre_uint8 tables0[] = {
726
727 /* This table is a lower casing table. */
728
729 0, 1, 2, 3, 4, 5, 6, 7,
730 8, 9, 10, 11, 12, 13, 14, 15,
731 16, 17, 18, 19, 20, 21, 22, 23,
732 24, 25, 26, 27, 28, 29, 30, 31,
733 32, 33, 34, 35, 36, 37, 38, 39,
734 40, 41, 42, 43, 44, 45, 46, 47,
735 48, 49, 50, 51, 52, 53, 54, 55,
736 56, 57, 58, 59, 60, 61, 62, 63,
737 64, 97, 98, 99,100,101,102,103,
738 104,105,106,107,108,109,110,111,
739 112,113,114,115,116,117,118,119,
740 120,121,122, 91, 92, 93, 94, 95,
741 96, 97, 98, 99,100,101,102,103,
742 104,105,106,107,108,109,110,111,
743 112,113,114,115,116,117,118,119,
744 120,121,122,123,124,125,126,127,
745 128,129,130,131,132,133,134,135,
746 136,137,138,139,140,141,142,143,
747 144,145,146,147,148,149,150,151,
748 152,153,154,155,156,157,158,159,
749 160,161,162,163,164,165,166,167,
750 168,169,170,171,172,173,174,175,
751 176,177,178,179,180,181,182,183,
752 184,185,186,187,188,189,190,191,
753 192,193,194,195,196,197,198,199,
754 200,201,202,203,204,205,206,207,
755 208,209,210,211,212,213,214,215,
756 216,217,218,219,220,221,222,223,
757 224,225,226,227,228,229,230,231,
758 232,233,234,235,236,237,238,239,
759 240,241,242,243,244,245,246,247,
760 248,249,250,251,252,253,254,255,
761
762 /* This table is a case flipping table. */
763
764 0, 1, 2, 3, 4, 5, 6, 7,
765 8, 9, 10, 11, 12, 13, 14, 15,
766 16, 17, 18, 19, 20, 21, 22, 23,
767 24, 25, 26, 27, 28, 29, 30, 31,
768 32, 33, 34, 35, 36, 37, 38, 39,
769 40, 41, 42, 43, 44, 45, 46, 47,
770 48, 49, 50, 51, 52, 53, 54, 55,
771 56, 57, 58, 59, 60, 61, 62, 63,
772 64, 97, 98, 99,100,101,102,103,
773 104,105,106,107,108,109,110,111,
774 112,113,114,115,116,117,118,119,
775 120,121,122, 91, 92, 93, 94, 95,
776 96, 65, 66, 67, 68, 69, 70, 71,
777 72, 73, 74, 75, 76, 77, 78, 79,
778 80, 81, 82, 83, 84, 85, 86, 87,
779 88, 89, 90,123,124,125,126,127,
780 128,129,130,131,132,133,134,135,
781 136,137,138,139,140,141,142,143,
782 144,145,146,147,148,149,150,151,
783 152,153,154,155,156,157,158,159,
784 160,161,162,163,164,165,166,167,
785 168,169,170,171,172,173,174,175,
786 176,177,178,179,180,181,182,183,
787 184,185,186,187,188,189,190,191,
788 192,193,194,195,196,197,198,199,
789 200,201,202,203,204,205,206,207,
790 208,209,210,211,212,213,214,215,
791 216,217,218,219,220,221,222,223,
792 224,225,226,227,228,229,230,231,
793 232,233,234,235,236,237,238,239,
794 240,241,242,243,244,245,246,247,
795 248,249,250,251,252,253,254,255,
796
797 /* This table contains bit maps for various character classes. Each map is 32
798 bytes long and the bits run from the least significant end of each byte. The
799 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
800 graph, print, punct, and cntrl. Other classes are built from combinations. */
801
802 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
803 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
804 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
805 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
806
807 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
808 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
809 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
810 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
811
812 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
813 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
814 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
815 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
816
817 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
818 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
819 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
820 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
821
822 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
823 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
824 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
825 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
826
827 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
828 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
829 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
830 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
831
832 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
833 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
834 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
835 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
836
837 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
838 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
839 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
840 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
841
842 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
843 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
844 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
846
847 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
848 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
849 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
851
852 /* This table identifies various classes of character by individual bits:
853 0x01 white space character
854 0x02 letter
855 0x04 decimal digit
856 0x08 hexadecimal digit
857 0x10 alphanumeric or '_'
858 0x80 regular expression metacharacter or binary zero
859 */
860
861 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
862 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
863 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
864 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
865 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
866 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
867 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
868 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
869 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
870 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
871 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
872 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
873 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
874 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
875 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
876 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
877 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
878 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
879 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
880 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
881 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
882 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
883 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
884 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
885 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
886 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
887 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
888 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
889 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
890 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
891 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
892 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
893
894 /* This is a set of tables that came originally from a Windows user. It seems to
895 be at least an approximation of ISO 8859. In particular, there are characters
896 greater than 128 that are marked as spaces, letters, etc. */
897
898 static const pcre_uint8 tables1[] = {
899 0,1,2,3,4,5,6,7, /* Section 1 (256 entries): lower-casing map. 65-90 map to 97-122; 192-222 map to 224-254, except 215 which maps to itself. */
900 8,9,10,11,12,13,14,15,
901 16,17,18,19,20,21,22,23,
902 24,25,26,27,28,29,30,31,
903 32,33,34,35,36,37,38,39,
904 40,41,42,43,44,45,46,47,
905 48,49,50,51,52,53,54,55,
906 56,57,58,59,60,61,62,63,
907 64,97,98,99,100,101,102,103,
908 104,105,106,107,108,109,110,111,
909 112,113,114,115,116,117,118,119,
910 120,121,122,91,92,93,94,95,
911 96,97,98,99,100,101,102,103,
912 104,105,106,107,108,109,110,111,
913 112,113,114,115,116,117,118,119,
914 120,121,122,123,124,125,126,127,
915 128,129,130,131,132,133,134,135,
916 136,137,138,139,140,141,142,143,
917 144,145,146,147,148,149,150,151,
918 152,153,154,155,156,157,158,159,
919 160,161,162,163,164,165,166,167,
920 168,169,170,171,172,173,174,175,
921 176,177,178,179,180,181,182,183,
922 184,185,186,187,188,189,190,191,
923 224,225,226,227,228,229,230,231,
924 232,233,234,235,236,237,238,239,
925 240,241,242,243,244,245,246,215,
926 248,249,250,251,252,253,254,223,
927 224,225,226,227,228,229,230,231,
928 232,233,234,235,236,237,238,239,
929 240,241,242,243,244,245,246,247,
930 248,249,250,251,252,253,254,255,
931 0,1,2,3,4,5,6,7, /* Section 2 (256 entries): case-flipping map. 65-90 and 97-122 swap; 192-222 and 224-254 swap; 215, 223, 247 and 255 map to themselves. */
932 8,9,10,11,12,13,14,15,
933 16,17,18,19,20,21,22,23,
934 24,25,26,27,28,29,30,31,
935 32,33,34,35,36,37,38,39,
936 40,41,42,43,44,45,46,47,
937 48,49,50,51,52,53,54,55,
938 56,57,58,59,60,61,62,63,
939 64,97,98,99,100,101,102,103,
940 104,105,106,107,108,109,110,111,
941 112,113,114,115,116,117,118,119,
942 120,121,122,91,92,93,94,95,
943 96,65,66,67,68,69,70,71,
944 72,73,74,75,76,77,78,79,
945 80,81,82,83,84,85,86,87,
946 88,89,90,123,124,125,126,127,
947 128,129,130,131,132,133,134,135,
948 136,137,138,139,140,141,142,143,
949 144,145,146,147,148,149,150,151,
950 152,153,154,155,156,157,158,159,
951 160,161,162,163,164,165,166,167,
952 168,169,170,171,172,173,174,175,
953 176,177,178,179,180,181,182,183,
954 184,185,186,187,188,189,190,191,
955 224,225,226,227,228,229,230,231,
956 232,233,234,235,236,237,238,239,
957 240,241,242,243,244,245,246,215,
958 248,249,250,251,252,253,254,223,
959 192,193,194,195,196,197,198,199,
960 200,201,202,203,204,205,206,207,
961 208,209,210,211,212,213,214,247,
962 216,217,218,219,220,221,222,255,
963 0,62,0,0,1,0,0,0, /* Section 3 (320 bytes): ten 32-byte class bit maps, four rows each. NOTE(review): presumably in the order listed for the preceding table (space, xdigit, digit, upper, lower, word, graph, print, punct, cntrl) - confirm. */
964 0,0,0,0,0,0,0,0,
965 32,0,0,0,1,0,0,0,
966 0,0,0,0,0,0,0,0,
967 0,0,0,0,0,0,255,3,
968 126,0,0,0,126,0,0,0,
969 0,0,0,0,0,0,0,0,
970 0,0,0,0,0,0,0,0,
971 0,0,0,0,0,0,255,3,
972 0,0,0,0,0,0,0,0,
973 0,0,0,0,0,0,12,2,
974 0,0,0,0,0,0,0,0,
975 0,0,0,0,0,0,0,0,
976 254,255,255,7,0,0,0,0,
977 0,0,0,0,0,0,0,0,
978 255,255,127,127,0,0,0,0,
979 0,0,0,0,0,0,0,0,
980 0,0,0,0,254,255,255,7,
981 0,0,0,0,0,4,32,4,
982 0,0,0,128,255,255,127,255,
983 0,0,0,0,0,0,255,3,
984 254,255,255,135,254,255,255,7,
985 0,0,0,0,0,4,44,6,
986 255,255,127,255,255,255,127,255,
987 0,0,0,0,254,255,255,255,
988 255,255,255,255,255,255,255,127,
989 0,0,0,0,254,255,255,255,
990 255,255,255,255,255,255,255,255,
991 0,2,0,0,255,255,255,255,
992 255,255,255,255,255,255,255,127,
993 0,0,0,0,255,255,255,255,
994 255,255,255,255,255,255,255,255,
995 0,0,0,0,254,255,0,252,
996 1,0,0,248,1,0,0,120,
997 0,0,0,0,254,255,255,255,
998 0,0,128,0,0,0,128,0,
999 255,255,255,255,0,0,0,0,
1000 0,0,0,0,0,0,0,128,
1001 255,255,255,255,0,0,0,0,
1002 0,0,0,0,0,0,0,0,
1003 128,0,0,0,0,0,0,0, /* Section 4 (256 entries): per-character type flags in decimal. NOTE(review): values look like the flag bits documented for the preceding table (1 space, 2 letter, 4 digit, 8 hex digit, 16 word char, 128 metacharacter) - confirm. */
1004 0,1,1,0,1,1,0,0,
1005 0,0,0,0,0,0,0,0,
1006 0,0,0,0,0,0,0,0,
1007 1,0,0,0,128,0,0,0,
1008 128,128,128,128,0,0,128,0,
1009 28,28,28,28,28,28,28,28,
1010 28,28,0,0,0,0,0,128,
1011 0,26,26,26,26,26,26,18,
1012 18,18,18,18,18,18,18,18,
1013 18,18,18,18,18,18,18,18,
1014 18,18,18,128,128,0,128,16,
1015 0,26,26,26,26,26,26,18,
1016 18,18,18,18,18,18,18,18,
1017 18,18,18,18,18,18,18,18,
1018 18,18,18,128,128,0,0,0,
1019 0,0,0,0,0,1,0,0,
1020 0,0,0,0,0,0,0,0,
1021 0,0,0,0,0,0,0,0,
1022 0,0,0,0,0,0,0,0,
1023 1,0,0,0,0,0,0,0,
1024 0,0,18,0,0,0,0,0,
1025 0,0,20,20,0,18,0,0,
1026 0,20,18,0,0,0,0,0,
1027 18,18,18,18,18,18,18,18,
1028 18,18,18,18,18,18,18,18,
1029 18,18,18,18,18,18,18,0,
1030 18,18,18,18,18,18,18,18,
1031 18,18,18,18,18,18,18,18,
1032 18,18,18,18,18,18,18,18,
1033 18,18,18,18,18,18,18,0,
1034 18,18,18,18,18,18,18,18
1035 };
1036
1037
1038
1039
1040 #ifndef HAVE_STRERROR
1041 /*************************************************
1042 * Provide strerror() for non-ANSI libraries *
1043 *************************************************/
1044
1045 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1046 in their libraries, but can provide the same facility by this simple
1047 alternative function. */
1048
1049 extern int sys_nerr;
1050 extern char *sys_errlist[];
1051
1052 char *
1053 strerror(int n)
1054 {
1055 if (n < 0 || n >= sys_nerr) return "unknown error number";
1056 return sys_errlist[n];
1057 }
1058 #endif /* HAVE_STRERROR */
1059
1060
1061 /*************************************************
1062 * JIT memory callback *
1063 *************************************************/
1064
1065 static pcre_jit_stack* jit_callback(void *arg)
1066 {
1067 return (pcre_jit_stack *)arg;
1068 }
1069
1070
1071 #if !defined NOUTF || defined SUPPORT_PCRE16
1072 /*************************************************
1073 * Convert UTF-8 string to value *
1074 *************************************************/
1075
1076 /* This function takes one or more bytes that represents a UTF-8 character,
1077 and returns the value of the character.
1078
1079 Argument:
1080 utf8bytes a pointer to the byte vector
1081 vptr a pointer to an int to receive the value
1082
1083 Returns: > 0 => the number of bytes consumed
1084 -6 to 0 => malformed UTF-8 character at offset = (-return)
1085 */
1086
1087 static int
1088 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1089 {
1090 int c = *utf8bytes++;
1091 int d = c;
1092 int i, j, s;
1093
1094 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1095 {
1096 if ((d & 0x80) == 0) break;
1097 d <<= 1;
1098 }
1099
1100 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1101 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1102
1103 /* i now has a value in the range 1-5 */
1104
1105 s = 6*i;
1106 d = (c & utf8_table3[i]) << s;
1107
1108 for (j = 0; j < i; j++)
1109 {
1110 c = *utf8bytes++;
1111 if ((c & 0xc0) != 0x80) return -(j+1);
1112 s -= 6;
1113 d |= (c & 0x3f) << s;
1114 }
1115
1116 /* Check that encoding was the correct unique one */
1117
1118 for (j = 0; j < utf8_table1_size; j++)
1119 if (d <= utf8_table1[j]) break;
1120 if (j != i) return -(i+1);
1121
1122 /* Valid value */
1123
1124 *vptr = d;
1125 return i+1;
1126 }
1127 #endif /* NOUTF || SUPPORT_PCRE16 */
1128
1129
1130
1131 #if !defined NOUTF || defined SUPPORT_PCRE16
1132 /*************************************************
1133 * Convert character value to UTF-8 *
1134 *************************************************/
1135
1136 /* This function takes an integer value in the range 0 - 0x7fffffff
1137 and encodes it as a UTF-8 character in 1 to 6 bytes.
1138
1139 Arguments:
1140 cvalue the character value
1141 utf8bytes pointer to buffer for result - at least 6 bytes long
1142
1143 Returns: number of characters placed in the buffer
1144 */
1145
1146 static int
1147 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1148 {
1149 register int i, j;
1150 for (i = 0; i < utf8_table1_size; i++)
1151 if (cvalue <= utf8_table1[i]) break;
1152 utf8bytes += i;
1153 for (j = i; j > 0; j--)
1154 {
1155 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1156 cvalue >>= 6;
1157 }
1158 *utf8bytes = utf8_table2[i] | cvalue;
1159 return i + 1;
1160 }
1161 #endif
1162
1163
1164 #ifdef SUPPORT_PCRE16
1165 /*************************************************
1166 * Convert a string to 16-bit *
1167 *************************************************/
1168
1169 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1170 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1171 double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1172 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1173 result is always left in buffer16.
1174
1175 Note that this function does not object to surrogate values. This is
1176 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1177 for the purpose of testing that they are correctly faulted.
1178
1179 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1180 in UTF-8 so that values greater than 255 can be handled.
1181
1182 Arguments:
1183 data TRUE if converting a data line; FALSE for a regex
1184 p points to a byte string
1185 utf true if UTF-8 (to be converted to UTF-16)
1186 len number of bytes in the string (excluding trailing zero)
1187
1188 Returns: number of 16-bit data items used (excluding trailing zero)
1189 OR -1 if a UTF-8 string is malformed
1190 OR -2 if a value > 0x10ffff is encountered
1191 OR -3 if a value > 0xffff is encountered when not in UTF mode
1192 */
1193
1194 static int
1195 to16(int data, pcre_uint8 *p, int utf, int len)
1196 {
1197 pcre_uint16 *pp;
1198
1199 if (buffer16_size < 2*len + 2)
1200 {
1201 if (buffer16 != NULL) free(buffer16);
1202 buffer16_size = 2*len + 2;
1203 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1204 if (buffer16 == NULL)
1205 {
1206 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1207 exit(1);
1208 }
1209 }
1210
1211 pp = buffer16;
1212
1213 if (!utf && !data)
1214 {
1215 while (len-- > 0) *pp++ = *p++;
1216 }
1217
1218 else
1219 {
1220 int c = 0;
1221 while (len > 0)
1222 {
1223 int chlen = utf82ord(p, &c);
1224 if (chlen <= 0) return -1;
1225 if (c > 0x10ffff) return -2;
1226 p += chlen;
1227 len -= chlen;
1228 if (c < 0x10000) *pp++ = c; else
1229 {
1230 if (!utf) return -3;
1231 c -= 0x10000;
1232 *pp++ = 0xD800 | (c >> 10);
1233 *pp++ = 0xDC00 | (c & 0x3ff);
1234 }
1235 }
1236 }
1237
1238 *pp = 0;
1239 return pp - buffer16;
1240 }
1241 #endif
1242
1243
1244 /*************************************************
1245 * Read or extend an input line *
1246 *************************************************/
1247
1248 /* Input lines are read into buffer, but both patterns and data lines can be
1249 continued over multiple input lines. In addition, if the buffer fills up, we
1250 want to automatically expand it so as to be able to handle extremely large
1251 lines that are needed for certain stress tests. When the input buffer is
1252 expanded, the other two buffers must also be expanded likewise, and the
1253 contents of pbuffer, which are a copy of the input for callouts, must be
1254 preserved (for when expansion happens for a data line). This is not the most
1255 optimal way of handling this, but hey, this is just a test program!
1256
1257 Arguments:
1258 f the file to read
1259 start where in buffer to start (this *must* be within buffer)
1260 prompt for stdin or readline()
1261
1262 Returns: pointer to the start of new data
1263 could be a copy of start, or could be moved
1264 NULL if no data read and EOF reached
1265 */
1266
1267 static pcre_uint8 *
1268 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1269 {
1270 pcre_uint8 *here = start;
1271
1272 for (;;)
1273 {
1274 int rlen = (int)(buffer_size - (here - buffer));
1275
1276 if (rlen > 1000)
1277 {
1278 int dlen;
1279
1280 /* If libreadline support is required, use readline() to read a line if the
1281 input is a terminal. Note that readline() removes the trailing newline, so
1282 we must put it back again, to be compatible with fgets(). */
1283
1284 #ifdef SUPPORT_LIBREADLINE
1285 if (isatty(fileno(f)))
1286 {
1287 size_t len;
1288 char *s = readline(prompt);
1289 if (s == NULL) return (here == start)? NULL : start;
1290 len = strlen(s);
1291 if (len > 0) add_history(s);
1292 if (len > rlen - 1) len = rlen - 1;
1293 memcpy(here, s, len);
1294 here[len] = '\n';
1295 here[len+1] = 0;
1296 free(s);
1297 }
1298 else
1299 #endif
1300
1301 /* Read the next line by normal means, prompting if the file is stdin. */
1302
1303 {
1304 if (f == stdin) printf("%s", prompt);
1305 if (fgets((char *)here, rlen, f) == NULL)
1306 return (here == start)? NULL : start;
1307 }
1308
1309 dlen = (int)strlen((char *)here);
1310 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1311 here += dlen;
1312 }
1313
1314 else
1315 {
1316 int new_buffer_size = 2*buffer_size;
1317 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1318 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1319 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1320
1321 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1322 {
1323 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1324 exit(1);
1325 }
1326
1327 memcpy(new_buffer, buffer, buffer_size);
1328 memcpy(new_pbuffer, pbuffer, buffer_size);
1329
1330 buffer_size = new_buffer_size;
1331
1332 start = new_buffer + (start - buffer);
1333 here = new_buffer + (here - buffer);
1334
1335 free(buffer);
1336 free(dbuffer);
1337 free(pbuffer);
1338
1339 buffer = new_buffer;
1340 dbuffer = new_dbuffer;
1341 pbuffer = new_pbuffer;
1342 }
1343 }
1344
1345 return NULL; /* Control never gets here */
1346 }
1347
1348
1349
1350 /*************************************************
1351 * Read number from string *
1352 *************************************************/
1353
1354 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1355 around with conditional compilation, just do the job by hand. It is only used
1356 for unpicking arguments, so just keep it simple.
1357
1358 Arguments:
1359 str string to be converted
1360 endptr where to put the end pointer
1361
1362 Returns: the value, as an int
1363 */
1364
1365 static int
1366 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1367 {
1368 int result = 0;
1369 while(*str != 0 && isspace(*str)) str++;
1370 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1371 *endptr = str;
1372 return(result);
1373 }
1374
1375
1376
1377 /*************************************************
1378 * Print one character *
1379 *************************************************/
1380
1381 /* Print a single character either literally, or as a hex escape. */
1382
1383 static int pchar(int c, FILE *f)
1384 {
1385 if (PRINTOK(c))
1386 {
1387 if (f != NULL) fprintf(f, "%c", c);
1388 return 1;
1389 }
1390
1391 if (c < 0x100)
1392 {
1393 if (use_utf)
1394 {
1395 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1396 return 6;
1397 }
1398 else
1399 {
1400 if (f != NULL) fprintf(f, "\\x%02x", c);
1401 return 4;
1402 }
1403 }
1404
1405 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1406 return (c <= 0x000000ff)? 6 :
1407 (c <= 0x00000fff)? 7 :
1408 (c <= 0x0000ffff)? 8 :
1409 (c <= 0x000fffff)? 9 : 10;
1410 }
1411
1412
1413
1414 #ifdef SUPPORT_PCRE8
1415 /*************************************************
1416 * Print 8-bit character string *
1417 *************************************************/
1418
1419 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1420 If handed a NULL file, just counts chars without printing. */
1421
1422 static int pchars(pcre_uint8 *p, int length, FILE *f)
1423 {
1424 int c = 0;
1425 int yield = 0;
1426
1427 if (length < 0)
1428 length = strlen((char *)p);
1429
1430 while (length-- > 0)
1431 {
1432 #if !defined NOUTF
1433 if (use_utf)
1434 {
1435 int rc = utf82ord(p, &c);
1436 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1437 {
1438 length -= rc - 1;
1439 p += rc;
1440 yield += pchar(c, f);
1441 continue;
1442 }
1443 }
1444 #endif
1445 c = *p++;
1446 yield += pchar(c, f);
1447 }
1448
1449 return yield;
1450 }
1451 #endif
1452
1453
1454
1455 #ifdef SUPPORT_PCRE16
1456 /*************************************************
1457 * Find length of 0-terminated 16-bit string *
1458 *************************************************/
1459
1460 static int strlen16(PCRE_SPTR16 p)
1461 {
1462 int len = 0;
1463 while (*p++ != 0) len++;
1464 return len;
1465 }
1466 #endif /* SUPPORT_PCRE16 */
1467
1468
1469 #ifdef SUPPORT_PCRE16
1470 /*************************************************
1471 * Print 16-bit character string *
1472 *************************************************/
1473
1474 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1475 If handed a NULL file, just counts chars without printing. */
1476
1477 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1478 {
1479 int yield = 0;
1480
1481 if (length < 0)
1482 length = strlen16(p);
1483
1484 while (length-- > 0)
1485 {
1486 int c = *p++ & 0xffff;
1487 #if !defined NOUTF
1488 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1489 {
1490 int d = *p & 0xffff;
1491 if (d >= 0xDC00 && d < 0xDFFF)
1492 {
1493 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1494 length--;
1495 p++;
1496 }
1497 }
1498 #endif
1499 yield += pchar(c, f);
1500 }
1501
1502 return yield;
1503 }
1504 #endif /* SUPPORT_PCRE16 */
1505
1506
1507
1508 #ifdef SUPPORT_PCRE8
1509 /*************************************************
1510 * Read a capture name (8-bit) and check it *
1511 *************************************************/
1512
1513 static pcre_uint8 *
1514 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1515 {
1516 pcre_uint8 *npp = *pp;
1517 while (isalnum(*p)) *npp++ = *p++;
1518 *npp++ = 0;
1519 *npp = 0;
1520 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1521 {
1522 fprintf(outfile, "no parentheses with name \"");
1523 PCHARSV(*pp, 0, -1, outfile);
1524 fprintf(outfile, "\"\n");
1525 }
1526
1527 *pp = npp;
1528 return p;
1529 }
1530 #endif /* SUPPORT_PCRE8 */
1531
1532
1533
1534 #ifdef SUPPORT_PCRE16
1535 /*************************************************
1536 * Read a capture name (16-bit) and check it *
1537 *************************************************/
1538
1539 /* Note that the text being read is 8-bit. */
1540
1541 static pcre_uint8 *
1542 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1543 {
1544 pcre_uint16 *npp = *pp;
1545 while (isalnum(*p)) *npp++ = *p++;
1546 *npp++ = 0;
1547 *npp = 0;
1548 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1549 {
1550 fprintf(outfile, "no parentheses with name \"");
1551 PCHARSV(*pp, 0, -1, outfile);
1552 fprintf(outfile, "\"\n");
1553 }
1554 *pp = npp;
1555 return p;
1556 }
1557 #endif /* SUPPORT_PCRE16 */
1558
1559
1560
1561 /*************************************************
1562 * Callout function *
1563 *************************************************/
1564
1565 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1566 the match. Yield zero unless more callouts than the fail count, or the callout
1567 data is not zero. */
1568
1569 static int callout(pcre_callout_block *cb)
1570 {
1571 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1572 int i, pre_start, post_start, subject_length;
1573
1574 if (callout_extra)
1575 {
1576 fprintf(f, "Callout %d: last capture = %d\n",
1577 cb->callout_number, cb->capture_last);
1578
1579 for (i = 0; i < cb->capture_top * 2; i += 2)
1580 {
1581 if (cb->offset_vector[i] < 0)
1582 fprintf(f, "%2d: <unset>\n", i/2);
1583 else
1584 {
1585 fprintf(f, "%2d: ", i/2);
1586 PCHARSV(cb->subject, cb->offset_vector[i],
1587 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1588 fprintf(f, "\n");
1589 }
1590 }
1591 }
1592
1593 /* Re-print the subject in canonical form, the first time or if giving full
1594 datails. On subsequent calls in the same match, we use pchars just to find the
1595 printed lengths of the substrings. */
1596
1597 if (f != NULL) fprintf(f, "--->");
1598
1599 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1600 PCHARS(post_start, cb->subject, cb->start_match,
1601 cb->current_position - cb->start_match, f);
1602
1603 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1604
1605 PCHARSV(cb->subject, cb->current_position,
1606 cb->subject_length - cb->current_position, f);
1607
1608 if (f != NULL) fprintf(f, "\n");
1609
1610 /* Always print appropriate indicators, with callout number if not already
1611 shown. For automatic callouts, show the pattern offset. */
1612
1613 if (cb->callout_number == 255)
1614 {
1615 fprintf(outfile, "%+3d ", cb->pattern_position);
1616 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1617 }
1618 else
1619 {
1620 if (callout_extra) fprintf(outfile, " ");
1621 else fprintf(outfile, "%3d ", cb->callout_number);
1622 }
1623
1624 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1625 fprintf(outfile, "^");
1626
1627 if (post_start > 0)
1628 {
1629 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1630 fprintf(outfile, "^");
1631 }
1632
1633 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1634 fprintf(outfile, " ");
1635
1636 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1637 pbuffer + cb->pattern_position);
1638
1639 fprintf(outfile, "\n");
1640 first_callout = 0;
1641
1642 if (cb->mark != last_callout_mark)
1643 {
1644 if (cb->mark == NULL)
1645 fprintf(outfile, "Latest Mark: <unset>\n");
1646 else
1647 {
1648 fprintf(outfile, "Latest Mark: ");
1649 PCHARSV(cb->mark, 0, -1, outfile);
1650 putc('\n', outfile);
1651 }
1652 last_callout_mark = cb->mark;
1653 }
1654
1655 if (cb->callout_data != NULL)
1656 {
1657 int callout_data = *((int *)(cb->callout_data));
1658 if (callout_data != 0)
1659 {
1660 fprintf(outfile, "Callout data = %d\n", callout_data);
1661 return callout_data;
1662 }
1663 }
1664
1665 return (cb->callout_number != callout_fail_id)? 0 :
1666 (++callout_count >= callout_fail_count)? 1 : 0;
1667 }
1668
1669
1670 /*************************************************
1671 * Local malloc functions *
1672 *************************************************/
1673
1674 /* Alternative malloc function, to test functionality and save the size of a
1675 compiled re, which is the first store request that pcre_compile() makes. The
1676 show_malloc variable is set only during matching. */
1677
1678 static void *new_malloc(size_t size)
1679 {
1680 void *block = malloc(size);
1681 gotten_store = size;
1682 if (first_gotten_store == 0) first_gotten_store = size;
1683 if (show_malloc)
1684 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1685 return block;
1686 }
1687
1688 static void new_free(void *block)
1689 {
1690 if (show_malloc)
1691 fprintf(outfile, "free %p\n", block);
1692 free(block);
1693 }
1694
1695 /* For recursion malloc/free, to test stacking calls */
1696
1697 static void *stack_malloc(size_t size)
1698 {
1699 void *block = malloc(size);
1700 if (show_malloc)
1701 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1702 return block;
1703 }
1704
1705 static void stack_free(void *block)
1706 {
1707 if (show_malloc)
1708 fprintf(outfile, "stack_free %p\n", block);
1709 free(block);
1710 }
1711
1712
1713 /*************************************************
1714 * Call pcre_fullinfo() *
1715 *************************************************/
1716
1717 /* Get one piece of information from the pcre_fullinfo() function. When only
1718 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1719 value, but the code is defensive.
1720
1721 Arguments:
1722 re compiled regex
1723 study study data
1724 option PCRE_INFO_xxx option
1725 ptr where to put the data
1726
1727 Returns: 0 when OK, < 0 on error
1728 */
1729
1730 static int
1731 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1732 {
1733 int rc;
1734
1735 if (use_pcre16)
1736 #ifdef SUPPORT_PCRE16
1737 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1738 #else
1739 rc = PCRE_ERROR_BADMODE;
1740 #endif
1741 else
1742 #ifdef SUPPORT_PCRE8
1743 rc = pcre_fullinfo(re, study, option, ptr);
1744 #else
1745 rc = PCRE_ERROR_BADMODE;
1746 #endif
1747
1748 if (rc < 0)
1749 {
1750 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1751 use_pcre16? "16" : "", option);
1752 if (rc == PCRE_ERROR_BADMODE)
1753 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1754 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1755 }
1756
1757 return rc;
1758 }
1759
1760
1761
1762 /*************************************************
1763 * Swap byte functions *
1764 *************************************************/
1765
1766 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1767 value, respectively.
1768
1769 Arguments:
1770 value any number
1771
1772 Returns: the byte swapped value
1773 */
1774
1775 static pcre_uint32
1776 swap_uint32(pcre_uint32 value)
1777 {
1778 return ((value & 0x000000ff) << 24) |
1779 ((value & 0x0000ff00) << 8) |
1780 ((value & 0x00ff0000) >> 8) |
1781 (value >> 24);
1782 }
1783
1784 static pcre_uint16
1785 swap_uint16(pcre_uint16 value)
1786 {
1787 return (value >> 8) | (value << 8);
1788 }
1789
1790
1791
1792 /*************************************************
1793 * Flip bytes in a compiled pattern *
1794 *************************************************/
1795
1796 /* This function is called if the 'F' option was present on a pattern that is
1797 to be written to a file. We flip the bytes of all the integer fields in the
1798 regex data block and the study block. In 16-bit mode this also flips relevant
1799 bytes in the pattern itself. This is to make it possible to test PCRE's
1800 ability to reload byte-flipped patterns, e.g. those compiled on a different
1801 architecture. */
1802
1803 static void
1804 regexflip(pcre *ere, pcre_extra *extra)
1805 {
1806 REAL_PCRE *re = (REAL_PCRE *)ere;
1807 #ifdef SUPPORT_PCRE16
1808 int op;
1809 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1810 int length = re->name_count * re->name_entry_size;
1811 #ifdef SUPPORT_UTF
1812 BOOL utf = (re->options & PCRE_UTF16) != 0;
1813 BOOL utf16_char = FALSE;
1814 #endif /* SUPPORT_UTF */
1815 #endif /* SUPPORT_PCRE16 */
1816
1817 /* Always flip the bytes in the main data block and study blocks. */
1818
1819 re->magic_number = REVERSED_MAGIC_NUMBER;
1820 re->size = swap_uint32(re->size);
1821 re->options = swap_uint32(re->options);
1822 re->flags = swap_uint16(re->flags);
1823 re->top_bracket = swap_uint16(re->top_bracket);
1824 re->top_backref = swap_uint16(re->top_backref);
1825 re->first_char = swap_uint16(re->first_char);
1826 re->req_char = swap_uint16(re->req_char);
1827 re->name_table_offset = swap_uint16(re->name_table_offset);
1828 re->name_entry_size = swap_uint16(re->name_entry_size);
1829 re->name_count = swap_uint16(re->name_count);
1830
1831 if (extra != NULL)
1832 {
1833 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1834 rsd->size = swap_uint32(rsd->size);
1835 rsd->flags = swap_uint32(rsd->flags);
1836 rsd->minlength = swap_uint32(rsd->minlength);
1837 }
1838
1839 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1840 in the name table, if present, and then in the pattern itself. */
1841
1842 #ifdef SUPPORT_PCRE16
1843 if (!use_pcre16) return;
1844
1845 while(TRUE)
1846 {
1847 /* Swap previous characters. */
1848 while (length-- > 0)
1849 {
1850 *ptr = swap_uint16(*ptr);
1851 ptr++;
1852 }
1853 #ifdef SUPPORT_UTF
1854 if (utf16_char)
1855 {
1856 if ((ptr[-1] & 0xfc00) == 0xd800)
1857 {
1858 /* We know that there is only one extra character in UTF-16. */
1859 *ptr = swap_uint16(*ptr);
1860 ptr++;
1861 }
1862 }
1863 utf16_char = FALSE;
1864 #endif /* SUPPORT_UTF */
1865
1866 /* Get next opcode. */
1867
1868 length = 0;
1869 op = *ptr;
1870 *ptr++ = swap_uint16(op);
1871
1872 switch (op)
1873 {
1874 case OP_END:
1875 return;
1876
1877 #ifdef SUPPORT_UTF
1878 case OP_CHAR:
1879 case OP_CHARI:
1880 case OP_NOT:
1881 case OP_NOTI:
1882 case OP_STAR:
1883 case OP_MINSTAR:
1884 case OP_PLUS:
1885 case OP_MINPLUS:
1886 case OP_QUERY:
1887 case OP_MINQUERY:
1888 case OP_UPTO:
1889 case OP_MINUPTO:
1890 case OP_EXACT:
1891 case OP_POSSTAR:
1892 case OP_POSPLUS:
1893 case OP_POSQUERY:
1894 case OP_POSUPTO:
1895 case OP_STARI:
1896 case OP_MINSTARI:
1897 case OP_PLUSI:
1898 case OP_MINPLUSI:
1899 case OP_QUERYI:
1900 case OP_MINQUERYI:
1901 case OP_UPTOI:
1902 case OP_MINUPTOI:
1903 case OP_EXACTI:
1904 case OP_POSSTARI:
1905 case OP_POSPLUSI:
1906 case OP_POSQUERYI:
1907 case OP_POSUPTOI:
1908 case OP_NOTSTAR:
1909 case OP_NOTMINSTAR:
1910 case OP_NOTPLUS:
1911 case OP_NOTMINPLUS:
1912 case OP_NOTQUERY:
1913 case OP_NOTMINQUERY:
1914 case OP_NOTUPTO:
1915 case OP_NOTMINUPTO:
1916 case OP_NOTEXACT:
1917 case OP_NOTPOSSTAR:
1918 case OP_NOTPOSPLUS:
1919 case OP_NOTPOSQUERY:
1920 case OP_NOTPOSUPTO:
1921 case OP_NOTSTARI:
1922 case OP_NOTMINSTARI:
1923 case OP_NOTPLUSI:
1924 case OP_NOTMINPLUSI:
1925 case OP_NOTQUERYI:
1926 case OP_NOTMINQUERYI:
1927 case OP_NOTUPTOI:
1928 case OP_NOTMINUPTOI:
1929 case OP_NOTEXACTI:
1930 case OP_NOTPOSSTARI:
1931 case OP_NOTPOSPLUSI:
1932 case OP_NOTPOSQUERYI:
1933 case OP_NOTPOSUPTOI:
1934 if (utf) utf16_char = TRUE;
1935 #endif
1936 /* Fall through. */
1937
1938 default:
1939 length = OP_lengths16[op] - 1;
1940 break;
1941
1942 case OP_CLASS:
1943 case OP_NCLASS:
1944 /* Skip the character bit map. */
1945 ptr += 32/sizeof(pcre_uint16);
1946 length = 0;
1947 break;
1948
1949 case OP_XCLASS:
1950 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1951 if (LINK_SIZE > 1)
1952 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1953 - (1 + LINK_SIZE + 1));
1954 else
1955 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1956
1957 /* Reverse the size of the XCLASS instance. */
1958 *ptr = swap_uint16(*ptr);
1959 ptr++;
1960 if (LINK_SIZE > 1)
1961 {
1962 *ptr = swap_uint16(*ptr);
1963 ptr++;
1964 }
1965
1966 op = *ptr;
1967 *ptr = swap_uint16(op);
1968 ptr++;
1969 if ((op & XCL_MAP) != 0)
1970 {
1971 /* Skip the character bit map. */
1972 ptr += 32/sizeof(pcre_uint16);
1973 length -= 32/sizeof(pcre_uint16);
1974 }
1975 break;
1976 }
1977 }
1978 /* Control should never reach here in 16 bit mode. */
1979 #endif /* SUPPORT_PCRE16 */
1980 }
1981
1982
1983
1984 /*************************************************
1985 * Check match or recursion limit *
1986 *************************************************/
1987
1988 static int
1989 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1990 int start_offset, int options, int *use_offsets, int use_size_offsets,
1991 int flag, unsigned long int *limit, int errnumber, const char *msg)
1992 {
1993 int count;
1994 int min = 0;
1995 int mid = 64;
1996 int max = -1;
1997
1998 extra->flags |= flag;
1999
2000 for (;;)
2001 {
2002 *limit = mid;
2003
2004 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2005 use_offsets, use_size_offsets);
2006
2007 if (count == errnumber)
2008 {
2009 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2010 min = mid;
2011 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2012 }
2013
2014 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2015 count == PCRE_ERROR_PARTIAL)
2016 {
2017 if (mid == min + 1)
2018 {
2019 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2020 break;
2021 }
2022 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2023 max = mid;
2024 mid = (min + mid)/2;
2025 }
2026 else break; /* Some other error */
2027 }
2028
2029 extra->flags &= ~flag;
2030 return count;
2031 }
2032
2033
2034
2035 /*************************************************
2036 * Case-independent strncmp() function *
2037 *************************************************/
2038
/* Compares up to n characters of two zero-terminated strings, folding each
character with tolower() before comparing. The comparison ends at the first
pair that differs, or when both strings reach their terminating zero, so
strings shorter than n are not read beyond their end.

Arguments:
  s           first string
  t           second string
  n           maximum number of characters to compare; nothing is compared
              (and 0 is returned) when n is zero or negative

Returns:      < 0, = 0, or > 0, according to the comparison
*/

static int
strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
{
  while (n-- > 0)
  {
    int cs = *s++;
    int ct = *t++;
    int diff = tolower(cs) - tolower(ct);

    if (diff != 0) return diff;

    /* The characters matched; if that character was the terminator of s,
    both strings have ended and there is nothing further to compare. */

    if (cs == 0) break;
  }
  return 0;
}
2058
2059
2060
2061 /*************************************************
2062 * Check newline indicator *
2063 *************************************************/
2064
2065 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2066 a message and return 0 if there is no match.
2067
2068 Arguments:
2069 p points after the leading '<'
2070 f file for error message
2071
2072 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2073 */
2074
2075 static int
2076 check_newline(pcre_uint8 *p, FILE *f)
2077 {
2078 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2079 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2080 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2081 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2082 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2083 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2084 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2085 fprintf(f, "Unknown newline type at: <%s\n", p);
2086 return 0;
2087 }
2088
2089
2090
2091 /*************************************************
2092 * Usage function *
2093 *************************************************/
2094
/* Writes the command-line help text to stdout. Which lines appear depends on
the build: the readline note follows SUPPORT_LIBREADLINE, and the -16, -dfa
and -p entries are shown only when SUPPORT_PCRE16 is defined, NODFA is not
defined, and NOPOSIX is not defined, respectively. */

static void
usage(void)
{
  printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
  printf("Input and output default to stdin and stdout.\n");
#ifdef SUPPORT_LIBREADLINE
  printf("If input is a terminal, readline() is used to read from it.\n");
#else
  printf("This version of pcretest is not linked with readline().\n");
#endif
  printf("\nOptions:\n");
#ifdef SUPPORT_PCRE16
  printf(" -16 use the 16-bit library\n");
#endif
  printf(" -b show compiled code\n");
  printf(" -C show PCRE compile-time options and exit\n");
  printf(" -C arg show a specific compile-time option\n");
  printf(" and exit with its value. The arg can be:\n");
  printf(" linksize internal link size [2, 3, 4]\n");
  printf(" pcre8 8 bit library support enabled [0, 1]\n");
  printf(" pcre16 16 bit library support enabled [0, 1]\n");
  printf(" utf Unicode Transformation Format supported [0, 1]\n");
  printf(" ucp Unicode Properties supported [0, 1]\n");
  printf(" jit Just-in-time compiler supported [0, 1]\n");

  /* The question marks are written as \? escapes: a plain "???]" contains the
  trigraph "??]", which a compiler with trigraph support replaces by "]", so
  the help text would show "?]]". The escapes print as "???]" either way. */

  printf(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ?\?\?]\n");
  printf(" -d debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
  printf(" -dfa force DFA matching for all subjects\n");
#endif
  printf(" -help show usage information\n");
  printf(" -i show information about compiled patterns\n"
         " -M find MATCH_LIMIT minimum for each subject\n"
         " -m output memory used information\n"
         " -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
  printf(" -p use POSIX interface\n");
#endif
  printf(" -q quiet: do not output PCRE version number at start\n");
  printf(" -S <n> set stack size to <n> megabytes\n");
  printf(" -s force each pattern to be studied at basic level\n"
         " -s+ force each pattern to be studied, using JIT if available\n"
         " -t time compilation and execution\n");
  printf(" -t <n> time compilation and execution, repeating <n> times\n");
  printf(" -tm time execution (matching) only\n");
  printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
}
2141
2142
2143
2144 /*************************************************
2145 * Main Program *
2146 *************************************************/
2147
2148 /* Read lines from named file or stdin and write to named file or stdout; lines
2149 consist of a regular expression, in delimiters and optionally followed by
2150 options, followed by a set of test data, terminated by an empty line. */
2151
2152 int main(int argc, char **argv)
2153 {
2154 FILE *infile = stdin;
2155 const char *version;
2156 int options = 0;
2157 int study_options = 0;
2158 int default_find_match_limit = FALSE;
2159 int op = 1;
2160 int timeit = 0;
2161 int timeitm = 0;
2162 int showinfo = 0;
2163 int showstore = 0;
2164 int force_study = -1;
2165 int force_study_options = 0;
2166 int quiet = 0;
2167 int size_offsets = 45;
2168 int size_offsets_max;
2169 int *offsets = NULL;
2170 #if !defined NOPOSIX
2171 int posix = 0;
2172 #endif
2173 int debug = 0;
2174 int done = 0;
2175 int all_use_dfa = 0;
2176 int yield = 0;
2177 int stack_size;
2178
2179 pcre_jit_stack *jit_stack = NULL;
2180
2181 /* These vectors store, end-to-end, a list of zero-terminated captured
2182 substring names, each list itself being terminated by an empty name. Assume
2183 that 1024 is plenty long enough for the few names we'll be testing. It is
2184 easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2185 for the actual memory, to ensure alignment. */
2186
2187 pcre_uint16 copynames[1024];
2188 pcre_uint16 getnames[1024];
2189
2190 #ifdef SUPPORT_PCRE16
2191 pcre_uint16 *cn16ptr;
2192 pcre_uint16 *gn16ptr;
2193 #endif
2194
2195 #ifdef SUPPORT_PCRE8
2196 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2197 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2198 pcre_uint8 *cn8ptr;
2199 pcre_uint8 *gn8ptr;
2200 #endif
2201
2202 /* Get buffers from malloc() so that valgrind will check their misuse when
2203 debugging. They grow automatically when very long lines are read. The 16-bit
2204 buffer (buffer16) is obtained only if needed. */
2205
2206 buffer = (pcre_uint8 *)malloc(buffer_size);
2207 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2208 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2209
2210 /* The outfile variable is static so that new_malloc can use it. */
2211
2212 outfile = stdout;
2213
2214 /* The following _setmode() stuff is some Windows magic that tells its runtime
2215 library to translate CRLF into a single LF character. At least, that's what
2216 I've been told: never having used Windows I take this all on trust. Originally
2217 it set 0x8000, but then I was advised that _O_BINARY was better. */
2218
2219 #if defined(_WIN32) || defined(WIN32)
2220 _setmode( _fileno( stdout ), _O_BINARY );
2221 #endif
2222
2223 /* Get the version number: both pcre_version() and pcre16_version() give the
2224 same answer. We just need to ensure that we call one that is available. */
2225
2226 #ifdef SUPPORT_PCRE8
2227 version = pcre_version();
2228 #else
2229 version = pcre16_version();
2230 #endif
2231
2232 /* Scan options */
2233
2234 while (argc > 1 && argv[op][0] == '-')
2235 {
2236 pcre_uint8 *endptr;
2237
2238 if (strcmp(argv[op], "-m") == 0) showstore = 1;
2239 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2240 else if (strcmp(argv[op], "-s+") == 0)
2241 {
2242 force_study = 1;
2243 force_study_options = PCRE_STUDY_JIT_COMPILE;
2244 }
2245 else if (strcmp(argv[op], "-16") == 0)
2246 {
2247 #ifdef SUPPORT_PCRE16
2248 use_pcre16 = 1;
2249 #else
2250 printf("** This version of PCRE was built without 16-bit support\n");
2251 exit(1);
2252 #endif
2253 }
2254 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2255 else if (strcmp(argv[op], "-b") == 0) debug = 1;
2256 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2257 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2258 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2259 #if !defined NODFA
2260 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2261 #endif
2262 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2263 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2264 *endptr == 0))
2265 {
2266 op++;
2267 argc--;
2268 }
2269 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2270 {
2271 int both = argv[op][2] == 0;
2272 int temp;
2273 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2274 *endptr == 0))
2275 {
2276 timeitm = temp;
2277 op++;
2278 argc--;
2279 }
2280 else timeitm = LOOPREPEAT;
2281 if (both) timeit = timeitm;
2282 }
2283 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2284 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2285 *endptr == 0))
2286 {
2287 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2288 printf("PCRE: -S not supported on this OS\n");
2289 exit(1);
2290 #else
2291 int rc;
2292 struct rlimit rlim;
2293 getrlimit(RLIMIT_STACK, &rlim);
2294 rlim.rlim_cur = stack_size * 1024 * 1024;
2295 rc = setrlimit(RLIMIT_STACK, &rlim);
2296 if (rc != 0)
2297 {
2298 printf("PCRE: setrlimit() failed with error %d\n", rc);
2299 exit(1);
2300 }
2301 op++;
2302 argc--;
2303 #endif
2304 }
2305 #if !defined NOPOSIX
2306 else if (strcmp(argv[op], "-p") == 0) posix = 1;
2307 #endif
2308 else if (strcmp(argv[op], "-C") == 0)
2309 {
2310 int rc;
2311 unsigned long int lrc;
2312
2313 if (argc > 2)
2314 {
2315 if (strcmp(argv[op + 1], "linksize") == 0)
2316 {
2317 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2318 printf("%d\n", rc);
2319 yield = rc;
2320 goto EXIT;
2321 }
2322 if (strcmp(argv[op + 1], "pcre8") == 0)
2323 {
2324 #ifdef SUPPORT_PCRE8
2325 printf("1\n");
2326 yield = 1;
2327 #else
2328 printf("0\n");
2329 yield = 0;
2330 #endif
2331 goto EXIT;
2332 }
2333 if (strcmp(argv[op + 1], "pcre16") == 0)
2334 {
2335 #ifdef SUPPORT_PCRE16
2336 printf("1\n");
2337 yield = 1;
2338 #else
2339 printf("0\n");
2340 yield = 0;
2341 #endif
2342 goto EXIT;
2343 }
2344 if (strcmp(argv[op + 1], "utf") == 0)
2345 {
2346 #ifdef SUPPORT_PCRE8
2347 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2348 printf("%d\n", rc);
2349 yield = rc;
2350 #else
2351 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2352 printf("%d\n", rc);
2353 yield = rc;
2354 #endif
2355 goto EXIT;
2356 }
2357 if (strcmp(argv[op + 1], "ucp") == 0)
2358 {
2359 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2360 printf("%d\n", rc);
2361 yield = rc;
2362 goto EXIT;
2363 }
2364 if (strcmp(argv[op + 1], "jit") == 0)
2365 {
2366 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2367 printf("%d\n", rc);
2368 yield = rc;
2369 goto EXIT;
2370 }
2371 if (strcmp(argv[op + 1], "newline") == 0)
2372 {
2373 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2374 /* Note that these values are always the ASCII values, even
2375 in EBCDIC environments. CR is 13 and NL is 10. */
2376 printf("%s\n", (rc == 13)? "CR" :
2377 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2378 (rc == -2)? "ANYCRLF" :
2379 (rc == -1)? "ANY" : "???");
2380 goto EXIT;
2381 }
2382 printf("Unknown -C option: %s\n", argv[op + 1]);
2383 goto EXIT;
2384 }
2385
2386 printf("PCRE version %s\n", version);
2387 printf("Compiled with\n");
2388
2389 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2390 are set, either both UTFs are supported or both are not supported. */
2391
2392 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2393 printf(" 8-bit and 16-bit support\n");
2394 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2395 if (rc)
2396 printf(" UTF-8 and UTF-16 support\n");
2397 else
2398 printf(" No UTF-8 or UTF-16 support\n");
2399 #elif defined SUPPORT_PCRE8
2400 printf(" 8-bit support only\n");
2401 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2402 printf(" %sUTF-8 support\n", rc? "" : "No ");
2403 #else
2404 printf(" 16-bit support only\n");
2405 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2406 printf(" %sUTF-16 support\n", rc? "" : "No ");
2407 #endif
2408
2409 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2410 printf(" %sUnicode properties support\n", rc? "" : "No ");
2411 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2412 if (rc)
2413 {
2414 const char *arch;
2415 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, &arch);
2416 printf(" Just-in-time compiler support: %s\n", arch);
2417 }
2418 else
2419 printf(" No just-in-time compiler support\n");
2420 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2421 /* Note that these values are always the ASCII values, even
2422 in EBCDIC environments. CR is 13 and NL is 10. */
2423 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2424 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2425 (rc == -2)? "ANYCRLF" :
2426 (rc == -1)? "ANY" : "???");
2427 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2428 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2429 "all Unicode newlines");
2430 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2431 printf(" Internal link size = %d\n", rc);
2432 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2433 printf(" POSIX malloc threshold = %d\n", rc);
2434 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2435 printf(" Default match limit = %ld\n", lrc);
2436 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2437 printf(" Default recursion depth limit = %ld\n", lrc);
2438 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2439 printf(" Match recursion uses %s: ", rc? "stack" : "heap");
2440 PCRE_EXEC(rc, NULL, NULL, NULL, -1, -1, 0, NULL, 0);
2441 printf("frame size = %d bytes\n", -rc);
2442 goto EXIT;
2443 }
2444 else if (strcmp(argv[op], "-help") == 0 ||
2445 strcmp(argv[op], "--help") == 0)
2446 {
2447 usage();
2448 goto EXIT;
2449 }
2450 else
2451 {
2452 printf("** Unknown or malformed option %s\n", argv[op]);
2453 usage();
2454 yield = 1;
2455 goto EXIT;
2456 }
2457 op++;
2458 argc--;
2459 }
2460
2461 /* Get the store for the offsets vector, and remember what it was */
2462
2463 size_offsets_max = size_offsets;
2464 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2465 if (offsets == NULL)
2466 {
2467 printf("** Failed to get %d bytes of memory for offsets vector\n",
2468 (int)(size_offsets_max * sizeof(int)));
2469 yield = 1;
2470 goto EXIT;
2471 }
2472
2473 /* Sort out the input and output files */
2474
2475 if (argc > 1)
2476 {
2477 infile = fopen(argv[op], INPUT_MODE);
2478 if (infile == NULL)
2479 {
2480 printf("** Failed to open %s\n", argv[op]);
2481 yield = 1;
2482 goto EXIT;
2483 }
2484 }
2485
2486 if (argc > 2)
2487 {
2488 outfile = fopen(argv[op+1], OUTPUT_MODE);
2489 if (outfile == NULL)
2490 {
2491 printf("** Failed to open %s\n", argv[op+1]);
2492 yield = 1;
2493 goto EXIT;
2494 }
2495 }
2496
2497 /* Set alternative malloc function */
2498
2499 #ifdef SUPPORT_PCRE8
2500 pcre_malloc = new_malloc;
2501 pcre_free = new_free;
2502 pcre_stack_malloc = stack_malloc;
2503 pcre_stack_free = stack_free;
2504 #endif
2505
2506 #ifdef SUPPORT_PCRE16
2507 pcre16_malloc = new_malloc;
2508 pcre16_free = new_free;
2509 pcre16_stack_malloc = stack_malloc;
2510 pcre16_stack_free = stack_free;
2511 #endif
2512
2513 /* Heading line unless quiet, then prompt for first regex if stdin */
2514
2515 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2516
2517 /* Main loop */
2518
2519 while (!done)
2520 {
2521 pcre *re = NULL;
2522 pcre_extra *extra = NULL;
2523
2524 #if !defined NOPOSIX /* There are still compilers that require no indent */
2525 regex_t preg;
2526 int do_posix = 0;
2527 #endif
2528
2529 const char *error;
2530 pcre_uint8 *markptr;
2531 pcre_uint8 *p, *pp, *ppp;
2532 pcre_uint8 *to_file = NULL;
2533 const pcre_uint8 *tables = NULL;
2534 unsigned long int get_options;
2535 unsigned long int true_size, true_study_size = 0;
2536 size_t size, regex_gotten_store;
2537 int do_allcaps = 0;
2538 int do_mark = 0;
2539 int do_study = 0;
2540 int no_force_study = 0;
2541 int do_debug = debug;
2542 int do_G = 0;
2543 int do_g = 0;
2544 int do_showinfo = showinfo;
2545 int do_showrest = 0;
2546 int do_showcaprest = 0;
2547 int do_flip = 0;
2548 int erroroffset, len, delimiter, poffset;
2549
2550 use_utf = 0;
2551 debug_lengths = 1;
2552
2553 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2554 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2555 fflush(outfile);
2556
2557 p = buffer;
2558 while (isspace(*p)) p++;
2559 if (*p == 0) continue;
2560
2561 /* See if the pattern is to be loaded pre-compiled from a file. */
2562
2563 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2564 {
2565 pcre_uint32 magic;
2566 pcre_uint8 sbuf[8];
2567 FILE *f;
2568
2569 p++;
2570 if (*p == '!')
2571 {
2572 do_debug = TRUE;
2573 do_showinfo = TRUE;
2574 p++;
2575 }
2576
2577 pp = p + (int)strlen((char *)p);
2578 while (isspace(pp[-1])) pp--;
2579 *pp = 0;
2580
2581 f = fopen((char *)p, "rb");
2582 if (f == NULL)
2583 {
2584 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2585 continue;
2586 }
2587
2588 first_gotten_store = 0;
2589 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2590
2591 true_size =
2592 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2593 true_study_size =
2594 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2595
2596 re = (pcre *)new_malloc(true_size);
2597 regex_gotten_store = first_gotten_store;
2598
2599 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2600
2601 magic = ((REAL_PCRE *)re)->magic_number;
2602 if (magic != MAGIC_NUMBER)
2603 {
2604 if (swap_uint32(magic) == MAGIC_NUMBER)
2605 {
2606 do_flip = 1;
2607 }
2608 else
2609 {
2610 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2611 fclose(f);
2612 continue;
2613 }
2614 }
2615
2616 /* We hide the byte-invert info for little and big endian tests. */
2617 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2618 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2619
2620 /* Now see if there is any following study data. */
2621
2622 if (true_study_size != 0)
2623 {
2624 pcre_study_data *psd;
2625
2626 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2627 extra->flags = PCRE_EXTRA_STUDY_DATA;
2628
2629 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2630 extra->study_data = psd;
2631
2632 if (fread(psd, 1, true_study_size, f) != true_study_size)
2633 {
2634 FAIL_READ:
2635 fprintf(outfile, "Failed to read data from %s\n", p);
2636 if (extra != NULL)
2637 {
2638 PCRE_FREE_STUDY(extra);
2639 }
2640 if (re != NULL) new_free(re);
2641 fclose(f);
2642 continue;
2643 }
2644 fprintf(outfile, "Study data loaded from %s\n", p);
2645 do_study = 1; /* To get the data output if requested */
2646 }
2647 else fprintf(outfile, "No study data\n");
2648
2649 /* Flip the necessary bytes. */
2650 if (do_flip)
2651 {
2652 int rc;
2653 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2654 if (rc == PCRE_ERROR_BADMODE)
2655 {
2656 /* Simulate the result of the function call below. */
2657 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2658 use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2659 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2660 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2661 continue;
2662 }
2663 }
2664
2665 /* Need to know if UTF-8 for printing data strings. */
2666
2667 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2668 use_utf = (get_options & PCRE_UTF8) != 0;
2669
2670 fclose(f);
2671 goto SHOW_INFO;
2672 }
2673
2674 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2675 the pattern; if it isn't complete, read more. */
2676
2677 delimiter = *p++;
2678
2679 if (isalnum(delimiter) || delimiter == '\\')
2680 {
2681 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2682 goto SKIP_DATA;
2683 }
2684
2685 pp = p;
2686 poffset = (int)(p - buffer);
2687
2688 for(;;)
2689 {
2690 while (*pp != 0)
2691 {
2692 if (*pp == '\\' && pp[1] != 0) pp++;
2693 else if (*pp == delimiter) break;
2694 pp++;
2695 }
2696 if (*pp != 0) break;
2697 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2698 {
2699 fprintf(outfile, "** Unexpected EOF\n");
2700 done = 1;
2701 goto CONTINUE;
2702 }
2703 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2704 }
2705
2706 /* The buffer may have moved while being extended; reset the start of data
2707 pointer to the correct relative point in the buffer. */
2708
2709 p = buffer + poffset;
2710
2711 /* If the first character after the delimiter is backslash, make
2712 the pattern end with backslash. This is purely to provide a way
2713 of testing for the error message when a pattern ends with backslash. */
2714
2715 if (pp[1] == '\\') *pp++ = '\\';
2716
2717 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2718 for callouts. */
2719
2720 *pp++ = 0;
2721 strcpy((char *)pbuffer, (char *)p);
2722
2723 /* Look for options after final delimiter */
2724
2725 options = 0;
2726 study_options = 0;
2727 log_store = showstore; /* default from command line */
2728
2729 while (*pp != 0)
2730 {
2731 switch (*pp++)
2732 {
2733 case 'f': options |= PCRE_FIRSTLINE; break;
2734 case 'g': do_g = 1; break;
2735 case 'i': options |= PCRE_CASELESS; break;
2736 case 'm': options |= PCRE_MULTILINE; break;
2737 case 's': options |= PCRE_DOTALL; break;
2738 case 'x': options |= PCRE_EXTENDED; break;
2739
2740 case '+':
2741 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2742 break;
2743
2744 case '=': do_allcaps = 1; break;
2745 case 'A': options |= PCRE_ANCHORED; break;
2746 case 'B': do_debug = 1; break;
2747 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2748 case 'D': do_debug = do_showinfo = 1; break;
2749 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2750 case 'F': do_flip = 1; break;
2751 case 'G': do_G = 1; break;
2752 case 'I': do_showinfo = 1; break;
2753 case 'J': options |= PCRE_DUPNAMES; break;
2754 case 'K': do_mark = 1; break;
2755 case 'M': log_store = 1; break;
2756 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2757
2758 #if !defined NOPOSIX
2759 case 'P': do_posix = 1; break;
2760 #endif
2761
2762 case 'S':
2763 if (do_study == 0)
2764 {
2765 do_study = 1;
2766 if (*pp == '+')
2767 {
2768 study_options |= PCRE_STUDY_JIT_COMPILE;
2769 pp++;
2770 }
2771 }
2772 else
2773 {
2774 do_study = 0;
2775 no_force_study = 1;
2776 }
2777 break;
2778
2779 case 'U': options |= PCRE_UNGREEDY; break;
2780 case 'W': options |= PCRE_UCP; break;
2781 case 'X': options |= PCRE_EXTRA; break;
2782 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2783 case 'Z': debug_lengths = 0; break;
2784 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2785 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2786
2787 case 'T':
2788 switch (*pp++)
2789 {
2790 case '0': tables = tables0; break;
2791 case '1': tables = tables1; break;
2792
2793 case '\r':
2794 case '\n':
2795 case ' ':
2796 case 0:
2797 fprintf(outfile, "** Missing table number after /T\n");
2798 goto SKIP_DATA;
2799
2800 default:
2801 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2802 goto SKIP_DATA;
2803 }
2804 break;
2805
2806 case 'L':
2807 ppp = pp;
2808 /* The '\r' test here is so that it works on Windows. */
2809 /* The '0' test is just in case this is an unterminated line. */
2810 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2811 *ppp = 0;
2812 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2813 {
2814 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2815 goto SKIP_DATA;
2816 }
2817 locale_set = 1;
2818 tables = PCRE_MAKETABLES;
2819 pp = ppp;
2820 break;
2821
2822 case '>':
2823 to_file = pp;
2824 while (*pp != 0) pp++;
2825 while (isspace(pp[-1])) pp--;
2826 *pp = 0;
2827 break;
2828
2829 case '<':
2830 {
2831 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2832 {
2833 options |= PCRE_JAVASCRIPT_COMPAT;
2834 pp += 3;
2835 }
2836 else
2837 {
2838 int x = check_newline(pp, outfile);
2839 if (x == 0) goto SKIP_DATA;
2840 options |= x;
2841 while (*pp++ != '>');
2842 }
2843 }
2844 break;
2845
2846 case '\r': /* So that it works in Windows */
2847 case '\n':
2848 case ' ':
2849 break;
2850
2851 default:
2852 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2853 goto SKIP_DATA;
2854 }
2855 }
2856
2857 /* Handle compiling via the POSIX interface, which doesn't support the
2858 timing, showing, or debugging options, nor the ability to pass over
2859 local character tables. Neither does it have 16-bit support. */
2860
2861 #if !defined NOPOSIX
2862 if (posix || do_posix)
2863 {
2864 int rc;
2865 int cflags = 0;
2866
2867 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2868 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2869 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2870 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2871 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2872 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2873 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2874
2875 first_gotten_store = 0;
2876 rc = regcomp(&preg, (char *)p, cflags);
2877
2878 /* Compilation failed; go back for another re, skipping to blank line
2879 if non-interactive. */
2880
2881 if (rc != 0)
2882 {
2883 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2884 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2885 goto SKIP_DATA;
2886 }
2887 }
2888
2889 /* Handle compiling via the native interface */
2890
2891 else
2892 #endif /* !defined NOPOSIX */
2893
2894 {
2895 /* In 16-bit mode, convert the input. */
2896
2897 #ifdef SUPPORT_PCRE16
2898 if (use_pcre16)
2899 {
2900 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2901 {
2902 case -1:
2903 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2904 "converted to UTF-16\n");
2905 goto SKIP_DATA;
2906
2907 case -2:
2908 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2909 "cannot be converted to UTF-16\n");
2910 goto SKIP_DATA;
2911
2912 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2913 fprintf(outfile, "**Failed: character value greater than 0xffff "
2914 "cannot be converted to 16-bit in non-UTF mode\n");
2915 goto SKIP_DATA;
2916
2917 default:
2918 break;
2919 }
2920 p = (pcre_uint8 *)buffer16;
2921 }
2922 #endif
2923
2924 /* Compile many times when timing */
2925
2926 if (timeit > 0)
2927 {
2928 register int i;
2929 clock_t time_taken;
2930 clock_t start_time = clock();
2931 for (i = 0; i < timeit; i++)
2932 {
2933 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2934 if (re != NULL) free(re);
2935 }
2936 time_taken = clock() - start_time;
2937 fprintf(outfile, "Compile time %.4f milliseconds\n",
2938 (((double)time_taken * 1000.0) / (double)timeit) /
2939 (double)CLOCKS_PER_SEC);
2940 }
2941
2942 first_gotten_store = 0;
2943 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2944
2945 /* Compilation failed; go back for another re, skipping to blank line
2946 if non-interactive. */
2947
2948 if (re == NULL)
2949 {
2950 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2951 SKIP_DATA:
2952 if (infile != stdin)
2953 {
2954 for (;;)
2955 {
2956 if (extend_inputline(infile, buffer, NULL) == NULL)
2957 {
2958 done = 1;
2959 goto CONTINUE;
2960 }
2961 len = (int)strlen((char *)buffer);
2962 while (len > 0 && isspace(buffer[len-1])) len--;
2963 if (len == 0) break;
2964 }
2965 fprintf(outfile, "\n");
2966 }
2967 goto CONTINUE;
2968 }
2969
2970 /* Compilation succeeded. It is now possible to set the UTF-8 option from
2971 within the regex; check for this so that we know how to process the data
2972 lines. */
2973
2974 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2975 goto SKIP_DATA;
2976 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2977
2978 /* Extract the size for possible writing before possibly flipping it,
2979 and remember the store that was got. */
2980
2981 true_size = ((REAL_PCRE *)re)->size;
2982 regex_gotten_store = first_gotten_store;
2983
2984 /* Output code size information if requested */
2985
2986 if (log_store)
2987 fprintf(outfile, "Memory allocation (code space): %d\n",
2988 (int)(first_gotten_store -
2989 sizeof(REAL_PCRE) -
2990 ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
2991
2992 /* If -s or /S was present, study the regex to generate additional info to
2993 help with the matching, unless the pattern has the SS option, which
2994 suppresses the effect of /S (used for a few test patterns where studying is
2995 never sensible). */
2996
2997 if (do_study || (force_study >= 0 && !no_force_study))
2998 {
2999 if (timeit > 0)
3000 {
3001 register int i;
3002 clock_t time_taken;
3003 clock_t start_time = clock();
3004 for (i = 0; i < timeit; i++)
3005 {
3006 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3007 }
3008 time_taken = clock() - start_time;
3009 if (extra != NULL)
3010 {
3011 PCRE_FREE_STUDY(extra);
3012 }
3013 fprintf(outfile, " Study time %.4f milliseconds\n",
3014 (((double)time_taken * 1000.0) / (double)timeit) /
3015 (double)CLOCKS_PER_SEC);
3016 }
3017 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3018 if (error != NULL)
3019 fprintf(outfile, "Failed to study: %s\n", error);
3020 else if (extra != NULL)
3021 {
3022 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3023 if (log_store)
3024 {
3025 size_t jitsize;
3026 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3027 jitsize != 0)
3028 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3029 }
3030 }
3031 }
3032
3033 /* If /K was present, we set up for handling MARK data. */
3034
3035 if (do_mark)
3036 {
3037 if (extra == NULL)
3038 {
3039 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3040 extra->flags = 0;
3041 }
3042 extra->mark = &markptr;
3043 extra->flags |= PCRE_EXTRA_MARK;
3044 }
3045
3046 /* Extract and display information from the compiled data if required. */
3047
3048 SHOW_INFO:
3049
3050 if (do_debug)
3051 {
3052 fprintf(outfile, "------------------------------------------------------------------\n");
3053 PCRE_PRINTINT(re, outfile, debug_lengths);
3054 }
3055
3056 /* We already have the options in get_options (see above) */
3057
3058 if (do_showinfo)
3059 {
3060 unsigned long int all_options;
3061 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3062 hascrorlf;
3063 int nameentrysize, namecount;
3064 const pcre_uint8 *nametable;
3065
3066 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3067 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3068 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3069 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3070 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3071 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3072 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3073 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3074 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3075 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3076 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3077 != 0)
3078 goto SKIP_DATA;
3079
3080 if (size != regex_gotten_store) fprintf(outfile,
3081 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3082 (int)size, (int)regex_gotten_store);
3083
3084 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3085 if (backrefmax > 0)
3086 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3087
3088 if (namecount > 0)
3089 {
3090 fprintf(outfile, "Named capturing subpatterns:\n");
3091 while (namecount-- > 0)
3092 {
3093 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3094 int imm2_size = use_pcre16 ? 1 : 2;
3095 #else
3096 int imm2_size = IMM2_SIZE;
3097 #endif
3098 int length = (int)STRLEN(nametable + imm2_size);
3099 fprintf(outfile, " ");
3100 PCHARSV(nametable, imm2_size, length, outfile);
3101 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3102 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3103 fprintf(outfile, "%3d\n", use_pcre16?
3104 (int)(((PCRE_SPTR16)nametable)[0])
3105 :((int)nametable[0] << 8) | (int)nametable[1]);
3106 nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3107 #else
3108 fprintf(outfile, "%3d\n", GET2(nametable, 0));
3109 #ifdef SUPPORT_PCRE8
3110 nametable += nameentrysize;
3111 #else
3112 nametable += nameentrysize * 2;
3113 #endif
3114 #endif
3115 }
3116 }
3117
3118 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3119 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3120
3121 all_options = ((REAL_PCRE *)re)->options;
3122 if (do_flip) all_options = swap_uint32(all_options);
3123
3124 if (get_options == 0) fprintf(outfile, "No options\n");
3125 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3126 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3127 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3128 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3129 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3130 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3131 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3132 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3133 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3134 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3135 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3136 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3137 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3138 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3139 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3140 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3141 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3142 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3143
3144 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3145
3146 switch (get_options & PCRE_NEWLINE_BITS)
3147 {
3148 case PCRE_NEWLINE_CR:
3149 fprintf(outfile, "Forced newline sequence: CR\n");
3150 break;
3151
3152 case PCRE_NEWLINE_LF:
3153 fprintf(outfile, "Forced newline sequence: LF\n");
3154 break;
3155
3156 case PCRE_NEWLINE_CRLF:
3157 fprintf(outfile, "Forced newline sequence: CRLF\n");
3158 break;
3159
3160 case PCRE_NEWLINE_ANYCRLF:
3161 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3162 break;
3163
3164 case PCRE_NEWLINE_ANY:
3165 fprintf(outfile, "Forced newline sequence: ANY\n");
3166 break;
3167
3168 default:
3169 break;
3170 }
3171
3172 if (first_char == -1)
3173 {
3174 fprintf(outfile, "First char at start or follows newline\n");
3175 }
3176 else if (first_char < 0)
3177 {
3178 fprintf(outfile, "No first char\n");
3179 }
3180 else
3181 {
3182 const char *caseless =
3183 ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3184 "" : " (caseless)";
3185
3186 if (PRINTOK(first_char))
3187 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3188 else
3189 {
3190 fprintf(outfile, "First char = ");
3191 pchar(first_char, outfile);
3192 fprintf(outfile, "%s\n", caseless);
3193 }
3194 }
3195
3196 if (need_char < 0)
3197 {
3198 fprintf(outfile, "No need char\n");
3199 }
3200 else
3201 {
3202 const char *caseless =
3203 ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3204 "" : " (caseless)";
3205
3206 if (PRINTOK(need_char))
3207 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3208 else
3209 {
3210 fprintf(outfile, "Need char = ");
3211 pchar(need_char, outfile);
3212 fprintf(outfile, "%s\n", caseless);
3213 }
3214 }
3215
3216 /* Don't output study size; at present it is in any case a fixed
3217 value, but it varies, depending on the computer architecture, and
3218 so messes up the test suite. (And with the /F option, it might be
3219 flipped.) If study was forced by an external -s, don't show this
3220 information unless -i or -d was also present. This means that, except
3221 when auto-callouts are involved, the output from runs with and without
3222 -s should be identical. */
3223
3224 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3225 {
3226 if (extra == NULL)
3227 fprintf(outfile, "Study returned NULL\n");
3228 else
3229 {
3230 pcre_uint8 *start_bits = NULL;
3231 int minlength;
3232
3233 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3234 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3235
3236 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3237 {
3238 if (start_bits == NULL)
3239 fprintf(outfile, "No set of starting bytes\n");
3240 else
3241 {
3242 int i;
3243 int c = 24;
3244 fprintf(outfile, "Starting byte set: ");
3245 for (i = 0; i < 256; i++)
3246 {
3247 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3248 {
3249 if (c > 75)
3250 {
3251 fprintf(outfile, "\n ");
3252 c = 2;
3253 }
3254 if (PRINTOK(i) && i != ' ')
3255 {
3256 fprintf(outfile, "%c ", i);
3257 c += 2;
3258 }
3259 else
3260 {
3261 fprintf(outfile, "\\x%02x ", i);
3262 c += 5;
3263 }
3264 }
3265 }
3266 fprintf(outfile, "\n");
3267 }
3268 }
3269 }
3270
3271 /* Show this only if the JIT was set by /S, not by -s. */
3272
3273 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3274 {
3275 int jit;
3276 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3277 {
3278 if (jit)
3279 fprintf(outfile, "JIT study was successful\n");
3280 else
3281 #ifdef SUPPORT_JIT
3282 fprintf(outfile, "JIT study was not successful\n");
3283 #else
3284 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3285 #endif
3286 }
3287 }
3288 }
3289 }
3290
3291 /* If the '>' option was present, we write out the regex to a file, and
3292 that is all. The first 8 bytes of the file are the regex length and then
3293 the study length, in big-endian order. */
3294
3295 if (to_file != NULL)
3296 {
3297 FILE *f = fopen((char *)to_file, "wb");
3298 if (f == NULL)
3299 {
3300 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3301 }
3302 else
3303 {
3304 pcre_uint8 sbuf[8];
3305
3306 if (do_flip) regexflip(re, extra);
3307 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3308 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3309 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3310 sbuf[3] = (pcre_uint8)((true_size) & 255);
3311 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3312 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3313 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3314 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3315
3316 if (fwrite(sbuf, 1, 8, f) < 8 ||
3317 fwrite(re, 1, true_size, f) < true_size)
3318 {
3319 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3320 }
3321 else
3322 {
3323 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3324
3325 /* If there is study data, write it. */
3326
3327 if (extra != NULL)
3328 {
3329 if (fwrite(extra->study_data, 1, true_study_size, f) <
3330 true_study_size)
3331 {
3332 fprintf(outfile, "Write error on %s: %s\n", to_file,
3333 strerror(errno));
3334 }
3335 else fprintf(outfile, "Study data written to %s\n", to_file);
3336 }
3337 }
3338 fclose(f);
3339 }
3340
3341 new_free(re);
3342 if (extra != NULL)
3343 {
3344 PCRE_FREE_STUDY(extra);
3345 }
3346 if (locale_set)
3347 {
3348 new_free((void *)tables);
3349 setlocale(LC_CTYPE, "C");
3350 locale_set = 0;
3351 }
3352 continue; /* With next regex */
3353 }
3354 } /* End of non-POSIX compile */
3355
3356 /* Read data lines and test them */
3357
3358 for (;;)
3359 {
3360 pcre_uint8 *q;
3361 pcre_uint8 *bptr;
3362 int *use_offsets = offsets;
3363 int use_size_offsets = size_offsets;
3364 int callout_data = 0;
3365 int callout_data_set = 0;
3366 int count, c;
3367 int copystrings = 0;
3368 int find_match_limit = default_find_match_limit;
3369 int getstrings = 0;
3370 int getlist = 0;
3371 int gmatched = 0;
3372 int start_offset = 0;
3373 int start_offset_sign = 1;
3374 int g_notempty = 0;
3375 int use_dfa = 0;
3376
3377 *copynames = 0;
3378 *getnames = 0;
3379
3380 #ifdef SUPPORT_PCRE16
3381 cn16ptr = copynames;
3382 gn16ptr = getnames;
3383 #endif
3384 #ifdef SUPPORT_PCRE8
3385 cn8ptr = copynames8;
3386 gn8ptr = getnames8;
3387 #endif
3388
3389 SET_PCRE_CALLOUT(callout);
3390 first_callout = 1;
3391 last_callout_mark = NULL;
3392 callout_extra = 0;
3393 callout_count = 0;
3394 callout_fail_count = 999999;
3395 callout_fail_id = -1;
3396 show_malloc = 0;
3397 options = 0;
3398
3399 if (extra != NULL) extra->flags &=
3400 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3401
3402 len = 0;
3403 for (;;)
3404 {
3405 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3406 {
3407 if (len > 0) /* Reached EOF without hitting a newline */
3408 {
3409 fprintf(outfile, "\n");
3410 break;
3411 }
3412 done = 1;
3413 goto CONTINUE;
3414 }
3415 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3416 len = (int)strlen((char *)buffer);
3417 if (buffer[len-1] == '\n') break;
3418 }
3419
3420 while (len > 0 && isspace(buffer[len-1])) len--;
3421 buffer[len] = 0;
3422 if (len == 0) break;
3423
3424 p = buffer;
3425 while (isspace(*p)) p++;
3426
3427 bptr = q = dbuffer;
3428 while ((c = *p++) != 0)
3429 {
3430 int i = 0;
3431 int n = 0;
3432
3433 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3434 In non-UTF mode, allow the value of the byte to fall through to later,
3435 where values greater than 127 are turned into UTF-8 when running in
3436 16-bit mode. */
3437
3438 if (c != '\\')
3439 {
3440 if (use_utf)
3441 {
3442 *q++ = c;
3443 continue;
3444 }
3445 }
3446
3447 /* Handle backslash escapes */
3448
3449 else switch ((c = *p++))
3450 {
3451 case 'a': c = 7; break;
3452 case 'b': c = '\b'; break;
3453 case 'e': c = 27; break;
3454 case 'f': c = '\f'; break;
3455 case 'n': c = '\n'; break;
3456 case 'r': c = '\r'; break;
3457 case 't': c = '\t'; break;
3458 case 'v': c = '\v'; break;
3459
3460 case '0': case '1': case '2': case '3':
3461 case '4': case '5': case '6': case '7':
3462 c -= '0';
3463 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3464 c = c * 8 + *p++ - '0';
3465 break;
3466
3467 case 'x':
3468 if (*p == '{')
3469 {
3470 pcre_uint8 *pt = p;
3471 c = 0;
3472
3473 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3474 when isxdigit() is a macro that refers to its argument more than
3475 once. This is banned by the C Standard, but apparently happens in at
3476 least one MacOS environment. */
3477
3478 for (pt++; isxdigit(*pt); pt++)
3479 {
3480 if (++i == 9)
3481 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3482 "using only the first eight.\n");
3483 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3484 }
3485 if (*pt == '}')
3486 {
3487 p = pt + 1;
3488 break;
3489 }
3490 /* Not correct form for \x{...}; fall through */
3491 }
3492
3493 /* \x without {} always defines just one byte in 8-bit mode. This
3494 allows UTF-8 characters to be constructed byte by byte, and also allows
3495 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3496 Otherwise, pass it down to later code so that it can be turned into
3497 UTF-8 when running in 16-bit mode. */
3498
3499 c = 0;
3500 while (i++ < 2 && isxdigit(*p))
3501 {
3502 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3503 p++;
3504 }
3505 if (use_utf)
3506 {
3507 *q++ = c;
3508 continue;
3509 }
3510 break;
3511
3512 case 0: /* \ followed by EOF allows for an empty line */
3513 p--;
3514 continue;
3515
3516 case '>':
3517 if (*p == '-')
3518 {
3519 start_offset_sign = -1;
3520 p++;
3521 }
3522 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3523 start_offset *= start_offset_sign;
3524 continue;
3525
3526 case 'A': /* Option setting */
3527 options |= PCRE_ANCHORED;
3528 continue;
3529
3530 case 'B':
3531 options |= PCRE_NOTBOL;
3532 continue;
3533
3534 case 'C':
3535 if (isdigit(*p)) /* Set copy string */
3536 {
3537 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3538 copystrings |= 1 << n;
3539 }
3540 else if (isalnum(*p))
3541 {
3542 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3543 }
3544 else if (*p == '+')
3545 {
3546 callout_extra = 1;
3547 p++;
3548 }
3549 else if (*p == '-')
3550 {
3551 SET_PCRE_CALLOUT(NULL);
3552 p++;
3553 }
3554 else if (*p == '!')
3555 {
3556 callout_fail_id = 0;
3557 p++;
3558 while(isdigit(*p))
3559 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3560 callout_fail_count = 0;
3561 if (*p == '!')
3562 {
3563 p++;
3564 while(isdigit(*p))
3565 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3566 }
3567 }
3568 else if (*p == '*')
3569 {
3570 int sign = 1;
3571 callout_data = 0;
3572 if (*(++p) == '-') { sign = -1; p++; }
3573 while(isdigit(*p))
3574 callout_data = callout_data * 10 + *p++ - '0';
3575 callout_data *= sign;
3576 callout_data_set = 1;
3577 }
3578 continue;
3579
3580 #if !defined NODFA
3581 case 'D':
3582 #if !defined NOPOSIX
3583 if (posix || do_posix)
3584 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3585 else
3586 #endif
3587 use_dfa = 1;
3588 continue;
3589 #endif
3590
3591 #if !defined NODFA
3592 case 'F':
3593 options |= PCRE_DFA_SHORTEST;
3594 continue;
3595 #endif
3596
3597 case 'G':
3598 if (isdigit(*p))
3599 {
3600 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3601 getstrings |= 1 << n;
3602 }
3603 else if (isalnum(*p))
3604 {
3605 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3606 }
3607 continue;
3608
3609 case 'J':
3610 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3611 if (extra != NULL
3612 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3613 && extra->executable_jit != NULL)
3614 {
3615 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3616 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3617 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3618 }
3619 continue;
3620
3621 case 'L':
3622 getlist = 1;
3623 continue;
3624
3625 case 'M':
3626 find_match_limit = 1;
3627 continue;
3628
3629 case 'N':
3630 if ((options & PCRE_NOTEMPTY) != 0)
3631 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3632 else
3633 options |= PCRE_NOTEMPTY;
3634 continue;
3635
3636 case 'O':
3637 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3638 if (n > size_offsets_max)
3639 {
3640 size_offsets_max = n;
3641 free(offsets);
3642 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3643 if (offsets == NULL)
3644 {
3645 printf("** Failed to get %d bytes of memory for offsets vector\n",
3646 (int)(size_offsets_max * sizeof(int)));
3647 yield = 1;
3648 goto EXIT;
3649 }
3650 }
3651 use_size_offsets = n;
3652 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3653 continue;
3654
3655 case 'P':
3656 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3657 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3658 continue;
3659
3660 case 'Q':
3661 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3662 if (extra == NULL)
3663 {
3664 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3665 extra->flags = 0;
3666 }
3667 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3668 extra->match_limit_recursion = n;
3669 continue;
3670
3671 case 'q':
3672 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3673 if (extra == NULL)
3674 {
3675 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3676 extra->flags = 0;
3677 }
3678 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3679 extra->match_limit = n;
3680 continue;
3681
3682 #if !defined NODFA
3683 case 'R':
3684 options |= PCRE_DFA_RESTART;
3685 continue;
3686 #endif
3687
3688 case 'S':
3689 show_malloc = 1;
3690 continue;
3691
3692 case 'Y':
3693 options |= PCRE_NO_START_OPTIMIZE;
3694 continue;
3695
3696 case 'Z':
3697 options |= PCRE_NOTEOL;
3698 continue;
3699
3700 case '?':
3701 options |= PCRE_NO_UTF8_CHECK;
3702 continue;
3703
3704 case '<':
3705 {
3706 int x = check_newline(p, outfile);
3707 if (x == 0) goto NEXT_DATA;
3708 options |= x;
3709 while (*p++ != '>');
3710 }
3711 continue;
3712 }
3713
3714 /* We now have a character value in c that may be greater than 255. In
3715 16-bit mode, we always convert characters to UTF-8 so that values greater
3716 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3717 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3718 mode must have come from \x{...} or octal constructs because values from
3719 \x.. get this far only in non-UTF mode. */
3720
3721 #if !defined NOUTF || defined SUPPORT_PCRE16
3722 if (use_pcre16 || use_utf)
3723 {
3724 pcre_uint8 buff8[8];
3725 int ii, utn;
3726 utn = ord2utf8(c, buff8);
3727 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3728 }
3729 else
3730 #endif
3731 {
3732 if (c > 255)
3733 {
3734 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3735 "and UTF-8 mode is not enabled.\n", c);
3736 fprintf(outfile, "** Truncation will probably give the wrong "
3737 "result.\n");
3738 }
3739 *q++ = c;
3740 }
3741 }
3742
3743 /* Reached end of subject string */
3744
3745 *q = 0;
3746 len = (int)(q - dbuffer);
3747
3748 /* Move the data to the end of the buffer so that a read over the end of
3749 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3750 we are using the POSIX interface, we must include the terminating zero. */
3751
3752 #if !defined NOPOSIX
3753 if (posix || do_posix)
3754 {
3755 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3756 bptr += buffer_size - len - 1;
3757 }
3758 else
3759 #endif
3760 {
3761 memmove(bptr + buffer_size - len, bptr, len);
3762 bptr += buffer_size - len;
3763 }
3764
3765 if ((all_use_dfa || use_dfa) && find_match_limit)
3766 {
3767 printf("**Match limit not relevant for DFA matching: ignored\n");
3768 find_match_limit = 0;
3769 }
3770
3771 /* Handle matching via the POSIX interface, which does not
3772 support timing or playing with the match limit or callout data. */
3773
3774 #if !defined NOPOSIX
3775 if (posix || do_posix)
3776 {
3777 int rc;
3778 int eflags = 0;
3779 regmatch_t *pmatch = NULL;
3780 if (use_size_offsets > 0)
3781 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3782 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3783 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3784 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3785
3786 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3787
3788 if (rc != 0)
3789 {
3790 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3791 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3792 }
3793 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3794 != 0)
3795 {
3796 fprintf(outfile, "Matched with REG_NOSUB\n");
3797 }
3798 else
3799 {
3800 size_t i;
3801 for (i = 0; i < (size_t)use_size_offsets; i++)
3802 {
3803 if (pmatch[i].rm_so >= 0)
3804 {
3805 fprintf(outfile, "%2d: ", (int)i);
3806 PCHARSV(dbuffer, pmatch[i].rm_so,
3807 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3808 fprintf(outfile, "\n");
3809 if (do_showcaprest || (i == 0 && do_showrest))
3810 {
3811 fprintf(outfile, "%2d+ ", (int)i);
3812 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3813 outfile);
3814 fprintf(outfile, "\n");
3815 }
3816 }
3817 }
3818 }
3819 free(pmatch);
3820 goto NEXT_DATA;
3821 }
3822
3823 #endif /* !defined NOPOSIX */
3824
3825 /* Handle matching via the native interface - repeats for /g and /G */
3826
3827 #ifdef SUPPORT_PCRE16
3828 if (use_pcre16)
3829 {
3830 len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3831 switch(len)
3832 {
3833 case -1:
3834 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3835 "converted to UTF-16\n");
3836 goto NEXT_DATA;
3837
3838 case -2:
3839 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3840 "cannot be converted to UTF-16\n");
3841 goto NEXT_DATA;
3842
3843 case -3:
3844 fprintf(outfile, "**Failed: character value greater than 0xffff "
3845 "cannot be converted to 16-bit in non-UTF mode\n");
3846 goto NEXT_DATA;
3847
3848 default:
3849 break;
3850 }
3851 bptr = (pcre_uint8 *)buffer16;
3852 }
3853 #endif
3854
3855 for (;; gmatched++) /* Loop for /g or /G */
3856 {
3857 markptr = NULL;
3858
3859 if (timeitm > 0)
3860 {
3861 register int i;
3862 clock_t time_taken;
3863 clock_t start_time = clock();
3864
3865 #if !defined NODFA
3866 if (all_use_dfa || use_dfa)
3867 {
3868 int workspace[1000];
3869 for (i = 0; i < timeitm; i++)
3870 {
3871 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3872 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3873 (sizeof(workspace)/sizeof(int)));
3874 }
3875 }
3876 else
3877 #endif
3878
3879 for (i = 0; i < timeitm; i++)
3880 {
3881 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3882 (options | g_notempty), use_offsets, use_size_offsets);
3883 }
3884 time_taken = clock() - start_time;
3885 fprintf(outfile, "Execute time %.4f milliseconds\n",
3886 (((double)time_taken * 1000.0) / (double)timeitm) /
3887 (double)CLOCKS_PER_SEC);
3888 }
3889
3890 /* If find_match_limit is set, we want to do repeated matches with
3891 varying limits in order to find the minimum value for the match limit and
3892 for the recursion limit. The match limits are relevant only to the normal
3893 running of pcre_exec(), so disable the JIT optimization. This makes it
3894 possible to run the same set of tests with and without JIT externally
3895 requested. */
3896
3897 if (find_match_limit)
3898 {
3899 if (extra == NULL)
3900 {
3901 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3902 extra->flags = 0;
3903 }
3904 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3905
3906 (void)check_match_limit(re, extra, bptr, len, start_offset,
3907 options|g_notempty, use_offsets, use_size_offsets,
3908 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3909 PCRE_ERROR_MATCHLIMIT, "match()");
3910
3911 count = check_match_limit(re, extra, bptr, len, start_offset,
3912 options|g_notempty, use_offsets, use_size_offsets,
3913 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3914 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3915 }
3916
3917 /* If callout_data is set, use the interface with additional data */
3918
3919 else if (callout_data_set)
3920 {
3921 if (extra == NULL)
3922 {
3923 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3924 extra->flags = 0;
3925 }
3926 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3927 extra->callout_data = &callout_data;
3928 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3929 options | g_notempty, use_offsets, use_size_offsets);
3930 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3931 }
3932
3933 /* The normal case is just to do the match once, with the default
3934 value of match_limit. */
3935
3936 #if !defined NODFA
3937 else if (all_use_dfa || use_dfa)
3938 {
3939 int workspace[1000];
3940 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3941 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3942 (sizeof(workspace)/sizeof(int)));
3943 if (count == 0)
3944 {
3945 fprintf(outfile, "Matched, but too many subsidiary matches\n");
3946 count = use_size_offsets/2;
3947 }
3948 }
3949 #endif
3950
3951 else
3952 {
3953 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3954 options | g_notempty, use_offsets, use_size_offsets);
3955 if (count == 0)
3956 {
3957 fprintf(outfile, "Matched, but too many substrings\n");
3958 count = use_size_offsets/3;
3959 }
3960 }
3961
3962 /* Matched */
3963
3964 if (count >= 0)
3965 {
3966 int i, maxcount;
3967 void *cnptr, *gnptr;
3968
3969 #if !defined NODFA
3970 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3971 #endif
3972 maxcount = use_size_offsets/3;
3973
3974 /* This is a check against a lunatic return value. */
3975
3976 if (count > maxcount)
3977 {
3978 fprintf(outfile,
3979 "** PCRE error: returned count %d is too big for offset size %d\n",
3980 count, use_size_offsets);
3981 count = use_size_offsets/3;
3982 if (do_g || do_G)
3983 {
3984 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3985 do_g = do_G = FALSE; /* Break g/G loop */
3986 }
3987 }
3988
3989 /* do_allcaps requests showing of all captures in the pattern, to check
3990 unset ones at the end. */
3991
3992 if (do_allcaps)
3993 {
3994 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
3995 goto SKIP_DATA;
3996 count++; /* Allow for full match */
3997 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3998 }
3999
4000 /* Output the captured substrings */
4001
4002 for (i = 0; i < count * 2; i += 2)
4003 {
4004 if (use_offsets[i] < 0)
4005 {
4006 if (use_offsets[i] != -1)
4007 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4008 use_offsets[i], i);
4009 if (use_offsets[i+1] != -1)
4010 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4011 use_offsets[i+1], i+1);
4012 fprintf(outfile, "%2d: <unset>\n", i/2);
4013 }
4014 else
4015 {
4016 fprintf(outfile, "%2d: ", i/2);
4017 PCHARSV(bptr, use_offsets[i],
4018 use_offsets[i+1] - use_offsets[i], outfile);
4019 fprintf(outfile, "\n");
4020 if (do_showcaprest || (i == 0 && do_showrest))
4021 {
4022 fprintf(outfile, "%2d+ ", i/2);
4023 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4024 outfile);
4025 fprintf(outfile, "\n");
4026 }
4027 }
4028 }
4029
4030 if (markptr != NULL)
4031 {
4032 fprintf(outfile, "MK: ");
4033 PCHARSV(markptr, 0, -1, outfile);
4034 fprintf(outfile, "\n");
4035 }
4036
4037 for (i = 0; i < 32; i++)
4038 {
4039 if ((copystrings & (1 << i)) != 0)
4040 {
4041 int rc;
4042 char copybuffer[256];
4043 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4044 copybuffer, sizeof(copybuffer));
4045 if (rc < 0)
4046 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4047 else
4048 {
4049 fprintf(outfile, "%2dC ", i);
4050 PCHARSV(copybuffer, 0, rc, outfile);
4051 fprintf(outfile, " (%d)\n", rc);
4052 }
4053 }
4054 }
4055
4056 cnptr = copynames;
4057 for (;;)
4058 {
4059 int rc;
4060 char copybuffer[256];
4061
4062 if (use_pcre16)
4063 {
4064 if (*(pcre_uint16 *)cnptr == 0) break;
4065 }
4066 else
4067 {
4068 if (*(pcre_uint8 *)cnptr == 0) break;
4069 }
4070
4071 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4072 cnptr, copybuffer, sizeof(copybuffer));
4073
4074 if (rc < 0)
4075 {
4076 fprintf(outfile, "copy substring ");
4077 PCHARSV(cnptr, 0, -1, outfile);
4078 fprintf(outfile, " failed %d\n", rc);
4079 }
4080 else
4081 {
4082 fprintf(outfile, " C ");
4083 PCHARSV(copybuffer, 0, rc, outfile);
4084 fprintf(outfile, " (%d) ", rc);
4085 PCHARSV(cnptr, 0, -1, outfile);
4086 putc('\n', outfile);
4087 }
4088
4089 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4090 }
4091
4092 for (i = 0; i < 32; i++)
4093 {
4094 if ((getstrings & (1 << i)) != 0)
4095 {
4096 int rc;
4097 const char *substring;
4098 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4099 if (rc < 0)
4100 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4101 else
4102 {
4103 fprintf(outfile, "%2dG ", i);
4104 PCHARSV(substring, 0, rc, outfile);
4105 fprintf(outfile, " (%d)\n", rc);
4106 PCRE_FREE_SUBSTRING(substring);
4107 }
4108 }
4109 }
4110
4111 gnptr = getnames;
4112 for (;;)
4113 {
4114 int rc;
4115 const char *substring;
4116
4117 if (use_pcre16)
4118 {
4119 if (*(pcre_uint16 *)gnptr == 0) break;
4120 }
4121 else
4122 {
4123 if (*(pcre_uint8 *)gnptr == 0) break;
4124 }
4125
4126 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4127 gnptr, &substring);
4128 if (rc < 0)
4129 {
4130 fprintf(outfile, "get substring ");
4131 PCHARSV(gnptr, 0, -1, outfile);
4132 fprintf(outfile, " failed %d\n", rc);
4133 }
4134 else
4135 {
4136 fprintf(outfile, " G ");
4137 PCHARSV(substring, 0, rc, outfile);
4138 fprintf(outfile, " (%d) ", rc);
4139 PCHARSV(gnptr, 0, -1, outfile);
4140 PCRE_FREE_SUBSTRING(substring);
4141 putc('\n', outfile);
4142 }
4143
4144 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4145 }
4146
4147 if (getlist)
4148 {
4149 int rc;
4150 const char **stringlist;
4151 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4152 if (rc < 0)
4153 fprintf(outfile, "get substring list failed %d\n", rc);
4154 else
4155 {
4156 for (i = 0; i < count; i++)
4157 {
4158 fprintf(outfile, "%2dL ", i);
4159 PCHARSV(stringlist[i], 0, -1, outfile);
4160 putc('\n', outfile);
4161 }
4162 if (stringlist[i] != NULL)
4163 fprintf(outfile, "string list not terminated by NULL\n");
4164 PCRE_FREE_SUBSTRING_LIST(stringlist);
4165 }
4166 }
4167 }
4168
4169 /* There was a partial match */
4170
4171 else if (count == PCRE_ERROR_PARTIAL)
4172 {
4173 if (markptr == NULL) fprintf(outfile, "Partial match");
4174 else
4175 {
4176 fprintf(outfile, "Partial match, mark=");
4177 PCHARSV(markptr, 0, -1, outfile);
4178 }
4179 if (use_size_offsets > 1)
4180 {
4181 fprintf(outfile, ": ");
4182 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4183 outfile);
4184 }
4185 fprintf(outfile, "\n");
4186 break; /* Out of the /g loop */
4187 }
4188
4189 /* Failed to match. If this is a /g or /G loop and we previously set
4190 g_notempty after a null match, this is not necessarily the end. We want
4191 to advance the start offset, and continue. We won't be at the end of the
4192 string - that was checked before setting g_notempty.
4193
4194 Complication arises in the case when the newline convention is "any",
4195 "crlf", or "anycrlf". If the previous match was at the end of a line
4196 terminated by CRLF, an advance of one character just passes the \r,
4197 whereas we should prefer the longer newline sequence, as does the code in
4198 pcre_exec(). Fudge the offset value to achieve this. We check for a
4199 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4200 find the default.
4201
4202 Otherwise, in the case of UTF-8 matching, the advance must be one
4203 character, not one byte. */
4204
4205 else
4206 {
4207 if (g_notempty != 0)
4208 {
4209 int onechar = 1;
4210 unsigned int obits = ((REAL_PCRE *)re)->options;
4211 use_offsets[0] = start_offset;
4212 if ((obits & PCRE_NEWLINE_BITS) == 0)
4213 {
4214 int d;
4215 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4216 /* Note that these values are always the ASCII ones, even in
4217 EBCDIC environments. CR = 13, NL = 10. */
4218 obits = (d == 13)? PCRE_NEWLINE_CR :
4219 (d == 10)? PCRE_NEWLINE_LF :
4220 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4221 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4222 (d == -1)? PCRE_NEWLINE_ANY : 0;
4223 }
4224 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4225 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4226 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4227 &&
4228 start_offset < len - 1 &&
4229 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4230 (use_pcre16?
4231 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4232 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4233 :
4234 bptr[start_offset] == '\r'
4235 && bptr[start_offset + 1] == '\n')
4236 #elif defined SUPPORT_PCRE16
4237 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4238 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4239 #else
4240 bptr[start_offset] == '\r'
4241 && bptr[start_offset + 1] == '\n'
4242 #endif
4243 )
4244 onechar++;
4245 else if (use_utf)
4246 {
4247 while (start_offset + onechar < len)
4248 {
4249 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4250 onechar++;
4251 }
4252 }
4253 use_offsets[1] = start_offset + onechar;
4254 }
4255 else
4256 {
4257 switch(count)
4258 {
4259 case PCRE_ERROR_NOMATCH:
4260 if (gmatched == 0)
4261 {
4262 if (markptr == NULL)
4263 {
4264 fprintf(outfile, "No match\n");
4265 }
4266 else
4267 {
4268 fprintf(outfile, "No match, mark = ");
4269 PCHARSV(markptr, 0, -1, outfile);
4270 putc('\n', outfile);
4271 }
4272 }
4273 break;
4274
4275 case PCRE_ERROR_BADUTF8:
4276 case PCRE_ERROR_SHORTUTF8:
4277 fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4278 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4279 use_pcre16? "16" : "8");
4280 if (use_size_offsets >= 2)
4281 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4282 use_offsets[1]);
4283 fprintf(outfile, "\n");
4284 break;
4285
4286 case PCRE_ERROR_BADUTF8_OFFSET:
4287 fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4288 use_pcre16? "16" : "8");
4289 break;
4290
4291 default:
4292 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
4293 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4294 else
4295 fprintf(outfile, "Error %d (Unexpected value)\n", count);
4296 break;
4297 }
4298
4299 break; /* Out of the /g loop */
4300 }
4301 }
4302
4303 /* If not /g or /G we are done */
4304
4305 if (!do_g && !do_G) break;
4306
4307 /* If we have matched an empty string, first check to see if we are at
4308 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4309 Perl's /g option does. This turns out to be rather cunning. First we set
4310 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4311 same point. If this fails (picked up above) we advance to the next
4312 character. */
4313
4314 g_notempty = 0;
4315
4316 if (use_offsets[0] == use_offsets[1])
4317 {
4318 if (use_offsets[0] == len) break;
4319 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4320 }
4321
4322 /* For /g, update the start offset, leaving the rest alone */
4323
4324 if (do_g) start_offset = use_offsets[1];
4325
4326 /* For /G, update the pointer and length */
4327
4328 else
4329 {
4330 bptr += use_offsets[1] * CHAR_SIZE;
4331 len -= use_offsets[1];
4332 }
4333 } /* End of loop for /g and /G */
4334
4335 NEXT_DATA: continue;
4336 } /* End of loop for data lines */
4337
4338 CONTINUE:
4339
4340 #if !defined NOPOSIX
4341 if (posix || do_posix) regfree(&preg);
4342 #endif
4343
4344 if (re != NULL) new_free(re);
4345 if (extra != NULL)
4346 {
4347 PCRE_FREE_STUDY(extra);
4348 }
4349 if (locale_set)
4350 {
4351 new_free((void *)tables);
4352 setlocale(LC_CTYPE, "C");
4353 locale_set = 0;
4354 }
4355 if (jit_stack != NULL)
4356 {
4357 PCRE_JIT_STACK_FREE(jit_stack);
4358 jit_stack = NULL;
4359 }
4360 }
4361
4362 if (infile == stdin) fprintf(outfile, "\n");
4363
4364 EXIT:
4365
4366 if (infile != NULL && infile != stdin) fclose(infile);
4367 if (outfile != NULL && outfile != stdout) fclose(outfile);
4368
4369 free(buffer);
4370 free(dbuffer);
4371 free(pbuffer);
4372 free(offsets);
4373
4374 #ifdef SUPPORT_PCRE16
4375 if (buffer16 != NULL) free(buffer16);
4376 #endif
4377
4378 return yield;
4379 }
4380
4381 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12