/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 860 - (show annotations) (download)
Mon Jan 9 20:12:58 2012 UTC (2 years, 9 months ago) by zherczeg
File MIME type: text/plain
File size: 130029 byte(s)
rename PCRE_SCHAR16 to PCRE_UCHAR16 and JIT compiler update
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49
50 #ifdef HAVE_CONFIG_H
51 #include "config.h"
52 #endif
53
54 #include <ctype.h>
55 #include <stdio.h>
56 #include <string.h>
57 #include <stdlib.h>
58 #include <time.h>
59 #include <locale.h>
60 #include <errno.h>
61
62 #ifdef SUPPORT_LIBREADLINE
63 #ifdef HAVE_UNISTD_H
64 #include <unistd.h>
65 #endif
66 #include <readline/readline.h>
67 #include <readline/history.h>
68 #endif
69
70
/* A number of things vary for Windows builds. Originally, pcretest opened its
input and output without "b"; then I was told that "b" was needed in some
environments, so it was added for release 5.0 to both the input and output. (It
makes no difference on Unix-like systems.) Later I was told that it is wrong
for the input on Windows. I've now abstracted the modes into two macros that
are set here, to make it easier to fiddle with them, and removed "b" from the
input mode under Windows.

INPUT_MODE and OUTPUT_MODE are string literals; on Windows the input mode is
"r", elsewhere it is "rb". The output mode is "wb" everywhere. */

#if defined(_WIN32) || defined(WIN32)
#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

#ifndef isatty
#define isatty _isatty         /* This is what Windows calls them, I'm told, */
#endif                         /* though in some environments they seem to */
                               /* be already defined, hence the #ifndefs. */
#ifndef fileno
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#ifdef __BORLANDC__
#define _setmode(handle, mode) setmode(handle, mode)
#endif

/* Not Windows */

#else
#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */
#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"
#endif
107
108
109 /* We have to include pcre_internal.h because we need the internal info for
110 displaying the results of pcre_study() and we also need to know about the
111 internal macros, structures, and other internal data values; pcretest has
112 "inside information" compared to a program that strictly follows the PCRE API.
113
114 Although pcre_internal.h does itself include pcre.h, we explicitly include it
115 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
116 appropriately for an application, not for building PCRE. */
117
118 #include "pcre.h"
119
120 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
121 /* Configure internal macros to 16 bit mode. */
122 #define COMPILE_PCRE16
123 #endif
124
125 #include "pcre_internal.h"
126
/* The pcre_printint() function, which prints the internal form of a compiled
regex, is held in a separate file so that (a) it can be compiled in either
8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
when that is compiled in debug mode.

Both prototypes take the compiled pattern as a plain "pcre *", the output
stream, and a flag selecting whether lengths are printed. Each prototype is
only declared when the matching library mode is supported. */

#ifdef SUPPORT_PCRE8
void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
#ifdef SUPPORT_PCRE16
void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
138
139 /* We need access to some of the data tables that PCRE uses. So as not to have
140 to keep two copies, we include the source file here, changing the names of the
141 external symbols to prevent clashes. */
142
143 #define PCRE_INCLUDED
144 #undef PRIV
145 #define PRIV(name) name
146
147 #include "pcre_tables.c"
148
/* The definition of the macro PRINTABLE, which determines whether to print an
output character as-is or as a hex value when showing compiled patterns, is
the same as in the printint.src file. We use it here in cases when the locale
has not been explicitly changed, so as to get consistent output from systems
that differ in their output from isprint() even in the "C" locale. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK consults the file-scope flag locale_set: when it is non-zero the
decision is delegated to isprint(); otherwise the fixed PRINTABLE range is
used. Note that the argument is evaluated more than once by PRINTABLE. */

#define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))

/* Posix support is disabled in 16 bit only mode. */
#if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
#define NOPOSIX
#endif
167
168 /* It is possible to compile this test program without including support for
169 testing the POSIX interface, though this is not available via the standard
170 Makefile. */
171
172 #if !defined NOPOSIX
173 #include "pcreposix.h"
174 #endif
175
/* It is also possible, originally for the benefit of a version that was
imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
automatically cut out the UTF support if PCRE is built without it: NOUTF is
defined (as an empty macro) whenever SUPPORT_UTF is not defined, unless the
builder has already defined NOUTF. */

#ifndef SUPPORT_UTF
#ifndef NOUTF
#define NOUTF
#endif
#endif
186
/* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
only from one place and is handled differently). I couldn't dream up any way of
using a single macro to do this in a generic way, because of the many different
argument requirements. We know that at least one of SUPPORT_PCRE8 and
SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
use these in the definitions of generic macros.

**** Special note about the PCHARSxxx macros: the address of the string to be
printed is always given as two arguments: a base address followed by an offset.
The base address is cast to the correct data size for 8 or 16 bit data; the
offset is in units of this size. If the string were given as base+offset in one
argument, the casting might be incorrectly applied. */

#ifdef SUPPORT_PCRE8

/* Character output. The cast binds to the base pointer only; the offset is
then added in units of pcre_uint8. PCHARS8 stores the result of pchars() in
lv, PCHARSV8 discards it. */

#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + (offset), len, f)

#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + (offset), len, f)

/* The cn16 argument is accepted only so that the 8-bit and 16-bit variants
share one argument list; it is not used here. */

#define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
  p = read_capture_name8(p, cn8, re)

#define STRLEN8(p) ((int)strlen((char *)p))

#define SET_PCRE_CALLOUT8(callout) \
  pcre_callout = callout

#define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
  pcre_assign_jit_stack(extra, callback, userdata)

/* Library call wrappers. Those with a leading result argument (re, rc, count,
n, extra) expand to an assignment statement without a trailing semicolon;
subject and name pointers are cast to char * for the 8-bit API. */

#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  re = pcre_compile((char *)pat, options, error, erroffset, tables)

#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
    (char *)namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets)

#define PCRE_FREE_STUDY8(extra) \
  pcre_free_study(extra)

#define PCRE_FREE_SUBSTRING8(substring) \
  pcre_free_substring(substring)

#define PCRE_FREE_SUBSTRING_LIST8(listptr) \
  pcre_free_substring_list(listptr)

#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
    (char *)getnamesptr, subsptr)

/* The compiled pattern is taken from the second macro argument. Earlier this
parameter was named rc and ignored, and the expansion silently picked up
whatever identifier "re" happened to be visible at the point of use. Callers
that pass their pattern variable (named re) see no change. */

#define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)

#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
  rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
  rc = pcre_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT8(re, outfile, debug_lengths) \
  pcre_printint(re, outfile, debug_lengths)

#define PCRE_STUDY8(extra, re, options, error) \
  extra = pcre_study(re, options, error)

/* Unlike most of the wrappers above, the JIT stack allocator is an expression
(it yields the allocated stack) rather than an assignment. */

#define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
  pcre_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE8(stack) \
  pcre_jit_stack_free(stack)

#endif /* SUPPORT_PCRE8 */
280
/* -----------------------------------------------------------*/

/* 16-bit versions of the mode-specific macros. pcretest keeps its pattern,
extra block and JIT stack in variables of the 8-bit types, so every wrapper
casts them to the pcre16 types on the way into the library and casts results
back to the 8-bit types on the way out. Subject and name pointers are cast to
PCRE_SPTR16. */

#ifdef SUPPORT_PCRE16

/* Character output. The cast binds to the base pointer only; the offset is
then added in units of 16-bit characters. */

#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + (offset), len, f)

#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + (offset), len, f)

/* The cn8 argument is accepted only so that the 8-bit and 16-bit variants
share one argument list; it is not used here. */

#define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
  p = read_capture_name16(p, cn16, re)

#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))

#define SET_PCRE_CALLOUT16(callout) \
  pcre16_callout = (int (*)(pcre16_callout_block *))callout

#define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
  pcre16_assign_jit_stack((pcre16_extra *)extra, \
    (pcre16_jit_callback)callback, userdata)

#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
    tables)

/* The two copy wrappers halve the size argument before passing it on,
presumably converting a byte count into a count of 16-bit units (confirm
against the callers). The argument is parenthesized so that an expression
such as a + b is halved as a whole. */

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, (size)/2)

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_UCHAR16 *)cbuffer, (size)/2)

#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
    (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
    workspace, size_workspace)

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
    len, start_offset, options, offsets, size_offsets)

#define PCRE_FREE_STUDY16(extra) \
  pcre16_free_study((pcre16_extra *)extra)

#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)substring)

#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)listptr)

/* The casts through (void*) in the substring getters convert the caller's
8-bit-typed output pointer to the 16-bit pointer type the library expects. */

#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)

/* The compiled pattern is taken from the second macro argument and cast to
pcre16 * like every other wrapper in this group. Earlier this parameter was
named rc and ignored, and the expansion passed an uncast identifier "re"
picked up from the point of use. Callers that pass their pattern variable
(named re) see the same call, now with the pointer type the library expects. */

#define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)

#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SPTR16 *)(void*)subsptr)

#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16 **)(void*)listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
  rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
    tables)

/* No cast here: pcre16_printint() is declared in this file as taking a
plain pcre pointer. */

#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  pcre16_printint(re, outfile, debug_lengths)

#define PCRE_STUDY16(extra, re, options, error) \
  extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)

#define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
  (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE16(stack) \
  pcre16_jit_stack_free((pcre16_jit_stack *)stack)

#endif /* SUPPORT_PCRE16 */
369
370
/* ----- Both modes are supported; a runtime test is needed, except for
pcre_config(), and the JIT stack functions, when it doesn't matter which
version is called. -----

In this branch most generic macros expand to an if/else statement that tests
the file-scope flag use_pcre16 and then uses the 16-bit or 8-bit macro. The
else-branch has no trailing semicolon, so each such macro must be used as a
complete statement followed by ';' and cannot be used inside an expression.
CHAR_SIZE, STRLEN, PCRE_JIT_STACK_ALLOC and PCRE_MAKETABLES are conditional
expressions instead and may be used as values. */

#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16

#define CHAR_SIZE (use_pcre16? 2:1)

#define PCHARS(lv, p, offset, len, f) \
  if (use_pcre16) \
    PCHARS16(lv, p, offset, len, f); \
  else \
    PCHARS8(lv, p, offset, len, f)

#define PCHARSV(p, offset, len, f) \
  if (use_pcre16) \
    PCHARSV16(p, offset, len, f); \
  else \
    PCHARSV8(p, offset, len, f)

#define READ_CAPTURE_NAME(p, cn8, cn16, re) \
  if (use_pcre16) \
    READ_CAPTURE_NAME16(p, cn8, cn16, re); \
  else \
    READ_CAPTURE_NAME8(p, cn8, cn16, re)

#define SET_PCRE_CALLOUT(callout) \
  if (use_pcre16) \
    SET_PCRE_CALLOUT16(callout); \
  else \
    SET_PCRE_CALLOUT8(callout)

#define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  if (use_pcre16) \
    PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
  else \
    PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  if (use_pcre16) \
    PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
  else \
    PCRE_COMPILE8(re, pat, options, error, erroffset, tables)

#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  if (use_pcre16) \
    PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size); \
  else \
    PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  if (use_pcre16) \
    PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
  else \
    PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  if (use_pcre16) \
    PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace); \
  else \
    PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  if (use_pcre16) \
    PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets); \
  else \
    PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets)

#define PCRE_FREE_STUDY(extra) \
  if (use_pcre16) \
    PCRE_FREE_STUDY16(extra); \
  else \
    PCRE_FREE_STUDY8(extra)

#define PCRE_FREE_SUBSTRING(substring) \
  if (use_pcre16) \
    PCRE_FREE_SUBSTRING16(substring); \
  else \
    PCRE_FREE_SUBSTRING8(substring)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  if (use_pcre16) \
    PCRE_FREE_SUBSTRING_LIST16(listptr); \
  else \
    PCRE_FREE_SUBSTRING_LIST8(listptr)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  if (use_pcre16) \
    PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr); \
  else \
    PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr)

/* The second argument is forwarded unchanged to the mode-specific macro; it
is named re here because it is the position where the compiled pattern is
expected. */

#define PCRE_GET_STRINGNUMBER(n, re, ptr) \
  if (use_pcre16) \
    PCRE_GET_STRINGNUMBER16(n, re, ptr); \
  else \
    PCRE_GET_STRINGNUMBER8(n, re, ptr)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  if (use_pcre16) \
    PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
  else \
    PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  if (use_pcre16) \
    PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
  else \
    PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  (use_pcre16 ? \
     PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
    :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))

#define PCRE_JIT_STACK_FREE(stack) \
  if (use_pcre16) \
    PCRE_JIT_STACK_FREE16(stack); \
  else \
    PCRE_JIT_STACK_FREE8(stack)

#define PCRE_MAKETABLES \
  (use_pcre16? pcre16_maketables() : pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  if (use_pcre16) \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
  else \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  if (use_pcre16) \
    PCRE_PRINTINT16(re, outfile, debug_lengths); \
  else \
    PCRE_PRINTINT8(re, outfile, debug_lengths)

#define PCRE_STUDY(extra, re, options, error) \
  if (use_pcre16) \
    PCRE_STUDY16(extra, re, options, error); \
  else \
    PCRE_STUDY8(extra, re, options, error)

/* ----- Only 8-bit mode is supported ----- */

#elif defined SUPPORT_PCRE8
#define CHAR_SIZE                 1
#define PCHARS                    PCHARS8
#define PCHARSV                   PCHARSV8
#define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
#define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
#define STRLEN                    STRLEN8
#define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
#define PCRE_COMPILE              PCRE_COMPILE8
#define PCRE_CONFIG               pcre_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
#define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
#define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
#define PCRE_EXEC                 PCRE_EXEC8
#define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
#define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
#define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
#define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
#define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
#define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
#define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
#define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
#define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
#define PCRE_MAKETABLES           pcre_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
#define PCRE_PRINTINT             PCRE_PRINTINT8
#define PCRE_STUDY                PCRE_STUDY8

/* ----- Only 16-bit mode is supported ----- */

#else
#define CHAR_SIZE                 2
#define PCHARS                    PCHARS16
#define PCHARSV                   PCHARSV16
#define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
#define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
#define STRLEN                    STRLEN16
#define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
#define PCRE_COMPILE              PCRE_COMPILE16
#define PCRE_CONFIG               pcre16_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
#define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
#define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
#define PCRE_EXEC                 PCRE_EXEC16
#define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
#define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
#define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
#define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
#define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
#define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
#define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
#define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
#define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
#define PCRE_MAKETABLES           pcre16_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
#define PCRE_PRINTINT             PCRE_PRINTINT16
#define PCRE_STUDY                PCRE_STUDY16
#endif

/* ----- End of mode-specific function call macros ----- */
591
592
/* Other parameters */

/* Fallback for environments where CLOCKS_PER_SEC is not already defined: use
CLK_TCK when that is defined, otherwise assume 100 ticks per second. */

#ifndef CLOCKS_PER_SEC
#ifdef CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif

/* This is the default loop count for timing. */

#define LOOPREPEAT 500000
606
/* Static variables. Those without an explicit initializer start at zero (or
NULL) because they have static storage duration. */

static FILE *outfile;
static int log_store = 0;
static int callout_count;
static int callout_extra;
static int callout_fail_count;
static int callout_fail_id;
static int debug_lengths;
static int first_callout;
static int locale_set = 0;      /* Tested by the PRINTOK macro */
static int show_malloc;
static int use_utf;
static size_t gotten_store;
static size_t first_gotten_store = 0;
static const unsigned char *last_callout_mark = NULL;
623
624 /* The buffers grow automatically if very long input lines are encountered. */
625
626 static int buffer_size = 50000;
627 static pcre_uint8 *buffer = NULL;
628 static pcre_uint8 *dbuffer = NULL;
629 static pcre_uint8 *pbuffer = NULL;
630
/* Another buffer is needed for translation to 16-bit character strings. It
will be obtained and extended as required. */

#ifdef SUPPORT_PCRE16
static int buffer16_size = 0;
static pcre_uint16 *buffer16 = NULL;

#ifdef SUPPORT_PCRE8

/* We need the table of operator lengths that is used for 16-bit compiling, in
order to swap bytes in a pattern for saving/reloading testing. Luckily, the
data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
appropriately for the 16-bit world. Just as a safety check, make sure that
COMPILE_PCRE16 is *not* set. */

#ifdef COMPILE_PCRE16
#error COMPILE_PCRE16 must not be set when compiling pcretest.c
#endif

/* Remap LINK_SIZE before OP_LENGTHS is expanded below: 2 becomes 1, and 3 or
4 becomes 2. Any other value is rejected at compile time. */

#if LINK_SIZE == 2
#undef LINK_SIZE
#define LINK_SIZE 1
#elif LINK_SIZE == 3 || LINK_SIZE == 4
#undef LINK_SIZE
#define LINK_SIZE 2
#else
#error LINK_SIZE must be either 2, 3, or 4
#endif

#undef IMM2_SIZE
#define IMM2_SIZE 1

#endif /* SUPPORT_PCRE8 */

/* OP_LENGTHS is expanded here, after the redefinitions above when both modes
are supported. */

static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
#endif  /* SUPPORT_PCRE16 */
667
/* If we have 8-bit support, default use_pcre16 to false; if there is also
16-bit support, it can be changed by an option. If there is no 8-bit support,
there must be 16-bit support, so default it to 1. */

#ifdef SUPPORT_PCRE8
static int use_pcre16 = 0;
#else
static int use_pcre16 = 1;
#endif
677
/* Textual explanations for runtime error codes. The table has 29 entries
(indexes 0 to 28). Presumably it is indexed by the negated return code of the
matching functions; confirm against the code that reads it. A NULL entry
marks a code that has no text here, either because it is handled separately
or because it is never returned by the matching functions. */

static const char *errtexts[] = {
  NULL,                  /* 0 is no error */
  NULL,                  /* NOMATCH is handled specially */
  "NULL argument passed",
  "bad option value",
  "magic number missing",
  "unknown opcode - pattern overwritten?",
  "no more memory",
  NULL,                  /* never returned by pcre_exec() or pcre_dfa_exec() */
  "match limit exceeded",
  "callout error code",
  NULL,                  /* BADUTF8/16 is handled specially */
  NULL,                  /* BADUTF8/16 offset is handled specially */
  NULL,                  /* PARTIAL is handled specially */
  "not used - internal error",
  "internal error - pattern overwritten?",
  "bad count value",
  "item unsupported for DFA matching",
  "backreference condition or recursion test not supported for DFA matching",
  "match limit not supported for DFA matching",
  "workspace size exceeded in DFA matching",
  "too much recursion for DFA matching",
  "recursion limit exceeded",
  "not used - internal error",
  "invalid combination of newline options",
  "bad offset value",
  NULL,                  /* SHORTUTF8/16 is handled specially */
  "nested recursion at the same subject position",
  "JIT stack limit reached",
  "pattern compiled in wrong mode: 8-bit/16-bit error"
};
711
712
713 /*************************************************
714 * Alternate character tables *
715 *************************************************/
716
717 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
718 using the default tables of the library. However, the T option can be used to
719 select alternate sets of tables, for different kinds of testing. Note also that
720 the L (locale) option also adjusts the tables. */
721
722 /* This is the set of tables distributed as default with PCRE. It recognizes
723 only ASCII characters. */
724
725 static const pcre_uint8 tables0[] = {
726
727 /* This table is a lower casing table. */
728
729 0, 1, 2, 3, 4, 5, 6, 7,
730 8, 9, 10, 11, 12, 13, 14, 15,
731 16, 17, 18, 19, 20, 21, 22, 23,
732 24, 25, 26, 27, 28, 29, 30, 31,
733 32, 33, 34, 35, 36, 37, 38, 39,
734 40, 41, 42, 43, 44, 45, 46, 47,
735 48, 49, 50, 51, 52, 53, 54, 55,
736 56, 57, 58, 59, 60, 61, 62, 63,
737 64, 97, 98, 99,100,101,102,103,
738 104,105,106,107,108,109,110,111,
739 112,113,114,115,116,117,118,119,
740 120,121,122, 91, 92, 93, 94, 95,
741 96, 97, 98, 99,100,101,102,103,
742 104,105,106,107,108,109,110,111,
743 112,113,114,115,116,117,118,119,
744 120,121,122,123,124,125,126,127,
745 128,129,130,131,132,133,134,135,
746 136,137,138,139,140,141,142,143,
747 144,145,146,147,148,149,150,151,
748 152,153,154,155,156,157,158,159,
749 160,161,162,163,164,165,166,167,
750 168,169,170,171,172,173,174,175,
751 176,177,178,179,180,181,182,183,
752 184,185,186,187,188,189,190,191,
753 192,193,194,195,196,197,198,199,
754 200,201,202,203,204,205,206,207,
755 208,209,210,211,212,213,214,215,
756 216,217,218,219,220,221,222,223,
757 224,225,226,227,228,229,230,231,
758 232,233,234,235,236,237,238,239,
759 240,241,242,243,244,245,246,247,
760 248,249,250,251,252,253,254,255,
761
762 /* This table is a case flipping table. */
763
764 0, 1, 2, 3, 4, 5, 6, 7,
765 8, 9, 10, 11, 12, 13, 14, 15,
766 16, 17, 18, 19, 20, 21, 22, 23,
767 24, 25, 26, 27, 28, 29, 30, 31,
768 32, 33, 34, 35, 36, 37, 38, 39,
769 40, 41, 42, 43, 44, 45, 46, 47,
770 48, 49, 50, 51, 52, 53, 54, 55,
771 56, 57, 58, 59, 60, 61, 62, 63,
772 64, 97, 98, 99,100,101,102,103,
773 104,105,106,107,108,109,110,111,
774 112,113,114,115,116,117,118,119,
775 120,121,122, 91, 92, 93, 94, 95,
776 96, 65, 66, 67, 68, 69, 70, 71,
777 72, 73, 74, 75, 76, 77, 78, 79,
778 80, 81, 82, 83, 84, 85, 86, 87,
779 88, 89, 90,123,124,125,126,127,
780 128,129,130,131,132,133,134,135,
781 136,137,138,139,140,141,142,143,
782 144,145,146,147,148,149,150,151,
783 152,153,154,155,156,157,158,159,
784 160,161,162,163,164,165,166,167,
785 168,169,170,171,172,173,174,175,
786 176,177,178,179,180,181,182,183,
787 184,185,186,187,188,189,190,191,
788 192,193,194,195,196,197,198,199,
789 200,201,202,203,204,205,206,207,
790 208,209,210,211,212,213,214,215,
791 216,217,218,219,220,221,222,223,
792 224,225,226,227,228,229,230,231,
793 232,233,234,235,236,237,238,239,
794 240,241,242,243,244,245,246,247,
795 248,249,250,251,252,253,254,255,
796
797 /* This table contains bit maps for various character classes. Each map is 32
798 bytes long and the bits run from the least significant end of each byte. The
799 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
800 graph, print, punct, and cntrl. Other classes are built from combinations. */
801
802 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
803 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
804 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
805 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
806
807 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
808 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
809 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
810 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
811
812 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
813 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
814 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
815 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
816
817 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
818 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
819 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
820 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
821
822 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
823 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
824 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
825 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
826
827 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
828 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
829 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
830 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
831
832 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
833 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
834 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
835 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
836
837 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
838 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
839 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
840 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
841
842 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
843 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
844 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
846
847 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
848 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
849 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
851
852 /* This table identifies various classes of character by individual bits:
853 0x01 white space character
854 0x02 letter
855 0x04 decimal digit
856 0x08 hexadecimal digit
857 0x10 alphanumeric or '_'
858 0x80 regular expression metacharacter or binary zero
859 */
860
861 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
862 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
863 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
864 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
865 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
866 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
867 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
868 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
869 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
870 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
871 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
872 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
873 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
874 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
875 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
876 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
877 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
878 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
879 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
880 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
881 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
882 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
883 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
884 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
885 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
886 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
887 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
888 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
889 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
890 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
891 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
892 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
893
894 /* This is a set of tables that came originally from a Windows user. It seems to
895 be at least an approximation of ISO 8859. In particular, there are characters
896 greater than 128 that are marked as spaces, letters, etc. */
897
898 static const pcre_uint8 tables1[] = {
/* First 256 entries: per-byte case mapping in which 65-90 ('A'-'Z') map to
97-122 and 192-222 map to 224-254 (215 is left unchanged); every other byte
maps to itself. Presumably this is the lower-casing table - confirm against
the table layout expected by the library. */
899 0,1,2,3,4,5,6,7,
900 8,9,10,11,12,13,14,15,
901 16,17,18,19,20,21,22,23,
902 24,25,26,27,28,29,30,31,
903 32,33,34,35,36,37,38,39,
904 40,41,42,43,44,45,46,47,
905 48,49,50,51,52,53,54,55,
906 56,57,58,59,60,61,62,63,
907 64,97,98,99,100,101,102,103,
908 104,105,106,107,108,109,110,111,
909 112,113,114,115,116,117,118,119,
910 120,121,122,91,92,93,94,95,
911 96,97,98,99,100,101,102,103,
912 104,105,106,107,108,109,110,111,
913 112,113,114,115,116,117,118,119,
914 120,121,122,123,124,125,126,127,
915 128,129,130,131,132,133,134,135,
916 136,137,138,139,140,141,142,143,
917 144,145,146,147,148,149,150,151,
918 152,153,154,155,156,157,158,159,
919 160,161,162,163,164,165,166,167,
920 168,169,170,171,172,173,174,175,
921 176,177,178,179,180,181,182,183,
922 184,185,186,187,188,189,190,191,
923 224,225,226,227,228,229,230,231,
924 232,233,234,235,236,237,238,239,
925 240,241,242,243,244,245,246,215,
926 248,249,250,251,252,253,254,223,
927 224,225,226,227,228,229,230,231,
928 232,233,234,235,236,237,238,239,
929 240,241,242,243,244,245,246,247,
930 248,249,250,251,252,253,254,255,
/* Next 256 entries: per-byte case swap. 65-90 and 97-122 exchange places, as
do 192-222 and 224-254 (215 and 223 map to themselves, 247 maps to 215 and 255
maps to 223, exactly as listed below). Presumably the case-flipping table. */
931 0,1,2,3,4,5,6,7,
932 8,9,10,11,12,13,14,15,
933 16,17,18,19,20,21,22,23,
934 24,25,26,27,28,29,30,31,
935 32,33,34,35,36,37,38,39,
936 40,41,42,43,44,45,46,47,
937 48,49,50,51,52,53,54,55,
938 56,57,58,59,60,61,62,63,
939 64,97,98,99,100,101,102,103,
940 104,105,106,107,108,109,110,111,
941 112,113,114,115,116,117,118,119,
942 120,121,122,91,92,93,94,95,
943 96,65,66,67,68,69,70,71,
944 72,73,74,75,76,77,78,79,
945 80,81,82,83,84,85,86,87,
946 88,89,90,123,124,125,126,127,
947 128,129,130,131,132,133,134,135,
948 136,137,138,139,140,141,142,143,
949 144,145,146,147,148,149,150,151,
950 152,153,154,155,156,157,158,159,
951 160,161,162,163,164,165,166,167,
952 168,169,170,171,172,173,174,175,
953 176,177,178,179,180,181,182,183,
954 184,185,186,187,188,189,190,191,
955 224,225,226,227,228,229,230,231,
956 232,233,234,235,236,237,238,239,
957 240,241,242,243,244,245,246,215,
958 248,249,250,251,252,253,254,223,
959 192,193,194,195,196,197,198,199,
960 200,201,202,203,204,205,206,207,
961 208,209,210,211,212,213,214,247,
962 216,217,218,219,220,221,222,255,
/* Next 320 entries: ten 32-byte bit maps (four lines each). NOTE(review): these
presumably follow the same order as the class maps of the preceding default
table (space, xdigit, digit, upper, lower, word, graph, print, punct, cntrl) -
confirm. Unlike the default table, several maps here have bits set for
characters above 127. */
963 0,62,0,0,1,0,0,0,
964 0,0,0,0,0,0,0,0,
965 32,0,0,0,1,0,0,0,
966 0,0,0,0,0,0,0,0,
967 0,0,0,0,0,0,255,3,
968 126,0,0,0,126,0,0,0,
969 0,0,0,0,0,0,0,0,
970 0,0,0,0,0,0,0,0,
971 0,0,0,0,0,0,255,3,
972 0,0,0,0,0,0,0,0,
973 0,0,0,0,0,0,12,2,
974 0,0,0,0,0,0,0,0,
975 0,0,0,0,0,0,0,0,
976 254,255,255,7,0,0,0,0,
977 0,0,0,0,0,0,0,0,
978 255,255,127,127,0,0,0,0,
979 0,0,0,0,0,0,0,0,
980 0,0,0,0,254,255,255,7,
981 0,0,0,0,0,4,32,4,
982 0,0,0,128,255,255,127,255,
983 0,0,0,0,0,0,255,3,
984 254,255,255,135,254,255,255,7,
985 0,0,0,0,0,4,44,6,
986 255,255,127,255,255,255,127,255,
987 0,0,0,0,254,255,255,255,
988 255,255,255,255,255,255,255,127,
989 0,0,0,0,254,255,255,255,
990 255,255,255,255,255,255,255,255,
991 0,2,0,0,255,255,255,255,
992 255,255,255,255,255,255,255,127,
993 0,0,0,0,255,255,255,255,
994 255,255,255,255,255,255,255,255,
995 0,0,0,0,254,255,0,252,
996 1,0,0,248,1,0,0,120,
997 0,0,0,0,254,255,255,255,
998 0,0,128,0,0,0,128,0,
999 255,255,255,255,0,0,0,0,
1000 0,0,0,0,0,0,0,128,
1001 255,255,255,255,0,0,0,0,
1002 0,0,0,0,0,0,0,0,
/* Final 256 entries: one flag byte per character. NOTE(review): the values
match the bit assignments documented for the default table (1 = white space,
2 = letter, 4 = decimal digit, 8 = hexadecimal digit, 16 = alphanumeric or
'_', 128 = metacharacter or binary zero) - confirm. Entries above 127 are
non-zero here for some characters. */
1003 128,0,0,0,0,0,0,0,
1004 0,1,1,0,1,1,0,0,
1005 0,0,0,0,0,0,0,0,
1006 0,0,0,0,0,0,0,0,
1007 1,0,0,0,128,0,0,0,
1008 128,128,128,128,0,0,128,0,
1009 28,28,28,28,28,28,28,28,
1010 28,28,0,0,0,0,0,128,
1011 0,26,26,26,26,26,26,18,
1012 18,18,18,18,18,18,18,18,
1013 18,18,18,18,18,18,18,18,
1014 18,18,18,128,128,0,128,16,
1015 0,26,26,26,26,26,26,18,
1016 18,18,18,18,18,18,18,18,
1017 18,18,18,18,18,18,18,18,
1018 18,18,18,128,128,0,0,0,
1019 0,0,0,0,0,1,0,0,
1020 0,0,0,0,0,0,0,0,
1021 0,0,0,0,0,0,0,0,
1022 0,0,0,0,0,0,0,0,
1023 1,0,0,0,0,0,0,0,
1024 0,0,18,0,0,0,0,0,
1025 0,0,20,20,0,18,0,0,
1026 0,20,18,0,0,0,0,0,
1027 18,18,18,18,18,18,18,18,
1028 18,18,18,18,18,18,18,18,
1029 18,18,18,18,18,18,18,0,
1030 18,18,18,18,18,18,18,18,
1031 18,18,18,18,18,18,18,18,
1032 18,18,18,18,18,18,18,18,
1033 18,18,18,18,18,18,18,0,
1034 18,18,18,18,18,18,18,18
1035 };
1036
1037
1038
1039
1040 #ifndef HAVE_STRERROR
1041 /*************************************************
1042 * Provide strerror() for non-ANSI libraries *
1043 *************************************************/
1044
/* Fallback strerror() for C libraries that lack one (SunOS4 is an example).
The message text is looked up in the traditional sys_errlist[] table, whose
number of entries is given by sys_nerr.

Argument:
  n           the error number to translate

Returns:      the table entry for n, or the fixed text "unknown error number"
              when n lies outside the table
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
1058 #endif /* HAVE_STRERROR */
1059
1060
1061 /*************************************************
1062 * JIT memory callback *
1063 *************************************************/
1064
1065 static pcre_jit_stack* jit_callback(void *arg)
1066 {
1067 return (pcre_jit_stack *)arg;
1068 }
1069
1070
1071 #if !defined NOUTF || defined SUPPORT_PCRE16
1072 /*************************************************
1073 * Convert UTF-8 string to value *
1074 *************************************************/
1075
1076 /* This function takes one or more bytes that represents a UTF-8 character,
1077 and returns the value of the character.
1078
1079 Argument:
1080 utf8bytes a pointer to the byte vector
1081 vptr a pointer to an int to receive the value
1082
1083 Returns: > 0 => the number of bytes consumed
1084 -6 to 0 => malformed UTF-8 character at offset = (-return)
1085 */
1086
1087 static int
1088 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1089 {
1090 int c = *utf8bytes++;
1091 int d = c;
1092 int i, j, s;
1093
1094 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1095 {
1096 if ((d & 0x80) == 0) break;
1097 d <<= 1;
1098 }
1099
1100 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1101 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1102
1103 /* i now has a value in the range 1-5 */
1104
1105 s = 6*i;
1106 d = (c & utf8_table3[i]) << s;
1107
1108 for (j = 0; j < i; j++)
1109 {
1110 c = *utf8bytes++;
1111 if ((c & 0xc0) != 0x80) return -(j+1);
1112 s -= 6;
1113 d |= (c & 0x3f) << s;
1114 }
1115
1116 /* Check that encoding was the correct unique one */
1117
1118 for (j = 0; j < utf8_table1_size; j++)
1119 if (d <= utf8_table1[j]) break;
1120 if (j != i) return -(i+1);
1121
1122 /* Valid value */
1123
1124 *vptr = d;
1125 return i+1;
1126 }
1127 #endif /* NOUTF || SUPPORT_PCRE16 */
1128
1129
1130
1131 #if !defined NOUTF || defined SUPPORT_PCRE16
1132 /*************************************************
1133 * Convert character value to UTF-8 *
1134 *************************************************/
1135
1136 /* This function takes an integer value in the range 0 - 0x7fffffff
1137 and encodes it as a UTF-8 character in 0 to 6 bytes.
1138
1139 Arguments:
1140 cvalue the character value
1141 utf8bytes pointer to buffer for result - at least 6 bytes long
1142
1143 Returns: number of characters placed in the buffer
1144 */
1145
1146 static int
1147 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1148 {
1149 register int i, j;
1150 for (i = 0; i < utf8_table1_size; i++)
1151 if (cvalue <= utf8_table1[i]) break;
1152 utf8bytes += i;
1153 for (j = i; j > 0; j--)
1154 {
1155 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1156 cvalue >>= 6;
1157 }
1158 *utf8bytes = utf8_table2[i] | cvalue;
1159 return i + 1;
1160 }
1161 #endif
1162
1163
1164 #ifdef SUPPORT_PCRE16
1165 /*************************************************
1166 * Convert a string to 16-bit *
1167 *************************************************/
1168
1169 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1170 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1171 double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1172 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1173 result is always left in buffer16.
1174
1175 Note that this function does not object to surrogate values. This is
1176 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1177 for the purpose of testing that they are correctly faulted.
1178
1179 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1180 in UTF-8 so that values greater than 255 can be handled.
1181
1182 Arguments:
1183 data TRUE if converting a data line; FALSE for a regex
1184 p points to a byte string
1185 utf true if UTF-8 (to be converted to UTF-16)
1186 len number of bytes in the string (excluding trailing zero)
1187
1188 Returns: number of 16-bit data items used (excluding trailing zero)
1189 OR -1 if a UTF-8 string is malformed
1190 OR -2 if a value > 0x10ffff is encountered
1191 OR -3 if a value > 0xffff is encountered when not in UTF mode
1192 */
1193
1194 static int
1195 to16(int data, pcre_uint8 *p, int utf, int len)
1196 {
1197 pcre_uint16 *pp;
1198
1199 if (buffer16_size < 2*len + 2)
1200 {
1201 if (buffer16 != NULL) free(buffer16);
1202 buffer16_size = 2*len + 2;
1203 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1204 if (buffer16 == NULL)
1205 {
1206 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1207 exit(1);
1208 }
1209 }
1210
1211 pp = buffer16;
1212
1213 if (!utf && !data)
1214 {
1215 while (len-- > 0) *pp++ = *p++;
1216 }
1217
1218 else
1219 {
1220 int c = 0;
1221 while (len > 0)
1222 {
1223 int chlen = utf82ord(p, &c);
1224 if (chlen <= 0) return -1;
1225 if (c > 0x10ffff) return -2;
1226 p += chlen;
1227 len -= chlen;
1228 if (c < 0x10000) *pp++ = c; else
1229 {
1230 if (!utf) return -3;
1231 c -= 0x10000;
1232 *pp++ = 0xD800 | (c >> 10);
1233 *pp++ = 0xDC00 | (c & 0x3ff);
1234 }
1235 }
1236 }
1237
1238 *pp = 0;
1239 return pp - buffer16;
1240 }
1241 #endif
1242
1243
1244 /*************************************************
1245 * Read or extend an input line *
1246 *************************************************/
1247
1248 /* Input lines are read into buffer, but both patterns and data lines can be
1249 continued over multiple input lines. In addition, if the buffer fills up, we
1250 want to automatically expand it so as to be able to handle extremely large
1251 lines that are needed for certain stress tests. When the input buffer is
1252 expanded, the other two buffers must also be expanded likewise, and the
1253 contents of pbuffer, which are a copy of the input for callouts, must be
1254 preserved (for when expansion happens for a data line). This is not the most
1255 optimal way of handling this, but hey, this is just a test program!
1256
1257 Arguments:
1258 f the file to read
1259 start where in buffer to start (this *must* be within buffer)
1260 prompt for stdin or readline()
1261
1262 Returns: pointer to the start of new data
1263 could be a copy of start, or could be moved
1264 NULL if no data read and EOF reached
1265 */
1266
1267 static pcre_uint8 *
1268 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1269 {
1270 pcre_uint8 *here = start;
1271
1272 for (;;)
1273 {
1274 int rlen = (int)(buffer_size - (here - buffer));
1275
1276 if (rlen > 1000)
1277 {
1278 int dlen;
1279
1280 /* If libreadline support is required, use readline() to read a line if the
1281 input is a terminal. Note that readline() removes the trailing newline, so
1282 we must put it back again, to be compatible with fgets(). */
1283
1284 #ifdef SUPPORT_LIBREADLINE
1285 if (isatty(fileno(f)))
1286 {
1287 size_t len;
1288 char *s = readline(prompt);
1289 if (s == NULL) return (here == start)? NULL : start;
1290 len = strlen(s);
1291 if (len > 0) add_history(s);
1292 if (len > rlen - 1) len = rlen - 1;
1293 memcpy(here, s, len);
1294 here[len] = '\n';
1295 here[len+1] = 0;
1296 free(s);
1297 }
1298 else
1299 #endif
1300
1301 /* Read the next line by normal means, prompting if the file is stdin. */
1302
1303 {
1304 if (f == stdin) printf("%s", prompt);
1305 if (fgets((char *)here, rlen, f) == NULL)
1306 return (here == start)? NULL : start;
1307 }
1308
1309 dlen = (int)strlen((char *)here);
1310 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1311 here += dlen;
1312 }
1313
1314 else
1315 {
1316 int new_buffer_size = 2*buffer_size;
1317 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1318 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1319 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1320
1321 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1322 {
1323 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1324 exit(1);
1325 }
1326
1327 memcpy(new_buffer, buffer, buffer_size);
1328 memcpy(new_pbuffer, pbuffer, buffer_size);
1329
1330 buffer_size = new_buffer_size;
1331
1332 start = new_buffer + (start - buffer);
1333 here = new_buffer + (here - buffer);
1334
1335 free(buffer);
1336 free(dbuffer);
1337 free(pbuffer);
1338
1339 buffer = new_buffer;
1340 dbuffer = new_dbuffer;
1341 pbuffer = new_pbuffer;
1342 }
1343 }
1344
1345 return NULL; /* Control never gets here */
1346 }
1347
1348
1349
1350 /*************************************************
1351 * Read number from string *
1352 *************************************************/
1353
1354 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1355 around with conditional compilation, just do the job by hand. It is only used
1356 for unpicking arguments, so just keep it simple.
1357
1358 Arguments:
1359 str string to be converted
1360 endptr where to put the end pointer
1361
1362 Returns: the unsigned long
1363 */
1364
1365 static int
1366 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1367 {
1368 int result = 0;
1369 while(*str != 0 && isspace(*str)) str++;
1370 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1371 *endptr = str;
1372 return(result);
1373 }
1374
1375
1376
1377 /*************************************************
1378 * Print one character *
1379 *************************************************/
1380
1381 /* Print a single character either literally, or as a hex escape. */
1382
1383 static int pchar(int c, FILE *f)
1384 {
1385 if (PRINTOK(c))
1386 {
1387 if (f != NULL) fprintf(f, "%c", c);
1388 return 1;
1389 }
1390
1391 if (c < 0x100)
1392 {
1393 if (use_utf)
1394 {
1395 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1396 return 6;
1397 }
1398 else
1399 {
1400 if (f != NULL) fprintf(f, "\\x%02x", c);
1401 return 4;
1402 }
1403 }
1404
1405 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1406 return (c <= 0x000000ff)? 6 :
1407 (c <= 0x00000fff)? 7 :
1408 (c <= 0x0000ffff)? 8 :
1409 (c <= 0x000fffff)? 9 : 10;
1410 }
1411
1412
1413
1414 #ifdef SUPPORT_PCRE8
1415 /*************************************************
1416 * Print 8-bit character string *
1417 *************************************************/
1418
1419 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1420 If handed a NULL file, just counts chars without printing. */
1421
1422 static int pchars(pcre_uint8 *p, int length, FILE *f)
1423 {
1424 int c = 0;
1425 int yield = 0;
1426
1427 if (length < 0)
1428 length = strlen((char *)p);
1429
1430 while (length-- > 0)
1431 {
1432 #if !defined NOUTF
1433 if (use_utf)
1434 {
1435 int rc = utf82ord(p, &c);
1436 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1437 {
1438 length -= rc - 1;
1439 p += rc;
1440 yield += pchar(c, f);
1441 continue;
1442 }
1443 }
1444 #endif
1445 c = *p++;
1446 yield += pchar(c, f);
1447 }
1448
1449 return yield;
1450 }
1451 #endif
1452
1453
1454
1455 #ifdef SUPPORT_PCRE16
1456 /*************************************************
1457 * Find length of 0-terminated 16-bit string *
1458 *************************************************/
1459
1460 static int strlen16(PCRE_SPTR16 p)
1461 {
1462 int len = 0;
1463 while (*p++ != 0) len++;
1464 return len;
1465 }
1466 #endif /* SUPPORT_PCRE16 */
1467
1468
1469 #ifdef SUPPORT_PCRE16
1470 /*************************************************
1471 * Print 16-bit character string *
1472 *************************************************/
1473
1474 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1475 If handed a NULL file, just counts chars without printing. */
1476
1477 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1478 {
1479 int yield = 0;
1480
1481 if (length < 0)
1482 length = strlen16(p);
1483
1484 while (length-- > 0)
1485 {
1486 int c = *p++ & 0xffff;
1487 #if !defined NOUTF
1488 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1489 {
1490 int d = *p & 0xffff;
1491 if (d >= 0xDC00 && d < 0xDFFF)
1492 {
1493 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1494 length--;
1495 p++;
1496 }
1497 }
1498 #endif
1499 yield += pchar(c, f);
1500 }
1501
1502 return yield;
1503 }
1504 #endif /* SUPPORT_PCRE16 */
1505
1506
1507
1508 #ifdef SUPPORT_PCRE8
1509 /*************************************************
1510 * Read a capture name (8-bit) and check it *
1511 *************************************************/
1512
1513 static pcre_uint8 *
1514 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1515 {
1516 pcre_uint8 *npp = *pp;
1517 while (isalnum(*p)) *npp++ = *p++;
1518 *npp++ = 0;
1519 *npp = 0;
1520 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1521 {
1522 fprintf(outfile, "no parentheses with name \"");
1523 PCHARSV(*pp, 0, -1, outfile);
1524 fprintf(outfile, "\"\n");
1525 }
1526
1527 *pp = npp;
1528 return p;
1529 }
1530 #endif /* SUPPORT_PCRE8 */
1531
1532
1533
1534 #ifdef SUPPORT_PCRE16
1535 /*************************************************
1536 * Read a capture name (16-bit) and check it *
1537 *************************************************/
1538
1539 /* Note that the text being read is 8-bit. */
1540
1541 static pcre_uint8 *
1542 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1543 {
1544 pcre_uint16 *npp = *pp;
1545 while (isalnum(*p)) *npp++ = *p++;
1546 *npp++ = 0;
1547 *npp = 0;
1548 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1549 {
1550 fprintf(outfile, "no parentheses with name \"");
1551 PCHARSV(*pp, 0, -1, outfile);
1552 fprintf(outfile, "\"\n");
1553 }
1554 *pp = npp;
1555 return p;
1556 }
1557 #endif /* SUPPORT_PCRE16 */
1558
1559
1560
1561 /*************************************************
1562 * Callout function *
1563 *************************************************/
1564
1565 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1566 the match. Yield zero unless more callouts than the fail count, or the callout
1567 data is not zero. */
1568
1569 static int callout(pcre_callout_block *cb)
1570 {
1571 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1572 int i, pre_start, post_start, subject_length;
1573
1574 if (callout_extra)
1575 {
1576 fprintf(f, "Callout %d: last capture = %d\n",
1577 cb->callout_number, cb->capture_last);
1578
1579 for (i = 0; i < cb->capture_top * 2; i += 2)
1580 {
1581 if (cb->offset_vector[i] < 0)
1582 fprintf(f, "%2d: <unset>\n", i/2);
1583 else
1584 {
1585 fprintf(f, "%2d: ", i/2);
1586 PCHARSV(cb->subject, cb->offset_vector[i],
1587 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1588 fprintf(f, "\n");
1589 }
1590 }
1591 }
1592
1593 /* Re-print the subject in canonical form, the first time or if giving full
1594 datails. On subsequent calls in the same match, we use pchars just to find the
1595 printed lengths of the substrings. */
1596
1597 if (f != NULL) fprintf(f, "--->");
1598
1599 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1600 PCHARS(post_start, cb->subject, cb->start_match,
1601 cb->current_position - cb->start_match, f);
1602
1603 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1604
1605 PCHARSV(cb->subject, cb->current_position,
1606 cb->subject_length - cb->current_position, f);
1607
1608 if (f != NULL) fprintf(f, "\n");
1609
1610 /* Always print appropriate indicators, with callout number if not already
1611 shown. For automatic callouts, show the pattern offset. */
1612
1613 if (cb->callout_number == 255)
1614 {
1615 fprintf(outfile, "%+3d ", cb->pattern_position);
1616 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1617 }
1618 else
1619 {
1620 if (callout_extra) fprintf(outfile, " ");
1621 else fprintf(outfile, "%3d ", cb->callout_number);
1622 }
1623
1624 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1625 fprintf(outfile, "^");
1626
1627 if (post_start > 0)
1628 {
1629 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1630 fprintf(outfile, "^");
1631 }
1632
1633 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1634 fprintf(outfile, " ");
1635
1636 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1637 pbuffer + cb->pattern_position);
1638
1639 fprintf(outfile, "\n");
1640 first_callout = 0;
1641
1642 if (cb->mark != last_callout_mark)
1643 {
1644 if (cb->mark == NULL)
1645 fprintf(outfile, "Latest Mark: <unset>\n");
1646 else
1647 {
1648 fprintf(outfile, "Latest Mark: ");
1649 PCHARSV(cb->mark, 0, -1, outfile);
1650 putc('\n', outfile);
1651 }
1652 last_callout_mark = cb->mark;
1653 }
1654
1655 if (cb->callout_data != NULL)
1656 {
1657 int callout_data = *((int *)(cb->callout_data));
1658 if (callout_data != 0)
1659 {
1660 fprintf(outfile, "Callout data = %d\n", callout_data);
1661 return callout_data;
1662 }
1663 }
1664
1665 return (cb->callout_number != callout_fail_id)? 0 :
1666 (++callout_count >= callout_fail_count)? 1 : 0;
1667 }
1668
1669
1670 /*************************************************
1671 * Local malloc functions *
1672 *************************************************/
1673
1674 /* Alternative malloc function, to test functionality and save the size of a
1675 compiled re, which is the first store request that pcre_compile() makes. The
1676 show_malloc variable is set only during matching. */
1677
1678 static void *new_malloc(size_t size)
1679 {
1680 void *block = malloc(size);
1681 gotten_store = size;
1682 if (first_gotten_store == 0) first_gotten_store = size;
1683 if (show_malloc)
1684 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1685 return block;
1686 }
1687
1688 static void new_free(void *block)
1689 {
1690 if (show_malloc)
1691 fprintf(outfile, "free %p\n", block);
1692 free(block);
1693 }
1694
1695 /* For recursion malloc/free, to test stacking calls */
1696
1697 static void *stack_malloc(size_t size)
1698 {
1699 void *block = malloc(size);
1700 if (show_malloc)
1701 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1702 return block;
1703 }
1704
1705 static void stack_free(void *block)
1706 {
1707 if (show_malloc)
1708 fprintf(outfile, "stack_free %p\n", block);
1709 free(block);
1710 }
1711
1712
1713 /*************************************************
1714 * Call pcre_fullinfo() *
1715 *************************************************/
1716
1717 /* Get one piece of information from the pcre_fullinfo() function. When only
1718 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1719 value, but the code is defensive.
1720
1721 Arguments:
1722 re compiled regex
1723 study study data
1724 option PCRE_INFO_xxx option
1725 ptr where to put the data
1726
1727 Returns: 0 when OK, < 0 on error
1728 */
1729
1730 static int
1731 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1732 {
1733 int rc;
1734
1735 if (use_pcre16)
1736 #ifdef SUPPORT_PCRE16
1737 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1738 #else
1739 rc = PCRE_ERROR_BADMODE;
1740 #endif
1741 else
1742 #ifdef SUPPORT_PCRE8
1743 rc = pcre_fullinfo(re, study, option, ptr);
1744 #else
1745 rc = PCRE_ERROR_BADMODE;
1746 #endif
1747
1748 if (rc < 0)
1749 {
1750 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1751 use_pcre16? "16" : "", option);
1752 if (rc == PCRE_ERROR_BADMODE)
1753 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1754 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1755 }
1756
1757 return rc;
1758 }
1759
1760
1761
1762 /*************************************************
1763 * Swap byte functions *
1764 *************************************************/
1765
1766 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1767 value, respectively.
1768
1769 Arguments:
1770 value any number
1771
1772 Returns: the byte swapped value
1773 */
1774
1775 static pcre_uint32
1776 swap_uint32(pcre_uint32 value)
1777 {
1778 return ((value & 0x000000ff) << 24) |
1779 ((value & 0x0000ff00) << 8) |
1780 ((value & 0x00ff0000) >> 8) |
1781 (value >> 24);
1782 }
1783
1784 static pcre_uint16
1785 swap_uint16(pcre_uint16 value)
1786 {
1787 return (value >> 8) | (value << 8);
1788 }
1789
1790
1791
1792 /*************************************************
1793 * Flip bytes in a compiled pattern *
1794 *************************************************/
1795
1796 /* This function is called if the 'F' option was present on a pattern that is
1797 to be written to a file. We flip the bytes of all the integer fields in the
1798 regex data block and the study block. In 16-bit mode this also flips relevant
1799 bytes in the pattern itself. This is to make it possible to test PCRE's
1800 ability to reload byte-flipped patterns, e.g. those compiled on a different
1801 architecture. */
1802
1803 static void
1804 regexflip(pcre *ere, pcre_extra *extra)
1805 {
1806 REAL_PCRE *re = (REAL_PCRE *)ere;
1807 #ifdef SUPPORT_PCRE16
1808 int op;
1809 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1810 int length = re->name_count * re->name_entry_size;
1811 #ifdef SUPPORT_UTF
1812 BOOL utf = (re->options & PCRE_UTF16) != 0;
1813 BOOL utf16_char = FALSE;
1814 #endif /* SUPPORT_UTF */
1815 #endif /* SUPPORT_PCRE16 */
1816
1817 /* Always flip the bytes in the main data block and study blocks. */
1818
1819 re->magic_number = REVERSED_MAGIC_NUMBER;
1820 re->size = swap_uint32(re->size);
1821 re->options = swap_uint32(re->options);
1822 re->flags = swap_uint16(re->flags);
1823 re->top_bracket = swap_uint16(re->top_bracket);
1824 re->top_backref = swap_uint16(re->top_backref);
1825 re->first_char = swap_uint16(re->first_char);
1826 re->req_char = swap_uint16(re->req_char);
1827 re->name_table_offset = swap_uint16(re->name_table_offset);
1828 re->name_entry_size = swap_uint16(re->name_entry_size);
1829 re->name_count = swap_uint16(re->name_count);
1830
1831 if (extra != NULL)
1832 {
1833 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1834 rsd->size = swap_uint32(rsd->size);
1835 rsd->flags = swap_uint32(rsd->flags);
1836 rsd->minlength = swap_uint32(rsd->minlength);
1837 }
1838
1839 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1840 in the name table, if present, and then in the pattern itself. */
1841
1842 #ifdef SUPPORT_PCRE16
1843 if (!use_pcre16) return;
1844
1845 while(TRUE)
1846 {
1847 /* Swap previous characters. */
1848 while (length-- > 0)
1849 {
1850 *ptr = swap_uint16(*ptr);
1851 ptr++;
1852 }
1853 #ifdef SUPPORT_UTF
1854 if (utf16_char)
1855 {
1856 if ((ptr[-1] & 0xfc00) == 0xd800)
1857 {
1858 /* We know that there is only one extra character in UTF-16. */
1859 *ptr = swap_uint16(*ptr);
1860 ptr++;
1861 }
1862 }
1863 utf16_char = FALSE;
1864 #endif /* SUPPORT_UTF */
1865
1866 /* Get next opcode. */
1867
1868 length = 0;
1869 op = *ptr;
1870 *ptr++ = swap_uint16(op);
1871
1872 switch (op)
1873 {
1874 case OP_END:
1875 return;
1876
1877 #ifdef SUPPORT_UTF
1878 case OP_CHAR:
1879 case OP_CHARI:
1880 case OP_NOT:
1881 case OP_NOTI:
1882 case OP_STAR:
1883 case OP_MINSTAR:
1884 case OP_PLUS:
1885 case OP_MINPLUS:
1886 case OP_QUERY:
1887 case OP_MINQUERY:
1888 case OP_UPTO:
1889 case OP_MINUPTO:
1890 case OP_EXACT:
1891 case OP_POSSTAR:
1892 case OP_POSPLUS:
1893 case OP_POSQUERY:
1894 case OP_POSUPTO:
1895 case OP_STARI:
1896 case OP_MINSTARI:
1897 case OP_PLUSI:
1898 case OP_MINPLUSI:
1899 case OP_QUERYI:
1900 case OP_MINQUERYI:
1901 case OP_UPTOI:
1902 case OP_MINUPTOI:
1903 case OP_EXACTI:
1904 case OP_POSSTARI:
1905 case OP_POSPLUSI:
1906 case OP_POSQUERYI:
1907 case OP_POSUPTOI:
1908 case OP_NOTSTAR:
1909 case OP_NOTMINSTAR:
1910 case OP_NOTPLUS:
1911 case OP_NOTMINPLUS:
1912 case OP_NOTQUERY:
1913 case OP_NOTMINQUERY:
1914 case OP_NOTUPTO:
1915 case OP_NOTMINUPTO:
1916 case OP_NOTEXACT:
1917 case OP_NOTPOSSTAR:
1918 case OP_NOTPOSPLUS:
1919 case OP_NOTPOSQUERY:
1920 case OP_NOTPOSUPTO:
1921 case OP_NOTSTARI:
1922 case OP_NOTMINSTARI:
1923 case OP_NOTPLUSI:
1924 case OP_NOTMINPLUSI:
1925 case OP_NOTQUERYI:
1926 case OP_NOTMINQUERYI:
1927 case OP_NOTUPTOI:
1928 case OP_NOTMINUPTOI:
1929 case OP_NOTEXACTI:
1930 case OP_NOTPOSSTARI:
1931 case OP_NOTPOSPLUSI:
1932 case OP_NOTPOSQUERYI:
1933 case OP_NOTPOSUPTOI:
1934 if (utf) utf16_char = TRUE;
1935 #endif
1936 /* Fall through. */
1937
1938 default:
1939 length = OP_lengths16[op] - 1;
1940 break;
1941
1942 case OP_CLASS:
1943 case OP_NCLASS:
1944 /* Skip the character bit map. */
1945 ptr += 32/sizeof(pcre_uint16);
1946 length = 0;
1947 break;
1948
1949 case OP_XCLASS:
1950 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1951 if (LINK_SIZE > 1)
1952 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1953 - (1 + LINK_SIZE + 1));
1954 else
1955 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1956
1957 /* Reverse the size of the XCLASS instance. */
1958 *ptr = swap_uint16(*ptr);
1959 ptr++;
1960 if (LINK_SIZE > 1)
1961 {
1962 *ptr = swap_uint16(*ptr);
1963 ptr++;
1964 }
1965
1966 op = *ptr;
1967 *ptr = swap_uint16(op);
1968 ptr++;
1969 if ((op & XCL_MAP) != 0)
1970 {
1971 /* Skip the character bit map. */
1972 ptr += 32/sizeof(pcre_uint16);
1973 length -= 32/sizeof(pcre_uint16);
1974 }
1975 break;
1976 }
1977 }
1978 /* Control should never reach here in 16 bit mode. */
1979 #endif /* SUPPORT_PCRE16 */
1980 }
1981
1982
1983
1984 /*************************************************
1985 * Check match or recursion limit *
1986 *************************************************/
1987
1988 static int
1989 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1990 int start_offset, int options, int *use_offsets, int use_size_offsets,
1991 int flag, unsigned long int *limit, int errnumber, const char *msg)
1992 {
1993 int count;
1994 int min = 0;
1995 int mid = 64;
1996 int max = -1;
1997
1998 extra->flags |= flag;
1999
2000 for (;;)
2001 {
2002 *limit = mid;
2003
2004 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2005 use_offsets, use_size_offsets);
2006
2007 if (count == errnumber)
2008 {
2009 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2010 min = mid;
2011 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2012 }
2013
2014 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2015 count == PCRE_ERROR_PARTIAL)
2016 {
2017 if (mid == min + 1)
2018 {
2019 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2020 break;
2021 }
2022 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2023 max = mid;
2024 mid = (min + mid)/2;
2025 }
2026 else break; /* Some other error */
2027 }
2028
2029 extra->flags &= ~flag;
2030 return count;
2031 }
2032
2033
2034
2035 /*************************************************
2036 * Case-independent strncmp() function *
2037 *************************************************/
2038
/* Compare two zero-terminated strings without regard to case, looking at no
more than n characters. Case folding is done with tolower(). As with strncmp(),
the comparison also ends when both strings reach their terminating zero, so
equal strings shorter than n are never read beyond their ends. A value of n
that is zero or negative compares nothing and yields 0.

Arguments:
  s           first string
  t           second string
  n           maximum number of characters to compare

Returns:      < 0, = 0, or > 0, according to the comparison
*/

static int
strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
{
while (n-- > 0)
  {
  int cs = tolower(*s++);
  int ct = tolower(*t++);
  if (cs != ct) return cs - ct;
  if (cs == 0) break;    /* Both strings have ended: stop here */
  }
return 0;
}
2058
2059
2060
2061 /*************************************************
2062 * Check newline indicator *
2063 *************************************************/
2064
2065 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2066 a message and return 0 if there is no match.
2067
2068 Arguments:
2069 p points after the leading '<'
2070 f file for error message
2071
2072 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2073 */
2074
2075 static int
2076 check_newline(pcre_uint8 *p, FILE *f)
2077 {
2078 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2079 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2080 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2081 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2082 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2083 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2084 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2085 fprintf(f, "Unknown newline type at: <%s\n", p);
2086 return 0;
2087 }
2088
2089
2090
2091 /*************************************************
2092 * Usage function *
2093 *************************************************/
2094
/* Write the command line help text to stdout. Lines that describe optional
features (-16, -dfa, -p, readline) are selected at compile time by the same
macros that control those features. */

static void
usage(void)
{
/* The text is output in table order. None of it contains printf() escapes, so
it is written with fputs(). */

static const char *const help_text[] = {
  "Usage: pcretest [options] [<input file> [<output file>]]\n\n",
  "Input and output default to stdin and stdout.\n",
#ifdef SUPPORT_LIBREADLINE
  "If input is a terminal, readline() is used to read from it.\n",
#else
  "This version of pcretest is not linked with readline().\n",
#endif
  "\nOptions:\n",
#ifdef SUPPORT_PCRE16
  " -16 use 16-bit interface\n",
#endif
  " -b show compiled code (bytecode)\n",
  " -C show PCRE compile-time options and exit\n",
  " -C arg show a specific compile-time option\n",
  " and exit with its value. The arg can be:\n",
  " linksize internal link size [2, 3, 4]\n",
  " pcre8 8 bit library support enabled [0, 1]\n",
  " pcre16 16 bit library support enabled [0, 1]\n",
  " utf Unicode Transformation Format supported [0, 1]\n",
  " ucp Unicode Properties supported [0, 1]\n",
  " jit Just-in-time compiler supported [0, 1]\n",
  /* The question marks are escaped because "??]" is a trigraph for ']' and
  would otherwise turn "???]" into "?]" when trigraphs are enabled. */
  " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ?\?\?]\n",
  " -d debug: show compiled code and information (-b and -i)\n",
#if !defined NODFA
  " -dfa force DFA matching for all subjects\n",
#endif
  " -help show usage information\n",
  " -i show information about compiled patterns\n",
  " -M find MATCH_LIMIT minimum for each subject\n",
  " -m output memory used information\n",
  " -o <n> set size of offsets vector to <n>\n",
#if !defined NOPOSIX
  " -p use POSIX interface\n",
#endif
  " -q quiet: do not output PCRE version number at start\n",
  " -S <n> set stack size to <n> megabytes\n",
  " -s force each pattern to be studied at basic level\n",
  " -s+ force each pattern to be studied, using JIT if available\n",
  " -t time compilation and execution\n",
  " -t <n> time compilation and execution, repeating <n> times\n",
  " -tm time execution (matching) only\n",
  " -tm <n> time execution (matching) only, repeating <n> times\n"
};
size_t i;

for (i = 0; i < sizeof(help_text)/sizeof(help_text[0]); i++)
  fputs(help_text[i], stdout);
}
2141
2142
2143
2144 /*************************************************
2145 * Main Program *
2146 *************************************************/
2147
2148 /* Read lines from named file or stdin and write to named file or stdout; lines
2149 consist of a regular expression, in delimiters and optionally followed by
2150 options, followed by a set of test data, terminated by an empty line. */
2151
2152 int main(int argc, char **argv)
2153 {
2154 FILE *infile = stdin;
2155 const char *version;
2156 int options = 0;
2157 int study_options = 0;
2158 int default_find_match_limit = FALSE;
2159 int op = 1;
2160 int timeit = 0;
2161 int timeitm = 0;
2162 int showinfo = 0;
2163 int showstore = 0;
2164 int force_study = -1;
2165 int force_study_options = 0;
2166 int quiet = 0;
2167 int size_offsets = 45;
2168 int size_offsets_max;
2169 int *offsets = NULL;
2170 #if !defined NOPOSIX
2171 int posix = 0;
2172 #endif
2173 int debug = 0;
2174 int done = 0;
2175 int all_use_dfa = 0;
2176 int yield = 0;
2177 int stack_size;
2178
2179 pcre_jit_stack *jit_stack = NULL;
2180
2181 /* These vectors store, end-to-end, a list of zero-terminated captured
2182 substring names, each list itself being terminated by an empty name. Assume
2183 that 1024 is plenty long enough for the few names we'll be testing. It is
2184 easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2185 for the actual memory, to ensure alignment. By defining these variables always
2186 (whether or not 8-bit or 16-bit is supported), we avoid too much mess with
2187 #ifdefs in the code. */
2188
2189 pcre_uint16 copynames[1024];
2190 pcre_uint16 getnames[1024];
2191
2192 pcre_uint16 *cn16ptr;
2193 pcre_uint16 *gn16ptr;
2194
2195 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2196 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2197 pcre_uint8 *cn8ptr;
2198 pcre_uint8 *gn8ptr;
2199
2200 /* Get buffers from malloc() so that valgrind will check their misuse when
2201 debugging. They grow automatically when very long lines are read. The 16-bit
2202 buffer (buffer16) is obtained only if needed. */
2203
2204 buffer = (pcre_uint8 *)malloc(buffer_size);
2205 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2206 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2207
2208 /* The outfile variable is static so that new_malloc can use it. */
2209
2210 outfile = stdout;
2211
2212 /* The following _setmode() stuff is some Windows magic that tells its runtime
2213 library to translate CRLF into a single LF character. At least, that's what
2214 I've been told: never having used Windows I take this all on trust. Originally
2215 it set 0x8000, but then I was advised that _O_BINARY was better. */
2216
2217 #if defined(_WIN32) || defined(WIN32)
2218 _setmode( _fileno( stdout ), _O_BINARY );
2219 #endif
2220
2221 /* Get the version number: both pcre_version() and pcre16_version() give the
2222 same answer. We just need to ensure that we call one that is available. */
2223
2224 #ifdef SUPPORT_PCRE8
2225 version = pcre_version();
2226 #else
2227 version = pcre16_version();
2228 #endif
2229
2230 /* Scan options */
2231
2232 while (argc > 1 && argv[op][0] == '-')
2233 {
2234 pcre_uint8 *endptr;
2235
2236 if (strcmp(argv[op], "-m") == 0) showstore = 1;
2237 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2238 else if (strcmp(argv[op], "-s+") == 0)
2239 {
2240 force_study = 1;
2241 force_study_options = PCRE_STUDY_JIT_COMPILE;
2242 }
2243 else if (strcmp(argv[op], "-16") == 0)
2244 {
2245 #ifdef SUPPORT_PCRE16
2246 use_pcre16 = 1;
2247 #else
2248 printf("** This version of PCRE was built without 16-bit support\n");
2249 exit(1);
2250 #endif
2251 }
2252 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2253 else if (strcmp(argv[op], "-b") == 0) debug = 1;
2254 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2255 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2256 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2257 #if !defined NODFA
2258 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2259 #endif
2260 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2261 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2262 *endptr == 0))
2263 {
2264 op++;
2265 argc--;
2266 }
2267 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2268 {
2269 int both = argv[op][2] == 0;
2270 int temp;
2271 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2272 *endptr == 0))
2273 {
2274 timeitm = temp;
2275 op++;
2276 argc--;
2277 }
2278 else timeitm = LOOPREPEAT;
2279 if (both) timeit = timeitm;
2280 }
2281 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2282 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2283 *endptr == 0))
2284 {
2285 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2286 printf("PCRE: -S not supported on this OS\n");
2287 exit(1);
2288 #else
2289 int rc;
2290 struct rlimit rlim;
2291 getrlimit(RLIMIT_STACK, &rlim);
2292 rlim.rlim_cur = stack_size * 1024 * 1024;
2293 rc = setrlimit(RLIMIT_STACK, &rlim);
2294 if (rc != 0)
2295 {
2296 printf("PCRE: setrlimit() failed with error %d\n", rc);
2297 exit(1);
2298 }
2299 op++;
2300 argc--;
2301 #endif
2302 }
2303 #if !defined NOPOSIX
2304 else if (strcmp(argv[op], "-p") == 0) posix = 1;
2305 #endif
2306 else if (strcmp(argv[op], "-C") == 0)
2307 {
2308 int rc;
2309 unsigned long int lrc;
2310
2311 if (argc > 2)
2312 {
2313 if (strcmp(argv[op + 1], "linksize") == 0)
2314 {
2315 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2316 printf("%d\n", rc);
2317 yield = rc;
2318 goto EXIT;
2319 }
2320 if (strcmp(argv[op + 1], "pcre8") == 0)
2321 {
2322 #ifdef SUPPORT_PCRE8
2323 printf("1\n");
2324 yield = 1;
2325 #else
2326 printf("0\n");
2327 yield = 0;
2328 #endif
2329 goto EXIT;
2330 }
2331 if (strcmp(argv[op + 1], "pcre16") == 0)
2332 {
2333 #ifdef SUPPORT_PCRE16
2334 printf("1\n");
2335 yield = 1;
2336 #else
2337 printf("0\n");
2338 yield = 0;
2339 #endif
2340 goto EXIT;
2341 }
2342 if (strcmp(argv[op + 1], "utf") == 0)
2343 {
2344 #ifdef SUPPORT_PCRE8
2345 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2346 printf("%d\n", rc);
2347 yield = rc;
2348 #else
2349 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2350 printf("%d\n", rc);
2351 yield = rc;
2352 #endif
2353 goto EXIT;
2354 }
2355 if (strcmp(argv[op + 1], "ucp") == 0)
2356 {
2357 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2358 printf("%d\n", rc);
2359 yield = rc;
2360 goto EXIT;
2361 }
2362 if (strcmp(argv[op + 1], "jit") == 0)
2363 {
2364 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2365 printf("%d\n", rc);
2366 yield = rc;
2367 goto EXIT;
2368 }
2369 if (strcmp(argv[op + 1], "newline") == 0)
2370 {
2371 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2372 /* Note that these values are always the ASCII values, even
2373 in EBCDIC environments. CR is 13 and NL is 10. */
2374 printf("%s\n", (rc == 13)? "CR" :
2375 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2376 (rc == -2)? "ANYCRLF" :
2377 (rc == -1)? "ANY" : "???");
2378 goto EXIT;
2379 }
2380 printf("Unknown -C option: %s\n", argv[op + 1]);
2381 goto EXIT;
2382 }
2383
2384 printf("PCRE version %s\n", version);
2385 printf("Compiled with\n");
2386
2387 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2388 are set, either both UTFs are supported or both are not supported. */
2389
2390 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2391 printf(" 8-bit and 16-bit support\n");
2392 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2393 if (rc)
2394 printf(" UTF-8 and UTF-16 support\n");
2395 else
2396 printf(" No UTF-8 or UTF-16 support\n");
2397 #elif defined SUPPORT_PCRE8
2398 printf(" 8-bit support only\n");
2399 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2400 printf(" %sUTF-8 support\n", rc? "" : "No ");
2401 #else
2402 printf(" 16-bit support only\n");
2403 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2404 printf(" %sUTF-16 support\n", rc? "" : "No ");
2405 #endif
2406
2407 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2408 printf(" %sUnicode properties support\n", rc? "" : "No ");
2409 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2410 if (rc)
2411 printf(" Just-in-time compiler support\n");
2412 else
2413 printf(" No just-in-time compiler support\n");
2414 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2415 /* Note that these values are always the ASCII values, even
2416 in EBCDIC environments. CR is 13 and NL is 10. */
2417 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2418 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2419 (rc == -2)? "ANYCRLF" :
2420 (rc == -1)? "ANY" : "???");
2421 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2422 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2423 "all Unicode newlines");
2424 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2425 printf(" Internal link size = %d\n", rc);
2426 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2427 printf(" POSIX malloc threshold = %d\n", rc);
2428 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2429 printf(" Default match limit = %ld\n", lrc);
2430 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2431 printf(" Default recursion depth limit = %ld\n", lrc);
2432 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2433 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
2434 goto EXIT;
2435 }
2436 else if (strcmp(argv[op], "-help") == 0 ||
2437 strcmp(argv[op], "--help") == 0)
2438 {
2439 usage();
2440 goto EXIT;
2441 }
2442 else
2443 {
2444 printf("** Unknown or malformed option %s\n", argv[op]);
2445 usage();
2446 yield = 1;
2447 goto EXIT;
2448 }
2449 op++;
2450 argc--;
2451 }
2452
2453 /* Get the store for the offsets vector, and remember what it was */
2454
2455 size_offsets_max = size_offsets;
2456 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2457 if (offsets == NULL)
2458 {
2459 printf("** Failed to get %d bytes of memory for offsets vector\n",
2460 (int)(size_offsets_max * sizeof(int)));
2461 yield = 1;
2462 goto EXIT;
2463 }
2464
2465 /* Sort out the input and output files */
2466
2467 if (argc > 1)
2468 {
2469 infile = fopen(argv[op], INPUT_MODE);
2470 if (infile == NULL)
2471 {
2472 printf("** Failed to open %s\n", argv[op]);
2473 yield = 1;
2474 goto EXIT;
2475 }
2476 }
2477
2478 if (argc > 2)
2479 {
2480 outfile = fopen(argv[op+1], OUTPUT_MODE);
2481 if (outfile == NULL)
2482 {
2483 printf("** Failed to open %s\n", argv[op+1]);
2484 yield = 1;
2485 goto EXIT;
2486 }
2487 }
2488
2489 /* Set alternative malloc function */
2490
2491 #ifdef SUPPORT_PCRE8
2492 pcre_malloc = new_malloc;
2493 pcre_free = new_free;
2494 pcre_stack_malloc = stack_malloc;
2495 pcre_stack_free = stack_free;
2496 #endif
2497
2498 #ifdef SUPPORT_PCRE16
2499 pcre16_malloc = new_malloc;
2500 pcre16_free = new_free;
2501 pcre16_stack_malloc = stack_malloc;
2502 pcre16_stack_free = stack_free;
2503 #endif
2504
2505 /* Heading line unless quiet, then prompt for first regex if stdin */
2506
2507 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2508
2509 /* Main loop */
2510
2511 while (!done)
2512 {
2513 pcre *re = NULL;
2514 pcre_extra *extra = NULL;
2515
2516 #if !defined NOPOSIX /* There are still compilers that require no indent */
2517 regex_t preg;
2518 int do_posix = 0;
2519 #endif
2520
2521 const char *error;
2522 pcre_uint8 *markptr;
2523 pcre_uint8 *p, *pp, *ppp;
2524 pcre_uint8 *to_file = NULL;
2525 const pcre_uint8 *tables = NULL;
2526 unsigned long int get_options;
2527 unsigned long int true_size, true_study_size = 0;
2528 size_t size, regex_gotten_store;
2529 int do_allcaps = 0;
2530 int do_mark = 0;
2531 int do_study = 0;
2532 int no_force_study = 0;
2533 int do_debug = debug;
2534 int do_G = 0;
2535 int do_g = 0;
2536 int do_showinfo = showinfo;
2537 int do_showrest = 0;
2538 int do_showcaprest = 0;
2539 int do_flip = 0;
2540 int erroroffset, len, delimiter, poffset;
2541
2542 use_utf = 0;
2543 debug_lengths = 1;
2544
2545 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2546 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2547 fflush(outfile);
2548
2549 p = buffer;
2550 while (isspace(*p)) p++;
2551 if (*p == 0) continue;
2552
2553 /* See if the pattern is to be loaded pre-compiled from a file. */
2554
2555 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2556 {
2557 pcre_uint32 magic;
2558 pcre_uint8 sbuf[8];
2559 FILE *f;
2560
2561 p++;
2562 if (*p == '!')
2563 {
2564 do_debug = TRUE;
2565 do_showinfo = TRUE;
2566 p++;
2567 }
2568
2569 pp = p + (int)strlen((char *)p);
2570 while (isspace(pp[-1])) pp--;
2571 *pp = 0;
2572
2573 f = fopen((char *)p, "rb");
2574 if (f == NULL)
2575 {
2576 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2577 continue;
2578 }
2579
2580 first_gotten_store = 0;
2581 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2582
2583 true_size =
2584 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2585 true_study_size =
2586 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2587
2588 re = (pcre *)new_malloc(true_size);
2589 regex_gotten_store = first_gotten_store;
2590
2591 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2592
2593 magic = ((REAL_PCRE *)re)->magic_number;
2594 if (magic != MAGIC_NUMBER)
2595 {
2596 if (swap_uint32(magic) == MAGIC_NUMBER)
2597 {
2598 do_flip = 1;
2599 }
2600 else
2601 {
2602 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2603 fclose(f);
2604 continue;
2605 }
2606 }
2607
2608 /* We hide the byte-invert info for little and big endian tests. */
2609 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2610 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2611
2612 /* Now see if there is any following study data. */
2613
2614 if (true_study_size != 0)
2615 {
2616 pcre_study_data *psd;
2617
2618 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2619 extra->flags = PCRE_EXTRA_STUDY_DATA;
2620
2621 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2622 extra->study_data = psd;
2623
2624 if (fread(psd, 1, true_study_size, f) != true_study_size)
2625 {
2626 FAIL_READ:
2627 fprintf(outfile, "Failed to read data from %s\n", p);
2628 if (extra != NULL)
2629 {
2630 PCRE_FREE_STUDY(extra);
2631 }
2632 if (re != NULL) new_free(re);
2633 fclose(f);
2634 continue;
2635 }
2636 fprintf(outfile, "Study data loaded from %s\n", p);
2637 do_study = 1; /* To get the data output if requested */
2638 }
2639 else fprintf(outfile, "No study data\n");
2640
2641 /* Flip the necessary bytes. */
2642 if (do_flip)
2643 {
2644 int rc;
2645 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2646 if (rc == PCRE_ERROR_BADMODE)
2647 {
2648 /* Simulate the result of the function call below. */
2649 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2650 use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2651 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2652 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2653 continue;
2654 }
2655 }
2656
2657 /* Need to know if UTF-8 for printing data strings. */
2658
2659 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2660 use_utf = (get_options & PCRE_UTF8) != 0;
2661
2662 fclose(f);
2663 goto SHOW_INFO;
2664 }
2665
2666 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2667 the pattern; if it isn't complete, read more. */
2668
2669 delimiter = *p++;
2670
2671 if (isalnum(delimiter) || delimiter == '\\')
2672 {
2673 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2674 goto SKIP_DATA;
2675 }
2676
2677 pp = p;
2678 poffset = (int)(p - buffer);
2679
2680 for(;;)
2681 {
2682 while (*pp != 0)
2683 {
2684 if (*pp == '\\' && pp[1] != 0) pp++;
2685 else if (*pp == delimiter) break;
2686 pp++;
2687 }
2688 if (*pp != 0) break;
2689 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2690 {
2691 fprintf(outfile, "** Unexpected EOF\n");
2692 done = 1;
2693 goto CONTINUE;
2694 }
2695 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2696 }
2697
2698 /* The buffer may have moved while being extended; reset the start of data
2699 pointer to the correct relative point in the buffer. */
2700
2701 p = buffer + poffset;
2702
2703 /* If the first character after the delimiter is backslash, make
2704 the pattern end with backslash. This is purely to provide a way
2705 of testing for the error message when a pattern ends with backslash. */
2706
2707 if (pp[1] == '\\') *pp++ = '\\';
2708
2709 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2710 for callouts. */
2711
2712 *pp++ = 0;
2713 strcpy((char *)pbuffer, (char *)p);
2714
2715 /* Look for options after final delimiter */
2716
2717 options = 0;
2718 study_options = 0;
2719 log_store = showstore; /* default from command line */
2720
2721 while (*pp != 0)
2722 {
2723 switch (*pp++)
2724 {
2725 case 'f': options |= PCRE_FIRSTLINE; break;
2726 case 'g': do_g = 1; break;
2727 case 'i': options |= PCRE_CASELESS; break;
2728 case 'm': options |= PCRE_MULTILINE; break;
2729 case 's': options |= PCRE_DOTALL; break;
2730 case 'x': options |= PCRE_EXTENDED; break;
2731
2732 case '+':
2733 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2734 break;
2735
2736 case '=': do_allcaps = 1; break;
2737 case 'A': options |= PCRE_ANCHORED; break;
2738 case 'B': do_debug = 1; break;
2739 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2740 case 'D': do_debug = do_showinfo = 1; break;
2741 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2742 case 'F': do_flip = 1; break;
2743 case 'G': do_G = 1; break;
2744 case 'I': do_showinfo = 1; break;
2745 case 'J': options |= PCRE_DUPNAMES; break;
2746 case 'K': do_mark = 1; break;
2747 case 'M': log_store = 1; break;
2748 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2749
2750 #if !defined NOPOSIX
2751 case 'P': do_posix = 1; break;
2752 #endif
2753
2754 case 'S':
2755 if (do_study == 0)
2756 {
2757 do_study = 1;
2758 if (*pp == '+')
2759 {
2760 study_options |= PCRE_STUDY_JIT_COMPILE;
2761 pp++;
2762 }
2763 }
2764 else
2765 {
2766 do_study = 0;
2767 no_force_study = 1;
2768 }
2769 break;
2770
2771 case 'U': options |= PCRE_UNGREEDY; break;
2772 case 'W': options |= PCRE_UCP; break;
2773 case 'X': options |= PCRE_EXTRA; break;
2774 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2775 case 'Z': debug_lengths = 0; break;
2776 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2777 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2778
2779 case 'T':
2780 switch (*pp++)
2781 {
2782 case '0': tables = tables0; break;
2783 case '1': tables = tables1; break;
2784
2785 case '\r':
2786 case '\n':
2787 case ' ':
2788 case 0:
2789 fprintf(outfile, "** Missing table number after /T\n");
2790 goto SKIP_DATA;
2791
2792 default:
2793 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2794 goto SKIP_DATA;
2795 }
2796 break;
2797
2798 case 'L':
2799 ppp = pp;
2800 /* The '\r' test here is so that it works on Windows. */
2801 /* The '0' test is just in case this is an unterminated line. */
2802 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2803 *ppp = 0;
2804 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2805 {
2806 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2807 goto SKIP_DATA;
2808 }
2809 locale_set = 1;
2810 tables = PCRE_MAKETABLES;
2811 pp = ppp;
2812 break;
2813
2814 case '>':
2815 to_file = pp;
2816 while (*pp != 0) pp++;
2817 while (isspace(pp[-1])) pp--;
2818 *pp = 0;
2819 break;
2820
2821 case '<':
2822 {
2823 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2824 {
2825 options |= PCRE_JAVASCRIPT_COMPAT;
2826 pp += 3;
2827 }
2828 else
2829 {
2830 int x = check_newline(pp, outfile);
2831 if (x == 0) goto SKIP_DATA;
2832 options |= x;
2833 while (*pp++ != '>');
2834 }
2835 }
2836 break;
2837
2838 case '\r': /* So that it works in Windows */
2839 case '\n':
2840 case ' ':
2841 break;
2842
2843 default:
2844 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2845 goto SKIP_DATA;
2846 }
2847 }
2848
2849 /* Handle compiling via the POSIX interface, which doesn't support the
2850 timing, showing, or debugging options, nor the ability to pass over
2851 local character tables. Neither does it have 16-bit support. */
2852
2853 #if !defined NOPOSIX
2854 if (posix || do_posix)
2855 {
2856 int rc;
2857 int cflags = 0;
2858
2859 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2860 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2861 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2862 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2863 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2864 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2865 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2866
2867 first_gotten_store = 0;
2868 rc = regcomp(&preg, (char *)p, cflags);
2869
2870 /* Compilation failed; go back for another re, skipping to blank line
2871 if non-interactive. */
2872
2873 if (rc != 0)
2874 {
2875 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2876 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2877 goto SKIP_DATA;
2878 }
2879 }
2880
2881 /* Handle compiling via the native interface */
2882
2883 else
2884 #endif /* !defined NOPOSIX */
2885
2886 {
2887 /* In 16-bit mode, convert the input. */
2888
2889 #ifdef SUPPORT_PCRE16
2890 if (use_pcre16)
2891 {
2892 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2893 {
2894 case -1:
2895 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2896 "converted to UTF-16\n");
2897 goto SKIP_DATA;
2898
2899 case -2:
2900 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2901 "cannot be converted to UTF-16\n");
2902 goto SKIP_DATA;
2903
2904 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2905 fprintf(outfile, "**Failed: character value greater than 0xffff "
2906 "cannot be converted to 16-bit in non-UTF mode\n");
2907 goto SKIP_DATA;
2908
2909 default:
2910 break;
2911 }
2912 p = (pcre_uint8 *)buffer16;
2913 }
2914 #endif
2915
2916 /* Compile many times when timing */
2917
2918 if (timeit > 0)
2919 {
2920 register int i;
2921 clock_t time_taken;
2922 clock_t start_time = clock();
2923 for (i = 0; i < timeit; i++)
2924 {
2925 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2926 if (re != NULL) free(re);
2927 }
2928 time_taken = clock() - start_time;
2929 fprintf(outfile, "Compile time %.4f milliseconds\n",
2930 (((double)time_taken * 1000.0) / (double)timeit) /
2931 (double)CLOCKS_PER_SEC);
2932 }
2933
2934 first_gotten_store = 0;
2935 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2936
2937 /* Compilation failed; go back for another re, skipping to blank line
2938 if non-interactive. */
2939
2940 if (re == NULL)
2941 {
2942 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2943 SKIP_DATA:
2944 if (infile != stdin)
2945 {
2946 for (;;)
2947 {
2948 if (extend_inputline(infile, buffer, NULL) == NULL)
2949 {
2950 done = 1;
2951 goto CONTINUE;
2952 }
2953 len = (int)strlen((char *)buffer);
2954 while (len > 0 && isspace(buffer[len-1])) len--;
2955 if (len == 0) break;
2956 }
2957 fprintf(outfile, "\n");
2958 }
2959 goto CONTINUE;
2960 }
2961
2962 /* Compilation succeeded. It is now possible to set the UTF-8 option from
2963 within the regex; check for this so that we know how to process the data
2964 lines. */
2965
2966 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2967 goto SKIP_DATA;
2968 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2969
2970 /* Extract the size for possible writing before possibly flipping it,
2971 and remember the store that was got. */
2972
2973 true_size = ((REAL_PCRE *)re)->size;
2974 regex_gotten_store = first_gotten_store;
2975
2976 /* Output code size information if requested */
2977
2978 if (log_store)
2979 fprintf(outfile, "Memory allocation (code space): %d\n",
2980 (int)(first_gotten_store -
2981 sizeof(REAL_PCRE) -
2982 ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
2983
2984 /* If -s or /S was present, study the regex to generate additional info to
2985 help with the matching, unless the pattern has the SS option, which
2986 suppresses the effect of /S (used for a few test patterns where studying is
2987 never sensible). */
2988
2989 if (do_study || (force_study >= 0 && !no_force_study))
2990 {
2991 if (timeit > 0)
2992 {
2993 register int i;
2994 clock_t time_taken;
2995 clock_t start_time = clock();
2996 for (i = 0; i < timeit; i++)
2997 {
2998 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2999 }
3000 time_taken = clock() - start_time;
3001 if (extra != NULL)
3002 {
3003 PCRE_FREE_STUDY(extra);
3004 }
3005 fprintf(outfile, " Study time %.4f milliseconds\n",
3006 (((double)time_taken * 1000.0) / (double)timeit) /
3007 (double)CLOCKS_PER_SEC);
3008 }
3009 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3010 if (error != NULL)
3011 fprintf(outfile, "Failed to study: %s\n", error);
3012 else if (extra != NULL)
3013 {
3014 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3015 if (log_store)
3016 {
3017 size_t jitsize;
3018 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3019 jitsize != 0)
3020 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3021 }
3022 }
3023 }
3024
3025 /* If /K was present, we set up for handling MARK data. */
3026
3027 if (do_mark)
3028 {
3029 if (extra == NULL)
3030 {
3031 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3032 extra->flags = 0;
3033 }
3034 extra->mark = &markptr;
3035 extra->flags |= PCRE_EXTRA_MARK;
3036 }
3037
3038 /* Extract and display information from the compiled data if required. */
3039
3040 SHOW_INFO:
3041
3042 if (do_debug)
3043 {
3044 fprintf(outfile, "------------------------------------------------------------------\n");
3045 PCRE_PRINTINT(re, outfile, debug_lengths);
3046 }
3047
3048 /* We already have the options in get_options (see above) */
3049
3050 if (do_showinfo)
3051 {
3052 unsigned long int all_options;
3053 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3054 hascrorlf;
3055 int nameentrysize, namecount;
3056 const pcre_uint8 *nametable;
3057
3058 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3059 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3060 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3061 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3062 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3063 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3064 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3065 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3066 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3067 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3068 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3069 != 0)
3070 goto SKIP_DATA;
3071
3072 if (size != regex_gotten_store) fprintf(outfile,
3073 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3074 (int)size, (int)regex_gotten_store);
3075
3076 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3077 if (backrefmax > 0)
3078 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3079
3080 if (namecount > 0)
3081 {
3082 fprintf(outfile, "Named capturing subpatterns:\n");
3083 while (namecount-- > 0)
3084 {
3085 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3086 int imm2_size = use_pcre16 ? 1 : 2;
3087 #else
3088 int imm2_size = IMM2_SIZE;
3089 #endif
3090 int length = (int)STRLEN(nametable + imm2_size);
3091 fprintf(outfile, " ");
3092 PCHARSV(nametable, imm2_size, length, outfile);
3093 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3094 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3095 fprintf(outfile, "%3d\n", use_pcre16?
3096 (int)(((PCRE_SPTR16)nametable)[0])
3097 :((int)nametable[0] << 8) | (int)nametable[1]);
3098 nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3099 #else
3100 fprintf(outfile, "%3d\n", GET2(nametable, 0));
3101 #ifdef SUPPORT_PCRE8
3102 nametable += nameentrysize;
3103 #else
3104 nametable += nameentrysize * 2;
3105 #endif
3106 #endif
3107 }
3108 }
3109
3110 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3111 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3112
3113 all_options = ((REAL_PCRE *)re)->options;
3114 if (do_flip) all_options = swap_uint32(all_options);
3115
3116 if (get_options == 0) fprintf(outfile, "No options\n");
3117 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3118 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3119 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3120 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3121 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3122 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3123 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3124 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3125 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3126 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3127 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3128 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3129 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3130 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3131 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3132 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3133 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3134 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3135
3136 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3137
3138 switch (get_options & PCRE_NEWLINE_BITS)
3139 {
3140 case PCRE_NEWLINE_CR:
3141 fprintf(outfile, "Forced newline sequence: CR\n");
3142 break;
3143
3144 case PCRE_NEWLINE_LF:
3145 fprintf(outfile, "Forced newline sequence: LF\n");
3146 break;
3147
3148 case PCRE_NEWLINE_CRLF:
3149 fprintf(outfile, "Forced newline sequence: CRLF\n");
3150 break;
3151
3152 case PCRE_NEWLINE_ANYCRLF:
3153 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3154 break;
3155
3156 case PCRE_NEWLINE_ANY:
3157 fprintf(outfile, "Forced newline sequence: ANY\n");
3158 break;
3159
3160 default:
3161 break;
3162 }
3163
3164 if (first_char == -1)
3165 {
3166 fprintf(outfile, "First char at start or follows newline\n");
3167 }
3168 else if (first_char < 0)
3169 {
3170 fprintf(outfile, "No first char\n");
3171 }
3172 else
3173 {
3174 const char *caseless =
3175 ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3176 "" : " (caseless)";
3177
3178 if (PRINTOK(first_char))
3179 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3180 else
3181 {
3182 fprintf(outfile, "First char = ");
3183 pchar(first_char, outfile);
3184 fprintf(outfile, "%s\n", caseless);
3185 }
3186 }
3187
3188 if (need_char < 0)
3189 {
3190 fprintf(outfile, "No need char\n");
3191 }
3192 else
3193 {
3194 const char *caseless =
3195 ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3196 "" : " (caseless)";
3197
3198 if (PRINTOK(need_char))
3199 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3200 else
3201 {
3202 fprintf(outfile, "Need char = ");
3203 pchar(need_char, outfile);
3204 fprintf(outfile, "%s\n", caseless);
3205 }
3206 }
3207
3208 /* Don't output study size; at present it is in any case a fixed
3209 value, but it varies, depending on the computer architecture, and
3210 so messes up the test suite. (And with the /F option, it might be
3211 flipped.) If study was forced by an external -s, don't show this
3212 information unless -i or -d was also present. This means that, except
3213 when auto-callouts are involved, the output from runs with and without
3214 -s should be identical. */
3215
3216 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3217 {
3218 if (extra == NULL)
3219 fprintf(outfile, "Study returned NULL\n");
3220 else
3221 {
3222 pcre_uint8 *start_bits = NULL;
3223 int minlength;
3224
3225 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3226 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3227
3228 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3229 {
3230 if (start_bits == NULL)
3231 fprintf(outfile, "No set of starting bytes\n");
3232 else
3233 {
3234 int i;
3235 int c = 24;
3236 fprintf(outfile, "Starting byte set: ");
3237 for (i = 0; i < 256; i++)
3238 {
3239 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3240 {
3241 if (c > 75)
3242 {
3243 fprintf(outfile, "\n ");
3244 c = 2;
3245 }
3246 if (PRINTOK(i) && i != ' ')
3247 {
3248 fprintf(outfile, "%c ", i);
3249 c += 2;
3250 }
3251 else
3252 {
3253 fprintf(outfile, "\\x%02x ", i);
3254 c += 5;
3255 }
3256 }
3257 }
3258 fprintf(outfile, "\n");
3259 }
3260 }
3261 }
3262
3263 /* Show this only if the JIT was set by /S, not by -s. */
3264
3265 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3266 {
3267 int jit;
3268 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3269 {
3270 if (jit)
3271 fprintf(outfile, "JIT study was successful\n");
3272 else
3273 #ifdef SUPPORT_JIT
3274 fprintf(outfile, "JIT study was not successful\n");
3275 #else
3276 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3277 #endif
3278 }
3279 }
3280 }
3281 }
3282
3283 /* If the '>' option was present, we write out the regex to a file, and
3284 that is all. The first 8 bytes of the file are the regex length and then
3285 the study length, in big-endian order. */
3286
3287 if (to_file != NULL)
3288 {
3289 FILE *f = fopen((char *)to_file, "wb");
3290 if (f == NULL)
3291 {
3292 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3293 }
3294 else
3295 {
3296 pcre_uint8 sbuf[8];
3297
3298 if (do_flip) regexflip(re, extra);
3299 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3300 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3301 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3302 sbuf[3] = (pcre_uint8)((true_size) & 255);
3303 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3304 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3305 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3306 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3307
3308 if (fwrite(sbuf, 1, 8, f) < 8 ||
3309 fwrite(re, 1, true_size, f) < true_size)
3310 {
3311 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3312 }
3313 else
3314 {
3315 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3316
3317 /* If there is study data, write it. */
3318
3319 if (extra != NULL)
3320 {
3321 if (fwrite(extra->study_data, 1, true_study_size, f) <
3322 true_study_size)
3323 {
3324 fprintf(outfile, "Write error on %s: %s\n", to_file,
3325 strerror(errno));
3326 }
3327 else fprintf(outfile, "Study data written to %s\n", to_file);
3328 }
3329 }
3330 fclose(f);
3331 }
3332
3333 new_free(re);
3334 if (extra != NULL)
3335 {
3336 PCRE_FREE_STUDY(extra);
3337 }
3338 if (locale_set)
3339 {
3340 new_free((void *)tables);
3341 setlocale(LC_CTYPE, "C");
3342 locale_set = 0;
3343 }
3344 continue; /* With next regex */
3345 }
3346 } /* End of non-POSIX compile */
3347
3348 /* Read data lines and test them */
3349
3350 for (;;)
3351 {
3352 pcre_uint8 *q;
3353 pcre_uint8 *bptr;
3354 int *use_offsets = offsets;
3355 int use_size_offsets = size_offsets;
3356 int callout_data = 0;
3357 int callout_data_set = 0;
3358 int count, c;
3359 int copystrings = 0;
3360 int find_match_limit = default_find_match_limit;
3361 int getstrings = 0;
3362 int getlist = 0;
3363 int gmatched = 0;
3364 int start_offset = 0;
3365 int start_offset_sign = 1;
3366 int g_notempty = 0;
3367 int use_dfa = 0;
3368
3369 *copynames = 0;
3370 *getnames = 0;
3371
3372 cn16ptr = copynames;
3373 gn16ptr = getnames;
3374 cn8ptr = copynames8;
3375 gn8ptr = getnames8;
3376
3377 SET_PCRE_CALLOUT(callout);
3378 first_callout = 1;
3379 last_callout_mark = NULL;
3380 callout_extra = 0;
3381 callout_count = 0;
3382 callout_fail_count = 999999;
3383 callout_fail_id = -1;
3384 show_malloc = 0;
3385 options = 0;
3386
3387 if (extra != NULL) extra->flags &=
3388 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3389
3390 len = 0;
3391 for (;;)
3392 {
3393 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3394 {
3395 if (len > 0) /* Reached EOF without hitting a newline */
3396 {
3397 fprintf(outfile, "\n");
3398 break;
3399 }
3400 done = 1;
3401 goto CONTINUE;
3402 }
3403 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3404 len = (int)strlen((char *)buffer);
3405 if (buffer[len-1] == '\n') break;
3406 }
3407
3408 while (len > 0 && isspace(buffer[len-1])) len--;
3409 buffer[len] = 0;
3410 if (len == 0) break;
3411
3412 p = buffer;
3413 while (isspace(*p)) p++;
3414
3415 bptr = q = dbuffer;
3416 while ((c = *p++) != 0)
3417 {
3418 int i = 0;
3419 int n = 0;
3420
3421 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3422 In non-UTF mode, allow the value of the byte to fall through to later,
3423 where values greater than 127 are turned into UTF-8 when running in
3424 16-bit mode. */
3425
3426 if (c != '\\')
3427 {
3428 if (use_utf)
3429 {
3430 *q++ = c;
3431 continue;
3432 }
3433 }
3434
3435 /* Handle backslash escapes */
3436
3437 else switch ((c = *p++))
3438 {
3439 case 'a': c = 7; break;
3440 case 'b': c = '\b'; break;
3441 case 'e': c = 27; break;
3442 case 'f': c = '\f'; break;
3443 case 'n': c = '\n'; break;
3444 case 'r': c = '\r'; break;
3445 case 't': c = '\t'; break;
3446 case 'v': c = '\v'; break;
3447
3448 case '0': case '1': case '2': case '3':
3449 case '4': case '5': case '6': case '7':
3450 c -= '0';
3451 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3452 c = c * 8 + *p++ - '0';
3453 break;
3454
3455 case 'x':
3456 if (*p == '{')
3457 {
3458 pcre_uint8 *pt = p;
3459 c = 0;
3460
3461 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3462 when isxdigit() is a macro that refers to its argument more than
3463 once. This is banned by the C Standard, but apparently happens in at
3464 least one MacOS environment. */
3465
3466 for (pt++; isxdigit(*pt); pt++)
3467 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3468 if (*pt == '}')
3469 {
3470 p = pt + 1;
3471 break;
3472 }
3473 /* Not correct form for \x{...}; fall through */
3474 }
3475
3476 /* \x without {} always defines just one byte in 8-bit mode. This
3477 allows UTF-8 characters to be constructed byte by byte, and also allows
3478 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3479 Otherwise, pass it down to later code so that it can be turned into
3480 UTF-8 when running in 16-bit mode. */
3481
3482 c = 0;
3483 while (i++ < 2 && isxdigit(*p))
3484 {
3485 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3486 p++;
3487 }
3488 if (use_utf)
3489 {
3490 *q++ = c;
3491 continue;
3492 }
3493 break;
3494
3495 case 0: /* \ followed by EOF allows for an empty line */
3496 p--;
3497 continue;
3498
3499 case '>':
3500 if (*p == '-')
3501 {
3502 start_offset_sign = -1;
3503 p++;
3504 }
3505 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3506 start_offset *= start_offset_sign;
3507 continue;
3508
3509 case 'A': /* Option setting */
3510 options |= PCRE_ANCHORED;
3511 continue;
3512
3513 case 'B':
3514 options |= PCRE_NOTBOL;
3515 continue;
3516
3517 case 'C':
3518 if (isdigit(*p)) /* Set copy string */
3519 {
3520 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3521 copystrings |= 1 << n;
3522 }
3523 else if (isalnum(*p))
3524 {
3525 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3526 }
3527 else if (*p == '+')
3528 {
3529 callout_extra = 1;
3530 p++;
3531 }
3532 else if (*p == '-')
3533 {
3534 SET_PCRE_CALLOUT(NULL);
3535 p++;
3536 }
3537 else if (*p == '!')
3538 {
3539 callout_fail_id = 0;
3540 p++;
3541 while(isdigit(*p))
3542 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3543 callout_fail_count = 0;
3544 if (*p == '!')
3545 {
3546 p++;
3547 while(isdigit(*p))
3548 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3549 }
3550 }
3551 else if (*p == '*')
3552 {
3553 int sign = 1;
3554 callout_data = 0;
3555 if (*(++p) == '-') { sign = -1; p++; }
3556 while(isdigit(*p))
3557 callout_data = callout_data * 10 + *p++ - '0';
3558 callout_data *= sign;
3559 callout_data_set = 1;
3560 }
3561 continue;
3562
3563 #if !defined NODFA
3564 case 'D':
3565 #if !defined NOPOSIX
3566 if (posix || do_posix)
3567 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3568 else
3569 #endif
3570 use_dfa = 1;
3571 continue;
3572 #endif
3573
3574 #if !defined NODFA
3575 case 'F':
3576 options |= PCRE_DFA_SHORTEST;
3577 continue;
3578 #endif
3579
3580 case 'G':
3581 if (isdigit(*p))
3582 {
3583 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3584 getstrings |= 1 << n;
3585 }
3586 else if (isalnum(*p))
3587 {
3588 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3589 }
3590 continue;
3591
3592 case 'J':
3593 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3594 if (extra != NULL
3595 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3596 && extra->executable_jit != NULL)
3597 {
3598 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3599 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3600 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3601 }
3602 continue;
3603
3604 case 'L':
3605 getlist = 1;
3606 continue;
3607
3608 case 'M':
3609 find_match_limit = 1;
3610 continue;
3611
3612 case 'N':
3613 if ((options & PCRE_NOTEMPTY) != 0)
3614 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3615 else
3616 options |= PCRE_NOTEMPTY;
3617 continue;
3618
3619 case 'O':
3620 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3621 if (n > size_offsets_max)
3622 {
3623 size_offsets_max = n;
3624 free(offsets);
3625 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3626 if (offsets == NULL)
3627 {
3628 printf("** Failed to get %d bytes of memory for offsets vector\n",
3629 (int)(size_offsets_max * sizeof(int)));
3630 yield = 1;
3631 goto EXIT;
3632 }
3633 }
3634 use_size_offsets = n;
3635 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3636 continue;
3637
3638 case 'P':
3639 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3640 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3641 continue;
3642
3643 case 'Q':
3644 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3645 if (extra == NULL)
3646 {
3647 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3648 extra->flags = 0;
3649 }
3650 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3651 extra->match_limit_recursion = n;
3652 continue;
3653
3654 case 'q':
3655 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3656 if (extra == NULL)
3657 {
3658 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3659 extra->flags = 0;
3660 }
3661 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3662 extra->match_limit = n;
3663 continue;
3664
3665 #if !defined NODFA
3666 case 'R':
3667 options |= PCRE_DFA_RESTART;
3668 continue;
3669 #endif
3670
3671 case 'S':
3672 show_malloc = 1;
3673 continue;
3674
3675 case 'Y':
3676 options |= PCRE_NO_START_OPTIMIZE;
3677 continue;
3678
3679 case 'Z':
3680 options |= PCRE_NOTEOL;
3681 continue;
3682
3683 case '?':
3684 options |= PCRE_NO_UTF8_CHECK;
3685 continue;
3686
3687 case '<':
3688 {
3689 int x = check_newline(p, outfile);
3690 if (x == 0) goto NEXT_DATA;
3691 options |= x;
3692 while (*p++ != '>');
3693 }
3694 continue;
3695 }
3696
3697 /* We now have a character value in c that may be greater than 255. In
3698 16-bit mode, we always convert characters to UTF-8 so that values greater
3699 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3700 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3701 mode must have come from \x{...} or octal constructs because values from
3702 \x.. get this far only in non-UTF mode. */
3703
3704 #if !defined NOUTF || defined SUPPORT_PCRE16
3705 if (use_pcre16 || use_utf)
3706 {
3707 pcre_uint8 buff8[8];
3708 int ii, utn;
3709 utn = ord2utf8(c, buff8);
3710 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3711 }
3712 else
3713 #endif
3714 {
3715 if (c > 255)
3716 {
3717 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3718 "and UTF-8 mode is not enabled.\n", c);
3719 fprintf(outfile, "** Truncation will probably give the wrong "
3720 "result.\n");
3721 }
3722 *q++ = c;
3723 }
3724 }
3725
3726 /* Reached end of subject string */
3727
3728 *q = 0;
3729 len = (int)(q - dbuffer);
3730
3731 /* Move the data to the end of the buffer so that a read over the end of
3732 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3733 we are using the POSIX interface, we must include the terminating zero. */
3734
3735 #if !defined NOPOSIX
3736 if (posix || do_posix)
3737 {
3738 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3739 bptr += buffer_size - len - 1;
3740 }
3741 else
3742 #endif
3743 {
3744 memmove(bptr + buffer_size - len, bptr, len);
3745 bptr += buffer_size - len;
3746 }
3747
3748 if ((all_use_dfa || use_dfa) && find_match_limit)
3749 {
3750 printf("**Match limit not relevant for DFA matching: ignored\n");
3751 find_match_limit = 0;
3752 }
3753
3754 /* Handle matching via the POSIX interface, which does not
3755 support timing or playing with the match limit or callout data. */
3756
3757 #if !defined NOPOSIX
3758 if (posix || do_posix)
3759 {
3760 int rc;
3761 int eflags = 0;
3762 regmatch_t *pmatch = NULL;
3763 if (use_size_offsets > 0)
3764 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3765 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3766 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3767 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3768
3769 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3770
3771 if (rc != 0)
3772 {
3773 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3774 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3775 }
3776 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3777 != 0)
3778 {
3779 fprintf(outfile, "Matched with REG_NOSUB\n");
3780 }
3781 else
3782 {
3783 size_t i;
3784 for (i = 0; i < (size_t)use_size_offsets; i++)
3785 {
3786 if (pmatch[i].rm_so >= 0)
3787 {
3788 fprintf(outfile, "%2d: ", (int)i);
3789 PCHARSV(dbuffer, pmatch[i].rm_so,
3790 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3791 fprintf(outfile, "\n");
3792 if (do_showcaprest || (i == 0 && do_showrest))
3793 {
3794 fprintf(outfile, "%2d+ ", (int)i);
3795 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3796 outfile);
3797 fprintf(outfile, "\n");
3798 }
3799 }
3800 }
3801 }
3802 free(pmatch);
3803 goto NEXT_DATA;
3804 }
3805
3806 #endif /* !defined NOPOSIX */
3807
3808 /* Handle matching via the native interface - repeats for /g and /G */
3809
3810 #ifdef SUPPORT_PCRE16
3811 if (use_pcre16)
3812 {
3813 len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3814 switch(len)
3815 {
3816 case -1:
3817 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3818 "converted to UTF-16\n");
3819 goto NEXT_DATA;
3820
3821 case -2:
3822 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3823 "cannot be converted to UTF-16\n");
3824 goto NEXT_DATA;
3825
3826 case -3:
3827 fprintf(outfile, "**Failed: character value greater than 0xffff "
3828 "cannot be converted to 16-bit in non-UTF mode\n");
3829 goto NEXT_DATA;
3830
3831 default:
3832 break;
3833 }
3834 bptr = (pcre_uint8 *)buffer16;
3835 }
3836 #endif
3837
3838 for (;; gmatched++) /* Loop for /g or /G */
3839 {
3840 markptr = NULL;
3841
3842 if (timeitm > 0)
3843 {
3844 register int i;
3845 clock_t time_taken;
3846 clock_t start_time = clock();
3847
3848 #if !defined NODFA
3849 if (all_use_dfa || use_dfa)
3850 {
3851 int workspace[1000];
3852 for (i = 0; i < timeitm; i++)
3853 {
3854 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3855 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3856 (sizeof(workspace)/sizeof(int)));
3857 }
3858 }
3859 else
3860 #endif
3861
3862 for (i = 0; i < timeitm; i++)
3863 {
3864 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3865 (options | g_notempty), use_offsets, use_size_offsets);
3866 }
3867 time_taken = clock() - start_time;
3868 fprintf(outfile, "Execute time %.4f milliseconds\n",
3869 (((double)time_taken * 1000.0) / (double)timeitm) /
3870 (double)CLOCKS_PER_SEC);
3871 }
3872
3873 /* If find_match_limit is set, we want to do repeated matches with
3874 varying limits in order to find the minimum value for the match limit and
3875 for the recursion limit. The match limits are relevant only to the normal
3876 running of pcre_exec(), so disable the JIT optimization. This makes it
3877 possible to run the same set of tests with and without JIT externally
3878 requested. */
3879
3880 if (find_match_limit)
3881 {
3882 if (extra == NULL)
3883 {
3884 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3885 extra->flags = 0;
3886 }
3887 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3888
3889 (void)check_match_limit(re, extra, bptr, len, start_offset,
3890 options|g_notempty, use_offsets, use_size_offsets,
3891 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3892 PCRE_ERROR_MATCHLIMIT, "match()");
3893
3894 count = check_match_limit(re, extra, bptr, len, start_offset,
3895 options|g_notempty, use_offsets, use_size_offsets,
3896 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3897 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3898 }
3899
3900 /* If callout_data is set, use the interface with additional data */
3901
3902 else if (callout_data_set)
3903 {
3904 if (extra == NULL)
3905 {
3906 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3907 extra->flags = 0;
3908 }
3909 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3910 extra->callout_data = &callout_data;
3911 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3912 options | g_notempty, use_offsets, use_size_offsets);
3913 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3914 }
3915
3916 /* The normal case is just to do the match once, with the default
3917 value of match_limit. */
3918
3919 #if !defined NODFA
3920 else if (all_use_dfa || use_dfa)
3921 {
3922 int workspace[1000];
3923 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3924 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3925 (sizeof(workspace)/sizeof(int)));
3926 if (count == 0)
3927 {
3928 fprintf(outfile, "Matched, but too many subsidiary matches\n");
3929 count = use_size_offsets/2;
3930 }
3931 }
3932 #endif
3933
3934 else
3935 {
3936 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3937 options | g_notempty, use_offsets, use_size_offsets);
3938 if (count == 0)
3939 {
3940 fprintf(outfile, "Matched, but too many substrings\n");
3941 count = use_size_offsets/3;
3942 }
3943 }
3944
3945 /* Matched */
3946
3947 if (count >= 0)
3948 {
3949 int i, maxcount;
3950 void *cnptr, *gnptr;
3951
3952 #if !defined NODFA
3953 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3954 #endif
3955 maxcount = use_size_offsets/3;
3956
3957 /* This is a check against a lunatic return value. */
3958
3959 if (count > maxcount)
3960 {
3961 fprintf(outfile,
3962 "** PCRE error: returned count %d is too big for offset size %d\n",
3963 count, use_size_offsets);
3964 count = use_size_offsets/3;
3965 if (do_g || do_G)
3966 {
3967 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3968 do_g = do_G = FALSE; /* Break g/G loop */
3969 }
3970 }
3971
3972 /* do_allcaps requests showing of all captures in the pattern, to check
3973 unset ones at the end. */
3974
3975 if (do_allcaps)
3976 {
3977 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
3978 goto SKIP_DATA;
3979 count++; /* Allow for full match */
3980 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3981 }
3982
3983 /* Output the captured substrings */
3984
3985 for (i = 0; i < count * 2; i += 2)
3986 {
3987 if (use_offsets[i] < 0)
3988 {
3989 if (use_offsets[i] != -1)
3990 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3991 use_offsets[i], i);
3992 if (use_offsets[i+1] != -1)
3993 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3994 use_offsets[i+1], i+1);
3995 fprintf(outfile, "%2d: <unset>\n", i/2);
3996 }
3997 else
3998 {
3999 fprintf(outfile, "%2d: ", i/2);
4000 PCHARSV(bptr, use_offsets[i],
4001 use_offsets[i+1] - use_offsets[i], outfile);
4002 fprintf(outfile, "\n");
4003 if (do_showcaprest || (i == 0 && do_showrest))
4004 {
4005 fprintf(outfile, "%2d+ ", i/2);
4006 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4007 outfile);
4008 fprintf(outfile, "\n");
4009 }
4010 }
4011 }
4012
4013 if (markptr != NULL)
4014 {
4015 fprintf(outfile, "MK: ");
4016 PCHARSV(markptr, 0, -1, outfile);
4017 fprintf(outfile, "\n");
4018 }
4019
4020 for (i = 0; i < 32; i++)
4021 {
4022 if ((copystrings & (1 << i)) != 0)
4023 {
4024 int rc;
4025 char copybuffer[256];
4026 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4027 copybuffer, sizeof(copybuffer));
4028 if (rc < 0)
4029 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4030 else
4031 {
4032 fprintf(outfile, "%2dC ", i);
4033 PCHARSV(copybuffer, 0, rc, outfile);
4034 fprintf(outfile, " (%d)\n", rc);
4035 }
4036 }
4037 }
4038
4039 cnptr = copynames;
4040 for (;;)
4041 {
4042 int rc;
4043 char copybuffer[256];
4044
4045 if (use_pcre16)
4046 {
4047 if (*(pcre_uint16 *)cnptr == 0) break;
4048 }
4049 else
4050 {
4051 if (*(pcre_uint8 *)cnptr == 0) break;
4052 }
4053
4054 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4055 cnptr, copybuffer, sizeof(copybuffer));
4056
4057 if (rc < 0)
4058 {
4059 fprintf(outfile, "copy substring ");
4060 PCHARSV(cnptr, 0, -1, outfile);
4061 fprintf(outfile, " failed %d\n", rc);
4062 }
4063 else
4064 {
4065 fprintf(outfile, " C ");
4066 PCHARSV(copybuffer, 0, rc, outfile);
4067 fprintf(outfile, " (%d) ", rc);
4068 PCHARSV(cnptr, 0, -1, outfile);
4069 putc('\n', outfile);
4070 }
4071
4072 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4073 }
4074
4075 for (i = 0; i < 32; i++)
4076 {
4077 if ((getstrings & (1 << i)) != 0)
4078 {
4079 int rc;
4080 const char *substring;
4081 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4082 if (rc < 0)
4083 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4084 else
4085 {
4086 fprintf(outfile, "%2dG ", i);
4087 PCHARSV(substring, 0, rc, outfile);
4088 fprintf(outfile, " (%d)\n", rc);
4089 PCRE_FREE_SUBSTRING(substring);
4090 }
4091 }
4092 }
4093
4094 gnptr = getnames;
4095 for (;;)
4096 {
4097 int rc;
4098 const char *substring;
4099
4100 if (use_pcre16)
4101 {
4102 if (*(pcre_uint16 *)gnptr == 0) break;
4103 }
4104 else
4105 {
4106 if (*(pcre_uint8 *)gnptr == 0) break;
4107 }
4108
4109 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4110 gnptr, &substring);
4111 if (rc < 0)
4112 {
4113 fprintf(outfile, "get substring ");
4114 PCHARSV(gnptr, 0, -1, outfile);
4115 fprintf(outfile, " failed %d\n", rc);
4116 }
4117 else
4118 {
4119 fprintf(outfile, " G ");
4120 PCHARSV(substring, 0, rc, outfile);
4121 fprintf(outfile, " (%d) ", rc);
4122 PCHARSV(gnptr, 0, -1, outfile);
4123 PCRE_FREE_SUBSTRING(substring);
4124 putc('\n', outfile);
4125 }
4126
4127 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4128 }
4129
4130 if (getlist)
4131 {
4132 int rc;
4133 const char **stringlist;
4134 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4135 if (rc < 0)
4136 fprintf(outfile, "get substring list failed %d\n", rc);
4137 else
4138 {
4139 for (i = 0; i < count; i++)
4140 {
4141 fprintf(outfile, "%2dL ", i);
4142 PCHARSV(stringlist[i], 0, -1, outfile);
4143 putc('\n', outfile);
4144 }
4145 if (stringlist[i] != NULL)
4146 fprintf(outfile, "string list not terminated by NULL\n");
4147 PCRE_FREE_SUBSTRING_LIST(stringlist);
4148 }
4149 }
4150 }
4151
4152 /* There was a partial match */
4153
4154 else if (count == PCRE_ERROR_PARTIAL)
4155 {
4156 if (markptr == NULL) fprintf(outfile, "Partial match");
4157 else
4158 {
4159 fprintf(outfile, "Partial match, mark=");
4160 PCHARSV(markptr, 0, -1, outfile);
4161 }
4162 if (use_size_offsets > 1)
4163 {
4164 fprintf(outfile, ": ");
4165 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4166 outfile);
4167 }
4168 fprintf(outfile, "\n");
4169 break; /* Out of the /g loop */
4170 }
4171
4172 /* Failed to match. If this is a /g or /G loop and we previously set
4173 g_notempty after a null match, this is not necessarily the end. We want
4174 to advance the start offset, and continue. We won't be at the end of the
4175 string - that was checked before setting g_notempty.
4176
4177 Complication arises in the case when the newline convention is "any",
4178 "crlf", or "anycrlf". If the previous match was at the end of a line
4179 terminated by CRLF, an advance of one character just passes the \r,
4180 whereas we should prefer the longer newline sequence, as does the code in
4181 pcre_exec(). Fudge the offset value to achieve this. We check for a
4182 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4183 find the default.
4184
4185 Otherwise, in the case of UTF-8 matching, the advance must be one
4186 character, not one byte. */
4187
4188 else
4189 {
4190 if (g_notempty != 0)
4191 {
4192 int onechar = 1;
4193 unsigned int obits = ((REAL_PCRE *)re)->options;
4194 use_offsets[0] = start_offset;
4195 if ((obits & PCRE_NEWLINE_BITS) == 0)
4196 {
4197 int d;
4198 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4199 /* Note that these values are always the ASCII ones, even in
4200 EBCDIC environments. CR = 13, NL = 10. */
4201 obits = (d == 13)? PCRE_NEWLINE_CR :
4202 (d == 10)? PCRE_NEWLINE_LF :
4203 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4204 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4205 (d == -1)? PCRE_NEWLINE_ANY : 0;
4206 }
4207 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4208 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4209 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4210 &&
4211 start_offset < len - 1 &&
4212 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4213 (use_pcre16?
4214 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4215 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4216 :
4217 bptr[start_offset] == '\r'
4218 && bptr[start_offset + 1] == '\n')
4219 #elif defined SUPPORT_PCRE16
4220 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4221 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4222 #else
4223 bptr[start_offset] == '\r'
4224 && bptr[start_offset + 1] == '\n'
4225 #endif
4226 )
4227 onechar++;
4228 else if (use_utf)
4229 {
4230 while (start_offset + onechar < len)
4231 {
4232 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4233 onechar++;
4234 }
4235 }
4236 use_offsets[1] = start_offset + onechar;
4237 }
4238 else
4239 {
4240 switch(count)
4241 {
4242 case PCRE_ERROR_NOMATCH:
4243 if (gmatched == 0)
4244 {
4245 if (markptr == NULL)
4246 {
4247 fprintf(outfile, "No match\n");
4248 }
4249 else
4250 {
4251 fprintf(outfile, "No match, mark = ");
4252 PCHARSV(markptr, 0, -1, outfile);
4253 putc('\n', outfile);
4254 }
4255 }
4256 break;
4257
4258 case PCRE_ERROR_BADUTF8:
4259 case PCRE_ERROR_SHORTUTF8:
4260 fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4261 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4262 use_pcre16? "16" : "8");
4263 if (use_size_offsets >= 2)
4264 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4265 use_offsets[1]);
4266 fprintf(outfile, "\n");
4267 break;
4268
4269 case PCRE_ERROR_BADUTF8_OFFSET:
4270 fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4271 use_pcre16? "16" : "8");
4272 break;
4273
4274 default:
4275 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
4276 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4277 else
4278 fprintf(outfile, "Error %d (Unexpected value)\n", count);
4279 break;
4280 }
4281
4282 break; /* Out of the /g loop */
4283 }
4284 }
4285
4286 /* If not /g or /G we are done */
4287
4288 if (!do_g && !do_G) break;
4289
4290 /* If we have matched an empty string, first check to see if we are at
4291 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4292 Perl's /g option does. This turns out to be rather cunning. First we set
4293 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4294 same point. If this fails (picked up above) we advance to the next
4295 character. */
4296
4297 g_notempty = 0;
4298
4299 if (use_offsets[0] == use_offsets[1])
4300 {
4301 if (use_offsets[0] == len) break;
4302 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4303 }
4304
4305 /* For /g, update the start offset, leaving the rest alone */
4306
4307 if (do_g) start_offset = use_offsets[1];
4308
4309 /* For /G, update the pointer and length */
4310
4311 else
4312 {
4313 bptr += use_offsets[1] * CHAR_SIZE;
4314 len -= use_offsets[1];
4315 }
4316 } /* End of loop for /g and /G */
4317
4318 NEXT_DATA: continue;
4319 } /* End of loop for data lines */
4320
4321 CONTINUE:
4322
4323 #if !defined NOPOSIX
4324 if (posix || do_posix) regfree(&preg);
4325 #endif
4326
4327 if (re != NULL) new_free(re);
4328 if (extra != NULL)
4329 {
4330 PCRE_FREE_STUDY(extra);
4331 }
4332 if (locale_set)
4333 {
4334 new_free((void *)tables);
4335 setlocale(LC_CTYPE, "C");
4336 locale_set = 0;
4337 }
4338 if (jit_stack != NULL)
4339 {
4340 PCRE_JIT_STACK_FREE(jit_stack);
4341 jit_stack = NULL;
4342 }
4343 }
4344
4345 if (infile == stdin) fprintf(outfile, "\n");
4346
4347 EXIT:
4348
4349 if (infile != NULL && infile != stdin) fclose(infile);
4350 if (outfile != NULL && outfile != stdout) fclose(outfile);
4351
4352 free(buffer);
4353 free(dbuffer);
4354 free(pbuffer);
4355 free(offsets);
4356
4357 #ifdef SUPPORT_PCRE16
4358 if (buffer16 != NULL) free(buffer16);
4359 #endif
4360
4361 return yield;
4362 }
4363
4364 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12