/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Contents of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 829 - (show annotations) (download)
Tue Dec 27 12:24:23 2011 UTC (2 years, 11 months ago) by zherczeg
File MIME type: text/plain
File size: 127611 byte(s)
Change some preprocessor directives to support various configure modes
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
48
49
50 #ifdef HAVE_CONFIG_H
51 #include "config.h"
52 #endif
53
54 #include <ctype.h>
55 #include <stdio.h>
56 #include <string.h>
57 #include <stdlib.h>
58 #include <time.h>
59 #include <locale.h>
60 #include <errno.h>
61
62 #ifdef SUPPORT_LIBREADLINE
63 #ifdef HAVE_UNISTD_H
64 #include <unistd.h>
65 #endif
66 #include <readline/readline.h>
67 #include <readline/history.h>
68 #endif
69
70
71 /* A number of things vary for Windows builds. Originally, pcretest opened its
72 input and output without "b"; then I was told that "b" was needed in some
73 environments, so it was added for release 5.0 to both the input and output. (It
74 makes no difference on Unix-like systems.) Later I was told that it is wrong
75 for the input on Windows. I've now abstracted the modes into two macros that
76 are set here, to make it easier to fiddle with them, and removed "b" from the
77 input mode under Windows. */
78
79 #if defined(_WIN32) || defined(WIN32)
80 #include <io.h> /* For _setmode() */
81 #include <fcntl.h> /* For _O_BINARY */
82 #define INPUT_MODE "r"
83 #define OUTPUT_MODE "wb"
84
85 #ifndef isatty
86 #define isatty _isatty /* This is what Windows calls them, I'm told, */
87 #endif /* though in some environments they seem to */
88 /* be already defined, hence the #ifndefs. */
89 #ifndef fileno
90 #define fileno _fileno
91 #endif
92
93 /* A user sent this fix for Borland Builder 5 under Windows. */
94
95 #ifdef __BORLANDC__
96 #define _setmode(handle, mode) setmode(handle, mode)
97 #endif
98
99 /* Not Windows */
100
101 #else
102 #include <sys/time.h> /* These two includes are needed */
103 #include <sys/resource.h> /* for setrlimit(). */
104 #define INPUT_MODE "rb"
105 #define OUTPUT_MODE "wb"
106 #endif
107
108
109 /* We have to include pcre_internal.h because we need the internal info for
110 displaying the results of pcre_study() and we also need to know about the
111 internal macros, structures, and other internal data values; pcretest has
112 "inside information" compared to a program that strictly follows the PCRE API.
113
114 Although pcre_internal.h does itself include pcre.h, we explicitly include it
115 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
116 appropriately for an application, not for building PCRE. */
117
118 #include "pcre.h"
119
120 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
121 /* Configure internal macros to 16 bit mode. */
122 #define COMPILE_PCRE16
123 #endif
124
125 #include "pcre_internal.h"
126
127 /* The pcre_printint() function, which prints the internal form of a compiled
128 regex, is held in a separate file so that (a) it can be compiled in either
129 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
130 when that is compiled in debug mode. */
131
132 #ifdef SUPPORT_PCRE8
133 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
134 #endif
135 #ifdef SUPPORT_PCRE16
136 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
137 #endif
138
139 /* We need access to some of the data tables that PCRE uses. So as not to have
140 to keep two copies, we include the source file here, changing the names of the
141 external symbols to prevent clashes. */
142
143 #define PCRE_INCLUDED
144 #undef PRIV
145 #define PRIV(name) name
146
147 #include "pcre_tables.c"
148
149 /* The definition of the macro PRINTABLE, which determines whether to print an
150 output character as-is or as a hex value when showing compiled patterns, is
151 the same as in the printint.src file. We use it here in cases when the locale
152 has not been explicitly changed, so as to get consistent output from systems
153 that differ in their output from isprint() even in the "C" locale. */
154
/* PRINTABLE(c) is non-zero when c lies inside the range that is shown as-is:
32..126 normally, 64..254 when EBCDIC is defined. The argument is evaluated
twice, so it must not have side effects. */

#if !defined EBCDIC
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#else
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#endif

/* PRINTOK(c) uses isprint() when locale_set is non-zero, and the fixed
PRINTABLE() range otherwise. */

#define PRINTOK(c) (locale_set ? isprint(c) : PRINTABLE(c))
162
163 /* Posix support is disabled in 16 bit only mode. */
164 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
165 #define NOPOSIX
166 #endif
167
168 /* It is possible to compile this test program without including support for
169 testing the POSIX interface, though this is not available via the standard
170 Makefile. */
171
172 #if !defined NOPOSIX
173 #include "pcreposix.h"
174 #endif
175
176 /* It is also possible, originally for the benefit of a version that was
177 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
178 NOUTF), or without the interface to the DFA matcher (define NODFA). In fact, we
179 automatically cut out the UTF support if PCRE is built without it. */
180
/* Define NOUTF when SUPPORT_UTF is not defined, unless it is defined already. */

#if !defined SUPPORT_UTF && !defined NOUTF
#define NOUTF
#endif
186
187 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
188 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
189 only from one place and is handled differently). I couldn't dream up any way of
190 using a single macro to do this in a generic way, because of the many different
191 argument requirements. We know that at least one of SUPPORT_PCRE8 and
192 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
193 use these in the definitions of generic macros.
194
195 **** Special note about the PCHARSxxx macros: the address of the string to be
196 printed is always given as two arguments: a base address followed by an offset.
197 The base address is cast to the correct data size for 8 or 16 bit data; the
198 offset is in units of this size. If the string were given as base+offset in one
199 argument, the casting might be incorrectly applied. */
200
#ifdef SUPPORT_PCRE8

/* 8-bit forms of the function-call macros. Each one casts its string
arguments to the types used by the 8-bit functions and forwards to the
corresponding pcre_xxx function (or to a helper such as pchars()).

Every argument that is cast or used in pointer arithmetic is parenthesized in
the expansion, so that an expression such as "base + 1" or "a ? b : c" can be
passed without the cast or the "+" binding to only part of it. Arguments that
are assigned to (lv, p, re, rc, count, extra, n) must be plain lvalues. */

#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + (offset), len, f)

#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + (offset), len, f)

/* cn16 is accepted but not used, so that the parameter list matches the
16-bit form. */

#define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
  p = read_capture_name8(p, cn8, re)

#define SET_PCRE_CALLOUT8(callout) \
  pcre_callout = callout

#define STRLEN8(p) ((int)strlen((char *)(p)))


#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  re = pcre_compile((char *)(pat), options, error, erroffset, tables)

#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre_copy_named_substring(re, (char *)(bptr), offsets, count, \
    (char *)(namesptr), cbuffer, size)

#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre_copy_substring((char *)(bptr), offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre_dfa_exec(re, extra, (char *)(bptr), len, start_offset, \
    options, offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre_exec(re, extra, (char *)(bptr), len, start_offset, options, \
    offsets, size_offsets)

#define PCRE_FREE_STUDY8(extra) \
  pcre_free_study(extra)

#define PCRE_FREE_SUBSTRING8(substring) \
  pcre_free_substring(substring)

#define PCRE_FREE_SUBSTRING_LIST8(listptr) \
  pcre_free_substring_list(listptr)

#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre_get_named_substring(re, (char *)(bptr), offsets, count, \
    (char *)(getnamesptr), subsptr)

/* NOTE: the second parameter is called rc and is not used in the expansion;
the compiled pattern is taken from a variable named "re" that must be in scope
where the macro is used. NOTE(review): the parameter was presumably meant to
be "re" - confirm against the callers before renaming it. */

#define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
  n = pcre_get_stringnumber(re, (char *)(ptr))

#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre_get_substring((char *)(bptr), offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
  rc = pcre_get_substring_list((const char *)(bptr), offsets, count, listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables) \
  pcre_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT8(re, outfile, debug_lengths) \
  pcre_printint(re, outfile, debug_lengths)

#define PCRE_STUDY8(extra, re, options, error) \
  extra = pcre_study(re, options, error)

#endif /* SUPPORT_PCRE8 */
272
273 /* -----------------------------------------------------------*/
274
#ifdef SUPPORT_PCRE16

/* 16-bit forms of the function-call macros. Each one casts its string
arguments to the 16-bit types (PCRE_SPTR16, PCRE_SCHAR16) and forwards to the
corresponding pcre16_xxx function (or to a helper such as pchars16()).

Every argument that is cast or used in arithmetic is parenthesized in the
expansion, so that an expression such as "base + 1" or "a + b" can be passed
without the cast, "+" or "/" binding to only part of it. Arguments that are
assigned to (lv, p, re, rc, count, extra, n) must be plain lvalues. */

#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + (offset), len, f)

#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + (offset), len, f)

/* cn8 is accepted but not used, so that the parameter list matches the 8-bit
form. */

#define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
  p = read_capture_name16(p, cn16, re)

#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)(p)))

#define SET_PCRE_CALLOUT16(callout) \
  pcre16_callout = callout


#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = pcre16_compile((PCRE_SPTR16)(pat), options, error, erroffset, tables)

/* In the two copy macros the size argument is halved before it is passed on
(presumably because callers give the buffer size in bytes while the 16-bit
function counts 16-bit units - confirm against the callers). */

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring(re, (PCRE_SPTR16)(bptr), offsets, count, \
    (PCRE_SPTR16)(namesptr), (PCRE_SCHAR16 *)(cbuffer), (size)/2)

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)(bptr), offsets, count, i, \
    (PCRE_SCHAR16 *)(cbuffer), (size)/2)

#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec(re, extra, (PCRE_SPTR16)(bptr), len, start_offset, \
    options, offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec(re, extra, (PCRE_SPTR16)(bptr), len, start_offset, \
    options, offsets, size_offsets)

#define PCRE_FREE_STUDY16(extra) \
  pcre16_free_study(extra)

#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)(substring))

#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)(listptr))

#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre16_get_named_substring(re, (PCRE_SPTR16)(bptr), offsets, count, \
    (PCRE_SPTR16)(getnamesptr), (PCRE_SPTR16 *)(void*)(subsptr))

/* NOTE: the second parameter is called rc and is not used in the expansion;
the compiled pattern is taken from a variable named "re" that must be in scope
where the macro is used. NOTE(review): the parameter was presumably meant to
be "re" - confirm against the callers before renaming it. */

#define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
  n = pcre16_get_stringnumber(re, (PCRE_SPTR16)(ptr))

#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)(bptr), offsets, count, i, \
    (PCRE_SPTR16 *)(void*)(subsptr))

#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)(bptr), offsets, count, \
    (PCRE_SPTR16 **)(void*)(listptr))

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables) \
  pcre16_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  pcre16_printint(re, outfile, debug_lengths)

#define PCRE_STUDY16(extra, re, options, error) \
  extra = pcre16_study(re, options, error)

#endif /* SUPPORT_PCRE16 */
349
350
351 /* ----- Both modes are supported; a runtime test is needed, except for
352 pcre_config(), and the JIT stack functions, when it doesn't matter which
353 version is called. ----- */
354
#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16

/* Both libraries are available, so each generic macro tests use_pcre16 at run
time and dispatches to the 16-bit or the 8-bit form. The statement-like macros
are wrapped in do { ... } while (0) so that each expands to exactly one
statement: it can then be used as the body of an if/else or a loop without
braces and without its internal "else" pairing up with anything outside the
macro. Each use must be followed by a semicolon. CHAR_SIZE, STRLEN and
PCRE_MAKETABLES remain expressions. */

#define CHAR_SIZE (use_pcre16? 2:1)

#define PCHARS(lv, p, offset, len, f) \
  do { \
    if (use_pcre16) \
      PCHARS16(lv, p, offset, len, f); \
    else \
      PCHARS8(lv, p, offset, len, f); \
  } while (0)

#define PCHARSV(p, offset, len, f) \
  do { \
    if (use_pcre16) \
      PCHARSV16(p, offset, len, f); \
    else \
      PCHARSV8(p, offset, len, f); \
  } while (0)

#define READ_CAPTURE_NAME(p, cn8, cn16, re) \
  do { \
    if (use_pcre16) \
      READ_CAPTURE_NAME16(p, cn8, cn16, re); \
    else \
      READ_CAPTURE_NAME8(p, cn8, cn16, re); \
  } while (0)

#define SET_PCRE_CALLOUT(callout) \
  do { \
    if (use_pcre16) \
      SET_PCRE_CALLOUT16(callout); \
    else \
      SET_PCRE_CALLOUT8(callout); \
  } while (0)

#define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))

#define PCRE_ASSIGN_JIT_STACK pcre_assign_jit_stack

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  do { \
    if (use_pcre16) \
      PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
    else \
      PCRE_COMPILE8(re, pat, options, error, erroffset, tables); \
  } while (0)

#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  do { \
    if (use_pcre16) \
      PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    else \
      PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
  } while (0)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  do { \
    if (use_pcre16) \
      PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
    else \
      PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size); \
  } while (0)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  do { \
    if (use_pcre16) \
      PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    else \
      PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
  } while (0)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do { \
    if (use_pcre16) \
      PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    else \
      PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
  } while (0)

#define PCRE_FREE_STUDY(extra) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_STUDY16(extra); \
    else \
      PCRE_FREE_STUDY8(extra); \
  } while (0)

#define PCRE_FREE_SUBSTRING(substring) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_SUBSTRING16(substring); \
    else \
      PCRE_FREE_SUBSTRING8(substring); \
  } while (0)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_SUBSTRING_LIST16(listptr); \
    else \
      PCRE_FREE_SUBSTRING_LIST8(listptr); \
  } while (0)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    else \
      PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
  } while (0)

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
    else \
      PCRE_GET_STRINGNUMBER8(n, rc, ptr); \
  } while (0)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
    else \
      PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr); \
  } while (0)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
    else \
      PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr); \
  } while (0)

#define PCRE_JIT_STACK_ALLOC pcre_jit_stack_alloc
#define PCRE_JIT_STACK_FREE pcre_jit_stack_free

#define PCRE_MAKETABLES \
  (use_pcre16? pcre16_maketables() : pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, tables) \
  do { \
    if (use_pcre16) \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables); \
    else \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables); \
  } while (0)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  do { \
    if (use_pcre16) \
      PCRE_PRINTINT16(re, outfile, debug_lengths); \
    else \
      PCRE_PRINTINT8(re, outfile, debug_lengths); \
  } while (0)

#define PCRE_STUDY(extra, re, options, error) \
  do { \
    if (use_pcre16) \
      PCRE_STUDY16(extra, re, options, error); \
    else \
      PCRE_STUDY8(extra, re, options, error); \
  } while (0)

/* ----- Only 8-bit mode is supported: each generic name is an alias for the
8-bit form ----- */

#elif defined SUPPORT_PCRE8
#define CHAR_SIZE                 1
#define PCHARS                    PCHARS8
#define PCHARSV                   PCHARSV8
#define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
#define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
#define STRLEN                    STRLEN8
#define PCRE_ASSIGN_JIT_STACK     pcre_assign_jit_stack
#define PCRE_COMPILE              PCRE_COMPILE8
#define PCRE_CONFIG               pcre_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
#define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
#define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
#define PCRE_EXEC                 PCRE_EXEC8
#define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
#define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
#define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
#define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
#define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
#define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
#define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
#define PCRE_JIT_STACK_ALLOC      pcre_jit_stack_alloc
#define PCRE_JIT_STACK_FREE       pcre_jit_stack_free
#define PCRE_MAKETABLES           pcre_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
#define PCRE_PRINTINT             PCRE_PRINTINT8
#define PCRE_STUDY                PCRE_STUDY8

/* ----- Only 16-bit mode is supported: each generic name is an alias for the
16-bit form ----- */

#else
#define CHAR_SIZE                 2
#define PCHARS                    PCHARS16
#define PCHARSV                   PCHARSV16
#define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
#define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
#define STRLEN                    STRLEN16
#define PCRE_ASSIGN_JIT_STACK     pcre16_assign_jit_stack
#define PCRE_COMPILE              PCRE_COMPILE16
#define PCRE_CONFIG               pcre16_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
#define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
#define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
#define PCRE_EXEC                 PCRE_EXEC16
#define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
#define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
#define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
#define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
#define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
#define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
#define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
#define PCRE_JIT_STACK_ALLOC      pcre16_jit_stack_alloc
#define PCRE_JIT_STACK_FREE       pcre16_jit_stack_free
#define PCRE_MAKETABLES           pcre16_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
#define PCRE_PRINTINT             PCRE_PRINTINT16
#define PCRE_STUDY                PCRE_STUDY16
#endif
557
558 /* ----- End of mode-specific function call macros ----- */
559
560
/* Other parameters */

/* Supply CLOCKS_PER_SEC when the system headers have not: use CLK_TCK if it
is defined, and 100 otherwise. */

#if !defined CLOCKS_PER_SEC && defined CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#elif !defined CLOCKS_PER_SEC
#define CLOCKS_PER_SEC 100
#endif

/* Default number of repetitions of the loop used for timing. */

#define LOOPREPEAT 500000
574
575 /* Static variables */
576
577 static FILE *outfile;
578 static int log_store = 0;
579 static int callout_count;
580 static int callout_extra;
581 static int callout_fail_count;
582 static int callout_fail_id;
583 static int debug_lengths;
584 static int first_callout;
585 static int locale_set = 0;
586 static int show_malloc;
587 static int use_utf;
588 static size_t gotten_store;
589 static size_t first_gotten_store = 0;
590 static const unsigned char *last_callout_mark = NULL;
591
592 /* The buffers grow automatically if very long input lines are encountered. */
593
594 static int buffer_size = 50000;
595 static pcre_uint8 *buffer = NULL;
596 static pcre_uint8 *dbuffer = NULL;
597 static pcre_uint8 *pbuffer = NULL;
598
/* Another buffer is needed for translation to 16-bit character strings. It is
obtained and extended as required. */

#if defined SUPPORT_PCRE16
static pcre_uint16 *buffer16 = NULL;
static int buffer16_size = 0;

#if defined SUPPORT_PCRE8

/* With both libraries enabled, the table of operator lengths used for 16-bit
compiling is needed so that bytes can be swapped in a pattern for
saving/reloading testing. The data is available as the macro OP_LENGTHS, but
LINK_SIZE has to be adjusted for the 16-bit world before the macro is
expanded: 2 becomes 1, and 3 or 4 become 2. As a safety check, insist that
COMPILE_PCRE16 is *not* set. */

#if defined COMPILE_PCRE16
#error COMPILE_PCRE16 must not be set when compiling pcretest.c
#endif

#if LINK_SIZE == 3 || LINK_SIZE == 4
#undef LINK_SIZE
#define LINK_SIZE 2
#elif LINK_SIZE == 2
#undef LINK_SIZE
#define LINK_SIZE 1
#else
#error LINK_SIZE must be either 2, 3, or 4
#endif

#endif /* SUPPORT_PCRE8 */

static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
#endif /* SUPPORT_PCRE16 */
632
/* use_pcre16 selects the 16-bit library at run time. Without 8-bit support
there must be 16-bit support, so it starts as 1. With 8-bit support it starts
as 0; if 16-bit support is present as well, an option can change it. */

#ifndef SUPPORT_PCRE8
static int use_pcre16 = 1;
#else
static int use_pcre16 = 0;
#endif
642
/* Textual explanations for runtime error codes. The number in front of each
entry is its index in the array; a NULL entry has no text here. (The table is
presumably indexed by the negated error code - confirm at the point of use.) */

static const char *errtexts[] = {
  /*  0 */ NULL,  /* 0 is no error */
  /*  1 */ NULL,  /* NOMATCH is handled specially */
  /*  2 */ "NULL argument passed",
  /*  3 */ "bad option value",
  /*  4 */ "magic number missing",
  /*  5 */ "unknown opcode - pattern overwritten?",
  /*  6 */ "no more memory",
  /*  7 */ NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  /*  8 */ "match limit exceeded",
  /*  9 */ "callout error code",
  /* 10 */ NULL,  /* BADUTF8/16 is handled specially */
  /* 11 */ NULL,  /* BADUTF8/16 offset is handled specially */
  /* 12 */ NULL,  /* PARTIAL is handled specially */
  /* 13 */ "not used - internal error",
  /* 14 */ "internal error - pattern overwritten?",
  /* 15 */ "bad count value",
  /* 16 */ "item unsupported for DFA matching",
  /* 17 */ "backreference condition or recursion test not supported for DFA matching",
  /* 18 */ "match limit not supported for DFA matching",
  /* 19 */ "workspace size exceeded in DFA matching",
  /* 20 */ "too much recursion for DFA matching",
  /* 21 */ "recursion limit exceeded",
  /* 22 */ "not used - internal error",
  /* 23 */ "invalid combination of newline options",
  /* 24 */ "bad offset value",
  /* 25 */ NULL,  /* SHORTUTF8/16 is handled specially */
  /* 26 */ "nested recursion at the same subject position",
  /* 27 */ "JIT stack limit reached",
  /* 28 */ "pattern compiled in wrong mode: 8-bit/16-bit error"
};
676
677
678 /*************************************************
679 * Alternate character tables *
680 *************************************************/
681
682 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
683 using the default tables of the library. However, the T option can be used to
684 select alternate sets of tables, for different kinds of testing. Note also that
685 the L (locale) option also adjusts the tables. */
686
687 /* This is the set of tables distributed as default with PCRE. It recognizes
688 only ASCII characters. */
689
690 static const pcre_uint8 tables0[] = {
691
692 /* This table is a lower casing table. */
693
694 0, 1, 2, 3, 4, 5, 6, 7,
695 8, 9, 10, 11, 12, 13, 14, 15,
696 16, 17, 18, 19, 20, 21, 22, 23,
697 24, 25, 26, 27, 28, 29, 30, 31,
698 32, 33, 34, 35, 36, 37, 38, 39,
699 40, 41, 42, 43, 44, 45, 46, 47,
700 48, 49, 50, 51, 52, 53, 54, 55,
701 56, 57, 58, 59, 60, 61, 62, 63,
702 64, 97, 98, 99,100,101,102,103,
703 104,105,106,107,108,109,110,111,
704 112,113,114,115,116,117,118,119,
705 120,121,122, 91, 92, 93, 94, 95,
706 96, 97, 98, 99,100,101,102,103,
707 104,105,106,107,108,109,110,111,
708 112,113,114,115,116,117,118,119,
709 120,121,122,123,124,125,126,127,
710 128,129,130,131,132,133,134,135,
711 136,137,138,139,140,141,142,143,
712 144,145,146,147,148,149,150,151,
713 152,153,154,155,156,157,158,159,
714 160,161,162,163,164,165,166,167,
715 168,169,170,171,172,173,174,175,
716 176,177,178,179,180,181,182,183,
717 184,185,186,187,188,189,190,191,
718 192,193,194,195,196,197,198,199,
719 200,201,202,203,204,205,206,207,
720 208,209,210,211,212,213,214,215,
721 216,217,218,219,220,221,222,223,
722 224,225,226,227,228,229,230,231,
723 232,233,234,235,236,237,238,239,
724 240,241,242,243,244,245,246,247,
725 248,249,250,251,252,253,254,255,
726
727 /* This table is a case flipping table. */
728
729 0, 1, 2, 3, 4, 5, 6, 7,
730 8, 9, 10, 11, 12, 13, 14, 15,
731 16, 17, 18, 19, 20, 21, 22, 23,
732 24, 25, 26, 27, 28, 29, 30, 31,
733 32, 33, 34, 35, 36, 37, 38, 39,
734 40, 41, 42, 43, 44, 45, 46, 47,
735 48, 49, 50, 51, 52, 53, 54, 55,
736 56, 57, 58, 59, 60, 61, 62, 63,
737 64, 97, 98, 99,100,101,102,103,
738 104,105,106,107,108,109,110,111,
739 112,113,114,115,116,117,118,119,
740 120,121,122, 91, 92, 93, 94, 95,
741 96, 65, 66, 67, 68, 69, 70, 71,
742 72, 73, 74, 75, 76, 77, 78, 79,
743 80, 81, 82, 83, 84, 85, 86, 87,
744 88, 89, 90,123,124,125,126,127,
745 128,129,130,131,132,133,134,135,
746 136,137,138,139,140,141,142,143,
747 144,145,146,147,148,149,150,151,
748 152,153,154,155,156,157,158,159,
749 160,161,162,163,164,165,166,167,
750 168,169,170,171,172,173,174,175,
751 176,177,178,179,180,181,182,183,
752 184,185,186,187,188,189,190,191,
753 192,193,194,195,196,197,198,199,
754 200,201,202,203,204,205,206,207,
755 208,209,210,211,212,213,214,215,
756 216,217,218,219,220,221,222,223,
757 224,225,226,227,228,229,230,231,
758 232,233,234,235,236,237,238,239,
759 240,241,242,243,244,245,246,247,
760 248,249,250,251,252,253,254,255,
761
762 /* This table contains bit maps for various character classes. Each map is 32
763 bytes long and the bits run from the least significant end of each byte. The
764 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
765 graph, print, punct, and cntrl. Other classes are built from combinations. */
766
767 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
768 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
769 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
770 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
771
772 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
773 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
774 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
775 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
776
777 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
778 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
779 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
780 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
781
782 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
783 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
784 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
785 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
786
787 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
788 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
789 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
790 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
791
792 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
793 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
794 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
795 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
796
797 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
798 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
799 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
800 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
801
802 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
803 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
804 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
805 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
806
807 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
808 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
809 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
810 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
811
812 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
813 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
814 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
815 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
816
817 /* This table identifies various classes of character by individual bits:
818 0x01 white space character
819 0x02 letter
820 0x04 decimal digit
821 0x08 hexadecimal digit
822 0x10 alphanumeric or '_'
823 0x80 regular expression metacharacter or binary zero
824 */
825
826 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
827 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
828 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
829 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
830 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
831 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
832 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
833 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
834 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
835 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
836 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
837 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
838 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
839 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
840 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
841 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
842 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
843 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
844 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
845 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
846 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
847 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
848 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
849 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
850 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
851 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
852 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
853 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
854 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
855 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
856 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
857 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
858
/* This is a set of tables that came originally from a Windows user. It seems to
be at least an approximation of ISO 8859. In particular, there are characters
greater than 128 that are marked as spaces, letters, etc.

NOTE(review): the section comments below assume the usual PCRE character-table
layout (lower-casing table, case-flipping table, class bitmaps, character
types); confirm against the library's table definition before relying on them. */

static const pcre_uint8 tables1[] = {
/* First 256 bytes: presumably the lower-casing table (entries 65-90 map to
97-122, and 192-222 map to 224-254 except 215). */
0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,
/* Next 256 bytes: presumably the case-flipping table (upper and lower case
entries are exchanged; other entries map to themselves). */
0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,
/* Next 320 bytes: presumably the character-class bitmaps (this looks like ten
32-byte bitmaps, one bit per character code - TODO confirm the class order). */
0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,
/* Final 256 bytes: presumably the per-character type flags. The values match
the bit scheme documented for the preceding table (1 = white space,
2 = letter, 4 = decimal digit, 8 = hex digit, 16 = alphanumeric or '_',
128 = metacharacter or binary zero). Unlike that table, some entries above
127 are non-zero here. */
128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
1001
1002
1003
1004
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for old systems (SunOS4 is one example) whose C library has no
strerror(). The message is looked up in the system error table; a number that
lies outside the table yields a fixed "unknown" text instead.

Argument:   n   the error number
Returns:    pointer to the message text
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1024
1025
1026 /*************************************************
1027 * JIT memory callback *
1028 *************************************************/
1029
1030 static pcre_jit_stack* jit_callback(void *arg)
1031 {
1032 return (pcre_jit_stack *)arg;
1033 }
1034
1035
1036 #if !defined NOUTF || defined SUPPORT_PCRE16
1037 /*************************************************
1038 * Convert UTF-8 string to value *
1039 *************************************************/
1040
1041 /* This function takes one or more bytes that represents a UTF-8 character,
1042 and returns the value of the character.
1043
1044 Argument:
1045 utf8bytes a pointer to the byte vector
1046 vptr a pointer to an int to receive the value
1047
1048 Returns: > 0 => the number of bytes consumed
1049 -6 to 0 => malformed UTF-8 character at offset = (-return)
1050 */
1051
1052 static int
1053 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1054 {
1055 int c = *utf8bytes++;
1056 int d = c;
1057 int i, j, s;
1058
1059 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1060 {
1061 if ((d & 0x80) == 0) break;
1062 d <<= 1;
1063 }
1064
1065 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1066 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1067
1068 /* i now has a value in the range 1-5 */
1069
1070 s = 6*i;
1071 d = (c & utf8_table3[i]) << s;
1072
1073 for (j = 0; j < i; j++)
1074 {
1075 c = *utf8bytes++;
1076 if ((c & 0xc0) != 0x80) return -(j+1);
1077 s -= 6;
1078 d |= (c & 0x3f) << s;
1079 }
1080
1081 /* Check that encoding was the correct unique one */
1082
1083 for (j = 0; j < utf8_table1_size; j++)
1084 if (d <= utf8_table1[j]) break;
1085 if (j != i) return -(i+1);
1086
1087 /* Valid value */
1088
1089 *vptr = d;
1090 return i+1;
1091 }
1092 #endif /* NOUTF || SUPPORT_PCRE16 */
1093
1094
1095
1096 #if !defined NOUTF || defined SUPPORT_PCRE16
1097 /*************************************************
1098 * Convert character value to UTF-8 *
1099 *************************************************/
1100
1101 /* This function takes an integer value in the range 0 - 0x7fffffff
1102 and encodes it as a UTF-8 character in 0 to 6 bytes.
1103
1104 Arguments:
1105 cvalue the character value
1106 utf8bytes pointer to buffer for result - at least 6 bytes long
1107
1108 Returns: number of characters placed in the buffer
1109 */
1110
1111 static int
1112 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1113 {
1114 register int i, j;
1115 for (i = 0; i < utf8_table1_size; i++)
1116 if (cvalue <= utf8_table1[i]) break;
1117 utf8bytes += i;
1118 for (j = i; j > 0; j--)
1119 {
1120 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1121 cvalue >>= 6;
1122 }
1123 *utf8bytes = utf8_table2[i] | cvalue;
1124 return i + 1;
1125 }
1126 #endif /* NOUTF || SUPPORT_PCRE16 */
1127
1128
1129
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* The result is always left in buffer16, which is re-allocated here when it
is smaller than 2*len + 2 bytes. That is always enough: without decoding, each
input byte becomes one 16-bit item; with decoding, every encoded character
takes at least one input byte and yields one 16-bit item, except that values
above 0xffff are written as a surrogate pair (two items).

Surrogate values in the input are deliberately not faulted, so that invalid
UTF-16 strings can be constructed for testing that they are correctly
diagnosed.

A pattern that is not UTF is copied byte by byte. A UTF pattern, and every
data line, is decoded as UTF-8 (data lines are always UTF-8 so that values
greater than 255 can be expressed).

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;

/* Get a bigger output buffer if the current one might be too small. */

if (buffer16_size < 2*len + 2)
  {
  if (buffer16 != NULL) free(buffer16);
  buffer16_size = 2*len + 2;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

if (utf || data)
  {
  /* Decode UTF-8 one character at a time. */

  int c = 0;
  while (len > 0)
    {
    int used = utf82ord(p, &c);
    if (used <= 0) return -1;
    if (c > 0x10ffff) return -2;
    p += used;
    len -= used;

    if (c >= 0x10000)
      {
      /* Needs a surrogate pair, which exists only in UTF mode. */
      if (!utf) return -3;
      c -= 0x10000;
      *out++ = 0xD800 | (c >> 10);
      *out++ = 0xDC00 | (c & 0x3ff);
      }
    else *out++ = c;
    }
  }

else
  {
  /* Non-UTF pattern: widen each byte as it stands. */

  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return out - buffer16;
}
#endif
1208
1209
1210 /*************************************************
1211 * Read or extend an input line *
1212 *************************************************/
1213
1214 /* Input lines are read into buffer, but both patterns and data lines can be
1215 continued over multiple input lines. In addition, if the buffer fills up, we
1216 want to automatically expand it so as to be able to handle extremely large
1217 lines that are needed for certain stress tests. When the input buffer is
1218 expanded, the other two buffers must also be expanded likewise, and the
1219 contents of pbuffer, which are a copy of the input for callouts, must be
1220 preserved (for when expansion happens for a data line). This is not the most
1221 optimal way of handling this, but hey, this is just a test program!
1222
1223 Arguments:
1224 f the file to read
1225 start where in buffer to start (this *must* be within buffer)
1226 prompt for stdin or readline()
1227
1228 Returns: pointer to the start of new data
1229 could be a copy of start, or could be moved
1230 NULL if no data read and EOF reached
1231 */
1232
1233 static pcre_uint8 *
1234 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1235 {
1236 pcre_uint8 *here = start;
1237
1238 for (;;)
1239 {
1240 int rlen = (int)(buffer_size - (here - buffer));
1241
1242 if (rlen > 1000)
1243 {
1244 int dlen;
1245
1246 /* If libreadline support is required, use readline() to read a line if the
1247 input is a terminal. Note that readline() removes the trailing newline, so
1248 we must put it back again, to be compatible with fgets(). */
1249
1250 #ifdef SUPPORT_LIBREADLINE
1251 if (isatty(fileno(f)))
1252 {
1253 size_t len;
1254 char *s = readline(prompt);
1255 if (s == NULL) return (here == start)? NULL : start;
1256 len = strlen(s);
1257 if (len > 0) add_history(s);
1258 if (len > rlen - 1) len = rlen - 1;
1259 memcpy(here, s, len);
1260 here[len] = '\n';
1261 here[len+1] = 0;
1262 free(s);
1263 }
1264 else
1265 #endif
1266
1267 /* Read the next line by normal means, prompting if the file is stdin. */
1268
1269 {
1270 if (f == stdin) printf("%s", prompt);
1271 if (fgets((char *)here, rlen, f) == NULL)
1272 return (here == start)? NULL : start;
1273 }
1274
1275 dlen = (int)strlen((char *)here);
1276 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1277 here += dlen;
1278 }
1279
1280 else
1281 {
1282 int new_buffer_size = 2*buffer_size;
1283 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1284 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1285 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1286
1287 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1288 {
1289 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1290 exit(1);
1291 }
1292
1293 memcpy(new_buffer, buffer, buffer_size);
1294 memcpy(new_pbuffer, pbuffer, buffer_size);
1295
1296 buffer_size = new_buffer_size;
1297
1298 start = new_buffer + (start - buffer);
1299 here = new_buffer + (here - buffer);
1300
1301 free(buffer);
1302 free(dbuffer);
1303 free(pbuffer);
1304
1305 buffer = new_buffer;
1306 dbuffer = new_dbuffer;
1307 pbuffer = new_pbuffer;
1308 }
1309 }
1310
1311 return NULL; /* Control never gets here */
1312 }
1313
1314
1315
1316 /*************************************************
1317 * Read number from string *
1318 *************************************************/
1319
1320 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1321 around with conditional compilation, just do the job by hand. It is only used
1322 for unpicking arguments, so just keep it simple.
1323
1324 Arguments:
1325 str string to be converted
1326 endptr where to put the end pointer
1327
1328 Returns: the unsigned long
1329 */
1330
1331 static int
1332 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1333 {
1334 int result = 0;
1335 while(*str != 0 && isspace(*str)) str++;
1336 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1337 *endptr = str;
1338 return(result);
1339 }
1340
1341
1342
1343 /*************************************************
1344 * Print one character *
1345 *************************************************/
1346
1347 /* Print a single character either literally, or as a hex escape. */
1348
1349 static int pchar(int c, FILE *f)
1350 {
1351 if (PRINTOK(c))
1352 {
1353 if (f != NULL) fprintf(f, "%c", c);
1354 return 1;
1355 }
1356
1357 if (c < 0x100)
1358 {
1359 if (use_utf)
1360 {
1361 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1362 return 6;
1363 }
1364 else
1365 {
1366 if (f != NULL) fprintf(f, "\\x%02x", c);
1367 return 4;
1368 }
1369 }
1370
1371 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1372 return (c <= 0x000000ff)? 6 :
1373 (c <= 0x00000fff)? 7 :
1374 (c <= 0x0000ffff)? 8 :
1375 (c <= 0x000fffff)? 9 : 10;
1376 }
1377
1378
1379
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Print a byte string through pchar(), decoding UTF-8 sequences when in UTF
mode. A negative length means the string is zero-terminated. If the file is
NULL nothing is printed and only the width is computed.

Returns:   the sum of the values returned by pchar()
*/

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int total = 0;
int ch = 0;
int remaining = (length < 0)? (int)strlen((char *)p) : length;

while (remaining > 0)
  {
  int used = 0;          /* Bytes taken by this character; 0 = not yet known */

#if !defined NOUTF
  if (use_utf)
    {
    int rc = utf82ord(p, &ch);
    if (rc > 0 && rc <= remaining) used = rc;   /* Mustn't run over the end */
    }
#endif

  /* Not in UTF mode, or no valid sequence fits: show the byte by itself. */

  if (used == 0)
    {
    ch = *p;
    used = 1;
    }

  p += used;
  remaining -= used;
  total += pchar(ch, f);
  }

return total;
}
#endif
1418
1419
1420
#ifdef SUPPORT_PCRE16
/*************************************************
*    Find length of 0-terminated 16-bit string   *
*************************************************/

/* Returns the number of 16-bit items that precede the terminating zero. */

static int strlen16(PCRE_SPTR16 p)
{
PCRE_SPTR16 end = p;
while (*end != 0) end++;
return (int)(end - p);
}
#endif  /* SUPPORT_PCRE16 */
1433
1434
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
If handed a NULL file, just counts chars without printing.

Arguments:
  p        points to the 16-bit string
  length   number of 16-bit items, or negative if zero-terminated
  f        the output file, or NULL

Returns:   the sum of the values returned by pchar()
*/

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int yield = 0;

if (length < 0)
  length = strlen16(p);

while (length-- > 0)
  {
  int c = *p++ & 0xffff;
#if !defined NOUTF
  /* A high surrogate that is followed by a low surrogate is combined with it
  into one code point. A high surrogate by itself is printed as it stands. */

  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
    int d = *p & 0xffff;

    /* Low surrogates run from 0xDC00 up to and including 0xDFFF. */

    if (d >= 0xDC00 && d <= 0xDFFF)
      {
      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
      length--;
      p++;
      }
    }
#endif
  yield += pchar(c, f);
  }

return yield;
}
#endif  /* SUPPORT_PCRE16 */
1471
1472
1473
#ifdef SUPPORT_PCRE8
/*************************************************
*     Read a capture name (8-bit) and check it   *
*************************************************/

/* Copy a run of alphanumeric characters from p into the area that *pp points
to, and follow it with two zero bytes. A message is written to outfile if the
compiled pattern has no group of that name.

Arguments:
  p      points to the name in the input text
  pp     points to the pointer to the output area; updated to point at the
           second of the two zeros
  re     the compiled pattern

Returns: pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
{
pcre_uint8 *name = *pp;
pcre_uint8 *out = name;

for (; isalnum(*p); p++) *out++ = *p;
out[0] = 0;
out[1] = 0;
out++;

if (pcre_get_stringnumber(re, (char *)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE8 */
1497
1498
1499
#ifdef SUPPORT_PCRE16
/*************************************************
*     Read a capture name (16-bit) and check it  *
*************************************************/

/* The text being read is 8-bit; each alphanumeric character is widened into
the 16-bit area that *pp points to, and the name is followed by two zero items.
A message is written to outfile if the compiled pattern has no group of that
name.

Arguments:
  p      points to the name in the (8-bit) input text
  pp     points to the pointer to the 16-bit output area; updated to point at
           the second of the two zeros
  re     the compiled pattern

Returns: pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
{
pcre_uint16 *name = *pp;
pcre_uint16 *out = name;

for (; isalnum(*p); p++) *out++ = *p;
out[0] = 0;
out[1] = 0;
out++;

if (pcre16_get_stringnumber(re, (PCRE_SPTR16)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE16 */
1524
1525
1526
1527 /*************************************************
1528 * Callout function *
1529 *************************************************/
1530
1531 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1532 the match. Yield zero unless more callouts than the fail count, or the callout
1533 data is not zero. */
1534
1535 static int callout(pcre_callout_block *cb)
1536 {
1537 FILE *f = (first_callout | callout_extra)? outfile : NULL;
1538 int i, pre_start, post_start, subject_length;
1539
1540 if (callout_extra)
1541 {
1542 fprintf(f, "Callout %d: last capture = %d\n",
1543 cb->callout_number, cb->capture_last);
1544
1545 for (i = 0; i < cb->capture_top * 2; i += 2)
1546 {
1547 if (cb->offset_vector[i] < 0)
1548 fprintf(f, "%2d: <unset>\n", i/2);
1549 else
1550 {
1551 fprintf(f, "%2d: ", i/2);
1552 PCHARSV(cb->subject, cb->offset_vector[i],
1553 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1554 fprintf(f, "\n");
1555 }
1556 }
1557 }
1558
1559 /* Re-print the subject in canonical form, the first time or if giving full
1560 datails. On subsequent calls in the same match, we use pchars just to find the
1561 printed lengths of the substrings. */
1562
1563 if (f != NULL) fprintf(f, "--->");
1564
1565 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1566 PCHARS(post_start, cb->subject, cb->start_match,
1567 cb->current_position - cb->start_match, f);
1568
1569 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1570
1571 PCHARSV(cb->subject, cb->current_position,
1572 cb->subject_length - cb->current_position, f);
1573
1574 if (f != NULL) fprintf(f, "\n");
1575
1576 /* Always print appropriate indicators, with callout number if not already
1577 shown. For automatic callouts, show the pattern offset. */
1578
1579 if (cb->callout_number == 255)
1580 {
1581 fprintf(outfile, "%+3d ", cb->pattern_position);
1582 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1583 }
1584 else
1585 {
1586 if (callout_extra) fprintf(outfile, " ");
1587 else fprintf(outfile, "%3d ", cb->callout_number);
1588 }
1589
1590 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1591 fprintf(outfile, "^");
1592
1593 if (post_start > 0)
1594 {
1595 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1596 fprintf(outfile, "^");
1597 }
1598
1599 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1600 fprintf(outfile, " ");
1601
1602 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1603 pbuffer + cb->pattern_position);
1604
1605 fprintf(outfile, "\n");
1606 first_callout = 0;
1607
1608 if (cb->mark != last_callout_mark)
1609 {
1610 if (cb->mark == NULL)
1611 fprintf(outfile, "Latest Mark: <unset>\n");
1612 else
1613 {
1614 fprintf(outfile, "Latest Mark: ");
1615 PCHARSV(cb->mark, 0, -1, outfile);
1616 putc('\n', outfile);
1617 }
1618 last_callout_mark = cb->mark;
1619 }
1620
1621 if (cb->callout_data != NULL)
1622 {
1623 int callout_data = *((int *)(cb->callout_data));
1624 if (callout_data != 0)
1625 {
1626 fprintf(outfile, "Callout data = %d\n", callout_data);
1627 return callout_data;
1628 }
1629 }
1630
1631 return (cb->callout_number != callout_fail_id)? 0 :
1632 (++callout_count >= callout_fail_count)? 1 : 0;
1633 }
1634
1635
1636 /*************************************************
1637 * Local malloc functions *
1638 *************************************************/
1639
1640 /* Alternative malloc function, to test functionality and save the size of a
1641 compiled re, which is the first store request that pcre_compile() makes. The
1642 show_malloc variable is set only during matching. */
1643
1644 static void *new_malloc(size_t size)
1645 {
1646 void *block = malloc(size);
1647 gotten_store = size;
1648 if (first_gotten_store == 0) first_gotten_store = size;
1649 if (show_malloc)
1650 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1651 return block;
1652 }
1653
1654 static void new_free(void *block)
1655 {
1656 if (show_malloc)
1657 fprintf(outfile, "free %p\n", block);
1658 free(block);
1659 }
1660
1661 /* For recursion malloc/free, to test stacking calls */
1662
1663 static void *stack_malloc(size_t size)
1664 {
1665 void *block = malloc(size);
1666 if (show_malloc)
1667 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1668 return block;
1669 }
1670
1671 static void stack_free(void *block)
1672 {
1673 if (show_malloc)
1674 fprintf(outfile, "stack_free %p\n", block);
1675 free(block);
1676 }
1677
1678
1679 /*************************************************
1680 * Call pcre_fullinfo() *
1681 *************************************************/
1682
1683 /* Get one piece of information from the pcre_fullinfo() function. When only
1684 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1685 value, but the code is defensive.
1686
1687 Arguments:
1688 re compiled regex
1689 study study data
1690 option PCRE_INFO_xxx option
1691 ptr where to put the data
1692
1693 Returns: 0 when OK, < 0 on error
1694 */
1695
1696 static int
1697 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1698 {
1699 int rc;
1700
1701 if (use_pcre16)
1702 #ifdef SUPPORT_PCRE16
1703 rc = pcre16_fullinfo(re, study, option, ptr);
1704 #else
1705 rc = PCRE_ERROR_BADMODE;
1706 #endif
1707 else
1708 #ifdef SUPPORT_PCRE8
1709 rc = pcre_fullinfo(re, study, option, ptr);
1710 #else
1711 rc = PCRE_ERROR_BADMODE;
1712 #endif
1713
1714 if (rc < 0)
1715 {
1716 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1717 use_pcre16? "16" : "", option);
1718 if (rc == PCRE_ERROR_BADMODE)
1719 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1720 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1721 }
1722
1723 return rc;
1724 }
1725
1726
1727
1728 /*************************************************
1729 * Swap byte functions *
1730 *************************************************/
1731
1732 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1733 value, respectively.
1734
1735 Arguments:
1736 value any number
1737
1738 Returns: the byte swapped value
1739 */
1740
1741 static pcre_uint32
1742 swap_uint32(pcre_uint32 value)
1743 {
1744 return ((value & 0x000000ff) << 24) |
1745 ((value & 0x0000ff00) << 8) |
1746 ((value & 0x00ff0000) >> 8) |
1747 (value >> 24);
1748 }
1749
1750 static pcre_uint16
1751 swap_uint16(pcre_uint16 value)
1752 {
1753 return (value >> 8) | (value << 8);
1754 }
1755
1756
1757
1758 /*************************************************
1759 * Flip bytes in a compiled pattern *
1760 *************************************************/
1761
1762 /* This function is called if the 'F' option was present on a pattern that is
1763 to be written to a file. We flip the bytes of all the integer fields in the
1764 regex data block and the study block. In 16-bit mode this also flips relevant
1765 bytes in the pattern itself. This is to make it possible to test PCRE's
1766 ability to reload byte-flipped patterns, e.g. those compiled on a different
1767 architecture. */
1768
1769 static void
1770 regexflip(pcre *ere, pcre_extra *extra)
1771 {
1772 real_pcre *re = (real_pcre *)ere;
1773 #ifdef SUPPORT_PCRE16
1774 int op;
1775 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1776 int length = re->name_count * re->name_entry_size;
1777 #ifdef SUPPORT_UTF
1778 BOOL utf = (re->options & PCRE_UTF16) != 0;
1779 BOOL utf16_char = FALSE;
1780 #endif /* SUPPORT_UTF */
1781 #endif /* SUPPORT_PCRE16 */
1782
1783 /* Always flip the bytes in the main data block and study blocks. */
1784
1785 re->magic_number = REVERSED_MAGIC_NUMBER;
1786 re->size = swap_uint32(re->size);
1787 re->options = swap_uint32(re->options);
1788 re->flags = swap_uint16(re->flags);
1789 re->top_bracket = swap_uint16(re->top_bracket);
1790 re->top_backref = swap_uint16(re->top_backref);
1791 re->first_char = swap_uint16(re->first_char);
1792 re->req_char = swap_uint16(re->req_char);
1793 re->name_table_offset = swap_uint16(re->name_table_offset);
1794 re->name_entry_size = swap_uint16(re->name_entry_size);
1795 re->name_count = swap_uint16(re->name_count);
1796
1797 if (extra != NULL)
1798 {
1799 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1800 rsd->size = swap_uint32(rsd->size);
1801 rsd->flags = swap_uint32(rsd->flags);
1802 rsd->minlength = swap_uint32(rsd->minlength);
1803 }
1804
1805 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1806 in the name table, if present, and then in the pattern itself. */
1807
1808 #ifdef SUPPORT_PCRE16
1809 if (!use_pcre16) return;
1810
1811 while(TRUE)
1812 {
1813 /* Swap previous characters. */
1814 while (length-- > 0)
1815 {
1816 *ptr = swap_uint16(*ptr);
1817 ptr++;
1818 }
1819 #ifdef SUPPORT_UTF
1820 if (utf16_char)
1821 {
1822 if ((ptr[-1] & 0xfc00) == 0xd800)
1823 {
1824 /* We know that there is only one extra character in UTF-16. */
1825 *ptr = swap_uint16(*ptr);
1826 ptr++;
1827 }
1828 }
1829 utf16_char = FALSE;
1830 #endif /* SUPPORT_UTF */
1831
1832 /* Get next opcode. */
1833
1834 length = 0;
1835 op = *ptr;
1836 *ptr++ = swap_uint16(op);
1837
1838 switch (op)
1839 {
1840 case OP_END:
1841 return;
1842
1843 #ifdef SUPPORT_UTF
1844 case OP_CHAR:
1845 case OP_CHARI:
1846 case OP_NOT:
1847 case OP_NOTI:
1848 case OP_STAR:
1849 case OP_MINSTAR:
1850 case OP_PLUS:
1851 case OP_MINPLUS:
1852 case OP_QUERY:
1853 case OP_MINQUERY:
1854 case OP_UPTO:
1855 case OP_MINUPTO:
1856 case OP_EXACT:
1857 case OP_POSSTAR:
1858 case OP_POSPLUS:
1859 case OP_POSQUERY:
1860 case OP_POSUPTO:
1861 case OP_STARI:
1862 case OP_MINSTARI:
1863 case OP_PLUSI:
1864 case OP_MINPLUSI:
1865 case OP_QUERYI:
1866 case OP_MINQUERYI:
1867 case OP_UPTOI:
1868 case OP_MINUPTOI:
1869 case OP_EXACTI:
1870 case OP_POSSTARI:
1871 case OP_POSPLUSI:
1872 case OP_POSQUERYI:
1873 case OP_POSUPTOI:
1874 case OP_NOTSTAR:
1875 case OP_NOTMINSTAR:
1876 case OP_NOTPLUS:
1877 case OP_NOTMINPLUS:
1878 case OP_NOTQUERY:
1879 case OP_NOTMINQUERY:
1880 case OP_NOTUPTO:
1881 case OP_NOTMINUPTO:
1882 case OP_NOTEXACT:
1883 case OP_NOTPOSSTAR:
1884 case OP_NOTPOSPLUS:
1885 case OP_NOTPOSQUERY:
1886 case OP_NOTPOSUPTO:
1887 case OP_NOTSTARI:
1888 case OP_NOTMINSTARI:
1889 case OP_NOTPLUSI:
1890 case OP_NOTMINPLUSI:
1891 case OP_NOTQUERYI:
1892 case OP_NOTMINQUERYI:
1893 case OP_NOTUPTOI:
1894 case OP_NOTMINUPTOI:
1895 case OP_NOTEXACTI:
1896 case OP_NOTPOSSTARI:
1897 case OP_NOTPOSPLUSI:
1898 case OP_NOTPOSQUERYI:
1899 case OP_NOTPOSUPTOI:
1900 if (utf) utf16_char = TRUE;
1901 #endif
1902 /* Fall through. */
1903
1904 default:
1905 length = OP_lengths16[op] - 1;
1906 break;
1907
1908 case OP_CLASS:
1909 case OP_NCLASS:
1910 /* Skip the character bit map. */
1911 ptr += 32/sizeof(pcre_uint16);
1912 length = 0;
1913 break;
1914
1915 case OP_XCLASS:
1916 /* Reverse the size of the XCLASS instance. */
1917 ptr++;
1918 *ptr = swap_uint16(*ptr);
1919 if (LINK_SIZE > 1)
1920 {
1921 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1922 ptr++;
1923 *ptr = swap_uint16(*ptr);
1924 }
1925 ptr++;
1926
1927 if (LINK_SIZE > 1)
1928 length = ((ptr[-LINK_SIZE] << 16) | ptr[-LINK_SIZE + 1]) -
1929 (1 + LINK_SIZE + 1);
1930 else
1931 length = ptr[-LINK_SIZE] - (1 + LINK_SIZE + 1);
1932
1933 op = *ptr;
1934 *ptr = swap_uint16(op);
1935 if ((op & XCL_MAP) != 0)
1936 {
1937 /* Skip the character bit map. */
1938 ptr += 32/sizeof(pcre_uint16);
1939 length -= 32/sizeof(pcre_uint16);
1940 }
1941 break;
1942 }
1943 }
1944 /* Control should never reach here in 16 bit mode. */
1945 #endif /* SUPPORT_PCRE16 */
1946 }
1947
1948
1949
1950 /*************************************************
1951 * Check match or recursion limit *
1952 *************************************************/
1953
1954 static int
1955 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1956 int start_offset, int options, int *use_offsets, int use_size_offsets,
1957 int flag, unsigned long int *limit, int errnumber, const char *msg)
1958 {
1959 int count;
1960 int min = 0;
1961 int mid = 64;
1962 int max = -1;
1963
1964 extra->flags |= flag;
1965
1966 for (;;)
1967 {
1968 *limit = mid;
1969
1970 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1971 use_offsets, use_size_offsets);
1972
1973 if (count == errnumber)
1974 {
1975 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1976 min = mid;
1977 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1978 }
1979
1980 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1981 count == PCRE_ERROR_PARTIAL)
1982 {
1983 if (mid == min + 1)
1984 {
1985 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1986 break;
1987 }
1988 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1989 max = mid;
1990 mid = (min + mid)/2;
1991 }
1992 else break; /* Some other error */
1993 }
1994
1995 extra->flags &= ~flag;
1996 return count;
1997 }
1998
1999
2000
2001 /*************************************************
2002 * Case-independent strncmp() function *
2003 *************************************************/
2004
/* Compare at most n characters of two zero-terminated strings, ignoring case
as defined by tolower() in the current locale. As with strncmp(), the
comparison stops at the terminating zero: characters beyond the end of two
equal strings are never examined. A count that is zero or negative compares
nothing and yields 0.

Arguments:
  s           first string
  t           second string
  n           maximum number of characters to compare

Returns:      < 0, = 0, or > 0, according to the comparison
*/

static int
strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
{
while (n-- > 0)
  {
  int cs = tolower(*s);
  int ct = tolower(*t);
  if (cs != ct) return cs - ct;

  /* The characters are equal, so if this one is the terminator both strings
  have ended; going on would read past the end of the data. */

  if (*s == 0) break;
  s++;
  t++;
  }
return 0;
}
2024
2025
2026
2027 /*************************************************
2028 * Check newline indicator *
2029 *************************************************/
2030
2031 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2032 a message and return 0 if there is no match.
2033
2034 Arguments:
2035 p points after the leading '<'
2036 f file for error message
2037
2038 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2039 */
2040
2041 static int
2042 check_newline(pcre_uint8 *p, FILE *f)
2043 {
2044 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2045 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2046 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2047 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2048 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2049 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2050 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2051 fprintf(f, "Unknown newline type at: <%s\n", p);
2052 return 0;
2053 }
2054
2055
2056
2057 /*************************************************
2058 * Usage function *
2059 *************************************************/
2060
/* Write the command-line help text to stdout. The lines for options that
depend on how the program was built (readline, 16-bit, DFA and POSIX support)
are selected by the same preprocessor tests that enable those options. None of
the text contains a format directive, so it is written with fputs(). */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);
#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
fputs("\nOptions:\n", stdout);
#ifdef SUPPORT_PCRE16
fputs(" -16 use 16-bit interface\n", stdout);
#endif
fputs(" -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -C arg show a specific compile-time option\n"
      " and exit with its value. The arg can be:\n", stdout);
fputs(" linksize internal link size [2, 3, 4]\n"
      " pcre8 8 bit library support enabled [0, 1]\n"
      " pcre16 16 bit library support enabled [0, 1]\n"
      " utf Unicode Transformation Format supported [0, 1]\n"
      " ucp Unicode Properties supported [0, 1]\n"
      " jit Just-in-time compiler supported [0, 1]\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n", stdout);
fputs(" -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
2106
2107
2108
2109 /*************************************************
2110 * Main Program *
2111 *************************************************/
2112
2113 /* Read lines from named file or stdin and write to named file or stdout; lines
2114 consist of a regular expression, in delimiters and optionally followed by
2115 options, followed by a set of test data, terminated by an empty line. */
2116
2117 int main(int argc, char **argv)
2118 {
2119 FILE *infile = stdin;
2120 const char *version;
2121 int options = 0;
2122 int study_options = 0;
2123 int default_find_match_limit = FALSE;
2124 int op = 1;
2125 int timeit = 0;
2126 int timeitm = 0;
2127 int showinfo = 0;
2128 int showstore = 0;
2129 int force_study = -1;
2130 int force_study_options = 0;
2131 int quiet = 0;
2132 int size_offsets = 45;
2133 int size_offsets_max;
2134 int *offsets = NULL;
2135 #if !defined NOPOSIX
2136 int posix = 0;
2137 #endif
2138 int debug = 0;
2139 int done = 0;
2140 int all_use_dfa = 0;
2141 int yield = 0;
2142 int stack_size;
2143
2144 pcre_jit_stack *jit_stack = NULL;
2145
2146 /* These vectors store, end-to-end, a list of zero-terminated captured
2147 substring names, each list itself being terminated by an empty name. Assume
2148 that 1024 is plenty long enough for the few names we'll be testing. It is
2149 easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2150 for the actual memory, to ensure alignment. By defining these variables always
2151 (whether or not 8-bit or 16-bit is supported), we avoid too much mess with
2152 #ifdefs in the code. */
2153
2154 pcre_uint16 copynames[1024];
2155 pcre_uint16 getnames[1024];
2156
2157 pcre_uint16 *cn16ptr;
2158 pcre_uint16 *gn16ptr;
2159
2160 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2161 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2162 pcre_uint8 *cn8ptr;
2163 pcre_uint8 *gn8ptr;
2164
2165 /* Get buffers from malloc() so that valgrind will check their misuse when
2166 debugging. They grow automatically when very long lines are read. The 16-bit
2167 buffer (buffer16) is obtained only if needed. */
2168
2169 buffer = (pcre_uint8 *)malloc(buffer_size);
2170 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2171 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2172
2173 /* The outfile variable is static so that new_malloc can use it. */
2174
2175 outfile = stdout;
2176
2177 /* The following _setmode() stuff is some Windows magic that tells its runtime
2178 library to translate CRLF into a single LF character. At least, that's what
2179 I've been told: never having used Windows I take this all on trust. Originally
2180 it set 0x8000, but then I was advised that _O_BINARY was better. */
2181
2182 #if defined(_WIN32) || defined(WIN32)
2183 _setmode( _fileno( stdout ), _O_BINARY );
2184 #endif
2185
2186 /* Get the version number: both pcre_version() and pcre16_version() give the
2187 same answer. We just need to ensure that we call one that is available. */
2188
2189 #ifdef SUPPORT_PCRE8
2190 version = pcre_version();
2191 #else
2192 version = pcre16_version();
2193 #endif
2194
2195 /* Scan options */
2196
2197 while (argc > 1 && argv[op][0] == '-')
2198 {
2199 pcre_uint8 *endptr;
2200
2201 if (strcmp(argv[op], "-m") == 0) showstore = 1;
2202 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2203 else if (strcmp(argv[op], "-s+") == 0)
2204 {
2205 force_study = 1;
2206 force_study_options = PCRE_STUDY_JIT_COMPILE;
2207 }
2208 else if (strcmp(argv[op], "-16") == 0)
2209 {
2210 #ifdef SUPPORT_PCRE16
2211 use_pcre16 = 1;
2212 #else
2213 printf("** This version of PCRE was built without 16-bit support\n");
2214 exit(1);
2215 #endif
2216 }
2217 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2218 else if (strcmp(argv[op], "-b") == 0) debug = 1;
2219 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2220 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2221 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2222 #if !defined NODFA
2223 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2224 #endif
2225 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2226 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2227 *endptr == 0))
2228 {
2229 op++;
2230 argc--;
2231 }
2232 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2233 {
2234 int both = argv[op][2] == 0;
2235 int temp;
2236 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2237 *endptr == 0))
2238 {
2239 timeitm = temp;
2240 op++;
2241 argc--;
2242 }
2243 else timeitm = LOOPREPEAT;
2244 if (both) timeit = timeitm;
2245 }
2246 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2247 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2248 *endptr == 0))
2249 {
2250 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2251 printf("PCRE: -S not supported on this OS\n");
2252 exit(1);
2253 #else
2254 int rc;
2255 struct rlimit rlim;
2256 getrlimit(RLIMIT_STACK, &rlim);
2257 rlim.rlim_cur = stack_size * 1024 * 1024;
2258 rc = setrlimit(RLIMIT_STACK, &rlim);
2259 if (rc != 0)
2260 {
2261 printf("PCRE: setrlimit() failed with error %d\n", rc);
2262 exit(1);
2263 }
2264 op++;
2265 argc--;
2266 #endif
2267 }
2268 #if !defined NOPOSIX
2269 else if (strcmp(argv[op], "-p") == 0) posix = 1;
2270 #endif
2271 else if (strcmp(argv[op], "-C") == 0)
2272 {
2273 int rc;
2274 unsigned long int lrc;
2275
2276 if (argc > 2)
2277 {
2278 if (strcmp(argv[op + 1], "linksize") == 0)
2279 {
2280 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2281 printf("%d\n", rc);
2282 yield = rc;
2283 goto EXIT;
2284 }
2285 if (strcmp(argv[op + 1], "pcre8") == 0)
2286 {
2287 #ifdef SUPPORT_PCRE8
2288 printf("1\n");
2289 yield = 1;
2290 #else
2291 printf("0\n");
2292 yield = 0;
2293 #endif
2294 goto EXIT;
2295 }
2296 if (strcmp(argv[op + 1], "pcre16") == 0)
2297 {
2298 #ifdef SUPPORT_PCRE16
2299 printf("1\n");
2300 yield = 1;
2301 #else
2302 printf("0\n");
2303 yield = 0;
2304 #endif
2305 goto EXIT;
2306 }
2307 if (strcmp(argv[op + 1], "utf") == 0)
2308 {
2309 #ifdef SUPPORT_PCRE8
2310 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2311 printf("%d\n", rc);
2312 yield = rc;
2313 #else
2314 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2315 printf("%d\n", rc);
2316 yield = rc;
2317 #endif
2318 goto EXIT;
2319 }
2320 if (strcmp(argv[op + 1], "ucp") == 0)
2321 {
2322 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2323 printf("%d\n", rc);
2324 yield = rc;
2325 goto EXIT;
2326 }
2327 if (strcmp(argv[op + 1], "jit") == 0)
2328 {
2329 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2330 printf("%d\n", rc);
2331 yield = rc;
2332 goto EXIT;
2333 }
2334 printf("Unknown option: %s\n", argv[op + 1]);
2335 goto EXIT;
2336 }
2337
2338 printf("PCRE version %s\n", version);
2339 printf("Compiled with\n");
2340
2341 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2342 are set, either both UTFs are supported or both are not supported. */
2343
2344 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2345 printf(" 8-bit and 16-bit support\n");
2346 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2347 if (rc)
2348 printf(" UTF-8 and UTF-16 support\n");
2349 else
2350 printf(" No UTF-8 or UTF-16 support\n");
2351 #elif defined SUPPORT_PCRE8
2352 printf(" 8-bit support only\n");
2353 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2354 printf(" %sUTF-8 support\n", rc? "" : "No ");
2355 #else
2356 printf(" 16-bit support only\n");
2357 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2358 printf(" %sUTF-16 support\n", rc? "" : "No ");
2359 #endif
2360
2361 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2362 printf(" %sUnicode properties support\n", rc? "" : "No ");
2363 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2364 if (rc)
2365 printf(" Just-in-time compiler support\n");
2366 else
2367 printf(" No just-in-time compiler support\n");
2368 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2369 /* Note that these values are always the ASCII values, even
2370 in EBCDIC environments. CR is 13 and NL is 10. */
2371 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2372 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2373 (rc == -2)? "ANYCRLF" :
2374 (rc == -1)? "ANY" : "???");
2375 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2376 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2377 "all Unicode newlines");
2378 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2379 printf(" Internal link size = %d\n", rc);
2380 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2381 printf(" POSIX malloc threshold = %d\n", rc);
2382 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2383 printf(" Default match limit = %ld\n", lrc);
2384 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2385 printf(" Default recursion depth limit = %ld\n", lrc);
2386 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2387 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
2388 goto EXIT;
2389 }
2390 else if (strcmp(argv[op], "-help") == 0 ||
2391 strcmp(argv[op], "--help") == 0)
2392 {
2393 usage();
2394 goto EXIT;
2395 }
2396 else
2397 {
2398 printf("** Unknown or malformed option %s\n", argv[op]);
2399 usage();
2400 yield = 1;
2401 goto EXIT;
2402 }
2403 op++;
2404 argc--;
2405 }
2406
2407 /* Get the store for the offsets vector, and remember what it was */
2408
2409 size_offsets_max = size_offsets;
2410 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2411 if (offsets == NULL)
2412 {
2413 printf("** Failed to get %d bytes of memory for offsets vector\n",
2414 (int)(size_offsets_max * sizeof(int)));
2415 yield = 1;
2416 goto EXIT;
2417 }
2418
2419 /* Sort out the input and output files */
2420
2421 if (argc > 1)
2422 {
2423 infile = fopen(argv[op], INPUT_MODE);
2424 if (infile == NULL)
2425 {
2426 printf("** Failed to open %s\n", argv[op]);
2427 yield = 1;
2428 goto EXIT;
2429 }
2430 }
2431
2432 if (argc > 2)
2433 {
2434 outfile = fopen(argv[op+1], OUTPUT_MODE);
2435 if (outfile == NULL)
2436 {
2437 printf("** Failed to open %s\n", argv[op+1]);
2438 yield = 1;
2439 goto EXIT;
2440 }
2441 }
2442
2443 /* Set alternative malloc function */
2444
2445 #ifdef SUPPORT_PCRE8
2446 pcre_malloc = new_malloc;
2447 pcre_free = new_free;
2448 pcre_stack_malloc = stack_malloc;
2449 pcre_stack_free = stack_free;
2450 #endif
2451
2452 #ifdef SUPPORT_PCRE16
2453 pcre16_malloc = new_malloc;
2454 pcre16_free = new_free;
2455 pcre16_stack_malloc = stack_malloc;
2456 pcre16_stack_free = stack_free;
2457 #endif
2458
2459 /* Heading line unless quiet, then prompt for first regex if stdin */
2460
2461 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2462
2463 /* Main loop */
2464
2465 while (!done)
2466 {
2467 pcre *re = NULL;
2468 pcre_extra *extra = NULL;
2469
2470 #if !defined NOPOSIX /* There are still compilers that require no indent */
2471 regex_t preg;
2472 int do_posix = 0;
2473 #endif
2474
2475 const char *error;
2476 pcre_uint8 *markptr;
2477 pcre_uint8 *p, *pp, *ppp;
2478 pcre_uint8 *to_file = NULL;
2479 const pcre_uint8 *tables = NULL;
2480 unsigned long int true_size, true_study_size = 0;
2481 size_t size, regex_gotten_store;
2482 int do_allcaps = 0;
2483 int do_mark = 0;
2484 int do_study = 0;
2485 int no_force_study = 0;
2486 int do_debug = debug;
2487 int do_G = 0;
2488 int do_g = 0;
2489 int do_showinfo = showinfo;
2490 int do_showrest = 0;
2491 int do_showcaprest = 0;
2492 int do_flip = 0;
2493 int erroroffset, len, delimiter, poffset;
2494
2495 use_utf = 0;
2496 debug_lengths = 1;
2497
2498 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2499 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2500 fflush(outfile);
2501
2502 p = buffer;
2503 while (isspace(*p)) p++;
2504 if (*p == 0) continue;
2505
2506 /* See if the pattern is to be loaded pre-compiled from a file. */
2507
2508 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2509 {
2510 unsigned long int magic, get_options;
2511 pcre_uint8 sbuf[8];
2512 FILE *f;
2513
2514 p++;
2515 pp = p + (int)strlen((char *)p);
2516 while (isspace(pp[-1])) pp--;
2517 *pp = 0;
2518
2519 f = fopen((char *)p, "rb");
2520 if (f == NULL)
2521 {
2522 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2523 continue;
2524 }
2525
2526 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2527
2528 true_size =
2529 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2530 true_study_size =
2531 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2532
2533 re = (real_pcre *)new_malloc(true_size);
2534 regex_gotten_store = first_gotten_store;
2535
2536 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2537
2538 magic = ((real_pcre *)re)->magic_number;
2539 if (magic != MAGIC_NUMBER)
2540 {
2541 if (swap_uint32(magic) == MAGIC_NUMBER)
2542 {
2543 do_flip = 1;
2544 }
2545 else
2546 {
2547 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2548 fclose(f);
2549 continue;
2550 }
2551 }
2552
2553 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2554 do_flip? " (byte-inverted)" : "", p);
2555
2556 /* Now see if there is any following study data. */
2557
2558 if (true_study_size != 0)
2559 {
2560 pcre_study_data *psd;
2561
2562 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2563 extra->flags = PCRE_EXTRA_STUDY_DATA;
2564
2565 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2566 extra->study_data = psd;
2567
2568 if (fread(psd, 1, true_study_size, f) != true_study_size)
2569 {
2570 FAIL_READ:
2571 fprintf(outfile, "Failed to read data from %s\n", p);
2572 if (extra != NULL)
2573 {
2574 PCRE_FREE_STUDY(extra);
2575 }
2576 if (re != NULL) new_free(re);
2577 fclose(f);
2578 continue;
2579 }
2580 fprintf(outfile, "Study data loaded from %s\n", p);
2581 do_study = 1; /* To get the data output if requested */
2582 }
2583 else fprintf(outfile, "No study data\n");
2584
2585 /* Flip the necessary bytes. */
2586 if (do_flip)
2587 {
2588 PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, NULL);
2589 }
2590
2591 /* Need to know if UTF-8 for printing data strings. */
2592
2593 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2594 use_utf = (get_options & PCRE_UTF8) != 0;
2595
2596 fclose(f);
2597 goto SHOW_INFO;
2598 }
2599
2600 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2601 the pattern; if it isn't complete, read more. */
2602
2603 delimiter = *p++;
2604
2605 if (isalnum(delimiter) || delimiter == '\\')
2606 {
2607 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2608 goto SKIP_DATA;
2609 }
2610
2611 pp = p;
2612 poffset = (int)(p - buffer);
2613
2614 for(;;)
2615 {
2616 while (*pp != 0)
2617 {
2618 if (*pp == '\\' && pp[1] != 0) pp++;
2619 else if (*pp == delimiter) break;
2620 pp++;
2621 }
2622 if (*pp != 0) break;
2623 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2624 {
2625 fprintf(outfile, "** Unexpected EOF\n");
2626 done = 1;
2627 goto CONTINUE;
2628 }
2629 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2630 }
2631
2632 /* The buffer may have moved while being extended; reset the start of data
2633 pointer to the correct relative point in the buffer. */
2634
2635 p = buffer + poffset;
2636
2637 /* If the first character after the delimiter is backslash, make
2638 the pattern end with backslash. This is purely to provide a way
2639 of testing for the error message when a pattern ends with backslash. */
2640
2641 if (pp[1] == '\\') *pp++ = '\\';
2642
2643 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2644 for callouts. */
2645
2646 *pp++ = 0;
2647 strcpy((char *)pbuffer, (char *)p);
2648
2649 /* Look for options after final delimiter */
2650
2651 options = 0;
2652 study_options = 0;
2653 log_store = showstore; /* default from command line */
2654
2655 while (*pp != 0)
2656 {
2657 switch (*pp++)
2658 {
2659 case 'f': options |= PCRE_FIRSTLINE; break;
2660 case 'g': do_g = 1; break;
2661 case 'i': options |= PCRE_CASELESS; break;
2662 case 'm': options |= PCRE_MULTILINE; break;
2663 case 's': options |= PCRE_DOTALL; break;
2664 case 'x': options |= PCRE_EXTENDED; break;
2665
2666 case '+':
2667 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2668 break;
2669
2670 case '=': do_allcaps = 1; break;
2671 case 'A': options |= PCRE_ANCHORED; break;
2672 case 'B': do_debug = 1; break;
2673 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2674 case 'D': do_debug = do_showinfo = 1; break;
2675 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2676 case 'F': do_flip = 1; break;
2677 case 'G': do_G = 1; break;
2678 case 'I': do_showinfo = 1; break;
2679 case 'J': options |= PCRE_DUPNAMES; break;
2680 case 'K': do_mark = 1; break;
2681 case 'M': log_store = 1; break;
2682 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2683
2684 #if !defined NOPOSIX
2685 case 'P': do_posix = 1; break;
2686 #endif
2687
2688 case 'S':
2689 if (do_study == 0)
2690 {
2691 do_study = 1;
2692 if (*pp == '+')
2693 {
2694 study_options |= PCRE_STUDY_JIT_COMPILE;
2695 pp++;
2696 }
2697 }
2698 else
2699 {
2700 do_study = 0;
2701 no_force_study = 1;
2702 }
2703 break;
2704
2705 case 'U': options |= PCRE_UNGREEDY; break;
2706 case 'W': options |= PCRE_UCP; break;
2707 case 'X': options |= PCRE_EXTRA; break;
2708 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2709 case 'Z': debug_lengths = 0; break;
2710 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2711 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2712
2713 case 'T':
2714 switch (*pp++)
2715 {
2716 case '0': tables = tables0; break;
2717 case '1': tables = tables1; break;
2718
2719 case '\r':
2720 case '\n':
2721 case ' ':
2722 case 0:
2723 fprintf(outfile, "** Missing table number after /T\n");
2724 goto SKIP_DATA;
2725
2726 default:
2727 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2728 goto SKIP_DATA;
2729 }
2730 break;
2731
2732 case 'L':
2733 ppp = pp;
2734 /* The '\r' test here is so that it works on Windows. */
2735 /* The '0' test is just in case this is an unterminated line. */
2736 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2737 *ppp = 0;
2738 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2739 {
2740 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2741 goto SKIP_DATA;
2742 }
2743 locale_set = 1;
2744 tables = PCRE_MAKETABLES;
2745 pp = ppp;
2746 break;
2747
2748 case '>':
2749 to_file = pp;
2750 while (*pp != 0) pp++;
2751 while (isspace(pp[-1])) pp--;
2752 *pp = 0;
2753 break;
2754
2755 case '<':
2756 {
2757 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2758 {
2759 options |= PCRE_JAVASCRIPT_COMPAT;
2760 pp += 3;
2761 }
2762 else
2763 {
2764 int x = check_newline(pp, outfile);
2765 if (x == 0) goto SKIP_DATA;
2766 options |= x;
2767 while (*pp++ != '>');
2768 }
2769 }
2770 break;
2771
2772 case '\r': /* So that it works in Windows */
2773 case '\n':
2774 case ' ':
2775 break;
2776
2777 default:
2778 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2779 goto SKIP_DATA;
2780 }
2781 }
2782
2783 /* Handle compiling via the POSIX interface, which doesn't support the
2784 timing, showing, or debugging options, nor the ability to pass over
2785 local character tables. Neither does it have 16-bit support. */
2786
2787 #if !defined NOPOSIX
2788 if (posix || do_posix)
2789 {
2790 int rc;
2791 int cflags = 0;
2792
2793 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2794 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2795 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2796 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2797 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2798 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2799 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2800
2801 first_gotten_store = 0;
2802 rc = regcomp(&preg, (char *)p, cflags);
2803
2804 /* Compilation failed; go back for another re, skipping to blank line
2805 if non-interactive. */
2806
2807 if (rc != 0)
2808 {
2809 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2810 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2811 goto SKIP_DATA;
2812 }
2813 }
2814
2815 /* Handle compiling via the native interface */
2816
2817 else
2818 #endif /* !defined NOPOSIX */
2819
2820 {
2821 unsigned long int get_options;
2822
2823 /* In 16-bit mode, convert the input. */
2824
2825 #ifdef SUPPORT_PCRE16
2826 if (use_pcre16)
2827 {
2828 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2829 {
2830 case -1:
2831 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2832 "converted to UTF-16\n");
2833 goto SKIP_DATA;
2834
2835 case -2:
2836 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2837 "cannot be converted to UTF-16\n");
2838 goto SKIP_DATA;
2839
2840 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2841 fprintf(outfile, "**Failed: character value greater than 0xffff "
2842 "cannot be converted to 16-bit in non-UTF mode\n");
2843 goto SKIP_DATA;
2844
2845 default:
2846 break;
2847 }
2848 p = (pcre_uint8 *)buffer16;
2849 }
2850 #endif
2851
2852 /* Compile many times when timing */
2853
2854 if (timeit > 0)
2855 {
2856 register int i;
2857 clock_t time_taken;
2858 clock_t start_time = clock();
2859 for (i = 0; i < timeit; i++)
2860 {
2861 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2862 if (re != NULL) free(re);
2863 }
2864 time_taken = clock() - start_time;
2865 fprintf(outfile, "Compile time %.4f milliseconds\n",
2866 (((double)time_taken * 1000.0) / (double)timeit) /
2867 (double)CLOCKS_PER_SEC);
2868 }
2869
2870 first_gotten_store = 0;
2871 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2872
2873 /* Compilation failed; go back for another re, skipping to blank line
2874 if non-interactive. */
2875
2876 if (re == NULL)
2877 {
2878 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2879 SKIP_DATA:
2880 if (infile != stdin)
2881 {
2882 for (;;)
2883 {
2884 if (extend_inputline(infile, buffer, NULL) == NULL)
2885 {
2886 done = 1;
2887 goto CONTINUE;
2888 }
2889 len = (int)strlen((char *)buffer);
2890 while (len > 0 && isspace(buffer[len-1])) len--;
2891 if (len == 0) break;
2892 }
2893 fprintf(outfile, "\n");
2894 }
2895 goto CONTINUE;
2896 }
2897
2898 /* Compilation succeeded. It is now possible to set the UTF-8 option from
2899 within the regex; check for this so that we know how to process the data
2900 lines. */
2901
2902 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2903 goto SKIP_DATA;
2904 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2905
2906 /* Extract the size for possible writing before possibly flipping it,
2907 and remember the store that was got. */
2908
2909 true_size = ((real_pcre *)re)->size;
2910 regex_gotten_store = first_gotten_store;
2911
2912 /* Output code size information if requested */
2913
2914 if (log_store)
2915 fprintf(outfile, "Memory allocation (code space): %d\n",
2916 (int)(first_gotten_store -
2917 sizeof(real_pcre) -
2918 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2919
2920 /* If -s or /S was present, study the regex to generate additional info to
2921 help with the matching, unless the pattern has the SS option, which
2922 suppresses the effect of /S (used for a few test patterns where studying is
2923 never sensible). */
2924
2925 if (do_study || (force_study >= 0 && !no_force_study))
2926 {
2927 if (timeit > 0)
2928 {
2929 register int i;
2930 clock_t time_taken;
2931 clock_t start_time = clock();
2932 for (i = 0; i < timeit; i++)
2933 {
2934 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2935 }
2936 time_taken = clock() - start_time;
2937 if (extra != NULL)
2938 {
2939 PCRE_FREE_STUDY(extra);
2940 }
2941 fprintf(outfile, " Study time %.4f milliseconds\n",
2942 (((double)time_taken * 1000.0) / (double)timeit) /
2943 (double)CLOCKS_PER_SEC);
2944 }
2945 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2946 if (error != NULL)
2947 fprintf(outfile, "Failed to study: %s\n", error);
2948 else if (extra != NULL)
2949 {
2950 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2951 if (log_store)
2952 {
2953 size_t jitsize;
2954 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
2955 jitsize != 0)
2956 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
2957 }
2958 }
2959 }
2960
2961 /* If /K was present, we set up for handling MARK data. */
2962
2963 if (do_mark)
2964 {
2965 if (extra == NULL)
2966 {
2967 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2968 extra->flags = 0;
2969 }
2970 extra->mark = &markptr;
2971 extra->flags |= PCRE_EXTRA_MARK;
2972 }
2973
2974 /* Extract and display information from the compiled data if required. */
2975
2976 SHOW_INFO:
2977
2978 if (do_debug)
2979 {
2980 fprintf(outfile, "------------------------------------------------------------------\n");
2981 PCRE_PRINTINT(re, outfile, debug_lengths);
2982 }
2983
2984 /* We already have the options in get_options (see above) */
2985
2986 if (do_showinfo)
2987 {
2988 unsigned long int all_options;
2989 int count, backrefmax, first_char, need_char, okpartial, jchanged,
2990 hascrorlf;
2991 int nameentrysize, namecount;
2992 const pcre_uint8 *nametable;
2993
2994 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
2995 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
2996 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
2997 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
2998 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
2999 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3000 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3001 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3002 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3003 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3004 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3005 != 0)
3006 goto SKIP_DATA;
3007
3008 if (size != regex_gotten_store) fprintf(outfile,
3009 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3010 (int)size, (int)regex_gotten_store);
3011
3012 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3013 if (backrefmax > 0)
3014 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3015
3016 if (namecount > 0)
3017 {
3018 fprintf(outfile, "Named capturing subpatterns:\n");
3019 while (namecount-- > 0)
3020 {
3021 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3022 int imm2_size = use_pcre16 ? 1 : 2;
3023 #else
3024 int imm2_size = IMM2_SIZE;
3025 #endif
3026 int length = (int)STRLEN(nametable + imm2_size);
3027 fprintf(outfile, " ");
3028 PCHARSV(nametable, imm2_size, length, outfile);
3029 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3030 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3031 fprintf(outfile, "%3d\n", use_pcre16?
3032 (int)(((PCRE_SPTR16)nametable)[0])
3033 :((int)nametable[0] << 8) | (int)nametable[1]);
3034 nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3035 #else
3036 fprintf(outfile, "%3d\n", GET2(nametable, 0));
3037 #ifdef SUPPORT_PCRE8
3038 nametable += nameentrysize;
3039 #else
3040 nametable += nameentrysize * 2;
3041 #endif
3042 #endif
3043 }
3044 }
3045
3046 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3047 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3048
3049 all_options = ((real_pcre *)re)->options;
3050 if (do_flip) all_options = swap_uint32(all_options);
3051
3052 if (get_options == 0) fprintf(outfile, "No options\n");
3053 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3054 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3055 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3056 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3057 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3058 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3059 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3060 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3061 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3062 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3063 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3064 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3065 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3066 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3067 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3068 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3069 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3070 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3071
3072 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3073
3074 switch (get_options & PCRE_NEWLINE_BITS)
3075 {
3076 case PCRE_NEWLINE_CR:
3077 fprintf(outfile, "Forced newline sequence: CR\n");
3078 break;
3079
3080 case PCRE_NEWLINE_LF:
3081 fprintf(outfile, "Forced newline sequence: LF\n");
3082 break;
3083
3084 case PCRE_NEWLINE_CRLF:
3085 fprintf(outfile, "Forced newline sequence: CRLF\n");
3086 break;
3087
3088 case PCRE_NEWLINE_ANYCRLF:
3089 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3090 break;
3091
3092 case PCRE_NEWLINE_ANY:
3093 fprintf(outfile, "Forced newline sequence: ANY\n");
3094 break;
3095
3096 default:
3097 break;
3098 }
3099
3100 if (first_char == -1)
3101 {
3102 fprintf(outfile, "First char at start or follows newline\n");
3103 }
3104 else if (first_char < 0)
3105 {
3106 fprintf(outfile, "No first char\n");
3107 }
3108 else
3109 {
3110 const char *caseless =
3111 ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3112 "" : " (caseless)";
3113
3114 if (PRINTOK(first_char))
3115 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3116 else
3117 {
3118 fprintf(outfile, "First char = ");
3119 pchar(first_char, outfile);
3120 fprintf(outfile, "%s\n", caseless);
3121 }
3122 }
3123
3124 if (need_char < 0)
3125 {
3126 fprintf(outfile, "No need char\n");
3127 }
3128 else
3129 {
3130 const char *caseless =
3131 ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3132 "" : " (caseless)";
3133
3134 if (PRINTOK(need_char))
3135 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3136 else
3137 {
3138 fprintf(outfile, "Need char = ");
3139 pchar(need_char, outfile);
3140 fprintf(outfile, "%s\n", caseless);
3141 }
3142 }
3143
3144 /* Don't output study size; at present it is in any case a fixed
3145 value, but it varies, depending on the computer architecture, and
3146 so messes up the test suite. (And with the /F option, it might be
3147 flipped.) If study was forced by an external -s, don't show this
3148 information unless -i or -d was also present. This means that, except
3149 when auto-callouts are involved, the output from runs with and without
3150 -s should be identical. */
3151
3152 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3153 {
3154 if (extra == NULL)
3155 fprintf(outfile, "Study returned NULL\n");
3156 else
3157 {
3158 pcre_uint8 *start_bits = NULL;
3159 int minlength;
3160
3161 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3162 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3163
3164 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3165 {
3166 if (start_bits == NULL)
3167 fprintf(outfile, "No set of starting bytes\n");
3168 else
3169 {
3170 int i;
3171 int c = 24;
3172 fprintf(outfile, "Starting byte set: ");
3173 for (i = 0; i < 256; i++)
3174 {
3175 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3176 {
3177 if (c > 75)
3178 {
3179 fprintf(outfile, "\n ");
3180 c = 2;
3181 }
3182 if (PRINTOK(i) && i != ' ')
3183 {
3184 fprintf(outfile, "%c ", i);
3185 c += 2;
3186 }
3187 else
3188 {
3189 fprintf(outfile, "\\x%02x ", i);
3190 c += 5;
3191 }
3192 }
3193 }
3194 fprintf(outfile, "\n");
3195 }
3196 }
3197 }
3198
3199 /* Show this only if the JIT was set by /S, not by -s. */
3200
3201 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3202 {
3203 int jit;
3204 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3205 {
3206 if (jit)
3207 fprintf(outfile, "JIT study was successful\n");
3208 else
3209 #ifdef SUPPORT_JIT
3210 fprintf(outfile, "JIT study was not successful\n");
3211 #else
3212 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3213 #endif
3214 }
3215 }
3216 }
3217 }
3218
3219 /* If the '>' option was present, we write out the regex to a file, and
3220 that is all. The first 8 bytes of the file are the regex length and then
3221 the study length, in big-endian order. */
3222
3223 if (to_file != NULL)
3224 {
3225 FILE *f = fopen((char *)to_file, "wb");
3226 if (f == NULL)
3227 {
3228 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3229 }
3230 else
3231 {
3232 pcre_uint8 sbuf[8];
3233
3234 if (do_flip) regexflip(re, extra);
3235 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3236 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3237 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3238 sbuf[3] = (pcre_uint8)((true_size) & 255);
3239 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3240 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3241 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3242 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3243
3244 if (fwrite(sbuf, 1, 8, f) < 8 ||
3245 fwrite(re, 1, true_size, f) < true_size)
3246 {
3247 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3248 }
3249 else
3250 {
3251 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3252
3253 /* If there is study data, write it. */
3254
3255 if (extra != NULL)
3256 {
3257 if (fwrite(extra->study_data, 1, true_study_size, f) <
3258 true_study_size)
3259 {
3260 fprintf(outfile, "Write error on %s: %s\n", to_file,
3261 strerror(errno));
3262 }
3263 else fprintf(outfile, "Study data written to %s\n", to_file);
3264 }
3265 }
3266 fclose(f);
3267 }
3268
3269 new_free(re);
3270 if (extra != NULL)
3271 {
3272 PCRE_FREE_STUDY(extra);
3273 }
3274 if (locale_set)
3275 {
3276 new_free((void *)tables);
3277 setlocale(LC_CTYPE, "C");
3278 locale_set = 0;
3279 }
3280 continue; /* With next regex */
3281 }
3282 } /* End of non-POSIX compile */
3283
3284 /* Read data lines and test them */
3285
3286 for (;;)
3287 {
3288 pcre_uint8 *q;
3289 pcre_uint8 *bptr;
3290 int *use_offsets = offsets;
3291 int use_size_offsets = size_offsets;
3292 int callout_data = 0;
3293 int callout_data_set = 0;
3294 int count, c;
3295 int copystrings = 0;
3296 int find_match_limit = default_find_match_limit;
3297 int getstrings = 0;
3298 int getlist = 0;
3299 int gmatched = 0;
3300 int start_offset = 0;
3301 int start_offset_sign = 1;
3302 int g_notempty = 0;
3303 int use_dfa = 0;
3304
3305 *copynames = 0;
3306 *getnames = 0;
3307
3308 cn16ptr = copynames;
3309 gn16ptr = getnames;
3310 cn8ptr = copynames8;
3311 gn8ptr = getnames8;
3312
3313 SET_PCRE_CALLOUT(callout);
3314 first_callout = 1;
3315 last_callout_mark = NULL;
3316 callout_extra = 0;
3317 callout_count = 0;
3318 callout_fail_count = 999999;
3319 callout_fail_id = -1;
3320 show_malloc = 0;
3321 options = 0;
3322
3323 if (extra != NULL) extra->flags &=
3324 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3325
3326 len = 0;
3327 for (;;)
3328 {
3329 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3330 {
3331 if (len > 0) /* Reached EOF without hitting a newline */
3332 {
3333 fprintf(outfile, "\n");
3334 break;
3335 }
3336 done = 1;
3337 goto CONTINUE;
3338 }
3339 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3340 len = (int)strlen((char *)buffer);
3341 if (buffer[len-1] == '\n') break;
3342 }
3343
3344 while (len > 0 && isspace(buffer[len-1])) len--;
3345 buffer[len] = 0;
3346 if (len == 0) break;
3347
3348 p = buffer;
3349 while (isspace(*p)) p++;
3350
3351 bptr = q = dbuffer;
3352 while ((c = *p++) != 0)
3353 {
3354 int i = 0;
3355 int n = 0;
3356
3357 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3358 In non-UTF mode, allow the value of the byte to fall through to later,
3359 where values greater than 127 are turned into UTF-8 when running in
3360 16-bit mode. */
3361
3362 if (c != '\\')
3363 {
3364 if (use_utf)
3365 {
3366 *q++ = c;
3367 continue;
3368 }
3369 }
3370
3371 /* Handle backslash escapes */
3372
3373 else switch ((c = *p++))
3374 {
3375 case 'a': c = 7; break;
3376 case 'b': c = '\b'; break;
3377 case 'e': c = 27; break;
3378 case 'f': c = '\f'; break;
3379 case 'n': c = '\n'; break;
3380 case 'r': c = '\r'; break;
3381 case 't': c = '\t'; break;
3382 case 'v': c = '\v'; break;
3383
3384 case '0': case '1': case '2': case '3':
3385 case '4': case '5': case '6': case '7':
3386 c -= '0';
3387 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3388 c = c * 8 + *p++ - '0';
3389 break;
3390
3391 case 'x':
3392 if (*p == '{')
3393 {
3394 pcre_uint8 *pt = p;
3395 c = 0;
3396
3397 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3398 when isxdigit() is a macro that refers to its argument more than
3399 once. This is banned by the C Standard, but apparently happens in at
3400 least one MacOS environment. */
3401
3402 for (pt++; isxdigit(*pt); pt++)
3403 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3404 if (*pt == '}')
3405 {
3406 p = pt + 1;
3407 break;
3408 }
3409 /* Not correct form for \x{...}; fall through */
3410 }
3411
3412 /* \x without {} always defines just one byte in 8-bit mode. This
3413 allows UTF-8 characters to be constructed byte by byte, and also allows
3414 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3415 Otherwise, pass it down to later code so that it can be turned into
3416 UTF-8 when running in 16-bit mode. */
3417
3418 c = 0;
3419 while (i++ < 2 && isxdigit(*p))
3420 {
3421 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3422 p++;
3423 }
3424 if (use_utf)
3425 {
3426 *q++ = c;
3427 continue;
3428 }
3429 break;
3430
3431 case 0: /* \ followed by EOF allows for an empty line */
3432 p--;
3433 continue;
3434
3435 case '>':
3436 if (*p == '-')
3437 {
3438 start_offset_sign = -1;
3439 p++;
3440 }
3441 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3442 start_offset *= start_offset_sign;
3443 continue;
3444
3445 case 'A': /* Option setting */
3446 options |= PCRE_ANCHORED;
3447 continue;
3448
3449 case 'B':
3450 options |= PCRE_NOTBOL;
3451 continue;
3452
3453 case 'C':
3454 if (isdigit(*p)) /* Set copy string */
3455 {
3456 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3457 copystrings |= 1 << n;
3458 }
3459 else if (isalnum(*p))
3460 {
3461 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3462 }
3463 else if (*p == '+')
3464 {
3465 callout_extra = 1;
3466 p++;
3467 }
3468 else if (*p == '-')
3469 {
3470 SET_PCRE_CALLOUT(NULL);
3471 p++;
3472 }
3473 else if (*p == '!')
3474 {
3475 callout_fail_id = 0;
3476 p++;
3477 while(isdigit(*p))
3478 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3479 callout_fail_count = 0;
3480 if (*p == '!')
3481 {
3482 p++;
3483 while(isdigit(*p))
3484 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3485 }
3486 }
3487 else if (*p == '*')
3488 {
3489 int sign = 1;
3490 callout_data = 0;
3491 if (*(++p) == '-') { sign = -1; p++; }
3492 while(isdigit(*p))
3493 callout_data = callout_data * 10 + *p++ - '0';
3494 callout_data *= sign;
3495 callout_data_set = 1;
3496 }
3497 continue;
3498
3499 #if !defined NODFA
3500 case 'D':
3501 #if !defined NOPOSIX
3502 if (posix || do_posix)
3503 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3504 else
3505 #endif
3506 use_dfa = 1;
3507 continue;
3508 #endif
3509
3510 #if !defined NODFA
3511 case 'F':
3512 options |= PCRE_DFA_SHORTEST;
3513 continue;
3514 #endif
3515
3516 case 'G':
3517 if (isdigit(*p))
3518 {
3519 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3520 getstrings |= 1 << n;
3521 }
3522 else if (isalnum(*p))
3523 {
3524 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3525 }
3526 continue;
3527
3528 case 'J':
3529 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3530 if (extra != NULL
3531 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3532 && extra->executable_jit != NULL)
3533 {
3534 if (jit_stack != NULL) PCRE_JIT_STACK_FREE(jit_stack);
3535 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3536 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3537 }
3538 continue;
3539
3540 case 'L':
3541 getlist = 1;
3542 continue;
3543
3544 case 'M':
3545 find_match_limit = 1;
3546 continue;
3547
3548 case 'N':
3549 if ((options & PCRE_NOTEMPTY) != 0)
3550 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3551 else
3552 options |= PCRE_NOTEMPTY;
3553 continue;
3554
3555 case 'O':
3556 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3557 if (n > size_offsets_max)
3558 {
3559 size_offsets_max = n;
3560 free(offsets);
3561 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3562 if (offsets == NULL)
3563 {
3564 printf("** Failed to get %d bytes of memory for offsets vector\n",
3565 (int)(size_offsets_max * sizeof(int)));
3566 yield = 1;
3567 goto EXIT;
3568 }
3569 }
3570 use_size_offsets = n;
3571 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3572 continue;
3573
3574 case 'P':
3575 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3576 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3577 continue;
3578
3579 case 'Q':
3580 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3581 if (extra == NULL)
3582 {
3583 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3584 extra->flags = 0;
3585 }
3586 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3587 extra->match_limit_recursion = n;
3588 continue;
3589
3590 case 'q':
3591 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3592 if (extra == NULL)
3593 {
3594 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3595 extra->flags = 0;
3596 }
3597 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3598 extra->match_limit = n;
3599 continue;
3600
3601 #if !defined NODFA
3602 case 'R':
3603 options |= PCRE_DFA_RESTART;
3604 continue;
3605 #endif
3606
3607 case 'S':
3608 show_malloc = 1;
3609 continue;
3610
3611 case 'Y':
3612 options |= PCRE_NO_START_OPTIMIZE;
3613 continue;
3614
3615 case 'Z':
3616 options |= PCRE_NOTEOL;
3617 continue;
3618
3619 case '?':
3620 options |= PCRE_NO_UTF8_CHECK;
3621 continue;
3622
3623 case '<':
3624 {
3625 int x = check_newline(p, outfile);
3626 if (x == 0) goto NEXT_DATA;
3627 options |= x;
3628 while (*p++ != '>');
3629 }
3630 continue;
3631 }
3632
3633 /* We now have a character value in c that may be greater than 255. In
3634 16-bit mode, we always convert characters to UTF-8 so that values greater
3635 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3636 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3637 mode must have come from \x{...} or octal constructs because values from
3638 \x.. get this far only in non-UTF mode. */
3639
3640 if (use_pcre16 || use_utf)
3641 {
3642 pcre_uint8 buff8[8];
3643 int ii, utn;
3644 utn = ord2utf8(c, buff8);
3645 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3646 }
3647 else
3648 {
3649 if (c > 255)
3650 {
3651 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3652 "and UTF-8 mode is not enabled.\n", c);
3653 fprintf(outfile, "** Truncation will probably give the wrong "
3654 "result.\n");
3655 }
3656 *q++ = c;
3657 }
3658 }
3659
3660 /* Reached end of subject string */
3661
3662 *q = 0;
3663 len = (int)(q - dbuffer);
3664
3665 /* Move the data to the end of the buffer so that a read over the end of
3666 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3667 we are using the POSIX interface, we must include the terminating zero. */
3668
3669 #if !defined NOPOSIX
3670 if (posix || do_posix)
3671 {
3672 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3673 bptr += buffer_size - len - 1;
3674 }
3675 else
3676 #endif
3677 {
3678 memmove(bptr + buffer_size - len, bptr, len);
3679 bptr += buffer_size - len;
3680 }
3681
3682 if ((all_use_dfa || use_dfa) && find_match_limit)
3683 {
3684 printf("**Match limit not relevant for DFA matching: ignored\n");
3685 find_match_limit = 0;
3686 }
3687
3688 /* Handle matching via the POSIX interface, which does not
3689 support timing or playing with the match limit or callout data. */
3690
3691 #if !defined NOPOSIX
3692 if (posix || do_posix)
3693 {
3694 int rc;
3695 int eflags = 0;
3696 regmatch_t *pmatch = NULL;
3697 if (use_size_offsets > 0)
3698 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3699 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3700 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3701 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3702
3703 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3704
3705 if (rc != 0)
3706 {
3707 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3708 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3709 }
3710 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3711 != 0)
3712 {
3713 fprintf(outfile, "Matched with REG_NOSUB\n");
3714 }
3715 else
3716 {
3717 size_t i;
3718 for (i = 0; i < (size_t)use_size_offsets; i++)
3719 {
3720 if (pmatch[i].rm_so >= 0)
3721 {
3722 fprintf(outfile, "%2d: ", (int)i);
3723 PCHARSV(dbuffer, pmatch[i].rm_so,
3724 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3725 fprintf(outfile, "\n");
3726 if (do_showcaprest || (i == 0 && do_showrest))
3727 {
3728 fprintf(outfile, "%2d+ ", (int)i);
3729 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3730 outfile);
3731 fprintf(outfile, "\n");
3732 }
3733 }
3734 }
3735 }
3736 free(pmatch);
3737 goto NEXT_DATA;
3738 }
3739
3740 #endif /* !defined NOPOSIX */
3741
3742 /* Handle matching via the native interface - repeats for /g and /G */
3743
3744 #ifdef SUPPORT_PCRE16
3745 if (use_pcre16)
3746 {
3747 len = to16(TRUE, bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3748 switch(len)
3749 {
3750 case -1:
3751 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3752 "converted to UTF-16\n");
3753 goto NEXT_DATA;
3754
3755 case -2:
3756 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3757 "cannot be converted to UTF-16\n");
3758 goto NEXT_DATA;
3759
3760 case -3:
3761 fprintf(outfile, "**Failed: character value greater than 0xffff "
3762 "cannot be converted to 16-bit in non-UTF mode\n");
3763 goto NEXT_DATA;
3764
3765 default:
3766 break;
3767 }
3768 bptr = (pcre_uint8 *)buffer16;
3769 }
3770 #endif
3771
3772 for (;; gmatched++) /* Loop for /g or /G */
3773 {
3774 markptr = NULL;
3775
3776 if (timeitm > 0)
3777 {
3778 register int i;
3779 clock_t time_taken;
3780 clock_t start_time = clock();
3781
3782 #if !defined NODFA
3783 if (all_use_dfa || use_dfa)
3784 {
3785 int workspace[1000];
3786 for (i = 0; i < timeitm; i++)
3787 {
3788 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3789 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3790 (sizeof(workspace)/sizeof(int)));
3791 }
3792 }
3793 else
3794 #endif
3795
3796 for (i = 0; i < timeitm; i++)
3797 {
3798 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3799 (options | g_notempty), use_offsets, use_size_offsets);
3800 }
3801 time_taken = clock() - start_time;
3802 fprintf(outfile, "Execute time %.4f milliseconds\n",
3803 (((double)time_taken * 1000.0) / (double)timeitm) /
3804 (double)CLOCKS_PER_SEC);
3805 }
3806
3807 /* If find_match_limit is set, we want to do repeated matches with
3808 varying limits in order to find the minimum value for the match limit and
3809 for the recursion limit. The match limits are relevant only to the normal
3810 running of pcre_exec(), so disable the JIT optimization. This makes it
3811 possible to run the same set of tests with and without JIT externally
3812 requested. */
3813
3814 if (find_match_limit)
3815 {
3816 if (extra == NULL)
3817 {
3818 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3819 extra->flags = 0;
3820 }
3821 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3822
3823 (void)check_match_limit(re, extra, bptr, len, start_offset,
3824 options|g_notempty, use_offsets, use_size_offsets,
3825 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3826 PCRE_ERROR_MATCHLIMIT, "match()");
3827
3828 count = check_match_limit(re, extra, bptr, len, start_offset,
3829 options|g_notempty, use_offsets, use_size_offsets,
3830 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3831 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3832 }
3833
3834 /* If callout_data is set, use the interface with additional data */
3835
3836 else if (callout_data_set)
3837 {
3838 if (extra == NULL)
3839 {
3840 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3841 extra->flags = 0;
3842 }
3843 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3844 extra->callout_data = &callout_data;
3845 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3846 options | g_notempty, use_offsets, use_size_offsets);
3847 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3848 }
3849
3850 /* The normal case is just to do the match once, with the default
3851 value of match_limit. */
3852
3853 #if !defined NODFA
3854 else if (all_use_dfa || use_dfa)
3855 {
3856 int workspace[1000];
3857 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3858 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3859 (sizeof(workspace)/sizeof(int)));
3860 if (count == 0)
3861 {
3862 fprintf(outfile, "Matched, but too many subsidiary matches\n");
3863 count = use_size_offsets/2;
3864 }
3865 }
3866 #endif
3867
3868 else
3869 {
3870 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3871 options | g_notempty, use_offsets, use_size_offsets);
3872 if (count == 0)
3873 {
3874 fprintf(outfile, "Matched, but too many substrings\n");
3875 count = use_size_offsets/3;
3876 }
3877 }
3878
3879 /* Matched */
3880
3881 if (count >= 0)
3882 {
3883 int i, maxcount;
3884 void *cnptr, *gnptr;
3885
3886 #if !defined NODFA
3887 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3888 #endif
3889 maxcount = use_size_offsets/3;
3890
3891 /* This is a check against a lunatic return value. */
3892
3893 if (count > maxcount)
3894 {
3895 fprintf(outfile,
3896 "** PCRE error: returned count %d is too big for offset size %d\n",
3897 count, use_size_offsets);
3898 count = use_size_offsets/3;
3899 if (do_g || do_G)
3900 {
3901 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3902 do_g = do_G = FALSE; /* Break g/G loop */
3903 }
3904 }
3905
3906 /* do_allcaps requests showing of all captures in the pattern, to check
3907 unset ones at the end. */
3908
3909 if (do_allcaps)
3910 {
3911 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
3912 goto SKIP_DATA;
3913 count++; /* Allow for full match */
3914 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3915 }
3916
3917 /* Output the captured substrings */
3918
3919 for (i = 0; i < count * 2; i += 2)
3920 {
3921 if (use_offsets[i] < 0)
3922 {
3923 if (use_offsets[i] != -1)
3924 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3925 use_offsets[i], i);
3926 if (use_offsets[i+1] != -1)
3927 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3928 use_offsets[i+1], i+1);
3929 fprintf(outfile, "%2d: <unset>\n", i/2);
3930 }
3931 else
3932 {
3933 fprintf(outfile, "%2d: ", i/2);
3934 PCHARSV(bptr, use_offsets[i],
3935 use_offsets[i+1] - use_offsets[i], outfile);
3936 fprintf(outfile, "\n");
3937 if (do_showcaprest || (i == 0 && do_showrest))
3938 {
3939 fprintf(outfile, "%2d+ ", i/2);
3940 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
3941 outfile);
3942 fprintf(outfile, "\n");
3943 }
3944 }
3945 }
3946
3947 if (markptr != NULL)
3948 {
3949 fprintf(outfile, "MK: ");
3950 PCHARSV(markptr, 0, -1, outfile);
3951 fprintf(outfile, "\n");
3952 }
3953
3954 for (i = 0; i < 32; i++)
3955 {
3956 if ((copystrings & (1 << i)) != 0)
3957 {
3958 int rc;
3959 char copybuffer[256];
3960 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
3961 copybuffer, sizeof(copybuffer));
3962 if (rc < 0)
3963 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
3964 else
3965 {
3966 fprintf(outfile, "%2dC ", i);
3967 PCHARSV(copybuffer, 0, rc, outfile);
3968 fprintf(outfile, " (%d)\n", rc);
3969 }
3970 }
3971 }
3972
3973 cnptr = copynames;
3974 for (;;)
3975 {
3976 int rc;
3977 char copybuffer[256];
3978
3979 if (use_pcre16)
3980 {
3981 if (*(pcre_uint16 *)cnptr == 0) break;
3982 }
3983 else
3984 {
3985 if (*(pcre_uint8 *)cnptr == 0) break;
3986 }
3987
3988 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
3989 cnptr, copybuffer, sizeof(copybuffer));
3990
3991 if (rc < 0)
3992 {
3993 fprintf(outfile, "copy substring ");
3994 PCHARSV(cnptr, 0, -1, outfile);
3995 fprintf(outfile, " failed %d\n", rc);
3996 }
3997 else
3998 {
3999 fprintf(outfile, " C ");
4000 PCHARSV(copybuffer, 0, rc, outfile);
4001 fprintf(outfile, " (%d) ", rc);
4002 PCHARSV(cnptr, 0, -1, outfile);
4003 putc('\n', outfile);
4004 }
4005
4006 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4007 }
4008
4009 for (i = 0; i < 32; i++)
4010 {
4011 if ((getstrings & (1 << i)) != 0)
4012 {
4013 int rc;
4014 const char *substring;
4015 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4016 if (rc < 0)
4017 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4018 else
4019 {
4020 fprintf(outfile, "%2dG ", i);
4021 PCHARSV(substring, 0, rc, outfile);
4022 fprintf(outfile, " (%d)\n", rc);
4023 PCRE_FREE_SUBSTRING(substring);
4024 }
4025 }
4026 }
4027
4028 gnptr = getnames;
4029 for (;;)
4030 {
4031 int rc;
4032 const char *substring;
4033
4034 if (use_pcre16)
4035 {
4036 if (*(pcre_uint16 *)gnptr == 0) break;
4037 }
4038 else
4039 {
4040 if (*(pcre_uint8 *)gnptr == 0) break;
4041 }
4042
4043 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4044 gnptr, &substring);
4045 if (rc < 0)
4046 {
4047 fprintf(outfile, "get substring ");
4048 PCHARSV(gnptr, 0, -1, outfile);
4049 fprintf(outfile, " failed %d\n", rc);
4050 }
4051 else
4052 {
4053 fprintf(outfile, " G ");
4054 PCHARSV(substring, 0, rc, outfile);
4055 fprintf(outfile, " (%d) ", rc);
4056 PCHARSV(gnptr, 0, -1, outfile);
4057 PCRE_FREE_SUBSTRING(substring);
4058 putc('\n', outfile);
4059 }
4060
4061 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4062 }
4063
4064 if (getlist)
4065 {
4066 int rc;
4067 const char **stringlist;
4068 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4069 if (rc < 0)
4070 fprintf(outfile, "get substring list failed %d\n", rc);
4071 else
4072 {
4073 for (i = 0; i < count; i++)
4074 {
4075 fprintf(outfile, "%2dL ", i);
4076 PCHARSV(stringlist[i], 0, -1, outfile);
4077 putc('\n', outfile);
4078 }
4079 if (stringlist[i] != NULL)
4080 fprintf(outfile, "string list not terminated by NULL\n");
4081 PCRE_FREE_SUBSTRING_LIST(stringlist);
4082 }
4083 }
4084 }
4085
4086 /* There was a partial match */
4087
4088 else if (count == PCRE_ERROR_PARTIAL)
4089 {
4090 if (markptr == NULL) fprintf(outfile, "Partial match");
4091 else
4092 {
4093 fprintf(outfile, "Partial match, mark=");
4094 PCHARSV(markptr, 0, -1, outfile);
4095 }
4096 if (use_size_offsets > 1)
4097 {
4098 fprintf(outfile, ": ");
4099 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4100 outfile);
4101 }
4102 fprintf(outfile, "\n");
4103 break; /* Out of the /g loop */
4104 }
4105
4106 /* Failed to match. If this is a /g or /G loop and we previously set
4107 g_notempty after a null match, this is not necessarily the end. We want
4108 to advance the start offset, and continue. We won't be at the end of the
4109 string - that was checked before setting g_notempty.
4110
4111 Complication arises in the case when the newline convention is "any",
4112 "crlf", or "anycrlf". If the previous match was at the end of a line
4113 terminated by CRLF, an advance of one character just passes the \r,
4114 whereas we should prefer the longer newline sequence, as does the code in
4115 pcre_exec(). Fudge the offset value to achieve this. We check for a
4116 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4117 find the default.
4118
4119 Otherwise, in the case of UTF-8 matching, the advance must be one
4120 character, not one byte. */
4121
4122 else
4123 {
4124 if (g_notempty != 0)
4125 {
4126 int onechar = 1;
4127 unsigned int obits = ((real_pcre *)re)->options;
4128 use_offsets[0] = start_offset;
4129 if ((obits & PCRE_NEWLINE_BITS) == 0)
4130 {
4131 int d;
4132 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4133 /* Note that these values are always the ASCII ones, even in
4134 EBCDIC environments. CR = 13, NL = 10. */
4135 obits = (d == 13)? PCRE_NEWLINE_CR :
4136 (d == 10)? PCRE_NEWLINE_LF :
4137 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4138 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4139 (d == -1)? PCRE_NEWLINE_ANY : 0;
4140 }
4141 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4142 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4143 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4144 &&
4145 start_offset < len - 1 &&
4146 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4147 (use_pcre16?
4148 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4149 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4150 :
4151 bptr[start_offset] == '\r'
4152 && bptr[start_offset + 1] == '\n')
4153 #elif defined SUPPORT_PCRE16
4154 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4155 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4156 #else
4157 bptr[start_offset] == '\r'
4158 && bptr[start_offset + 1] == '\n'
4159 #endif
4160 )
4161 onechar++;
4162 else if (use_utf)
4163 {
4164 while (start_offset + onechar < len)
4165 {
4166 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4167 onechar++;
4168 }
4169 }
4170 use_offsets[1] = start_offset + onechar;
4171 }
4172 else
4173 {
4174 switch(count)
4175 {
4176 case PCRE_ERROR_NOMATCH:
4177 if (gmatched == 0)
4178 {
4179 if (markptr == NULL)
4180 {
4181 fprintf(outfile, "No match\n");
4182 }
4183 else
4184 {
4185 fprintf(outfile, "No match, mark = ");
4186 PCHARSV(markptr, 0, -1, outfile);
4187 putc('\n', outfile);
4188 }
4189 }
4190 break;
4191
4192 case PCRE_ERROR_BADUTF8:
4193 case PCRE_ERROR_SHORTUTF8:
4194 fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4195 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4196 use_pcre16? "16" : "8");
4197 if (use_size_offsets >= 2)
4198 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4199 use_offsets[1]);
4200 fprintf(outfile, "\n");
4201 break;
4202
4203 case PCRE_ERROR_BADUTF8_OFFSET:
4204 fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4205 use_pcre16? "16" : "8");
4206 break;
4207
4208 default:
4209 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
4210 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4211 else
4212 fprintf(outfile, "Error %d (Unexpected value)\n", count);
4213 break;
4214 }
4215
4216 break; /* Out of the /g loop */
4217 }
4218 }
4219
4220 /* If not /g or /G we are done */
4221
4222 if (!do_g && !do_G) break;
4223
4224 /* If we have matched an empty string, first check to see if we are at
4225 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4226 Perl's /g option does. This turns out to be rather cunning. First we set
4227 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4228 same point. If this fails (picked up above) we advance to the next
4229 character. */
4230
4231 g_notempty = 0;
4232
4233 if (use_offsets[0] == use_offsets[1])
4234 {
4235 if (use_offsets[0] == len) break;
4236 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4237 }
4238
4239 /* For /g, update the start offset, leaving the rest alone */
4240
4241 if (do_g) start_offset = use_offsets[1];
4242
4243 /* For /G, update the pointer and length */
4244
4245 else
4246 {
4247 bptr += use_offsets[1] * CHAR_SIZE;
4248 len -= use_offsets[1];
4249 }
4250 } /* End of loop for /g and /G */
4251
4252 NEXT_DATA: continue;
4253 } /* End of loop for data lines */
4254
4255 CONTINUE:
4256
4257 #if !defined NOPOSIX
4258 if (posix || do_posix) regfree(&preg);
4259 #endif
4260
4261 if (re != NULL) new_free(re);
4262 if (extra != NULL)
4263 {
4264 PCRE_FREE_STUDY(extra);
4265 }
4266 if (locale_set)
4267 {
4268 new_free((void *)tables);
4269 setlocale(LC_CTYPE, "C");
4270 locale_set = 0;
4271 }
4272 if (jit_stack != NULL)
4273 {
4274 PCRE_JIT_STACK_FREE(jit_stack);
4275 jit_stack = NULL;
4276 }
4277 }
4278
4279 if (infile == stdin) fprintf(outfile, "\n");
4280
4281 EXIT:
4282
4283 if (infile != NULL && infile != stdin) fclose(infile);
4284 if (outfile != NULL && outfile != stdout) fclose(outfile);
4285
4286 free(buffer);
4287 free(dbuffer);
4288 free(pbuffer);
4289 free(offsets);
4290
4291 #ifdef SUPPORT_PCRE16
4292 if (buffer16 != NULL) free(buffer16);
4293 #endif
4294
4295 return yield;
4296 }
4297
4298 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12