/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 901 - (hide annotations) (download)
Sat Jan 21 15:47:59 2012 UTC (15 months, 4 weeks ago) by ph10
File MIME type: text/plain
File size: 130532 byte(s)
More tidies and documentation for stack frame measurement.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 ph10 836 been extended and consequently is now rather, er, *very* untidy in places. The
8     addition of 16-bit support has made it even worse. :-(
9 nigel 63
10 nigel 75 -----------------------------------------------------------------------------
11     Redistribution and use in source and binary forms, with or without
12     modification, are permitted provided that the following conditions are met:
13    
14     * Redistributions of source code must retain the above copyright notice,
15     this list of conditions and the following disclaimer.
16    
17     * Redistributions in binary form must reproduce the above copyright
18     notice, this list of conditions and the following disclaimer in the
19     documentation and/or other materials provided with the distribution.
20    
21     * Neither the name of the University of Cambridge nor the names of its
22     contributors may be used to endorse or promote products derived from
23     this software without specific prior written permission.
24    
25     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35     POSSIBILITY OF SUCH DAMAGE.
36     -----------------------------------------------------------------------------
37     */
38    
39 ph10 836 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40     libraries in a single program. This is different from the modules such as
41     pcre_compile.c in the library itself, which are compiled separately for each
42     mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43     (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44     compiled only once. Therefore, it must not make use of any of the macros from
45     pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46     however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47     only supported library functions. */
48 nigel 75
49 ph10 836
50 ph10 200 #ifdef HAVE_CONFIG_H
51 ph10 236 #include "config.h"
52 ph10 200 #endif
53 ph10 199
54 nigel 3 #include <ctype.h>
55     #include <stdio.h>
56     #include <string.h>
57     #include <stdlib.h>
58     #include <time.h>
59 nigel 25 #include <locale.h>
60 nigel 75 #include <errno.h>
61 nigel 3
62 ph10 287 #ifdef SUPPORT_LIBREADLINE
63 ph10 343 #ifdef HAVE_UNISTD_H
64 ph10 287 #include <unistd.h>
65 ph10 343 #endif
66 ph10 287 #include <readline/readline.h>
67     #include <readline/history.h>
68     #endif
69 nigel 93
70 ph10 287
71 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
72     input and output without "b"; then I was told that "b" was needed in some
73     environments, so it was added for release 5.0 to both the input and output. (It
74     makes no difference on Unix-like systems.) Later I was told that it is wrong
75     for the input on Windows. I've now abstracted the modes into two macros that
76     are set here, to make it easier to fiddle with them, and removed "b" from the
77     input mode under Windows. */
78    
/* Platform-dependent stream modes and compatibility names.

INPUT_MODE and OUTPUT_MODE are the fopen() mode strings used for the input and
output files. Output is always binary ("wb"); input is binary everywhere except
Windows, where the "b" was removed from the input mode. */

#if !defined(_WIN32) && !defined(WIN32)

/* Not Windows */

#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */

#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"

#else

/* Windows */

#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */

#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

/* Windows spells these with a leading underscore, but some environments
already define the plain names, so each one is only supplied when missing. */

#if !defined(isatty)
#define isatty _isatty
#endif

#if !defined(fileno)
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#if defined(__BORLANDC__)
#define _setmode(handle, mode) setmode(handle, mode)
#endif

#endif
107    
108 nigel 93
109 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
110     displaying the results of pcre_study() and we also need to know about the
111     internal macros, structures, and other internal data values; pcretest has
112     "inside information" compared to a program that strictly follows the PCRE API.
113 nigel 37
114 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
115     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
116     appropriately for an application, not for building PCRE. */
117 nigel 77
118 ph10 145 #include "pcre.h"
119 ph10 836
120     #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
121     /* Configure internal macros to 16 bit mode. */
122     #define COMPILE_PCRE16
123     #endif
124    
125 nigel 77 #include "pcre_internal.h"
126    
127 ph10 836 /* The pcre_printint() function, which prints the internal form of a compiled
128     regex, is held in a separate file so that (a) it can be compiled in either
129     8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
130     when that is compiled in debug mode. */
131    
/* Prototypes for the pattern-printing functions, one per supported library
width. Both take the compiled pattern as a pcre pointer, the stream to print
on, and a flag selecting whether lengths are printed. */

#if defined(SUPPORT_PCRE8)
extern void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif

#if defined(SUPPORT_PCRE16)
extern void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
138    
139 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
140     to keep two copies, we include the source file here, changing the names of the
141     external symbols to prevent clashes. */
142 nigel 77
143 ph10 836 #define PCRE_INCLUDED
144     #undef PRIV
145     #define PRIV(name) name
146 nigel 85
147     #include "pcre_tables.c"
148    
149 ph10 498 /* The definition of the macro PRINTABLE, which determines whether to print an
150 nigel 93 output character as-is or as a hex value when showing compiled patterns, is
151 ph10 836 the same as in the printint.src file. We use it here in cases when the locale
152     has not been explicitly changed, so as to get consistent output from systems
153     that differ in their output from isprint() even in the "C" locale. */
154 nigel 93
/* PRINTABLE(c) is a locale-independent printability test on the numeric value
of c: 64..254 in an EBCDIC build, 32..126 otherwise. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK(c) is non-zero when c should be output as-is. If a locale has been
set (locale_set is non-zero) the decision is delegated to isprint(); otherwise
PRINTABLE() is used.

isprint() is only defined for arguments representable as unsigned char (or
EOF), so any value outside 0..255 is rejected here instead of being passed
through. Converting to unsigned int first makes negative values fail the
range test as well. Note that c is evaluated more than once. */

#define PRINTOK(c) \
  (locale_set? ((unsigned int)(c) <= 255u && isprint((int)(c))) : PRINTABLE(c))
162    
163     /* Posix support is disabled in 16 bit only mode. */
164     #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
165     #define NOPOSIX
166     #endif
167    
168 nigel 37 /* It is possible to compile this test program without including support for
169     testing the POSIX interface, though this is not available via the standard
170     Makefile. */
171    
172     #if !defined NOPOSIX
173 nigel 3 #include "pcreposix.h"
174 nigel 37 #endif
175 nigel 3
176 ph10 836 /* It is also possible, originally for the benefit of a version that was
177     imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
178     NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
179     automatically cut out the UTF support if PCRE is built without it. */
180 nigel 79
/* Define NOUTF automatically when SUPPORT_UTF is not defined, unless NOUTF
has already been defined. */

#if !defined(SUPPORT_UTF) && !defined(NOUTF)
#define NOUTF
#endif
186 nigel 79
187 ph10 836 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
188     for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
189     only from one place and is handled differently). I couldn't dream up any way of
190     using a single macro to do this in a generic way, because of the many different
191     argument requirements. We know that at least one of SUPPORT_PCRE8 and
192     SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
193     use these in the definitions of generic macros.
194 ph10 107
195 ph10 836 **** Special note about the PCHARSxxx macros: the address of the string to be
196     printed is always given as two arguments: a base address followed by an offset.
197     The base address is cast to the correct data size for 8 or 16 bit data; the
198     offset is in units of this size. If the string were given as base+offset in one
199     argument, the casting might be incorrectly applied. */
200    
/* ----- Wrappers for the 8-bit library -----

Each xxx8 macro expands to one call of the corresponding 8-bit function (or a
local helper), casting subject and name pointers to char pointers. Macros that
produce a value take the destination lvalue as a leading argument and expand to
an assignment, so they must be used as statements. The arguments are
deliberately left unparenthesised so that the casts bind exactly as written. */

#if defined(SUPPORT_PCRE8)

/* --- Local helpers --- */

/* For the two PCHARS macros the cast is applied to the base pointer before
the offset is added, so the offset is in units of pcre_uint8. */

#define PCHARS8(lv, p, offset, len, f) \
    lv = pchars((pcre_uint8 *)(p) + offset, len, f)

#define PCHARSV8(p, offset, len, f) \
    (void)pchars((pcre_uint8 *)(p) + offset, len, f)

/* cn16 is accepted only so that the 8-bit and 16-bit forms share a
signature; it is not used here. */

#define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
    p = read_capture_name8(p, cn8, re)

#define STRLEN8(p) \
    ((int)strlen((char *)p))

#define SET_PCRE_CALLOUT8(callout) \
    pcre_callout = callout

/* --- Compiling, studying, printing, byte order --- */

#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
    re = pcre_compile((char *)pat, options, error, erroffset, tables)

#define PCRE_STUDY8(extra, re, options, error) \
    extra = pcre_study(re, options, error)

#define PCRE_FREE_STUDY8(extra) \
    pcre_free_study(extra)

#define PCRE_PRINTINT8(re, outfile, debug_lengths) \
    pcre_printint(re, outfile, debug_lengths)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
    rc = pcre_pattern_to_host_byte_order(re, extra, tables)

/* --- Matching --- */

#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
                   offsets, size_offsets) \
    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
                      offsets, size_offsets)

#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
                       offsets, size_offsets, workspace, size_workspace) \
    count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, \
                          options, offsets, size_offsets, workspace, \
                          size_workspace)

/* --- Substring extraction --- */

#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
    rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)

#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
                                   namesptr, cbuffer, size) \
    rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
                                   (char *)namesptr, cbuffer, size)

#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
    rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)

#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
                                  getnamesptr, subsptr) \
    rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
                                  (char *)getnamesptr, subsptr)

#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
    rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)

/* NOTE(review): the rc parameter is not used in the expansion, and "re" is
not a macro parameter here, so it is taken from the scope of the caller. */

#define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
    n = pcre_get_stringnumber(re, (char *)ptr)

#define PCRE_FREE_SUBSTRING8(substring) \
    pcre_free_substring(substring)

#define PCRE_FREE_SUBSTRING_LIST8(listptr) \
    pcre_free_substring_list(listptr)

/* --- JIT stack handling --- */

#define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
    pcre_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE8(stack) \
    pcre_jit_stack_free(stack)

#define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
    pcre_assign_jit_stack(extra, callback, userdata)

#endif /* SUPPORT_PCRE8 */
280    
281     /* -----------------------------------------------------------*/
282    
/* ----- Wrappers for the 16-bit library -----

Each xxx16 macro expands to one call of the corresponding pcre16_xxx function
(or a local 16-bit helper). The rest of the program holds patterns, study data
and JIT stacks through the 8-bit pointer types, so every argument passed to a
pcre16_xxx library function is cast to the matching 16-bit type, and results
are cast back. Macros that produce a value take the destination lvalue as a
leading argument and expand to an assignment, so they must be used as
statements. */

#ifdef SUPPORT_PCRE16

/* For the two PCHARS macros the cast is applied to the base pointer before
the offset is added, so the offset is in units of 16-bit characters. */

#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)

#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)

/* cn8 is accepted only so that the 8-bit and 16-bit forms share a signature;
it is not used here. */

#define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
  p = read_capture_name16(p, cn16, re)

#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))

#define SET_PCRE_CALLOUT16(callout) \
  pcre16_callout = (int (*)(pcre16_callout_block *))callout

#define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
  pcre16_assign_jit_stack((pcre16_extra *)extra, \
    (pcre16_jit_callback)callback, userdata)

#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
    tables)

/* In the two copy macros the size passed by the caller is halved before
being handed to the library. */

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_UCHAR16 *)cbuffer, size/2)

#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
    (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
    workspace, size_workspace)

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
    len, start_offset, options, offsets, size_offsets)

#define PCRE_FREE_STUDY16(extra) \
  pcre16_free_study((pcre16_extra *)extra)

#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)substring)

#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)listptr)

/* The output pointers go through (void*) before being cast to the 16-bit
pointer-to-pointer types. */

#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)

/* NOTE(review): the rc parameter is not used in the expansion, and "re" is
not a macro parameter here, so it is taken from the scope of the caller. It is
cast to (pcre16 *) in the same way as in every other macro in this section
that hands a pattern to a pcre16_xxx library function. */

#define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)

#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SPTR16 *)(void*)subsptr)

#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16 **)(void*)listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
  rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
    tables)

/* No cast here: pcre16_printint() is declared in this file as taking a
pcre pointer. */

#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  pcre16_printint(re, outfile, debug_lengths)

#define PCRE_STUDY16(extra, re, options, error) \
  extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)

#define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
  (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE16(stack) \
  pcre16_jit_stack_free((pcre16_jit_stack *)stack)

#endif /* SUPPORT_PCRE16 */
369    
370    
371     /* ----- Both modes are supported; a runtime test is needed, except for
372     pcre_config(), and the JIT stack functions, when it doesn't matter which
373     version is called. ----- */
374    
375     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
376    
/* Generic macros for a build that supports both widths. Each one tests
use_pcre16 at run time and forwards its arguments unchanged to the xxx16 or
xxx8 form.

The statement-like macros are wrapped in do { ... } while (0) so that each
invocation is exactly one statement. This keeps the if/else private to the
macro, and it avoids dangling-else diagnostics when the macro is the body of
an unbraced "if" in the caller. Every such macro must be followed by a
semicolon at the point of use, as before.

CHAR_SIZE, STRLEN, PCRE_JIT_STACK_ALLOC and PCRE_MAKETABLES yield a value and
therefore remain conditional expressions. PCRE_CONFIG always names the 8-bit
function in this configuration. */

#define CHAR_SIZE (use_pcre16? 2:1)

#define PCHARS(lv, p, offset, len, f) \
  do { \
    if (use_pcre16) \
      PCHARS16(lv, p, offset, len, f); \
    else \
      PCHARS8(lv, p, offset, len, f); \
  } while (0)

#define PCHARSV(p, offset, len, f) \
  do { \
    if (use_pcre16) \
      PCHARSV16(p, offset, len, f); \
    else \
      PCHARSV8(p, offset, len, f); \
  } while (0)

#define READ_CAPTURE_NAME(p, cn8, cn16, re) \
  do { \
    if (use_pcre16) \
      READ_CAPTURE_NAME16(p, cn8, cn16, re); \
    else \
      READ_CAPTURE_NAME8(p, cn8, cn16, re); \
  } while (0)

#define SET_PCRE_CALLOUT(callout) \
  do { \
    if (use_pcre16) \
      SET_PCRE_CALLOUT16(callout); \
    else \
      SET_PCRE_CALLOUT8(callout); \
  } while (0)

#define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  do { \
    if (use_pcre16) \
      PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
    else \
      PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata); \
  } while (0)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  do { \
    if (use_pcre16) \
      PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
    else \
      PCRE_COMPILE8(re, pat, options, error, erroffset, tables); \
  } while (0)

#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  do { \
    if (use_pcre16) \
      PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    else \
      PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
  } while (0)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  do { \
    if (use_pcre16) \
      PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
    else \
      PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size); \
  } while (0)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  do { \
    if (use_pcre16) \
      PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    else \
      PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
  } while (0)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do { \
    if (use_pcre16) \
      PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    else \
      PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
  } while (0)

#define PCRE_FREE_STUDY(extra) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_STUDY16(extra); \
    else \
      PCRE_FREE_STUDY8(extra); \
  } while (0)

#define PCRE_FREE_SUBSTRING(substring) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_SUBSTRING16(substring); \
    else \
      PCRE_FREE_SUBSTRING8(substring); \
  } while (0)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_SUBSTRING_LIST16(listptr); \
    else \
      PCRE_FREE_SUBSTRING_LIST8(listptr); \
  } while (0)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    else \
      PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
  } while (0)

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
    else \
      PCRE_GET_STRINGNUMBER8(n, rc, ptr); \
  } while (0)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
    else \
      PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr); \
  } while (0)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
    else \
      PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr); \
  } while (0)

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  (use_pcre16 ? \
     PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
    :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))

#define PCRE_JIT_STACK_FREE(stack) \
  do { \
    if (use_pcre16) \
      PCRE_JIT_STACK_FREE16(stack); \
    else \
      PCRE_JIT_STACK_FREE8(stack); \
  } while (0)

#define PCRE_MAKETABLES \
  (use_pcre16? pcre16_maketables() : pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  do { \
    if (use_pcre16) \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
    else \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables); \
  } while (0)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  do { \
    if (use_pcre16) \
      PCRE_PRINTINT16(re, outfile, debug_lengths); \
    else \
      PCRE_PRINTINT8(re, outfile, debug_lengths); \
  } while (0)

#define PCRE_STUDY(extra, re, options, error) \
  do { \
    if (use_pcre16) \
      PCRE_STUDY16(extra, re, options, error); \
    else \
      PCRE_STUDY8(extra, re, options, error); \
  } while (0)
528    
529     /* ----- Only 8-bit mode is supported ----- */
530    
531     #elif defined SUPPORT_PCRE8
/* With only the 8-bit library available, each generic name is a direct alias
for its 8-bit form and no run-time test is involved. */

#define CHAR_SIZE                        1
#define PCHARS                           PCHARS8
#define PCHARSV                          PCHARSV8
#define READ_CAPTURE_NAME                READ_CAPTURE_NAME8
#define SET_PCRE_CALLOUT                 SET_PCRE_CALLOUT8
#define STRLEN                           STRLEN8
#define PCRE_ASSIGN_JIT_STACK            PCRE_ASSIGN_JIT_STACK8
#define PCRE_COMPILE                     PCRE_COMPILE8
#define PCRE_CONFIG                      pcre_config
#define PCRE_COPY_NAMED_SUBSTRING        PCRE_COPY_NAMED_SUBSTRING8
#define PCRE_COPY_SUBSTRING              PCRE_COPY_SUBSTRING8
#define PCRE_DFA_EXEC                    PCRE_DFA_EXEC8
#define PCRE_EXEC                        PCRE_EXEC8
#define PCRE_FREE_STUDY                  PCRE_FREE_STUDY8
#define PCRE_FREE_SUBSTRING              PCRE_FREE_SUBSTRING8
#define PCRE_FREE_SUBSTRING_LIST         PCRE_FREE_SUBSTRING_LIST8
#define PCRE_GET_NAMED_SUBSTRING         PCRE_GET_NAMED_SUBSTRING8
#define PCRE_GET_STRINGNUMBER            PCRE_GET_STRINGNUMBER8
#define PCRE_GET_SUBSTRING               PCRE_GET_SUBSTRING8
#define PCRE_GET_SUBSTRING_LIST          PCRE_GET_SUBSTRING_LIST8
#define PCRE_JIT_STACK_ALLOC             PCRE_JIT_STACK_ALLOC8
#define PCRE_JIT_STACK_FREE              PCRE_JIT_STACK_FREE8
#define PCRE_MAKETABLES                  pcre_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER  PCRE_PATTERN_TO_HOST_BYTE_ORDER8
#define PCRE_PRINTINT                    PCRE_PRINTINT8
#define PCRE_STUDY                       PCRE_STUDY8
558    
559     /* ----- Only 16-bit mode is supported ----- */
560    
561     #else
/* With only the 16-bit library available, each generic name is a direct alias
for its 16-bit form and no run-time test is involved. */

#define CHAR_SIZE                        2
#define PCHARS                           PCHARS16
#define PCHARSV                          PCHARSV16
#define READ_CAPTURE_NAME                READ_CAPTURE_NAME16
#define SET_PCRE_CALLOUT                 SET_PCRE_CALLOUT16
#define STRLEN                           STRLEN16
#define PCRE_ASSIGN_JIT_STACK            PCRE_ASSIGN_JIT_STACK16
#define PCRE_COMPILE                     PCRE_COMPILE16
#define PCRE_CONFIG                      pcre16_config
#define PCRE_COPY_NAMED_SUBSTRING        PCRE_COPY_NAMED_SUBSTRING16
#define PCRE_COPY_SUBSTRING              PCRE_COPY_SUBSTRING16
#define PCRE_DFA_EXEC                    PCRE_DFA_EXEC16
#define PCRE_EXEC                        PCRE_EXEC16
#define PCRE_FREE_STUDY                  PCRE_FREE_STUDY16
#define PCRE_FREE_SUBSTRING              PCRE_FREE_SUBSTRING16
#define PCRE_FREE_SUBSTRING_LIST         PCRE_FREE_SUBSTRING_LIST16
#define PCRE_GET_NAMED_SUBSTRING         PCRE_GET_NAMED_SUBSTRING16
#define PCRE_GET_STRINGNUMBER            PCRE_GET_STRINGNUMBER16
#define PCRE_GET_SUBSTRING               PCRE_GET_SUBSTRING16
#define PCRE_GET_SUBSTRING_LIST          PCRE_GET_SUBSTRING_LIST16
#define PCRE_JIT_STACK_ALLOC             PCRE_JIT_STACK_ALLOC16
#define PCRE_JIT_STACK_FREE              PCRE_JIT_STACK_FREE16
#define PCRE_MAKETABLES                  pcre16_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER  PCRE_PATTERN_TO_HOST_BYTE_ORDER16
#define PCRE_PRINTINT                    PCRE_PRINTINT16
#define PCRE_STUDY                       PCRE_STUDY16
588     #endif
589    
590     /* ----- End of mode-specific function call macros ----- */
591    
592    
593 nigel 85 /* Other parameters */
594    
/* Make sure CLOCKS_PER_SEC exists: if the headers did not supply it, use
CLK_TCK when that is available, and otherwise fall back to 100. */

#if !defined(CLOCKS_PER_SEC)
#  if defined(CLK_TCK)
#    define CLOCKS_PER_SEC CLK_TCK
#  else
#    define CLOCKS_PER_SEC 100
#  endif
#endif

/* Default loop count for timing. */

#define LOOPREPEAT 500000
606 nigel 3
607 nigel 85 /* Static variables */
608    
609 nigel 3 static FILE *outfile;
610     static int log_store = 0;
611 nigel 63 static int callout_count;
612     static int callout_extra;
613     static int callout_fail_count;
614     static int callout_fail_id;
615 ph10 210 static int debug_lengths;
616 nigel 63 static int first_callout;
617 nigel 93 static int locale_set = 0;
618 nigel 73 static int show_malloc;
619 ph10 836 static int use_utf;
620 nigel 43 static size_t gotten_store;
621 ph10 836 static size_t first_gotten_store = 0;
622 ph10 645 static const unsigned char *last_callout_mark = NULL;
623 nigel 3
624 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
625    
626     static int buffer_size = 50000;
627 ph10 836 static pcre_uint8 *buffer = NULL;
628     static pcre_uint8 *dbuffer = NULL;
629     static pcre_uint8 *pbuffer = NULL;
630 nigel 3
631 ph10 836 /* Another buffer is needed for translation to 16-bit character strings. It will
632     be obtained and extended as required. */
633    
#if defined(SUPPORT_PCRE16)

/* The 16-bit translation buffer; it starts out empty. */

static int buffer16_size = 0;
static pcre_uint16 *buffer16 = NULL;

#if defined(SUPPORT_PCRE8)

/* The table of operator lengths used for 16-bit compiling is needed in order
to swap bytes in a pattern for save/reload testing. The data is available as
the OP_LENGTHS macro, but LINK_SIZE has to be adjusted for the 16-bit world
before that macro is expanded below. As a safety check, COMPILE_PCRE16 must
*not* be set at this point. */

#if defined(COMPILE_PCRE16)
#  error COMPILE_PCRE16 must not be set when compiling pcretest.c
#endif

/* A LINK_SIZE of 2 becomes 1; a LINK_SIZE of 3 or 4 becomes 2. Any other
value is rejected. */

#if LINK_SIZE == 2
#  undef LINK_SIZE
#  define LINK_SIZE 1
#elif LINK_SIZE == 3 || LINK_SIZE == 4
#  undef LINK_SIZE
#  define LINK_SIZE 2
#else
#  error LINK_SIZE must be either 2, 3, or 4
#endif

/* IMM2_SIZE is forced to 1 in the same way. */

#undef IMM2_SIZE
#define IMM2_SIZE 1

#endif /* SUPPORT_PCRE8 */

/* Built from OP_LENGTHS using whatever LINK_SIZE and IMM2_SIZE are in effect
at this point. */

static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };

#endif /* SUPPORT_PCRE16 */
667    
668     /* If we have 8-bit support, default use_pcre16 to false; if there is also
669     16-bit support, it can be changed by an option. If there is no 8-bit support,
670     there must be 16-bit support, so default it to 1. */
671    
/* Initial library selection: 0 (8-bit) whenever the 8-bit library is
supported, otherwise 1 (16-bit). */

#if !defined(SUPPORT_PCRE8)
static int use_pcre16 = 1;
#else
static int use_pcre16 = 0;
#endif
677    
678 ph10 598 /* Textual explanations for runtime error codes */
679 nigel 75
/* Textual explanations for runtime error codes. The number in front of each
entry is its index in the table. A NULL entry means there is no text for that
index: the case is either handled specially or never returned.
NOTE(review): the table is presumably indexed by the negated error code;
confirm against the code that reads it. */

static const char *errtexts[] = {
  /*  0 */ NULL,     /* 0 is no error */
  /*  1 */ NULL,     /* NOMATCH is handled specially */
  /*  2 */ "NULL argument passed",
  /*  3 */ "bad option value",
  /*  4 */ "magic number missing",
  /*  5 */ "unknown opcode - pattern overwritten?",
  /*  6 */ "no more memory",
  /*  7 */ NULL,     /* never returned by pcre_exec() or pcre_dfa_exec() */
  /*  8 */ "match limit exceeded",
  /*  9 */ "callout error code",
  /* 10 */ NULL,     /* BADUTF8/16 is handled specially */
  /* 11 */ NULL,     /* BADUTF8/16 offset is handled specially */
  /* 12 */ NULL,     /* PARTIAL is handled specially */
  /* 13 */ "not used - internal error",
  /* 14 */ "internal error - pattern overwritten?",
  /* 15 */ "bad count value",
  /* 16 */ "item unsupported for DFA matching",
  /* 17 */ "backreference condition or recursion test not supported for DFA matching",
  /* 18 */ "match limit not supported for DFA matching",
  /* 19 */ "workspace size exceeded in DFA matching",
  /* 20 */ "too much recursion for DFA matching",
  /* 21 */ "recursion limit exceeded",
  /* 22 */ "not used - internal error",
  /* 23 */ "invalid combination of newline options",
  /* 24 */ "bad offset value",
  /* 25 */ NULL,     /* SHORTUTF8/16 is handled specially */
  /* 26 */ "nested recursion at the same subject position",
  /* 27 */ "JIT stack limit reached",
  /* 28 */ "pattern compiled in wrong mode: 8-bit/16-bit error"
};
711    
712 ph10 654
713 ph10 541 /*************************************************
714     * Alternate character tables *
715     *************************************************/
716 nigel 49
717 ph10 545 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
718     using the default tables of the library. However, the T option can be used to
719     select alternate sets of tables, for different kinds of testing. Note also that
720 ph10 541 the L (locale) option also adjusts the tables. */
721    
722 ph10 545 /* This is the set of tables distributed as default with PCRE. It recognizes
723 ph10 541 only ASCII characters. */
724    
static const pcre_uint8 tables0[] = {

/* Layout of this array, as can be counted from the rows below: a 256-byte
lower casing table, a 256-byte case flipping table, 320 bytes of character
class bit maps (ten maps of 32 bytes each), and a 256-byte character type
table, giving 1088 bytes in total. */

/* This table is a lower casing table. Entry n holds the lower case form of
character n; only A-Z (65-90) are changed, to a-z (97-122). */

0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122, 91, 92, 93, 94, 95,
96, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* This table is a case flipping table. Entry n holds the other-case form of
character n: A-Z map to a-z and a-z map to A-Z; everything else maps to
itself. */

0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122, 91, 92, 93, 94, 95,
96, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87,
88, 89, 90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. The
maps appear below in that order, one blank-line-separated group per class. */

/* space: 9-13 and 32 */

0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* xdigit: 0-9, A-F, a-f */

0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* digit: 0-9 */

0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* upper: A-Z */

0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* lower: a-z */

0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* word: 0-9, A-Z, underscore, a-z */

0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* graph: 33-126 */

0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* print: 32-126 */

0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* punct */

0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* cntrl: 0-31 and 127 */

0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* This table identifies various classes of character by individual bits:
0x01 white space character
0x02 letter
0x04 decimal digit
0x08 hexadecimal digit
0x10 alphanumeric or '_'
0x80 regular expression metacharacter or binary zero
There is one flag byte per character value; the trailing comment on each row
gives the range of characters that the row covers.
*/

0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
893    
/* This is a set of tables that came originally from a Windows user. It seems to
be at least an approximation of ISO 8859. In particular, there are characters
greater than 128 that are marked as spaces, letters, etc. */
897    
static const pcre_uint8 tables1[] = {

/* Layout (counted from the rows below): 256-byte lower casing table, 256-byte
case flipping table, ten 32-byte character class bit maps (320 bytes), and a
256-byte character type table. */

/* Lower casing table. As well as A-Z, characters 192-222 are mapped to
224-254, except that 215 maps to itself; 223 also maps to itself. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* Case flipping table. A-Z and a-z are swapped, and 192-222 are swapped with
224-254, apart from the pair 215/247 and the values 223 and 255, which all map
to themselves. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Character class bit maps, 32 bytes (four rows) each, with bits running from
the least significant end of each byte. NOTE(review): the order is presumed to
be the same as in tables0 (space, xdigit, digit, upper, lower, word, graph,
print, punct, cntrl); the ASCII portions of the maps are consistent with
that. */

/* space */
0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
/* xdigit */
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
/* digit */
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,
/* upper */
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,
/* lower */
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,
/* word */
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,
/* graph */
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,
/* print */
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,
/* punct */
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,
/* cntrl */
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Character type table: one flag byte per character value, eight characters
per row. NOTE(review): the flag bits are presumed to have the same meanings as
in the corresponding table of tables0 (0x01 space, 0x02 letter, 0x04 digit,
0x08 hex digit, 0x10 word character, 0x80 metacharacter); the first 128
entries match that table apart from entry 9+... none - they are identical. */

128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
1036    
1037    
1038    
1039 ph10 558
1040     #ifndef HAVE_STRERROR
1041 nigel 49 /*************************************************
1042 ph10 558 * Provide strerror() for non-ANSI libraries *
1043     *************************************************/
1044    
/* Fallback for old-fashioned C libraries (SunOS4 is one example) that lack
strerror() but do export the sys_nerr/sys_errlist pair. The message is looked
up directly in sys_errlist; any number outside 0 .. sys_nerr-1 yields a fixed
"unknown" text instead. */

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];
  return "unknown error number";
}
1058     #endif /* HAVE_STRERROR */
1059    
1060    
1061 ph10 667 /*************************************************
1062     * JIT memory callback *
1063     *************************************************/
1064 ph10 558
1065 ph10 667 static pcre_jit_stack* jit_callback(void *arg)
1066     {
1067     return (pcre_jit_stack *)arg;
1068     }
1069 ph10 558
1070 ph10 667
1071 ph10 836 #if !defined NOUTF || defined SUPPORT_PCRE16
1072 ph10 558 /*************************************************
1073 ph10 836 * Convert UTF-8 string to value *
1074     *************************************************/
1075    
1076     /* This function takes one or more bytes that represents a UTF-8 character,
1077     and returns the value of the character.
1078    
1079     Argument:
1080     utf8bytes a pointer to the byte vector
1081     vptr a pointer to an int to receive the value
1082    
1083     Returns: > 0 => the number of bytes consumed
1084     -6 to 0 => malformed UTF-8 character at offset = (-return)
1085     */
1086    
1087     static int
1088     utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1089     {
1090     int c = *utf8bytes++;
1091     int d = c;
1092     int i, j, s;
1093    
1094     for (i = -1; i < 6; i++) /* i is number of additional bytes */
1095     {
1096     if ((d & 0x80) == 0) break;
1097     d <<= 1;
1098     }
1099    
1100     if (i == -1) { *vptr = c; return 1; } /* ascii character */
1101     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1102    
1103     /* i now has a value in the range 1-5 */
1104    
1105     s = 6*i;
1106     d = (c & utf8_table3[i]) << s;
1107    
1108     for (j = 0; j < i; j++)
1109     {
1110     c = *utf8bytes++;
1111     if ((c & 0xc0) != 0x80) return -(j+1);
1112     s -= 6;
1113     d |= (c & 0x3f) << s;
1114     }
1115    
1116     /* Check that encoding was the correct unique one */
1117    
1118     for (j = 0; j < utf8_table1_size; j++)
1119     if (d <= utf8_table1[j]) break;
1120     if (j != i) return -(i+1);
1121    
1122     /* Valid value */
1123    
1124     *vptr = d;
1125     return i+1;
1126     }
1127     #endif /* NOUTF || SUPPORT_PCRE16 */
1128    
1129    
1130    
1131     #if !defined NOUTF || defined SUPPORT_PCRE16
1132     /*************************************************
1133     * Convert character value to UTF-8 *
1134     *************************************************/
1135    
1136     /* This function takes an integer value in the range 0 - 0x7fffffff
1137     and encodes it as a UTF-8 character in 0 to 6 bytes.
1138    
1139     Arguments:
1140     cvalue the character value
1141     utf8bytes pointer to buffer for result - at least 6 bytes long
1142    
1143     Returns: number of characters placed in the buffer
1144     */
1145    
1146     static int
1147     ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1148     {
1149     register int i, j;
1150     for (i = 0; i < utf8_table1_size; i++)
1151     if (cvalue <= utf8_table1[i]) break;
1152     utf8bytes += i;
1153     for (j = i; j > 0; j--)
1154     {
1155     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1156     cvalue >>= 6;
1157     }
1158     *utf8bytes = utf8_table2[i] | cvalue;
1159     return i + 1;
1160     }
1161 ph10 842 #endif
1162 ph10 836
1163    
1164     #ifdef SUPPORT_PCRE16
1165     /*************************************************
1166     * Convert a string to 16-bit *
1167     *************************************************/
1168    
1169     /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1170     8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1171     double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1172     in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1173     result is always left in buffer16.
1174    
1175     Note that this function does not object to surrogate values. This is
1176     deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1177     for the purpose of testing that they are correctly faulted.
1178    
1179 ph10 842 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1180 ph10 836 in UTF-8 so that values greater than 255 can be handled.
1181    
1182     Arguments:
1183     data TRUE if converting a data line; FALSE for a regex
1184     p points to a byte string
1185     utf true if UTF-8 (to be converted to UTF-16)
1186     len number of bytes in the string (excluding trailing zero)
1187    
1188     Returns: number of 16-bit data items used (excluding trailing zero)
1189     OR -1 if a UTF-8 string is malformed
1190     OR -2 if a value > 0x10ffff is encountered
1191 ph10 842 OR -3 if a value > 0xffff is encountered when not in UTF mode
1192 ph10 836 */
1193    
1194     static int
1195     to16(int data, pcre_uint8 *p, int utf, int len)
1196     {
1197     pcre_uint16 *pp;
1198    
1199     if (buffer16_size < 2*len + 2)
1200     {
1201     if (buffer16 != NULL) free(buffer16);
1202     buffer16_size = 2*len + 2;
1203     buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1204     if (buffer16 == NULL)
1205     {
1206     fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1207     exit(1);
1208     }
1209     }
1210    
1211     pp = buffer16;
1212    
1213     if (!utf && !data)
1214     {
1215     while (len-- > 0) *pp++ = *p++;
1216     }
1217    
1218     else
1219     {
1220     int c = 0;
1221     while (len > 0)
1222     {
1223     int chlen = utf82ord(p, &c);
1224     if (chlen <= 0) return -1;
1225     if (c > 0x10ffff) return -2;
1226     p += chlen;
1227     len -= chlen;
1228     if (c < 0x10000) *pp++ = c; else
1229     {
1230     if (!utf) return -3;
1231     c -= 0x10000;
1232     *pp++ = 0xD800 | (c >> 10);
1233     *pp++ = 0xDC00 | (c & 0x3ff);
1234     }
1235     }
1236     }
1237    
1238     *pp = 0;
1239     return pp - buffer16;
1240     }
1241     #endif
1242    
1243    
1244     /*************************************************
1245 nigel 91 * Read or extend an input line *
1246     *************************************************/
1247    
1248     /* Input lines are read into buffer, but both patterns and data lines can be
1249     continued over multiple input lines. In addition, if the buffer fills up, we
1250     want to automatically expand it so as to be able to handle extremely large
1251     lines that are needed for certain stress tests. When the input buffer is
1252     expanded, the other two buffers must also be expanded likewise, and the
1253     contents of pbuffer, which are a copy of the input for callouts, must be
1254     preserved (for when expansion happens for a data line). This is not the most
1255     optimal way of handling this, but hey, this is just a test program!
1256    
1257     Arguments:
1258     f the file to read
1259     start where in buffer to start (this *must* be within buffer)
1260 ph10 287 prompt for stdin or readline()
1261 nigel 91
1262     Returns: pointer to the start of new data
1263     could be a copy of start, or could be moved
1264     NULL if no data read and EOF reached
1265     */
1266    
1267 ph10 836 static pcre_uint8 *
1268     extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1269 nigel 91 {
1270 ph10 836 pcre_uint8 *here = start;
1271 nigel 91
1272     for (;;)
1273     {
1274 ph10 530 int rlen = (int)(buffer_size - (here - buffer));
1275 nigel 93
1276 nigel 91 if (rlen > 1000)
1277     {
1278     int dlen;
1279 ph10 289
1280 ph10 287 /* If libreadline support is required, use readline() to read a line if the
1281     input is a terminal. Note that readline() removes the trailing newline, so
1282     we must put it back again, to be compatible with fgets(). */
1283 ph10 289
1284 ph10 287 #ifdef SUPPORT_LIBREADLINE
1285     if (isatty(fileno(f)))
1286     {
1287 ph10 289 size_t len;
1288 ph10 287 char *s = readline(prompt);
1289     if (s == NULL) return (here == start)? NULL : start;
1290     len = strlen(s);
1291 ph10 289 if (len > 0) add_history(s);
1292 ph10 287 if (len > rlen - 1) len = rlen - 1;
1293     memcpy(here, s, len);
1294     here[len] = '\n';
1295 ph10 289 here[len+1] = 0;
1296     free(s);
1297 ph10 287 }
1298 ph10 289 else
1299     #endif
1300    
1301 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
1302 ph10 289
1303 ph10 287 {
1304 ph10 516 if (f == stdin) printf("%s", prompt);
1305 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
1306     return (here == start)? NULL : start;
1307 ph10 289 }
1308    
1309 nigel 91 dlen = (int)strlen((char *)here);
1310     if (dlen > 0 && here[dlen - 1] == '\n') return start;
1311     here += dlen;
1312     }
1313    
1314     else
1315     {
1316     int new_buffer_size = 2*buffer_size;
1317 ph10 836 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1318     pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1319     pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1320 nigel 91
1321     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1322     {
1323     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1324     exit(1);
1325     }
1326    
1327     memcpy(new_buffer, buffer, buffer_size);
1328     memcpy(new_pbuffer, pbuffer, buffer_size);
1329    
1330     buffer_size = new_buffer_size;
1331    
1332     start = new_buffer + (start - buffer);
1333     here = new_buffer + (here - buffer);
1334    
1335     free(buffer);
1336     free(dbuffer);
1337     free(pbuffer);
1338    
1339     buffer = new_buffer;
1340     dbuffer = new_dbuffer;
1341     pbuffer = new_pbuffer;
1342     }
1343     }
1344    
1345     return NULL; /* Control never gets here */
1346     }
1347    
1348    
1349    
1350     /*************************************************
1351 nigel 63 * Read number from string *
1352     *************************************************/
1353    
1354     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1355     around with conditional compilation, just do the job by hand. It is only used
1356 nigel 93 for unpicking arguments, so just keep it simple.
1357 nigel 63
1358     Arguments:
1359     str string to be converted
1360     endptr where to put the end pointer
1361    
1362     Returns: the unsigned long
1363     */
1364    
1365     static int
1366 ph10 836 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1367 nigel 63 {
1368     int result = 0;
1369     while(*str != 0 && isspace(*str)) str++;
1370     while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1371     *endptr = str;
1372     return(result);
1373     }
1374    
1375    
1376    
1377 nigel 49 /*************************************************
1378 ph10 836 * Print one character *
1379 nigel 49 *************************************************/
1380    
1381 ph10 836 /* Print a single character either literally, or as a hex escape. */
1382 nigel 49
1383 ph10 836 static int pchar(int c, FILE *f)
1384 nigel 49 {
1385 ph10 836 if (PRINTOK(c))
1386     {
1387     if (f != NULL) fprintf(f, "%c", c);
1388     return 1;
1389     }
1390 nigel 49
1391 ph10 836 if (c < 0x100)
1392 nigel 49 {
1393 ph10 836 if (use_utf)
1394     {
1395     if (f != NULL) fprintf(f, "\\x{%02x}", c);
1396     return 6;
1397     }
1398     else
1399     {
1400     if (f != NULL) fprintf(f, "\\x%02x", c);
1401     return 4;
1402     }
1403 nigel 49 }
1404    
1405 ph10 836 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1406     return (c <= 0x000000ff)? 6 :
1407     (c <= 0x00000fff)? 7 :
1408     (c <= 0x0000ffff)? 8 :
1409     (c <= 0x000fffff)? 9 : 10;
1410     }
1411 nigel 49
1412    
1413    
1414 ph10 836 #ifdef SUPPORT_PCRE8
1415     /*************************************************
1416     * Print 8-bit character string *
1417     *************************************************/
1418 nigel 49
1419 ph10 836 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1420     If handed a NULL file, just counts chars without printing. */
1421 nigel 49
1422 ph10 836 static int pchars(pcre_uint8 *p, int length, FILE *f)
1423     {
1424     int c = 0;
1425     int yield = 0;
1426 nigel 49
1427 ph10 836 if (length < 0)
1428     length = strlen((char *)p);
1429 nigel 49
1430 ph10 836 while (length-- > 0)
1431     {
1432     #if !defined NOUTF
1433     if (use_utf)
1434     {
1435     int rc = utf82ord(p, &c);
1436     if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1437     {
1438     length -= rc - 1;
1439     p += rc;
1440     yield += pchar(c, f);
1441     continue;
1442     }
1443     }
1444     #endif
1445     c = *p++;
1446     yield += pchar(c, f);
1447     }
1448    
1449     return yield;
1450 nigel 49 }
1451 nigel 79 #endif
1452 nigel 49
1453    
1454 nigel 79
1455 ph10 836 #ifdef SUPPORT_PCRE16
1456 nigel 63 /*************************************************
1457 ph10 836 * Find length of 0-terminated 16-bit string *
1458 nigel 85 *************************************************/
1459    
1460 ph10 836 static int strlen16(PCRE_SPTR16 p)
1461 nigel 85 {
1462 ph10 836 int len = 0;
1463     while (*p++ != 0) len++;
1464     return len;
1465 nigel 85 }
1466 ph10 836 #endif /* SUPPORT_PCRE16 */
1467 nigel 85
1468    
1469 ph10 836 #ifdef SUPPORT_PCRE16
1470 nigel 85 /*************************************************
1471 ph10 836 * Print 16-bit character string *
1472 nigel 63 *************************************************/
1473 nigel 49
1474 ph10 836 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1475     If handed a NULL file, just counts chars without printing. */
1476 nigel 49
1477 ph10 836 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1478 nigel 3 {
1479 nigel 63 int yield = 0;
1480 nigel 3
1481 ph10 836 if (length < 0)
1482     length = strlen16(p);
1483    
1484 nigel 63 while (length-- > 0)
1485 nigel 3 {
1486 ph10 836 int c = *p++ & 0xffff;
1487     #if !defined NOUTF
1488     if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1489 nigel 63 {
1490 ph10 836 int d = *p & 0xffff;
1491     if (d >= 0xDC00 && d < 0xDFFF)
1492 nigel 63 {
1493 ph10 836 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1494     length--;
1495     p++;
1496 nigel 63 }
1497     }
1498 nigel 79 #endif
1499 ph10 836 yield += pchar(c, f);
1500     }
1501 nigel 3
1502 ph10 836 return yield;
1503     }
1504     #endif /* SUPPORT_PCRE16 */
1505 nigel 63
1506 ph10 836
1507    
1508     #ifdef SUPPORT_PCRE8
1509     /*************************************************
1510     * Read a capture name (8-bit) and check it *
1511     *************************************************/
1512    
1513     static pcre_uint8 *
1514     read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1515     {
1516     pcre_uint8 *npp = *pp;
1517     while (isalnum(*p)) *npp++ = *p++;
1518     *npp++ = 0;
1519     *npp = 0;
1520     if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1521     {
1522     fprintf(outfile, "no parentheses with name \"");
1523     PCHARSV(*pp, 0, -1, outfile);
1524     fprintf(outfile, "\"\n");
1525 nigel 63 }
1526 nigel 3
1527 ph10 836 *pp = npp;
1528     return p;
1529 nigel 63 }
1530 ph10 836 #endif /* SUPPORT_PCRE8 */
1531 nigel 23
1532 nigel 3
1533 nigel 23
1534 ph10 836 #ifdef SUPPORT_PCRE16
1535 nigel 63 /*************************************************
1536 ph10 836 * Read a capture name (16-bit) and check it *
1537     *************************************************/
1538    
1539     /* Note that the text being read is 8-bit. */
1540    
1541     static pcre_uint8 *
1542     read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1543     {
1544     pcre_uint16 *npp = *pp;
1545     while (isalnum(*p)) *npp++ = *p++;
1546     *npp++ = 0;
1547     *npp = 0;
1548 zherczeg 852 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1549 ph10 836 {
1550     fprintf(outfile, "no parentheses with name \"");
1551     PCHARSV(*pp, 0, -1, outfile);
1552     fprintf(outfile, "\"\n");
1553     }
1554     *pp = npp;
1555     return p;
1556     }
1557     #endif /* SUPPORT_PCRE16 */
1558    
1559    
1560    
1561     /*************************************************
1562 nigel 63 * Callout function *
1563     *************************************************/
1564 nigel 3
1565 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1566     the match. Yield zero unless more callouts than the fail count, or the callout
1567     data is not zero. */
1568 nigel 3
1569 nigel 63 static int callout(pcre_callout_block *cb)
1570     {
1571     FILE *f = (first_callout | callout_extra)? outfile : NULL;
1572 nigel 75 int i, pre_start, post_start, subject_length;
1573 nigel 3
1574 nigel 63 if (callout_extra)
1575     {
1576     fprintf(f, "Callout %d: last capture = %d\n",
1577     cb->callout_number, cb->capture_last);
1578 nigel 3
1579 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
1580     {
1581     if (cb->offset_vector[i] < 0)
1582     fprintf(f, "%2d: <unset>\n", i/2);
1583     else
1584     {
1585     fprintf(f, "%2d: ", i/2);
1586 ph10 836 PCHARSV(cb->subject, cb->offset_vector[i],
1587 nigel 63 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1588     fprintf(f, "\n");
1589     }
1590     }
1591     }
1592 nigel 3
1593 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
1594     datails. On subsequent calls in the same match, we use pchars just to find the
1595     printed lengths of the substrings. */
1596 nigel 3
1597 nigel 63 if (f != NULL) fprintf(f, "--->");
1598 nigel 3
1599 ph10 836 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1600     PCHARS(post_start, cb->subject, cb->start_match,
1601 nigel 63 cb->current_position - cb->start_match, f);
1602 nigel 3
1603 ph10 836 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1604 nigel 75
1605 ph10 836 PCHARSV(cb->subject, cb->current_position,
1606 nigel 63 cb->subject_length - cb->current_position, f);
1607 nigel 3
1608 nigel 63 if (f != NULL) fprintf(f, "\n");
1609 nigel 9
1610 nigel 63 /* Always print appropriate indicators, with callout number if not already
1611 nigel 75 shown. For automatic callouts, show the pattern offset. */
1612 nigel 3
1613 nigel 75 if (cb->callout_number == 255)
1614     {
1615     fprintf(outfile, "%+3d ", cb->pattern_position);
1616     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1617     }
1618     else
1619     {
1620     if (callout_extra) fprintf(outfile, " ");
1621     else fprintf(outfile, "%3d ", cb->callout_number);
1622     }
1623 nigel 3
1624 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1625     fprintf(outfile, "^");
1626 nigel 3
1627 nigel 63 if (post_start > 0)
1628     {
1629     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1630     fprintf(outfile, "^");
1631 nigel 3 }
1632    
1633 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1634     fprintf(outfile, " ");
1635    
1636     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1637     pbuffer + cb->pattern_position);
1638    
1639 nigel 63 fprintf(outfile, "\n");
1640     first_callout = 0;
1641 nigel 3
1642 ph10 654 if (cb->mark != last_callout_mark)
1643 ph10 645 {
1644 ph10 836 if (cb->mark == NULL)
1645     fprintf(outfile, "Latest Mark: <unset>\n");
1646     else
1647     {
1648     fprintf(outfile, "Latest Mark: ");
1649     PCHARSV(cb->mark, 0, -1, outfile);
1650     putc('\n', outfile);
1651     }
1652 ph10 654 last_callout_mark = cb->mark;
1653     }
1654 ph10 645
1655 nigel 71 if (cb->callout_data != NULL)
1656 nigel 49 {
1657 nigel 71 int callout_data = *((int *)(cb->callout_data));
1658     if (callout_data != 0)
1659     {
1660     fprintf(outfile, "Callout data = %d\n", callout_data);
1661     return callout_data;
1662     }
1663 nigel 63 }
1664 nigel 49
1665 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
1666     (++callout_count >= callout_fail_count)? 1 : 0;
1667 nigel 3 }
1668    
1669    
1670 nigel 63 /*************************************************
1671 nigel 73 * Local malloc functions *
1672 nigel 63 *************************************************/
1673 nigel 3
1674 ph10 667 /* Alternative malloc function, to test functionality and save the size of a
1675 ph10 836 compiled re, which is the first store request that pcre_compile() makes. The
1676     show_malloc variable is set only during matching. */
1677 nigel 3
1678     static void *new_malloc(size_t size)
1679     {
1680 nigel 73 void *block = malloc(size);
1681 nigel 43 gotten_store = size;
1682 ph10 836 if (first_gotten_store == 0) first_gotten_store = size;
1683 nigel 73 if (show_malloc)
1684 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1685 nigel 73 return block;
1686 nigel 3 }
1687    
1688 nigel 73 static void new_free(void *block)
1689     {
1690     if (show_malloc)
1691     fprintf(outfile, "free %p\n", block);
1692     free(block);
1693     }
1694 nigel 3
1695 nigel 73 /* For recursion malloc/free, to test stacking calls */
1696    
1697     static void *stack_malloc(size_t size)
1698     {
1699     void *block = malloc(size);
1700     if (show_malloc)
1701 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1702 nigel 73 return block;
1703     }
1704    
1705     static void stack_free(void *block)
1706     {
1707     if (show_malloc)
1708     fprintf(outfile, "stack_free %p\n", block);
1709     free(block);
1710     }
1711    
1712    
1713 nigel 63 /*************************************************
1714     * Call pcre_fullinfo() *
1715     *************************************************/
1716 nigel 43
1717 ph10 836 /* Get one piece of information from the pcre_fullinfo() function. When only
1718     one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1719     value, but the code is defensive.
1720 nigel 43
1721 ph10 836 Arguments:
1722     re compiled regex
1723     study study data
1724     option PCRE_INFO_xxx option
1725     ptr where to put the data
1726    
1727     Returns: 0 when OK, < 0 on error
1728     */
1729    
1730     static int
1731     new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1732 nigel 43 {
1733     int rc;
1734 ph10 836
1735     if (use_pcre16)
1736     #ifdef SUPPORT_PCRE16
1737 zherczeg 852 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1738 ph10 836 #else
1739     rc = PCRE_ERROR_BADMODE;
1740     #endif
1741     else
1742     #ifdef SUPPORT_PCRE8
1743     rc = pcre_fullinfo(re, study, option, ptr);
1744     #else
1745     rc = PCRE_ERROR_BADMODE;
1746     #endif
1747    
1748     if (rc < 0)
1749     {
1750     fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1751     use_pcre16? "16" : "", option);
1752     if (rc == PCRE_ERROR_BADMODE)
1753     fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1754     "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1755     }
1756    
1757     return rc;
1758 nigel 43 }
1759    
1760    
1761    
1762 nigel 63 /*************************************************
1763 ph10 836 * Swap byte functions *
1764 nigel 75 *************************************************/
1765    
1766 ph10 836 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1767     value, respectively.
1768    
1769     Arguments:
1770     value any number
1771    
1772     Returns: the byte swapped value
1773     */
1774    
1775     static pcre_uint32
1776     swap_uint32(pcre_uint32 value)
1777 nigel 75 {
1778     return ((value & 0x000000ff) << 24) |
1779     ((value & 0x0000ff00) << 8) |
1780     ((value & 0x00ff0000) >> 8) |
1781 ph10 836 (value >> 24);
1782 nigel 75 }
1783    
1784 ph10 836 static pcre_uint16
1785     swap_uint16(pcre_uint16 value)
1786     {
1787     return (value >> 8) | (value << 8);
1788     }
1789 nigel 75
1790    
1791    
1792     /*************************************************
1793 ph10 836 * Flip bytes in a compiled pattern *
1794     *************************************************/
1795    
1796     /* This function is called if the 'F' option was present on a pattern that is
1797     to be written to a file. We flip the bytes of all the integer fields in the
1798     regex data block and the study block. In 16-bit mode this also flips relevant
1799     bytes in the pattern itself. This is to make it possible to test PCRE's
1800     ability to reload byte-flipped patterns, e.g. those compiled on a different
1801     architecture. */
1802    
1803     static void
1804     regexflip(pcre *ere, pcre_extra *extra)
1805     {
1806 zherczeg 852 REAL_PCRE *re = (REAL_PCRE *)ere;
1807 ph10 836 #ifdef SUPPORT_PCRE16
1808     int op;
1809     pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1810     int length = re->name_count * re->name_entry_size;
1811     #ifdef SUPPORT_UTF
1812     BOOL utf = (re->options & PCRE_UTF16) != 0;
1813     BOOL utf16_char = FALSE;
1814     #endif /* SUPPORT_UTF */
1815     #endif /* SUPPORT_PCRE16 */
1816    
1817     /* Always flip the bytes in the main data block and study blocks. */
1818    
1819     re->magic_number = REVERSED_MAGIC_NUMBER;
1820     re->size = swap_uint32(re->size);
1821     re->options = swap_uint32(re->options);
1822     re->flags = swap_uint16(re->flags);
1823     re->top_bracket = swap_uint16(re->top_bracket);
1824     re->top_backref = swap_uint16(re->top_backref);
1825     re->first_char = swap_uint16(re->first_char);
1826     re->req_char = swap_uint16(re->req_char);
1827     re->name_table_offset = swap_uint16(re->name_table_offset);
1828     re->name_entry_size = swap_uint16(re->name_entry_size);
1829     re->name_count = swap_uint16(re->name_count);
1830    
1831     if (extra != NULL)
1832     {
1833     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1834     rsd->size = swap_uint32(rsd->size);
1835     rsd->flags = swap_uint32(rsd->flags);
1836     rsd->minlength = swap_uint32(rsd->minlength);
1837     }
1838    
1839     /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1840     in the name table, if present, and then in the pattern itself. */
1841    
1842     #ifdef SUPPORT_PCRE16
1843     if (!use_pcre16) return;
1844    
1845     while(TRUE)
1846     {
1847     /* Swap previous characters. */
1848     while (length-- > 0)
1849     {
1850     *ptr = swap_uint16(*ptr);
1851     ptr++;
1852     }
1853     #ifdef SUPPORT_UTF
1854     if (utf16_char)
1855     {
1856     if ((ptr[-1] & 0xfc00) == 0xd800)
1857     {
1858     /* We know that there is only one extra character in UTF-16. */
1859     *ptr = swap_uint16(*ptr);
1860     ptr++;
1861     }
1862     }
1863     utf16_char = FALSE;
1864     #endif /* SUPPORT_UTF */
1865    
1866     /* Get next opcode. */
1867    
1868     length = 0;
1869     op = *ptr;
1870     *ptr++ = swap_uint16(op);
1871    
1872     switch (op)
1873     {
1874     case OP_END:
1875     return;
1876    
1877     #ifdef SUPPORT_UTF
1878     case OP_CHAR:
1879     case OP_CHARI:
1880     case OP_NOT:
1881     case OP_NOTI:
1882     case OP_STAR:
1883     case OP_MINSTAR:
1884     case OP_PLUS:
1885     case OP_MINPLUS:
1886     case OP_QUERY:
1887     case OP_MINQUERY:
1888     case OP_UPTO:
1889     case OP_MINUPTO:
1890     case OP_EXACT:
1891     case OP_POSSTAR:
1892     case OP_POSPLUS:
1893     case OP_POSQUERY:
1894     case OP_POSUPTO:
1895     case OP_STARI:
1896     case OP_MINSTARI:
1897     case OP_PLUSI:
1898     case OP_MINPLUSI:
1899     case OP_QUERYI:
1900     case OP_MINQUERYI:
1901     case OP_UPTOI:
1902     case OP_MINUPTOI:
1903     case OP_EXACTI:
1904     case OP_POSSTARI:
1905     case OP_POSPLUSI:
1906     case OP_POSQUERYI:
1907     case OP_POSUPTOI:
1908     case OP_NOTSTAR:
1909     case OP_NOTMINSTAR:
1910     case OP_NOTPLUS:
1911     case OP_NOTMINPLUS:
1912     case OP_NOTQUERY:
1913     case OP_NOTMINQUERY:
1914     case OP_NOTUPTO:
1915     case OP_NOTMINUPTO:
1916     case OP_NOTEXACT:
1917     case OP_NOTPOSSTAR:
1918     case OP_NOTPOSPLUS:
1919     case OP_NOTPOSQUERY:
1920     case OP_NOTPOSUPTO:
1921     case OP_NOTSTARI:
1922     case OP_NOTMINSTARI:
1923     case OP_NOTPLUSI:
1924     case OP_NOTMINPLUSI:
1925     case OP_NOTQUERYI:
1926     case OP_NOTMINQUERYI:
1927     case OP_NOTUPTOI:
1928     case OP_NOTMINUPTOI:
1929     case OP_NOTEXACTI:
1930     case OP_NOTPOSSTARI:
1931     case OP_NOTPOSPLUSI:
1932     case OP_NOTPOSQUERYI:
1933     case OP_NOTPOSUPTOI:
1934     if (utf) utf16_char = TRUE;
1935     #endif
1936     /* Fall through. */
1937    
1938     default:
1939     length = OP_lengths16[op] - 1;
1940     break;
1941    
1942     case OP_CLASS:
1943     case OP_NCLASS:
1944     /* Skip the character bit map. */
1945     ptr += 32/sizeof(pcre_uint16);
1946     length = 0;
1947     break;
1948    
1949     case OP_XCLASS:
1950 zherczeg 839 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1951     if (LINK_SIZE > 1)
1952     length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1953     - (1 + LINK_SIZE + 1));
1954     else
1955     length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1956    
1957 ph10 836 /* Reverse the size of the XCLASS instance. */
1958 zherczeg 839 *ptr = swap_uint16(*ptr);
1959 ph10 836 ptr++;
1960     if (LINK_SIZE > 1)
1961     {
1962 zherczeg 839 *ptr = swap_uint16(*ptr);
1963 ph10 836 ptr++;
1964     }
1965    
1966     op = *ptr;
1967     *ptr = swap_uint16(op);
1968 zherczeg 839 ptr++;
1969 ph10 836 if ((op & XCL_MAP) != 0)
1970     {
1971     /* Skip the character bit map. */
1972     ptr += 32/sizeof(pcre_uint16);
1973     length -= 32/sizeof(pcre_uint16);
1974     }
1975     break;
1976     }
1977     }
1978     /* Control should never reach here in 16 bit mode. */
1979     #endif /* SUPPORT_PCRE16 */
1980     }
1981    
1982    
1983    
1984     /*************************************************
1985 nigel 87 * Check match or recursion limit *
1986     *************************************************/
1987    
1988     static int
1989 ph10 836 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1990 nigel 87 int start_offset, int options, int *use_offsets, int use_size_offsets,
1991     int flag, unsigned long int *limit, int errnumber, const char *msg)
1992     {
1993     int count;
1994     int min = 0;
1995     int mid = 64;
1996     int max = -1;
1997    
1998     extra->flags |= flag;
1999    
2000     for (;;)
2001     {
2002     *limit = mid;
2003    
2004 ph10 836 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2005 nigel 87 use_offsets, use_size_offsets);
2006    
2007     if (count == errnumber)
2008     {
2009     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2010     min = mid;
2011     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2012     }
2013    
2014     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2015     count == PCRE_ERROR_PARTIAL)
2016     {
2017     if (mid == min + 1)
2018     {
2019     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2020     break;
2021     }
2022     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2023     max = mid;
2024     mid = (min + mid)/2;
2025     }
2026     else break; /* Some other error */
2027     }
2028    
2029     extra->flags &= ~flag;
2030     return count;
2031     }
2032    
2033    
2034    
2035     /*************************************************
2036 ph10 227 * Case-independent strncmp() function *
2037     *************************************************/
2038    
2039     /*
2040     Arguments:
2041     s first string
2042     t second string
2043     n number of characters to compare
2044    
2045     Returns: < 0, = 0, or > 0, according to the comparison
2046     */
2047    
2048     static int
2049 ph10 836 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2050 ph10 227 {
2051     while (n--)
2052     {
2053     int c = tolower(*s++) - tolower(*t++);
2054     if (c) return c;
2055     }
2056     return 0;
2057     }
2058    
2059    
2060    
2061     /*************************************************
2062 nigel 91 * Check newline indicator *
2063     *************************************************/
2064    
2065 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2066     a message and return 0 if there is no match.
2067 nigel 91
2068     Arguments:
2069     p points after the leading '<'
2070     f file for error message
2071    
2072     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2073     */
2074    
2075     static int
2076 ph10 836 check_newline(pcre_uint8 *p, FILE *f)
2077 nigel 91 {
2078 ph10 836 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2079     if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2080     if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2081     if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2082     if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2083     if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2084     if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2085 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
2086     return 0;
2087     }
2088    
2089    
2090    
2091     /*************************************************
2092 nigel 93 * Usage function *
2093     *************************************************/
2094    
/* Write the command-line summary to stdout. Which lines appear depends on the
optional features that were selected when pcretest was compiled. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n", stdout);
fputs("Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs(" -16 use the 16-bit library\n", stdout);
#endif

fputs(" -b show compiled code\n", stdout);
fputs(" -C show PCRE compile-time options and exit\n", stdout);
fputs(" -C arg show a specific compile-time option\n", stdout);
fputs(" and exit with its value. The arg can be:\n", stdout);
fputs(" linksize internal link size [2, 3, 4]\n", stdout);
fputs(" pcre8 8 bit library support enabled [0, 1]\n", stdout);
fputs(" pcre16 16 bit library support enabled [0, 1]\n", stdout);
fputs(" utf Unicode Transformation Format supported [0, 1]\n", stdout);
fputs(" ucp Unicode Properties supported [0, 1]\n", stdout);
fputs(" jit Just-in-time compiler supported [0, 1]\n", stdout);
fputs(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n", stdout);
fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n", stdout);
fputs(" -i show information about compiled patterns\n", stdout);
fputs(" -M find MATCH_LIMIT minimum for each subject\n", stdout);
fputs(" -m output memory used information\n", stdout);
fputs(" -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
fputs(" -s force each pattern to be studied at basic level\n", stdout);
fputs(" -s+ force each pattern to be studied, using JIT if available\n", stdout);
fputs(" -t time compilation and execution\n", stdout);
fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
fputs(" -tm time execution (matching) only\n", stdout);
fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
2141    
2142    
2143    
2144     /*************************************************
2145 nigel 63 * Main Program *
2146     *************************************************/
2147 nigel 43
2148 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
2149     consist of a regular expression, in delimiters and optionally followed by
2150     options, followed by a set of test data, terminated by an empty line. */
2151    
2152     int main(int argc, char **argv)
2153     {
2154     FILE *infile = stdin;
2155 ph10 836 const char *version;
2156 nigel 3 int options = 0;
2157     int study_options = 0;
2158 ph10 386 int default_find_match_limit = FALSE;
2159 nigel 3 int op = 1;
2160     int timeit = 0;
2161 nigel 93 int timeitm = 0;
2162 nigel 3 int showinfo = 0;
2163 nigel 31 int showstore = 0;
2164 ph10 667 int force_study = -1;
2165     int force_study_options = 0;
2166 nigel 87 int quiet = 0;
2167 nigel 53 int size_offsets = 45;
2168     int size_offsets_max;
2169 nigel 77 int *offsets = NULL;
2170 nigel 53 #if !defined NOPOSIX
2171 nigel 3 int posix = 0;
2172 nigel 53 #endif
2173 nigel 3 int debug = 0;
2174 nigel 11 int done = 0;
2175 nigel 77 int all_use_dfa = 0;
2176     int yield = 0;
2177 nigel 91 int stack_size;
2178 nigel 3
2179 ph10 667 pcre_jit_stack *jit_stack = NULL;
2180    
2181 ph10 836 /* These vectors store, end-to-end, a list of zero-terminated captured
2182     substring names, each list itself being terminated by an empty name. Assume
2183     that 1024 is plenty long enough for the few names we'll be testing. It is
2184     easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2185 ph10 881 for the actual memory, to ensure alignment. */
2186 ph10 667
2187 ph10 836 pcre_uint16 copynames[1024];
2188     pcre_uint16 getnames[1024];
2189 nigel 69
2190 ph10 881 #ifdef SUPPORT_PCRE16
2191 ph10 836 pcre_uint16 *cn16ptr;
2192     pcre_uint16 *gn16ptr;
2193 ph10 881 #endif
2194 nigel 91
2195 ph10 881 #ifdef SUPPORT_PCRE8
2196 ph10 836 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2197     pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2198     pcre_uint8 *cn8ptr;
2199     pcre_uint8 *gn8ptr;
2200 ph10 881 #endif
2201 nigel 91
2202 ph10 836 /* Get buffers from malloc() so that valgrind will check their misuse when
2203     debugging. They grow automatically when very long lines are read. The 16-bit
2204     buffer (buffer16) is obtained only if needed. */
2205 nigel 69
2206 ph10 836 buffer = (pcre_uint8 *)malloc(buffer_size);
2207     dbuffer = (pcre_uint8 *)malloc(buffer_size);
2208     pbuffer = (pcre_uint8 *)malloc(buffer_size);
2209 nigel 69
2210 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
2211 nigel 3
2212 nigel 93 outfile = stdout;
2213    
2214     /* The following _setmode() stuff is some Windows magic that tells its runtime
2215     library to translate CRLF into a single LF character. At least, that's what
2216     I've been told: never having used Windows I take this all on trust. Originally
2217     it set 0x8000, but then I was advised that _O_BINARY was better. */
2218    
2219 nigel 75 #if defined(_WIN32) || defined(WIN32)
2220 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
2221     #endif
2222 nigel 75
2223 ph10 836 /* Get the version number: both pcre_version() and pcre16_version() give the
2224     same answer. We just need to ensure that we call one that is available. */
2225    
2226     #ifdef SUPPORT_PCRE8
2227     version = pcre_version();
2228     #else
2229     version = pcre16_version();
2230     #endif
2231    
2232 nigel 3 /* Scan options */
2233    
2234     while (argc > 1 && argv[op][0] == '-')
2235     {
2236 ph10 836 pcre_uint8 *endptr;
2237 nigel 53
2238 ph10 606 if (strcmp(argv[op], "-m") == 0) showstore = 1;
2239 ph10 667 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2240 ph10 691 else if (strcmp(argv[op], "-s+") == 0)
2241 ph10 667 {
2242     force_study = 1;
2243     force_study_options = PCRE_STUDY_JIT_COMPILE;
2244 ph10 691 }
2245 ph10 836 else if (strcmp(argv[op], "-16") == 0)
2246     {
2247     #ifdef SUPPORT_PCRE16
2248     use_pcre16 = 1;
2249     #else
2250     printf("** This version of PCRE was built without 16-bit support\n");
2251     exit(1);
2252     #endif
2253     }
2254 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2255 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
2256 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2257     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2258 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2259 nigel 79 #if !defined NODFA
2260 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2261 nigel 79 #endif
2262 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2263 ph10 836 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2264 nigel 65 *endptr == 0))
2265 nigel 53 {
2266     op++;
2267     argc--;
2268     }
2269 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2270     {
2271     int both = argv[op][2] == 0;
2272     int temp;
2273 ph10 836 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2274 nigel 93 *endptr == 0))
2275     {
2276     timeitm = temp;
2277     op++;
2278     argc--;
2279     }
2280     else timeitm = LOOPREPEAT;
2281     if (both) timeit = timeitm;
2282     }
2283 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2284 ph10 836 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2285 nigel 91 *endptr == 0))
2286     {
2287 ph10 641 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2288 nigel 91 printf("PCRE: -S not supported on this OS\n");
2289     exit(1);
2290     #else
2291     int rc;
2292     struct rlimit rlim;
2293     getrlimit(RLIMIT_STACK, &rlim);
2294     rlim.rlim_cur = stack_size * 1024 * 1024;
2295     rc = setrlimit(RLIMIT_STACK, &rlim);
2296     if (rc != 0)
2297     {
2298     printf("PCRE: setrlimit() failed with error %d\n", rc);
2299     exit(1);
2300     }
2301     op++;
2302     argc--;
2303     #endif
2304     }
2305 nigel 53 #if !defined NOPOSIX
2306 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
2307 nigel 53 #endif
2308 nigel 63 else if (strcmp(argv[op], "-C") == 0)
2309     {
2310     int rc;
2311 ph10 392 unsigned long int lrc;
2312 ph10 836
2313     if (argc > 2)
2314     {
2315     if (strcmp(argv[op + 1], "linksize") == 0)
2316     {
2317     (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2318     printf("%d\n", rc);
2319     yield = rc;
2320     goto EXIT;
2321     }
2322     if (strcmp(argv[op + 1], "pcre8") == 0)
2323     {
2324     #ifdef SUPPORT_PCRE8
2325     printf("1\n");
2326     yield = 1;
2327     #else
2328     printf("0\n");
2329     yield = 0;
2330     #endif
2331     goto EXIT;
2332     }
2333     if (strcmp(argv[op + 1], "pcre16") == 0)
2334     {
2335     #ifdef SUPPORT_PCRE16
2336     printf("1\n");
2337     yield = 1;
2338     #else
2339     printf("0\n");
2340     yield = 0;
2341     #endif
2342     goto EXIT;
2343     }
2344     if (strcmp(argv[op + 1], "utf") == 0)
2345     {
2346     #ifdef SUPPORT_PCRE8
2347     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2348     printf("%d\n", rc);
2349     yield = rc;
2350     #else
2351     (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2352     printf("%d\n", rc);
2353     yield = rc;
2354     #endif
2355     goto EXIT;
2356     }
2357     if (strcmp(argv[op + 1], "ucp") == 0)
2358     {
2359     (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2360     printf("%d\n", rc);
2361     yield = rc;
2362     goto EXIT;
2363     }
2364     if (strcmp(argv[op + 1], "jit") == 0)
2365     {
2366     (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2367     printf("%d\n", rc);
2368     yield = rc;
2369     goto EXIT;
2370     }
2371 ph10 838 if (strcmp(argv[op + 1], "newline") == 0)
2372 ph10 842 {
2373 ph10 838 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2374     /* Note that these values are always the ASCII values, even
2375     in EBCDIC environments. CR is 13 and NL is 10. */
2376     printf("%s\n", (rc == 13)? "CR" :
2377     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2378     (rc == -2)? "ANYCRLF" :
2379     (rc == -1)? "ANY" : "???");
2380     goto EXIT;
2381 ph10 842 }
2382 ph10 838 printf("Unknown -C option: %s\n", argv[op + 1]);
2383 ph10 836 goto EXIT;
2384     }
2385    
2386     printf("PCRE version %s\n", version);
2387 nigel 63 printf("Compiled with\n");
2388 ph10 836
2389     /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2390     are set, either both UTFs are supported or both are not supported. */
2391    
2392     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2393     printf(" 8-bit and 16-bit support\n");
2394 nigel 63 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2395 ph10 836 if (rc)
2396     printf(" UTF-8 and UTF-16 support\n");
2397     else
2398     printf(" No UTF-8 or UTF-16 support\n");
2399     #elif defined SUPPORT_PCRE8
2400     printf(" 8-bit support only\n");
2401     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2402 nigel 63 printf(" %sUTF-8 support\n", rc? "" : "No ");
2403 ph10 836 #else
2404     printf(" 16-bit support only\n");
2405     (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2406     printf(" %sUTF-16 support\n", rc? "" : "No ");
2407     #endif
2408    
2409     (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2410 nigel 75 printf(" %sUnicode properties support\n", rc? "" : "No ");
2411 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2412 ph10 674 if (rc)
2413 ph10 890 {
2414     const char *arch;
2415     (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, &arch);
2416     printf(" Just-in-time compiler support: %s\n", arch);
2417     }
2418 ph10 674 else
2419     printf(" No just-in-time compiler support\n");
2420 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2421 ph10 391 /* Note that these values are always the ASCII values, even
2422 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
2423 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2424     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2425 ph10 150 (rc == -2)? "ANYCRLF" :
2426 nigel 93 (rc == -1)? "ANY" : "???");
2427 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2428 ph10 231 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2429     "all Unicode newlines");
2430 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2431 nigel 63 printf(" Internal link size = %d\n", rc);
2432 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2433 nigel 63 printf(" POSIX malloc threshold = %d\n", rc);
2434 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2435 ph10 376 printf(" Default match limit = %ld\n", lrc);
2436 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2437 ph10 376 printf(" Default recursion depth limit = %ld\n", lrc);
2438 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2439 ph10 895 printf(" Match recursion uses %s", rc? "stack" : "heap");
2440     if (showstore)
2441     {
2442 ph10 901 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2443 ph10 895 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2444     }
2445     printf("\n");
2446 ph10 121 goto EXIT;
2447 nigel 63 }
2448 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
2449     strcmp(argv[op], "--help") == 0)
2450     {
2451     usage();
2452     goto EXIT;
2453     }
2454 nigel 3 else
2455     {
2456 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
2457 nigel 93 usage();
2458 nigel 77 yield = 1;
2459     goto EXIT;
2460 nigel 3 }
2461     op++;
2462     argc--;
2463     }
2464    
2465 nigel 53 /* Get the store for the offsets vector, and remember what it was */
2466    
2467     size_offsets_max = size_offsets;
2468 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2469 nigel 53 if (offsets == NULL)
2470     {
2471     printf("** Failed to get %d bytes of memory for offsets vector\n",
2472 ph10 151 (int)(size_offsets_max * sizeof(int)));
2473 nigel 77 yield = 1;
2474     goto EXIT;
2475 nigel 53 }
2476    
2477 nigel 3 /* Sort out the input and output files */
2478    
2479     if (argc > 1)
2480     {
2481 nigel 93 infile = fopen(argv[op], INPUT_MODE);
2482 nigel 3 if (infile == NULL)
2483     {
2484     printf("** Failed to open %s\n", argv[op]);
2485 nigel 77 yield = 1;
2486     goto EXIT;
2487 nigel 3 }
2488     }
2489    
2490     if (argc > 2)
2491     {
2492 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
2493 nigel 3 if (outfile == NULL)
2494     {
2495     printf("** Failed to open %s\n", argv[op+1]);
2496 nigel 77 yield = 1;
2497     goto EXIT;
2498 nigel 3 }
2499     }
2500    
2501     /* Set alternative malloc function */
2502    
2503 ph10 836 #ifdef SUPPORT_PCRE8
2504 nigel 3 pcre_malloc = new_malloc;
2505 nigel 73 pcre_free = new_free;
2506     pcre_stack_malloc = stack_malloc;
2507     pcre_stack_free = stack_free;
2508 ph10 836 #endif
2509 nigel 3
2510 ph10 836 #ifdef SUPPORT_PCRE16
2511     pcre16_malloc = new_malloc;
2512     pcre16_free = new_free;
2513     pcre16_stack_malloc = stack_malloc;
2514     pcre16_stack_free = stack_free;
2515     #endif
2516    
2517 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
2518 nigel 3
2519 ph10 836 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2520 nigel 3
2521     /* Main loop */
2522    
2523 nigel 11 while (!done)
2524 nigel 3 {
2525     pcre *re = NULL;
2526     pcre_extra *extra = NULL;
2527 nigel 37
2528     #if !defined NOPOSIX /* There are still compilers that require no indent */
2529 nigel 3 regex_t preg;
2530 nigel 45 int do_posix = 0;
2531 nigel 37 #endif
2532    
2533 nigel 7 const char *error;
2534 ph10 836 pcre_uint8 *markptr;
2535     pcre_uint8 *p, *pp, *ppp;
2536     pcre_uint8 *to_file = NULL;
2537     const pcre_uint8 *tables = NULL;
2538 zherczeg 847 unsigned long int get_options;
2539 nigel 75 unsigned long int true_size, true_study_size = 0;
2540     size_t size, regex_gotten_store;
2541 ph10 654 int do_allcaps = 0;
2542 ph10 512 int do_mark = 0;
2543 nigel 3 int do_study = 0;
2544 ph10 654 int no_force_study = 0;
2545 nigel 25 int do_debug = debug;
2546 nigel 35 int do_G = 0;
2547     int do_g = 0;
2548 nigel 25 int do_showinfo = showinfo;
2549 nigel 35 int do_showrest = 0;
2550 ph10 616 int do_showcaprest = 0;
2551 nigel 75 int do_flip = 0;
2552 nigel 93 int erroroffset, len, delimiter, poffset;
2553 nigel 3
2554 ph10 836 use_utf = 0;
2555 ph10 211 debug_lengths = 1;
2556 nigel 63
2557 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2558 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2559 nigel 63 fflush(outfile);
2560 nigel 3
2561     p = buffer;
2562     while (isspace(*p)) p++;
2563     if (*p == 0) continue;
2564    
2565 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
2566 nigel 3
2567 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2568     {
2569 zherczeg 841 pcre_uint32 magic;
2570 ph10 836 pcre_uint8 sbuf[8];
2571 nigel 75 FILE *f;
2572    
2573     p++;
2574 zherczeg 839 if (*p == '!')
2575     {
2576     do_debug = TRUE;
2577     do_showinfo = TRUE;
2578     p++;
2579     }
2580    
2581 nigel 75 pp = p + (int)strlen((char *)p);
2582     while (isspace(pp[-1])) pp--;
2583     *pp = 0;
2584    
2585     f = fopen((char *)p, "rb");
2586     if (f == NULL)
2587     {
2588     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2589     continue;
2590     }
2591    
2592 zherczeg 839 first_gotten_store = 0;
2593 nigel 75 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2594    
2595     true_size =
2596     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2597     true_study_size =
2598     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2599    
2600 zherczeg 852 re = (pcre *)new_malloc(true_size);
2601 ph10 836 regex_gotten_store = first_gotten_store;
2602 nigel 75
2603     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2604    
2605 zherczeg 852 magic = ((REAL_PCRE *)re)->magic_number;
2606 nigel 75 if (magic != MAGIC_NUMBER)
2607     {
2608 ph10 836 if (swap_uint32(magic) == MAGIC_NUMBER)
2609 nigel 75 {
2610     do_flip = 1;
2611     }
2612     else
2613     {
2614     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2615     fclose(f);
2616     continue;
2617     }
2618     }
2619    
2620 zherczeg 839 /* We hide the byte-invert info for little and big endian tests. */
2621 ph10 612 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2622 zherczeg 839 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2623 nigel 75
2624 ph10 612 /* Now see if there is any following study data. */
2625 nigel 75
2626     if (true_study_size != 0)
2627     {
2628     pcre_study_data *psd;
2629    
2630     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2631     extra->flags = PCRE_EXTRA_STUDY_DATA;
2632    
2633     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2634     extra->study_data = psd;
2635    
2636     if (fread(psd, 1, true_study_size, f) != true_study_size)
2637     {
2638     FAIL_READ:
2639     fprintf(outfile, "Failed to read data from %s\n", p);
2640 ph10 836 if (extra != NULL)
2641     {
2642     PCRE_FREE_STUDY(extra);
2643     }
2644 nigel 75 if (re != NULL) new_free(re);
2645     fclose(f);
2646     continue;
2647     }
2648     fprintf(outfile, "Study data loaded from %s\n", p);
2649     do_study = 1; /* To get the data output if requested */
2650     }
2651     else fprintf(outfile, "No study data\n");
2652    
2653 ph10 836 /* Flip the necessary bytes. */
2654     if (do_flip)
2655     {
2656 zherczeg 839 int rc;
2657     PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2658     if (rc == PCRE_ERROR_BADMODE)
2659     {
2660     /* Simulate the result of the function call below. */
2661     fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2662     use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2663     fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2664     "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2665     continue;
2666     }
2667 ph10 836 }
2668    
2669     /* Need to know if UTF-8 for printing data strings. */
2670    
2671     if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2672     use_utf = (get_options & PCRE_UTF8) != 0;
2673    
2674 nigel 75 fclose(f);
2675     goto SHOW_INFO;
2676     }
2677    
2678     /* In-line pattern (the usual case). Get the delimiter and seek the end of
2679 ph10 836 the pattern; if it isn't complete, read more. */
2680 nigel 75
2681 nigel 3 delimiter = *p++;
2682    
2683 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
2684 nigel 3 {
2685 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2686 nigel 3 goto SKIP_DATA;
2687     }
2688    
2689     pp = p;
2690 ph10 530 poffset = (int)(p - buffer);
2691 nigel 3
2692     for(;;)
2693     {
2694 nigel 29 while (*pp != 0)
2695     {
2696     if (*pp == '\\' && pp[1] != 0) pp++;
2697     else if (*pp == delimiter) break;
2698     pp++;
2699     }
2700 nigel 3 if (*pp != 0) break;
2701 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2702 nigel 3 {
2703     fprintf(outfile, "** Unexpected EOF\n");
2704 nigel 11 done = 1;
2705     goto CONTINUE;
2706 nigel 3 }
2707 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2708 nigel 3 }
2709    
2710 nigel 93 /* The buffer may have moved while being extended; reset the start of data
2711     pointer to the correct relative point in the buffer. */
2712    
2713     p = buffer + poffset;
2714    
2715 nigel 29 /* If the first character after the delimiter is backslash, make
2716     the pattern end with backslash. This is purely to provide a way
2717     of testing for the error message when a pattern ends with backslash. */
2718    
2719     if (pp[1] == '\\') *pp++ = '\\';
2720    
2721 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2722     for callouts. */
2723 nigel 3
2724     *pp++ = 0;
2725 nigel 75 strcpy((char *)pbuffer, (char *)p);
2726 nigel 3
2727     /* Look for options after final delimiter */
2728    
2729     options = 0;
2730 ph10 836 study_options = 0;
2731 nigel 31 log_store = showstore; /* default from command line */
2732    
2733 nigel 3 while (*pp != 0)
2734     {
2735     switch (*pp++)
2736     {
2737 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
2738 nigel 35 case 'g': do_g = 1; break;
2739 nigel 3 case 'i': options |= PCRE_CASELESS; break;
2740     case 'm': options |= PCRE_MULTILINE; break;
2741     case 's': options |= PCRE_DOTALL; break;
2742     case 'x': options |= PCRE_EXTENDED; break;
2743 nigel 25
2744 ph10 616 case '+':
2745 ph10 654 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2746 ph10 616 break;
2747 ph10 654
2748     case '=': do_allcaps = 1; break;
2749 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
2750 nigel 93 case 'B': do_debug = 1; break;
2751 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2752 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
2753 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2754 nigel 75 case 'F': do_flip = 1; break;
2755 nigel 35 case 'G': do_G = 1; break;
2756 nigel 25 case 'I': do_showinfo = 1; break;
2757 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
2758 ph10 512 case 'K': do_mark = 1; break;
2759 nigel 31 case 'M': log_store = 1; break;
2760 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2761 nigel 37
2762     #if !defined NOPOSIX
2763 nigel 3 case 'P': do_posix = 1; break;
2764 nigel 37 #endif
2765    
2766 ph10 654 case 'S':
2767 ph10 691 if (do_study == 0)
2768 ph10 612 {
2769 ph10 691 do_study = 1;
2770 ph10 667 if (*pp == '+')
2771     {
2772     study_options |= PCRE_STUDY_JIT_COMPILE;
2773 ph10 691 pp++;
2774     }
2775     }
2776 ph10 667 else
2777     {
2778 ph10 612 do_study = 0;
2779     no_force_study = 1;
2780 ph10 654 }
2781 ph10 612 break;
2782    
2783 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
2784 ph10 535 case 'W': options |= PCRE_UCP; break;
2785 nigel 3 case 'X': options |= PCRE_EXTRA; break;
2786 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2787 ph10 126 case 'Z': debug_lengths = 0; break;
2788 ph10 836 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2789 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2790 ph10 545
2791 ph10 541 case 'T':
2792     switch (*pp++)
2793     {
2794     case '0': tables = tables0; break;
2795     case '1': tables = tables1; break;
2796 ph10 545
2797 ph10 541 case '\r':
2798     case '\n':
2799 ph10 545 case ' ':
2800     case 0:
2801 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
2802 ph10 545 goto SKIP_DATA;
2803    
2804     default:
2805 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2806 ph10 545 goto SKIP_DATA;
2807 ph10 541 }
2808 ph10 545 break;
2809 nigel 25
2810     case 'L':
2811     ppp = pp;
2812 nigel 93 /* The '\r' test here is so that it works on Windows. */
2813     /* The '0' test is just in case this is an unterminated line. */
2814     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2815 nigel 25 *ppp = 0;
2816     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2817     {
2818     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2819     goto SKIP_DATA;
2820     }
2821 nigel 93 locale_set = 1;
2822 ph10 836 tables = PCRE_MAKETABLES;
2823 nigel 25 pp = ppp;
2824     break;
2825    
2826 nigel 75 case '>':
2827     to_file = pp;
2828     while (*pp != 0) pp++;
2829     while (isspace(pp[-1])) pp--;
2830     *pp = 0;
2831     break;
2832    
2833 nigel 91 case '<':
2834     {
2835 ph10 836 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2836 ph10 336 {
2837     options |= PCRE_JAVASCRIPT_COMPAT;
2838 ph10 345 pp += 3;
2839 ph10 336 }
2840     else
2841 ph10 345 {
2842 ph10 336 int x = check_newline(pp, outfile);
2843     if (x == 0) goto SKIP_DATA;
2844     options |= x;
2845     while (*pp++ != '>');
2846 ph10 345 }
2847 nigel 91 }
2848     break;
2849    
2850 nigel 77 case '\r': /* So that it works in Windows */
2851     case '\n':
2852     case ' ':
2853     break;
2854 nigel 75
2855 nigel 3 default:
2856     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2857     goto SKIP_DATA;
2858     }
2859     }
2860    
2861 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
2862 nigel 25 timing, showing, or debugging options, nor the ability to pass over
2863 ph10 836 local character tables. Neither does it have 16-bit support. */
2864 nigel 3
2865 nigel 37 #if !defined NOPOSIX
2866 nigel 3 if (posix || do_posix)
2867     {
2868     int rc;
2869     int cflags = 0;
2870 nigel 75
2871 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2872     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2873 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2874 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2875     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2876 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2877 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2878 nigel 87
2879 ph10 836 first_gotten_store = 0;
2880 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
2881    
2882     /* Compilation failed; go back for another re, skipping to blank line
2883     if non-interactive. */
2884    
2885     if (rc != 0)
2886     {
2887 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2888 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2889     goto SKIP_DATA;
2890     }
2891     }
2892    
2893     /* Handle compiling via the native interface */
2894    
2895     else
2896 nigel 37 #endif /* !defined NOPOSIX */
2897    
2898 nigel 3 {
2899 ph10 836 /* In 16-bit mode, convert the input. */
2900    
2901     #ifdef SUPPORT_PCRE16
2902     if (use_pcre16)
2903     {
2904     switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2905     {
2906     case -1:
2907     fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2908     "converted to UTF-16\n");
2909     goto SKIP_DATA;
2910    
2911     case -2:
2912     fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2913     "cannot be converted to UTF-16\n");
2914     goto SKIP_DATA;
2915 ph10 842
2916 ph10 836 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2917     fprintf(outfile, "**Failed: character value greater than 0xffff "
2918     "cannot be converted to 16-bit in non-UTF mode\n");
2919 ph10 842 goto SKIP_DATA;
2920 ph10 836
2921     default:
2922     break;
2923     }
2924     p = (pcre_uint8 *)buffer16;
2925     }
2926     #endif
2927    
2928     /* Compile many times when timing */
2929    
2930 nigel 93 if (timeit > 0)
2931 nigel 3 {
2932     register int i;
2933     clock_t time_taken;
2934     clock_t start_time = clock();
2935 nigel 93 for (i = 0; i < timeit; i++)
2936 nigel 3 {
2937 ph10 836 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2938 nigel 3 if (re != NULL) free(re);
2939     }
2940     time_taken = clock() - start_time;
2941 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
2942     (((double)time_taken * 1000.0) / (double)timeit) /
2943 nigel 63 (double)CLOCKS_PER_SEC);
2944 nigel 3 }
2945    
2946 ph10 836 first_gotten_store = 0;
2947     PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2948 nigel 3
2949     /* Compilation failed; go back for another re, skipping to blank line
2950     if non-interactive. */
2951    
2952     if (re == NULL)
2953     {
2954     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2955     SKIP_DATA:
2956     if (infile != stdin)
2957     {
2958     for (;;)
2959     {
2960 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
2961 nigel 11 {
2962     done = 1;
2963     goto CONTINUE;
2964     }
2965 nigel 3 len = (int)strlen((char *)buffer);
2966     while (len > 0 && isspace(buffer[len-1])) len--;
2967     if (len == 0) break;
2968     }
2969     fprintf(outfile, "\n");
2970     }
2971 nigel 25 goto CONTINUE;
2972 nigel 3 }
2973 ph10 416
2974     /* Compilation succeeded. It is now possible to set the UTF-8 option from
2975     within the regex; check for this so that we know how to process the data
2976 ph10 412 lines. */
2977 ph10 416
2978 ph10 836 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2979     goto SKIP_DATA;
2980     if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2981 nigel 3
2982 ph10 836 /* Extract the size for possible writing before possibly flipping it,
2983     and remember the store that was got. */
2984 nigel 3
2985 zherczeg 852 true_size = ((REAL_PCRE *)re)->size;
2986 ph10 836 regex_gotten_store = first_gotten_store;
2987    
2988     /* Output code size information if requested */
2989    
2990 nigel 63 if (log_store)
2991     fprintf(outfile, "Memory allocation (code space): %d\n",
2992 ph10 836 (int)(first_gotten_store -
2993 zherczeg 852 sizeof(REAL_PCRE) -
2994     ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
2995 nigel 63
2996 ph10 612 /* If -s or /S was present, study the regex to generate additional info to
2997 ph10 654 help with the matching, unless the pattern has the SS option, which
2998 ph10 612 suppresses the effect of /S (used for a few test patterns where studying is
2999     never sensible). */
3000 nigel 75
3001 ph10 667 if (do_study || (force_study >= 0 && !no_force_study))
3002 nigel 75 {
3003 nigel 93 if (timeit > 0)
3004 nigel 75 {
3005     register int i;
3006     clock_t time_taken;
3007     clock_t start_time = clock();
3008 nigel 93 for (i = 0; i < timeit; i++)
3009 ph10 836 {
3010     PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3011     }
3012 nigel 75 time_taken = clock() - start_time;
3013 ph10 836 if (extra != NULL)
3014     {
3015     PCRE_FREE_STUDY(extra);
3016     }
3017 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
3018     (((double)time_taken * 1000.0) / (double)timeit) /
3019 nigel 75 (double)CLOCKS_PER_SEC);
3020     }
3021 ph10 836 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3022 nigel 75 if (error != NULL)
3023     fprintf(outfile, "Failed to study: %s\n", error);
3024     else if (extra != NULL)
3025 ph10 836 {
3026 nigel 75 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3027 ph10 836 if (log_store)
3028     {
3029     size_t jitsize;
3030     if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3031     jitsize != 0)
3032     fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3033     }
3034     }
3035 nigel 75 }
3036 ph10 788
3037 ph10 510 /* If /K was present, we set up for handling MARK data. */
3038 ph10 512
3039 ph10 510 if (do_mark)
3040     {
3041     if (extra == NULL)
3042     {
3043     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3044     extra->flags = 0;
3045     }
3046 ph10 512 extra->mark = &markptr;
3047 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
3048 ph10 512 }
3049 nigel 75
3050 ph10 836 /* Extract and display information from the compiled data if required. */
3051 nigel 75
3052     SHOW_INFO:
3053    
3054 nigel 93 if (do_debug)
3055     {
3056     fprintf(outfile, "------------------------------------------------------------------\n");
3057 ph10 836 PCRE_PRINTINT(re, outfile, debug_lengths);
3058 nigel 93 }
3059 ph10 416
3060 ph10 412 /* We already have the options in get_options (see above) */
3061 nigel 93
3062 nigel 25 if (do_showinfo)
3063 nigel 3 {
3064 ph10 412 unsigned long int all_options;
3065 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3066 ph10 227 hascrorlf;
3067 nigel 63 int nameentrysize, namecount;
3068 ph10 836 const pcre_uint8 *nametable;
3069 nigel 3
3070 ph10 836 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3071     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3072     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3073     new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3074     new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3075     new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3076     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3077     new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3078     new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3079     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3080     new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3081     != 0)
3082     goto SKIP_DATA;
3083 nigel 43
3084 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
3085 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3086 nigel 77 (int)size, (int)regex_gotten_store);
3087 nigel 43
3088     fprintf(outfile, "Capturing subpattern count = %d\n", count);
3089     if (backrefmax > 0)
3090     fprintf(outfile, "Max back reference = %d\n", backrefmax);
3091 nigel 63
3092     if (namecount > 0)
3093     {
3094     fprintf(outfile, "Named capturing subpatterns:\n");
3095     while (namecount-- > 0)
3096     {
3097 ph10 836 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3098     int imm2_size = use_pcre16 ? 1 : 2;
3099     #else
3100     int imm2_size = IMM2_SIZE;
3101     #endif
3102     int length = (int)STRLEN(nametable + imm2_size);
3103     fprintf(outfile, " ");
3104     PCHARSV(nametable, imm2_size, length, outfile);
3105     while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3106     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3107     fprintf(outfile, "%3d\n", use_pcre16?
3108     (int)(((PCRE_SPTR16)nametable)[0])
3109     :((int)nametable[0] << 8) | (int)nametable[1]);
3110     nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3111     #else
3112     fprintf(outfile, "%3d\n", GET2(nametable, 0));
3113     #ifdef SUPPORT_PCRE8
3114 nigel 63 nametable += nameentrysize;
3115 ph10 836 #else
3116     nametable += nameentrysize * 2;
3117     #endif
3118     #endif
3119 nigel 63 }
3120     }
3121 ph10 172
3122 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3123 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3124 nigel 63
3125 zherczeg 852 all_options = ((REAL_PCRE *)re)->options;
3126 ph10 836 if (do_flip) all_options = swap_uint32(all_options);
3127 nigel 75
3128 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
3129 ph10 576 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3130 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3131     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3132     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3133     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3134 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3135 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3136 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3137     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3138 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3139     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3140     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3141 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3142 ph10 836 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3143 ph10 518 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3144 ph10 836 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3145 ph10 576 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3146 nigel 91 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3147 ph10 172
3148 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3149 nigel 43
3150 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
3151 nigel 91 {
3152     case PCRE_NEWLINE_CR:
3153     fprintf(outfile, "Forced newline sequence: CR\n");
3154     break;
3155 nigel 43
3156 nigel 91 case PCRE_NEWLINE_LF:
3157     fprintf(outfile, "Forced newline sequence: LF\n");
3158     break;
3159    
3160     case PCRE_NEWLINE_CRLF:
3161     fprintf(outfile, "Forced newline sequence: CRLF\n");
3162     break;
3163    
3164 ph10 149 case PCRE_NEWLINE_ANYCRLF:
3165     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3166     break;
3167    
3168 nigel 93 case PCRE_NEWLINE_ANY:
3169     fprintf(outfile, "Forced newline sequence: ANY\n");
3170     break;
3171    
3172 nigel 91 default:
3173     break;
3174     }
3175    
3176 nigel 43 if (first_char == -1)
3177     {
3178 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
3179 nigel 43 }
3180     else if (first_char < 0)
3181     {
3182     fprintf(outfile, "No first char\n");
3183     }
3184     else
3185     {
3186 ph10 836 const char *caseless =
3187 zherczeg 852 ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3188 nigel 63 "" : " (caseless)";
3189 ph10 836
3190     if (PRINTOK(first_char))
3191     fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3192 nigel 3 else
3193 ph10 836 {
3194     fprintf(outfile, "First char = ");
3195     pchar(first_char, outfile);
3196     fprintf(outfile, "%s\n", caseless);
3197     }
3198 nigel 43 }
3199 nigel 37
3200 nigel 43 if (need_char < 0)
3201     {
3202     fprintf(outfile, "No need char\n");
3203 nigel 3 }
3204 nigel 43 else
3205     {
3206 ph10 836 const char *caseless =
3207 zherczeg 852 ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3208 nigel 63 "" : " (caseless)";
3209 ph10 836
3210     if (PRINTOK(need_char))
3211     fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3212 nigel 43 else
3213 ph10 836 {
3214     fprintf(outfile, "Need char = ");
3215     pchar(need_char, outfile);
3216     fprintf(outfile, "%s\n", caseless);
3217     }
3218 nigel 43 }
3219 nigel 75
3220     /* Don't output study size; at present it is in any case a fixed
3221     value, but it varies, depending on the computer architecture, and
3222     so messes up the test suite. (And with the /F option, it might be
3223 ph10 654 flipped.) If study was forced by an external -s, don't show this
3224 ph10 612 information unless -i or -d was also present. This means that, except
3225     when auto-callouts are involved, the output from runs with and without
3226     -s should be identical. */
3227 nigel 75
3228 ph10 667 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3229 nigel 75 {
3230     if (extra == NULL)
3231     fprintf(outfile, "Study returned NULL\n");
3232     else
3233     {
3234 ph10 836 pcre_uint8 *start_bits = NULL;
3235 ph10 455 int minlength;
3236 ph10 461
3237 ph10 836 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3238     fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3239 ph10 461
3240 ph10 836 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3241 nigel 75 {
3242 ph10 836 if (start_bits == NULL)
3243     fprintf(outfile, "No set of starting bytes\n");
3244     else
3245 nigel 75 {
3246 ph10 836 int i;
3247     int c = 24;
3248     fprintf(outfile, "Starting byte set: ");
3249     for (i = 0; i < 256; i++)
3250 nigel 75 {
3251 ph10 836 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3252 nigel 75 {
3253 ph10 836 if (c > 75)
3254     {
3255     fprintf(outfile, "\n ");
3256     c = 2;
3257     }
3258     if (PRINTOK(i) && i != ' ')
3259     {
3260     fprintf(outfile, "%c ", i);
3261     c += 2;
3262     }
3263     else
3264     {
3265     fprintf(outfile, "\\x%02x ", i);
3266     c += 5;
3267     }
3268 nigel 75 }
3269     }
3270 ph10 836 fprintf(outfile, "\n");
3271 nigel 75 }
3272     }
3273     }
3274 ph10 691
3275 ph10 667 /* Show this only if the JIT was set by /S, not by -s. */
3276 ph10 691
3277 ph10 667 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3278     {
3279 ph10 691 int jit;
3280 ph10 836 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3281     {
3282     if (jit)
3283     fprintf(outfile, "JIT study was successful\n");
3284     else
3285 ph10 691 #ifdef SUPPORT_JIT
3286 ph10 836 fprintf(outfile, "JIT study was not successful\n");
3287 ph10 667 #else
3288 ph10 836 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3289 ph10 667 #endif
3290 ph10 836 }
3291 ph10 691 }
3292 nigel 75 }
3293 nigel 3 }
3294    
3295 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
3296     that is all. The first 8 bytes of the file are the regex length and then
3297     the study length, in big-endian order. */
3298 nigel 3
3299 nigel 75 if (to_file != NULL)
3300 nigel 3 {
3301 nigel 75 FILE *f = fopen((char *)to_file, "wb");
3302     if (f == NULL)
3303 nigel 3 {
3304 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3305 nigel 3 }
3306 nigel 75 else
3307     {
3308 ph10 836 pcre_uint8 sbuf[8];
3309 ph10 259
3310 ph10 836 if (do_flip) regexflip(re, extra);
3311     sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3312     sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3313     sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3314     sbuf[3] = (pcre_uint8)((true_size) & 255);
3315     sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3316     sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3317     sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3318     sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3319 nigel 3
3320 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
3321     fwrite(re, 1, true_size, f) < true_size)
3322     {
3323     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3324     }
3325 nigel 3 else
3326     {
3327 ph10 612 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3328 ph10 654
3329 ph10 658 /* If there is study data, write it. */
3330 ph10 654
3331 nigel 75 if (extra != NULL)
3332 nigel 3 {
3333 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
3334     true_study_size)
3335 nigel 3 {
3336 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
3337     strerror(errno));
3338 nigel 3 }
3339 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
3340 nigel 3 }
3341     }
3342 nigel 75 fclose(f);
3343 nigel 3 }
3344 nigel 77
3345     new_free(re);
3346 ph10 836 if (extra != NULL)
3347     {
3348     PCRE_FREE_STUDY(extra);
3349     }
3350 ph10 545 if (locale_set)
3351 ph10 541 {
3352     new_free((void *)tables);
3353     setlocale(LC_CTYPE, "C");
3354 ph10 545 locale_set = 0;
3355     }
3356 nigel 75 continue; /* With next regex */
3357 nigel 3 }
3358 nigel 75 } /* End of non-POSIX compile */
3359 nigel 3
3360     /* Read data lines and test them */
3361    
3362     for (;;)
3363     {
3364 ph10 836 pcre_uint8 *q;
3365     pcre_uint8 *bptr;
3366 nigel 57 int *use_offsets = offsets;
3367 nigel 53 int use_size_offsets = size_offsets;
3368 nigel 63 int callout_data = 0;
3369     int callout_data_set = 0;
3370 nigel 3 int count, c;
3371 nigel 29 int copystrings = 0;
3372 ph10 386 int find_match_limit = default_find_match_limit;
3373 nigel 29 int getstrings = 0;
3374     int getlist = 0;
3375 nigel 39 int gmatched = 0;
3376 nigel 35 int start_offset = 0;
3377 ph10 579 int start_offset_sign = 1;
3378 nigel 41 int g_notempty = 0;
3379 nigel 77 int use_dfa = 0;
3380 nigel 3
3381 nigel 91 *copynames = 0;
3382     *getnames = 0;
3383    
3384 ph10 881 #ifdef SUPPORT_PCRE16
3385 ph10 836 cn16ptr = copynames;
3386     gn16ptr = getnames;
3387 ph10 881 #endif
3388     #ifdef SUPPORT_PCRE8
3389 ph10 836 cn8ptr = copynames8;
3390     gn8ptr = getnames8;
3391 ph10 881 #endif
3392 nigel 91
3393 ph10 836 SET_PCRE_CALLOUT(callout);
3394 nigel 63 first_callout = 1;
3395 ph10 654 last_callout_mark = NULL;
3396 nigel 63 callout_extra = 0;
3397     callout_count = 0;
3398     callout_fail_count = 999999;
3399     callout_fail_id = -1;
3400 nigel 73 show_malloc = 0;
3401 ph10 836 options = 0;
3402 nigel 63
3403 nigel 91 if (extra != NULL) extra->flags &=
3404     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3405    
3406     len = 0;
3407     for (;;)
3408 nigel 11 {
3409 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3410 nigel 91 {
3411 ph10 537 if (len > 0) /* Reached EOF without hitting a newline */
3412     {
3413 ph10 545 fprintf(outfile, "\n");
3414 ph10 537 break;
3415 ph10 545 }
3416 nigel 91 done = 1;
3417     goto CONTINUE;
3418     }
3419     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3420     len = (int)strlen((char *)buffer);
3421     if (buffer[len-1] == '\n') break;
3422 nigel 11 }
3423 nigel 3
3424     while (len > 0 && isspace(buffer[len-1])) len--;
3425     buffer[len] = 0;
3426     if (len == 0) break;
3427    
3428     p = buffer;
3429     while (isspace(*p)) p++;
3430    
3431 ph10 147 bptr = q = dbuffer;
3432 nigel 3 while ((c = *p++) != 0)
3433     {
3434     int i = 0;
3435     int n = 0;
3436 ph10 842
3437 ph10 836 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3438     In non-UTF mode, allow the value of the byte to fall through to later,
3439     where values greater than 127 are turned into UTF-8 when running in
3440     16-bit mode. */
3441 ph10 842
3442 ph10 836 if (c != '\\')
3443 nigel 3 {
3444 ph10 836 if (use_utf)
3445     {
3446     *q++ = c;
3447     continue;
3448 ph10 842 }
3449     }
3450    
3451 ph10 836 /* Handle backslash escapes */
3452 ph10 842
3453 ph10 836 else switch ((c = *p++))
3454     {
3455 nigel 3 case 'a': c = 7; break;
3456     case 'b': c = '\b'; break;
3457     case 'e': c = 27; break;
3458     case 'f': c = '\f'; break;
3459     case 'n': c = '\n'; break;
3460     case 'r': c = '\r'; break;
3461     case 't': c = '\t'; break;
3462     case 'v': c = '\v'; break;
3463    
3464     case '0': case '1': case '2': case '3':
3465     case '4': case '5': case '6': case '7':
3466     c -= '0';
3467     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3468     c = c * 8 + *p++ - '0';
3469     break;
3470    
3471     case 'x':
3472 nigel 49 if (*p == '{')
3473     {
3474 ph10 836 pcre_uint8 *pt = p;
3475 nigel 49 c = 0;
3476 ph10 738
3477 ph10 735 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3478     when isxdigit() is a macro that refers to its argument more than
3479     once. This is banned by the C Standard, but apparently happens in at
3480     least one MacOS environment. */
3481 ph10 738
3482 ph10 735 for (pt++; isxdigit(*pt); pt++)
3483 ph10 862 {
3484     if (++i == 9)
3485     fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3486     "using only the first eight.\n");
3487     else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3488     }
3489 nigel 49 if (*pt == '}')
3490     {
3491     p = pt + 1;
3492     break;
3493     }
3494 ph10 836 /* Not correct form for \x{...}; fall through */
3495 nigel 49 }
3496    
3497 ph10 842 /* \x without {} always defines just one byte in 8-bit mode. This
3498     allows UTF-8 characters to be constructed byte by byte, and also allows
3499     invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3500     Otherwise, pass it down to later code so that it can be turned into
3501 ph10 836 UTF-8 when running in 16-bit mode. */
3502 nigel 49
3503 nigel 3 c = 0;
3504     while (i++ < 2 && isxdigit(*p))
3505     {
3506 ph10 734 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3507 nigel 3 p++;
3508     }
3509 ph10 836 if (use_utf)
3510 ph10 842 {
3511 ph10 836 *q++ = c;
3512 ph10 842 continue;
3513     }
3514 nigel 3 break;
3515    
3516 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
3517 nigel 3 p--;
3518     continue;
3519    
3520 nigel 75 case '>':
3521 ph10 579 if (*p == '-')
3522 ph10 567 {
3523     start_offset_sign = -1;
3524     p++;
3525 ph10 579 }
3526 nigel 75 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3527 ph10 579 start_offset *= start_offset_sign;
3528 nigel 75 continue;
3529    
3530 nigel 3 case 'A': /* Option setting */
3531     options |= PCRE_ANCHORED;
3532     continue;
3533    
3534     case 'B':
3535     options |= PCRE_NOTBOL;
3536     continue;
3537    
3538 nigel 29 case 'C':
3539 nigel 63 if (isdigit(*p)) /* Set copy string */
3540     {
3541     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3542     copystrings |= 1 << n;
3543     }
3544     else if (isalnum(*p))
3545     {
3546 ph10 836 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3547 nigel 63 }
3548     else if (*p == '+')
3549     {
3550     callout_extra = 1;
3551     p++;
3552     }
3553     else if (*p == '-')
3554     {
3555 ph10 836 SET_PCRE_CALLOUT(NULL);
3556 nigel 63 p++;
3557     }
3558     else if (*p == '!')
3559     {
3560     callout_fail_id = 0;
3561     p++;
3562     while(isdigit(*p))
3563     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3564     callout_fail_count = 0;
3565     if (*p == '!')
3566     {
3567     p++;
3568     while(isdigit(*p))
3569     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3570     }
3571     }
3572     else if (*p == '*')
3573     {
3574     int sign = 1;
3575     callout_data = 0;
3576     if (*(++p) == '-') { sign = -1; p++; }
3577     while(isdigit(*p))
3578     callout_data = callout_data * 10 + *p++ - '0';
3579     callout_data *= sign;
3580     callout_data_set = 1;
3581     }
3582 nigel 29 continue;
3583    
3584 nigel 79 #if !defined NODFA
3585 nigel 77 case 'D':
3586 nigel 79 #if !defined NOPOSIX
3587 nigel 77 if (posix || do_posix)
3588     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3589     else
3590 nigel 79 #endif
3591 nigel 77 use_dfa = 1;
3592     continue;
3593 ph10 553 #endif
3594 nigel 77
3595 ph10 553 #if !defined NODFA
3596 nigel 77 case 'F':
3597     options |= PCRE_DFA_SHORTEST;
3598     continue;
3599 nigel 79 #endif
3600 nigel 77
3601 nigel 29 case 'G':
3602 nigel 63 if (isdigit(*p))
3603     {
3604     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3605     getstrings |= 1 << n;
3606     }
3607     else if (isalnum(*p))
3608     {
3609 ph10 836 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3610 nigel 63 }
3611 nigel 29 continue;
3612 ph10 691
3613 ph10 667 case 'J':
3614     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3615 ph10 691 if (extra != NULL
3616     && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3617 ph10 667 && extra->executable_jit != NULL)
3618 ph10 691 {
3619 zherczeg 852 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3620 ph10 836 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3621     PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3622 ph10 691 }
3623 ph10 667 continue;
3624 nigel 29
3625     case 'L':
3626     getlist = 1;
3627     continue;
3628    
3629 nigel 63 case 'M':
3630     find_match_limit = 1;
3631     continue;
3632    
3633 nigel 37 case 'N':
3634 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
3635     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3636 ph10 461 else
3637 ph10 442 options |= PCRE_NOTEMPTY;
3638 nigel 37 continue;
3639    
3640 nigel 3 case 'O':
3641     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3642 nigel 53 if (n > size_offsets_max)
3643     {
3644     size_offsets_max = n;
3645 nigel 57 free(offsets);
3646 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3647 nigel 53 if (offsets == NULL)
3648     {
3649     printf("** Failed to get %d bytes of memory for offsets vector\n",
3650 ph10 151 (int)(size_offsets_max * sizeof(int)));
3651 nigel 77 yield = 1;
3652     goto EXIT;
3653 nigel 53 }
3654     }
3655     use_size_offsets = n;
3656 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3657 nigel 3 continue;
3658    
3659 nigel 75 case 'P':
3660 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3661 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3662 nigel 75 continue;
3663    
3664 nigel 91 case 'Q':
3665     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3666     if (extra == NULL)
3667     {
3668     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3669     extra->flags = 0;
3670     }
3671     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3672     extra->match_limit_recursion = n;
3673     continue;
3674    
3675     case 'q':
3676     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3677     if (extra == NULL)
3678     {
3679     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3680     extra->flags = 0;
3681     }
3682     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3683     extra->match_limit = n;
3684     continue;
3685    
3686 nigel 79 #if !defined NODFA
3687 nigel 77 case 'R':
3688     options |= PCRE_DFA_RESTART;
3689     continue;
3690 nigel 79 #endif
3691 nigel 77
3692 nigel 73 case 'S':
3693     show_malloc = 1;
3694     continue;
3695 ph10 392
3696 ph10 389 case 'Y':
3697     options |= PCRE_NO_START_OPTIMIZE;
3698 ph10 392 continue;
3699 nigel 73
3700 nigel 3 case 'Z':
3701     options |= PCRE_NOTEOL;
3702     continue;
3703 nigel 71
3704     case '?':
3705     options |= PCRE_NO_UTF8_CHECK;
3706     continue;
3707 nigel 91
3708     case '<':
3709     {
3710     int x = check_newline(p, outfile);
3711     if (x == 0) goto NEXT_DATA;
3712     options |= x;
3713     while (*p++ != '>');
3714     }
3715     continue;
3716 nigel 3 }
3717 ph10 836
3718 ph10 842 /* We now have a character value in c that may be greater than 255. In
3719     16-bit mode, we always convert characters to UTF-8 so that values greater
3720 ph10 836 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3721 ph10 842 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3722 ph10 836 mode must have come from \x{...} or octal constructs because values from
3723     \x.. get this far only in non-UTF mode. */
3724    
3725 ph10 842 #if !defined NOUTF || defined SUPPORT_PCRE16
3726 ph10 836 if (use_pcre16 || use_utf)
3727     {
3728     pcre_uint8 buff8[8];
3729     int ii, utn;
3730     utn = ord2utf8(c, buff8);
3731     for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3732     }
3733     else
3734 ph10 842 #endif
3735 ph10 836 {
3736     if (c > 255)
3737     {
3738     fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3739     "and UTF-8 mode is not enabled.\n", c);
3740     fprintf(outfile, "** Truncation will probably give the wrong "
3741     "result.\n");
3742     }
3743     *q++ = c;
3744     }
3745 nigel 3 }
3746 ph10 842
3747 ph10 836 /* Reached end of subject string */
3748 ph10 842
3749 nigel 9 *q = 0;
3750 ph10 530 len = (int)(q - dbuffer);
3751 ph10 545
3752 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
3753 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3754 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
3755 ph10 371
3756 ph10 363 #if !defined NOPOSIX
3757     if (posix || do_posix)
3758     {
3759     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3760 ph10 371 bptr += buffer_size - len - 1;
3761 ph10 363 }
3762 ph10 371 else
3763     #endif
3764 ph10 363 {
3765     memmove(bptr + buffer_size - len, bptr, len);
3766 ph10 371 bptr += buffer_size - len;
3767     }
3768 nigel 3
3769 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
3770     {
3771     printf("**Match limit not relevant for DFA matching: ignored\n");
3772     find_match_limit = 0;
3773     }
3774    
3775 nigel 3 /* Handle matching via the POSIX interface, which does not
3776 nigel 63 support timing or playing with the match limit or callout data. */
3777 nigel 3
3778 nigel 37 #if !defined NOPOSIX
3779 nigel 3 if (posix || do_posix)
3780     {
3781     int rc;
3782     int eflags = 0;
3783 nigel 63 regmatch_t *pmatch = NULL;
3784     if (use_size_offsets > 0)
3785 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3786 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3787     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3788 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3789 nigel 3
3790 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3791 nigel 3
3792     if (rc != 0)
3793     {
3794 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3795 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3796     }
3797 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3798     != 0)
3799     {
3800     fprintf(outfile, "Matched with REG_NOSUB\n");
3801     }
3802 nigel 3 else
3803     {
3804 nigel 7 size_t i;
3805 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
3806 nigel 3 {
3807     if (pmatch[i].rm_so >= 0)
3808     {
3809 nigel 23 fprintf(outfile, "%2d: ", (int)i);
3810 ph10 836 PCHARSV(dbuffer, pmatch[i].rm_so,
3811 nigel 63 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3812 nigel 3 fprintf(outfile, "\n");
3813 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
3814 nigel 35 {
3815 ph10 616 fprintf(outfile, "%2d+ ", (int)i);
3816 ph10 836 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3817 nigel 63 outfile);
3818 nigel 35 fprintf(outfile, "\n");
3819     }
3820 nigel 3 }
3821     }
3822     }
3823 nigel 53 free(pmatch);
3824 ph10 836 goto NEXT_DATA;
3825 nigel 3 }
3826    
3827 ph10 836 #endif /* !defined NOPOSIX */
3828    
3829 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
3830 nigel 3
3831 ph10 836 #ifdef SUPPORT_PCRE16
3832     if (use_pcre16)
3833     {
3834 zherczeg 852 len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3835 ph10 836 switch(len)
3836     {
3837     case -1:
3838     fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3839     "converted to UTF-16\n");
3840     goto NEXT_DATA;
3841 nigel 37
3842 ph10 836 case -2:
3843     fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3844     "cannot be converted to UTF-16\n");
3845     goto NEXT_DATA;
3846    
3847     case -3:
3848     fprintf(outfile, "**Failed: character value greater than 0xffff "
3849     "cannot be converted to 16-bit in non-UTF mode\n");
3850 ph10 842 goto NEXT_DATA;
3851 ph10 836
3852     default:
3853     break;
3854     }
3855     bptr = (pcre_uint8 *)buffer16;
3856     }
3857     #endif
3858    
3859 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
3860 nigel 3 {
3861 ph10 512 markptr = NULL;
3862    
3863 nigel 93 if (timeitm > 0)
3864 nigel 3 {
3865     register int i;
3866     clock_t time_taken;
3867     clock_t start_time = clock();
3868 nigel 77
3869 nigel 79 #if !defined NODFA
3870 nigel 77 if (all_use_dfa || use_dfa)
3871     {
3872     int workspace[1000];
3873 nigel 93 for (i = 0; i < timeitm; i++)
3874 ph10 836 {
3875     PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3876     (options | g_notempty), use_offsets, use_size_offsets, workspace,
3877     (sizeof(workspace)/sizeof(int)));
3878     }
3879 nigel 77 }
3880     else
3881 nigel 79 #endif
3882 nigel 77
3883 nigel 93 for (i = 0; i < timeitm; i++)
3884 ph10 836 {
3885     PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3886     (options | g_notempty), use_offsets, use_size_offsets);
3887     }
3888 nigel 3 time_taken = clock() - start_time;
3889 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
3890     (((double)time_taken * 1000.0) / (double)timeitm) /
3891 nigel 63 (double)CLOCKS_PER_SEC);
3892 nigel 3 }
3893    
3894 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
3895 nigel 87 varying limits in order to find the minimum value for the match limit and
3896 ph10 667 for the recursion limit. The match limits are relevant only to the normal
3897     running of pcre_exec(), so disable the JIT optimization. This makes it
3898     possible to run the same set of tests with and without JIT externally
3899     requested. */
3900 nigel 63
3901     if (find_match_limit)
3902     {
3903     if (extra == NULL)
3904     {
3905 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3906 nigel 63 extra->flags = 0;
3907     }
3908 ph10 667 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3909 ph10 691
3910 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
3911 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
3912     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3913     PCRE_ERROR_MATCHLIMIT, "match()");
3914 nigel 63
3915 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
3916     options|g_notempty, use_offsets, use_size_offsets,
3917     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3918     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3919 nigel 63 }
3920    
3921     /* If callout_data is set, use the interface with additional data */
3922    
3923     else if (callout_data_set)
3924     {
3925     if (extra == NULL)
3926     {
3927 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3928 nigel 63 extra->flags = 0;
3929     }
3930     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3931 nigel 71 extra->callout_data = &callout_data;
3932 ph10 836 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3933 nigel 63 options | g_notempty, use_offsets, use_size_offsets);
3934     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3935     }
3936    
3937     /* The normal case is just to do the match once, with the default
3938     value of match_limit. */
3939    
3940 nigel 79 #if !defined NODFA
3941 nigel 77 else if (all_use_dfa || use_dfa)
3942     {
3943     int workspace[1000];
3944 ph10 836 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3945     (options | g_notempty), use_offsets, use_size_offsets, workspace,
3946     (sizeof(workspace)/sizeof(int)));
3947 nigel 77 if (count == 0)
3948     {
3949     fprintf(outfile, "Matched, but too many subsidiary matches\n");
3950     count = use_size_offsets/2;
3951     }
3952     }
3953 nigel 79 #endif
3954 nigel 77
3955 nigel 75 else
3956     {
3957 ph10