/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 862 - (hide annotations) (download)
Wed Jan 11 16:07:32 2012 UTC (16 months, 1 week ago) by ph10
File MIME type: text/plain
File size: 130222 byte(s)
Diagnose more than 8 hex digits in \x{...} in pcretest data lines.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 ph10 836 been extended and consequently is now rather, er, *very* untidy in places. The
8     addition of 16-bit support has made it even worse. :-(
9 nigel 63
10 nigel 75 -----------------------------------------------------------------------------
11     Redistribution and use in source and binary forms, with or without
12     modification, are permitted provided that the following conditions are met:
13    
14     * Redistributions of source code must retain the above copyright notice,
15     this list of conditions and the following disclaimer.
16    
17     * Redistributions in binary form must reproduce the above copyright
18     notice, this list of conditions and the following disclaimer in the
19     documentation and/or other materials provided with the distribution.
20    
21     * Neither the name of the University of Cambridge nor the names of its
22     contributors may be used to endorse or promote products derived from
23     this software without specific prior written permission.
24    
25     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35     POSSIBILITY OF SUCH DAMAGE.
36     -----------------------------------------------------------------------------
37     */
38    
39 ph10 836 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40     libraries in a single program. This is different from the modules such as
41     pcre_compile.c in the library itself, which are compiled separately for each
42     mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43     (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44     compiled only once. Therefore, it must not make use of any of the macros from
45     pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46     however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47     only supported library functions. */
48 nigel 75
49 ph10 836
50 ph10 200 #ifdef HAVE_CONFIG_H
51 ph10 236 #include "config.h"
52 ph10 200 #endif
53 ph10 199
54 nigel 3 #include <ctype.h>
55     #include <stdio.h>
56     #include <string.h>
57     #include <stdlib.h>
58     #include <time.h>
59 nigel 25 #include <locale.h>
60 nigel 75 #include <errno.h>
61 nigel 3
62 ph10 287 #ifdef SUPPORT_LIBREADLINE
63 ph10 343 #ifdef HAVE_UNISTD_H
64 ph10 287 #include <unistd.h>
65 ph10 343 #endif
66 ph10 287 #include <readline/readline.h>
67     #include <readline/history.h>
68     #endif
69 nigel 93
70 ph10 287
71 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
72     input and output without "b"; then I was told that "b" was needed in some
73     environments, so it was added for release 5.0 to both the input and output. (It
74     makes no difference on Unix-like systems.) Later I was told that it is wrong
75     for the input on Windows. I've now abstracted the modes into two macros that
76     are set here, to make it easier to fiddle with them, and removed "b" from the
77     input mode under Windows. */
78    
79     #if defined(_WIN32) || defined(WIN32)
80     #include <io.h> /* For _setmode() */
81     #include <fcntl.h> /* For _O_BINARY */
82     #define INPUT_MODE "r"
83     #define OUTPUT_MODE "wb"
84    
85 ph10 411 #ifndef isatty
86     #define isatty _isatty /* This is what Windows calls them, I'm told, */
87     #endif /* though in some environments they seem to */
88     /* be already defined, hence the #ifndefs. */
89     #ifndef fileno
90 ph10 343 #define fileno _fileno
91 ph10 411 #endif
92 ph10 343
93 ph10 580 /* A user sent this fix for Borland Builder 5 under Windows. */
94    
95     #ifdef __BORLANDC__
96     #define _setmode(handle, mode) setmode(handle, mode)
97     #endif
98    
99     /* Not Windows */
100    
101 nigel 93 #else
102     #include <sys/time.h> /* These two includes are needed */
103     #include <sys/resource.h> /* for setrlimit(). */
104     #define INPUT_MODE "rb"
105     #define OUTPUT_MODE "wb"
106 nigel 91 #endif
107    
108 nigel 93
109 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
110     displaying the results of pcre_study() and we also need to know about the
111     internal macros, structures, and other internal data values; pcretest has
112     "inside information" compared to a program that strictly follows the PCRE API.
113 nigel 37
114 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
115     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
116     appropriately for an application, not for building PCRE. */
117 nigel 77
118 ph10 145 #include "pcre.h"
119 ph10 836
120     #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
121     /* Configure internal macros to 16 bit mode. */
122     #define COMPILE_PCRE16
123     #endif
124    
125 nigel 77 #include "pcre_internal.h"
126    
127 ph10 836 /* The pcre_printint() function, which prints the internal form of a compiled
128     regex, is held in a separate file so that (a) it can be compiled in either
129     8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
130     when that is compiled in debug mode. */
131    
132     #ifdef SUPPORT_PCRE8
133     void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
134     #endif
135     #ifdef SUPPORT_PCRE16
136     void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
137     #endif
138    
139 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
140     to keep two copies, we include the source file here, changing the names of the
141     external symbols to prevent clashes. */
142 nigel 77
143 ph10 836 #define PCRE_INCLUDED
144     #undef PRIV
145     #define PRIV(name) name
146 nigel 85
147     #include "pcre_tables.c"
148    
149 ph10 498 /* The definition of the macro PRINTABLE, which determines whether to print an
150 nigel 93 output character as-is or as a hex value when showing compiled patterns, is
151 ph10 836 the same as in the printint.src file. We use it here in cases when the locale
152     has not been explicitly changed, so as to get consistent output from systems
153     that differ in their output from isprint() even in the "C" locale. */
154 nigel 93
/* PRINTABLE(c) is the locale-independent test: it is non-zero when c lies in
the fixed range that this program treats as printable (64..254 when EBCDIC is
defined, 32..126 otherwise). The argument is evaluated more than once. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK(c) chooses which test to apply: isprint() when locale_set is
non-zero, PRINTABLE() otherwise. isprint() is only defined for arguments that
are representable as unsigned char (or EOF), so a value outside 0..255 is never
handed to it; such a value falls through to PRINTABLE(), which rejects it. */

#define PRINTOK(c) \
  ((locale_set && (c) >= 0 && (c) <= 255)? isprint(c) : PRINTABLE(c))
162    
163     /* Posix support is disabled in 16 bit only mode. */
164     #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
165     #define NOPOSIX
166     #endif
167    
168 nigel 37 /* It is possible to compile this test program without including support for
169     testing the POSIX interface, though this is not available via the standard
170     Makefile. */
171    
172     #if !defined NOPOSIX
173 nigel 3 #include "pcreposix.h"
174 nigel 37 #endif
175 nigel 3
176 ph10 836 /* It is also possible, originally for the benefit of a version that was
177     imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
178     NOUTF), or without the interface to the DFA matcher (define NODFA). In fact, we
179     automatically cut out the UTF support if PCRE is built without it. */
180 nigel 79
181 ph10 836 #ifndef SUPPORT_UTF
182     #ifndef NOUTF
183     #define NOUTF
184 ph10 107 #endif
185     #endif
186 nigel 79
187 ph10 836 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
188     for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
189     only from one place and is handled differently). I couldn't dream up any way of
190     using a single macro to do this in a generic way, because of the many different
191     argument requirements. We know that at least one of SUPPORT_PCRE8 and
192     SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
193     use these in the definitions of generic macros.
194 ph10 107
195 ph10 836 **** Special note about the PCHARSxxx macros: the address of the string to be
196     printed is always given as two arguments: a base address followed by an offset.
197     The base address is cast to the correct data size for 8 or 16 bit data; the
198     offset is in units of this size. If the string were given as base+offset in one
199     argument, the casting might be incorrectly applied. */
200    
201     #ifdef SUPPORT_PCRE8
202    
203     #define PCHARS8(lv, p, offset, len, f) \
204     lv = pchars((pcre_uint8 *)(p) + offset, len, f)
205    
206     #define PCHARSV8(p, offset, len, f) \
207     (void)pchars((pcre_uint8 *)(p) + offset, len, f)
208    
209     #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
210     p = read_capture_name8(p, cn8, re)
211    
212 zherczeg 852 #define STRLEN8(p) ((int)strlen((char *)p))
213    
214 ph10 836 #define SET_PCRE_CALLOUT8(callout) \
215     pcre_callout = callout
216    
217 zherczeg 852 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
218     pcre_assign_jit_stack(extra, callback, userdata)
219 ph10 836
220     #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
221     re = pcre_compile((char *)pat, options, error, erroffset, tables)
222    
223     #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
224     namesptr, cbuffer, size) \
225     rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
226     (char *)namesptr, cbuffer, size)
227    
228     #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
229     rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
230    
231     #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
232     offsets, size_offsets, workspace, size_workspace) \
233     count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
234     offsets, size_offsets, workspace, size_workspace)
235    
236     #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
237     offsets, size_offsets) \
238     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
239     offsets, size_offsets)
240    
241     #define PCRE_FREE_STUDY8(extra) \
242     pcre_free_study(extra)
243    
244     #define PCRE_FREE_SUBSTRING8(substring) \
245     pcre_free_substring(substring)
246    
247     #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
248     pcre_free_substring_list(listptr)
249    
250     #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
251     getnamesptr, subsptr) \
252     rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
253     (char *)getnamesptr, subsptr)
254    
/* 8-bit lookup of a named group: calls pcre_get_stringnumber() with the
compiled pattern re and the name ptr (cast to char *), and assigns the result
to n. The pattern comes from the macro's own second argument, not from a
variable that merely happens to be called "re" at the point of use. */

#define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
  n = pcre_get_stringnumber(re, (char *)(ptr))
257    
258     #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
259     rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
260    
261     #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
262     rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
263    
264 zherczeg 839 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
265     rc = pcre_pattern_to_host_byte_order(re, extra, tables)
266 ph10 836
267     #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
268     pcre_printint(re, outfile, debug_lengths)
269    
270     #define PCRE_STUDY8(extra, re, options, error) \
271     extra = pcre_study(re, options, error)
272    
273 zherczeg 852 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
274     pcre_jit_stack_alloc(startsize, maxsize)
275    
276     #define PCRE_JIT_STACK_FREE8(stack) \
277     pcre_jit_stack_free(stack)
278    
279 ph10 836 #endif /* SUPPORT_PCRE8 */
280    
281     /* -----------------------------------------------------------*/
282    
283     #ifdef SUPPORT_PCRE16
284    
285     #define PCHARS16(lv, p, offset, len, f) \
286     lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
287    
288     #define PCHARSV16(p, offset, len, f) \
289     (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
290    
291     #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
292     p = read_capture_name16(p, cn16, re)
293    
294     #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
295    
296     #define SET_PCRE_CALLOUT16(callout) \
297 zherczeg 850 pcre16_callout = (int (*)(pcre16_callout_block *))callout
298 ph10 836
299 zherczeg 852 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
300     pcre16_assign_jit_stack((pcre16_extra *)extra, \
301     (pcre16_jit_callback)callback, userdata)
302 ph10 836
303     #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
304 zherczeg 852 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
305     tables)
306 ph10 836
/* 16-bit copy of a named captured substring: calls
pcre16_copy_named_substring() with the arguments cast to the 16-bit types and
assigns the result to rc. The size argument is halved before being passed on,
and it is parenthesized first so that an expression such as "a + b" is halved
as a whole rather than only its last term. */

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)(re), (PCRE_SPTR16)(bptr), \
    offsets, count, (PCRE_SPTR16)(namesptr), (PCRE_UCHAR16 *)(cbuffer), \
    (size)/2)
311 ph10 836
/* 16-bit copy of a numbered captured substring: calls pcre16_copy_substring()
with the subject and output buffer cast to the 16-bit types and assigns the
result to rc. The size argument is halved before being passed on, and it is
parenthesized first so that a compound expression is halved as a whole. */

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)(bptr), offsets, count, i, \
    (PCRE_UCHAR16 *)(cbuffer), (size)/2)
315 ph10 836
316     #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
317     offsets, size_offsets, workspace, size_workspace) \
318 zherczeg 852 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
319     (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
320     workspace, size_workspace)
321 ph10 836
322     #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
323     offsets, size_offsets) \
324 zherczeg 852 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
325     len, start_offset, options, offsets, size_offsets)
326 ph10 836
327     #define PCRE_FREE_STUDY16(extra) \
328 zherczeg 850 pcre16_free_study((pcre16_extra *)extra)
329 ph10 836
330     #define PCRE_FREE_SUBSTRING16(substring) \
331     pcre16_free_substring((PCRE_SPTR16)substring)
332    
333     #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
334     pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
335    
336     #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
337     getnamesptr, subsptr) \
338 zherczeg 852 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
339     count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
340 ph10 836
/* 16-bit lookup of a named group: calls pcre16_get_stringnumber() with the
compiled pattern re (cast to pcre16 *, as the other 16-bit wrappers do) and the
name ptr (cast to PCRE_SPTR16), and assigns the result to n. The pattern comes
from the macro's own second argument, not from a variable that merely happens
to be called "re" at the point of use. */

#define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)(re), (PCRE_SPTR16)(ptr))
343    
344     #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
345     rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
346     (PCRE_SPTR16 *)(void*)subsptr)
347    
348     #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
349     rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
350     (PCRE_SPTR16 **)(void*)listptr)
351    
352 zherczeg 839 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
353 zherczeg 852 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
354     tables)
355 ph10 836
356     #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
357     pcre16_printint(re, outfile, debug_lengths)
358    
359     #define PCRE_STUDY16(extra, re, options, error) \
360 zherczeg 852 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
361 ph10 836
362 zherczeg 852 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
363     (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
364    
365     #define PCRE_JIT_STACK_FREE16(stack) \
366     pcre16_jit_stack_free((pcre16_jit_stack *)stack)
367    
368 ph10 836 #endif /* SUPPORT_PCRE16 */
369    
370    
371     /* ----- Both modes are supported; a runtime test is needed, except for
372     pcre_config(), and the JIT stack functions, when it doesn't matter which
373     version is called. ----- */
374    
375     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
376    
377     #define CHAR_SIZE (use_pcre16? 2:1)
378    
379     #define PCHARS(lv, p, offset, len, f) \
380     if (use_pcre16) \
381     PCHARS16(lv, p, offset, len, f); \
382     else \
383     PCHARS8(lv, p, offset, len, f)
384    
385     #define PCHARSV(p, offset, len, f) \
386     if (use_pcre16) \
387     PCHARSV16(p, offset, len, f); \
388     else \
389     PCHARSV8(p, offset, len, f)
390    
391     #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
392     if (use_pcre16) \
393     READ_CAPTURE_NAME16(p, cn8, cn16, re); \
394     else \
395     READ_CAPTURE_NAME8(p, cn8, cn16, re)
396    
397     #define SET_PCRE_CALLOUT(callout) \
398     if (use_pcre16) \
399     SET_PCRE_CALLOUT16(callout); \
400     else \
401     SET_PCRE_CALLOUT8(callout)
402    
403     #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
404    
405 zherczeg 852 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
406     if (use_pcre16) \
407     PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
408     else \
409     PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
410 ph10 836
411     #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
412     if (use_pcre16) \
413     PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
414     else \
415     PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
416    
417     #define PCRE_CONFIG pcre_config
418    
419     #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
420     namesptr, cbuffer, size) \
421     if (use_pcre16) \
422     PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
423     namesptr, cbuffer, size); \
424     else \
425     PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
426     namesptr, cbuffer, size)
427    
428     #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
429     if (use_pcre16) \
430     PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
431     else \
432     PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
433    
434     #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
435     offsets, size_offsets, workspace, size_workspace) \
436     if (use_pcre16) \
437     PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
438     offsets, size_offsets, workspace, size_workspace); \
439     else \
440     PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
441     offsets, size_offsets, workspace, size_workspace)
442    
443     #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
444     offsets, size_offsets) \
445     if (use_pcre16) \
446     PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
447     offsets, size_offsets); \
448     else \
449     PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
450     offsets, size_offsets)
451    
452     #define PCRE_FREE_STUDY(extra) \
453     if (use_pcre16) \
454     PCRE_FREE_STUDY16(extra); \
455     else \
456     PCRE_FREE_STUDY8(extra)
457    
458     #define PCRE_FREE_SUBSTRING(substring) \
459     if (use_pcre16) \
460     PCRE_FREE_SUBSTRING16(substring); \
461     else \
462     PCRE_FREE_SUBSTRING8(substring)
463    
464     #define PCRE_FREE_SUBSTRING_LIST(listptr) \
465     if (use_pcre16) \
466     PCRE_FREE_SUBSTRING_LIST16(listptr); \
467     else \
468     PCRE_FREE_SUBSTRING_LIST8(listptr)
469    
470     #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
471     getnamesptr, subsptr) \
472     if (use_pcre16) \
473     PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
474     getnamesptr, subsptr); \
475     else \
476     PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
477     getnamesptr, subsptr)
478    
479     #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
480     if (use_pcre16) \
481     PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
482     else \
483     PCRE_GET_STRINGNUMBER8(n, rc, ptr)
484    
485     #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
486     if (use_pcre16) \
487     PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
488     else \
489     PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
490    
491     #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
492     if (use_pcre16) \
493     PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
494     else \
495     PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
496    
497 zherczeg 852 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
498     (use_pcre16 ? \
499     PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
500     :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
501 ph10 836
502 zherczeg 852 #define PCRE_JIT_STACK_FREE(stack) \
503     if (use_pcre16) \
504     PCRE_JIT_STACK_FREE16(stack); \
505     else \
506     PCRE_JIT_STACK_FREE8(stack)
507    
508 ph10 836 #define PCRE_MAKETABLES \
509     (use_pcre16? pcre16_maketables() : pcre_maketables())
510    
511 zherczeg 839 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
512 ph10 836 if (use_pcre16) \
513 zherczeg 839 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
514 ph10 836 else \
515 zherczeg 839 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
516 ph10 836
517     #define PCRE_PRINTINT(re, outfile, debug_lengths) \
518     if (use_pcre16) \
519     PCRE_PRINTINT16(re, outfile, debug_lengths); \
520     else \
521     PCRE_PRINTINT8(re, outfile, debug_lengths)
522    
523     #define PCRE_STUDY(extra, re, options, error) \
524     if (use_pcre16) \
525     PCRE_STUDY16(extra, re, options, error); \
526     else \
527     PCRE_STUDY8(extra, re, options, error)
528    
529     /* ----- Only 8-bit mode is supported ----- */
530    
531     #elif defined SUPPORT_PCRE8
532     #define CHAR_SIZE 1
533     #define PCHARS PCHARS8
534     #define PCHARSV PCHARSV8
535     #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
536     #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
537     #define STRLEN STRLEN8
538 zherczeg 852 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
539 ph10 836 #define PCRE_COMPILE PCRE_COMPILE8
540     #define PCRE_CONFIG pcre_config
541     #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
542     #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
543     #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
544     #define PCRE_EXEC PCRE_EXEC8
545     #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
546     #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
547     #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
548     #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
549     #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
550     #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
551     #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
552 zherczeg 852 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
553     #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
554 ph10 836 #define PCRE_MAKETABLES pcre_maketables()
555     #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
556     #define PCRE_PRINTINT PCRE_PRINTINT8
557     #define PCRE_STUDY PCRE_STUDY8
558    
559     /* ----- Only 16-bit mode is supported ----- */
560    
561     #else
562     #define CHAR_SIZE 2
563     #define PCHARS PCHARS16
564     #define PCHARSV PCHARSV16
565     #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
566     #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
567     #define STRLEN STRLEN16
568 zherczeg 852 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
569 ph10 836 #define PCRE_COMPILE PCRE_COMPILE16
570     #define PCRE_CONFIG pcre16_config
571     #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
572     #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
573     #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
574     #define PCRE_EXEC PCRE_EXEC16
575     #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
576     #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
577     #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
578     #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
579     #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
580     #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
581     #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
582 zherczeg 852 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
583     #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
584 ph10 836 #define PCRE_MAKETABLES pcre16_maketables()
585     #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
586     #define PCRE_PRINTINT PCRE_PRINTINT16
587     #define PCRE_STUDY PCRE_STUDY16
588     #endif
589    
590     /* ----- End of mode-specific function call macros ----- */
591    
592    
593 nigel 85 /* Other parameters */
594    
595 nigel 3 #ifndef CLOCKS_PER_SEC
596     #ifdef CLK_TCK
597     #define CLOCKS_PER_SEC CLK_TCK
598     #else
599     #define CLOCKS_PER_SEC 100
600     #endif
601     #endif
602    
603 nigel 93 /* This is the default loop count for timing. */
604    
605 nigel 75 #define LOOPREPEAT 500000
606 nigel 3
607 nigel 85 /* Static variables */
608    
609 nigel 3 static FILE *outfile;
610     static int log_store = 0;
611 nigel 63 static int callout_count;
612     static int callout_extra;
613     static int callout_fail_count;
614     static int callout_fail_id;
615 ph10 210 static int debug_lengths;
616 nigel 63 static int first_callout;
617 nigel 93 static int locale_set = 0;
618 nigel 73 static int show_malloc;
619 ph10 836 static int use_utf;
620 nigel 43 static size_t gotten_store;
621 ph10 836 static size_t first_gotten_store = 0;
622 ph10 645 static const unsigned char *last_callout_mark = NULL;
623 nigel 3
624 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
625    
626     static int buffer_size = 50000;
627 ph10 836 static pcre_uint8 *buffer = NULL;
628     static pcre_uint8 *dbuffer = NULL;
629     static pcre_uint8 *pbuffer = NULL;
630 nigel 3
631 ph10 836 /* Another buffer is needed for translation to 16-bit character strings. It
632     will be obtained and extended as required. */
633    
634     #ifdef SUPPORT_PCRE16
635     static int buffer16_size = 0;
636     static pcre_uint16 *buffer16 = NULL;
637    
638     #ifdef SUPPORT_PCRE8
639    
640     /* We need the table of operator lengths that is used for 16-bit compiling, in
641     order to swap bytes in a pattern for saving/reloading testing. Luckily, the
642     data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
643     appropriately for the 16-bit world. Just as a safety check, make sure that
644     COMPILE_PCRE16 is *not* set. */
645    
646     #ifdef COMPILE_PCRE16
647     #error COMPILE_PCRE16 must not be set when compiling pcretest.c
648     #endif
649    
650     #if LINK_SIZE == 2
651     #undef LINK_SIZE
652     #define LINK_SIZE 1
653     #elif LINK_SIZE == 3 || LINK_SIZE == 4
654     #undef LINK_SIZE
655     #define LINK_SIZE 2
656     #else
657     #error LINK_SIZE must be either 2, 3, or 4
658     #endif
659    
660 zherczeg 839 #undef IMM2_SIZE
661     #define IMM2_SIZE 1
662    
663 ph10 836 #endif /* SUPPORT_PCRE8 */
664    
665     static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
666     #endif /* SUPPORT_PCRE16 */
667    
668     /* If we have 8-bit support, default use_pcre16 to false; if there is also
669     16-bit support, it can be changed by an option. If there is no 8-bit support,
670     there must be 16-bit support, so default it to 1. */
671    
672     #ifdef SUPPORT_PCRE8
673     static int use_pcre16 = 0;
674     #else
675     static int use_pcre16 = 1;
676     #endif
677    
678 ph10 598 /* Textual explanations for runtime error codes */
679 nigel 75
/* Table of explanatory texts for runtime error codes, one entry per position.
A NULL entry means there is no text to print from this table for that position
(see the note on each one). Both the strings and the pointers are read-only.
NOTE(review): the table is presumably indexed by the negated PCRE error code
(position 1 is annotated as NOMATCH) - confirm against the code that reads it. */

static const char *const errtexts[] = {
  NULL,  /*  0: no error */
  NULL,  /*  1: NOMATCH is handled specially */
  "NULL argument passed",                                   /*  2 */
  "bad option value",                                       /*  3 */
  "magic number missing",                                   /*  4 */
  "unknown opcode - pattern overwritten?",                  /*  5 */
  "no more memory",                                         /*  6 */
  NULL,  /*  7: never returned by pcre_exec() or pcre_dfa_exec() */
  "match limit exceeded",                                   /*  8 */
  "callout error code",                                     /*  9 */
  NULL,  /* 10: BADUTF8/16 is handled specially */
  NULL,  /* 11: BADUTF8/16 offset is handled specially */
  NULL,  /* 12: PARTIAL is handled specially */
  "not used - internal error",                              /* 13 */
  "internal error - pattern overwritten?",                  /* 14 */
  "bad count value",                                        /* 15 */
  "item unsupported for DFA matching",                      /* 16 */
  "backreference condition or recursion test not supported for DFA matching",
                                                            /* 17 */
  "match limit not supported for DFA matching",             /* 18 */
  "workspace size exceeded in DFA matching",                /* 19 */
  "too much recursion for DFA matching",                    /* 20 */
  "recursion limit exceeded",                               /* 21 */
  "not used - internal error",                              /* 22 */
  "invalid combination of newline options",                 /* 23 */
  "bad offset value",                                       /* 24 */
  NULL,  /* 25: SHORTUTF8/16 is handled specially */
  "nested recursion at the same subject position",          /* 26 */
  "JIT stack limit reached",                                /* 27 */
  "pattern compiled in wrong mode: 8-bit/16-bit error"      /* 28 */
};
711    
712 ph10 654
713 ph10 541 /*************************************************
714     * Alternate character tables *
715     *************************************************/
716 nigel 49
717 ph10 545 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
718     using the default tables of the library. However, the T option can be used to
719     select alternate sets of tables, for different kinds of testing. Note also that
720 ph10 541 the L (locale) option also adjusts the tables. */
721    
722 ph10 545 /* This is the set of tables distributed as default with PCRE. It recognizes
723 ph10 541 only ASCII characters. */
724    
725 ph10 836 static const pcre_uint8 tables0[] = {
726 ph10 541
727     /* This table is a lower casing table. */
728    
729     0, 1, 2, 3, 4, 5, 6, 7,
730     8, 9, 10, 11, 12, 13, 14, 15,
731     16, 17, 18, 19, 20, 21, 22, 23,
732     24, 25, 26, 27, 28, 29, 30, 31,
733     32, 33, 34, 35, 36, 37, 38, 39,
734     40, 41, 42, 43, 44, 45, 46, 47,
735     48, 49, 50, 51, 52, 53, 54, 55,
736     56, 57, 58, 59, 60, 61, 62, 63,
737     64, 97, 98, 99,100,101,102,103,
738     104,105,106,107,108,109,110,111,
739     112,113,114,115,116,117,118,119,
740     120,121,122, 91, 92, 93, 94, 95,
741     96, 97, 98, 99,100,101,102,103,
742     104,105,106,107,108,109,110,111,
743     112,113,114,115,116,117,118,119,
744     120,121,122,123,124,125,126,127,
745     128,129,130,131,132,133,134,135,
746     136,137,138,139,140,141,142,143,
747     144,145,146,147,148,149,150,151,
748     152,153,154,155,156,157,158,159,
749     160,161,162,163,164,165,166,167,
750     168,169,170,171,172,173,174,175,
751     176,177,178,179,180,181,182,183,
752     184,185,186,187,188,189,190,191,
753     192,193,194,195,196,197,198,199,
754     200,201,202,203,204,205,206,207,
755     208,209,210,211,212,213,214,215,
756     216,217,218,219,220,221,222,223,
757     224,225,226,227,228,229,230,231,
758     232,233,234,235,236,237,238,239,
759     240,241,242,243,244,245,246,247,
760     248,249,250,251,252,253,254,255,
761    
762     /* This table is a case flipping table. */
763    
764     0, 1, 2, 3, 4, 5, 6, 7,
765     8, 9, 10, 11, 12, 13, 14, 15,
766     16, 17, 18, 19, 20, 21, 22, 23,
767     24, 25, 26, 27, 28, 29, 30, 31,
768     32, 33, 34, 35, 36, 37, 38, 39,
769     40, 41, 42, 43, 44, 45, 46, 47,
770     48, 49, 50, 51, 52, 53, 54, 55,
771     56, 57, 58, 59, 60, 61, 62, 63,
772     64, 97, 98, 99,100,101,102,103,
773     104,105,106,107,108,109,110,111,
774     112,113,114,115,116,117,118,119,
775     120,121,122, 91, 92, 93, 94, 95,
776     96, 65, 66, 67, 68, 69, 70, 71,
777     72, 73, 74, 75, 76, 77, 78, 79,
778     80, 81, 82, 83, 84, 85, 86, 87,
779     88, 89, 90,123,124,125,126,127,
780     128,129,130,131,132,133,134,135,
781     136,137,138,139,140,141,142,143,
782     144,145,146,147,148,149,150,151,
783     152,153,154,155,156,157,158,159,
784     160,161,162,163,164,165,166,167,
785     168,169,170,171,172,173,174,175,
786     176,177,178,179,180,181,182,183,
787     184,185,186,187,188,189,190,191,
788     192,193,194,195,196,197,198,199,
789     200,201,202,203,204,205,206,207,
790     208,209,210,211,212,213,214,215,
791     216,217,218,219,220,221,222,223,
792     224,225,226,227,228,229,230,231,
793     232,233,234,235,236,237,238,239,
794     240,241,242,243,244,245,246,247,
795     248,249,250,251,252,253,254,255,
796    
797     /* This table contains bit maps for various character classes. Each map is 32
798     bytes long and the bits run from the least significant end of each byte. The
799     classes that have their own maps are: space, xdigit, digit, upper, lower, word,
800     graph, print, punct, and cntrl. Other classes are built from combinations. */
801    
802     0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
803     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
804     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
805     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
806    
807     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
808     0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
809     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
810     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
811    
812     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
813     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
814     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
815     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
816    
817     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
818     0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
819     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
820     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
821    
822     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
823     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
824     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
825     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
826    
827     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
828     0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
829     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
830     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
831    
832     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
833     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
834     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
835     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
836    
837     0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
838     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
839     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
840     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
841    
842     0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
843     0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
844     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
846    
847     0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
848     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
849     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
851    
852     /* This table identifies various classes of character by individual bits:
853     0x01 white space character
854     0x02 letter
855     0x04 decimal digit
856     0x08 hexadecimal digit
857     0x10 alphanumeric or '_'
858     0x80 regular expression metacharacter or binary zero
859     */
860    
861     0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
862     0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
863     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
864     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
865     0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
866     0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
867     0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
868     0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
869     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
870     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
871     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
872     0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
873     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
874     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
875     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
876     0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
877     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
878     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
879     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
880     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
881     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
882     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
883     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
884     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
885     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
886     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
887     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
888     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
889     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
890     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
891     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
892     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
893    
894 ph10 545 /* This is a set of tables that came originally from a Windows user. It seems to
895     be at least an approximation of ISO 8859. In particular, there are characters
896 ph10 541 greater than 128 that are marked as spaces, letters, etc. */
897    
898 ph10 836 static const pcre_uint8 tables1[] = {
899 ph10 541 0,1,2,3,4,5,6,7,
900     8,9,10,11,12,13,14,15,
901     16,17,18,19,20,21,22,23,
902     24,25,26,27,28,29,30,31,
903     32,33,34,35,36,37,38,39,
904     40,41,42,43,44,45,46,47,
905     48,49,50,51,52,53,54,55,
906     56,57,58,59,60,61,62,63,
907     64,97,98,99,100,101,102,103,
908     104,105,106,107,108,109,110,111,
909     112,113,114,115,116,117,118,119,
910     120,121,122,91,92,93,94,95,
911     96,97,98,99,100,101,102,103,
912     104,105,106,107,108,109,110,111,
913     112,113,114,115,116,117,118,119,
914     120,121,122,123,124,125,126,127,
915     128,129,130,131,132,133,134,135,
916     136,137,138,139,140,141,142,143,
917     144,145,146,147,148,149,150,151,
918     152,153,154,155,156,157,158,159,
919     160,161,162,163,164,165,166,167,
920     168,169,170,171,172,173,174,175,
921     176,177,178,179,180,181,182,183,
922     184,185,186,187,188,189,190,191,
923     224,225,226,227,228,229,230,231,
924     232,233,234,235,236,237,238,239,
925     240,241,242,243,244,245,246,215,
926     248,249,250,251,252,253,254,223,
927     224,225,226,227,228,229,230,231,
928     232,233,234,235,236,237,238,239,
929     240,241,242,243,244,245,246,247,
930     248,249,250,251,252,253,254,255,
931     0,1,2,3,4,5,6,7,
932     8,9,10,11,12,13,14,15,
933     16,17,18,19,20,21,22,23,
934     24,25,26,27,28,29,30,31,
935     32,33,34,35,36,37,38,39,
936     40,41,42,43,44,45,46,47,
937     48,49,50,51,52,53,54,55,
938     56,57,58,59,60,61,62,63,
939     64,97,98,99,100,101,102,103,
940     104,105,106,107,108,109,110,111,
941     112,113,114,115,116,117,118,119,
942     120,121,122,91,92,93,94,95,
943     96,65,66,67,68,69,70,71,
944     72,73,74,75,76,77,78,79,
945     80,81,82,83,84,85,86,87,
946     88,89,90,123,124,125,126,127,
947     128,129,130,131,132,133,134,135,
948     136,137,138,139,140,141,142,143,
949     144,145,146,147,148,149,150,151,
950     152,153,154,155,156,157,158,159,
951     160,161,162,163,164,165,166,167,
952     168,169,170,171,172,173,174,175,
953     176,177,178,179,180,181,182,183,
954     184,185,186,187,188,189,190,191,
955     224,225,226,227,228,229,230,231,
956     232,233,234,235,236,237,238,239,
957     240,241,242,243,244,245,246,215,
958     248,249,250,251,252,253,254,223,
959     192,193,194,195,196,197,198,199,
960     200,201,202,203,204,205,206,207,
961     208,209,210,211,212,213,214,247,
962     216,217,218,219,220,221,222,255,
963     0,62,0,0,1,0,0,0,
964     0,0,0,0,0,0,0,0,
965     32,0,0,0,1,0,0,0,
966     0,0,0,0,0,0,0,0,
967     0,0,0,0,0,0,255,3,
968     126,0,0,0,126,0,0,0,
969     0,0,0,0,0,0,0,0,
970     0,0,0,0,0,0,0,0,
971     0,0,0,0,0,0,255,3,
972     0,0,0,0,0,0,0,0,
973     0,0,0,0,0,0,12,2,
974     0,0,0,0,0,0,0,0,
975     0,0,0,0,0,0,0,0,
976     254,255,255,7,0,0,0,0,
977     0,0,0,0,0,0,0,0,
978     255,255,127,127,0,0,0,0,
979     0,0,0,0,0,0,0,0,
980     0,0,0,0,254,255,255,7,
981     0,0,0,0,0,4,32,4,
982     0,0,0,128,255,255,127,255,
983     0,0,0,0,0,0,255,3,
984     254,255,255,135,254,255,255,7,
985     0,0,0,0,0,4,44,6,
986     255,255,127,255,255,255,127,255,
987     0,0,0,0,254,255,255,255,
988     255,255,255,255,255,255,255,127,
989     0,0,0,0,254,255,255,255,
990     255,255,255,255,255,255,255,255,
991     0,2,0,0,255,255,255,255,
992     255,255,255,255,255,255,255,127,
993     0,0,0,0,255,255,255,255,
994     255,255,255,255,255,255,255,255,
995     0,0,0,0,254,255,0,252,
996     1,0,0,248,1,0,0,120,
997     0,0,0,0,254,255,255,255,
998     0,0,128,0,0,0,128,0,
999     255,255,255,255,0,0,0,0,
1000     0,0,0,0,0,0,0,128,
1001     255,255,255,255,0,0,0,0,
1002     0,0,0,0,0,0,0,0,
1003     128,0,0,0,0,0,0,0,
1004     0,1,1,0,1,1,0,0,
1005     0,0,0,0,0,0,0,0,
1006     0,0,0,0,0,0,0,0,
1007     1,0,0,0,128,0,0,0,
1008     128,128,128,128,0,0,128,0,
1009     28,28,28,28,28,28,28,28,
1010     28,28,0,0,0,0,0,128,
1011     0,26,26,26,26,26,26,18,
1012     18,18,18,18,18,18,18,18,
1013     18,18,18,18,18,18,18,18,
1014     18,18,18,128,128,0,128,16,
1015     0,26,26,26,26,26,26,18,
1016     18,18,18,18,18,18,18,18,
1017     18,18,18,18,18,18,18,18,
1018     18,18,18,128,128,0,0,0,
1019     0,0,0,0,0,1,0,0,
1020     0,0,0,0,0,0,0,0,
1021     0,0,0,0,0,0,0,0,
1022     0,0,0,0,0,0,0,0,
1023     1,0,0,0,0,0,0,0,
1024     0,0,18,0,0,0,0,0,
1025     0,0,20,20,0,18,0,0,
1026     0,20,18,0,0,0,0,0,
1027     18,18,18,18,18,18,18,18,
1028     18,18,18,18,18,18,18,18,
1029     18,18,18,18,18,18,18,0,
1030     18,18,18,18,18,18,18,18,
1031     18,18,18,18,18,18,18,18,
1032     18,18,18,18,18,18,18,18,
1033     18,18,18,18,18,18,18,0,
1034     18,18,18,18,18,18,18,18
1035     };
1036    
1037    
1038    
1039 ph10 558
1040     #ifndef HAVE_STRERROR
1041 nigel 49 /*************************************************
1042 ph10 558 * Provide strerror() for non-ANSI libraries *
1043     *************************************************/
1044    
1045     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1046     in their libraries, but can provide the same facility by this simple
1047     alternative function. */
1048    
extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror() for C libraries that do not supply one. The error
number is looked up in the system message table; anything outside the range
0 .. sys_nerr-1 yields a fixed "unknown" string instead.

Argument:   n   the error number
Returns:    pointer to the corresponding message text
*/

char *
strerror(int n)
{
  int in_range = (n >= 0 && n < sys_nerr);

  return in_range ? sys_errlist[n] : "unknown error number";
}
1058     #endif /* HAVE_STRERROR */
1059    
1060    
1061 ph10 667 /*************************************************
1062     * JIT memory callback *
1063     *************************************************/
1064 ph10 558
1065 ph10 667 static pcre_jit_stack* jit_callback(void *arg)
1066     {
1067     return (pcre_jit_stack *)arg;
1068     }
1069 ph10 558
1070 ph10 667
1071 ph10 836 #if !defined NOUTF || defined SUPPORT_PCRE16
1072 ph10 558 /*************************************************
1073 ph10 836 * Convert UTF-8 string to value *
1074     *************************************************/
1075    
1076     /* This function takes one or more bytes that represents a UTF-8 character,
1077     and returns the value of the character.
1078    
1079     Argument:
1080     utf8bytes a pointer to the byte vector
1081     vptr a pointer to an int to receive the value
1082    
1083     Returns: > 0 => the number of bytes consumed
1084     -6 to 0 => malformed UTF-8 character at offset = (-return)
1085     */
1086    
1087     static int
1088     utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1089     {
1090     int c = *utf8bytes++;
1091     int d = c;
1092     int i, j, s;
1093    
1094     for (i = -1; i < 6; i++) /* i is number of additional bytes */
1095     {
1096     if ((d & 0x80) == 0) break;
1097     d <<= 1;
1098     }
1099    
1100     if (i == -1) { *vptr = c; return 1; } /* ascii character */
1101     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1102    
1103     /* i now has a value in the range 1-5 */
1104    
1105     s = 6*i;
1106     d = (c & utf8_table3[i]) << s;
1107    
1108     for (j = 0; j < i; j++)
1109     {
1110     c = *utf8bytes++;
1111     if ((c & 0xc0) != 0x80) return -(j+1);
1112     s -= 6;
1113     d |= (c & 0x3f) << s;
1114     }
1115    
1116     /* Check that encoding was the correct unique one */
1117    
1118     for (j = 0; j < utf8_table1_size; j++)
1119     if (d <= utf8_table1[j]) break;
1120     if (j != i) return -(i+1);
1121    
1122     /* Valid value */
1123    
1124     *vptr = d;
1125     return i+1;
1126     }
1127     #endif /* NOUTF || SUPPORT_PCRE16 */
1128    
1129    
1130    
1131     #if !defined NOUTF || defined SUPPORT_PCRE16
1132     /*************************************************
1133     * Convert character value to UTF-8 *
1134     *************************************************/
1135    
1136     /* This function takes an integer value in the range 0 - 0x7fffffff
1137     and encodes it as a UTF-8 character in 1 to 6 bytes.
1138    
1139     Arguments:
1140     cvalue the character value
1141     utf8bytes pointer to buffer for result - at least 6 bytes long
1142    
1143     Returns: number of bytes placed in the buffer
1144     */
1145    
1146     static int
1147     ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1148     {
1149     register int i, j;
1150     for (i = 0; i < utf8_table1_size; i++)
1151     if (cvalue <= utf8_table1[i]) break;
1152     utf8bytes += i;
1153     for (j = i; j > 0; j--)
1154     {
1155     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1156     cvalue >>= 6;
1157     }
1158     *utf8bytes = utf8_table2[i] | cvalue;
1159     return i + 1;
1160     }
1161 ph10 842 #endif
1162 ph10 836
1163    
1164     #ifdef SUPPORT_PCRE16
1165     /*************************************************
1166     * Convert a string to 16-bit *
1167     *************************************************/
1168    
1169     /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1170     8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1171     double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1172     in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1173     result is always left in buffer16.
1174    
1175     Note that this function does not object to surrogate values. This is
1176     deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1177     for the purpose of testing that they are correctly faulted.
1178    
1179 ph10 842 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1180 ph10 836 in UTF-8 so that values greater than 255 can be handled.
1181    
1182     Arguments:
1183     data TRUE if converting a data line; FALSE for a regex
1184     p points to a byte string
1185     utf true if UTF-8 (to be converted to UTF-16)
1186     len number of bytes in the string (excluding trailing zero)
1187    
1188     Returns: number of 16-bit data items used (excluding trailing zero)
1189     OR -1 if a UTF-8 string is malformed
1190     OR -2 if a value > 0x10ffff is encountered
1191 ph10 842 OR -3 if a value > 0xffff is encountered when not in UTF mode
1192 ph10 836 */
1193    
1194     static int
1195     to16(int data, pcre_uint8 *p, int utf, int len)
1196     {
1197     pcre_uint16 *pp;
1198    
1199     if (buffer16_size < 2*len + 2)
1200     {
1201     if (buffer16 != NULL) free(buffer16);
1202     buffer16_size = 2*len + 2;
1203     buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1204     if (buffer16 == NULL)
1205     {
1206     fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1207     exit(1);
1208     }
1209     }
1210    
1211     pp = buffer16;
1212    
1213     if (!utf && !data)
1214     {
1215     while (len-- > 0) *pp++ = *p++;
1216     }
1217    
1218     else
1219     {
1220     int c = 0;
1221     while (len > 0)
1222     {
1223     int chlen = utf82ord(p, &c);
1224     if (chlen <= 0) return -1;
1225     if (c > 0x10ffff) return -2;
1226     p += chlen;
1227     len -= chlen;
1228     if (c < 0x10000) *pp++ = c; else
1229     {
1230     if (!utf) return -3;
1231     c -= 0x10000;
1232     *pp++ = 0xD800 | (c >> 10);
1233     *pp++ = 0xDC00 | (c & 0x3ff);
1234     }
1235     }
1236     }
1237    
1238     *pp = 0;
1239     return pp - buffer16;
1240     }
1241     #endif
1242    
1243    
1244     /*************************************************
1245 nigel 91 * Read or extend an input line *
1246     *************************************************/
1247    
1248     /* Input lines are read into buffer, but both patterns and data lines can be
1249     continued over multiple input lines. In addition, if the buffer fills up, we
1250     want to automatically expand it so as to be able to handle extremely large
1251     lines that are needed for certain stress tests. When the input buffer is
1252     expanded, the other two buffers must also be expanded likewise, and the
1253     contents of pbuffer, which are a copy of the input for callouts, must be
1254     preserved (for when expansion happens for a data line). This is not the most
1255     optimal way of handling this, but hey, this is just a test program!
1256    
1257     Arguments:
1258     f the file to read
1259     start where in buffer to start (this *must* be within buffer)
1260 ph10 287 prompt for stdin or readline()
1261 nigel 91
1262     Returns: pointer to the start of new data
1263     could be a copy of start, or could be moved
1264     NULL if no data read and EOF reached
1265     */
1266    
1267 ph10 836 static pcre_uint8 *
1268     extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1269 nigel 91 {
1270 ph10 836 pcre_uint8 *here = start;
1271 nigel 91
1272     for (;;)
1273     {
1274 ph10 530 int rlen = (int)(buffer_size - (here - buffer));
1275 nigel 93
1276 nigel 91 if (rlen > 1000)
1277     {
1278     int dlen;
1279 ph10 289
1280 ph10 287 /* If libreadline support is required, use readline() to read a line if the
1281     input is a terminal. Note that readline() removes the trailing newline, so
1282     we must put it back again, to be compatible with fgets(). */
1283 ph10 289
1284 ph10 287 #ifdef SUPPORT_LIBREADLINE
1285     if (isatty(fileno(f)))
1286     {
1287 ph10 289 size_t len;
1288 ph10 287 char *s = readline(prompt);
1289     if (s == NULL) return (here == start)? NULL : start;
1290     len = strlen(s);
1291 ph10 289 if (len > 0) add_history(s);
1292 ph10 287 if (len > rlen - 1) len = rlen - 1;
1293     memcpy(here, s, len);
1294     here[len] = '\n';
1295 ph10 289 here[len+1] = 0;
1296     free(s);
1297 ph10 287 }
1298 ph10 289 else
1299     #endif
1300    
1301 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
1302 ph10 289
1303 ph10 287 {
1304 ph10 516 if (f == stdin) printf("%s", prompt);
1305 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
1306     return (here == start)? NULL : start;
1307 ph10 289 }
1308    
1309 nigel 91 dlen = (int)strlen((char *)here);
1310     if (dlen > 0 && here[dlen - 1] == '\n') return start;
1311     here += dlen;
1312     }
1313    
1314     else
1315     {
1316     int new_buffer_size = 2*buffer_size;
1317 ph10 836 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1318     pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1319     pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1320 nigel 91
1321     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1322     {
1323     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1324     exit(1);
1325     }
1326    
1327     memcpy(new_buffer, buffer, buffer_size);
1328     memcpy(new_pbuffer, pbuffer, buffer_size);
1329    
1330     buffer_size = new_buffer_size;
1331    
1332     start = new_buffer + (start - buffer);
1333     here = new_buffer + (here - buffer);
1334    
1335     free(buffer);
1336     free(dbuffer);
1337     free(pbuffer);
1338    
1339     buffer = new_buffer;
1340     dbuffer = new_dbuffer;
1341     pbuffer = new_pbuffer;
1342     }
1343     }
1344    
1345     return NULL; /* Control never gets here */
1346     }
1347    
1348    
1349    
1350     /*************************************************
1351 nigel 63 * Read number from string *
1352     *************************************************/
1353    
1354     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1355     around with conditional compilation, just do the job by hand. It is only used
1356 nigel 93 for unpicking arguments, so just keep it simple.
1357 nigel 63
1358     Arguments:
1359     str string to be converted
1360     endptr where to put the end pointer
1361    
1362     Returns: the value, as an int
1363     */
1364    
1365     static int
1366 ph10 836 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1367 nigel 63 {
1368     int result = 0;
1369     while(*str != 0 && isspace(*str)) str++;
1370     while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1371     *endptr = str;
1372     return(result);
1373     }
1374    
1375    
1376    
1377 nigel 49 /*************************************************
1378 ph10 836 * Print one character *
1379 nigel 49 *************************************************/
1380    
1381 ph10 836 /* Print a single character either literally, or as a hex escape. */
1382 nigel 49
1383 ph10 836 static int pchar(int c, FILE *f)
1384 nigel 49 {
1385 ph10 836 if (PRINTOK(c))
1386     {
1387     if (f != NULL) fprintf(f, "%c", c);
1388     return 1;
1389     }
1390 nigel 49
1391 ph10 836 if (c < 0x100)
1392 nigel 49 {
1393 ph10 836 if (use_utf)
1394     {
1395     if (f != NULL) fprintf(f, "\\x{%02x}", c);
1396     return 6;
1397     }
1398     else
1399     {
1400     if (f != NULL) fprintf(f, "\\x%02x", c);
1401     return 4;
1402     }
1403 nigel 49 }
1404    
1405 ph10 836 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1406     return (c <= 0x000000ff)? 6 :
1407     (c <= 0x00000fff)? 7 :
1408     (c <= 0x0000ffff)? 8 :
1409     (c <= 0x000fffff)? 9 : 10;
1410     }
1411 nigel 49
1412    
1413    
1414 ph10 836 #ifdef SUPPORT_PCRE8
1415     /*************************************************
1416     * Print 8-bit character string *
1417     *************************************************/
1418 nigel 49
1419 ph10 836 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1420     If handed a NULL file, just counts chars without printing. */
1421 nigel 49
1422 ph10 836 static int pchars(pcre_uint8 *p, int length, FILE *f)
1423     {
1424     int c = 0;
1425     int yield = 0;
1426 nigel 49
1427 ph10 836 if (length < 0)
1428     length = strlen((char *)p);
1429 nigel 49
1430 ph10 836 while (length-- > 0)
1431     {
1432     #if !defined NOUTF
1433     if (use_utf)
1434     {
1435     int rc = utf82ord(p, &c);
1436     if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1437     {
1438     length -= rc - 1;
1439     p += rc;
1440     yield += pchar(c, f);
1441     continue;
1442     }
1443     }
1444     #endif
1445     c = *p++;
1446     yield += pchar(c, f);
1447     }
1448    
1449     return yield;
1450 nigel 49 }
1451 nigel 79 #endif
1452 nigel 49
1453    
1454 nigel 79
1455 ph10 836 #ifdef SUPPORT_PCRE16
1456 nigel 63 /*************************************************
1457 ph10 836 * Find length of 0-terminated 16-bit string *
1458 nigel 85 *************************************************/
1459    
1460 ph10 836 static int strlen16(PCRE_SPTR16 p)
1461 nigel 85 {
1462 ph10 836 int len = 0;
1463     while (*p++ != 0) len++;
1464     return len;
1465 nigel 85 }
1466 ph10 836 #endif /* SUPPORT_PCRE16 */
1467 nigel 85
1468    
1469 ph10 836 #ifdef SUPPORT_PCRE16
1470 nigel 85 /*************************************************
1471 ph10 836 * Print 16-bit character string *
1472 nigel 63 *************************************************/
1473 nigel 49
1474 ph10 836 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1475     If handed a NULL file, just counts chars without printing. */
1476 nigel 49
1477 ph10 836 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1478 nigel 3 {
1479 nigel 63 int yield = 0;
1480 nigel 3
1481 ph10 836 if (length < 0)
1482     length = strlen16(p);
1483    
1484 nigel 63 while (length-- > 0)
1485 nigel 3 {
1486 ph10 836 int c = *p++ & 0xffff;
1487     #if !defined NOUTF
1488     if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1489 nigel 63 {
1490 ph10 836 int d = *p & 0xffff;
1491     if (d >= 0xDC00 && d < 0xDFFF)
1492 nigel 63 {
1493 ph10 836 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1494     length--;
1495     p++;
1496 nigel 63 }
1497     }
1498 nigel 79 #endif
1499 ph10 836 yield += pchar(c, f);
1500     }
1501 nigel 3
1502 ph10 836 return yield;
1503     }
1504     #endif /* SUPPORT_PCRE16 */
1505 nigel 63
1506 ph10 836
1507    
1508     #ifdef SUPPORT_PCRE8
1509     /*************************************************
1510     * Read a capture name (8-bit) and check it *
1511     *************************************************/
1512    
1513     static pcre_uint8 *
1514     read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1515     {
1516     pcre_uint8 *npp = *pp;
1517     while (isalnum(*p)) *npp++ = *p++;
1518     *npp++ = 0;
1519     *npp = 0;
1520     if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1521     {
1522     fprintf(outfile, "no parentheses with name \"");
1523     PCHARSV(*pp, 0, -1, outfile);
1524     fprintf(outfile, "\"\n");
1525 nigel 63 }
1526 nigel 3
1527 ph10 836 *pp = npp;
1528     return p;
1529 nigel 63 }
1530 ph10 836 #endif /* SUPPORT_PCRE8 */
1531 nigel 23
1532 nigel 3
1533 nigel 23
1534 ph10 836 #ifdef SUPPORT_PCRE16
1535 nigel 63 /*************************************************
1536 ph10 836 * Read a capture name (16-bit) and check it *
1537     *************************************************/
1538    
1539     /* Note that the text being read is 8-bit. */
1540    
1541     static pcre_uint8 *
1542     read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1543     {
1544     pcre_uint16 *npp = *pp;
1545     while (isalnum(*p)) *npp++ = *p++;
1546     *npp++ = 0;
1547     *npp = 0;
1548 zherczeg 852 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1549 ph10 836 {
1550     fprintf(outfile, "no parentheses with name \"");
1551     PCHARSV(*pp, 0, -1, outfile);
1552     fprintf(outfile, "\"\n");
1553     }
1554     *pp = npp;
1555     return p;
1556     }
1557     #endif /* SUPPORT_PCRE16 */
1558    
1559    
1560    
1561     /*************************************************
1562 nigel 63 * Callout function *
1563     *************************************************/
1564 nigel 3
1565 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1566     the match. Yield zero unless more callouts than the fail count, or the callout
1567     data is not zero. */
1568 nigel 3
1569 nigel 63 static int callout(pcre_callout_block *cb)
1570     {
1571     FILE *f = (first_callout | callout_extra)? outfile : NULL;
1572 nigel 75 int i, pre_start, post_start, subject_length;
1573 nigel 3
1574 nigel 63 if (callout_extra)
1575     {
1576     fprintf(f, "Callout %d: last capture = %d\n",
1577     cb->callout_number, cb->capture_last);
1578 nigel 3
1579 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
1580     {
1581     if (cb->offset_vector[i] < 0)
1582     fprintf(f, "%2d: <unset>\n", i/2);
1583     else
1584     {
1585     fprintf(f, "%2d: ", i/2);
1586 ph10 836 PCHARSV(cb->subject, cb->offset_vector[i],
1587 nigel 63 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1588     fprintf(f, "\n");
1589     }
1590     }
1591     }
1592 nigel 3
1593 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
1594     datails. On subsequent calls in the same match, we use pchars just to find the
1595     printed lengths of the substrings. */
1596 nigel 3
1597 nigel 63 if (f != NULL) fprintf(f, "--->");
1598 nigel 3
1599 ph10 836 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1600     PCHARS(post_start, cb->subject, cb->start_match,
1601 nigel 63 cb->current_position - cb->start_match, f);
1602 nigel 3
1603 ph10 836 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1604 nigel 75
1605 ph10 836 PCHARSV(cb->subject, cb->current_position,
1606 nigel 63 cb->subject_length - cb->current_position, f);
1607 nigel 3
1608 nigel 63 if (f != NULL) fprintf(f, "\n");
1609 nigel 9
1610 nigel 63 /* Always print appropriate indicators, with callout number if not already
1611 nigel 75 shown. For automatic callouts, show the pattern offset. */
1612 nigel 3
1613 nigel 75 if (cb->callout_number == 255)
1614     {
1615     fprintf(outfile, "%+3d ", cb->pattern_position);
1616     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1617     }
1618     else
1619     {
1620     if (callout_extra) fprintf(outfile, " ");
1621     else fprintf(outfile, "%3d ", cb->callout_number);
1622     }
1623 nigel 3
1624 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1625     fprintf(outfile, "^");
1626 nigel 3
1627 nigel 63 if (post_start > 0)
1628     {
1629     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1630     fprintf(outfile, "^");
1631 nigel 3 }
1632    
1633 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1634     fprintf(outfile, " ");
1635    
1636     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1637     pbuffer + cb->pattern_position);
1638    
1639 nigel 63 fprintf(outfile, "\n");
1640     first_callout = 0;
1641 nigel 3
1642 ph10 654 if (cb->mark != last_callout_mark)
1643 ph10 645 {
1644 ph10 836 if (cb->mark == NULL)
1645     fprintf(outfile, "Latest Mark: <unset>\n");
1646     else
1647     {
1648     fprintf(outfile, "Latest Mark: ");
1649     PCHARSV(cb->mark, 0, -1, outfile);
1650     putc('\n', outfile);
1651     }
1652 ph10 654 last_callout_mark = cb->mark;
1653     }
1654 ph10 645
1655 nigel 71 if (cb->callout_data != NULL)
1656 nigel 49 {
1657 nigel 71 int callout_data = *((int *)(cb->callout_data));
1658     if (callout_data != 0)
1659     {
1660     fprintf(outfile, "Callout data = %d\n", callout_data);
1661     return callout_data;
1662     }
1663 nigel 63 }
1664 nigel 49
1665 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
1666     (++callout_count >= callout_fail_count)? 1 : 0;
1667 nigel 3 }
1668    
1669    
1670 nigel 63 /*************************************************
1671 nigel 73 * Local malloc functions *
1672 nigel 63 *************************************************/
1673 nigel 3
1674 ph10 667 /* Alternative malloc function, to test functionality and save the size of a
1675 ph10 836 compiled re, which is the first store request that pcre_compile() makes. The
1676     show_malloc variable is set only during matching. */
1677 nigel 3
1678     static void *new_malloc(size_t size)
1679     {
1680 nigel 73 void *block = malloc(size);
1681 nigel 43 gotten_store = size;
1682 ph10 836 if (first_gotten_store == 0) first_gotten_store = size;
1683 nigel 73 if (show_malloc)
1684 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1685 nigel 73 return block;
1686 nigel 3 }
1687    
1688 nigel 73 static void new_free(void *block)
1689     {
1690     if (show_malloc)
1691     fprintf(outfile, "free %p\n", block);
1692     free(block);
1693     }
1694 nigel 3
1695 nigel 73 /* For recursion malloc/free, to test stacking calls */
1696    
1697     static void *stack_malloc(size_t size)
1698     {
1699     void *block = malloc(size);
1700     if (show_malloc)
1701 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1702 nigel 73 return block;
1703     }
1704    
1705     static void stack_free(void *block)
1706     {
1707     if (show_malloc)
1708     fprintf(outfile, "stack_free %p\n", block);
1709     free(block);
1710     }
1711    
1712    
1713 nigel 63 /*************************************************
1714     * Call pcre_fullinfo() *
1715     *************************************************/
1716 nigel 43
1717 ph10 836 /* Get one piece of information from the pcre_fullinfo() function. When only
1718     one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1719     value, but the code is defensive.
1720 nigel 43
1721 ph10 836 Arguments:
1722     re compiled regex
1723     study study data
1724     option PCRE_INFO_xxx option
1725     ptr where to put the data
1726    
1727     Returns: 0 when OK, < 0 on error
1728     */
1729    
1730     static int
1731     new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1732 nigel 43 {
1733     int rc;
1734 ph10 836
1735     if (use_pcre16)
1736     #ifdef SUPPORT_PCRE16
1737 zherczeg 852 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1738 ph10 836 #else
1739     rc = PCRE_ERROR_BADMODE;
1740     #endif
1741     else
1742     #ifdef SUPPORT_PCRE8
1743     rc = pcre_fullinfo(re, study, option, ptr);
1744     #else
1745     rc = PCRE_ERROR_BADMODE;
1746     #endif
1747    
1748     if (rc < 0)
1749     {
1750     fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1751     use_pcre16? "16" : "", option);
1752     if (rc == PCRE_ERROR_BADMODE)
1753     fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1754     "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1755     }
1756    
1757     return rc;
1758 nigel 43 }
1759    
1760    
1761    
1762 nigel 63 /*************************************************
1763 ph10 836 * Swap byte functions *
1764 nigel 75 *************************************************/
1765    
1766 ph10 836 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1767     value, respectively.
1768    
1769     Arguments:
1770     value any number
1771    
1772     Returns: the byte swapped value
1773     */
1774    
1775     static pcre_uint32
1776     swap_uint32(pcre_uint32 value)
1777 nigel 75 {
1778     return ((value & 0x000000ff) << 24) |
1779     ((value & 0x0000ff00) << 8) |
1780     ((value & 0x00ff0000) >> 8) |
1781 ph10 836 (value >> 24);
1782 nigel 75 }
1783    
1784 ph10 836 static pcre_uint16
1785     swap_uint16(pcre_uint16 value)
1786     {
1787     return (value >> 8) | (value << 8);
1788     }
1789 nigel 75
1790    
1791    
1792     /*************************************************
1793 ph10 836 * Flip bytes in a compiled pattern *
1794     *************************************************/
1795    
1796     /* This function is called if the 'F' option was present on a pattern that is
1797     to be written to a file. We flip the bytes of all the integer fields in the
1798     regex data block and the study block. In 16-bit mode this also flips relevant
1799     bytes in the pattern itself. This is to make it possible to test PCRE's
1800     ability to reload byte-flipped patterns, e.g. those compiled on a different
1801     architecture. */
1802    
1803     static void
1804     regexflip(pcre *ere, pcre_extra *extra)
1805     {
1806 zherczeg 852 REAL_PCRE *re = (REAL_PCRE *)ere;
1807 ph10 836 #ifdef SUPPORT_PCRE16
1808     int op;
1809     pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1810     int length = re->name_count * re->name_entry_size;
1811     #ifdef SUPPORT_UTF
1812     BOOL utf = (re->options & PCRE_UTF16) != 0;
1813     BOOL utf16_char = FALSE;
1814     #endif /* SUPPORT_UTF */
1815     #endif /* SUPPORT_PCRE16 */
1816    
1817     /* Always flip the bytes in the main data block and study blocks. */
1818    
1819     re->magic_number = REVERSED_MAGIC_NUMBER;
1820     re->size = swap_uint32(re->size);
1821     re->options = swap_uint32(re->options);
1822     re->flags = swap_uint16(re->flags);
1823     re->top_bracket = swap_uint16(re->top_bracket);
1824     re->top_backref = swap_uint16(re->top_backref);
1825     re->first_char = swap_uint16(re->first_char);
1826     re->req_char = swap_uint16(re->req_char);
1827     re->name_table_offset = swap_uint16(re->name_table_offset);
1828     re->name_entry_size = swap_uint16(re->name_entry_size);
1829     re->name_count = swap_uint16(re->name_count);
1830    
1831     if (extra != NULL)
1832     {
1833     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1834     rsd->size = swap_uint32(rsd->size);
1835     rsd->flags = swap_uint32(rsd->flags);
1836     rsd->minlength = swap_uint32(rsd->minlength);
1837     }
1838    
1839     /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1840     in the name table, if present, and then in the pattern itself. */
1841    
1842     #ifdef SUPPORT_PCRE16
1843     if (!use_pcre16) return;
1844    
1845     while(TRUE)
1846     {
1847     /* Swap previous characters. */
1848     while (length-- > 0)
1849     {
1850     *ptr = swap_uint16(*ptr);
1851     ptr++;
1852     }
1853     #ifdef SUPPORT_UTF
1854     if (utf16_char)
1855     {
1856     if ((ptr[-1] & 0xfc00) == 0xd800)
1857     {
1858     /* We know that there is only one extra character in UTF-16. */
1859     *ptr = swap_uint16(*ptr);
1860     ptr++;
1861     }
1862     }
1863     utf16_char = FALSE;
1864     #endif /* SUPPORT_UTF */
1865    
1866     /* Get next opcode. */
1867    
1868     length = 0;
1869     op = *ptr;
1870     *ptr++ = swap_uint16(op);
1871    
1872     switch (op)
1873     {
1874     case OP_END:
1875     return;
1876    
1877     #ifdef SUPPORT_UTF
1878     case OP_CHAR:
1879     case OP_CHARI:
1880     case OP_NOT:
1881     case OP_NOTI:
1882     case OP_STAR:
1883     case OP_MINSTAR:
1884     case OP_PLUS:
1885     case OP_MINPLUS:
1886     case OP_QUERY:
1887     case OP_MINQUERY:
1888     case OP_UPTO:
1889     case OP_MINUPTO:
1890     case OP_EXACT:
1891     case OP_POSSTAR:
1892     case OP_POSPLUS:
1893     case OP_POSQUERY:
1894     case OP_POSUPTO:
1895     case OP_STARI:
1896     case OP_MINSTARI:
1897     case OP_PLUSI:
1898     case OP_MINPLUSI:
1899     case OP_QUERYI:
1900     case OP_MINQUERYI:
1901     case OP_UPTOI:
1902     case OP_MINUPTOI:
1903     case OP_EXACTI:
1904     case OP_POSSTARI:
1905     case OP_POSPLUSI:
1906     case OP_POSQUERYI:
1907     case OP_POSUPTOI:
1908     case OP_NOTSTAR:
1909     case OP_NOTMINSTAR:
1910     case OP_NOTPLUS:
1911     case OP_NOTMINPLUS:
1912     case OP_NOTQUERY:
1913     case OP_NOTMINQUERY:
1914     case OP_NOTUPTO:
1915     case OP_NOTMINUPTO:
1916     case OP_NOTEXACT:
1917     case OP_NOTPOSSTAR:
1918     case OP_NOTPOSPLUS:
1919     case OP_NOTPOSQUERY:
1920     case OP_NOTPOSUPTO:
1921     case OP_NOTSTARI:
1922     case OP_NOTMINSTARI:
1923     case OP_NOTPLUSI:
1924     case OP_NOTMINPLUSI:
1925     case OP_NOTQUERYI:
1926     case OP_NOTMINQUERYI:
1927     case OP_NOTUPTOI:
1928     case OP_NOTMINUPTOI:
1929     case OP_NOTEXACTI:
1930     case OP_NOTPOSSTARI:
1931     case OP_NOTPOSPLUSI:
1932     case OP_NOTPOSQUERYI:
1933     case OP_NOTPOSUPTOI:
1934     if (utf) utf16_char = TRUE;
1935     #endif
1936     /* Fall through. */
1937    
1938     default:
1939     length = OP_lengths16[op] - 1;
1940     break;
1941    
1942     case OP_CLASS:
1943     case OP_NCLASS:
1944     /* Skip the character bit map. */
1945     ptr += 32/sizeof(pcre_uint16);
1946     length = 0;
1947     break;
1948    
1949     case OP_XCLASS:
1950 zherczeg 839 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1951     if (LINK_SIZE > 1)
1952     length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1953     - (1 + LINK_SIZE + 1));
1954     else
1955     length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1956    
1957 ph10 836 /* Reverse the size of the XCLASS instance. */
1958 zherczeg 839 *ptr = swap_uint16(*ptr);
1959 ph10 836 ptr++;
1960     if (LINK_SIZE > 1)
1961     {
1962 zherczeg 839 *ptr = swap_uint16(*ptr);
1963 ph10 836 ptr++;
1964     }
1965    
1966     op = *ptr;
1967     *ptr = swap_uint16(op);
1968 zherczeg 839 ptr++;
1969 ph10 836 if ((op & XCL_MAP) != 0)
1970     {
1971     /* Skip the character bit map. */
1972     ptr += 32/sizeof(pcre_uint16);
1973     length -= 32/sizeof(pcre_uint16);
1974     }
1975     break;
1976     }
1977     }
1978     /* Control should never reach here in 16 bit mode. */
1979     #endif /* SUPPORT_PCRE16 */
1980     }
1981    
1982    
1983    
1984     /*************************************************
1985 nigel 87 * Check match or recursion limit *
1986     *************************************************/
1987    
1988     static int
1989 ph10 836 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1990 nigel 87 int start_offset, int options, int *use_offsets, int use_size_offsets,
1991     int flag, unsigned long int *limit, int errnumber, const char *msg)
1992     {
1993     int count;
1994     int min = 0;
1995     int mid = 64;
1996     int max = -1;
1997    
1998     extra->flags |= flag;
1999    
2000     for (;;)
2001     {
2002     *limit = mid;
2003    
2004 ph10 836 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2005 nigel 87 use_offsets, use_size_offsets);
2006    
2007     if (count == errnumber)
2008     {
2009     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2010     min = mid;
2011     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2012     }
2013    
2014     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2015     count == PCRE_ERROR_PARTIAL)
2016     {
2017     if (mid == min + 1)
2018     {
2019     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2020     break;
2021     }
2022     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2023     max = mid;
2024     mid = (min + mid)/2;
2025     }
2026     else break; /* Some other error */
2027     }
2028    
2029     extra->flags &= ~flag;
2030     return count;
2031     }
2032    
2033    
2034    
2035     /*************************************************
2036 ph10 227 * Case-independent strncmp() function *
2037     *************************************************/
2038    
2039     /*
2040     Arguments:
2041     s first string
2042     t second string
2043     n number of characters to compare
2044    
2045     Returns: < 0, = 0, or > 0, according to the comparison
2046     */
2047    
2048     static int
2049 ph10 836 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2050 ph10 227 {
2051     while (n--)
2052     {
2053     int c = tolower(*s++) - tolower(*t++);
2054     if (c) return c;
2055     }
2056     return 0;
2057     }
2058    
2059    
2060    
2061     /*************************************************
2062 nigel 91 * Check newline indicator *
2063     *************************************************/
2064    
2065 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2066     a message and return 0 if there is no match.
2067 nigel 91
2068     Arguments:
2069     p points after the leading '<'
2070     f file for error message
2071    
2072     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2073     */
2074    
2075     static int
2076 ph10 836 check_newline(pcre_uint8 *p, FILE *f)
2077 nigel 91 {
2078 ph10 836 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2079     if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2080     if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2081     if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2082     if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2083     if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2084     if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2085 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
2086     return 0;
2087     }
2088    
2089    
2090    
2091     /*************************************************
2092 nigel 93 * Usage function *
2093     *************************************************/
2094    
/* Write the command line summary to stdout. The text is held as a table of
pieces that are output in order; which pieces are present depends on the
features this pcretest was built with (readline, 16-bit library, DFA matching,
POSIX interface). None of the pieces contains a '%', so they are written
verbatim. */

static void
usage(void)
{
static const char *const pieces[] =
  {
  "Usage: pcretest [options] [<input file> [<output file>]]\n\n",
  "Input and output default to stdin and stdout.\n",
#ifdef SUPPORT_LIBREADLINE
  "If input is a terminal, readline() is used to read from it.\n",
#else
  "This version of pcretest is not linked with readline().\n",
#endif
  "\nOptions:\n",
#ifdef SUPPORT_PCRE16
  " -16 use the 16-bit library\n",
#endif
  " -b show compiled code\n",
  " -C show PCRE compile-time options and exit\n",
  " -C arg show a specific compile-time option\n",
  " and exit with its value. The arg can be:\n",
  " linksize internal link size [2, 3, 4]\n",
  " pcre8 8 bit library support enabled [0, 1]\n",
  " pcre16 16 bit library support enabled [0, 1]\n",
  " utf Unicode Transformation Format supported [0, 1]\n",
  " ucp Unicode Properties supported [0, 1]\n",
  " jit Just-in-time compiler supported [0, 1]\n",
  " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n",
  " -d debug: show compiled code and information (-b and -i)\n",
#if !defined NODFA
  " -dfa force DFA matching for all subjects\n",
#endif
  " -help show usage information\n",
  " -i show information about compiled patterns\n",
  " -M find MATCH_LIMIT minimum for each subject\n",
  " -m output memory used information\n",
  " -o <n> set size of offsets vector to <n>\n",
#if !defined NOPOSIX
  " -p use POSIX interface\n",
#endif
  " -q quiet: do not output PCRE version number at start\n",
  " -S <n> set stack size to <n> megabytes\n",
  " -s force each pattern to be studied at basic level\n",
  " -s+ force each pattern to be studied, using JIT if available\n",
  " -t time compilation and execution\n",
  " -t <n> time compilation and execution, repeating <n> times\n",
  " -tm time execution (matching) only\n",
  " -tm <n> time execution (matching) only, repeating <n> times\n"
  };
size_t i;

for (i = 0; i < sizeof(pieces)/sizeof(pieces[0]); i++)
  {
  fputs(pieces[i], stdout);
  }
}
2141    
2142    
2143    
2144     /*************************************************
2145 nigel 63 * Main Program *
2146     *************************************************/
2147 nigel 43
2148 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
2149     consist of a regular expression, in delimiters and optionally followed by
2150     options, followed by a set of test data, terminated by an empty line. */
2151    
2152     int main(int argc, char **argv)
2153     {
2154     FILE *infile = stdin;
2155 ph10 836 const char *version;
2156 nigel 3 int options = 0;
2157     int study_options = 0;
2158 ph10 386 int default_find_match_limit = FALSE;
2159 nigel 3 int op = 1;
2160     int timeit = 0;
2161 nigel 93 int timeitm = 0;
2162 nigel 3 int showinfo = 0;
2163 nigel 31 int showstore = 0;
2164 ph10 667 int force_study = -1;
2165     int force_study_options = 0;
2166 nigel 87 int quiet = 0;
2167 nigel 53 int size_offsets = 45;
2168     int size_offsets_max;
2169 nigel 77 int *offsets = NULL;
2170 nigel 53 #if !defined NOPOSIX
2171 nigel 3 int posix = 0;
2172 nigel 53 #endif
2173 nigel 3 int debug = 0;
2174 nigel 11 int done = 0;
2175 nigel 77 int all_use_dfa = 0;
2176     int yield = 0;
2177 nigel 91 int stack_size;
2178 nigel 3
2179 ph10 667 pcre_jit_stack *jit_stack = NULL;
2180    
2181 ph10 836 /* These vectors store, end-to-end, a list of zero-terminated captured
2182     substring names, each list itself being terminated by an empty name. Assume
2183     that 1024 is plenty long enough for the few names we'll be testing. It is
2184     easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2185     for the actual memory, to ensure alignment. By defining these variables always
2186     (whether or not 8-bit or 16-bit is supported), we avoid too much mess with
2187     #ifdefs in the code. */
2188 ph10 667
2189 ph10 836 pcre_uint16 copynames[1024];
2190     pcre_uint16 getnames[1024];
2191 nigel 69
2192 ph10 836 pcre_uint16 *cn16ptr;
2193     pcre_uint16 *gn16ptr;
2194 nigel 91
2195 ph10 836 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2196     pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2197     pcre_uint8 *cn8ptr;
2198     pcre_uint8 *gn8ptr;
2199 nigel 91
2200 ph10 836 /* Get buffers from malloc() so that valgrind will check their misuse when
2201     debugging. They grow automatically when very long lines are read. The 16-bit
2202     buffer (buffer16) is obtained only if needed. */
2203 nigel 69
2204 ph10 836 buffer = (pcre_uint8 *)malloc(buffer_size);
2205     dbuffer = (pcre_uint8 *)malloc(buffer_size);
2206     pbuffer = (pcre_uint8 *)malloc(buffer_size);
2207 nigel 69
2208 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
2209 nigel 3
2210 nigel 93 outfile = stdout;
2211    
2212     /* The following _setmode() stuff is some Windows magic that tells its runtime
2213     library to translate CRLF into a single LF character. At least, that's what
2214     I've been told: never having used Windows I take this all on trust. Originally
2215     it set 0x8000, but then I was advised that _O_BINARY was better. */
2216    
2217 nigel 75 #if defined(_WIN32) || defined(WIN32)
2218 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
2219     #endif
2220 nigel 75
2221 ph10 836 /* Get the version number: both pcre_version() and pcre16_version() give the
2222     same answer. We just need to ensure that we call one that is available. */
2223    
2224     #ifdef SUPPORT_PCRE8
2225     version = pcre_version();
2226     #else
2227     version = pcre16_version();
2228     #endif
2229    
2230 nigel 3 /* Scan options */
2231    
2232     while (argc > 1 && argv[op][0] == '-')
2233     {
2234 ph10 836 pcre_uint8 *endptr;
2235 nigel 53
2236 ph10 606 if (strcmp(argv[op], "-m") == 0) showstore = 1;
2237 ph10 667 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2238 ph10 691 else if (strcmp(argv[op], "-s+") == 0)
2239 ph10 667 {
2240     force_study = 1;
2241     force_study_options = PCRE_STUDY_JIT_COMPILE;
2242 ph10 691 }
2243 ph10 836 else if (strcmp(argv[op], "-16") == 0)
2244     {
2245     #ifdef SUPPORT_PCRE16
2246     use_pcre16 = 1;
2247     #else
2248     printf("** This version of PCRE was built without 16-bit support\n");
2249     exit(1);
2250     #endif
2251     }
2252 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2253 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
2254 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2255     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2256 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2257 nigel 79 #if !defined NODFA
2258 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2259 nigel 79 #endif
2260 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2261 ph10 836 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2262 nigel 65 *endptr == 0))
2263 nigel 53 {
2264     op++;
2265     argc--;
2266     }
2267 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2268     {
2269     int both = argv[op][2] == 0;
2270     int temp;
2271 ph10 836 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2272 nigel 93 *endptr == 0))
2273     {
2274     timeitm = temp;
2275     op++;
2276     argc--;
2277     }
2278     else timeitm = LOOPREPEAT;
2279     if (both) timeit = timeitm;
2280     }
2281 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2282 ph10 836 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2283 nigel 91 *endptr == 0))
2284     {
2285 ph10 641 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2286 nigel 91 printf("PCRE: -S not supported on this OS\n");
2287     exit(1);
2288     #else
2289     int rc;
2290     struct rlimit rlim;
2291     getrlimit(RLIMIT_STACK, &rlim);
2292     rlim.rlim_cur = stack_size * 1024 * 1024;
2293     rc = setrlimit(RLIMIT_STACK, &rlim);
2294     if (rc != 0)
2295     {
2296     printf("PCRE: setrlimit() failed with error %d\n", rc);
2297     exit(1);
2298     }
2299     op++;
2300     argc--;
2301     #endif
2302     }
2303 nigel 53 #if !defined NOPOSIX
2304 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
2305 nigel 53 #endif
2306 nigel 63 else if (strcmp(argv[op], "-C") == 0)
2307     {
2308     int rc;
2309 ph10 392 unsigned long int lrc;
2310 ph10 836
2311     if (argc > 2)
2312     {
2313     if (strcmp(argv[op + 1], "linksize") == 0)
2314     {
2315     (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2316     printf("%d\n", rc);
2317     yield = rc;
2318     goto EXIT;
2319     }
2320     if (strcmp(argv[op + 1], "pcre8") == 0)
2321     {
2322     #ifdef SUPPORT_PCRE8
2323     printf("1\n");
2324     yield = 1;
2325     #else
2326     printf("0\n");
2327     yield = 0;
2328     #endif
2329     goto EXIT;
2330     }
2331     if (strcmp(argv[op + 1], "pcre16") == 0)
2332     {
2333     #ifdef SUPPORT_PCRE16
2334     printf("1\n");
2335     yield = 1;
2336     #else
2337     printf("0\n");
2338     yield = 0;
2339     #endif
2340     goto EXIT;
2341     }
2342     if (strcmp(argv[op + 1], "utf") == 0)
2343     {
2344     #ifdef SUPPORT_PCRE8
2345     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2346     printf("%d\n", rc);
2347     yield = rc;
2348     #else
2349     (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2350     printf("%d\n", rc);
2351     yield = rc;
2352     #endif
2353     goto EXIT;
2354     }
2355     if (strcmp(argv[op + 1], "ucp") == 0)
2356     {
2357     (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2358     printf("%d\n", rc);
2359     yield = rc;
2360     goto EXIT;
2361     }
2362     if (strcmp(argv[op + 1], "jit") == 0)
2363     {
2364     (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2365     printf("%d\n", rc);
2366     yield = rc;
2367     goto EXIT;
2368     }
2369 ph10 838 if (strcmp(argv[op + 1], "newline") == 0)
2370 ph10 842 {
2371 ph10 838 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2372     /* Note that these values are always the ASCII values, even
2373     in EBCDIC environments. CR is 13 and NL is 10. */
2374     printf("%s\n", (rc == 13)? "CR" :
2375     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2376     (rc == -2)? "ANYCRLF" :
2377     (rc == -1)? "ANY" : "???");
2378     goto EXIT;
2379 ph10 842 }
2380 ph10 838 printf("Unknown -C option: %s\n", argv[op + 1]);
2381 ph10 836 goto EXIT;
2382     }
2383    
2384     printf("PCRE version %s\n", version);
2385 nigel 63 printf("Compiled with\n");
2386 ph10 836
2387     /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2388     are set, either both UTFs are supported or both are not supported. */
2389    
2390     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2391     printf(" 8-bit and 16-bit support\n");
2392 nigel 63 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2393 ph10 836 if (rc)
2394     printf(" UTF-8 and UTF-16 support\n");
2395     else
2396     printf(" No UTF-8 or UTF-16 support\n");
2397     #elif defined SUPPORT_PCRE8
2398     printf(" 8-bit support only\n");
2399     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2400 nigel 63 printf(" %sUTF-8 support\n", rc? "" : "No ");
2401 ph10 836 #else
2402     printf(" 16-bit support only\n");
2403     (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2404     printf(" %sUTF-16 support\n", rc? "" : "No ");
2405     #endif
2406    
2407     (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2408 nigel 75 printf(" %sUnicode properties support\n", rc? "" : "No ");
2409 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2410 ph10 674 if (rc)
2411 ph10 689 printf(" Just-in-time compiler support\n");
2412 ph10 674 else
2413     printf(" No just-in-time compiler support\n");
2414 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2415 ph10 391 /* Note that these values are always the ASCII values, even
2416 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
2417 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2418     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2419 ph10 150 (rc == -2)? "ANYCRLF" :
2420 nigel 93 (rc == -1)? "ANY" : "???");
2421 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2422 ph10 231 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2423     "all Unicode newlines");
2424 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2425 nigel 63 printf(" Internal link size = %d\n", rc);
2426 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2427 nigel 63 printf(" POSIX malloc threshold = %d\n", rc);
2428 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2429 ph10 376 printf(" Default match limit = %ld\n", lrc);
2430 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2431 ph10 376 printf(" Default recursion depth limit = %ld\n", lrc);
2432 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2433 nigel 73 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
2434 ph10 121 goto EXIT;
2435 nigel 63 }
2436 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
2437     strcmp(argv[op], "--help") == 0)
2438     {
2439     usage();
2440     goto EXIT;
2441     }
2442 nigel 3 else
2443     {
2444 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
2445 nigel 93 usage();
2446 nigel 77 yield = 1;
2447     goto EXIT;
2448 nigel 3 }
2449     op++;
2450     argc--;
2451     }
2452    
2453 nigel 53 /* Get the store for the offsets vector, and remember what it was */
2454    
2455     size_offsets_max = size_offsets;
2456 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2457 nigel 53 if (offsets == NULL)
2458     {
2459     printf("** Failed to get %d bytes of memory for offsets vector\n",
2460 ph10 151 (int)(size_offsets_max * sizeof(int)));
2461 nigel 77 yield = 1;
2462     goto EXIT;
2463 nigel 53 }
2464    
2465 nigel 3 /* Sort out the input and output files */
2466    
2467     if (argc > 1)
2468     {
2469 nigel 93 infile = fopen(argv[op], INPUT_MODE);
2470 nigel 3 if (infile == NULL)
2471     {
2472     printf("** Failed to open %s\n", argv[op]);
2473 nigel 77 yield = 1;
2474     goto EXIT;
2475 nigel 3 }
2476     }
2477    
2478     if (argc > 2)
2479     {
2480 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
2481 nigel 3 if (outfile == NULL)
2482     {
2483     printf("** Failed to open %s\n", argv[op+1]);
2484 nigel 77 yield = 1;
2485     goto EXIT;
2486 nigel 3 }
2487     }
2488    
2489     /* Set alternative malloc function */
2490    
2491 ph10 836 #ifdef SUPPORT_PCRE8
2492 nigel 3 pcre_malloc = new_malloc;
2493 nigel 73 pcre_free = new_free;
2494     pcre_stack_malloc = stack_malloc;
2495     pcre_stack_free = stack_free;
2496 ph10 836 #endif
2497 nigel 3
2498 ph10 836 #ifdef SUPPORT_PCRE16
2499     pcre16_malloc = new_malloc;
2500     pcre16_free = new_free;
2501     pcre16_stack_malloc = stack_malloc;
2502     pcre16_stack_free = stack_free;
2503     #endif
2504    
2505 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
2506 nigel 3
2507 ph10 836 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2508 nigel 3
2509     /* Main loop */
2510    
2511 nigel 11 while (!done)
2512 nigel 3 {
2513     pcre *re = NULL;
2514     pcre_extra *extra = NULL;
2515 nigel 37
2516     #if !defined NOPOSIX /* There are still compilers that require no indent */
2517 nigel 3 regex_t preg;
2518 nigel 45 int do_posix = 0;
2519 nigel 37 #endif
2520    
2521 nigel 7 const char *error;
2522 ph10 836 pcre_uint8 *markptr;
2523     pcre_uint8 *p, *pp, *ppp;
2524     pcre_uint8 *to_file = NULL;
2525     const pcre_uint8 *tables = NULL;
2526 zherczeg 847 unsigned long int get_options;
2527 nigel 75 unsigned long int true_size, true_study_size = 0;
2528     size_t size, regex_gotten_store;
2529 ph10 654 int do_allcaps = 0;
2530 ph10 512 int do_mark = 0;
2531 nigel 3 int do_study = 0;
2532 ph10 654 int no_force_study = 0;
2533 nigel 25 int do_debug = debug;
2534 nigel 35 int do_G = 0;
2535     int do_g = 0;
2536 nigel 25 int do_showinfo = showinfo;
2537 nigel 35 int do_showrest = 0;
2538 ph10 616 int do_showcaprest = 0;
2539 nigel 75 int do_flip = 0;
2540 nigel 93 int erroroffset, len, delimiter, poffset;
2541 nigel 3
2542 ph10 836 use_utf = 0;
2543 ph10 211 debug_lengths = 1;
2544 nigel 63
2545 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2546 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2547 nigel 63 fflush(outfile);
2548 nigel 3
2549     p = buffer;
2550     while (isspace(*p)) p++;
2551     if (*p == 0) continue;
2552    
2553 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
2554 nigel 3
2555 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2556     {
2557 zherczeg 841 pcre_uint32 magic;
2558 ph10 836 pcre_uint8 sbuf[8];
2559 nigel 75 FILE *f;
2560    
2561     p++;
2562 zherczeg 839 if (*p == '!')
2563     {
2564     do_debug = TRUE;
2565     do_showinfo = TRUE;
2566     p++;
2567     }
2568    
2569 nigel 75 pp = p + (int)strlen((char *)p);
2570     while (isspace(pp[-1])) pp--;
2571     *pp = 0;
2572    
2573     f = fopen((char *)p, "rb");
2574     if (f == NULL)
2575     {
2576     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2577     continue;
2578     }
2579    
2580 zherczeg 839 first_gotten_store = 0;
2581 nigel 75 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2582    
2583     true_size =
2584     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2585     true_study_size =
2586     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2587    
2588 zherczeg 852 re = (pcre *)new_malloc(true_size);
2589 ph10 836 regex_gotten_store = first_gotten_store;
2590 nigel 75
2591     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2592    
2593 zherczeg 852 magic = ((REAL_PCRE *)re)->magic_number;
2594 nigel 75 if (magic != MAGIC_NUMBER)
2595     {
2596 ph10 836 if (swap_uint32(magic) == MAGIC_NUMBER)
2597 nigel 75 {
2598     do_flip = 1;
2599     }
2600     else
2601     {
2602     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2603     fclose(f);
2604     continue;
2605     }
2606     }
2607    
2608 zherczeg 839 /* We hide the byte-invert info for little and big endian tests. */
2609 ph10 612 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2610 zherczeg 839 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2611 nigel 75
2612 ph10 612 /* Now see if there is any following study data. */
2613 nigel 75
2614     if (true_study_size != 0)
2615     {
2616     pcre_study_data *psd;
2617    
2618     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2619     extra->flags = PCRE_EXTRA_STUDY_DATA;
2620    
2621     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2622     extra->study_data = psd;
2623    
2624     if (fread(psd, 1, true_study_size, f) != true_study_size)
2625     {
2626     FAIL_READ:
2627     fprintf(outfile, "Failed to read data from %s\n", p);
2628 ph10 836 if (extra != NULL)
2629     {
2630     PCRE_FREE_STUDY(extra);
2631     }
2632 nigel 75 if (re != NULL) new_free(re);
2633     fclose(f);
2634     continue;
2635     }
2636     fprintf(outfile, "Study data loaded from %s\n", p);
2637     do_study = 1; /* To get the data output if requested */
2638     }
2639     else fprintf(outfile, "No study data\n");
2640    
2641 ph10 836 /* Flip the necessary bytes. */
2642     if (do_flip)
2643     {
2644 zherczeg 839 int rc;
2645     PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2646     if (rc == PCRE_ERROR_BADMODE)
2647     {
2648     /* Simulate the result of the function call below. */
2649     fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2650     use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2651     fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2652     "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2653     continue;
2654     }
2655 ph10 836 }
2656    
2657     /* Need to know if UTF-8 for printing data strings. */
2658    
2659     if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2660     use_utf = (get_options & PCRE_UTF8) != 0;
2661    
2662 nigel 75 fclose(f);
2663     goto SHOW_INFO;
2664     }
2665    
2666     /* In-line pattern (the usual case). Get the delimiter and seek the end of
2667 ph10 836 the pattern; if it isn't complete, read more. */
2668 nigel 75
2669 nigel 3 delimiter = *p++;
2670    
2671 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
2672 nigel 3 {
2673 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2674 nigel 3 goto SKIP_DATA;
2675     }
2676    
2677     pp = p;
2678 ph10 530 poffset = (int)(p - buffer);
2679 nigel 3
2680     for(;;)
2681     {
2682 nigel 29 while (*pp != 0)
2683     {
2684     if (*pp == '\\' && pp[1] != 0) pp++;
2685     else if (*pp == delimiter) break;
2686     pp++;
2687     }
2688 nigel 3 if (*pp != 0) break;
2689 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2690 nigel 3 {
2691     fprintf(outfile, "** Unexpected EOF\n");
2692 nigel 11 done = 1;
2693     goto CONTINUE;
2694 nigel 3 }
2695 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2696 nigel 3 }
2697    
2698 nigel 93 /* The buffer may have moved while being extended; reset the start of data
2699     pointer to the correct relative point in the buffer. */
2700    
2701     p = buffer + poffset;
2702    
2703 nigel 29 /* If the first character after the delimiter is backslash, make
2704     the pattern end with backslash. This is purely to provide a way
2705     of testing for the error message when a pattern ends with backslash. */
2706    
2707     if (pp[1] == '\\') *pp++ = '\\';
2708    
2709 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2710     for callouts. */
2711 nigel 3
2712     *pp++ = 0;
2713 nigel 75 strcpy((char *)pbuffer, (char *)p);
2714 nigel 3
2715     /* Look for options after final delimiter */
2716    
2717     options = 0;
2718 ph10 836 study_options = 0;
2719 nigel 31 log_store = showstore; /* default from command line */
2720    
2721 nigel 3 while (*pp != 0)
2722     {
2723     switch (*pp++)
2724     {
2725 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
2726 nigel 35 case 'g': do_g = 1; break;
2727 nigel 3 case 'i': options |= PCRE_CASELESS; break;
2728     case 'm': options |= PCRE_MULTILINE; break;
2729     case 's': options |= PCRE_DOTALL; break;
2730     case 'x': options |= PCRE_EXTENDED; break;
2731 nigel 25
2732 ph10 616 case '+':
2733 ph10 654 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2734 ph10 616 break;
2735 ph10 654
2736     case '=': do_allcaps = 1; break;
2737 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
2738 nigel 93 case 'B': do_debug = 1; break;
2739 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2740 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
2741 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2742 nigel 75 case 'F': do_flip = 1; break;
2743 nigel 35 case 'G': do_G = 1; break;
2744 nigel 25 case 'I': do_showinfo = 1; break;
2745 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
2746 ph10 512 case 'K': do_mark = 1; break;
2747 nigel 31 case 'M': log_store = 1; break;
2748 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2749 nigel 37
2750     #if !defined NOPOSIX
2751 nigel 3 case 'P': do_posix = 1; break;
2752 nigel 37 #endif
2753    
2754 ph10 654 case 'S':
2755 ph10 691 if (do_study == 0)
2756 ph10 612 {
2757 ph10 691 do_study = 1;
2758 ph10 667 if (*pp == '+')
2759     {
2760     study_options |= PCRE_STUDY_JIT_COMPILE;
2761 ph10 691 pp++;
2762     }
2763     }
2764 ph10 667 else
2765     {
2766 ph10 612 do_study = 0;
2767     no_force_study = 1;
2768 ph10 654 }
2769 ph10 612 break;
2770    
2771 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
2772 ph10 535 case 'W': options |= PCRE_UCP; break;
2773 nigel 3 case 'X': options |= PCRE_EXTRA; break;
2774 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2775 ph10 126 case 'Z': debug_lengths = 0; break;
2776 ph10 836 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2777 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2778 ph10 545
2779 ph10 541 case 'T':
2780     switch (*pp++)
2781     {
2782     case '0': tables = tables0; break;
2783     case '1': tables = tables1; break;
2784 ph10 545
2785 ph10 541 case '\r':
2786     case '\n':
2787 ph10 545 case ' ':
2788     case 0:
2789 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
2790 ph10 545 goto SKIP_DATA;
2791    
2792     default:
2793 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2794 ph10 545 goto SKIP_DATA;
2795 ph10 541 }
2796 ph10 545 break;
2797 nigel 25
2798     case 'L':
2799     ppp = pp;
2800 nigel 93 /* The '\r' test here is so that it works on Windows. */
2801     /* The zero (NUL) test is just in case this is an unterminated line. */
2802     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2803 nigel 25 *ppp = 0;
2804     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2805     {
2806     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2807     goto SKIP_DATA;
2808     }
2809 nigel 93 locale_set = 1;
2810 ph10 836 tables = PCRE_MAKETABLES;
2811 nigel 25 pp = ppp;
2812     break;
2813    
2814 nigel 75 case '>':
2815     to_file = pp;
2816     while (*pp != 0) pp++;
2817     while (isspace(pp[-1])) pp--;
2818     *pp = 0;
2819     break;
2820    
2821 nigel 91 case '<':
2822     {
2823 ph10 836 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2824 ph10 336 {
2825     options |= PCRE_JAVASCRIPT_COMPAT;
2826 ph10 345 pp += 3;
2827 ph10 336 }
2828     else
2829 ph10 345 {
2830 ph10 336 int x = check_newline(pp, outfile);
2831     if (x == 0) goto SKIP_DATA;
2832     options |= x;
2833     while (*pp++ != '>');
2834 ph10 345 }
2835 nigel 91 }
2836     break;
2837    
2838 nigel 77 case '\r': /* So that it works in Windows */
2839     case '\n':
2840     case ' ':
2841     break;
2842 nigel 75
2843 nigel 3 default:
2844     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2845     goto SKIP_DATA;
2846     }
2847     }
2848    
2849 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
2850 nigel 25 timing, showing, or debugging options, nor the ability to pass over
2851 ph10 836 local character tables. Neither does it have 16-bit support. */
2852 nigel 3
2853 nigel 37 #if !defined NOPOSIX
2854 nigel 3 if (posix || do_posix)
2855     {
2856     int rc;
2857     int cflags = 0;
2858 nigel 75
2859 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2860     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2861 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2862 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2863     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2864 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2865 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2866 nigel 87
2867 ph10 836 first_gotten_store = 0;
2868 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
2869    
2870     /* Compilation failed; go back for another re, skipping to blank line
2871     if non-interactive. */
2872    
2873     if (rc != 0)
2874     {
2875 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2876 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2877     goto SKIP_DATA;
2878     }
2879     }
2880    
2881     /* Handle compiling via the native interface */
2882    
2883     else
2884 nigel 37 #endif /* !defined NOPOSIX */
2885    
2886 nigel 3 {
2887 ph10 836 /* In 16-bit mode, convert the input. */
2888    
2889     #ifdef SUPPORT_PCRE16
2890     if (use_pcre16)
2891     {
2892     switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2893     {
2894     case -1:
2895     fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2896     "converted to UTF-16\n");
2897     goto SKIP_DATA;
2898    
2899     case -2:
2900     fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2901     "cannot be converted to UTF-16\n");
2902     goto SKIP_DATA;
2903 ph10 842
2904 ph10 836 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2905     fprintf(outfile, "**Failed: character value greater than 0xffff "
2906     "cannot be converted to 16-bit in non-UTF mode\n");
2907 ph10 842 goto SKIP_DATA;
2908 ph10 836
2909     default:
2910     break;
2911     }
2912     p = (pcre_uint8 *)buffer16;
2913     }
2914     #endif
2915    
2916     /* Compile many times when timing */
2917    
2918 nigel 93 if (timeit > 0)
2919 nigel 3 {
2920     register int i;
2921     clock_t time_taken;
2922     clock_t start_time = clock();
2923 nigel 93 for (i = 0; i < timeit; i++)
2924 nigel 3 {
2925 ph10 836 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2926 nigel 3 if (re != NULL) free(re);
2927     }
2928     time_taken = clock() - start_time;
2929 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
2930     (((double)time_taken * 1000.0) / (double)timeit) /
2931 nigel 63 (double)CLOCKS_PER_SEC);
2932 nigel 3 }
2933    
2934 ph10 836 first_gotten_store = 0;
2935     PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2936 nigel 3
2937     /* Compilation failed; go back for another re, skipping to blank line
2938     if non-interactive. */
2939    
2940     if (re == NULL)
2941     {
2942     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2943     SKIP_DATA:
2944     if (infile != stdin)
2945     {
2946     for (;;)
2947     {
2948 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
2949 nigel 11 {
2950     done = 1;
2951     goto CONTINUE;
2952     }
2953 nigel 3 len = (int)strlen((char *)buffer);
2954     while (len > 0 && isspace(buffer[len-1])) len--;
2955     if (len == 0) break;
2956     }
2957     fprintf(outfile, "\n");
2958     }
2959 nigel 25 goto CONTINUE;
2960 nigel 3 }
2961 ph10 416
2962     /* Compilation succeeded. It is now possible to set the UTF-8 option from
2963     within the regex; check for this so that we know how to process the data
2964 ph10 412 lines. */
2965 ph10 416
2966 ph10 836 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2967     goto SKIP_DATA;
2968     if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2969 nigel 3
2970 ph10 836 /* Extract the size for possible writing before possibly flipping it,
2971     and remember the store that was got. */
2972 nigel 3
2973 zherczeg 852 true_size = ((REAL_PCRE *)re)->size;
2974 ph10 836 regex_gotten_store = first_gotten_store;
2975    
2976     /* Output code size information if requested */
2977    
2978 nigel 63 if (log_store)
2979     fprintf(outfile, "Memory allocation (code space): %d\n",
2980 ph10 836 (int)(first_gotten_store -
2981 zherczeg 852 sizeof(REAL_PCRE) -
2982     ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
2983 nigel 63
2984 ph10 612 /* If -s or /S was present, study the regex to generate additional info to
2985 ph10 654 help with the matching, unless the pattern has the SS option, which
2986 ph10 612 suppresses the effect of /S (used for a few test patterns where studying is
2987     never sensible). */
2988 nigel 75
2989 ph10 667 if (do_study || (force_study >= 0 && !no_force_study))
2990 nigel 75 {
2991 nigel 93 if (timeit > 0)
2992 nigel 75 {
2993     register int i;
2994     clock_t time_taken;
2995     clock_t start_time = clock();
2996 nigel 93 for (i = 0; i < timeit; i++)
2997 ph10 836 {
2998     PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2999     }
3000 nigel 75 time_taken = clock() - start_time;
3001 ph10 836 if (extra != NULL)
3002     {
3003     PCRE_FREE_STUDY(extra);
3004     }
3005 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
3006     (((double)time_taken * 1000.0) / (double)timeit) /
3007 nigel 75 (double)CLOCKS_PER_SEC);
3008     }
3009 ph10 836 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3010 nigel 75 if (error != NULL)
3011     fprintf(outfile, "Failed to study: %s\n", error);
3012     else if (extra != NULL)
3013 ph10 836 {
3014 nigel 75 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3015 ph10 836 if (log_store)
3016     {
3017     size_t jitsize;
3018     if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3019     jitsize != 0)
3020     fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3021     }
3022     }
3023 nigel 75 }
3024 ph10 788
3025 ph10 510 /* If /K was present, we set up for handling MARK data. */
3026 ph10 512
3027 ph10 510 if (do_mark)
3028     {
3029     if (extra == NULL)
3030     {
3031     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3032     extra->flags = 0;
3033     }
3034 ph10 512 extra->mark = &markptr;
3035 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
3036 ph10 512 }
3037 nigel 75
3038 ph10 836 /* Extract and display information from the compiled data if required. */
3039 nigel 75
3040     SHOW_INFO:
3041    
3042 nigel 93 if (do_debug)
3043     {
3044     fprintf(outfile, "------------------------------------------------------------------\n");
3045 ph10 836 PCRE_PRINTINT(re, outfile, debug_lengths);
3046 nigel 93 }
3047 ph10 416
3048 ph10 412 /* We already have the options in get_options (see above) */
3049 nigel 93
3050 nigel 25 if (do_showinfo)
3051 nigel 3 {
3052 ph10 412 unsigned long int all_options;
3053 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3054 ph10 227 hascrorlf;
3055 nigel 63 int nameentrysize, namecount;
3056 ph10 836 const pcre_uint8 *nametable;
3057 nigel 3
3058 ph10 836 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3059     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3060     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3061     new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3062     new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3063     new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3064     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3065     new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3066     new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3067     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3068     new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3069     != 0)
3070     goto SKIP_DATA;
3071 nigel 43
3072 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
3073 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3074 nigel 77 (int)size, (int)regex_gotten_store);
3075 nigel 43
3076     fprintf(outfile, "Capturing subpattern count = %d\n", count);
3077     if (backrefmax > 0)
3078     fprintf(outfile, "Max back reference = %d\n", backrefmax);
3079 nigel 63
3080     if (namecount > 0)
3081     {
3082     fprintf(outfile, "Named capturing subpatterns:\n");
3083     while (namecount-- > 0)
3084     {
3085 ph10 836 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3086     int imm2_size = use_pcre16 ? 1 : 2;
3087     #else
3088     int imm2_size = IMM2_SIZE;
3089     #endif
3090     int length = (int)STRLEN(nametable + imm2_size);
3091     fprintf(outfile, " ");
3092     PCHARSV(nametable, imm2_size, length, outfile);
3093     while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3094     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3095     fprintf(outfile, "%3d\n", use_pcre16?
3096     (int)(((PCRE_SPTR16)nametable)[0])
3097     :((int)nametable[0] << 8) | (int)nametable[1]);
3098     nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3099     #else
3100     fprintf(outfile, "%3d\n", GET2(nametable, 0));
3101     #ifdef SUPPORT_PCRE8
3102 nigel 63 nametable += nameentrysize;
3103 ph10 836 #else
3104     nametable += nameentrysize * 2;
3105     #endif
3106     #endif
3107 nigel 63 }
3108     }
3109 ph10 172
3110 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3111 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3112 nigel 63
3113 zherczeg 852 all_options = ((REAL_PCRE *)re)->options;
3114 ph10 836 if (do_flip) all_options = swap_uint32(all_options);
3115 nigel 75
3116 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
3117 ph10 576 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3118 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3119     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3120     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3121     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3122 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3123 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3124 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3125     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3126 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3127     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3128     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3129 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3130 ph10 836 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3131 ph10 518 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3132 ph10 836 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3133 ph10 576 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3134 nigel 91 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3135 ph10 172
3136 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3137 nigel 43
3138 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
3139 nigel 91 {
3140     case PCRE_NEWLINE_CR:
3141     fprintf(outfile, "Forced newline sequence: CR\n");
3142     break;
3143 nigel 43
3144 nigel 91 case PCRE_NEWLINE_LF:
3145     fprintf(outfile, "Forced newline sequence: LF\n");
3146     break;
3147    
3148     case PCRE_NEWLINE_CRLF:
3149     fprintf(outfile, "Forced newline sequence: CRLF\n");
3150     break;
3151    
3152 ph10 149 case PCRE_NEWLINE_ANYCRLF:
3153     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3154     break;
3155    
3156 nigel 93 case PCRE_NEWLINE_ANY:
3157     fprintf(outfile, "Forced newline sequence: ANY\n");
3158     break;
3159    
3160 nigel 91 default:
3161     break;
3162     }
3163    
3164 nigel 43 if (first_char == -1)
3165     {
3166 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
3167 nigel 43 }
3168     else if (first_char < 0)
3169     {
3170     fprintf(outfile, "No first char\n");
3171     }
3172     else
3173     {
3174 ph10 836 const char *caseless =
3175 zherczeg 852 ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3176 nigel 63 "" : " (caseless)";
3177 ph10 836
3178     if (PRINTOK(first_char))
3179     fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3180 nigel 3 else
3181 ph10 836 {
3182     fprintf(outfile, "First char = ");
3183     pchar(first_char, outfile);
3184     fprintf(outfile, "%s\n", caseless);
3185     }
3186 nigel 43 }
3187 nigel 37
3188 nigel 43 if (need_char < 0)
3189     {
3190     fprintf(outfile, "No need char\n");
3191 nigel 3 }
3192 nigel 43 else
3193     {
3194 ph10 836 const char *caseless =
3195 zherczeg 852 ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3196 nigel 63 "" : " (caseless)";
3197 ph10 836
3198     if (PRINTOK(need_char))
3199     fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3200 nigel 43 else
3201 ph10 836 {
3202     fprintf(outfile, "Need char = ");
3203     pchar(need_char, outfile);
3204     fprintf(outfile, "%s\n", caseless);
3205     }
3206 nigel 43 }
3207 nigel 75
3208     /* Don't output study size; at present it is in any case a fixed
3209     value, but it varies, depending on the computer architecture, and
3210     so messes up the test suite. (And with the /F option, it might be
3211 ph10 654 flipped.) If study was forced by an external -s, don't show this
3212 ph10 612 information unless -i or -d was also present. This means that, except
3213     when auto-callouts are involved, the output from runs with and without
3214     -s should be identical. */
3215 nigel 75
3216 ph10 667 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3217 nigel 75 {
3218     if (extra == NULL)
3219     fprintf(outfile, "Study returned NULL\n");
3220     else
3221     {
3222 ph10 836 pcre_uint8 *start_bits = NULL;
3223 ph10 455 int minlength;
3224 ph10 461
3225 ph10 836 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3226     fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3227 ph10 461
3228 ph10 836 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3229 nigel 75 {
3230 ph10 836 if (start_bits == NULL)
3231     fprintf(outfile, "No set of starting bytes\n");
3232     else
3233 nigel 75 {
3234 ph10 836 int i;
3235     int c = 24;
3236     fprintf(outfile, "Starting byte set: ");
3237     for (i = 0; i < 256; i++)
3238 nigel 75 {
3239 ph10 836 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3240 nigel 75 {
3241 ph10 836 if (c > 75)
3242     {
3243     fprintf(outfile, "\n ");
3244     c = 2;
3245     }
3246     if (PRINTOK(i) && i != ' ')
3247     {
3248     fprintf(outfile, "%c ", i);
3249     c += 2;
3250     }
3251     else
3252     {
3253     fprintf(outfile, "\\x%02x ", i);
3254     c += 5;
3255     }
3256 nigel 75 }
3257     }
3258 ph10 836 fprintf(outfile, "\n");
3259 nigel 75 }
3260     }
3261     }
3262 ph10 691
3263 ph10 667 /* Show this only if the JIT was set by /S, not by -s. */
3264 ph10 691
3265 ph10 667 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3266     {
3267 ph10 691 int jit;
3268 ph10 836 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3269     {
3270     if (jit)
3271     fprintf(outfile, "JIT study was successful\n");
3272     else
3273 ph10 691 #ifdef SUPPORT_JIT
3274 ph10 836 fprintf(outfile, "JIT study was not successful\n");
3275 ph10 667 #else
3276 ph10 836 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3277 ph10 667 #endif
3278 ph10 836 }
3279 ph10 691 }
3280 nigel 75 }
3281 nigel 3 }
3282    
3283 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
3284     that is all. The first 8 bytes of the file are the regex length and then
3285     the study length, in big-endian order. */
3286 nigel 3
3287 nigel 75 if (to_file != NULL)
3288 nigel 3 {
3289 nigel 75 FILE *f = fopen((char *)to_file, "wb");
3290     if (f == NULL)
3291 nigel 3 {
3292 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3293 nigel 3 }
3294 nigel 75 else
3295     {
3296 ph10 836 pcre_uint8 sbuf[8];
3297 ph10 259
3298 ph10 836 if (do_flip) regexflip(re, extra);
3299     sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3300     sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3301     sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3302     sbuf[3] = (pcre_uint8)((true_size) & 255);
3303     sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3304     sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3305     sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3306     sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3307 nigel 3
3308 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
3309     fwrite(re, 1, true_size, f) < true_size)
3310     {
3311     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3312     }
3313 nigel 3 else
3314     {
3315 ph10 612 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3316 ph10 654
3317 ph10 658 /* If there is study data, write it. */
3318 ph10 654
3319 nigel 75 if (extra != NULL)
3320 nigel 3 {
3321 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
3322     true_study_size)
3323 nigel 3 {
3324 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
3325     strerror(errno));
3326 nigel 3 }
3327 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
3328 nigel 3 }
3329     }
3330 nigel 75 fclose(f);
3331 nigel 3 }
3332 nigel 77
3333     new_free(re);
3334 ph10 836 if (extra != NULL)
3335     {
3336     PCRE_FREE_STUDY(extra);
3337     }
3338 ph10 545 if (locale_set)
3339 ph10 541 {
3340     new_free((void *)tables);
3341     setlocale(LC_CTYPE, "C");
3342 ph10 545 locale_set = 0;
3343     }
3344 nigel 75 continue; /* With next regex */
3345 nigel 3 }
3346 nigel 75 } /* End of non-POSIX compile */
3347 nigel 3
3348     /* Read data lines and test them */
3349    
3350     for (;;)
3351     {
3352 ph10 836 pcre_uint8 *q;
3353     pcre_uint8 *bptr;
3354 nigel 57 int *use_offsets = offsets;
3355 nigel 53 int use_size_offsets = size_offsets;
3356 nigel 63 int callout_data = 0;
3357     int callout_data_set = 0;
3358 nigel 3 int count, c;
3359 nigel 29 int copystrings = 0;
3360 ph10 386 int find_match_limit = default_find_match_limit;
3361 nigel 29 int getstrings = 0;
3362     int getlist = 0;
3363 nigel 39 int gmatched = 0;
3364 nigel 35 int start_offset = 0;
3365 ph10 579 int start_offset_sign = 1;
3366 nigel 41 int g_notempty = 0;
3367 nigel 77 int use_dfa = 0;
3368 nigel 3
3369 nigel 91 *copynames = 0;
3370     *getnames = 0;
3371    
3372 ph10 836 cn16ptr = copynames;
3373     gn16ptr = getnames;
3374     cn8ptr = copynames8;
3375     gn8ptr = getnames8;
3376 nigel 91
3377 ph10 836 SET_PCRE_CALLOUT(callout);
3378 nigel 63 first_callout = 1;
3379 ph10 654 last_callout_mark = NULL;
3380 nigel 63 callout_extra = 0;
3381     callout_count = 0;
3382     callout_fail_count = 999999;
3383     callout_fail_id = -1;
3384 nigel 73 show_malloc = 0;
3385 ph10 836 options = 0;
3386 nigel 63
3387 nigel 91 if (extra != NULL) extra->flags &=
3388     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3389    
3390     len = 0;
3391     for (;;)
3392 nigel 11 {
3393 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3394 nigel 91 {
3395 ph10 537 if (len > 0) /* Reached EOF without hitting a newline */
3396     {
3397 ph10 545 fprintf(outfile, "\n");
3398 ph10 537 break;
3399 ph10 545 }
3400 nigel 91 done = 1;
3401     goto CONTINUE;
3402     }
3403     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3404     len = (int)strlen((char *)buffer);
3405     if (buffer[len-1] == '\n') break;
3406 nigel 11 }
3407 nigel 3
3408     while (len > 0 && isspace(buffer[len-1])) len--;
3409     buffer[len] = 0;
3410     if (len == 0) break;
3411    
3412     p = buffer;
3413     while (isspace(*p)) p++;
3414    
3415 ph10 147 bptr = q = dbuffer;
3416 nigel 3 while ((c = *p++) != 0)
3417     {
3418     int i = 0;
3419     int n = 0;
3420 ph10 842
3421 ph10 836 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3422     In non-UTF mode, allow the value of the byte to fall through to later,
3423     where values greater than 127 are turned into UTF-8 when running in
3424     16-bit mode. */
3425 ph10 842
3426 ph10 836 if (c != '\\')
3427 nigel 3 {
3428 ph10 836 if (use_utf)
3429     {
3430     *q++ = c;
3431     continue;
3432 ph10 842 }
3433     }
3434    
3435 ph10 836 /* Handle backslash escapes */
3436 ph10 842
3437 ph10 836 else switch ((c = *p++))
3438     {
3439 nigel 3 case 'a': c = 7; break;
3440     case 'b': c = '\b'; break;
3441     case 'e': c = 27; break;
3442     case 'f': c = '\f'; break;
3443     case 'n': c = '\n'; break;
3444     case 'r': c = '\r'; break;
3445     case 't': c = '\t'; break;
3446     case 'v': c = '\v'; break;
3447    
3448     case '0': case '1': case '2': case '3':
3449     case '4': case '5': case '6': case '7':
3450     c -= '0';
3451     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3452     c = c * 8 + *p++ - '0';
3453     break;
3454    
3455     case 'x':
3456 nigel 49 if (*p == '{')
3457     {
3458 ph10 836 pcre_uint8 *pt = p;
3459 nigel 49 c = 0;
3460 ph10 738
3461 ph10 735 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3462     when isxdigit() is a macro that refers to its argument more than
3463     once. This is banned by the C Standard, but apparently happens in at
3464     least one MacOS environment. */
3465 ph10 738
3466 ph10 735 for (pt++; isxdigit(*pt); pt++)
3467 ph10 862 {
3468     if (++i == 9)
3469     fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3470     "using only the first eight.\n");
3471     else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3472     }
3473 nigel 49 if (*pt == '}')
3474     {
3475     p = pt + 1;
3476     break;
3477     }
3478 ph10 836 /* Not correct form for \x{...}; fall through */
3479 nigel 49 }
3480    
3481 ph10 842 /* \x without {} always defines just one byte in 8-bit mode. This
3482     allows UTF-8 characters to be constructed byte by byte, and also allows
3483     invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3484     Otherwise, pass it down to later code so that it can be turned into
3485 ph10 836 UTF-8 when running in 16-bit mode. */
3486 nigel 49
3487 nigel 3 c = 0;
3488     while (i++ < 2 && isxdigit(*p))
3489     {
3490 ph10 734 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3491 nigel 3 p++;
3492     }
3493 ph10 836 if (use_utf)
3494 ph10 842 {
3495 ph10 836 *q++ = c;
3496 ph10 842 continue;
3497     }
3498 nigel 3 break;
3499    
3500 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
3501 nigel 3 p--;
3502     continue;
3503    
3504 nigel 75 case '>':
3505 ph10 579 if (*p == '-')
3506 ph10 567 {
3507     start_offset_sign = -1;
3508     p++;
3509 ph10 579 }
3510 nigel 75 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3511 ph10 579 start_offset *= start_offset_sign;
3512 nigel 75 continue;
3513    
3514 nigel 3 case 'A': /* Option setting */
3515     options |= PCRE_ANCHORED;
3516     continue;
3517    
3518     case 'B':
3519     options |= PCRE_NOTBOL;
3520     continue;
3521    
3522 nigel 29 case 'C':
3523 nigel 63 if (isdigit(*p)) /* Set copy string */
3524     {
3525     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3526     copystrings |= 1 << n;
3527     }
3528     else if (isalnum(*p))
3529     {
3530 ph10 836 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3531 nigel 63 }
3532     else if (*p == '+')
3533     {
3534     callout_extra = 1;
3535     p++;
3536     }
3537     else if (*p == '-')
3538     {
3539 ph10 836 SET_PCRE_CALLOUT(NULL);
3540 nigel 63 p++;
3541     }
3542     else if (*p == '!')
3543     {
3544     callout_fail_id = 0;
3545     p++;
3546     while(isdigit(*p))
3547     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3548     callout_fail_count = 0;
3549     if (*p == '!')
3550     {
3551     p++;
3552     while(isdigit(*p))
3553     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3554     }
3555     }
3556     else if (*p == '*')
3557     {
3558     int sign = 1;
3559     callout_data = 0;
3560     if (*(++p) == '-') { sign = -1; p++; }
3561     while(isdigit(*p))
3562     callout_data = callout_data * 10 + *p++ - '0';
3563     callout_data *= sign;
3564     callout_data_set = 1;
3565     }
3566 nigel 29 continue;
3567    
3568 nigel 79 #if !defined NODFA
3569 nigel 77 case 'D':
3570 nigel 79 #if !defined NOPOSIX
3571 nigel 77 if (posix || do_posix)
3572     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3573     else
3574 nigel 79 #endif
3575 nigel 77 use_dfa = 1;
3576     continue;
3577 ph10 553 #endif
3578 nigel 77
3579 ph10 553 #if !defined NODFA
3580 nigel 77 case 'F':
3581     options |= PCRE_DFA_SHORTEST;
3582     continue;
3583 nigel 79 #endif
3584 nigel 77
3585 nigel 29 case 'G':
3586 nigel 63 if (isdigit(*p))
3587     {
3588     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3589     getstrings |= 1 << n;
3590     }
3591     else if (isalnum(*p))
3592     {
3593 ph10 836 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3594 nigel 63 }
3595 nigel 29 continue;
3596 ph10 691
3597 ph10 667 case 'J':
3598     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3599 ph10 691 if (extra != NULL
3600     && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3601 ph10 667 && extra->executable_jit != NULL)
3602 ph10 691 {
3603 zherczeg 852 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3604 ph10 836 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3605     PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3606 ph10 691 }
3607 ph10 667 continue;
3608 nigel 29
3609     case 'L':
3610     getlist = 1;
3611     continue;
3612    
3613 nigel 63 case 'M':
3614     find_match_limit = 1;
3615     continue;
3616    
3617 nigel 37 case 'N':
3618 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
3619     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3620 ph10 461 else
3621 ph10 442 options |= PCRE_NOTEMPTY;
3622 nigel 37 continue;
3623    
3624 nigel 3 case 'O':
3625     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3626 nigel 53 if (n > size_offsets_max)
3627     {
3628     size_offsets_max = n;
3629 nigel 57 free(offsets);
3630 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3631 nigel 53 if (offsets == NULL)
3632     {
3633     printf("** Failed to get %d bytes of memory for offsets vector\n",
3634 ph10 151 (int)(size_offsets_max * sizeof(int)));
3635 nigel 77 yield = 1;
3636     goto EXIT;
3637 nigel 53 }
3638     }
3639     use_size_offsets = n;
3640 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3641 nigel 3 continue;
3642    
3643 nigel 75 case 'P':
3644 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3645 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3646 nigel 75 continue;
3647    
3648 nigel 91 case 'Q':
3649     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3650     if (extra == NULL)
3651     {
3652     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3653     extra->flags = 0;
3654     }
3655     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3656     extra->match_limit_recursion = n;
3657     continue;
3658    
3659     case 'q':
3660     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3661     if (extra == NULL)
3662     {
3663     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3664     extra->flags = 0;
3665     }
3666     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3667     extra->match_limit = n;
3668     continue;
3669    
3670 nigel 79 #if !defined NODFA
3671 nigel 77 case 'R':
3672     options |= PCRE_DFA_RESTART;
3673     continue;
3674 nigel 79 #endif
3675 nigel 77
3676 nigel 73 case 'S':
3677     show_malloc = 1;
3678     continue;
3679 ph10 392
3680 ph10 389 case 'Y':
3681     options |= PCRE_NO_START_OPTIMIZE;
3682 ph10 392 continue;
3683 nigel 73
3684 nigel 3 case 'Z':
3685     options |= PCRE_NOTEOL;
3686     continue;
3687 nigel 71
3688     case '?':
3689     options |= PCRE_NO_UTF8_CHECK;
3690     continue;
3691 nigel 91
3692     case '<':
3693     {
3694     int x = check_newline(p, outfile);
3695     if (x == 0) goto NEXT_DATA;
3696     options |= x;
3697     while (*p++ != '>');
3698     }
3699     continue;
3700 nigel 3 }
3701 ph10 836
3702 ph10 842 /* We now have a character value in c that may be greater than 255. In
3703     16-bit mode, we always convert characters to UTF-8 so that values greater
3704 ph10 836 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3705 ph10 842 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3706 ph10 836 mode must have come from \x{...} or octal constructs because values from
3707     \x.. get this far only in non-UTF mode. */
3708    
3709 ph10 842 #if !defined NOUTF || defined SUPPORT_PCRE16
3710 ph10 836 if (use_pcre16 || use_utf)
3711     {
3712     pcre_uint8 buff8[8];
3713     int ii, utn;
3714     utn = ord2utf8(c, buff8);
3715     for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3716     }
3717     else
3718 ph10 842 #endif
3719 ph10 836 {
3720     if (c > 255)
3721     {
3722     fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3723     "and UTF-8 mode is not enabled.\n", c);
3724     fprintf(outfile, "** Truncation will probably give the wrong "
3725     "result.\n");
3726     }
3727     *q++ = c;
3728     }
3729 nigel 3 }
3730 ph10 842
3731 ph10 836 /* Reached end of subject string */
3732 ph10 842
3733 nigel 9 *q = 0;
3734 ph10 530 len = (int)(q - dbuffer);
3735 ph10 545
3736 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
3737 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3738 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
3739 ph10 371
3740 ph10 363 #if !defined NOPOSIX
3741     if (posix || do_posix)
3742     {
3743     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3744 ph10 371 bptr += buffer_size - len - 1;
3745 ph10 363 }
3746 ph10 371 else
3747     #endif
3748 ph10 363 {
3749     memmove(bptr + buffer_size - len, bptr, len);
3750 ph10 371 bptr += buffer_size - len;
3751     }
3752 nigel 3
3753 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
3754     {
3755     printf("**Match limit not relevant for DFA matching: ignored\n");
3756     find_match_limit = 0;
3757     }
3758    
3759 nigel 3 /* Handle matching via the POSIX interface, which does not
3760 nigel 63 support timing or playing with the match limit or callout data. */
3761 nigel 3
3762 nigel 37 #if !defined NOPOSIX
3763 nigel 3 if (posix || do_posix)
3764     {
3765     int rc;
3766     int eflags = 0;
3767 nigel 63 regmatch_t *pmatch = NULL;
3768     if (use_size_offsets > 0)
3769 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3770 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3771     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3772 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3773 nigel 3
3774 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3775 nigel 3
3776     if (rc != 0)
3777     {
3778 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3779 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3780     }
3781 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3782     != 0)
3783     {
3784     fprintf(outfile, "Matched with REG_NOSUB\n");
3785     }
3786 nigel 3 else
3787     {
3788 nigel 7 size_t i;
3789 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
3790 nigel 3 {
3791     if (pmatch[i].rm_so >= 0)
3792     {
3793 nigel 23 fprintf(outfile, "%2d: ", (int)i);
3794 ph10 836 PCHARSV(dbuffer, pmatch[i].rm_so,
3795 nigel 63 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3796 nigel 3 fprintf(outfile, "\n");
3797 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
3798 nigel 35 {
3799 ph10 616 fprintf(outfile, "%2d+ ", (int)i);
3800 ph10 836 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3801 nigel 63 outfile);
3802 nigel 35 fprintf(outfile, "\n");
3803     }
3804 nigel 3 }
3805     }
3806     }
3807 nigel 53 free(pmatch);
3808 ph10 836 goto NEXT_DATA;
3809 nigel 3 }
3810    
3811 ph10 836 #endif /* !defined NOPOSIX */
3812    
3813 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
3814 nigel 3
3815 ph10 836 #ifdef SUPPORT_PCRE16
3816     if (use_pcre16)
3817     {
3818 zherczeg 852 len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3819 ph10 836 switch(len)
3820     {
3821     case -1:
3822     fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3823     "converted to UTF-16\n");
3824     goto NEXT_DATA;
3825 nigel 37
3826 ph10 836 case -2:
3827     fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3828     "cannot be converted to UTF-16\n");
3829     goto NEXT_DATA;
3830    
3831     case -3:
3832     fprintf(outfile, "**Failed: character value greater than 0xffff "
3833     "cannot be converted to 16-bit in non-UTF mode\n");
3834 ph10 842 goto NEXT_DATA;
3835 ph10 836
3836     default:
3837     break;
3838     }
3839     bptr = (pcre_uint8 *)buffer16;
3840     }
3841     #endif
3842    
3843 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
3844 nigel 3 {
3845 ph10 512 markptr = NULL;
3846    
3847 nigel 93 if (timeitm > 0)
3848 nigel 3 {
3849     register int i;
3850     clock_t time_taken;
3851     clock_t start_time = clock();
3852 nigel 77
3853 nigel 79 #if !defined NODFA
3854 nigel 77 if (all_use_dfa || use_dfa)
3855     {
3856     int workspace[1000];
3857 nigel 93 for (i = 0; i < timeitm; i++)
3858 ph10 836 {
3859     PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3860     (options | g_notempty), use_offsets, use_size_offsets, workspace,
3861     (sizeof(workspace)/sizeof(int)));
3862     }
3863 nigel 77 }
3864     else
3865 nigel 79 #endif
3866 nigel 77
3867 nigel 93 for (i = 0; i < timeitm; i++)
3868 ph10 836 {
3869     PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3870     (options | g_notempty), use_offsets, use_size_offsets);
3871     }
3872 nigel 3 time_taken = clock() - start_time;
3873 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
3874     (((double)time_taken * 1000.0) / (double)timeitm) /
3875 nigel 63 (double)CLOCKS_PER_SEC);
3876 nigel 3 }
3877    
3878 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
3879 nigel 87 varying limits in order to find the minimum value for the match limit and
3880 ph10 667 for the recursion limit. The match limits are relevant only to the normal
3881     running of pcre_exec(), so disable the JIT optimization. This makes it
3882     possible to run the same set of tests with and without JIT externally
3883     requested. */
3884 nigel 63
3885     if (find_match_limit)
3886     {
3887     if (extra == NULL)
3888     {
3889 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3890 nigel 63 extra->flags = 0;
3891     }
3892 ph10 667 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3893 ph10 691
3894 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
3895 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
3896     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3897     PCRE_ERROR_MATCHLIMIT, "match()");
3898 nigel 63
3899 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
3900     options|g_notempty, use_offsets, use_size_offsets,
3901     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3902     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3903 nigel 63 }
3904    
3905     /* If callout_data is set, use the interface with additional data */
3906    
3907     else if (callout_data_set)
3908     {
3909     if (extra == NULL)
3910     {
3911 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3912 nigel 63 extra->flags = 0;
3913     }
3914     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3915 nigel 71 extra->callout_data = &callout_data;
3916 ph10 836 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3917 nigel 63 options | g_notempty, use_offsets, use_size_offsets);
3918     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3919     }
3920    
3921     /* The normal case is just to do the match once, with the default
3922     value of match_limit. */
3923    
3924 nigel 79 #if !defined NODFA
3925 nigel 77 else if (all_use_dfa || use_dfa)
3926     {
3927     int workspace[1000];
3928 ph10 836 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3929     (options | g_notempty), use_offsets, use_size_offsets, workspace,
3930     (sizeof(workspace)/sizeof(int)));
3931 nigel 77 if (count == 0)
3932     {
3933     fprintf(outfile, "Matched, but too many subsidiary matches\n");
3934     count = use_size_offsets/2;
3935     }
3936     }
3937 nigel 79 #endif
3938 nigel 77
3939 nigel 75 else
3940     {
3941 ph10 836 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3942     options | g_notempty, use_offsets, use_size_offsets);
3943 nigel 77 if (count == 0)
3944     {
3945     fprintf(outfile, "Matched, but too many substrings\n");
3946     count = use_size_offsets/3;
3947     }
3948 nigel 75 }
3949 nigel 3
3950 nigel 39 /* Matched */
3951    
3952 nigel 3 if (count >= 0)
3953     {
3954 nigel 93 int i, maxcount;
3955 ph10 836 void *cnptr, *gnptr;
3956 nigel