/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 909 - (hide annotations) (download)
Sat Feb 4 15:01:47 2012 UTC (2 years, 8 months ago) by ph10
File MIME type: text/plain
File size: 130537 byte(s)
Final source and document tidies for 8.30.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 ph10 836 been extended and consequently is now rather, er, *very* untidy in places. The
8     addition of 16-bit support has made it even worse. :-(
9 nigel 63
10 nigel 75 -----------------------------------------------------------------------------
11     Redistribution and use in source and binary forms, with or without
12     modification, are permitted provided that the following conditions are met:
13    
14     * Redistributions of source code must retain the above copyright notice,
15     this list of conditions and the following disclaimer.
16    
17     * Redistributions in binary form must reproduce the above copyright
18     notice, this list of conditions and the following disclaimer in the
19     documentation and/or other materials provided with the distribution.
20    
21     * Neither the name of the University of Cambridge nor the names of its
22     contributors may be used to endorse or promote products derived from
23     this software without specific prior written permission.
24    
25     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35     POSSIBILITY OF SUCH DAMAGE.
36     -----------------------------------------------------------------------------
37     */
38    
39 ph10 836 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40     libraries in a single program. This is different from the modules such as
41     pcre_compile.c in the library itself, which are compiled separately for each
42     mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43     (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44     compiled only once. Therefore, it must not make use of any of the macros from
45     pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46     however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47     only supported library functions. */
48 nigel 75
49 ph10 836
50 ph10 200 #ifdef HAVE_CONFIG_H
51 ph10 236 #include "config.h"
52 ph10 200 #endif
53 ph10 199
54 nigel 3 #include <ctype.h>
55     #include <stdio.h>
56     #include <string.h>
57     #include <stdlib.h>
58     #include <time.h>
59 nigel 25 #include <locale.h>
60 nigel 75 #include <errno.h>
61 nigel 3
62 ph10 287 #ifdef SUPPORT_LIBREADLINE
63 ph10 343 #ifdef HAVE_UNISTD_H
64 ph10 287 #include <unistd.h>
65 ph10 343 #endif
66 ph10 287 #include <readline/readline.h>
67     #include <readline/history.h>
68     #endif
69 nigel 93
70 ph10 287
/* File open modes and a few platform shims. Originally, pcretest opened its
input and output without "b"; "b" was then reported to be needed in some
environments, so release 5.0 added it to both. (It makes no difference on
Unix-like systems.) It was later reported to be wrong for the input on
Windows, so the two modes are now kept in macros that are set here, and the
Windows input mode has no "b". */

#if !defined(_WIN32) && !defined(WIN32)

/* Not Windows */

#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */
#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"

#else

/* Windows */

#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

/* Windows spells these with a leading underscore, but some environments
already define the plain names, hence the #ifndef guards. */

#ifndef isatty
#define isatty _isatty
#endif

#ifndef fileno
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#ifdef __BORLANDC__
#define _setmode(handle, mode) setmode(handle, mode)
#endif

#endif  /* Windows or not */
107    
108 zherczeg 905 #define PRIV(name) name
109 nigel 93
110 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
111     displaying the results of pcre_study() and we also need to know about the
112     internal macros, structures, and other internal data values; pcretest has
113     "inside information" compared to a program that strictly follows the PCRE API.
114 nigel 37
115 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
116     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
117     appropriately for an application, not for building PCRE. */
118 nigel 77
119 ph10 145 #include "pcre.h"
120 ph10 836
121     #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
122     /* Configure internal macros to 16 bit mode. */
123     #define COMPILE_PCRE16
124     #endif
125    
126 nigel 77 #include "pcre_internal.h"
127    
128 ph10 836 /* The pcre_printint() function, which prints the internal form of a compiled
129     regex, is held in a separate file so that (a) it can be compiled in either
130     8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
131     when that is compiled in debug mode. */
132    
133     #ifdef SUPPORT_PCRE8
134     void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
135     #endif
136     #ifdef SUPPORT_PCRE16
137     void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
138     #endif
139    
140 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
141     to keep two copies, we include the source file here, changing the names of the
142     external symbols to prevent clashes. */
143 nigel 77
144 ph10 836 #define PCRE_INCLUDED
145 nigel 85
146     #include "pcre_tables.c"
147    
/* PRINTABLE decides whether an output character is printed as-is or as a hex
value when showing compiled patterns; its definition is the same as in the
printint.src file. We use it here in cases when the locale has not been
explicitly changed, so as to get consistent output from systems that differ in
their output from isprint() even in the "C" locale. */

#ifndef EBCDIC
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#else
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#endif

/* PRINTOK consults isprint() once locale_set is non-zero, and the fixed
PRINTABLE range otherwise. */

#define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
161    
162     /* Posix support is disabled in 16 bit only mode. */
163     #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
164     #define NOPOSIX
165     #endif
166    
167 nigel 37 /* It is possible to compile this test program without including support for
168     testing the POSIX interface, though this is not available via the standard
169     Makefile. */
170    
171     #if !defined NOPOSIX
172 nigel 3 #include "pcreposix.h"
173 nigel 37 #endif
174 nigel 3
175 ph10 836 /* It is also possible, originally for the benefit of a version that was
176     imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
177     NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
178     automatically cut out the UTF support if PCRE is built without it. */
179 nigel 79
180 ph10 836 #ifndef SUPPORT_UTF
181     #ifndef NOUTF
182     #define NOUTF
183 ph10 107 #endif
184     #endif
185 nigel 79
/* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
only from one place and is handled differently). I couldn't dream up any way of
using a single macro to do this in a generic way, because of the many different
argument requirements. We know that at least one of SUPPORT_PCRE8 and
SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
use these in the definitions of generic macros.

**** Special note about the PCHARSxxx macros: the address of the string to be
printed is always given as two arguments: a base address followed by an offset.
The base address is cast to the correct data size for 8 or 16 bit data; the
offset is in units of this size. If the string were given as base+offset in one
argument, the casting might be incorrectly applied. The offset argument is
parenthesized in the expansion so that an expression passed as the offset is
added as a whole.

Macros whose first parameter is lv, rc, n, count, re or extra assign the
function result to that argument; the others expand to a plain call. */

#ifdef SUPPORT_PCRE8

#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + (offset), len, f)

#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + (offset), len, f)

/* The cn16 argument is accepted for symmetry with the 16-bit macro and is
not used here. */

#define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
  p = read_capture_name8(p, cn8, re)

#define STRLEN8(p) ((int)strlen((char *)p))

#define SET_PCRE_CALLOUT8(callout) \
  pcre_callout = callout

#define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
  pcre_assign_jit_stack(extra, callback, userdata)

#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  re = pcre_compile((char *)pat, options, error, erroffset, tables)

#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
    (char *)namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets)

#define PCRE_FREE_STUDY8(extra) \
  pcre_free_study(extra)

#define PCRE_FREE_SUBSTRING8(substring) \
  pcre_free_substring(substring)

#define PCRE_FREE_SUBSTRING_LIST8(listptr) \
  pcre_free_substring_list(listptr)

#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
    (char *)getnamesptr, subsptr)

/* NOTE(review): the second parameter is spelled "rc" and is not used; the
expansion refers to whatever "re" is in scope at the point of use. This looks
like a typo for "re", but it is left alone because callers outside this part
of the file may rely on it - confirm before renaming. */

#define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)

#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
  rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
  rc = pcre_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT8(re, outfile, debug_lengths) \
  pcre_printint(re, outfile, debug_lengths)

#define PCRE_STUDY8(extra, re, options, error) \
  extra = pcre_study(re, options, error)

#define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
  pcre_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE8(stack) \
  pcre_jit_stack_free(stack)

#endif /* SUPPORT_PCRE8 */
279    
280     /* -----------------------------------------------------------*/
281    
/* 16-bit versions of the per-mode call macros. Arguments arrive with the
8-bit types (pcre *, pcre_extra *, byte pointers) and are cast to the 16-bit
types for the pcre16_xxx call; results that are handed back through an
argument are cast to the 8-bit types again.

In the PCHARSxxx macros the base address and the offset are separate
arguments: the base is cast to PCRE_SPTR16 first and the (parenthesized)
offset is then added in 16-bit units.

Macros whose first parameter is lv, rc, n, count, re or extra assign the
function result to that argument; the others expand to a plain call. */

#ifdef SUPPORT_PCRE16

#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + (offset), len, f)

#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + (offset), len, f)

/* The cn8 argument is accepted for symmetry with the 8-bit macro and is not
used here. */

#define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
  p = read_capture_name16(p, cn16, re)

#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))

#define SET_PCRE_CALLOUT16(callout) \
  pcre16_callout = (int (*)(pcre16_callout_block *))callout

#define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
  pcre16_assign_jit_stack((pcre16_extra *)extra, \
    (pcre16_jit_callback)callback, userdata)

#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
    tables)

/* In the two copy macros the size argument is halved before being passed on
(presumably converting a byte count into a count of 16-bit units - confirm
against the callers). It is parenthesized so that the division applies to the
whole of an expression argument. */

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, (size)/2)

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_UCHAR16 *)cbuffer, (size)/2)

#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
    (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
    workspace, size_workspace)

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
    len, start_offset, options, offsets, size_offsets)

#define PCRE_FREE_STUDY16(extra) \
  pcre16_free_study((pcre16_extra *)extra)

#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)substring)

#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)listptr)

#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)

/* NOTE(review): the second parameter is spelled "rc" and is not used; the
expansion refers to whatever "re" is in scope at the point of use. This looks
like a typo for "re", but it is left alone because callers outside this part
of the file may rely on it - confirm before renaming. The (pcre16 *) cast
matches the other 16-bit wrappers. */

#define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)

#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SPTR16 *)(void*)subsptr)

#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16 **)(void*)listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
  rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
    tables)

/* No cast here: pcre16_printint() is declared with a pcre * argument. */

#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  pcre16_printint(re, outfile, debug_lengths)

#define PCRE_STUDY16(extra, re, options, error) \
  extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)

#define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
  (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE16(stack) \
  pcre16_jit_stack_free((pcre16_jit_stack *)stack)

#endif /* SUPPORT_PCRE16 */
368    
369    
/* ----- Both modes are supported; a runtime test on use_pcre16 selects the
8-bit or the 16-bit macro. The one exception is PCRE_CONFIG, which maps
straight to pcre_config() without a test.

The statement-like macros are wrapped in do { ... } while (0), so that each
use followed by a semicolon is exactly one statement wherever a statement is
allowed. CHAR_SIZE, STRLEN, PCRE_JIT_STACK_ALLOC and PCRE_MAKETABLES are
expressions and yield a value. ----- */

#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16

#define CHAR_SIZE (use_pcre16? 2:1)

#define PCHARS(lv, p, offset, len, f) \
  do { \
    if (use_pcre16) \
      PCHARS16(lv, p, offset, len, f); \
    else \
      PCHARS8(lv, p, offset, len, f); \
  } while (0)

#define PCHARSV(p, offset, len, f) \
  do { \
    if (use_pcre16) \
      PCHARSV16(p, offset, len, f); \
    else \
      PCHARSV8(p, offset, len, f); \
  } while (0)

#define READ_CAPTURE_NAME(p, cn8, cn16, re) \
  do { \
    if (use_pcre16) \
      READ_CAPTURE_NAME16(p, cn8, cn16, re); \
    else \
      READ_CAPTURE_NAME8(p, cn8, cn16, re); \
  } while (0)

#define SET_PCRE_CALLOUT(callout) \
  do { \
    if (use_pcre16) \
      SET_PCRE_CALLOUT16(callout); \
    else \
      SET_PCRE_CALLOUT8(callout); \
  } while (0)

#define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  do { \
    if (use_pcre16) \
      PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
    else \
      PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata); \
  } while (0)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  do { \
    if (use_pcre16) \
      PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
    else \
      PCRE_COMPILE8(re, pat, options, error, erroffset, tables); \
  } while (0)

#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  do { \
    if (use_pcre16) \
      PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    else \
      PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
  } while (0)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  do { \
    if (use_pcre16) \
      PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
    else \
      PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size); \
  } while (0)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  do { \
    if (use_pcre16) \
      PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    else \
      PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
  } while (0)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do { \
    if (use_pcre16) \
      PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    else \
      PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
  } while (0)

#define PCRE_FREE_STUDY(extra) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_STUDY16(extra); \
    else \
      PCRE_FREE_STUDY8(extra); \
  } while (0)

#define PCRE_FREE_SUBSTRING(substring) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_SUBSTRING16(substring); \
    else \
      PCRE_FREE_SUBSTRING8(substring); \
  } while (0)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_SUBSTRING_LIST16(listptr); \
    else \
      PCRE_FREE_SUBSTRING_LIST8(listptr); \
  } while (0)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    else \
      PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
  } while (0)

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
    else \
      PCRE_GET_STRINGNUMBER8(n, rc, ptr); \
  } while (0)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
    else \
      PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr); \
  } while (0)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
    else \
      PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr); \
  } while (0)

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  (use_pcre16 ? \
     PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
    :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))

#define PCRE_JIT_STACK_FREE(stack) \
  do { \
    if (use_pcre16) \
      PCRE_JIT_STACK_FREE16(stack); \
    else \
      PCRE_JIT_STACK_FREE8(stack); \
  } while (0)

#define PCRE_MAKETABLES \
  (use_pcre16? pcre16_maketables() : pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  do { \
    if (use_pcre16) \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
    else \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables); \
  } while (0)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  do { \
    if (use_pcre16) \
      PCRE_PRINTINT16(re, outfile, debug_lengths); \
    else \
      PCRE_PRINTINT8(re, outfile, debug_lengths); \
  } while (0)

#define PCRE_STUDY(extra, re, options, error) \
  do { \
    if (use_pcre16) \
      PCRE_STUDY16(extra, re, options, error); \
    else \
      PCRE_STUDY8(extra, re, options, error); \
  } while (0)

/* ----- Only 8-bit mode is supported: each generic name is an alias for the
8-bit macro or function. ----- */

#elif defined SUPPORT_PCRE8
#define CHAR_SIZE                 1
#define PCHARS                    PCHARS8
#define PCHARSV                   PCHARSV8
#define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
#define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
#define STRLEN                    STRLEN8
#define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
#define PCRE_COMPILE              PCRE_COMPILE8
#define PCRE_CONFIG               pcre_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
#define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
#define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
#define PCRE_EXEC                 PCRE_EXEC8
#define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
#define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
#define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
#define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
#define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
#define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
#define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
#define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
#define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
#define PCRE_MAKETABLES           pcre_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
#define PCRE_PRINTINT             PCRE_PRINTINT8
#define PCRE_STUDY                PCRE_STUDY8

/* ----- Only 16-bit mode is supported: each generic name is an alias for the
16-bit macro or function. ----- */

#else
#define CHAR_SIZE                 2
#define PCHARS                    PCHARS16
#define PCHARSV                   PCHARSV16
#define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
#define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
#define STRLEN                    STRLEN16
#define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
#define PCRE_COMPILE              PCRE_COMPILE16
#define PCRE_CONFIG               pcre16_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
#define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
#define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
#define PCRE_EXEC                 PCRE_EXEC16
#define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
#define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
#define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
#define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
#define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
#define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
#define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
#define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
#define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
#define PCRE_MAKETABLES           pcre16_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
#define PCRE_PRINTINT             PCRE_PRINTINT16
#define PCRE_STUDY                PCRE_STUDY16
#endif
588    
589     /* ----- End of mode-specific function call macros ----- */
590    
591    
/* Other parameters */

/* Supply CLOCKS_PER_SEC when the headers have not: take CLK_TCK if that
exists, and fall back to 100 otherwise. */

#if !defined CLOCKS_PER_SEC
#if defined CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif

/* This is the default loop count for timing. */

#define LOOPREPEAT 500000
605 nigel 3
/* Static variables. Those without an initializer start out as zero (or NULL),
as all objects with static storage duration do. */

static FILE *outfile;

static int log_store = 0;
static int callout_count, callout_extra;
static int callout_fail_count, callout_fail_id;
static int debug_lengths;
static int first_callout;
static int locale_set = 0;
static int show_malloc;
static int use_utf;

static size_t gotten_store;
static size_t first_gotten_store = 0;

static const unsigned char *last_callout_mark = NULL;
622 nigel 3
623 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
624    
625     static int buffer_size = 50000;
626 ph10 836 static pcre_uint8 *buffer = NULL;
627     static pcre_uint8 *dbuffer = NULL;
628     static pcre_uint8 *pbuffer = NULL;
629 nigel 3
/* Another buffer is needed for translation to 16-bit character strings. It
will be obtained and extended as required. */

#ifdef SUPPORT_PCRE16
static int buffer16_size = 0;
static pcre_uint16 *buffer16 = NULL;

#ifdef SUPPORT_PCRE8

/* The table of operator lengths that is used for 16-bit compiling is needed
in order to swap bytes in a pattern for saving/reloading testing. Luckily, the
data is defined as a macro. However, LINK_SIZE must be adjusted appropriately
for the 16-bit world before the macro is expanded. Just as a safety check,
make sure that COMPILE_PCRE16 is *not* set. */

#if defined COMPILE_PCRE16
#error COMPILE_PCRE16 must not be set when compiling pcretest.c
#endif

/* A LINK_SIZE of 2 becomes 1, and 3 or 4 become 2; anything else is
rejected. */

#if LINK_SIZE < 2 || LINK_SIZE > 4
#error LINK_SIZE must be either 2, 3, or 4
#elif LINK_SIZE == 2
#undef LINK_SIZE
#define LINK_SIZE 1
#else
#undef LINK_SIZE
#define LINK_SIZE 2
#endif

#undef IMM2_SIZE
#define IMM2_SIZE 1

#endif /* SUPPORT_PCRE8 */

/* OP_LENGTHS is expanded here, after any adjustment made above. */

static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
#endif /* SUPPORT_PCRE16 */
666    
/* use_pcre16 defaults to false whenever there is 8-bit support; if there is
also 16-bit support, it can be changed by an option. Without 8-bit support
there must be 16-bit support, so the default is then 1. */

#ifndef SUPPORT_PCRE8
static int use_pcre16 = 1;
#else
static int use_pcre16 = 0;
#endif
676    
/* Textual explanations for runtime error codes. The number at the start of
each line is the array index (presumably the negated PCRE error code - confirm
against the code that indexes this table). A NULL entry has no text of its
own; the comment on the entry says why. */

static const char *errtexts[] = {
  /*  0 */ NULL,  /* 0 is no error */
  /*  1 */ NULL,  /* NOMATCH is handled specially */
  /*  2 */ "NULL argument passed",
  /*  3 */ "bad option value",
  /*  4 */ "magic number missing",
  /*  5 */ "unknown opcode - pattern overwritten?",
  /*  6 */ "no more memory",
  /*  7 */ NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  /*  8 */ "match limit exceeded",
  /*  9 */ "callout error code",
  /* 10 */ NULL,  /* BADUTF8/16 is handled specially */
  /* 11 */ NULL,  /* BADUTF8/16 offset is handled specially */
  /* 12 */ NULL,  /* PARTIAL is handled specially */
  /* 13 */ "not used - internal error",
  /* 14 */ "internal error - pattern overwritten?",
  /* 15 */ "bad count value",
  /* 16 */ "item unsupported for DFA matching",
  /* 17 */ "backreference condition or recursion test not supported for DFA matching",
  /* 18 */ "match limit not supported for DFA matching",
  /* 19 */ "workspace size exceeded in DFA matching",
  /* 20 */ "too much recursion for DFA matching",
  /* 21 */ "recursion limit exceeded",
  /* 22 */ "not used - internal error",
  /* 23 */ "invalid combination of newline options",
  /* 24 */ "bad offset value",
  /* 25 */ NULL,  /* SHORTUTF8/16 is handled specially */
  /* 26 */ "nested recursion at the same subject position",
  /* 27 */ "JIT stack limit reached",
  /* 28 */ "pattern compiled in wrong mode: 8-bit/16-bit error"
};
710    
711 ph10 654
712 ph10 541 /*************************************************
713     * Alternate character tables *
714     *************************************************/
715 nigel 49
716 ph10 545 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
717     using the default tables of the library. However, the T option can be used to
718     select alternate sets of tables, for different kinds of testing. Note also that
719 ph10 541 the L (locale) option also adjusts the tables. */
720    
/* This is the set of tables distributed as default with PCRE. It recognizes
only ASCII characters.

Layout of the vector (1088 bytes in total):
  256 bytes  lower casing table
  256 bytes  case flipping table
  320 bytes  ten 32-byte character class bit maps
  256 bytes  character type flags, one byte per character value */

static const pcre_uint8 tables0[] = {

/* This table is a lower casing table. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table is a case flipping table. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 65, 66, 67, 68, 69, 70, 71,
   72, 73, 74, 75, 76, 77, 78, 79,
   80, 81, 82, 83, 84, 85, 86, 87,
   88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. The
maps appear below in that order. */

  /* space */
  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* xdigit */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* digit */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* upper */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* lower */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* word */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* graph */
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* print */
  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* punct */
  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
  0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* cntrl */
  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* This table identifies various classes of character by individual bits:
  0x01   white space character
  0x02   letter
  0x04   decimal digit
  0x08   hexadecimal digit
  0x10   alphanumeric or '_'
  0x80   regular expression metacharacter or binary zero
*/

  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
  0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
  0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
892    
/* This is a set of tables that came originally from a Windows user. It seems
to be at least an approximation of ISO 8859. In particular, there are
characters greater than 128 that are marked as spaces, letters, etc.

The vector has the same size and section layout as tables0: a 256-byte lower
casing table, a 256-byte case flipping table, ten 32-byte class bit maps, and
a 256-byte character type table. */

static const pcre_uint8 tables1[] = {

/* Lower casing table (256 bytes). */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* Case flipping table (256 bytes). */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Character class bit maps: ten maps of 32 bytes each, written in decimal.
The group labels below assume the same class order as tables0 (space, xdigit,
digit, upper, lower, word, graph, print, punct, cntrl). */

/* space */
0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,

/* xdigit */
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,

/* digit */
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,

/* upper */
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,

/* lower */
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,

/* word */
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,

/* graph */
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,

/* print */
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,

/* punct */
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,

/* cntrl */
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Character type table (256 bytes), one flag byte per character value. The
values are in decimal; the flag bits are presumably the same as those
documented for tables0 (0x01 space, 0x02 letter, 0x04 digit, 0x08 hex digit,
0x10 word character, 0x80 metacharacter). */

128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
1035    
1036    
1037    
1038 ph10 558
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for old systems (SunOS4 is one example) whose C library has no
strerror(). The message is looked up in the traditional sys_errlist[] vector,
which holds sys_nerr entries.

Argument:   n   an errno value
Returns:    the matching entry of sys_errlist[], or a fixed "unknown" text
            when n lies outside the range 0 .. sys_nerr-1
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr)
  return sys_errlist[n];
return "unknown error number";
}
#endif /* HAVE_STRERROR */
1058    
1059    
1060 ph10 667 /*************************************************
1061     * JIT memory callback *
1062     *************************************************/
1063 ph10 558
1064 ph10 667 static pcre_jit_stack* jit_callback(void *arg)
1065     {
1066     return (pcre_jit_stack *)arg;
1067     }
1068 ph10 558
1069 ph10 667
1070 ph10 836 #if !defined NOUTF || defined SUPPORT_PCRE16
1071 ph10 558 /*************************************************
1072 ph10 836 * Convert UTF-8 string to value *
1073     *************************************************/
1074    
1075     /* This function takes one or more bytes that represents a UTF-8 character,
1076     and returns the value of the character.
1077    
1078     Argument:
1079     utf8bytes a pointer to the byte vector
1080     vptr a pointer to an int to receive the value
1081    
1082     Returns: > 0 => the number of bytes consumed
1083     -6 to 0 => malformed UTF-8 character at offset = (-return)
1084     */
1085    
1086     static int
1087     utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1088     {
1089     int c = *utf8bytes++;
1090     int d = c;
1091     int i, j, s;
1092    
1093     for (i = -1; i < 6; i++) /* i is number of additional bytes */
1094     {
1095     if ((d & 0x80) == 0) break;
1096     d <<= 1;
1097     }
1098    
1099     if (i == -1) { *vptr = c; return 1; } /* ascii character */
1100     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1101    
1102     /* i now has a value in the range 1-5 */
1103    
1104     s = 6*i;
1105     d = (c & utf8_table3[i]) << s;
1106    
1107     for (j = 0; j < i; j++)
1108     {
1109     c = *utf8bytes++;
1110     if ((c & 0xc0) != 0x80) return -(j+1);
1111     s -= 6;
1112     d |= (c & 0x3f) << s;
1113     }
1114    
1115     /* Check that encoding was the correct unique one */
1116    
1117     for (j = 0; j < utf8_table1_size; j++)
1118     if (d <= utf8_table1[j]) break;
1119     if (j != i) return -(i+1);
1120    
1121     /* Valid value */
1122    
1123     *vptr = d;
1124     return i+1;
1125     }
1126     #endif /* NOUTF || SUPPORT_PCRE16 */
1127    
1128    
1129    
1130     #if !defined NOUTF || defined SUPPORT_PCRE16
1131     /*************************************************
1132     * Convert character value to UTF-8 *
1133     *************************************************/
1134    
1135     /* This function takes an integer value in the range 0 - 0x7fffffff
1136     and encodes it as a UTF-8 character in 0 to 6 bytes.
1137    
1138     Arguments:
1139     cvalue the character value
1140     utf8bytes pointer to buffer for result - at least 6 bytes long
1141    
1142     Returns: number of characters placed in the buffer
1143     */
1144    
1145     static int
1146     ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1147     {
1148     register int i, j;
1149     for (i = 0; i < utf8_table1_size; i++)
1150     if (cvalue <= utf8_table1[i]) break;
1151     utf8bytes += i;
1152     for (j = i; j > 0; j--)
1153     {
1154     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1155     cvalue >>= 6;
1156     }
1157     *utf8bytes = utf8_table2[i] | cvalue;
1158     return i + 1;
1159     }
1160 ph10 842 #endif
1161 ph10 836
1162    
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* The converted string is always left in buffer16, which is enlarged first if
it is too small. Twice the input length in bytes, plus a terminator, is always
enough: in non-UTF mode every input byte becomes one 16-bit unit, and a UTF-8
character never needs more bytes in UTF-16 than twice its UTF-8 length.

Surrogate values are deliberately not rejected, so that invalid UTF-16 strings
can be built for testing that they are correctly faulted.

Patterns are either plain ASCII or UTF-8; data lines are always UTF-8 so that
values greater than 255 can be expressed.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;

if (buffer16_size < 2*len + 2)
  {
  free(buffer16);                 /* harmless when buffer16 is NULL */
  buffer16_size = 2*len + 2;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

if (utf || data)
  {
  /* The input is UTF-8: decode one character at a time. */

  int c = 0;
  while (len > 0)
    {
    int used = utf82ord(p, &c);
    if (used <= 0) return -1;
    if (c > 0x10ffff) return -2;
    p += used;
    len -= used;

    if (c >= 0x10000)
      {
      /* Needs a surrogate pair, which is possible only in UTF mode. */
      if (!utf) return -3;
      c -= 0x10000;
      *out++ = 0xD800 | (c >> 10);
      *out++ = 0xDC00 | (c & 0x3ff);
      }
    else
      {
      *out++ = c;
      }
    }
  }
else
  {
  /* A non-UTF pattern: widen each byte to one 16-bit unit. */

  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return out - buffer16;
}
#endif
1241    
1242    
1243     /*************************************************
1244 nigel 91 * Read or extend an input line *
1245     *************************************************/
1246    
1247     /* Input lines are read into buffer, but both patterns and data lines can be
1248     continued over multiple input lines. In addition, if the buffer fills up, we
1249     want to automatically expand it so as to be able to handle extremely large
1250     lines that are needed for certain stress tests. When the input buffer is
1251     expanded, the other two buffers must also be expanded likewise, and the
1252     contents of pbuffer, which are a copy of the input for callouts, must be
1253     preserved (for when expansion happens for a data line). This is not the most
1254     optimal way of handling this, but hey, this is just a test program!
1255    
1256     Arguments:
1257     f the file to read
1258     start where in buffer to start (this *must* be within buffer)
1259 ph10 287 prompt for stdin or readline()
1260 nigel 91
1261     Returns: pointer to the start of new data
1262     could be a copy of start, or could be moved
1263     NULL if no data read and EOF reached
1264     */
1265    
1266 ph10 836 static pcre_uint8 *
1267     extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1268 nigel 91 {
1269 ph10 836 pcre_uint8 *here = start;
1270 nigel 91
1271     for (;;)
1272     {
1273 ph10 904 size_t rlen = (size_t)(buffer_size - (here - buffer));
1274 nigel 93
1275 nigel 91 if (rlen > 1000)
1276     {
1277     int dlen;
1278 ph10 289
1279 ph10 287 /* If libreadline support is required, use readline() to read a line if the
1280     input is a terminal. Note that readline() removes the trailing newline, so
1281     we must put it back again, to be compatible with fgets(). */
1282 ph10 289
1283 ph10 287 #ifdef SUPPORT_LIBREADLINE
1284     if (isatty(fileno(f)))
1285     {
1286 ph10 289 size_t len;
1287 ph10 287 char *s = readline(prompt);
1288     if (s == NULL) return (here == start)? NULL : start;
1289     len = strlen(s);
1290 ph10 289 if (len > 0) add_history(s);
1291 ph10 287 if (len > rlen - 1) len = rlen - 1;
1292     memcpy(here, s, len);
1293     here[len] = '\n';
1294 ph10 289 here[len+1] = 0;
1295     free(s);
1296 ph10 287 }
1297 ph10 289 else
1298     #endif
1299    
1300 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
1301 ph10 289
1302 ph10 287 {
1303 ph10 516 if (f == stdin) printf("%s", prompt);
1304 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
1305     return (here == start)? NULL : start;
1306 ph10 289 }
1307    
1308 nigel 91 dlen = (int)strlen((char *)here);
1309     if (dlen > 0 && here[dlen - 1] == '\n') return start;
1310     here += dlen;
1311     }
1312    
1313     else
1314     {
1315     int new_buffer_size = 2*buffer_size;
1316 ph10 836 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1317     pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1318     pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1319 nigel 91
1320     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1321     {
1322     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1323     exit(1);
1324     }
1325    
1326     memcpy(new_buffer, buffer, buffer_size);
1327     memcpy(new_pbuffer, pbuffer, buffer_size);
1328    
1329     buffer_size = new_buffer_size;
1330    
1331     start = new_buffer + (start - buffer);
1332     here = new_buffer + (here - buffer);
1333    
1334     free(buffer);
1335     free(dbuffer);
1336     free(pbuffer);
1337    
1338     buffer = new_buffer;
1339     dbuffer = new_dbuffer;
1340     pbuffer = new_pbuffer;
1341     }
1342     }
1343    
1344     return NULL; /* Control never gets here */
1345     }
1346    
1347    
1348    
1349     /*************************************************
1350 nigel 63 * Read number from string *
1351     *************************************************/
1352    
1353     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1354     around with conditional compilation, just do the job by hand. It is only used
1355 nigel 93 for unpicking arguments, so just keep it simple.
1356 nigel 63
1357     Arguments:
1358     str string to be converted
1359     endptr where to put the end pointer
1360    
1361     Returns: the unsigned long
1362     */
1363    
1364     static int
1365 ph10 836 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1366 nigel 63 {
1367     int result = 0;
1368     while(*str != 0 && isspace(*str)) str++;
1369     while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1370     *endptr = str;
1371     return(result);
1372     }
1373    
1374    
1375    
1376 nigel 49 /*************************************************
1377 ph10 836 * Print one character *
1378 nigel 49 *************************************************/
1379    
1380 ph10 836 /* Print a single character either literally, or as a hex escape. */
1381 nigel 49
1382 ph10 836 static int pchar(int c, FILE *f)
1383 nigel 49 {
1384 ph10 836 if (PRINTOK(c))
1385     {
1386     if (f != NULL) fprintf(f, "%c", c);
1387     return 1;
1388     }
1389 nigel 49
1390 ph10 836 if (c < 0x100)
1391 nigel 49 {
1392 ph10 836 if (use_utf)
1393     {
1394     if (f != NULL) fprintf(f, "\\x{%02x}", c);
1395     return 6;
1396     }
1397     else
1398     {
1399     if (f != NULL) fprintf(f, "\\x%02x", c);
1400     return 4;
1401     }
1402 nigel 49 }
1403    
1404 ph10 836 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1405     return (c <= 0x000000ff)? 6 :
1406     (c <= 0x00000fff)? 7 :
1407     (c <= 0x0000ffff)? 8 :
1408     (c <= 0x000fffff)? 9 : 10;
1409     }
1410 nigel 49
1411    
1412    
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Print a byte string one character at a time by means of pchar(). In UTF
mode each well-formed UTF-8 character that fits within the remaining length is
decoded and printed as a single value; any other byte is printed on its own.

Arguments:
  p           the string
  length      its length in bytes; negative means it is zero-terminated
  f           the output file, or NULL just to count

Returns:      the number of characters printed (or that would be printed)
*/

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int total = 0;
int ch = 0;
int remaining = (length < 0)? (int)strlen((char *)p) : length;

while (remaining > 0)
  {
  int used = 1;
  int rc = 0;

#if !defined NOUTF
  if (use_utf) rc = utf82ord(p, &ch);
#endif

  if (rc > 0 && rc <= remaining)   /* Mustn't run over the end */
    used = rc;
  else
    ch = *p;

  p += used;
  remaining -= used;
  total += pchar(ch, f);
  }

return total;
}
#endif
1451 nigel 49
1452    
1453 nigel 79
#ifdef SUPPORT_PCRE16
/*************************************************
*    Find length of 0-terminated 16-bit string   *
*************************************************/

/* Argument:  p   points to a zero-terminated string of 16-bit units
Returns:      the number of units that precede the terminating zero
*/

static int strlen16(PCRE_SPTR16 p)
{
PCRE_SPTR16 end = p;
while (*end != 0) end++;
return (int)(end - p);
}
#endif  /* SUPPORT_PCRE16 */
1466 nigel 85
1467    
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
If handed a NULL file, just counts chars without printing.

Arguments:
  p           the string, as 16-bit units
  length      number of units; negative means the string is zero-terminated
  f           the output file, or NULL just to count

Returns:      the number of characters printed (or that would be printed)

In UTF mode a high surrogate (0xD800-0xDBFF) that is followed by a low
surrogate (0xDC00-0xDFFF, both ends inclusive) is combined into one code
point. Anything else, including an unpaired surrogate, is printed as it
stands.
*/

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int yield = 0;

if (length < 0)
  length = strlen16(p);

while (length-- > 0)
  {
  int c = *p++ & 0xffff;
#if !defined NOUTF
  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
    int d = *p & 0xffff;
    if (d >= 0xDC00 && d <= 0xDFFF)    /* 0xDFFF is a valid low surrogate */
      {
      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
      length--;
      p++;
      }
    }
#endif
  yield += pchar(c, f);
  }

return yield;
}
#endif  /* SUPPORT_PCRE16 */
1504 nigel 63
1505 ph10 836
1506    
#ifdef SUPPORT_PCRE8
/*************************************************
*     Read a capture name (8-bit) and check it   *
*************************************************/

/* Copy a run of alphanumeric characters into the name list and check that the
compiled pattern has a group of that name, writing a message to outfile if it
does not.

Arguments:
  p           points to the name in the input text
  pp          points to the pointer to the next free slot in the name list;
              updated to point just past the zero that ends the copied name
  re          the compiled pattern

Returns:      pointer to the first input character after the name

Two zeros are stored after the name: the first terminates it and the second,
which the next name overwrites, is left at the new free slot.
*/

static pcre_uint8 *
read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
{
pcre_uint8 *dst = *pp;

for (; isalnum(*p); p++) *dst++ = *p;
dst[0] = 0;
dst[1] = 0;
dst++;

if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(*pp, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = dst;
return p;
}
#endif  /* SUPPORT_PCRE8 */
1530 nigel 23
1531 nigel 3
1532 nigel 23
#ifdef SUPPORT_PCRE16
/*************************************************
*    Read a capture name (16-bit) and check it   *
*************************************************/

/* The 16-bit counterpart of the 8-bit reader. Note that the text being read
is 8-bit; each alphanumeric byte is widened to one 16-bit unit as it is copied.

Arguments:
  p           points to the name in the (8-bit) input text
  pp          points to the pointer to the next free slot in the 16-bit name
              list; updated to point just past the zero that ends the name
  re          the compiled pattern

Returns:      pointer to the first input character after the name

A message is written to outfile if the pattern has no group of that name.
*/

static pcre_uint8 *
read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
{
pcre_uint16 *dst = *pp;

for (; isalnum(*p); p++) *dst++ = *p;
dst[0] = 0;
dst[1] = 0;
dst++;

if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(*pp, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = dst;
return p;
}
#endif  /* SUPPORT_PCRE16 */
1557    
1558    
1559    
/*************************************************
*              Callout function                  *
*************************************************/

/* Called from PCRE as a result of the (?C) item. We print out where we are in
the match. Yield zero unless more callouts than the fail count, or the callout
data is not zero.

Argument:   cb   the callout block passed in by the matching function

Returns:    the callout data value, if callout data was supplied and is not
              zero
            1 if this callout's number equals callout_fail_id and the running
              count of such callouts has reached callout_fail_count
            0 otherwise
*/

static int callout(pcre_callout_block *cb)
{
/* The subject is echoed only for the first callout of a match, or for every
callout when extra detail has been requested; otherwise f is NULL. */

FILE *f = (first_callout | callout_extra)? outfile : NULL;
int i, pre_start, post_start, subject_length;

/* With extra detail, list the captured substrings so far. callout_extra being
set guarantees that f is outfile here. */

if (callout_extra)
  {
  fprintf(f, "Callout %d: last capture = %d\n",
    cb->callout_number, cb->capture_last);

  for (i = 0; i < cb->capture_top * 2; i += 2)
    {
    if (cb->offset_vector[i] < 0)
      fprintf(f, "%2d: <unset>\n", i/2);
    else
      {
      fprintf(f, "%2d: ", i/2);
      PCHARSV(cb->subject, cb->offset_vector[i],
        cb->offset_vector[i+1] - cb->offset_vector[i], f);
      fprintf(f, "\n");
      }
    }
  }

/* Re-print the subject in canonical form, the first time or if giving full
details. On subsequent calls in the same match, we use pchars just to find the
printed lengths of the substrings. */

if (f != NULL) fprintf(f, "--->");

/* PCHARS is defined elsewhere in this file; from its use here it appears to
store the printed width of the substring in its first argument. */

PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
PCHARS(post_start, cb->subject, cb->start_match,
  cb->current_position - cb->start_match, f);

/* Measure the whole subject without printing (NULL file). */

PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);

PCHARSV(cb->subject, cb->current_position,
  cb->subject_length - cb->current_position, f);

if (f != NULL) fprintf(f, "\n");

/* Always print appropriate indicators, with callout number if not already
shown. For automatic callouts, show the pattern offset. */

if (cb->callout_number == 255)
  {
  fprintf(outfile, "%+3d ", cb->pattern_position);
  if (cb->pattern_position > 99) fprintf(outfile, "\n ");
  }
else
  {
  if (callout_extra) fprintf(outfile, " ");
  else fprintf(outfile, "%3d ", cb->callout_number);
  }

/* A caret under the start of the match ... */

for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
fprintf(outfile, "^");

/* ... and a second one under the current position, if it is further on. */

if (post_start > 0)
  {
  for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
  fprintf(outfile, "^");
  }

/* Pad out past the end of the subject, then show the next pattern item, taken
from the copy of the pattern kept in pbuffer. A zero item length is shown as a
single character. */

for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
  fprintf(outfile, " ");

fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
  pbuffer + cb->pattern_position);

fprintf(outfile, "\n");
first_callout = 0;

/* Report the mark only when it differs from the one seen at the previous
callout (the pointers are compared, not the text). */

if (cb->mark != last_callout_mark)
  {
  if (cb->mark == NULL)
    fprintf(outfile, "Latest Mark: <unset>\n");
  else
    {
    fprintf(outfile, "Latest Mark: ");
    PCHARSV(cb->mark, 0, -1, outfile);
    putc('\n', outfile);
    }
  last_callout_mark = cb->mark;
  }

/* Callout data, when present, is read as an int; a non-zero value is printed
and becomes the result. */

if (cb->callout_data != NULL)
  {
  int callout_data = *((int *)(cb->callout_data));
  if (callout_data != 0)
    {
    fprintf(outfile, "Callout data = %d\n", callout_data);
    return callout_data;
    }
  }

/* Only callouts numbered callout_fail_id are counted towards the fail
count. */

return (cb->callout_number != callout_fail_id)? 0 :
       (++callout_count >= callout_fail_count)? 1 : 0;
}
1667    
1668    
1669 nigel 63 /*************************************************
1670 nigel 73 * Local malloc functions *
1671 nigel 63 *************************************************/
1672 nigel 3
1673 ph10 667 /* Alternative malloc function, to test functionality and save the size of a
1674 ph10 836 compiled re, which is the first store request that pcre_compile() makes. The
1675     show_malloc variable is set only during matching. */
1676 nigel 3
1677     static void *new_malloc(size_t size)
1678     {
1679 nigel 73 void *block = malloc(size);
1680 nigel 43 gotten_store = size;
1681 ph10 836 if (first_gotten_store == 0) first_gotten_store = size;
1682 nigel 73 if (show_malloc)
1683 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1684 nigel 73 return block;
1685 nigel 3 }
1686    
1687 nigel 73 static void new_free(void *block)
1688     {
1689     if (show_malloc)
1690     fprintf(outfile, "free %p\n", block);
1691     free(block);
1692     }
1693 nigel 3
1694 nigel 73 /* For recursion malloc/free, to test stacking calls */
1695    
1696     static void *stack_malloc(size_t size)
1697     {
1698     void *block = malloc(size);
1699     if (show_malloc)
1700 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1701 nigel 73 return block;
1702     }
1703    
1704     static void stack_free(void *block)
1705     {
1706     if (show_malloc)
1707     fprintf(outfile, "stack_free %p\n", block);
1708     free(block);
1709     }
1710    
1711    
1712 nigel 63 /*************************************************
1713     * Call pcre_fullinfo() *
1714     *************************************************/
1715 nigel 43
1716 ph10 836 /* Get one piece of information from the pcre_fullinfo() function. When only
1717     one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1718     value, but the code is defensive.
1719 nigel 43
1720 ph10 836 Arguments:
1721     re compiled regex
1722     study study data
1723     option PCRE_INFO_xxx option
1724     ptr where to put the data
1725    
1726     Returns: 0 when OK, < 0 on error
1727     */
1728    
1729     static int
1730     new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1731 nigel 43 {
1732     int rc;
1733 ph10 836
1734     if (use_pcre16)
1735     #ifdef SUPPORT_PCRE16
1736 zherczeg 852 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1737 ph10 836 #else
1738     rc = PCRE_ERROR_BADMODE;
1739     #endif
1740     else
1741     #ifdef SUPPORT_PCRE8
1742     rc = pcre_fullinfo(re, study, option, ptr);
1743     #else
1744     rc = PCRE_ERROR_BADMODE;
1745     #endif
1746    
1747     if (rc < 0)
1748     {
1749     fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1750     use_pcre16? "16" : "", option);
1751     if (rc == PCRE_ERROR_BADMODE)
1752     fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1753     "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1754     }
1755    
1756     return rc;
1757 nigel 43 }
1758    
1759    
1760    
1761 nigel 63 /*************************************************
1762 ph10 836 * Swap byte functions *
1763 nigel 75 *************************************************/
1764    
1765 ph10 836 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1766     value, respectively.
1767    
1768     Arguments:
1769     value any number
1770    
1771     Returns: the byte swapped value
1772     */
1773    
1774     static pcre_uint32
1775     swap_uint32(pcre_uint32 value)
1776 nigel 75 {
1777     return ((value & 0x000000ff) << 24) |
1778     ((value & 0x0000ff00) << 8) |
1779     ((value & 0x00ff0000) >> 8) |
1780 ph10 836 (value >> 24);
1781 nigel 75 }
1782    
1783 ph10 836 static pcre_uint16
1784     swap_uint16(pcre_uint16 value)
1785     {
1786     return (value >> 8) | (value << 8);
1787     }
1788 nigel 75
1789    
1790    
1791     /*************************************************
1792 ph10 836 * Flip bytes in a compiled pattern *
1793     *************************************************/
1794    
1795     /* This function is called if the 'F' option was present on a pattern that is
1796     to be written to a file. We flip the bytes of all the integer fields in the
1797     regex data block and the study block. In 16-bit mode this also flips relevant
1798     bytes in the pattern itself. This is to make it possible to test PCRE's
1799     ability to reload byte-flipped patterns, e.g. those compiled on a different
1800     architecture. */
1801    
1802     static void
1803     regexflip(pcre *ere, pcre_extra *extra)
1804     {
1805 zherczeg 852 REAL_PCRE *re = (REAL_PCRE *)ere;
1806 ph10 836 #ifdef SUPPORT_PCRE16
1807     int op;
1808     pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1809     int length = re->name_count * re->name_entry_size;
1810     #ifdef SUPPORT_UTF
1811     BOOL utf = (re->options & PCRE_UTF16) != 0;
1812     BOOL utf16_char = FALSE;
1813     #endif /* SUPPORT_UTF */
1814     #endif /* SUPPORT_PCRE16 */
1815    
1816     /* Always flip the bytes in the main data block and study blocks. */
1817    
1818     re->magic_number = REVERSED_MAGIC_NUMBER;
1819     re->size = swap_uint32(re->size);
1820     re->options = swap_uint32(re->options);
1821     re->flags = swap_uint16(re->flags);
1822     re->top_bracket = swap_uint16(re->top_bracket);
1823     re->top_backref = swap_uint16(re->top_backref);
1824     re->first_char = swap_uint16(re->first_char);
1825     re->req_char = swap_uint16(re->req_char);
1826     re->name_table_offset = swap_uint16(re->name_table_offset);
1827     re->name_entry_size = swap_uint16(re->name_entry_size);
1828     re->name_count = swap_uint16(re->name_count);
1829    
1830     if (extra != NULL)
1831     {
1832     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1833     rsd->size = swap_uint32(rsd->size);
1834     rsd->flags = swap_uint32(rsd->flags);
1835     rsd->minlength = swap_uint32(rsd->minlength);
1836     }
1837    
1838     /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1839     in the name table, if present, and then in the pattern itself. */
1840    
1841     #ifdef SUPPORT_PCRE16
1842     if (!use_pcre16) return;
1843    
1844     while(TRUE)
1845     {
1846     /* Swap previous characters. */
1847     while (length-- > 0)
1848     {
1849     *ptr = swap_uint16(*ptr);
1850     ptr++;
1851     }
1852     #ifdef SUPPORT_UTF
1853     if (utf16_char)
1854     {
1855     if ((ptr[-1] & 0xfc00) == 0xd800)
1856     {
1857     /* We know that there is only one extra character in UTF-16. */
1858     *ptr = swap_uint16(*ptr);
1859     ptr++;
1860     }
1861     }
1862     utf16_char = FALSE;
1863     #endif /* SUPPORT_UTF */
1864    
1865     /* Get next opcode. */
1866    
1867     length = 0;
1868     op = *ptr;
1869     *ptr++ = swap_uint16(op);
1870    
1871     switch (op)
1872     {
1873     case OP_END:
1874     return;
1875    
1876     #ifdef SUPPORT_UTF
1877     case OP_CHAR:
1878     case OP_CHARI:
1879     case OP_NOT:
1880     case OP_NOTI:
1881     case OP_STAR:
1882     case OP_MINSTAR:
1883     case OP_PLUS:
1884     case OP_MINPLUS:
1885     case OP_QUERY:
1886     case OP_MINQUERY:
1887     case OP_UPTO:
1888     case OP_MINUPTO:
1889     case OP_EXACT:
1890     case OP_POSSTAR:
1891     case OP_POSPLUS:
1892     case OP_POSQUERY:
1893     case OP_POSUPTO:
1894     case OP_STARI:
1895     case OP_MINSTARI:
1896     case OP_PLUSI:
1897     case OP_MINPLUSI:
1898     case OP_QUERYI:
1899     case OP_MINQUERYI:
1900     case OP_UPTOI:
1901     case OP_MINUPTOI:
1902     case OP_EXACTI:
1903     case OP_POSSTARI:
1904     case OP_POSPLUSI:
1905     case OP_POSQUERYI:
1906     case OP_POSUPTOI:
1907     case OP_NOTSTAR:
1908     case OP_NOTMINSTAR:
1909     case OP_NOTPLUS:
1910     case OP_NOTMINPLUS:
1911     case OP_NOTQUERY:
1912     case OP_NOTMINQUERY:
1913     case OP_NOTUPTO:
1914     case OP_NOTMINUPTO:
1915     case OP_NOTEXACT:
1916     case OP_NOTPOSSTAR:
1917     case OP_NOTPOSPLUS:
1918     case OP_NOTPOSQUERY:
1919     case OP_NOTPOSUPTO:
1920     case OP_NOTSTARI:
1921     case OP_NOTMINSTARI:
1922     case OP_NOTPLUSI:
1923     case OP_NOTMINPLUSI:
1924     case OP_NOTQUERYI:
1925     case OP_NOTMINQUERYI:
1926     case OP_NOTUPTOI:
1927     case OP_NOTMINUPTOI:
1928     case OP_NOTEXACTI:
1929     case OP_NOTPOSSTARI:
1930     case OP_NOTPOSPLUSI:
1931     case OP_NOTPOSQUERYI:
1932     case OP_NOTPOSUPTOI:
1933     if (utf) utf16_char = TRUE;
1934     #endif
1935     /* Fall through. */
1936    
1937     default:
1938     length = OP_lengths16[op] - 1;
1939     break;
1940    
1941     case OP_CLASS:
1942     case OP_NCLASS:
1943     /* Skip the character bit map. */
1944     ptr += 32/sizeof(pcre_uint16);
1945     length = 0;
1946     break;
1947    
1948     case OP_XCLASS:
1949 zherczeg 839 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1950     if (LINK_SIZE > 1)
1951     length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1952     - (1 + LINK_SIZE + 1));
1953     else
1954     length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1955    
1956 ph10 836 /* Reverse the size of the XCLASS instance. */
1957 zherczeg 839 *ptr = swap_uint16(*ptr);
1958 ph10 836 ptr++;
1959     if (LINK_SIZE > 1)
1960     {
1961 zherczeg 839 *ptr = swap_uint16(*ptr);
1962 ph10 836 ptr++;
1963     }
1964    
1965     op = *ptr;
1966     *ptr = swap_uint16(op);
1967 zherczeg 839 ptr++;
1968 ph10 836 if ((op & XCL_MAP) != 0)
1969     {
1970     /* Skip the character bit map. */
1971     ptr += 32/sizeof(pcre_uint16);
1972     length -= 32/sizeof(pcre_uint16);
1973     }
1974     break;
1975     }
1976     }
1977     /* Control should never reach here in 16 bit mode. */
1978     #endif /* SUPPORT_PCRE16 */
1979     }
1980    
1981    
1982    
1983     /*************************************************
1984 nigel 87 * Check match or recursion limit *
1985     *************************************************/
1986    
1987     static int
1988 ph10 836 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1989 nigel 87 int start_offset, int options, int *use_offsets, int use_size_offsets,
1990     int flag, unsigned long int *limit, int errnumber, const char *msg)
1991     {
1992     int count;
1993     int min = 0;
1994     int mid = 64;
1995     int max = -1;
1996    
1997     extra->flags |= flag;
1998    
1999     for (;;)
2000     {
2001     *limit = mid;
2002    
2003 ph10 836 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2004 nigel 87 use_offsets, use_size_offsets);
2005    
2006     if (count == errnumber)
2007     {
2008     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2009     min = mid;
2010     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2011     }
2012    
2013     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2014     count == PCRE_ERROR_PARTIAL)
2015     {
2016     if (mid == min + 1)
2017     {
2018     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2019     break;
2020     }
2021     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2022     max = mid;
2023     mid = (min + mid)/2;
2024     }
2025     else break; /* Some other error */
2026     }
2027    
2028     extra->flags &= ~flag;
2029     return count;
2030     }
2031    
2032    
2033    
2034     /*************************************************
2035 ph10 227 * Case-independent strncmp() function *
2036     *************************************************/
2037    
2038     /*
2039     Arguments:
2040     s first string
2041     t second string
2042     n number of characters to compare
2043    
2044     Returns: < 0, = 0, or > 0, according to the comparison
2045     */
2046    
2047     static int
2048 ph10 836 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2049 ph10 227 {
2050     while (n--)
2051     {
2052     int c = tolower(*s++) - tolower(*t++);
2053     if (c) return c;
2054     }
2055     return 0;
2056     }
2057    
2058    
2059    
2060     /*************************************************
2061 nigel 91 * Check newline indicator *
2062     *************************************************/
2063    
2064 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2065     a message and return 0 if there is no match.
2066 nigel 91
2067     Arguments:
2068     p points after the leading '<'
2069     f file for error message
2070    
2071     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2072     */
2073    
2074     static int
2075 ph10 836 check_newline(pcre_uint8 *p, FILE *f)
2076 nigel 91 {
2077 ph10 836 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2078     if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2079     if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2080     if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2081     if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2082     if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2083     if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2084 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
2085     return 0;
2086     }
2087    
2088    
2089    
2090     /*************************************************
2091 nigel 93 * Usage function *
2092     *************************************************/
2093    
/* Write the command-line synopsis and the list of options to stdout. Lines for
features that were not compiled in (16-bit library, DFA matching, the POSIX
interface) are omitted, and the readline line says whether readline() is
linked. */

static void
usage(void)
{
printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
       "Input and output default to stdin and stdout.\n");

#ifdef SUPPORT_LIBREADLINE
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif

printf("\nOptions:\n");

#ifdef SUPPORT_PCRE16
printf(" -16 use the 16-bit library\n");
#endif

printf(" -b show compiled code\n"
       " -C show PCRE compile-time options and exit\n"
       " -C arg show a specific compile-time option\n"
       " and exit with its value. The arg can be:\n"
       " linksize internal link size [2, 3, 4]\n"
       " pcre8 8 bit library support enabled [0, 1]\n"
       " pcre16 16 bit library support enabled [0, 1]\n"
       " utf Unicode Transformation Format supported [0, 1]\n"
       " ucp Unicode Properties supported [0, 1]\n"
       " jit Just-in-time compiler supported [0, 1]\n"
       " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n"
       " -d debug: show compiled code and information (-b and -i)\n");

#if !defined NODFA
printf(" -dfa force DFA matching for all subjects\n");
#endif

printf(" -help show usage information\n"
       " -i show information about compiled patterns\n"
       " -M find MATCH_LIMIT minimum for each subject\n"
       " -m output memory used information\n"
       " -o <n> set size of offsets vector to <n>\n");

#if !defined NOPOSIX
printf(" -p use POSIX interface\n");
#endif

printf(" -q quiet: do not output PCRE version number at start\n"
       " -S <n> set stack size to <n> megabytes\n"
       " -s force each pattern to be studied at basic level\n"
       " -s+ force each pattern to be studied, using JIT if available\n"
       " -t time compilation and execution\n"
       " -t <n> time compilation and execution, repeating <n> times\n"
       " -tm time execution (matching) only\n"
       " -tm <n> time execution (matching) only, repeating <n> times\n");
}
2140    
2141    
2142    
2143     /*************************************************
2144 nigel 63 * Main Program *
2145     *************************************************/
2146 nigel 43
2147 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
2148     consist of a regular expression, in delimiters and optionally followed by
2149     options, followed by a set of test data, terminated by an empty line. */
2150    
2151     int main(int argc, char **argv)
2152     {
2153     FILE *infile = stdin;
2154 ph10 836 const char *version;
2155 nigel 3 int options = 0;
2156     int study_options = 0;
2157 ph10 386 int default_find_match_limit = FALSE;
2158 nigel 3 int op = 1;
2159     int timeit = 0;
2160 nigel 93 int timeitm = 0;
2161 nigel 3 int showinfo = 0;
2162 nigel 31 int showstore = 0;
2163 ph10 667 int force_study = -1;
2164     int force_study_options = 0;
2165 nigel 87 int quiet = 0;
2166 nigel 53 int size_offsets = 45;
2167     int size_offsets_max;
2168 nigel 77 int *offsets = NULL;
2169 nigel 53 #if !defined NOPOSIX
2170 nigel 3 int posix = 0;
2171 nigel 53 #endif
2172 nigel 3 int debug = 0;
2173 nigel 11 int done = 0;
2174 nigel 77 int all_use_dfa = 0;
2175     int yield = 0;
2176 nigel 91 int stack_size;
2177 nigel 3
2178 ph10 667 pcre_jit_stack *jit_stack = NULL;
2179    
2180 ph10 836 /* These vectors store, end-to-end, a list of zero-terminated captured
2181     substring names, each list itself being terminated by an empty name. Assume
2182     that 1024 is plenty long enough for the few names we'll be testing. It is
2183     easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2184 ph10 881 for the actual memory, to ensure alignment. */
2185 ph10 667
2186 ph10 836 pcre_uint16 copynames[1024];
2187     pcre_uint16 getnames[1024];
2188 nigel 69
2189 ph10 881 #ifdef SUPPORT_PCRE16
2190 ph10 836 pcre_uint16 *cn16ptr;
2191     pcre_uint16 *gn16ptr;
2192 ph10 881 #endif
2193 nigel 91
2194 ph10 881 #ifdef SUPPORT_PCRE8
2195 ph10 836 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2196     pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2197     pcre_uint8 *cn8ptr;
2198     pcre_uint8 *gn8ptr;
2199 ph10 881 #endif
2200 nigel 91
2201 ph10 836 /* Get buffers from malloc() so that valgrind will check their misuse when
2202     debugging. They grow automatically when very long lines are read. The 16-bit
2203     buffer (buffer16) is obtained only if needed. */
2204 nigel 69
2205 ph10 836 buffer = (pcre_uint8 *)malloc(buffer_size);
2206     dbuffer = (pcre_uint8 *)malloc(buffer_size);
2207     pbuffer = (pcre_uint8 *)malloc(buffer_size);
2208 nigel 69
2209 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
2210 nigel 3
2211 nigel 93 outfile = stdout;
2212    
2213     /* The following _setmode() stuff is some Windows magic that tells its runtime
2214     library to translate CRLF into a single LF character. At least, that's what
2215     I've been told: never having used Windows I take this all on trust. Originally
2216     it set 0x8000, but then I was advised that _O_BINARY was better. */
2217    
2218 nigel 75 #if defined(_WIN32) || defined(WIN32)
2219 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
2220     #endif
2221 nigel 75
2222 ph10 836 /* Get the version number: both pcre_version() and pcre16_version() give the
2223     same answer. We just need to ensure that we call one that is available. */
2224    
2225     #ifdef SUPPORT_PCRE8
2226     version = pcre_version();
2227     #else
2228     version = pcre16_version();
2229     #endif
2230    
2231 nigel 3 /* Scan options */
2232    
2233     while (argc > 1 && argv[op][0] == '-')
2234     {
2235 ph10 836 pcre_uint8 *endptr;
2236 nigel 53
2237 ph10 606 if (strcmp(argv[op], "-m") == 0) showstore = 1;
2238 ph10 667 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2239 ph10 691 else if (strcmp(argv[op], "-s+") == 0)
2240 ph10 667 {
2241     force_study = 1;
2242     force_study_options = PCRE_STUDY_JIT_COMPILE;
2243 ph10 691 }
2244 ph10 836 else if (strcmp(argv[op], "-16") == 0)
2245     {
2246     #ifdef SUPPORT_PCRE16
2247     use_pcre16 = 1;
2248     #else
2249     printf("** This version of PCRE was built without 16-bit support\n");
2250     exit(1);
2251     #endif
2252     }
2253 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2254 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
2255 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2256     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2257 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2258 nigel 79 #if !defined NODFA
2259 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2260 nigel 79 #endif
2261 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2262 ph10 836 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2263 nigel 65 *endptr == 0))
2264 nigel 53 {
2265     op++;
2266     argc--;
2267     }
2268 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2269     {
2270     int both = argv[op][2] == 0;
2271     int temp;
2272 ph10 836 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2273 nigel 93 *endptr == 0))
2274     {
2275     timeitm = temp;
2276     op++;
2277     argc--;
2278     }
2279     else timeitm = LOOPREPEAT;
2280     if (both) timeit = timeitm;
2281     }
2282 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2283 ph10 836 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2284 nigel 91 *endptr == 0))
2285     {
2286 ph10 641 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2287 nigel 91 printf("PCRE: -S not supported on this OS\n");
2288     exit(1);
2289     #else
2290     int rc;
2291     struct rlimit rlim;
2292     getrlimit(RLIMIT_STACK, &rlim);
2293     rlim.rlim_cur = stack_size * 1024 * 1024;
2294     rc = setrlimit(RLIMIT_STACK, &rlim);
2295     if (rc != 0)
2296     {
2297     printf("PCRE: setrlimit() failed with error %d\n", rc);
2298     exit(1);
2299     }
2300     op++;
2301     argc--;
2302     #endif
2303     }
2304 nigel 53 #if !defined NOPOSIX
2305 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
2306 nigel 53 #endif
2307 nigel 63 else if (strcmp(argv[op], "-C") == 0)
2308     {
2309     int rc;
2310 ph10 392 unsigned long int lrc;
2311 ph10 836
2312     if (argc > 2)
2313     {
2314     if (strcmp(argv[op + 1], "linksize") == 0)
2315     {
2316     (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2317     printf("%d\n", rc);
2318     yield = rc;
2319     goto EXIT;
2320     }
2321     if (strcmp(argv[op + 1], "pcre8") == 0)
2322     {
2323     #ifdef SUPPORT_PCRE8
2324     printf("1\n");
2325     yield = 1;
2326     #else
2327     printf("0\n");
2328     yield = 0;
2329     #endif
2330     goto EXIT;
2331     }
2332     if (strcmp(argv[op + 1], "pcre16") == 0)
2333     {
2334     #ifdef SUPPORT_PCRE16
2335     printf("1\n");
2336     yield = 1;
2337     #else
2338     printf("0\n");
2339     yield = 0;
2340     #endif
2341     goto EXIT;
2342     }
2343     if (strcmp(argv[op + 1], "utf") == 0)
2344     {
2345     #ifdef SUPPORT_PCRE8
2346     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2347     printf("%d\n", rc);
2348     yield = rc;
2349     #else
2350     (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2351     printf("%d\n", rc);
2352     yield = rc;
2353     #endif
2354     goto EXIT;
2355     }
2356     if (strcmp(argv[op + 1], "ucp") == 0)
2357     {
2358     (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2359     printf("%d\n", rc);
2360     yield = rc;
2361     goto EXIT;
2362     }
2363     if (strcmp(argv[op + 1], "jit") == 0)
2364     {
2365     (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2366     printf("%d\n", rc);
2367     yield = rc;
2368     goto EXIT;
2369     }
2370 ph10 838 if (strcmp(argv[op + 1], "newline") == 0)
2371 ph10 842 {
2372 ph10 838 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2373     /* Note that these values are always the ASCII values, even
2374     in EBCDIC environments. CR is 13 and NL is 10. */
2375     printf("%s\n", (rc == 13)? "CR" :
2376     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2377     (rc == -2)? "ANYCRLF" :
2378     (rc == -1)? "ANY" : "???");
2379     goto EXIT;
2380 ph10 842 }
2381 ph10 838 printf("Unknown -C option: %s\n", argv[op + 1]);
2382 ph10 836 goto EXIT;
2383     }
2384    
2385     printf("PCRE version %s\n", version);
2386 nigel 63 printf("Compiled with\n");
2387 ph10 836
2388     /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2389     are set, either both UTFs are supported or both are not supported. */
2390    
2391     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2392     printf(" 8-bit and 16-bit support\n");
2393 nigel 63 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2394 ph10 836 if (rc)
2395     printf(" UTF-8 and UTF-16 support\n");
2396     else
2397     printf(" No UTF-8 or UTF-16 support\n");
2398     #elif defined SUPPORT_PCRE8
2399     printf(" 8-bit support only\n");
2400     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2401 nigel 63 printf(" %sUTF-8 support\n", rc? "" : "No ");
2402 ph10 836 #else
2403     printf(" 16-bit support only\n");
2404     (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2405     printf(" %sUTF-16 support\n", rc? "" : "No ");
2406     #endif
2407    
2408     (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2409 nigel 75 printf(" %sUnicode properties support\n", rc? "" : "No ");
2410 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2411 ph10 674 if (rc)
2412 ph10 890 {
2413     const char *arch;
2414 ph10 908 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2415 ph10 890 printf(" Just-in-time compiler support: %s\n", arch);
2416 ph10 903 }
2417 ph10 674 else
2418     printf(" No just-in-time compiler support\n");
2419 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2420 ph10 391 /* Note that these values are always the ASCII values, even
2421 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
2422 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2423     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2424 ph10 150 (rc == -2)? "ANYCRLF" :
2425 nigel 93 (rc == -1)? "ANY" : "???");
2426 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2427 ph10 231 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2428     "all Unicode newlines");
2429 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2430 nigel 63 printf(" Internal link size = %d\n", rc);
2431 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2432 nigel 63 printf(" POSIX malloc threshold = %d\n", rc);
2433 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2434 ph10 376 printf(" Default match limit = %ld\n", lrc);
2435 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2436 ph10 376 printf(" Default recursion depth limit = %ld\n", lrc);
2437 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2438 ph10 895 printf(" Match recursion uses %s", rc? "stack" : "heap");
2439     if (showstore)
2440 ph10 903 {
2441 ph10 901 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2442 ph10 903 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2443 ph10 895 }
2444 ph10 903 printf("\n");
2445 ph10 121 goto EXIT;
2446 nigel 63 }
2447 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
2448     strcmp(argv[op], "--help") == 0)
2449     {
2450     usage();
2451     goto EXIT;
2452     }
2453 nigel 3 else
2454     {
2455 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
2456 nigel 93 usage();
2457 nigel 77 yield = 1;
2458     goto EXIT;
2459 nigel 3 }
2460     op++;
2461     argc--;
2462     }
2463    
2464 nigel 53 /* Get the store for the offsets vector, and remember what it was */
2465    
2466     size_offsets_max = size_offsets;
2467 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2468 nigel 53 if (offsets == NULL)
2469     {
2470     printf("** Failed to get %d bytes of memory for offsets vector\n",
2471 ph10 151 (int)(size_offsets_max * sizeof(int)));
2472 nigel 77 yield = 1;
2473     goto EXIT;
2474 nigel 53 }
2475    
2476 nigel 3 /* Sort out the input and output files */
2477    
2478     if (argc > 1)
2479     {
2480 nigel 93 infile = fopen(argv[op], INPUT_MODE);
2481 nigel 3 if (infile == NULL)
2482     {
2483     printf("** Failed to open %s\n", argv[op]);
2484 nigel 77 yield = 1;
2485     goto EXIT;
2486 nigel 3 }
2487     }
2488    
2489     if (argc > 2)
2490     {
2491 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
2492 nigel 3 if (outfile == NULL)
2493     {
2494     printf("** Failed to open %s\n", argv[op+1]);
2495 nigel 77 yield = 1;
2496     goto EXIT;
2497 nigel 3 }
2498     }
2499    
2500     /* Set alternative malloc function */
2501    
2502 ph10 836 #ifdef SUPPORT_PCRE8
2503 nigel 3 pcre_malloc = new_malloc;
2504 nigel 73 pcre_free = new_free;
2505     pcre_stack_malloc = stack_malloc;
2506     pcre_stack_free = stack_free;
2507 ph10 836 #endif
2508 nigel 3
2509 ph10 836 #ifdef SUPPORT_PCRE16
2510     pcre16_malloc = new_malloc;
2511     pcre16_free = new_free;
2512     pcre16_stack_malloc = stack_malloc;
2513     pcre16_stack_free = stack_free;
2514     #endif
2515    
2516 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
2517 nigel 3
2518 ph10 836 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2519 nigel 3
2520     /* Main loop */
2521    
2522 nigel 11 while (!done)
2523 nigel 3 {
2524     pcre *re = NULL;
2525     pcre_extra *extra = NULL;
2526 nigel 37
2527     #if !defined NOPOSIX /* There are still compilers that require no indent */
2528 nigel 3 regex_t preg;
2529 nigel 45 int do_posix = 0;
2530 nigel 37 #endif
2531    
2532 nigel 7 const char *error;
2533 ph10 836 pcre_uint8 *markptr;
2534     pcre_uint8 *p, *pp, *ppp;
2535     pcre_uint8 *to_file = NULL;
2536     const pcre_uint8 *tables = NULL;
2537 zherczeg 847 unsigned long int get_options;
2538 nigel 75 unsigned long int true_size, true_study_size = 0;
2539     size_t size, regex_gotten_store;
2540 ph10 654 int do_allcaps = 0;
2541 ph10 512 int do_mark = 0;
2542 nigel 3 int do_study = 0;
2543 ph10 654 int no_force_study = 0;
2544 nigel 25 int do_debug = debug;
2545 nigel 35 int do_G = 0;
2546     int do_g = 0;
2547 nigel 25 int do_showinfo = showinfo;
2548 nigel 35 int do_showrest = 0;
2549 ph10 616 int do_showcaprest = 0;
2550 nigel 75 int do_flip = 0;
2551 nigel 93 int erroroffset, len, delimiter, poffset;
2552 nigel 3
2553 ph10 836 use_utf = 0;
2554 ph10 211 debug_lengths = 1;
2555 nigel 63
2556 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2557 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2558 nigel 63 fflush(outfile);
2559 nigel 3
2560     p = buffer;
2561     while (isspace(*p)) p++;
2562     if (*p == 0) continue;
2563    
2564 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
2565 nigel 3
2566 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2567     {
2568 zherczeg 841 pcre_uint32 magic;
2569 ph10 836 pcre_uint8 sbuf[8];
2570 nigel 75 FILE *f;
2571    
2572     p++;
2573 zherczeg 839 if (*p == '!')
2574     {
2575     do_debug = TRUE;
2576     do_showinfo = TRUE;
2577     p++;
2578     }
2579    
2580 nigel 75 pp = p + (int)strlen((char *)p);
2581     while (isspace(pp[-1])) pp--;
2582     *pp = 0;
2583    
2584     f = fopen((char *)p, "rb");
2585     if (f == NULL)
2586     {
2587     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2588     continue;
2589     }
2590    
2591 zherczeg 839 first_gotten_store = 0;
2592 nigel 75 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2593    
2594     true_size =
2595     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2596     true_study_size =
2597     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2598    
2599 zherczeg 852 re = (pcre *)new_malloc(true_size);
2600 ph10 836 regex_gotten_store = first_gotten_store;
2601 nigel 75
2602     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2603    
2604 zherczeg 852 magic = ((REAL_PCRE *)re)->magic_number;
2605 nigel 75 if (magic != MAGIC_NUMBER)
2606     {
2607 ph10 836 if (swap_uint32(magic) == MAGIC_NUMBER)
2608 nigel 75 {
2609     do_flip = 1;
2610     }
2611     else
2612     {
2613     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2614     fclose(f);
2615     continue;
2616     }
2617     }
2618    
2619 zherczeg 839 /* We hide the byte-invert info for little and big endian tests. */
2620 ph10 612 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2621 zherczeg 839 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2622 nigel 75
2623 ph10 612 /* Now see if there is any following study data. */
2624 nigel 75
2625     if (true_study_size != 0)
2626     {
2627     pcre_study_data *psd;
2628    
2629     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2630     extra->flags = PCRE_EXTRA_STUDY_DATA;
2631    
2632     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2633     extra->study_data = psd;
2634    
2635     if (fread(psd, 1, true_study_size, f) != true_study_size)
2636     {
2637     FAIL_READ:
2638     fprintf(outfile, "Failed to read data from %s\n", p);
2639 ph10 836 if (extra != NULL)
2640     {
2641     PCRE_FREE_STUDY(extra);
2642     }
2643 nigel 75 if (re != NULL) new_free(re);
2644     fclose(f);
2645     continue;
2646     }
2647     fprintf(outfile, "Study data loaded from %s\n", p);
2648     do_study = 1; /* To get the data output if requested */
2649     }
2650     else fprintf(outfile, "No study data\n");
2651    
2652 ph10 836 /* Flip the necessary bytes. */
2653     if (do_flip)
2654     {
2655 zherczeg 839 int rc;
2656     PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2657     if (rc == PCRE_ERROR_BADMODE)
2658     {
2659     /* Simulate the result of the function call below. */
2660     fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2661     use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2662     fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2663     "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2664     continue;
2665     }
2666 ph10 836 }
2667    
2668     /* Need to know if UTF-8 for printing data strings. */
2669    
2670     if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2671     use_utf = (get_options & PCRE_UTF8) != 0;
2672    
2673 nigel 75 fclose(f);
2674     goto SHOW_INFO;
2675     }
2676    
2677     /* In-line pattern (the usual case). Get the delimiter and seek the end of
2678 ph10 836 the pattern; if it isn't complete, read more. */
2679 nigel 75
2680 nigel 3 delimiter = *p++;
2681    
2682 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
2683 nigel 3 {
2684 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2685 nigel 3 goto SKIP_DATA;
2686     }
2687    
2688     pp = p;
2689 ph10 530 poffset = (int)(p - buffer);
2690 nigel 3
2691     for(;;)
2692     {
2693 nigel 29 while (*pp != 0)
2694     {
2695     if (*pp == '\\' && pp[1] != 0) pp++;
2696     else if (*pp == delimiter) break;
2697     pp++;
2698     }
2699 nigel 3 if (*pp != 0) break;
2700 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2701 nigel 3 {
2702     fprintf(outfile, "** Unexpected EOF\n");
2703 nigel 11 done = 1;
2704     goto CONTINUE;
2705 nigel 3 }
2706 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2707 nigel 3 }
2708    
2709 nigel 93 /* The buffer may have moved while being extended; reset the start of data
2710     pointer to the correct relative point in the buffer. */
2711    
2712     p = buffer + poffset;
2713    
2714 nigel 29 /* If the first character after the delimiter is backslash, make
2715     the pattern end with backslash. This is purely to provide a way
2716     of testing for the error message when a pattern ends with backslash. */
2717    
2718     if (pp[1] == '\\') *pp++ = '\\';
2719    
2720 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2721     for callouts. */
2722 nigel 3
2723     *pp++ = 0;
2724 nigel 75 strcpy((char *)pbuffer, (char *)p);
2725 nigel 3
2726     /* Look for options after final delimiter */
2727    
2728     options = 0;
2729 ph10 836 study_options = 0;
2730 nigel 31 log_store = showstore; /* default from command line */
2731    
2732 nigel 3 while (*pp != 0)
2733     {
2734     switch (*pp++)
2735     {
2736 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
2737 nigel 35 case 'g': do_g = 1; break;
2738 nigel 3 case 'i': options |= PCRE_CASELESS; break;
2739     case 'm': options |= PCRE_MULTILINE; break;
2740     case 's': options |= PCRE_DOTALL; break;
2741     case 'x': options |= PCRE_EXTENDED; break;
2742 nigel 25
2743 ph10 616 case '+':
2744 ph10 654 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2745 ph10 616 break;
2746 ph10 654
2747     case '=': do_allcaps = 1; break;
2748 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
2749 nigel 93 case 'B': do_debug = 1; break;
2750 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2751 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
2752 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2753 nigel 75 case 'F': do_flip = 1; break;
2754 nigel 35 case 'G': do_G = 1; break;
2755 nigel 25 case 'I': do_showinfo = 1; break;
2756 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
2757 ph10 512 case 'K': do_mark = 1; break;
2758 nigel 31 case 'M': log_store = 1; break;
2759 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2760 nigel 37
2761     #if !defined NOPOSIX
2762 nigel 3 case 'P': do_posix = 1; break;
2763 nigel 37 #endif
2764    
2765 ph10 654 case 'S':
2766 ph10 691 if (do_study == 0)
2767 ph10 612 {
2768 ph10 691 do_study = 1;
2769 ph10 667 if (*pp == '+')
2770     {
2771     study_options |= PCRE_STUDY_JIT_COMPILE;
2772 ph10 691 pp++;
2773     }
2774     }
2775 ph10 667 else
2776     {
2777 ph10 612 do_study = 0;
2778     no_force_study = 1;
2779 ph10 654 }
2780 ph10 612 break;
2781    
2782 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
2783 ph10 535 case 'W': options |= PCRE_UCP; break;
2784 nigel 3 case 'X': options |= PCRE_EXTRA; break;
2785 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2786 ph10 126 case 'Z': debug_lengths = 0; break;
2787 ph10 836 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2788 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2789 ph10 545
2790 ph10 541 case 'T':
2791     switch (*pp++)
2792     {
2793     case '0': tables = tables0; break;
2794     case '1': tables = tables1; break;
2795 ph10 545
2796 ph10 541 case '\r':
2797     case '\n':
2798 ph10 545 case ' ':
2799     case 0:
2800 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
2801 ph10 545 goto SKIP_DATA;
2802    
2803     default:
2804 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2805 ph10 545 goto SKIP_DATA;
2806 ph10 541 }
2807 ph10 545 break;
2808 nigel 25
2809     case 'L':
2810     ppp = pp;
2811 nigel 93 /* The '\r' test here is so that it works on Windows. */
2812     /* The NUL (zero byte) test is just in case this is an unterminated line. */
2813     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2814 nigel 25 *ppp = 0;
2815     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2816     {
2817     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2818     goto SKIP_DATA;
2819     }
2820 nigel 93 locale_set = 1;
2821 ph10 836 tables = PCRE_MAKETABLES;
2822 nigel 25 pp = ppp;
2823     break;
2824    
2825 nigel 75 case '>':
2826     to_file = pp;
2827     while (*pp != 0) pp++;
2828     while (isspace(pp[-1])) pp--;
2829     *pp = 0;
2830     break;
2831    
2832 nigel 91 case '<':
2833     {
2834 ph10 836 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2835 ph10 336 {
2836     options |= PCRE_JAVASCRIPT_COMPAT;
2837 ph10 345 pp += 3;
2838 ph10 336 }
2839     else
2840 ph10 345 {
2841 ph10 336 int x = check_newline(pp, outfile);
2842     if (x == 0) goto SKIP_DATA;
2843     options |= x;
2844     while (*pp++ != '>');
2845 ph10 345 }
2846 nigel 91 }
2847     break;
2848    
2849 nigel 77 case '\r': /* So that it works in Windows */
2850     case '\n':
2851     case ' ':
2852     break;
2853 nigel 75
2854 nigel 3 default:
2855     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2856     goto SKIP_DATA;
2857     }
2858     }
2859    
2860 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
2861 nigel 25 timing, showing, or debugging options, nor the ability to pass over
2862 ph10 836 local character tables. Neither does it have 16-bit support. */
2863 nigel 3
2864 nigel 37 #if !defined NOPOSIX
2865 nigel 3 if (posix || do_posix)
2866     {
2867     int rc;
2868     int cflags = 0;
2869 nigel 75
2870 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2871     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2872 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2873 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2874     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2875 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2876 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2877 nigel 87
2878 ph10 836 first_gotten_store = 0;
2879 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
2880    
2881     /* Compilation failed; go back for another re, skipping to blank line
2882     if non-interactive. */
2883    
2884     if (rc != 0)
2885     {
2886 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2887 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2888     goto SKIP_DATA;
2889     }
2890     }
2891    
2892     /* Handle compiling via the native interface */
2893    
2894     else
2895 nigel 37 #endif /* !defined NOPOSIX */
2896    
2897 nigel 3 {
2898 ph10 836 /* In 16-bit mode, convert the input. */
2899    
2900     #ifdef SUPPORT_PCRE16
2901     if (use_pcre16)
2902     {
2903     switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2904     {
2905     case -1:
2906     fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2907     "converted to UTF-16\n");
2908     goto SKIP_DATA;
2909    
2910     case -2:
2911     fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2912     "cannot be converted to UTF-16\n");
2913     goto SKIP_DATA;
2914 ph10 842
2915 ph10 836 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2916     fprintf(outfile, "**Failed: character value greater than 0xffff "
2917     "cannot be converted to 16-bit in non-UTF mode\n");
2918 ph10 842 goto SKIP_DATA;
2919 ph10 836
2920     default:
2921     break;
2922     }
2923     p = (pcre_uint8 *)buffer16;
2924     }
2925     #endif
2926    
2927     /* Compile many times when timing */
2928    
2929 nigel 93 if (timeit > 0)
2930 nigel 3 {
2931     register int i;
2932     clock_t time_taken;
2933     clock_t start_time = clock();
2934 nigel 93 for (i = 0; i < timeit; i++)
2935 nigel 3 {
2936 ph10 836 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2937 nigel 3 if (re != NULL) free(re);
2938     }
2939     time_taken = clock() - start_time;
2940 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
2941     (((double)time_taken * 1000.0) / (double)timeit) /
2942 nigel 63 (double)CLOCKS_PER_SEC);
2943 nigel 3 }
2944    
2945 ph10 836 first_gotten_store = 0;
2946     PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2947 nigel 3
2948     /* Compilation failed; go back for another re, skipping to blank line
2949     if non-interactive. */
2950    
2951     if (re == NULL)
2952     {
2953     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2954     SKIP_DATA:
2955     if (infile != stdin)
2956     {
2957     for (;;)
2958     {
2959 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
2960 nigel 11 {
2961     done = 1;
2962     goto CONTINUE;
2963     }
2964 nigel 3 len = (int)strlen((char *)buffer);
2965     while (len > 0 && isspace(buffer[len-1])) len--;
2966     if (len == 0) break;
2967     }
2968     fprintf(outfile, "\n");
2969     }
2970 nigel 25 goto CONTINUE;
2971 nigel 3 }
2972 ph10 416
2973     /* Compilation succeeded. It is now possible to set the UTF-8 option from
2974     within the regex; check for this so that we know how to process the data
2975 ph10 412 lines. */
2976 ph10 416
2977 ph10 836 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2978     goto SKIP_DATA;
2979     if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2980 nigel 3
2981 ph10 836 /* Extract the size for possible writing before possibly flipping it,
2982     and remember the store that was got. */
2983 nigel 3
2984 zherczeg 852 true_size = ((REAL_PCRE *)re)->size;
2985 ph10 836 regex_gotten_store = first_gotten_store;
2986    
2987     /* Output code size information if requested */
2988    
2989 nigel 63 if (log_store)
2990     fprintf(outfile, "Memory allocation (code space): %d\n",
2991 ph10 836 (int)(first_gotten_store -
2992 zherczeg 852 sizeof(REAL_PCRE) -
2993     ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
2994 nigel 63
2995 ph10 612 /* If -s or /S was present, study the regex to generate additional info to
2996 ph10 654 help with the matching, unless the pattern has the SS option, which
2997 ph10 612 suppresses the effect of /S (used for a few test patterns where studying is
2998     never sensible). */
2999 nigel 75
3000 ph10 667 if (do_study || (force_study >= 0 && !no_force_study))
3001 nigel 75 {
3002 nigel 93 if (timeit > 0)
3003 nigel 75 {
3004     register int i;
3005     clock_t time_taken;
3006     clock_t start_time = clock();
3007 nigel 93 for (i = 0; i < timeit; i++)
3008 ph10 836 {
3009     PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3010     }
3011 nigel 75 time_taken = clock() - start_time;
3012 ph10 836 if (extra != NULL)
3013     {
3014     PCRE_FREE_STUDY(extra);
3015     }
3016 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
3017     (((double)time_taken * 1000.0) / (double)timeit) /
3018 nigel 75 (double)CLOCKS_PER_SEC);
3019     }
3020 ph10 836 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3021 nigel 75 if (error != NULL)
3022     fprintf(outfile, "Failed to study: %s\n", error);
3023     else if (extra != NULL)
3024 ph10 836 {
3025 nigel 75 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3026 ph10 836 if (log_store)
3027     {
3028     size_t jitsize;
3029     if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3030     jitsize != 0)
3031     fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3032     }
3033     }
3034 nigel 75 }
3035 ph10 788
3036 ph10 510 /* If /K was present, we set up for handling MARK data. */
3037 ph10 512
3038 ph10 510 if (do_mark)
3039     {
3040     if (extra == NULL)
3041     {
3042     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3043     extra->flags = 0;
3044     }
3045 ph10 512 extra->mark = &markptr;
3046 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
3047 ph10 512 }
3048 nigel 75
3049 ph10 836 /* Extract and display information from the compiled data if required. */
3050 nigel 75
3051     SHOW_INFO:
3052    
3053 nigel 93 if (do_debug)
3054     {
3055     fprintf(outfile, "------------------------------------------------------------------\n");
3056 ph10 836 PCRE_PRINTINT(re, outfile, debug_lengths);
3057 nigel 93 }
3058 ph10 416
3059 ph10 412 /* We already have the options in get_options (see above) */
3060 nigel 93
3061 nigel 25 if (do_showinfo)
3062 nigel 3 {
3063 ph10 412 unsigned long int all_options;
3064 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3065 ph10 227 hascrorlf;
3066 nigel 63 int nameentrysize, namecount;
3067 ph10 836 const pcre_uint8 *nametable;
3068 nigel 3
3069 ph10 836 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3070     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3071     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3072     new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3073     new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3074     new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3075     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3076     new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3077     new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3078     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3079     new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3080     != 0)
3081     goto SKIP_DATA;
3082 nigel 43
3083 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
3084 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3085 nigel 77 (int)size, (int)regex_gotten_store);
3086 nigel 43
3087     fprintf(outfile, "Capturing subpattern count = %d\n", count);
3088     if (backrefmax > 0)
3089     fprintf(outfile, "Max back reference = %d\n", backrefmax);
3090 nigel 63
3091     if (namecount > 0)
3092     {
3093     fprintf(outfile, "Named capturing subpatterns:\n");
3094     while (namecount-- > 0)
3095     {
3096 ph10 836 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3097     int imm2_size = use_pcre16 ? 1 : 2;
3098     #else
3099     int imm2_size = IMM2_SIZE;
3100     #endif
3101     int length = (int)STRLEN(nametable + imm2_size);
3102     fprintf(outfile, " ");
3103     PCHARSV(nametable, imm2_size, length, outfile);
3104     while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3105     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3106     fprintf(outfile, "%3d\n", use_pcre16?
3107     (int)(((PCRE_SPTR16)nametable)[0])
3108     :((int)nametable[0] << 8) | (int)nametable[1]);
3109     nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3110     #else
3111     fprintf(outfile, "%3d\n", GET2(nametable, 0));
3112     #ifdef SUPPORT_PCRE8
3113 nigel 63 nametable += nameentrysize;
3114 ph10 836 #else
3115     nametable += nameentrysize * 2;
3116     #endif
3117     #endif
3118 nigel 63 }
3119     }
3120 ph10 172
3121 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3122 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3123 nigel 63
3124 zherczeg 852 all_options = ((REAL_PCRE *)re)->options;
3125 ph10 836 if (do_flip) all_options = swap_uint32(all_options);
3126 nigel 75
3127 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
3128 ph10 576 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3129 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3130     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3131     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3132     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3133 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3134 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3135 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3136     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3137 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3138     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3139     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3140 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3141 ph10 836 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3142 ph10 518 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3143 ph10 836 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3144 ph10 576 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3145 nigel 91 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3146 ph10 172
3147 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3148 nigel 43
3149 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
3150 nigel 91 {
3151     case PCRE_NEWLINE_CR:
3152     fprintf(outfile, "Forced newline sequence: CR\n");
3153     break;
3154 nigel 43
3155 nigel 91 case PCRE_NEWLINE_LF:
3156     fprintf(outfile, "Forced newline sequence: LF\n");
3157     break;
3158    
3159     case PCRE_NEWLINE_CRLF:
3160     fprintf(outfile, "Forced newline sequence: CRLF\n");
3161     break;
3162    
3163 ph10 149 case PCRE_NEWLINE_ANYCRLF:
3164     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3165     break;
3166    
3167 nigel 93 case PCRE_NEWLINE_ANY:
3168     fprintf(outfile, "Forced newline sequence: ANY\n");
3169     break;
3170    
3171 nigel 91 default:
3172     break;
3173     }
3174    
3175 nigel 43 if (first_char == -1)
3176     {
3177 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
3178 nigel 43 }
3179     else if (first_char < 0)
3180     {
3181     fprintf(outfile, "No first char\n");
3182     }
3183     else
3184     {
3185 ph10 836 const char *caseless =
3186 zherczeg 852 ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3187 nigel 63 "" : " (caseless)";
3188 ph10 836
3189     if (PRINTOK(first_char))
3190     fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3191 nigel 3 else
3192 ph10 836 {
3193     fprintf(outfile, "First char = ");
3194     pchar(first_char, outfile);
3195     fprintf(outfile, "%s\n", caseless);
3196     }
3197 nigel 43 }
3198 nigel 37
3199 nigel 43 if (need_char < 0)
3200     {
3201     fprintf(outfile, "No need char\n");
3202 nigel 3 }
3203 nigel 43 else
3204     {
3205 ph10 836 const char *caseless =
3206 zherczeg 852 ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3207 nigel 63 "" : " (caseless)";
3208 ph10 836
3209     if (PRINTOK(need_char))
3210     fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3211 nigel 43 else
3212 ph10 836 {
3213     fprintf(outfile, "Need char = ");
3214     pchar(need_char, outfile);
3215     fprintf(outfile, "%s\n", caseless);
3216     }
3217 nigel 43 }
3218 nigel 75
3219     /* Don't output study size; at present it is in any case a fixed
3220     value, but it varies, depending on the computer architecture, and
3221     so messes up the test suite. (And with the /F option, it might be
3222 ph10 654 flipped.) If study was forced by an external -s, don't show this
3223 ph10 612 information unless -i or -d was also present. This means that, except
3224     when auto-callouts are involved, the output from runs with and without
3225     -s should be identical. */
3226 nigel 75
3227 ph10 667 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3228 nigel 75 {
3229     if (extra == NULL)
3230     fprintf(outfile, "Study returned NULL\n");
3231     else
3232     {
3233 ph10 836 pcre_uint8 *start_bits = NULL;
3234 ph10 455 int minlength;
3235 ph10 461
3236 ph10 836 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3237     fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3238 ph10 461
3239 ph10 836 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3240 nigel 75 {
3241 ph10 836 if (start_bits == NULL)
3242     fprintf(outfile, "No set of starting bytes\n");
3243     else
3244 nigel 75 {
3245 ph10 836 int i;
3246     int c = 24;
3247     fprintf(outfile, "Starting byte set: ");
3248     for (i = 0; i < 256; i++)
3249 nigel 75 {
3250 ph10 836 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3251 nigel 75 {
3252 ph10 836 if (c > 75)
3253     {
3254     fprintf(outfile, "\n ");
3255     c = 2;
3256     }
3257     if (PRINTOK(i) && i != ' ')
3258     {
3259     fprintf(outfile, "%c ", i);
3260     c += 2;
3261     }
3262     else
3263     {
3264     fprintf(outfile, "\\x%02x ", i);
3265     c += 5;
3266     }
3267 nigel 75 }
3268     }
3269 ph10 836 fprintf(outfile, "\n");
3270 nigel 75 }
3271     }
3272     }
3273 ph10 691
3274 ph10 667 /* Show this only if the JIT was set by /S, not by -s. */
3275 ph10 691
3276 ph10 667 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3277     {
3278 ph10 691 int jit;
3279 ph10 836 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3280     {
3281     if (jit)
3282     fprintf(outfile, "JIT study was successful\n");
3283     else
3284 ph10 691 #ifdef SUPPORT_JIT
3285 ph10 836 fprintf(outfile, "JIT study was not successful\n");
3286 ph10 667 #else
3287 ph10 836 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3288 ph10 667 #endif
3289 ph10 836 }
3290 ph10 691 }
3291 nigel 75 }
3292 nigel 3 }
3293    
3294 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
3295     that is all. The first 8 bytes of the file are the regex length and then
3296     the study length, in big-endian order. */
3297 nigel 3
3298 nigel 75 if (to_file != NULL)
3299 nigel 3 {
3300 nigel 75 FILE *f = fopen((char *)to_file, "wb");
3301     if (f == NULL)
3302 nigel 3 {
3303 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3304 nigel 3 }
3305 nigel 75 else
3306     {
3307 ph10 836 pcre_uint8 sbuf[8];
3308 ph10 259
3309 ph10 836 if (do_flip) regexflip(re, extra);
3310     sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3311     sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3312     sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3313     sbuf[3] = (pcre_uint8)((true_size) & 255);
3314     sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3315     sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3316     sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3317     sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3318 nigel 3
3319 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
3320     fwrite(re, 1, true_size, f) < true_size)
3321     {
3322     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3323     }
3324 nigel 3 else
3325     {
3326 ph10 612 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3327 ph10 654
3328 ph10 658 /* If there is study data, write it. */
3329 ph10 654
3330 nigel 75 if (extra != NULL)
3331 nigel 3 {
3332 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
3333     true_study_size)
3334 nigel 3 {
3335 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
3336     strerror(errno));
3337 nigel 3 }
3338 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
3339 nigel 3 }
3340     }
3341 nigel 75 fclose(f);
3342 nigel 3 }
3343 nigel 77
3344     new_free(re);
3345 ph10 836 if (extra != NULL)
3346     {
3347     PCRE_FREE_STUDY(extra);
3348     }
3349 ph10 545 if (locale_set)
3350 ph10 541 {
3351     new_free((void *)tables);
3352     setlocale(LC_CTYPE, "C");
3353 ph10 545 locale_set = 0;
3354     }
3355 nigel 75 continue; /* With next regex */
3356 nigel 3 }
3357 nigel 75 } /* End of non-POSIX compile */
3358 nigel 3
3359     /* Read data lines and test them */
3360    
3361     for (;;)
3362     {
3363 ph10 836 pcre_uint8 *q;
3364     pcre_uint8 *bptr;
3365 nigel 57 int *use_offsets = offsets;
3366 nigel 53 int use_size_offsets = size_offsets;
3367 nigel 63 int callout_data = 0;
3368     int callout_data_set = 0;
3369 nigel 3 int count, c;
3370 nigel 29 int copystrings = 0;
3371 ph10 386 int find_match_limit = default_find_match_limit;
3372 nigel 29 int getstrings = 0;
3373     int getlist = 0;
3374 nigel 39 int gmatched = 0;
3375 nigel 35 int start_offset = 0;
3376 ph10 579 int start_offset_sign = 1;
3377 nigel 41 int g_notempty = 0;
3378 nigel 77 int use_dfa = 0;
3379 nigel 3
3380 nigel 91 *copynames = 0;
3381     *getnames = 0;
3382    
3383 ph10 881 #ifdef SUPPORT_PCRE16
3384 ph10 836 cn16ptr = copynames;
3385     gn16ptr = getnames;
3386 ph10 881 #endif
3387 ph10 903 #ifdef SUPPORT_PCRE8
3388 ph10 836 cn8ptr = copynames8;
3389     gn8ptr = getnames8;
3390 ph10 903 #endif
3391 nigel 91
3392 ph10 836 SET_PCRE_CALLOUT(callout);
3393 nigel 63 first_callout = 1;
3394 ph10 654 last_callout_mark = NULL;
3395 nigel 63 callout_extra = 0;
3396     callout_count = 0;
3397     callout_fail_count = 999999;
3398     callout_fail_id = -1;
3399 nigel 73 show_malloc = 0;
3400 ph10 836 options = 0;
3401 nigel 63
3402 nigel 91 if (extra != NULL) extra->flags &=
3403     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3404    
3405     len = 0;
3406     for (;;)
3407 nigel 11 {
3408 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3409 nigel 91 {
3410 ph10 537 if (len > 0) /* Reached EOF without hitting a newline */
3411     {
3412 ph10 545 fprintf(outfile, "\n");
3413 ph10 537 break;
3414 ph10 545 }
3415 nigel 91 done = 1;
3416     goto CONTINUE;
3417     }
3418     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3419     len = (int)strlen((char *)buffer);
3420     if (buffer[len-1] == '\n') break;
3421 nigel 11 }
3422 nigel 3
3423     while (len > 0 && isspace(buffer[len-1])) len--;
3424     buffer[len] = 0;
3425     if (len == 0) break;
3426    
3427     p = buffer;
3428     while (isspace(*p)) p++;
3429    
3430 ph10 147 bptr = q = dbuffer;
3431 nigel 3 while ((c = *p++) != 0)
3432     {
3433     int i = 0;
3434     int n = 0;
3435 ph10 842
3436 ph10 836 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3437     In non-UTF mode, allow the value of the byte to fall through to later,
3438     where values greater than 127 are turned into UTF-8 when running in
3439     16-bit mode. */
3440 ph10 842
3441 ph10 836 if (c != '\\')
3442 nigel 3 {
3443 ph10 836 if (use_utf)
3444     {
3445     *q++ = c;
3446     continue;
3447 ph10 842 }
3448     }
3449    
3450 ph10 836 /* Handle backslash escapes */
3451 ph10 842
3452 ph10 836 else switch ((c = *p++))
3453     {
3454 nigel 3 case 'a': c = 7; break;
3455     case 'b': c = '\b'; break;
3456     case 'e': c = 27; break;
3457     case 'f': c = '\f'; break;
3458     case 'n': c = '\n'; break;
3459     case 'r': c = '\r'; break;
3460     case 't': c = '\t'; break;
3461     case 'v': c = '\v'; break;
3462    
3463     case '0': case '1': case '2': case '3':
3464     case '4': case '5': case '6': case '7':
3465     c -= '0';
3466     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3467     c = c * 8 + *p++ - '0';
3468     break;
3469    
3470     case 'x':
3471 nigel 49 if (*p == '{')
3472     {
3473 ph10 836 pcre_uint8 *pt = p;
3474 nigel 49 c = 0;
3475 ph10 738
3476 ph10 735 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3477     when isxdigit() is a macro that refers to its argument more than
3478     once. This is banned by the C Standard, but apparently happens in at
3479     least one MacOS environment. */
3480 ph10 738
3481 ph10 735 for (pt++; isxdigit(*pt); pt++)
3482 ph10 862 {
3483     if (++i == 9)
3484     fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3485 ph10 903 "using only the first eight.\n");
3486 ph10 862 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3487 ph10 903 }
3488 nigel 49 if (*pt == '}')
3489     {
3490     p = pt + 1;
3491     break;
3492     }
3493 ph10 836 /* Not correct form for \x{...}; fall through */
3494 nigel 49 }
3495    
3496 ph10 842 /* \x without {} always defines just one byte in 8-bit mode. This
3497     allows UTF-8 characters to be constructed byte by byte, and also allows
3498     invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3499     Otherwise, pass it down to later code so that it can be turned into
3500 ph10 836 UTF-8 when running in 16-bit mode. */
3501 nigel 49
3502 nigel 3 c = 0;
3503     while (i++ < 2 && isxdigit(*p))
3504     {
3505 ph10 734 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3506 nigel 3 p++;
3507     }
3508 ph10 836 if (use_utf)
3509 ph10 842 {
3510 ph10 836 *q++ = c;
3511 ph10 842 continue;
3512     }
3513 nigel 3 break;
3514    
3515 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
3516 nigel 3 p--;
3517     continue;
3518    
3519 nigel 75 case '>':
3520 ph10 579 if (*p == '-')
3521 ph10 567 {
3522     start_offset_sign = -1;
3523     p++;
3524 ph10 579 }
3525 nigel 75 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3526 ph10 579 start_offset *= start_offset_sign;
3527 nigel 75 continue;
3528    
3529 nigel 3 case 'A': /* Option setting */
3530     options |= PCRE_ANCHORED;
3531     continue;
3532    
3533     case 'B':
3534     options |= PCRE_NOTBOL;
3535     continue;
3536    
3537 nigel 29 case 'C':
3538 nigel 63 if (isdigit(*p)) /* Set copy string */
3539     {
3540     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3541     copystrings |= 1 << n;
3542     }
3543     else if (isalnum(*p))
3544     {
3545 ph10 836 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3546 nigel 63 }
3547     else if (*p == '+')
3548     {
3549     callout_extra = 1;
3550     p++;
3551     }
3552     else if (*p == '-')
3553     {
3554 ph10 836 SET_PCRE_CALLOUT(NULL);
3555 nigel 63 p++;
3556     }
3557     else if (*p == '!')
3558     {
3559     callout_fail_id = 0;
3560     p++;
3561     while(isdigit(*p))
3562     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3563     callout_fail_count = 0;
3564     if (*p == '!')
3565     {
3566     p++;
3567     while(isdigit(*p))
3568     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3569     }
3570     }
3571     else if (*p == '*')
3572     {
3573     int sign = 1;
3574     callout_data = 0;
3575     if (*(++p) == '-') { sign = -1; p++; }
3576     while(isdigit(*p))
3577     callout_data = callout_data * 10 + *p++ - '0';
3578     callout_data *= sign;
3579     callout_data_set = 1;
3580     }
3581 nigel 29 continue;
3582    
3583 nigel 79 #if !defined NODFA
3584 nigel 77 case 'D':
3585 nigel 79 #if !defined NOPOSIX
3586 nigel 77 if (posix || do_posix)
3587     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3588     else
3589 nigel 79 #endif
3590 nigel 77 use_dfa = 1;
3591     continue;
3592 ph10 553 #endif
3593 nigel 77
3594 ph10 553 #if !defined NODFA
3595 nigel 77 case 'F':
3596     options |= PCRE_DFA_SHORTEST;
3597     continue;
3598 nigel 79 #endif
3599 nigel 77
3600 nigel 29 case 'G':
3601 nigel 63 if (isdigit(*p))
3602     {
3603     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3604     getstrings |= 1 << n;
3605     }
3606     else if (isalnum(*p))
3607     {
3608 ph10 836 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3609 nigel 63 }
3610 nigel 29 continue;
3611 ph10 691
3612 ph10 667 case 'J':
3613     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3614 ph10 691 if (extra != NULL
3615     && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3616 ph10 667 && extra->executable_jit != NULL)
3617 ph10 691 {
3618 zherczeg 852 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3619 ph10 836 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3620     PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3621 ph10 691 }
3622 ph10 667 continue;
3623 nigel 29
3624     case 'L':
3625     getlist = 1;
3626     continue;
3627    
3628 nigel 63 case 'M':
3629     find_match_limit = 1;
3630     continue;
3631    
3632 nigel 37 case 'N':
3633 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
3634     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3635 ph10 461 else
3636 ph10 442 options |= PCRE_NOTEMPTY;
3637 nigel 37 continue;
3638    
3639 nigel 3 case 'O':
3640     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3641 nigel 53 if (n > size_offsets_max)
3642     {
3643     size_offsets_max = n;
3644 nigel 57 free(offsets);
3645 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3646 nigel 53 if (offsets == NULL)
3647     {
3648     printf("** Failed to get %d bytes of memory for offsets vector\n",
3649 ph10 151 (int)(size_offsets_max * sizeof(int)));
3650 nigel 77 yield = 1;
3651     goto EXIT;
3652 nigel 53 }
3653     }
3654     use_size_offsets = n;
3655 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3656 nigel 3 continue;
3657    
3658 nigel 75 case 'P':
3659 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3660 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3661 nigel 75 continue;
3662    
3663 nigel 91 case 'Q':
3664     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3665     if (extra == NULL)
3666     {
3667     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3668     extra->flags = 0;
3669     }
3670     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3671     extra->match_limit_recursion = n;
3672     continue;
3673    
3674     case 'q':
3675     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3676     if (extra == NULL)
3677     {
3678     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3679     extra->flags = 0;
3680     }
3681     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3682     extra->match_limit = n;
3683     continue;
3684    
3685 nigel 79 #if !defined NODFA
3686 nigel 77 case 'R':
3687     options |= PCRE_DFA_RESTART;
3688     continue;
3689 nigel 79 #endif
3690 nigel 77
3691 nigel 73 case 'S':
3692     show_malloc = 1;
3693     continue;
3694 ph10 392
3695 ph10 389 case 'Y':
3696     options |= PCRE_NO_START_OPTIMIZE;
3697 ph10 392 continue;
3698 nigel 73
3699 nigel 3 case 'Z':
3700     options |= PCRE_NOTEOL;
3701     continue;
3702 nigel 71
3703     case '?':
3704     options |= PCRE_NO_UTF8_CHECK;
3705     continue;
3706 nigel 91
3707     case '<':
3708     {
3709     int x = check_newline(p, outfile);
3710     if (x == 0) goto NEXT_DATA;
3711     options |= x;
3712     while (*p++ != '>');
3713     }
3714     continue;
3715 nigel 3 }
3716 ph10 836
3717 ph10 842 /* We now have a character value in c that may be greater than 255. In
3718     16-bit mode, we always convert characters to UTF-8 so that values greater
3719 ph10 836 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3720 ph10 842 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3721 ph10 836 mode must have come from \x{...} or octal constructs because values from
3722     \x.. get this far only in non-UTF mode. */
3723    
3724 ph10 842 #if !defined NOUTF || defined SUPPORT_PCRE16
3725 ph10 836 if (use_pcre16 || use_utf)
3726     {
3727     pcre_uint8 buff8[8];
3728     int ii, utn;
3729     utn = ord2utf8(c, buff8);
3730     for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3731     }
3732     else
3733 ph10 842 #endif
3734 ph10 836 {
3735     if (c > 255)
3736     {
3737     fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3738     "and UTF-8 mode is not enabled.\n", c);
3739     fprintf(outfile, "** Truncation will probably give the wrong "
3740     "result.\n");
3741     }
3742     *q++ = c;
3743     }
3744 nigel 3 }
3745 ph10 842
3746 ph10 836 /* Reached end of subject string */
3747 ph10 842
3748 nigel 9 *q = 0;
3749 ph10 530 len = (int)(q - dbuffer);
3750 ph10 545
3751 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
3752 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3753 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
3754 ph10 371
3755 ph10 363 #if !defined NOPOSIX
3756     if (posix || do_posix)
3757     {
3758     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3759 ph10 371 bptr += buffer_size - len - 1;
3760 ph10 363 }
3761 ph10 371 else
3762     #endif
3763 ph10 363 {
3764     memmove(bptr + buffer_size - len, bptr, len);
3765 ph10 371 bptr += buffer_size - len;
3766     }
3767 nigel 3
3768 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
3769     {
3770     printf("**Match limit not relevant for DFA matching: ignored\n");
3771     find_match_limit = 0;
3772     }
3773    
3774 nigel 3 /* Handle matching via the POSIX interface, which does not
3775 nigel 63 support timing or playing with the match limit or callout data. */
3776 nigel 3
3777 nigel 37 #if !defined NOPOSIX
3778 nigel 3 if (posix || do_posix)
3779     {
3780     int rc;
3781     int eflags = 0;
3782 nigel 63 regmatch_t *pmatch = NULL;
3783     if (use_size_offsets > 0)
3784 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3785 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3786     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3787 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3788 nigel 3
3789 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3790 nigel 3
3791     if (rc != 0)
3792     {
3793 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3794 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3795     }
3796 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3797     != 0)
3798     {
3799     fprintf(outfile, "Matched with REG_NOSUB\n");
3800     }
3801 nigel 3 else
3802     {
3803 nigel 7 size_t i;
3804 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
3805 nigel 3 {
3806     if (pmatch[i].rm_so >= 0)
3807     {
3808 nigel 23 fprintf(outfile, "%2d: ", (int)i);
3809 ph10 836 PCHARSV(dbuffer, pmatch[i].rm_so,
3810 nigel 63 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3811 nigel 3 fprintf(outfile, "\n");
3812 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
3813 nigel 35 {
3814 ph10 616 fprintf(outfile, "%2d+ ", (int)i);
3815 ph10 836 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3816 nigel 63 outfile);
3817 nigel 35 fprintf(outfile, "\n");
3818     }
3819 nigel 3 }
3820     }
3821     }
3822 nigel 53 free(pmatch);
3823 ph10 836 goto NEXT_DATA;
3824 nigel 3 }
3825    
3826 ph10 836 #endif /* !defined NOPOSIX */
3827    
3828 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
3829 nigel 3
3830 ph10 836 #ifdef SUPPORT_PCRE16
3831     if (use_pcre16)
3832     {
3833 zherczeg 852 len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3834 ph10 836 switch(len)
3835     {
3836     case -1:
3837     fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3838     "converted to UTF-16\n");
3839     goto NEXT_DATA;
3840 nigel 37
3841 ph10 836 case -2:
3842     fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3843     "cannot be converted to UTF-16\n");
3844     goto NEXT_DATA;
3845    
3846     case -3:
3847     fprintf(outfile, "**Failed: character value greater than 0xffff "
3848     "cannot be converted to 16-bit in non-UTF mode\n");
3849 ph10 842 goto NEXT_DATA;
3850 ph10 836
3851     default:
3852     break;
3853     }
3854     bptr = (pcre_uint8 *)buffer16;
3855     }
3856     #endif
3857    
3858 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
3859 nigel 3 {
3860 ph10 512 markptr = NULL;
3861    
3862 nigel 93 if (timeitm > 0)
3863 nigel 3 {
3864     register int i;
3865     clock_t time_taken;
3866     clock_t start_time = clock();
3867 nigel 77
3868 nigel 79 #if !defined NODFA
3869 nigel 77 if (all_use_dfa || use_dfa)
3870     {
3871     int workspace[1000];
3872 nigel 93 for (i = 0; i < timeitm; i++)
3873 ph10 836 {
3874     PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3875     (options | g_notempty), use_offsets, use_size_offsets, workspace,
3876     (sizeof(workspace)/sizeof(int)));
3877     }
3878 nigel 77 }
3879     else
3880 nigel 79 #endif
3881 nigel 77
3882 nigel 93 for (i = 0; i < timeitm; i++)
3883 ph10 836 {
3884     PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3885     (options | g_notempty), use_offsets, use_size_offsets);
3886     }
3887 nigel 3 time_taken = clock() - start_time;
3888 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
3889     (((double)time_taken * 1000.0) / (double)timeitm) /
3890 nigel 63 (double)CLOCKS_PER_SEC);
3891 nigel 3 }
3892    
3893 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
3894 nigel 87 varying limits in order to find the minimum value for the match limit and
3895 ph10 667 for the recursion limit. The match limits are relevant only to the normal
3896     running of pcre_exec(), so disable the JIT optimization. This makes it
3897     possible to run the same set of tests with and without JIT externally
3898     requested. */
3899 nigel 63
3900     if (find_match_limit)
3901     {
3902     if (extra == NULL)
3903     {
3904 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3905 nigel 63 extra->flags = 0;
3906     }
3907 ph10 667 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3908 ph10 691
3909 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
3910 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
3911     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3912     PCRE_ERROR_MATCHLIMIT, "match()");
3913 nigel 63
3914 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
3915     options|g_notempty, use_offsets, use_size_offsets,
3916     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3917     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3918 nigel 63 }
3919    
3920     /* If callout_data is set, use the interface with additional data */
3921    
3922     else if (callout_data_set)
3923     {
3924     if (extra == NULL)
3925     {
3926 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3927 nigel 63 extra->flags = 0;
3928     }
3929     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3930 nigel 71 extra->callout_data = &callout_data;
3931 ph10 836 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3932 nigel 63 options | g_notempty, use_offsets, use_size_offsets);
3933     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3934     }
3935    
3936