/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1526 - (hide annotations) (download)
Fri Feb 20 10:51:07 2015 UTC (5 weeks ago) by ph10
File MIME type: text/plain
File size: 171965 byte(s)
Fixed two pcretest crashes caused by mangled input (the fuzzers are at work).

1 zherczeg 929 /*************************************************
2 nigel 3 * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 ph10 836 been extended and consequently is now rather, er, *very* untidy in places. The
8     addition of 16-bit support has made it even worse. :-(
9 nigel 63
10 nigel 75 -----------------------------------------------------------------------------
11     Redistribution and use in source and binary forms, with or without
12     modification, are permitted provided that the following conditions are met:
13    
14     * Redistributions of source code must retain the above copyright notice,
15     this list of conditions and the following disclaimer.
16    
17     * Redistributions in binary form must reproduce the above copyright
18     notice, this list of conditions and the following disclaimer in the
19     documentation and/or other materials provided with the distribution.
20    
21     * Neither the name of the University of Cambridge nor the names of its
22     contributors may be used to endorse or promote products derived from
23     this software without specific prior written permission.
24    
25     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35     POSSIBILITY OF SUCH DAMAGE.
36     -----------------------------------------------------------------------------
37     */
38    
39 ph10 1221 /* This program now supports the testing of all of the 8-bit, 16-bit, and
40 ph10 1142 32-bit PCRE libraries in a single program. This is different from the modules
41     such as pcre_compile.c in the library itself, which are compiled separately for
42     each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43     twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44     make use of any of the macros from pcre_internal.h that depend on
45     COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46     SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47     supported library functions. */
48 nigel 75
49 ph10 200 #ifdef HAVE_CONFIG_H
50 ph10 236 #include "config.h"
51 ph10 200 #endif
52 ph10 199
53 nigel 3 #include <ctype.h>
54     #include <stdio.h>
55     #include <string.h>
56     #include <stdlib.h>
57     #include <time.h>
58 nigel 25 #include <locale.h>
59 nigel 75 #include <errno.h>
60 nigel 3
61 ph10 936 /* Both libreadline and libedit are optionally supported. The user-supplied
62 ph10 960 original patch uses readline/readline.h for libedit, but in at least one system
63     it is installed as editline/readline.h, so the configuration code now looks for
64 ph10 936 that first, falling back to readline/readline.h. */
65    
66     #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 ph10 343 #ifdef HAVE_UNISTD_H
68 ph10 287 #include <unistd.h>
69 ph10 343 #endif
70 ph10 936 #if defined(SUPPORT_LIBREADLINE)
71 ph10 287 #include <readline/readline.h>
72     #include <readline/history.h>
73 ph10 936 #else
74     #if defined(HAVE_EDITLINE_READLINE_H)
75     #include <editline/readline.h>
76     #else
77     #include <readline/readline.h>
78 ph10 287 #endif
79 ph10 936 #endif
80     #endif
81 nigel 93
82     /* A number of things vary for Windows builds. Originally, pcretest opened its
83     input and output without "b"; then I was told that "b" was needed in some
84     environments, so it was added for release 5.0 to both the input and output. (It
85     makes no difference on Unix-like systems.) Later I was told that it is wrong
86     for the input on Windows. I've now abstracted the modes into two macros that
87     are set here, to make it easier to fiddle with them, and removed "b" from the
88     input mode under Windows. */
89    
90     #if defined(_WIN32) || defined(WIN32)
91     #include <io.h> /* For _setmode() */
92     #include <fcntl.h> /* For _O_BINARY */
93     #define INPUT_MODE "r"
94     #define OUTPUT_MODE "wb"
95    
96 ph10 411 #ifndef isatty
97     #define isatty _isatty /* This is what Windows calls them, I'm told, */
98     #endif /* though in some environments they seem to */
99     /* be already defined, hence the #ifndefs. */
100     #ifndef fileno
101 ph10 343 #define fileno _fileno
102 ph10 411 #endif
103 ph10 343
104 ph10 580 /* A user sent this fix for Borland Builder 5 under Windows. */
105    
106     #ifdef __BORLANDC__
107     #define _setmode(handle, mode) setmode(handle, mode)
108     #endif
109    
110     /* Not Windows */
111    
112 nigel 93 #else
113     #include <sys/time.h> /* These two includes are needed */
114     #include <sys/resource.h> /* for setrlimit(). */
115 ph10 1027 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
116     #define INPUT_MODE "r"
117     #define OUTPUT_MODE "w"
118     #else
119 nigel 93 #define INPUT_MODE "rb"
120     #define OUTPUT_MODE "wb"
121 nigel 91 #endif
122 ph10 1027 #endif
123 nigel 91
124 ph10 1254 #ifdef __VMS
125     #include <ssdef.h>
126     void vms_setsymbol( char *, char *, int );
127     #endif
128    
129    
130 zherczeg 905 #define PRIV(name) name
131 nigel 93
132 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
133     displaying the results of pcre_study() and we also need to know about the
134     internal macros, structures, and other internal data values; pcretest has
135     "inside information" compared to a program that strictly follows the PCRE API.
136 nigel 37
137 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
138     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
139     appropriately for an application, not for building PCRE. */
140 nigel 77
141 ph10 145 #include "pcre.h"
142 nigel 77 #include "pcre_internal.h"
143    
144 ph10 836 /* The pcre_printint() function, which prints the internal form of a compiled
145     regex, is held in a separate file so that (a) it can be compiled in either
146 chpe 1087 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
147 ph10 836 when that is compiled in debug mode. */
148    
149     #ifdef SUPPORT_PCRE8
150     void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
151     #endif
152     #ifdef SUPPORT_PCRE16
153     void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
154     #endif
155 chpe 1055 #ifdef SUPPORT_PCRE32
156     void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
157     #endif
158 ph10 836
159 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
160 ph10 1046 to keep two copies, we include the source files here, changing the names of the
161 ph10 351 external symbols to prevent clashes. */
162 nigel 77
163 ph10 836 #define PCRE_INCLUDED
164 nigel 85
165     #include "pcre_tables.c"
166 ph10 1046 #include "pcre_ucd.c"
167 nigel 85
168 ph10 498 /* The definition of the macro PRINTABLE, which determines whether to print an
169 nigel 93 output character as-is or as a hex value when showing compiled patterns, is
170 ph10 836 the same as in the printint.src file. We use it here in cases when the locale
171     has not been explicitly changed, so as to get consistent output from systems
172     that differ in their output from isprint() even in the "C" locale. */
173 nigel 93
174 ph10 836 #ifdef EBCDIC
175     #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
176     #else
177     #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
178     #endif
179 nigel 85
/* PRINTOK(c) decides whether c is output as-is: isprint() is consulted when
locale_set is non-zero, otherwise the fixed PRINTABLE() range is used.
NOTE(review): isprint() is only defined for EOF and for values representable
as unsigned char - confirm that callers never pass a negative char here. */
180 ph10 836 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
181    
182 chpe 1055 /* Posix support is disabled in 16 or 32 bit only mode. */
183     #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
184 ph10 836 #define NOPOSIX
185     #endif
186    
187 nigel 37 /* It is possible to compile this test program without including support for
188     testing the POSIX interface, though this is not available via the standard
189     Makefile. */
190    
191     #if !defined NOPOSIX
192 nigel 3 #include "pcreposix.h"
193 nigel 37 #endif
194 nigel 3
195 ph10 836 /* It is also possible, originally for the benefit of a version that was
196     imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
197     NOUTF), or without the interface to the DFA matcher (NODFA). In fact, we
198     automatically cut out the UTF support if PCRE is built without it. */
199 nigel 79
200 ph10 836 #ifndef SUPPORT_UTF
201     #ifndef NOUTF
202     #define NOUTF
203 ph10 107 #endif
204     #endif
205 nigel 79
206 chpe 1087 /* To make the code a bit tidier for 8/16/32-bit support, we define macros
207 ph10 836 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
208     only from one place and is handled differently). I couldn't dream up any way of
209     using a single macro to do this in a generic way, because of the many different
210     argument requirements. We know that at least one of SUPPORT_PCRE8,
211     SUPPORT_PCRE16, and SUPPORT_PCRE32 must be set. First define macros for each individual mode; then
212     use these in the definitions of generic macros.
213 ph10 107
214 ph10 836 **** Special note about the PCHARSxxx macros: the address of the string to be
215     printed is always given as two arguments: a base address followed by an offset.
216     The base address is cast to the correct data size for 8, 16 or 32 bit data; the
217     offset is in units of this size. If the string were given as base+offset in one
218     argument, the casting might be incorrectly applied. */
219    
220     #ifdef SUPPORT_PCRE8
221    
222     #define PCHARS8(lv, p, offset, len, f) \
223     lv = pchars((pcre_uint8 *)(p) + offset, len, f)
224    
225     #define PCHARSV8(p, offset, len, f) \
226     (void)pchars((pcre_uint8 *)(p) + offset, len, f)
227    
228 chpe 1055 #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
229 ph10 836 p = read_capture_name8(p, cn8, re)
230    
231 zherczeg 852 #define STRLEN8(p) ((int)strlen((char *)p))
232    
233 ph10 836 #define SET_PCRE_CALLOUT8(callout) \
234     pcre_callout = callout
235    
236 ph10 1454 #define SET_PCRE_STACK_GUARD8(stack_guard) \
237     pcre_stack_guard = stack_guard
238    
239 zherczeg 852 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
240     pcre_assign_jit_stack(extra, callback, userdata)
241 ph10 836
242     #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
243     re = pcre_compile((char *)pat, options, error, erroffset, tables)
244    
245     #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
246     namesptr, cbuffer, size) \
247     rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
248     (char *)namesptr, cbuffer, size)
249    
250     #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
251     rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
252    
253     #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
254     offsets, size_offsets, workspace, size_workspace) \
255     count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
256     offsets, size_offsets, workspace, size_workspace)
257    
258     #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
259     offsets, size_offsets) \
260     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
261     offsets, size_offsets)
262    
263     #define PCRE_FREE_STUDY8(extra) \
264     pcre_free_study(extra)
265    
266     #define PCRE_FREE_SUBSTRING8(substring) \
267     pcre_free_substring(substring)
268    
269     #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
270     pcre_free_substring_list(listptr)
271    
272     #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
273     getnamesptr, subsptr) \
274     rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
275     (char *)getnamesptr, subsptr)
276    
/* Call pcre_get_stringnumber() for the compiled pattern re and the name at
ptr, assigning the result to n. The pattern is taken from the macro argument,
not from whatever variable called "re" happens to be in scope at the call
site. Use as a statement, followed by a semicolon. */

#define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)
279    
280     #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
281     rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
282    
283     #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
284     rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
285    
286 zherczeg 839 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
287     rc = pcre_pattern_to_host_byte_order(re, extra, tables)
288 ph10 836
289     #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
290     pcre_printint(re, outfile, debug_lengths)
291    
292     #define PCRE_STUDY8(extra, re, options, error) \
293     extra = pcre_study(re, options, error)
294    
295 zherczeg 852 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
296     pcre_jit_stack_alloc(startsize, maxsize)
297    
298     #define PCRE_JIT_STACK_FREE8(stack) \
299     pcre_jit_stack_free(stack)
300    
301 ph10 1221 #define pcre8_maketables pcre_maketables
302    
303 ph10 836 #endif /* SUPPORT_PCRE8 */
304    
305     /* -----------------------------------------------------------*/
306    
307     #ifdef SUPPORT_PCRE16
308    
309     #define PCHARS16(lv, p, offset, len, f) \
310     lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
311    
312     #define PCHARSV16(p, offset, len, f) \
313     (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
314    
315 chpe 1055 #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
316 ph10 836 p = read_capture_name16(p, cn16, re)
317    
318     #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
319    
320     #define SET_PCRE_CALLOUT16(callout) \
321 zherczeg 850 pcre16_callout = (int (*)(pcre16_callout_block *))callout
322 ph10 836
323 ph10 1454 #define SET_PCRE_STACK_GUARD16(stack_guard) \
324     pcre16_stack_guard = (int (*)(void))stack_guard
325    
326 zherczeg 852 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
327     pcre16_assign_jit_stack((pcre16_extra *)extra, \
328     (pcre16_jit_callback)callback, userdata)
329 ph10 836
330     #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
331 zherczeg 852 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
332     tables)
333 ph10 836
334     #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
335     namesptr, cbuffer, size) \
336 zherczeg 852 rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
337 zherczeg 860 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
338 ph10 836
339     #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
340     rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
341 zherczeg 860 (PCRE_UCHAR16 *)cbuffer, size/2)
342 ph10 836
343     #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
344     offsets, size_offsets, workspace, size_workspace) \
345 zherczeg 852 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
346     (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
347     workspace, size_workspace)
348 ph10 836
349     #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
350     offsets, size_offsets) \
351 zherczeg 852 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
352     len, start_offset, options, offsets, size_offsets)
353 ph10 836
354     #define PCRE_FREE_STUDY16(extra) \
355 zherczeg 850 pcre16_free_study((pcre16_extra *)extra)
356 ph10 836
357     #define PCRE_FREE_SUBSTRING16(substring) \
358     pcre16_free_substring((PCRE_SPTR16)substring)
359    
360     #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
361     pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
362    
363     #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
364     getnamesptr, subsptr) \
365 zherczeg 852 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
366     count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
367 ph10 836
/* Call pcre16_get_stringnumber() for the compiled pattern re and the name at
ptr, assigning the result to n. The pattern comes from the macro argument (not
from a variable called "re" in the caller's scope) and is cast to pcre16 * in
the same way as in the other 16-bit macros. Use as a statement, followed by a
semicolon. */

#define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)
370    
371     #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
372     rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
373     (PCRE_SPTR16 *)(void*)subsptr)
374    
375     #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
376     rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
377     (PCRE_SPTR16 **)(void*)listptr)
378    
379 zherczeg 839 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
380 zherczeg 852 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
381     tables)
382 ph10 836
383     #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
384     pcre16_printint(re, outfile, debug_lengths)
385    
386     #define PCRE_STUDY16(extra, re, options, error) \
387 zherczeg 852 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
388 ph10 836
389 zherczeg 852 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
390     (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
391    
392     #define PCRE_JIT_STACK_FREE16(stack) \
393     pcre16_jit_stack_free((pcre16_jit_stack *)stack)
394    
395 ph10 836 #endif /* SUPPORT_PCRE16 */
396    
397 chpe 1055 /* -----------------------------------------------------------*/
398 ph10 836
399 chpe 1055 #ifdef SUPPORT_PCRE32
400    
401     #define PCHARS32(lv, p, offset, len, f) \
402 chpe 1117 lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
403 chpe 1055
404 chpe 1117 #define PCHARSV32(p, offset, len, f) \
405     (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
406 chpe 1055
407     #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
408     p = read_capture_name32(p, cn32, re)
409    
410     #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
411    
412     #define SET_PCRE_CALLOUT32(callout) \
413     pcre32_callout = (int (*)(pcre32_callout_block *))callout
414    
415 ph10 1454 #define SET_PCRE_STACK_GUARD32(stack_guard) \
416     pcre32_stack_guard = (int (*)(void))stack_guard
417    
418 chpe 1055 #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
419     pcre32_assign_jit_stack((pcre32_extra *)extra, \
420     (pcre32_jit_callback)callback, userdata)
421    
422     #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
423     re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
424     tables)
425    
/* Call pcre32_copy_named_substring(), assigning its result to rc. The size
argument is taken to be the capacity of cbuffer in bytes (the 8-bit macro
passes it through unchanged and the 16-bit macro halves it), so it is divided
by 4 here to give the capacity in 32-bit code units. Dividing by 2 would tell
the library that the buffer is twice as large as it really is. */

#define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
    count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, (size)/4)
430    
/* Call pcre32_copy_substring(), assigning its result to rc. The size argument
is taken to be the capacity of cbuffer in bytes (the 8-bit macro passes it
through unchanged and the 16-bit macro halves it), so it is divided by 4 here
to give the capacity in 32-bit code units. Dividing by 2 would tell the
library that the buffer is twice as large as it really is. */

#define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_UCHAR32 *)cbuffer, (size)/4)
434    
435     #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
436     offsets, size_offsets, workspace, size_workspace) \
437     count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
438     (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
439     workspace, size_workspace)
440    
441     #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
442     offsets, size_offsets) \
443     count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
444     len, start_offset, options, offsets, size_offsets)
445    
446     #define PCRE_FREE_STUDY32(extra) \
447     pcre32_free_study((pcre32_extra *)extra)
448    
449     #define PCRE_FREE_SUBSTRING32(substring) \
450     pcre32_free_substring((PCRE_SPTR32)substring)
451    
452     #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
453     pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
454    
455     #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
456     getnamesptr, subsptr) \
457     rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
458     count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
459    
/* Call pcre32_get_stringnumber() for the compiled pattern re and the name at
ptr, assigning the result to n. The pattern comes from the macro argument (not
from a variable called "re" in the caller's scope) and is cast to pcre32 * in
the same way as in the other 32-bit macros. Use as a statement, followed by a
semicolon. */

#define PCRE_GET_STRINGNUMBER32(n, re, ptr) \
  n = pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)ptr)
462    
463     #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
464     rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
465     (PCRE_SPTR32 *)(void*)subsptr)
466    
467     #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
468     rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
469     (PCRE_SPTR32 **)(void*)listptr)
470    
471     #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
472     rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
473     tables)
474    
475     #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
476     pcre32_printint(re, outfile, debug_lengths)
477    
478     #define PCRE_STUDY32(extra, re, options, error) \
479     extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
480    
481     #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
482     (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
483    
484     #define PCRE_JIT_STACK_FREE32(stack) \
485     pcre32_jit_stack_free((pcre32_jit_stack *)stack)
486    
487     #endif /* SUPPORT_PCRE32 */
488    
489    
490 ph10 1122 /* ----- More than one mode is supported; a runtime test is needed, except for
491 ph10 836 pcre_config(), and the JIT stack functions, when it doesn't matter which
492 ph10 1140 available version is called. ----- */
493 ph10 836
/* Selectors for the library width in use; these are the values that the
generic macros compare against pcre_mode. The implicit values 0, 1 and 2
matter: where CHAR_SIZE is defined it is (1 << pcre_mode), which gives 1, 2
and 4 respectively. */
494 chpe 1055 enum {
495     PCRE8_MODE,
496     PCRE16_MODE,
497     PCRE32_MODE
498     };
499 ph10 836
500 ph10 1122 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
501     defined (SUPPORT_PCRE32)) >= 2
502 ph10 836
503 chpe 1055 #define CHAR_SIZE (1 << pcre_mode)
504    
505 ph10 1122 /* There doesn't seem to be an easy way of writing these macros that can cope
506     with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
507     cases separately. */
508    
509     /* ----- All three modes supported ----- */
510    
511     #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
512    
513 ph10 836 #define PCHARS(lv, p, offset, len, f) \
514 chpe 1055 if (pcre_mode == PCRE32_MODE) \
515     PCHARS32(lv, p, offset, len, f); \
516     else if (pcre_mode == PCRE16_MODE) \
517 ph10 836 PCHARS16(lv, p, offset, len, f); \
518     else \
519     PCHARS8(lv, p, offset, len, f)
520    
521     #define PCHARSV(p, offset, len, f) \
522 chpe 1055 if (pcre_mode == PCRE32_MODE) \
523     PCHARSV32(p, offset, len, f); \
524     else if (pcre_mode == PCRE16_MODE) \
525 ph10 836 PCHARSV16(p, offset, len, f); \
526     else \
527     PCHARSV8(p, offset, len, f)
528    
529 chpe 1055 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
530     if (pcre_mode == PCRE32_MODE) \
531     READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
532     else if (pcre_mode == PCRE16_MODE) \
533     READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
534 ph10 836 else \
535 chpe 1055 READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
536 ph10 836
537     #define SET_PCRE_CALLOUT(callout) \
538 chpe 1055 if (pcre_mode == PCRE32_MODE) \
539     SET_PCRE_CALLOUT32(callout); \
540     else if (pcre_mode == PCRE16_MODE) \
541 ph10 836 SET_PCRE_CALLOUT16(callout); \
542     else \
543     SET_PCRE_CALLOUT8(callout)
544    
545 ph10 1454 #define SET_PCRE_STACK_GUARD(stack_guard) \
546     if (pcre_mode == PCRE32_MODE) \
547     SET_PCRE_STACK_GUARD32(stack_guard); \
548     else if (pcre_mode == PCRE16_MODE) \
549     SET_PCRE_STACK_GUARD16(stack_guard); \
550     else \
551     SET_PCRE_STACK_GUARD8(stack_guard)
552    
553 chpe 1055 #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
554 ph10 836
555 zherczeg 852 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
556 chpe 1055 if (pcre_mode == PCRE32_MODE) \
557     PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
558     else if (pcre_mode == PCRE16_MODE) \
559 zherczeg 852 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
560     else \
561     PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
562 ph10 836
563     #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
564 chpe 1055 if (pcre_mode == PCRE32_MODE) \
565     PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
566     else if (pcre_mode == PCRE16_MODE) \
567 ph10 836 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
568     else \
569     PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
570    
571     #define PCRE_CONFIG pcre_config
572    
573     #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
574     namesptr, cbuffer, size) \
575 chpe 1055 if (pcre_mode == PCRE32_MODE) \
576     PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
577     namesptr, cbuffer, size); \
578     else if (pcre_mode == PCRE16_MODE) \
579 ph10 836 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
580     namesptr, cbuffer, size); \
581     else \
582     PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
583     namesptr, cbuffer, size)
584    
585     #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
586 chpe 1055 if (pcre_mode == PCRE32_MODE) \
587     PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
588     else if (pcre_mode == PCRE16_MODE) \
589 ph10 836 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
590     else \
591     PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
592    
593     #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
594     offsets, size_offsets, workspace, size_workspace) \
595 chpe 1055 if (pcre_mode == PCRE32_MODE) \
596     PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
597     offsets, size_offsets, workspace, size_workspace); \
598     else if (pcre_mode == PCRE16_MODE) \
599 ph10 836 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
600     offsets, size_offsets, workspace, size_workspace); \
601     else \
602     PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
603     offsets, size_offsets, workspace, size_workspace)
604    
605     #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
606     offsets, size_offsets) \
607 chpe 1055 if (pcre_mode == PCRE32_MODE) \
608     PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
609     offsets, size_offsets); \
610     else if (pcre_mode == PCRE16_MODE) \
611 ph10 836 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
612     offsets, size_offsets); \
613     else \
614     PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
615     offsets, size_offsets)
616    
617     #define PCRE_FREE_STUDY(extra) \
618 chpe 1055 if (pcre_mode == PCRE32_MODE) \
619     PCRE_FREE_STUDY32(extra); \
620     else if (pcre_mode == PCRE16_MODE) \
621 ph10 836 PCRE_FREE_STUDY16(extra); \
622     else \
623     PCRE_FREE_STUDY8(extra)
624    
625     #define PCRE_FREE_SUBSTRING(substring) \
626 chpe 1055 if (pcre_mode == PCRE32_MODE) \
627     PCRE_FREE_SUBSTRING32(substring); \
628     else if (pcre_mode == PCRE16_MODE) \
629 ph10 836 PCRE_FREE_SUBSTRING16(substring); \
630     else \
631     PCRE_FREE_SUBSTRING8(substring)
632    
633     #define PCRE_FREE_SUBSTRING_LIST(listptr) \
634 chpe 1055 if (pcre_mode == PCRE32_MODE) \
635     PCRE_FREE_SUBSTRING_LIST32(listptr); \
636     else if (pcre_mode == PCRE16_MODE) \
637 ph10 836 PCRE_FREE_SUBSTRING_LIST16(listptr); \
638     else \
639     PCRE_FREE_SUBSTRING_LIST8(listptr)
640    
641     #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
642     getnamesptr, subsptr) \
643 chpe 1055 if (pcre_mode == PCRE32_MODE) \
644     PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
645     getnamesptr, subsptr); \
646     else if (pcre_mode == PCRE16_MODE) \
647 ph10 836 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
648     getnamesptr, subsptr); \
649     else \
650     PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
651     getnamesptr, subsptr)
652    
653     #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
654 chpe 1055 if (pcre_mode == PCRE32_MODE) \
655     PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
656     else if (pcre_mode == PCRE16_MODE) \
657 ph10 836 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
658     else \
659     PCRE_GET_STRINGNUMBER8(n, rc, ptr)
660    
/* Three-mode dispatchers. Each statement-form macro below expands to a bare
if / else-if / else chain that tests pcre_mode against PCRE32_MODE, then
PCRE16_MODE, and otherwise takes the 8-bit variant without a further test. The
expansion is not wrapped in do { } while (0) and carries no final semicolon, so
it is only usable where a statement is allowed. */

661     #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
662 chpe 1055 if (pcre_mode == PCRE32_MODE) \
663     PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
664     else if (pcre_mode == PCRE16_MODE) \
665 ph10 836 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
666     else \
667     PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
668    
669     #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
670 chpe 1055 if (pcre_mode == PCRE32_MODE) \
671     PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
672     else if (pcre_mode == PCRE16_MODE) \
673 ph10 836 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
674     else \
675     PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
676    
/* Expression form: a fully parenthesized nested conditional, so it can be used
as an operand (for example on the right-hand side of an assignment). */
677 zherczeg 852 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
678 chpe 1055 (pcre_mode == PCRE32_MODE ? \
679     PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
680     : pcre_mode == PCRE16_MODE ? \
681     PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
682     : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
683 ph10 836
684 zherczeg 852 #define PCRE_JIT_STACK_FREE(stack) \
685 chpe 1055 if (pcre_mode == PCRE32_MODE) \
686     PCRE_JIT_STACK_FREE32(stack); \
687     else if (pcre_mode == PCRE16_MODE) \
688 zherczeg 852 PCRE_JIT_STACK_FREE16(stack); \
689     else \
690     PCRE_JIT_STACK_FREE8(stack)
691    
/* Object-like macro (no argument list): an expression that calls the
maketables function of whichever library pcre_mode selects. */
692 ph10 836 #define PCRE_MAKETABLES \
693 chpe 1055 (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
694 ph10 836
695 zherczeg 839 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
696 chpe 1055 if (pcre_mode == PCRE32_MODE) \
697     PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
698     else if (pcre_mode == PCRE16_MODE) \
699 zherczeg 839 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
700 ph10 836 else \
701 zherczeg 839 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
702 ph10 836
703     #define PCRE_PRINTINT(re, outfile, debug_lengths) \
704 chpe 1055 if (pcre_mode == PCRE32_MODE) \
705     PCRE_PRINTINT32(re, outfile, debug_lengths); \
706     else if (pcre_mode == PCRE16_MODE) \
707 ph10 836 PCRE_PRINTINT16(re, outfile, debug_lengths); \
708     else \
709     PCRE_PRINTINT8(re, outfile, debug_lengths)
710    
711     #define PCRE_STUDY(extra, re, options, error) \
712 chpe 1055 if (pcre_mode == PCRE32_MODE) \
713     PCRE_STUDY32(extra, re, options, error); \
714     else if (pcre_mode == PCRE16_MODE) \
715 ph10 836 PCRE_STUDY16(extra, re, options, error); \
716     else \
717     PCRE_STUDY8(extra, re, options, error)
718    
719 ph10 1122
720 ph10 1140 /* ----- Two out of three modes are supported ----- */
721 ph10 1122
722 ph10 1140 #else
723 ph10 1122
724 ph10 1140 /* We can use some macro trickery to make a single set of definitions work in
725     the three different cases. */
726 ph10 1122
727 ph10 1140 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
728 ph10 1122
/* BITONE is the width that the two-mode macros test for explicitly; BITTWO is
the width they fall back to in the else branch. */
729 ph10 1140 #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
730     #define BITONE 32
731     #define BITTWO 16
732 ph10 1122
733     /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
734    
735     #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
736 ph10 1140 #define BITONE 32
737     #define BITTWO 8
738 ph10 1122
739 ph10 1140 /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
740 ph10 1122
741 ph10 1140 #else
742     #define BITONE 16
743     #define BITTWO 8
744     #endif
745 ph10 1122
/* Two-level token pasting. glue() pastes its arguments exactly as written; G()
adds one level of indirection so that macro arguments such as BITONE are first
replaced by their numeric value and only then pasted, e.g. G(PCHARS,BITONE)
becomes PCHARS32 or PCHARS16 rather than PCHARSBITONE. */
746 ph10 1140 #define glue(a,b) a##b
747     #define G(a,b) glue(a,b)
748 ph10 1122
749    
750 ph10 1140 /* ----- Common macros for two-mode cases ----- */
751 ph10 1122
/* Two-mode dispatchers. Each statement-form macro below expands to a bare
if / else: it compares pcre_mode with PCRE<BITONE>_MODE and calls the BITONE
variant on a match, otherwise it calls the BITTWO variant without testing
pcre_mode again. The variant names are built with G(), e.g. G(PCHARS,BITONE).
The expansion is not wrapped in do { } while (0) and carries no final
semicolon, so it is only usable where a statement is allowed. */

752     #define PCHARS(lv, p, offset, len, f) \
753 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
754     G(PCHARS,BITONE)(lv, p, offset, len, f); \
755 ph10 1122 else \
756 ph10 1140 G(PCHARS,BITTWO)(lv, p, offset, len, f)
757 ph10 1122
758     #define PCHARSV(p, offset, len, f) \
759 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
760     G(PCHARSV,BITONE)(p, offset, len, f); \
761 ph10 1122 else \
762 ph10 1140 G(PCHARSV,BITTWO)(p, offset, len, f)
763 ph10 1122
764     #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
765 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
766     G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
767 ph10 1122 else \
768 ph10 1140 G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
769 ph10 1122
770     #define SET_PCRE_CALLOUT(callout) \
771 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
772     G(SET_PCRE_CALLOUT,BITONE)(callout); \
773 ph10 1122 else \
774 ph10 1140 G(SET_PCRE_CALLOUT,BITTWO)(callout)
775 ph10 1122
776 ph10 1454 #define SET_PCRE_STACK_GUARD(stack_guard) \
777     if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
778     G(SET_PCRE_STACK_GUARD,BITONE)(stack_guard); \
779     else \
780     G(SET_PCRE_STACK_GUARD,BITTWO)(stack_guard)
781    
/* Expression form: the whole conditional is enclosed in parentheses, so STRLEN
can be used as an operand inside a larger expression. */
782 ph10 1140 #define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
783     G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))
784 ph10 1122
785     #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
786 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
787     G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
788 ph10 1122 else \
789 ph10 1140 G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)
790 ph10 1122
791     #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
792 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
793     G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
794 ph10 1122 else \
795 ph10 1140 G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)
796 ph10 1122
/* Unlike the other macros in this group, PCRE_CONFIG does not look at
pcre_mode: it always names the BITONE library's config function
(pcre32_config or pcre16_config). */
797 ph10 1140 #define PCRE_CONFIG G(G(pcre,BITONE),_config)
798 ph10 1122
799     #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
800     namesptr, cbuffer, size) \
801 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
802     G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
803 ph10 1122 namesptr, cbuffer, size); \
804     else \
805 ph10 1140 G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
806 ph10 1122 namesptr, cbuffer, size)
807    
808     #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
809 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
810     G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
811 ph10 1122 else \
812 ph10 1140 G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)
813 ph10 1122
814     #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
815     offsets, size_offsets, workspace, size_workspace) \
816 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
817     G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
818 ph10 1122 offsets, size_offsets, workspace, size_workspace); \
819     else \
820 ph10 1140 G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
821 ph10 1122 offsets, size_offsets, workspace, size_workspace)
822    
823     #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
824     offsets, size_offsets) \
825 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
826     G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
827 ph10 1122 offsets, size_offsets); \
828     else \
829 ph10 1140 G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
830 ph10 1122 offsets, size_offsets)
831    
832     #define PCRE_FREE_STUDY(extra) \
833 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
834     G(PCRE_FREE_STUDY,BITONE)(extra); \
835 ph10 1122 else \
836 ph10 1140 G(PCRE_FREE_STUDY,BITTWO)(extra)
837 ph10 1122
838     #define PCRE_FREE_SUBSTRING(substring) \
839 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
840     G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
841 ph10 1122 else \
842 ph10 1140 G(PCRE_FREE_SUBSTRING,BITTWO)(substring)
843 ph10 1122
844     #define PCRE_FREE_SUBSTRING_LIST(listptr) \
845 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
846     G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
847 ph10 1122 else \
848 ph10 1140 G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr)
849 ph10 1122
850     #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
851     getnamesptr, subsptr) \
852 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
853     G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
854 ph10 1122 getnamesptr, subsptr); \
855     else \
856 ph10 1140 G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
857 ph10 1122 getnamesptr, subsptr)
858    
859     #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
860 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
861     G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
862 ph10 1122 else \
863 ph10 1140 G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr)
864 ph10 1122
865     #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
866 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
867     G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
868 ph10 1122 else \
869 ph10 1140 G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr)
870 ph10 1122
871     #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
872 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
873     G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
874 ph10 1122 else \
875 ph10 1140 G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr)
876 ph10 1122
/* Two-mode dispatcher for JIT stack allocation. Evaluates to the result of the
BITONE allocator when pcre_mode equals PCRE<BITONE>_MODE, and to the result of
the BITTWO allocator otherwise. The whole conditional is enclosed in
parentheses, as in the three-mode definition, so that the macro acts as a
single operand: without them an adjacent operator would bind to the condition
instead of to the result (a + M(x,y) would parse as (a + cond) ? ... : ...). */

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
     G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
    : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize))
881 ph10 1122
/* Statement form: a bare if / else that frees the stack through the BITONE
routine when pcre_mode equals PCRE<BITONE>_MODE and through the BITTWO routine
otherwise. No final semicolon is supplied by the macro. */
882     #define PCRE_JIT_STACK_FREE(stack) \
883 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
884     G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
885 ph10 1122 else \
886 ph10 1140 G(PCRE_JIT_STACK_FREE,BITTWO)(stack)
887 ph10 1122
/* Two-mode dispatcher for building character tables. Object-like macro that
evaluates to a call of pcre<BITONE>_maketables() when pcre_mode equals
PCRE<BITONE>_MODE and of pcre<BITTWO>_maketables() otherwise. When BITTWO is 8
the pasted name is pcre8_maketables, which must be provided elsewhere. The
whole conditional is enclosed in parentheses, as in the three-mode definition,
so that the macro acts as a single operand inside a larger expression. */

#define PCRE_MAKETABLES \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
    G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)())
891 ph10 1122
/* Statement-form two-mode dispatchers: each is a bare if / else that calls the
BITONE variant when pcre_mode equals PCRE<BITONE>_MODE and the BITTWO variant
otherwise. No final semicolon is supplied by the macros. */
892     #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
893 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
894     G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
895 ph10 1122 else \
896 ph10 1140 G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables)
897 ph10 1122
898     #define PCRE_PRINTINT(re, outfile, debug_lengths) \
899 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
900     G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
901 ph10 1122 else \
902 ph10 1140 G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths)
903 ph10 1122
904     #define PCRE_STUDY(extra, re, options, error) \
905 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
906     G(PCRE_STUDY,BITONE)(extra, re, options, error); \
907 ph10 1122 else \
908 ph10 1140 G(PCRE_STUDY,BITTWO)(extra, re, options, error)
909 ph10 1122
910 ph10 1140 #endif /* Two out of three modes */
911 ph10 1122
912     /* ----- End of cases where more than one mode is supported ----- */
913    
914    
915 ph10 836 /* ----- Only 8-bit mode is supported ----- */
916    
917     #elif defined SUPPORT_PCRE8
/* With a single supported width there is nothing to select at run time, so
each generic name is a plain alias for its 8-bit counterpart and pcre_mode is
not consulted. CHAR_SIZE is set to 1 for this build. */
918     #define CHAR_SIZE 1
919     #define PCHARS PCHARS8
920     #define PCHARSV PCHARSV8
921     #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
922     #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
923 ph10 1454 #define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD8
924 ph10 836 #define STRLEN STRLEN8
925 zherczeg 852 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
926 ph10 836 #define PCRE_COMPILE PCRE_COMPILE8
927     #define PCRE_CONFIG pcre_config
928     #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
929     #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
930     #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
931     #define PCRE_EXEC PCRE_EXEC8
932     #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
933     #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
934     #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
935     #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
936     #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
937     #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
938     #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
939 zherczeg 852 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
940     #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
/* PCRE_MAKETABLES is an object-like macro, so the alias includes the call. */
941 ph10 836 #define PCRE_MAKETABLES pcre_maketables()
942     #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
943     #define PCRE_PRINTINT PCRE_PRINTINT8
944     #define PCRE_STUDY PCRE_STUDY8
945    
946     /* ----- Only 16-bit mode is supported ----- */
947    
948 chpe 1055 #elif defined SUPPORT_PCRE16
/* With a single supported width there is nothing to select at run time, so
each generic name is a plain alias for its 16-bit counterpart and pcre_mode is
not consulted. CHAR_SIZE is set to 2 for this build. */
949 ph10 836 #define CHAR_SIZE 2
950     #define PCHARS PCHARS16
951     #define PCHARSV PCHARSV16
952     #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
953     #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
954 ph10 1454 #define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD16
955 ph10 836 #define STRLEN STRLEN16
956 zherczeg 852 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
957 ph10 836 #define PCRE_COMPILE PCRE_COMPILE16
958     #define PCRE_CONFIG pcre16_config
959     #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
960     #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
961     #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
962     #define PCRE_EXEC PCRE_EXEC16
963     #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
964     #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
965     #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
966     #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
967     #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
968     #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
969     #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
970 zherczeg 852 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
971     #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
/* PCRE_MAKETABLES is an object-like macro, so the alias includes the call. */
972 ph10 836 #define PCRE_MAKETABLES pcre16_maketables()
973     #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
974     #define PCRE_PRINTINT PCRE_PRINTINT16
975     #define PCRE_STUDY PCRE_STUDY16
976 chpe 1055
977     /* ----- Only 32-bit mode is supported ----- */
978    
979     #elif defined SUPPORT_PCRE32
/* With a single supported width there is nothing to select at run time, so
each generic name is a plain alias for its 32-bit counterpart and pcre_mode is
not consulted. CHAR_SIZE is set to 4 for this build. */
980     #define CHAR_SIZE 4
981     #define PCHARS PCHARS32
982     #define PCHARSV PCHARSV32
983     #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
984     #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
985 ph10 1454 #define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD32
986 chpe 1055 #define STRLEN STRLEN32
987     #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
988     #define PCRE_COMPILE PCRE_COMPILE32
989     #define PCRE_CONFIG pcre32_config
990     #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
991     #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
992     #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
993     #define PCRE_EXEC PCRE_EXEC32
994     #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
995     #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
996     #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
997     #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
998     #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
999     #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
1000     #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
1001     #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
1002     #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
/* PCRE_MAKETABLES is an object-like macro, so the alias includes the call. */
1003     #define PCRE_MAKETABLES pcre32_maketables()
1004     #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
1005     #define PCRE_PRINTINT PCRE_PRINTINT32
1006     #define PCRE_STUDY PCRE_STUDY32
1007    
1008 ph10 836 #endif
1009    
1010     /* ----- End of mode-specific function call macros ----- */
1011    
1012    
1013 nigel 85 /* Other parameters */
1014    
/* Fallback for environments where CLOCKS_PER_SEC is not already defined: use
CLK_TCK when that is available, otherwise assume 100 clock ticks per second. */
1015 nigel 3 #ifndef CLOCKS_PER_SEC
1016     #ifdef CLK_TCK
1017     #define CLOCKS_PER_SEC CLK_TCK
1018     #else
1019     #define CLOCKS_PER_SEC 100
1020     #endif
1021     #endif
1022    
/* DFA_WS_DIMENSION is only defined when NODFA is not; judging by the name it
is the dimension of the DFA matching workspace - confirm at the use site. */
1023 ph10 960 #if !defined NODFA
1024     #define DFA_WS_DIMENSION 1000
1025     #endif
1026    
1027 nigel 93 /* This is the default loop count for timing. */
1028    
1029 nigel 75 #define LOOPREPEAT 500000
1030 nigel 3
1031 nigel 85 /* Static variables */
1032    
/* File-scope state shared by the functions in this file. All of these have
static storage duration, so the ones declared without an initializer start out
as zero (or NULL for pointers); only log_store, locale_set and
last_callout_mark spell their initial value out explicitly. */
1033 nigel 3 static FILE *outfile;
1034     static int log_store = 0;
1035 nigel 63 static int callout_count;
1036     static int callout_extra;
1037     static int callout_fail_count;
1038     static int callout_fail_id;
1039 ph10 210 static int debug_lengths;
1040 nigel 63 static int first_callout;
1041 ph10 960 static int jit_was_used;
1042 nigel 93 static int locale_set = 0;
1043 nigel 73 static int show_malloc;
1044 ph10 1454 static int stack_guard_return;
1045 ph10 836 static int use_utf;
1046 ph10 645 static const unsigned char *last_callout_mark = NULL;
1047 nigel 3
1048 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
1049    
/* buffer_size is the starting size; both pointers start as NULL, so the
initial allocation presumably happens elsewhere in this file - confirm. */
1050     static int buffer_size = 50000;
1051 ph10 836 static pcre_uint8 *buffer = NULL;
1052     static pcre_uint8 *pbuffer = NULL;
1053 nigel 3
1054 ph10 1142 /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
1055 ph10 836
1056     #ifdef COMPILE_PCRE16
1057     #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1058     #endif
1059    
1060 chpe 1055 #ifdef COMPILE_PCRE32
1061     #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1062     #endif
1063    
1064 ph10 1142 /* We need buffers for building 16/32-bit strings, and the tables of operator
1065     lengths that are used for 16/32-bit compiling, in order to swap bytes in a
1066     pattern for saving/reloading testing. Luckily, the data for these tables is
1067     defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
1068     are used in the tables) are adjusted appropriately for the 16/32-bit world.
1069     LINK_SIZE is also used later in this program. */
1070    
/* 16-bit section. IMM2_SIZE and LINK_SIZE are redefined before OP_LENGTHS is
expanded into OP_lengths16, so the table is built with the 16-bit values. An
incoming LINK_SIZE of 2 becomes 1, and 3 or 4 becomes 2 (presumably a
conversion from bytes to 16-bit units - confirm); any other value stops the
build with #error. */
1071     #ifdef SUPPORT_PCRE16
1072     #undef IMM2_SIZE
1073     #define IMM2_SIZE 1
1074    
1075 ph10 836 #if LINK_SIZE == 2
1076     #undef LINK_SIZE
1077     #define LINK_SIZE 1
1078     #elif LINK_SIZE == 3 || LINK_SIZE == 4
1079     #undef LINK_SIZE
1080     #define LINK_SIZE 2
1081     #else
1082     #error LINK_SIZE must be either 2, 3, or 4
1083     #endif
1084    
1085 chpe 1055 static int buffer16_size = 0;
1086     static pcre_uint16 *buffer16 = NULL;
1087 ph10 836 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
1088     #endif /* SUPPORT_PCRE16 */
1089    
/* 32-bit section. Both sizes are forced to 1 without any check of the previous
value. Because this section comes after the 16-bit one, a build with both
widths enabled leaves LINK_SIZE defined as 1 for all later code in the file,
while OP_lengths16 above has already been built with the 16-bit value. */
1090 chpe 1055 #ifdef SUPPORT_PCRE32
1091 ph10 1142 #undef IMM2_SIZE
1092     #define IMM2_SIZE 1
1093     #undef LINK_SIZE
1094     #define LINK_SIZE 1
1095    
1096 chpe 1055 static int buffer32_size = 0;
1097     static pcre_uint32 *buffer32 = NULL;
1098     static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
1099     #endif /* SUPPORT_PCRE32 */
1100 ph10 836
1101 ph10 1140 /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
1102     support, it can be changed by an option. If there is no 8-bit support, there
1103     must be 16- or 32-bit support, so default to 16-bit mode when it is available
and to 32-bit mode otherwise. */
1104 chpe 1055
1105     #if defined SUPPORT_PCRE8
1106     static int pcre_mode = PCRE8_MODE;
1107     #elif defined SUPPORT_PCRE16
1108     static int pcre_mode = PCRE16_MODE;
1109     #elif defined SUPPORT_PCRE32
1110     static int pcre_mode = PCRE32_MODE;
1111 ph10 836 #endif
1112    
1113 ph10 923 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. The table has
seven entries, one per value of n in ascending order (presumably looked up as
jit_study_bits[n - 1] - confirm at the use site). Reading n as a 3-bit number,
bit 0 selects normal JIT compilation, bit 1 partial-soft and bit 2
partial-hard. */
1114    
1115     static int jit_study_bits[] =
1116 ph10 960 {
1117     PCRE_STUDY_JIT_COMPILE, /* n = 1 */
1118 ph10 923 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE, /* n = 2 */
1119     PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE, /* n = 3 */
1120 ph10 960 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, /* n = 4 */
1121     PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, /* n = 5 */
1122     PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, /* n = 6 */
1123     PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
1124     PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE /* n = 7 */
1125     };
1126 ph10 923
/* Mask with all three JIT study option bits set. */
1127 ph10 1022 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
1128     PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
1129    
1130 ph10 598 /* Textual explanations for runtime error codes. Entry 0 stands for "no
error" and entry 1 for NOMATCH, so the table appears to be indexed by the
negated value of the library's (negative) error code - confirm at the lookup
site. A NULL entry marks a code that has no generic text here because, as the
per-entry comments say, it is reported specially or is never returned. */
1131 nigel 75
1132 ph10 598 static const char *errtexts[] = {
1133     NULL, /* 0 is no error */
1134     NULL, /* NOMATCH is handled specially */
1135     "NULL argument passed",
1136     "bad option value",
1137     "magic number missing",
1138     "unknown opcode - pattern overwritten?",
1139     "no more memory",
1140 ph10 654 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
1141 ph10 598 "match limit exceeded",
1142     "callout error code",
1143 ph10 836 NULL, /* BADUTF8/16 is handled specially */
1144     NULL, /* BADUTF8/16 offset is handled specially */
1145 ph10 598 NULL, /* PARTIAL is handled specially */
1146     "not used - internal error",
1147     "internal error - pattern overwritten?",
1148     "bad count value",
1149     "item unsupported for DFA matching",
1150     "backreference condition or recursion test not supported for DFA matching",
1151     "match limit not supported for DFA matching",
1152     "workspace size exceeded in DFA matching",
1153 ph10 654 "too much recursion for DFA matching",
1154 ph10 598 "recursion limit exceeded",
1155     "not used - internal error",
1156     "invalid combination of newline options",
1157     "bad offset value",
1158 ph10 836 NULL, /* SHORTUTF8/16 is handled specially */
1159 ph10 676 "nested recursion at the same subject position",
1160 ph10 836 "JIT stack limit reached",
1161 ph10 960 "pattern compiled in wrong mode: 8-bit/16-bit error",
1162     "pattern compiled with other endianness",
1163 ph10 1189 "invalid data in workspace for DFA restart",
1164     "bad JIT option",
1165 ph10 1221 "bad length"
1166 ph10 598 };
1167    
1168 ph10 654
1169 ph10 541 /*************************************************
1170     * Alternate character tables *
1171     *************************************************/
1172 nigel 49
1173 ph10 545 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
1174     using the default tables of the library. However, the T option can be used to
1175     select alternate sets of tables, for different kinds of testing. Note also that
1176 ph10 541 the L (locale) option also adjusts the tables. */
1177    
1178 ph10 545 /* This is the set of tables distributed as default with PCRE. It recognizes
1179 ph10 541 only ASCII characters. */
1180    
1181 ph10 836 static const pcre_uint8 tables0[] = {
1182 ph10 541
1183     /* This table is a lower casing table. */
1184    
1185     0, 1, 2, 3, 4, 5, 6, 7,
1186     8, 9, 10, 11, 12, 13, 14, 15,
1187     16, 17, 18, 19, 20, 21, 22, 23,
1188     24, 25, 26, 27, 28, 29, 30, 31,
1189     32, 33, 34, 35, 36, 37, 38, 39,
1190     40, 41, 42, 43, 44, 45, 46, 47,
1191     48, 49, 50, 51, 52, 53, 54, 55,
1192     56, 57, 58, 59, 60, 61, 62, 63,
1193     64, 97, 98, 99,100,101,102,103,
1194     104,105,106,107,108,109,110,111,
1195     112,113,114,115,116,117,118,119,
1196     120,121,122, 91, 92, 93, 94, 95,
1197     96, 97, 98, 99,100,101,102,103,
1198     104,105,106,107,108,109,110,111,
1199     112,113,114,115,116,117,118,119,
1200     120,121,122,123,124,125,126,127,
1201     128,129,130,131,132,133,134,135,
1202     136,137,138,139,140,141,142,143,
1203     144,145,146,147,148,149,150,151,
1204     152,153,154,155,156,157,158,159,
1205     160,161,162,163,164,165,166,167,
1206     168,169,170,171,172,173,174,175,
1207     176,177,178,179,180,181,182,183,
1208     184,185,186,187,188,189,190,191,
1209     192,193,194,195,196,197,198,199,
1210     200,201,202,203,204,205,206,207,
1211     208,209,210,211,212,213,214,215,
1212     216,217,218,219,220,221,222,223,
1213     224,225,226,227,228,229,230,231,
1214     232,233,234,235,236,237,238,239,
1215     240,241,242,243,244,245,246,247,
1216     248,249,250,251,252,253,254,255,
1217    
1218     /* This table is a case flipping table. */
1219    
1220     0, 1, 2, 3, 4, 5, 6, 7,
1221     8, 9, 10, 11, 12, 13, 14, 15,
1222     16, 17, 18, 19, 20, 21, 22, 23,
1223     24, 25, 26, 27, 28, 29, 30, 31,
1224     32, 33, 34, 35, 36, 37, 38, 39,
1225     40, 41, 42, 43, 44, 45, 46, 47,
1226     48, 49, 50, 51, 52, 53, 54, 55,
1227     56, 57, 58, 59, 60, 61, 62, 63,
1228     64, 97, 98, 99,100,101,102,103,
1229     104,105,106,107,108,109,110,111,
1230     112,113,114,115,116,117,118,119,
1231     120,121,122, 91, 92, 93, 94, 95,
1232     96, 65, 66, 67, 68, 69, 70, 71,
1233     72, 73, 74, 75, 76, 77, 78, 79,
1234     80, 81, 82, 83, 84, 85, 86, 87,
1235     88, 89, 90,123,124,125,126,127,
1236     128,129,130,131,132,133,134,135,
1237     136,137,138,139,140,141,142,143,
1238     144,145,146,147,148,149,150,151,
1239     152,153,154,155,156,157,158,159,
1240     160,161,162,163,164,165,166,167,
1241     168,169,170,171,172,173,174,175,
1242     176,177,178,179,180,181,182,183,
1243     184,185,186,187,188,189,190,191,
1244     192,193,194,195,196,197,198,199,
1245     200,201,202,203,204,205,206,207,
1246     208,209,210,211,212,213,214,215,
1247     216,217,218,219,220,221,222,223,
1248     224,225,226,227,228,229,230,231,
1249     232,233,234,235,236,237,238,239,
1250     240,241,242,243,244,245,246,247,
1251     248,249,250,251,252,253,254,255,
1252    
1253     /* This table contains bit maps for various character classes. Each map is 32
1254     bytes long and the bits run from the least significant end of each byte. The
1255     classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1256     graph, print, punct, and cntrl. Other classes are built from combinations. */
1257    
1258     0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1259     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1260     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1261     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1262    
1263     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1264     0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1265     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1266     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1267    
1268     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1269     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1270     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1271     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1272    
1273     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1274     0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1275     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1276     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1277    
1278     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1279     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1280     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1281     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1282    
1283     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1284     0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1285     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1286     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1287    
1288     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1289     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1290     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1291     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1292    
1293     0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1294     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1295     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1296     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1297    
1298     0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1299     0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1300     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1301     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1302    
1303     0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1304     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1305     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1306     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1307    
1308     /* This table identifies various classes of character by individual bits:
1309     0x01 white space character
1310     0x02 letter
1311     0x04 decimal digit
1312     0x08 hexadecimal digit
1313     0x10 alphanumeric or '_'
1314     0x80 regular expression metacharacter or binary zero
1315     */
1316    
1317     0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1318 ph10 1405 0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
1319 ph10 541 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1320     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1321     0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
1322     0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1323     0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1324     0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1325     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1326     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1327     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1328     0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1329     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1330     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1331     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1332     0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1333     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1334     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1335     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1336     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1337     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1338     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1339     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1340     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1341     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1342     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1343     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1344     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1345     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1346     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1347     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1348     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1349    
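1350 ph10 1405 /* This is a set of tables that came originally from a Windows user. It seems
1351     to be at least an approximation of ISO 8859. In particular, there are
1352     characters with codes above 127 that are marked as spaces, letters, etc. The
four sub-tables appear in the same order and with the same sizes as in tables0
(lower casing, case flipping, class bit maps, character types), but without
the per-section comments. */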
1353 ph10 541
1354 ph10 836 static const pcre_uint8 tables1[] = {
1355 ph10 541 0,1,2,3,4,5,6,7,
1356     8,9,10,11,12,13,14,15,
1357     16,17,18,19,20,21,22,23,
1358     24,25,26,27,28,29,30,31,
1359     32,33,34,35,36,37,38,39,
1360     40,41,42,43,44,45,46,47,
1361     48,49,50,51,52,53,54,55,
1362     56,57,58,59,60,61,62,63,
1363     64,97,98,99,100,101,102,103,
1364     104,105,106,107,108,109,110,111,
1365     112,113,114,115,116,117,118,119,
1366     120,121,122,91,92,93,94,95,
1367     96,97,98,99,100,101,102,103,
1368     104,105,106,107,108,109,110,111,
1369     112,113,114,115,116,117,118,119,
1370     120,121,122,123,124,125,126,127,
1371     128,129,130,131,132,133,134,135,
1372     136,137,138,139,140,141,142,143,
1373     144,145,146,147,148,149,150,151,
1374     152,153,154,155,156,157,158,159,
1375     160,161,162,163,164,165,166,167,
1376     168,169,170,171,172,173,174,175,
1377     176,177,178,179,180,181,182,183,
1378     184,185,186,187,188,189,190,191,
1379     224,225,226,227,228,229,230,231,
1380     232,233,234,235,236,237,238,239,
1381     240,241,242,243,244,245,246,215,
1382     248,249,250,251,252,253,254,223,
1383     224,225,226,227,228,229,230,231,
1384     232,233,234,235,236,237,238,239,
1385     240,241,242,243,244,245,246,247,
1386     248,249,250,251,252,253,254,255,
1387     0,1,2,3,4,5,6,7,
1388     8,9,10,11,12,13,14,15,
1389     16,17,18,19,20,21,22,23,
1390     24,25,26,27,28,29,30,31,
1391     32,33,34,35,36,37,38,39,
1392     40,41,42,43,44,45,46,47,
1393     48,49,50,51,52,53,54,55,
1394     56,57,58,59,60,61,62,63,
1395     64,97,98,99,100,101,102,103,
1396     104,105,106,107,108,109,110,111,
1397     112,113,114,115,116,117,118,119,
1398     120,121,122,91,92,93,94,95,
1399     96,65,66,67,68,69,70,71,
1400     72,73,74,75,76,77,78,79,
1401     80,81,82,83,84,85,86,87,
1402     88,89,90,123,124,125,126,127,
1403     128,129,130,131,132,133,134,135,
1404     136,137,138,139,140,141,142,143,
1405     144,145,146,147,148,149,150,151,
1406     152,153,154,155,156,157,158,159,
1407     160,161,162,163,164,165,166,167,
1408     168,169,170,171,172,173,174,175,
1409     176,177,178,179,180,181,182,183,
1410     184,185,186,187,188,189,190,191,
1411     224,225,226,227,228,229,230,231,
1412     232,233,234,235,236,237,238,239,
1413     240,241,242,243,244,245,246,215,
1414     248,249,250,251,252,253,254,223,
1415     192,193,194,195,196,197,198,199,
1416     200,201,202,203,204,205,206,207,
1417     208,209,210,211,212,213,214,247,
1418     216,217,218,219,220,221,222,255,
1419     0,62,0,0,1,0,0,0,
1420     0,0,0,0,0,0,0,0,
1421     32,0,0,0,1,0,0,0,
1422     0,0,0,0,0,0,0,0,
1423     0,0,0,0,0,0,255,3,
1424     126,0,0,0,126,0,0,0,
1425     0,0,0,0,0,0,0,0,
1426     0,0,0,0,0,0,0,0,
1427     0,0,0,0,0,0,255,3,
1428     0,0,0,0,0,0,0,0,
1429     0,0,0,0,0,0,12,2,
1430     0,0,0,0,0,0,0,0,
1431     0,0,0,0,0,0,0,0,
1432     254,255,255,7,0,0,0,0,
1433     0,0,0,0,0,0,0,0,
1434     255,255,127,127,0,0,0,0,
1435     0,0,0,0,0,0,0,0,
1436     0,0,0,0,254,255,255,7,
1437     0,0,0,0,0,4,32,4,
1438     0,0,0,128,255,255,127,255,
1439     0,0,0,0,0,0,255,3,
1440     254,255,255,135,254,255,255,7,
1441     0,0,0,0,0,4,44,6,
1442     255,255,127,255,255,255,127,255,
1443     0,0,0,0,254,255,255,255,
1444     255,255,255,255,255,255,255,127,
1445     0,0,0,0,254,255,255,255,
1446     255,255,255,255,255,255,255,255,
1447     0,2,0,0,255,255,255,255,
1448     255,255,255,255,255,255,255,127,
1449     0,0,0,0,255,255,255,255,
1450     255,255,255,255,255,255,255,255,
1451     0,0,0,0,254,255,0,252,
1452     1,0,0,248,1,0,0,120,
1453     0,0,0,0,254,255,255,255,
1454     0,0,128,0,0,0,128,0,
1455     255,255,255,255,0,0,0,0,
1456     0,0,0,0,0,0,0,128,
1457     255,255,255,255,0,0,0,0,
1458     0,0,0,0,0,0,0,0,
1459     128,0,0,0,0,0,0,0,
1460     0,1,1,0,1,1,0,0,
1461     0,0,0,0,0,0,0,0,
1462     0,0,0,0,0,0,0,0,
1463     1,0,0,0,128,0,0,0,
1464     128,128,128,128,0,0,128,0,
1465     28,28,28,28,28,28,28,28,
1466     28,28,0,0,0,0,0,128,
1467     0,26,26,26,26,26,26,18,
1468     18,18,18,18,18,18,18,18,
1469     18,18,18,18,18,18,18,18,
1470     18,18,18,128,128,0,128,16,
1471     0,26,26,26,26,26,26,18,
1472     18,18,18,18,18,18,18,18,
1473     18,18,18,18,18,18,18,18,
1474     18,18,18,128,128,0,0,0,
1475     0,0,0,0,0,1,0,0,
1476     0,0,0,0,0,0,0,0,
1477     0,0,0,0,0,0,0,0,
1478     0,0,0,0,0,0,0,0,
1479     1,0,0,0,0,0,0,0,
1480     0,0,18,0,0,0,0,0,
1481     0,0,20,20,0,18,0,0,
1482     0,20,18,0,0,0,0,0,
1483     18,18,18,18,18,18,18,18,
1484     18,18,18,18,18,18,18,18,
1485     18,18,18,18,18,18,18,0,
1486     18,18,18,18,18,18,18,18,
1487     18,18,18,18,18,18,18,18,
1488     18,18,18,18,18,18,18,18,
1489     18,18,18,18,18,18,18,0,
1490     18,18,18,18,18,18,18,18
1491     };
1492    
1493    
1494    
1495 ph10 558
1496     #ifndef HAVE_STRERROR
1497 nigel 49 /*************************************************
1498 ph10 558 * Provide strerror() for non-ANSI libraries *
1499     *************************************************/
1500    
1501     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1502     in their libraries, but can provide the same facility by this simple
1503     alternative function. */
1504    
1505     extern int sys_nerr;
1506     extern char *sys_errlist[];
1507    
1508     char *
1509     strerror(int n)
1510     {
1511     if (n < 0 || n >= sys_nerr) return "unknown error number";
1512     return sys_errlist[n];
1513     }
1514     #endif /* HAVE_STRERROR */
1515    
1516    
1517 ph10 1030
1518 ph10 667 /*************************************************
1519 ph10 1030 * Print newline configuration *
1520     *************************************************/
1521    
1522 ph10 1122 /*
1523     Arguments:
1524 ph10 1033 rc the return code from PCRE_CONFIG_NEWLINE
1525 ph10 1122 isc TRUE if called from "-C newline"
1526 ph10 1033 Returns: nothing
1527 ph10 1030 */
1528    
1529     static void
1530 ph10 1033 print_newline_config(int rc, BOOL isc)
1531 ph10 1030 {
1532     const char *s = NULL;
1533 ph10 1033 if (!isc) printf(" Newline sequence is ");
1534 ph10 1030 switch(rc)
1535     {
1536     case CHAR_CR: s = "CR"; break;
1537     case CHAR_LF: s = "LF"; break;
1538     case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1539     case -1: s = "ANY"; break;
1540     case -2: s = "ANYCRLF"; break;
1541 ph10 1122
1542 ph10 1030 default:
1543 ph10 1122 printf("a non-standard value: 0x%04x\n", rc);
1544 ph10 1030 return;
1545 ph10 1122 }
1546 ph10 1030
1547     printf("%s\n", s);
1548     }
1549    
1550    
1551    
1552     /*************************************************
1553 ph10 667 * JIT memory callback *
1554     *************************************************/
1555 ph10 558
1556 ph10 667 static pcre_jit_stack* jit_callback(void *arg)
1557     {
1558 ph10 926 jit_was_used = TRUE;
1559 ph10 667 return (pcre_jit_stack *)arg;
1560     }
1561 ph10 558
1562 ph10 667
1563 chpe 1055 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1564 ph10 558 /*************************************************
1565 ph10 836 * Convert UTF-8 string to value *
1566     *************************************************/
1567    
1568     /* This function takes one or more bytes that represents a UTF-8 character,
1569     and returns the value of the character.
1570    
1571     Argument:
1572     utf8bytes a pointer to the byte vector
1573     vptr a pointer to an int to receive the value
1574    
1575     Returns: > 0 => the number of bytes consumed
1576     -6 to 0 => malformed UTF-8 character at offset = (-return)
1577     */
1578    
1579     static int
1580 chpe 1086 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1581 ph10 836 {
1582 chpe 1086 pcre_uint32 c = *utf8bytes++;
1583     pcre_uint32 d = c;
1584 ph10 836 int i, j, s;
1585    
1586     for (i = -1; i < 6; i++) /* i is number of additional bytes */
1587     {
1588     if ((d & 0x80) == 0) break;
1589     d <<= 1;
1590     }
1591    
1592     if (i == -1) { *vptr = c; return 1; } /* ascii character */
1593     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1594    
1595     /* i now has a value in the range 1-5 */
1596    
1597     s = 6*i;
1598     d = (c & utf8_table3[i]) << s;
1599    
1600     for (j = 0; j < i; j++)
1601     {
1602     c = *utf8bytes++;
1603     if ((c & 0xc0) != 0x80) return -(j+1);
1604     s -= 6;
1605     d |= (c & 0x3f) << s;
1606     }
1607    
1608     /* Check that encoding was the correct unique one */
1609    
1610     for (j = 0; j < utf8_table1_size; j++)
1611 ph10 1122 if (d <= (pcre_uint32)utf8_table1[j]) break;
1612 ph10 836 if (j != i) return -(i+1);
1613    
1614     /* Valid value */
1615    
1616     *vptr = d;
1617     return i+1;
1618     }
1619     #endif /* NOUTF || SUPPORT_PCRE16 */
1620    
1621    
1622    
1623 ph10 1140 #if defined SUPPORT_PCRE8 && !defined NOUTF
1624 ph10 836 /*************************************************
1625     * Convert character value to UTF-8 *
1626     *************************************************/
1627    
1628     /* This function takes an integer value in the range 0 - 0x7fffffff
1629     and encodes it as a UTF-8 character in 1 to 6 bytes.
1630    
1631     Arguments:
1632     cvalue the character value
1633     utf8bytes pointer to buffer for result - at least 6 bytes long
1634    
1635     Returns: number of bytes placed in the buffer, or -1 if cvalue > 0x7fffffff
1636     */
1637    
1638     static int
1639 chpe 1086 ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1640 ph10 836 {
1641     register int i, j;
1642 chpe 1086 if (cvalue > 0x7fffffffu)
1643     return -1;
1644 ph10 836 for (i = 0; i < utf8_table1_size; i++)
1645 ph10 1122 if (cvalue <= (pcre_uint32)utf8_table1[i]) break;
1646 ph10 836 utf8bytes += i;
1647     for (j = i; j > 0; j--)
1648     {
1649     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1650     cvalue >>= 6;
1651     }
1652     *utf8bytes = utf8_table2[i] | cvalue;
1653     return i + 1;
1654     }
1655 ph10 842 #endif
1656 ph10 836
1657    
1658     #ifdef SUPPORT_PCRE16
1659     /*************************************************
1660     * Convert a string to 16-bit *
1661     *************************************************/
1662    
1663     /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1664     8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1665     double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1666     in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1667     result is always left in buffer16.
1668    
1669     Note that this function does not object to surrogate values. This is
1670     deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1671     for the purpose of testing that they are correctly faulted.
1672    
1673 ph10 842 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1674 ph10 836 in UTF-8 so that values greater than 255 can be handled.
1675    
1676     Arguments:
1677     data TRUE if converting a data line; FALSE for a regex
1678     p points to a byte string
1679     utf true if UTF-8 (to be converted to UTF-16)
1680     len number of bytes in the string (excluding trailing zero)
1681    
1682     Returns: number of 16-bit data items used (excluding trailing zero)
1683     OR -1 if a UTF-8 string is malformed
1684     OR -2 if a value > 0x10ffff is encountered
1685 ph10 842 OR -3 if a value > 0xffff is encountered when not in UTF mode
1686 ph10 836 */
1687    
1688     static int
1689     to16(int data, pcre_uint8 *p, int utf, int len)
1690     {
1691     pcre_uint16 *pp;
1692    
1693     if (buffer16_size < 2*len + 2)
1694     {
1695     if (buffer16 != NULL) free(buffer16);
1696     buffer16_size = 2*len + 2;
1697     buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1698     if (buffer16 == NULL)
1699     {
1700     fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1701     exit(1);
1702     }
1703     }
1704    
1705     pp = buffer16;
1706    
1707     if (!utf && !data)
1708     {
1709     while (len-- > 0) *pp++ = *p++;
1710     }
1711    
1712     else
1713     {
1714 chpe 1086 pcre_uint32 c = 0;
1715 ph10 836 while (len > 0)
1716     {
1717     int chlen = utf82ord(p, &c);
1718     if (chlen <= 0) return -1;
1719     if (c > 0x10ffff) return -2;
1720     p += chlen;
1721     len -= chlen;
1722     if (c < 0x10000) *pp++ = c; else
1723     {
1724     if (!utf) return -3;
1725     c -= 0x10000;
1726     *pp++ = 0xD800 | (c >> 10);
1727     *pp++ = 0xDC00 | (c & 0x3ff);
1728     }
1729     }
1730     }
1731    
1732     *pp = 0;
1733     return pp - buffer16;
1734     }
1735     #endif
1736    
1737 chpe 1055 #ifdef SUPPORT_PCRE32
1738     /*************************************************
1739     * Convert a string to 32-bit *
1740     *************************************************/
1741 ph10 836
1742 chpe 1055 /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1743     8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1744     times, because every character occupies at least one byte in UTF-8 and exactly
1745     four bytes (one 32-bit unit) in UTF-32. The
1746     result is always left in buffer32.
1747    
1748     Note that this function does not object to surrogate values. This is
1749     deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1750     for the purpose of testing that they are correctly faulted.
1751    
1752     Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1753     in UTF-8 so that values greater than 255 can be handled.
1754    
1755     Arguments:
1756     data TRUE if converting a data line; FALSE for a regex
1757     p points to a byte string
1758     utf true if UTF-8 (to be converted to UTF-32)
1759     len number of bytes in the string (excluding trailing zero)
1760    
1761     Returns: number of 32-bit data items used (excluding trailing zero)
1762     OR -1 if a UTF-8 string is malformed
1763     OR -2 if a value > 0x10ffff is encountered
1764     OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1765     */
1766    
1767     static int
1768     to32(int data, pcre_uint8 *p, int utf, int len)
1769     {
1770     pcre_uint32 *pp;
1771    
1772     if (buffer32_size < 4*len + 4)
1773     {
1774     if (buffer32 != NULL) free(buffer32);
1775     buffer32_size = 4*len + 4;
1776     buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1777     if (buffer32 == NULL)
1778     {
1779     fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1780     exit(1);
1781     }
1782     }
1783    
1784     pp = buffer32;
1785    
1786     if (!utf && !data)
1787     {
1788     while (len-- > 0) *pp++ = *p++;
1789     }
1790    
1791     else
1792     {
1793 chpe 1086 pcre_uint32 c = 0;
1794 chpe 1055 while (len > 0)
1795     {
1796     int chlen = utf82ord(p, &c);
1797     if (chlen <= 0) return -1;
1798     if (utf)
1799     {
1800     if (c > 0x10ffff) return -2;
1801     if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1802     }
1803    
1804     p += chlen;
1805     len -= chlen;
1806     *pp++ = c;
1807     }
1808     }
1809    
1810     *pp = 0;
1811     return pp - buffer32;
1812     }
1813 chpe 1117
1814     /* Check that a 32-bit character string is valid UTF-32.
1815    
1816     Arguments:
1817     string points to the string
1818     length length of string, or -1 if the string is zero-terminated
1819    
1820     Returns: TRUE if the string is a valid UTF-32 string
1821     FALSE otherwise
1822     */
1823    
1824 ph10 1261 #ifdef NEVER /* Not used */
1825 chpe 1117 #ifdef SUPPORT_UTF
1826     static BOOL
1827     valid_utf32(pcre_uint32 *string, int length)
1828     {
1829     register pcre_uint32 *p;
1830     register pcre_uint32 c;
1831    
1832     for (p = string; length-- > 0; p++)
1833     {
1834     c = *p;
1835 ph10 1261 if (c > 0x10ffffu) return FALSE; /* Too big */
1836     if ((c & 0xfffff800u) == 0xd800u) return FALSE; /* Surrogate */
1837 chpe 1117 }
1838    
1839     return TRUE;
1840     }
1841     #endif /* SUPPORT_UTF */
1842 ph10 1202 #endif /* NEVER */
1843 ph10 1261 #endif /* SUPPORT_PCRE32 */
1844 ph10 1202
1845    
1846 ph10 836 /*************************************************
1847 nigel 91 * Read or extend an input line *
1848     *************************************************/
1849    
1850     /* Input lines are read into buffer, but both patterns and data lines can be
1851     continued over multiple input lines. In addition, if the buffer fills up, we
1852     want to automatically expand it so as to be able to handle extremely large
1853     lines that are needed for certain stress tests. When the input buffer is
1854     expanded, the other two buffers must also be expanded likewise, and the
1855     contents of pbuffer, which are a copy of the input for callouts, must be
1856     preserved (for when expansion happens for a data line). This is not the most
1857     optimal way of handling this, but hey, this is just a test program!
1858    
1859     Arguments:
1860     f the file to read
1861     start where in buffer to start (this *must* be within buffer)
1862 ph10 287 prompt for stdin or readline()
1863 nigel 91
1864     Returns: pointer to the start of new data
1865     could be a copy of start, or could be moved
1866     NULL if no data read and EOF reached
1867     */
1868    
1869 ph10 836 static pcre_uint8 *
1870     extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1871 nigel 91 {
1872 ph10 836 pcre_uint8 *here = start;
1873 nigel 91
1874     for (;;)
1875     {
1876 ph10 904 size_t rlen = (size_t)(buffer_size - (here - buffer));
1877 nigel 93
1878 nigel 91 if (rlen > 1000)
1879     {
1880     int dlen;
1881 ph10 289
1882 ph10 936 /* If libreadline or libedit support is required, use readline() to read a
1883     line if the input is a terminal. Note that readline() removes the trailing
1884     newline, so we must put it back again, to be compatible with fgets(). */
1885 ph10 289
1886 ph10 936 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1887 ph10 287 if (isatty(fileno(f)))
1888     {
1889 ph10 289 size_t len;
1890 ph10 287 char *s = readline(prompt);
1891     if (s == NULL) return (here == start)? NULL : start;
1892     len = strlen(s);
1893 ph10 289 if (len > 0) add_history(s);
1894 ph10 287 if (len > rlen - 1) len = rlen - 1;
1895     memcpy(here, s, len);
1896     here[len] = '\n';
1897 ph10 289 here[len+1] = 0;
1898     free(s);
1899 ph10 287 }
1900 ph10 289 else
1901     #endif
1902    
1903 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
1904 ph10 289
1905 ph10 287 {
1906 ph10 516 if (f == stdin) printf("%s", prompt);
1907 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
1908     return (here == start)? NULL : start;
1909 ph10 289 }
1910    
1911 nigel 91 dlen = (int)strlen((char *)here);
1912     if (dlen > 0 && here[dlen - 1] == '\n') return start;
1913     here += dlen;
1914     }
1915    
1916     else
1917     {
1918     int new_buffer_size = 2*buffer_size;
1919 ph10 836 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1920     pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1921 nigel 91
1922 chpe 1090 if (new_buffer == NULL || new_pbuffer == NULL)
1923 nigel 91 {
1924     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1925     exit(1);
1926     }
1927    
1928     memcpy(new_buffer, buffer, buffer_size);
1929     memcpy(new_pbuffer, pbuffer, buffer_size);
1930    
1931     buffer_size = new_buffer_size;
1932    
1933     start = new_buffer + (start - buffer);
1934     here = new_buffer + (here - buffer);
1935    
1936     free(buffer);
1937     free(pbuffer);
1938    
1939     buffer = new_buffer;
1940     pbuffer = new_pbuffer;
1941     }
1942     }
1943    
1944 ph10 1346 /* Control never gets here */
1945 nigel 91 }
1946    
1947    
1948    
1949     /*************************************************
1950 nigel 63 * Read number from string *
1951     *************************************************/
1952    
1953     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1954     around with conditional compilation, just do the job by hand. It is only used
1955 nigel 93 for unpicking arguments, so just keep it simple.
1956 nigel 63
1957     Arguments:
1958     str string to be converted
1959     endptr where to put the end pointer
1960    
1961     Returns: the converted value as an int (0 if there are no digits)
1962     */
1963    
1964     static int
1965 ph10 836 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1966 nigel 63 {
1967     int result = 0;
1968     while(*str != 0 && isspace(*str)) str++;
1969     while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1970     *endptr = str;
1971     return(result);
1972     }
1973    
1974    
1975    
1976 nigel 49 /*************************************************
1977 ph10 836 * Print one character *
1978 nigel 49 *************************************************/
1979    
1980 ph10 836 /* Print a single character either literally, or as a hex escape. */
1981 nigel 49
1982 chpe 1079 static int pchar(pcre_uint32 c, FILE *f)
1983 nigel 49 {
1984 chpe 1126 int n = 0;
1985 ph10 836 if (PRINTOK(c))
1986     {
1987     if (f != NULL) fprintf(f, "%c", c);
1988     return 1;
1989     }
1990 nigel 49
1991 ph10 836 if (c < 0x100)
1992 nigel 49 {
1993 ph10 836 if (use_utf)
1994     {
1995     if (f != NULL) fprintf(f, "\\x{%02x}", c);
1996     return 6;
1997     }
1998     else
1999     {
2000     if (f != NULL) fprintf(f, "\\x%02x", c);
2001     return 4;
2002     }
2003 nigel 49 }
2004    
2005 chpe 1085 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2006     return n >= 0 ? n : 0;
2007 ph10 836 }
2008 nigel 49
2009    
2010    
2011 ph10 836 #ifdef SUPPORT_PCRE8
2012     /*************************************************
2013     * Print 8-bit character string *
2014     *************************************************/
2015 nigel 49
2016 ph10 836 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2017     If handed a NULL file, just counts chars without printing. */
2018 nigel 49
2019 ph10 836 static int pchars(pcre_uint8 *p, int length, FILE *f)
2020     {
2021 chpe 1086 pcre_uint32 c = 0;
2022 ph10 836 int yield = 0;
2023 nigel 49
2024 ph10 836 if (length < 0)
2025     length = strlen((char *)p);
2026 nigel 49
2027 ph10 836 while (length-- > 0)
2028     {
2029     #if !defined NOUTF
2030     if (use_utf)
2031     {
2032     int rc = utf82ord(p, &c);
2033     if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
2034     {
2035     length -= rc - 1;
2036     p += rc;
2037     yield += pchar(c, f);
2038     continue;
2039     }
2040     }
2041     #endif
2042     c = *p++;
2043     yield += pchar(c, f);
2044     }
2045    
2046     return yield;
2047 nigel 49 }
2048 nigel 79 #endif
2049 nigel 49
2050    
2051 nigel 79
2052 ph10 836 #ifdef SUPPORT_PCRE16
2053 nigel 63 /*************************************************
2054 ph10 836 * Find length of 0-terminated 16-bit string *
2055 nigel 85 *************************************************/
2056    
2057 ph10 836 static int strlen16(PCRE_SPTR16 p)
2058 nigel 85 {
2059 ph10 1345 PCRE_SPTR16 pp = p;
2060     while (*pp != 0) pp++;
2061     return (int)(pp - p);
2062 nigel 85 }
2063 ph10 836 #endif /* SUPPORT_PCRE16 */
2064 nigel 85
2065    
2066 chpe 1055
2067     #ifdef SUPPORT_PCRE32
2068     /*************************************************
2069     * Find length of 0-terminated 32-bit string *
2070     *************************************************/
2071    
2072     static int strlen32(PCRE_SPTR32 p)
2073     {
2074 ph10 1345 PCRE_SPTR32 pp = p;
2075     while (*pp != 0) pp++;
2076     return (int)(pp - p);
2077 chpe 1055 }
2078     #endif /* SUPPORT_PCRE32 */
2079    
2080    
2081    
2082 ph10 836 #ifdef SUPPORT_PCRE16
2083 nigel 85 /*************************************************
2084 ph10 836 * Print 16-bit character string *
2085 nigel 63 *************************************************/
2086 nigel 49
2087 ph10 836 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2088     If handed a NULL file, just counts chars without printing. */
2089 nigel 49
2090 ph10 836 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
2091 nigel 3 {
2092 nigel 63 int yield = 0;
2093 nigel 3
2094 ph10 836 if (length < 0)
2095     length = strlen16(p);
2096    
2097 nigel 63 while (length-- > 0)
2098 nigel 3 {
2099 chpe 1079 pcre_uint32 c = *p++ & 0xffff;
2100 ph10 836 #if !defined NOUTF
2101     if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2102 nigel 63 {
2103 ph10 836 int d = *p & 0xffff;
2104 chpe 1263 if (d >= 0xDC00 && d <= 0xDFFF)
2105 nigel 63 {
2106 ph10 836 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2107     length--;
2108     p++;
2109 nigel 63 }
2110     }
2111 nigel 79 #endif
2112 ph10 836 yield += pchar(c, f);
2113     }
2114 nigel 3
2115 ph10 836 return yield;
2116     }
2117     #endif /* SUPPORT_PCRE16 */
2118 nigel 63
2119 ph10 836
2120    
2121 chpe 1055 #ifdef SUPPORT_PCRE32
2122     /*************************************************
2123     * Print 32-bit character string *
2124     *************************************************/
2125    
2126     /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2127     If handed a NULL file, just counts chars without printing. */
2128    
2129 chpe 1117 static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2130 chpe 1055 {
2131     int yield = 0;
2132    
2133 ph10 1202 (void)(utf); /* Avoid compiler warning */
2134    
2135 chpe 1055 if (length < 0)
2136     length = strlen32(p);
2137    
2138     while (length-- > 0)
2139     {
2140 ph10 1122 pcre_uint32 c = *p++;
2141 chpe 1055 yield += pchar(c, f);
2142     }
2143    
2144     return yield;
2145     }
2146     #endif /* SUPPORT_PCRE32 */
2147    
2148    
2149    
2150 ph10 836 #ifdef SUPPORT_PCRE8
2151     /*************************************************
2152     * Read a capture name (8-bit) and check it *
2153     *************************************************/
2154    
2155     static pcre_uint8 *
2156     read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
2157     {
2158     pcre_uint8 *npp = *pp;
2159     while (isalnum(*p)) *npp++ = *p++;
2160     *npp++ = 0;
2161     *npp = 0;
2162     if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
2163     {
2164     fprintf(outfile, "no parentheses with name \"");
2165     PCHARSV(*pp, 0, -1, outfile);
2166     fprintf(outfile, "\"\n");
2167 nigel 63 }
2168 nigel 3
2169 ph10 836 *pp = npp;
2170     return p;
2171 nigel 63 }
2172 ph10 836 #endif /* SUPPORT_PCRE8 */
2173 nigel 23
2174 nigel 3
2175 nigel 23
2176 ph10 836 #ifdef SUPPORT_PCRE16
2177 nigel 63 /*************************************************
2178 ph10 836 * Read a capture name (16-bit) and check it *
2179     *************************************************/
2180    
2181     /* Note that the text being read is 8-bit. */
2182    
2183     static pcre_uint8 *
2184     read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
2185     {
2186     pcre_uint16 *npp = *pp;
2187     while (isalnum(*p)) *npp++ = *p++;
2188     *npp++ = 0;
2189     *npp = 0;
2190 zherczeg 852 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
2191 ph10 836 {
2192     fprintf(outfile, "no parentheses with name \"");
2193     PCHARSV(*pp, 0, -1, outfile);
2194     fprintf(outfile, "\"\n");
2195     }
2196     *pp = npp;
2197     return p;
2198     }
2199     #endif /* SUPPORT_PCRE16 */
2200    
2201    
2202    
2203 chpe 1055 #ifdef SUPPORT_PCRE32
2204 ph10 836 /*************************************************
2205 chpe 1055 * Read a capture name (32-bit) and check it *
2206     *************************************************/
2207    
2208     /* Note that the text being read is 8-bit. */
2209    
2210     static pcre_uint8 *
2211     read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2212     {
2213     pcre_uint32 *npp = *pp;
2214     while (isalnum(*p)) *npp++ = *p++;
2215     *npp++ = 0;
2216     *npp = 0;
2217     if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2218     {
2219     fprintf(outfile, "no parentheses with name \"");
2220     PCHARSV(*pp, 0, -1, outfile);
2221     fprintf(outfile, "\"\n");
2222     }
2223     *pp = npp;
2224     return p;
2225     }
2226     #endif /* SUPPORT_PCRE32 */
2227    
2228    
2229    
2230     /*************************************************
2231 ph10 1454 * Stack guard function *
2232     *************************************************/
2233    
2234     /* Called from PCRE when set in pcre_stack_guard. We give an error (non-zero)
2235     return when a count overflows. */
2236    
2237     static int stack_guard(void)
2238     {
2239     return stack_guard_return;
2240     }
2241    
2242     /*************************************************
2243 nigel 63 * Callout function *
2244     *************************************************/
2245 nigel 3
2246 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
2247     the match. Yield zero unless more callouts than the fail count, or the callout
2248     data is not zero. */
2249 nigel 3
2250 nigel 63 static int callout(pcre_callout_block *cb)
2251     {
2252     FILE *f = (first_callout | callout_extra)? outfile : NULL;
2253 nigel 75 int i, pre_start, post_start, subject_length;
2254 nigel 3
2255 nigel 63 if (callout_extra)
2256     {
2257     fprintf(f, "Callout %d: last capture = %d\n",
2258     cb->callout_number, cb->capture_last);
2259 nigel 3
2260 ph10 1526 if (cb->offset_vector != NULL)
2261     {
2262     for (i = 0; i < cb->capture_top * 2; i += 2)
2263 nigel 63 {
2264 ph10 1526 if (cb->offset_vector[i] < 0)
2265     fprintf(f, "%2d: <unset>\n", i/2);
2266     else
2267     {
2268     fprintf(f, "%2d: ", i/2);
2269     PCHARSV(cb->subject, cb->offset_vector[i],
2270     cb->offset_vector[i+1] - cb->offset_vector[i], f);
2271     fprintf(f, "\n");
2272     }
2273 nigel 63 }
2274 ph10 1526 }
2275 nigel 63 }
2276 nigel 3
2277 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
2278     datails. On subsequent calls in the same match, we use pchars just to find the
2279     printed lengths of the substrings. */
2280 nigel 3
2281 nigel 63 if (f != NULL) fprintf(f, "--->");
2282 nigel 3
2283 ph10 836 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2284     PCHARS(post_start, cb->subject, cb->start_match,
2285 nigel 63 cb->current_position - cb->start_match, f);
2286 nigel 3
2287 ph10 836 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2288 nigel 75
2289 ph10 836 PCHARSV(cb->subject, cb->current_position,
2290 nigel 63 cb->subject_length - cb->current_position, f);
2291 nigel 3
2292 nigel 63 if (f != NULL) fprintf(f, "\n");
2293 nigel 9
2294 nigel 63 /* Always print appropriate indicators, with callout number if not already
2295 nigel 75 shown. For automatic callouts, show the pattern offset. */
2296 nigel 3
2297 nigel 75 if (cb->callout_number == 255)
2298     {
2299     fprintf(outfile, "%+3d ", cb->pattern_position);
2300     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2301     }
2302     else
2303     {
2304     if (callout_extra) fprintf(outfile, " ");
2305     else fprintf(outfile, "%3d ", cb->callout_number);
2306     }
2307 nigel 3
2308 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2309     fprintf(outfile, "^");
2310 nigel 3
2311 nigel 63 if (post_start > 0)
2312     {
2313     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2314     fprintf(outfile, "^");
2315 nigel 3 }
2316    
2317 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2318     fprintf(outfile, " ");
2319    
2320     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2321     pbuffer + cb->pattern_position);
2322    
2323 nigel 63 fprintf(outfile, "\n");
2324     first_callout = 0;
2325 nigel 3
2326 ph10 654 if (cb->mark != last_callout_mark)
2327 ph10 645 {
2328 ph10 836 if (cb->mark == NULL)
2329     fprintf(outfile, "Latest Mark: <unset>\n");
2330     else
2331     {
2332     fprintf(outfile, "Latest Mark: ");
2333     PCHARSV(cb->mark, 0, -1, outfile);
2334     putc('\n', outfile);
2335     }
2336 ph10 654 last_callout_mark = cb->mark;
2337     }
2338 ph10 645
2339 nigel 71 if (cb->callout_data != NULL)
2340 nigel 49 {
2341 nigel 71 int callout_data = *((int *)(cb->callout_data));
2342     if (callout_data != 0)
2343     {
2344     fprintf(outfile, "Callout data = %d\n", callout_data);
2345     return callout_data;
2346     }
2347 nigel 63 }
2348 nigel 49
2349 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
2350     (++callout_count >= callout_fail_count)? 1 : 0;
2351 nigel 3 }
2352    
2353    
2354 nigel 63 /*************************************************
2355 nigel 73 * Local malloc functions *
2356 nigel 63 *************************************************/
2357 nigel 3
2358 ph10 667 /* Alternative malloc function, to test functionality and save the size of a
2359 ph10 836 compiled re, which is the first store request that pcre_compile() makes. The
2360     show_malloc variable is set only during matching. */
2361 nigel 3
2362     static void *new_malloc(size_t size)
2363     {
2364 nigel 73 void *block = malloc(size);
2365     if (show_malloc)
2366 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2367 nigel 73 return block;
2368 nigel 3 }
2369    
2370 nigel 73 static void new_free(void *block)
2371     {
2372     if (show_malloc)
2373     fprintf(outfile, "free %p\n", block);
2374     free(block);
2375     }
2376 nigel 3
2377 nigel 73 /* For recursion malloc/free, to test stacking calls */
2378    
2379     static void *stack_malloc(size_t size)
2380     {
2381     void *block = malloc(size);
2382     if (show_malloc)
2383 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2384 nigel 73 return block;
2385     }
2386    
2387     static void stack_free(void *block)
2388     {
2389     if (show_malloc)
2390     fprintf(outfile, "stack_free %p\n", block);
2391     free(block);
2392     }
2393    
2394    
2395 nigel 63 /*************************************************
2396     * Call pcre_fullinfo() *
2397     *************************************************/
2398 nigel 43
2399 ph10 836 /* Get one piece of information from the pcre_fullinfo() function. When only
2400 chpe 1055 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2401 ph10 836 value, but the code is defensive.
2402 nigel 43
2403 ph10 836 Arguments:
2404     re compiled regex
2405     study study data
2406     option PCRE_INFO_xxx option
2407     ptr where to put the data
2408    
2409     Returns: 0 when OK, < 0 on error
2410     */
2411    
2412     static int
2413     new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2414 nigel 43 {
2415     int rc;
2416 ph10 836
2417 chpe 1055 if (pcre_mode == PCRE32_MODE)
2418     #ifdef SUPPORT_PCRE32
2419     rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2420     #else
2421     rc = PCRE_ERROR_BADMODE;
2422     #endif
2423     else if (pcre_mode == PCRE16_MODE)
2424 ph10 836 #ifdef SUPPORT_PCRE16
2425 zherczeg 852 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2426 ph10 836 #else
2427     rc = PCRE_ERROR_BADMODE;
2428     #endif
2429     else
2430     #ifdef SUPPORT_PCRE8
2431     rc = pcre_fullinfo(re, study, option, ptr);
2432     #else
2433     rc = PCRE_ERROR_BADMODE;
2434     #endif
2435    
2436 ph10 1313 if (rc < 0 && rc != PCRE_ERROR_UNSET)
2437 ph10 836 {
2438     fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2439 chpe 1055 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2440 ph10 836 if (rc == PCRE_ERROR_BADMODE)
2441 chpe 1055 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2442     "%d-bit mode\n", 8 * CHAR_SIZE,
2443     8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2444 ph10 836 }
2445    
2446     return rc;
2447 nigel 43 }
2448    
2449    
2450    
2451 nigel 63 /*************************************************
2452 ph10 836 * Swap byte functions *
2453 nigel 75 *************************************************/
2454    
2455 ph10 836 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2456     value, respectively.
2457    
2458     Arguments:
2459     value any number
2460    
2461     Returns: the byte swapped value
2462     */
2463    
2464     static pcre_uint32
2465     swap_uint32(pcre_uint32 value)
2466 nigel 75 {
2467     return ((value & 0x000000ff) << 24) |
2468     ((value & 0x0000ff00) << 8) |
2469     ((value & 0x00ff0000) >> 8) |
2470 ph10 836 (value >> 24);
2471 nigel 75 }
2472    
2473 ph10 836 static pcre_uint16
2474     swap_uint16(pcre_uint16 value)
2475     {
2476     return (value >> 8) | (value << 8);
2477     }
2478 nigel 75
2479    
2480    
2481     /*************************************************
2482 ph10 836 * Flip bytes in a compiled pattern *
2483     *************************************************/
2484    
2485     /* This function is called if the 'F' option was present on a pattern that is
2486     to be written to a file. We flip the bytes of all the integer fields in the
2487     regex data block and the study block. In 16-bit mode this also flips relevant
2488     bytes in the pattern itself. This is to make it possible to test PCRE's
2489     ability to reload byte-flipped patterns, e.g. those compiled on a different
2490     architecture. */
2491    
#if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16

/* Byte-flip, in place, a pattern compiled by the 8-bit or 16-bit library.

The integer fields of the header and, when study data is attached, of the
study block are always flipped, and the magic number is replaced by
REVERSED_MAGIC_NUMBER. When 16-bit support is compiled in and pcre_mode is
PCRE16_MODE, the name table and every 16-bit unit of the compiled code are
flipped as well, walking the code opcode by opcode until OP_END.

The walk is driven by the values as they were BEFORE flipping: each opcode is
read first and then overwritten with its swapped form.

Arguments:
  ere         the compiled pattern (host byte order on entry)
  extra       the extra block, or NULL; its study data is flipped only when
                PCRE_EXTRA_STUDY_DATA is set in its flags

Returns:      nothing
*/

static void
regexflip8_or_16(pcre *ere, pcre_extra *extra)
{
real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
#ifdef SUPPORT_PCRE16
int op;

/* These are computed from the header while it is still in host order. The
first pass of the loop below swaps "length" units starting at the name table;
the walk then continues with the opcode that follows. */

pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
int length = re->name_count * re->name_entry_size;
#ifdef SUPPORT_UTF
BOOL utf = (re->options & PCRE_UTF16) != 0;
BOOL utf16_char = FALSE;
#endif /* SUPPORT_UTF */
#endif /* SUPPORT_PCRE16 */

/* Always flip the bytes in the main data block and study blocks. */

re->magic_number = REVERSED_MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
re->flags = swap_uint32(re->flags);
re->limit_match = swap_uint32(re->limit_match);
re->limit_recursion = swap_uint32(re->limit_recursion);
re->first_char = swap_uint16(re->first_char);
re->req_char = swap_uint16(re->req_char);
re->max_lookbehind = swap_uint16(re->max_lookbehind);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);
re->ref_count = swap_uint16(re->ref_count);

if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0)
  {
  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
  rsd->size = swap_uint32(rsd->size);
  rsd->flags = swap_uint32(rsd->flags);
  rsd->minlength = swap_uint32(rsd->minlength);
  }

/* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
in the name table, if present, and then in the pattern itself. */

#ifdef SUPPORT_PCRE16
if (pcre_mode != PCRE16_MODE) return;

while(TRUE)
  {
  /* Swap previous characters. */
  while (length-- > 0)
    {
    *ptr = swap_uint16(*ptr);
    ptr++;
    }
#ifdef SUPPORT_UTF
  /* The previous opcode carried a character operand. If that character was
  the first half of a surrogate pair, its second half follows and must be
  swapped too. The unit at ptr[-1] has ALREADY been byte-swapped above, so it
  is swapped back before being tested; testing the flipped value directly
  would miss real high surrogates and falsely match characters whose low byte
  is 0xd8..0xdb. */

  if (utf16_char && (swap_uint16(ptr[-1]) & 0xfc00) == 0xd800)
    {
    /* We know that there is only one extra character in UTF-16. */
    *ptr = swap_uint16(*ptr);
    ptr++;
    }
  utf16_char = FALSE;
#endif /* SUPPORT_UTF */

  /* Get next opcode. */

  length = 0;
  op = *ptr;
  *ptr++ = swap_uint16(op);

  switch (op)
    {
    case OP_END:
    return;

#ifdef SUPPORT_UTF
    case OP_CHAR:
    case OP_CHARI:
    case OP_NOT:
    case OP_NOTI:
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    if (utf) utf16_char = TRUE;
#endif
    /* Fall through. */

    default:
    length = OP_lengths16[op] - 1;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* Skip the character bit map. */
    ptr += 32/sizeof(pcre_uint16);
    length = 0;
    break;

    case OP_XCLASS:
    /* LINK_SIZE can be 1 or 2 in 16 bit mode. The size is read here, before
    it is flipped, to find how many units remain after the size field and the
    flags unit. */
    if (LINK_SIZE > 1)
      length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
        - (1 + LINK_SIZE + 1));
    else
      length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));

    /* Reverse the size of the XCLASS instance. */
    *ptr = swap_uint16(*ptr);
    ptr++;
    if (LINK_SIZE > 1)
      {
      *ptr = swap_uint16(*ptr);
      ptr++;
      }

    /* The unit after the size holds the XCLASS flags; op is reused for it. */
    op = *ptr;
    *ptr = swap_uint16(op);
    ptr++;
    if ((op & XCL_MAP) != 0)
      {
      /* Skip the character bit map. */
      ptr += 32/sizeof(pcre_uint16);
      length -= 32/sizeof(pcre_uint16);
      }
    break;
    }
  }
/* Control should never reach here in 16 bit mode. */
#endif /* SUPPORT_PCRE16 */
}
#endif /* SUPPORT_PCRE[8|16] */
2676 ph10 836
2677    
2678    
#if defined SUPPORT_PCRE32

/* 32-bit counterpart of regexflip8_or_16(): flips the header fields, any
attached study data, the name table and then every 32-bit unit of the
compiled code, in place. */

static void
regexflip_32(pcre *ere, pcre_extra *extra)
{
real_pcre32 *re = (real_pcre32 *)ere;

/* Both values are taken from the header before it is flipped. The first
pass of the loop swaps "pending" units starting at the name table. */

pcre_uint32 *code = (pcre_uint32 *)re + re->name_table_offset;
int pending = re->name_count * re->name_entry_size;
int opcode;

/* Always flip the bytes in the main data block and study blocks. */

re->magic_number = REVERSED_MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
re->flags = swap_uint32(re->flags);
re->limit_match = swap_uint32(re->limit_match);
re->limit_recursion = swap_uint32(re->limit_recursion);
re->first_char = swap_uint32(re->first_char);
re->req_char = swap_uint32(re->req_char);
re->max_lookbehind = swap_uint16(re->max_lookbehind);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);
re->ref_count = swap_uint16(re->ref_count);

if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0)
  {
  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
  rsd->size = swap_uint32(rsd->size);
  rsd->flags = swap_uint32(rsd->flags);
  rsd->minlength = swap_uint32(rsd->minlength);
  }

/* Now the name table, if present, followed by the pattern itself. */

for (;;)
  {
  /* Swap the operands left over from the previous opcode (or, first time
  round, the name table). */

  for (; pending > 0; pending--)
    {
    *code = swap_uint32(*code);
    code++;
    }

  /* Fetch the next opcode in host order, then overwrite it with its flipped
  form. */

  pending = 0;
  opcode = *code;
  *code++ = swap_uint32(opcode);

  if (opcode == OP_END) return;

  if (opcode == OP_CLASS || opcode == OP_NCLASS)
    {
    /* Skip the character bit map. */
    code += 32/sizeof(pcre_uint32);
    }

  else if (opcode == OP_XCLASS)
    {
    int xflags;

    /* LINK_SIZE can only be 1 in 32-bit mode. Read the size before it is
    flipped. */

    pending = (int)((unsigned int)(code[0]) - (1 + LINK_SIZE + 1));

    /* Reverse the size of the XCLASS instance. */
    *code = swap_uint32(*code);
    code++;

    /* Then the flags unit. */
    xflags = *code;
    *code = swap_uint32(xflags);
    code++;

    if ((xflags & XCL_MAP) != 0)
      {
      /* Skip the character bit map. */
      code += 32/sizeof(pcre_uint32);
      pending -= 32/sizeof(pcre_uint32);
      }
    }

  else
    {
    pending = OP_lengths32[opcode] - 1;
    }
  }
/* Control should never reach here in 32 bit mode. */
}

#endif /* SUPPORT_PCRE32 */
2772    
2773    
2774    
2775     static void
2776     regexflip(pcre *ere, pcre_extra *extra)
2777     {
2778     #if defined SUPPORT_PCRE32
2779     if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2780     regexflip_32(ere, extra);
2781     #endif
2782     #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2783     if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2784     regexflip8_or_16(ere, extra);
2785     #endif
2786     }
2787    
2788    
2789    
2790 ph10 836 /*************************************************
2791 nigel 87 * Check match or recursion limit *
2792     *************************************************/
2793    
2794     static int
2795 ph10 836 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2796 nigel 87 int start_offset, int options, int *use_offsets, int use_size_offsets,
2797     int flag, unsigned long int *limit, int errnumber, const char *msg)
2798     {
2799     int count;
2800     int min = 0;
2801     int mid = 64;
2802     int max = -1;
2803    
2804     extra->flags |= flag;
2805    
2806     for (;;)
2807     {
2808     *limit = mid;
2809    
2810 ph10 836 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2811 nigel 87 use_offsets, use_size_offsets);
2812    
2813     if (count == errnumber)
2814     {
2815     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2816     min = mid;
2817     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2818     }
2819    
2820     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2821     count == PCRE_ERROR_PARTIAL)
2822     {
2823     if (mid == min + 1)
2824     {
2825     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2826     break;
2827     }
2828     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2829     max = mid;
2830     mid = (min + mid)/2;
2831     }
2832     else break; /* Some other error */
2833     }
2834    
2835     extra->flags &= ~flag;
2836     return count;
2837     }
2838    
2839    
2840    
2841     /*************************************************
2842 ph10 227 * Case-independent strncmp() function *
2843     *************************************************/
2844    
2845     /*
2846     Arguments:
2847     s first string
2848     t second string
2849     n number of characters to compare
2850    
2851     Returns: < 0, = 0, or > 0, according to the comparison
2852     */
2853    
2854     static int
2855 ph10 836 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2856 ph10 227 {
2857     while (n--)
2858     {
2859     int c = tolower(*s++) - tolower(*t++);
2860     if (c) return c;
2861     }
2862     return 0;
2863     }
2864    
2865    
2866    
2867     /*************************************************
2868 ph10 1388 * Check multicharacter option *
2869 nigel 91 *************************************************/
2870    
2871 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2872     a message and return 0 if there is no match.
2873 nigel 91
2874     Arguments:
2875     p points after the leading '<'
2876     f file for error message
2877 ph10 1404 nl TRUE to check only for newline settings
2878     stype "modifier" or "escape sequence"
2879 nigel 91
2880     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2881     */
2882    
2883     static int
2884 ph10 1388 check_mc_option(pcre_uint8 *p, FILE *f, BOOL nl, const char *stype)
2885 nigel 91 {
2886 ph10 836 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2887     if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2888     if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2889     if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2890     if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2891     if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2892     if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2893 ph10 1388
2894     if (!nl)
2895     {
2896     if (strncmpic(p, (pcre_uint8 *)"JS>", 3) == 0) return PCRE_JAVASCRIPT_COMPAT;
2897 ph10 1404 }
2898 ph10 1388
2899     fprintf(f, "Unknown %s at: <%s\n", stype, p);
2900 nigel 91 return 0;
2901     }
2902    
2903    
2904    
2905     /*************************************************
2906 nigel 93 * Usage function *
2907     *************************************************/
2908    
/* Print the command-line help text to stdout. Options that depend on how
pcretest was built (readline, 8/16/32-bit libraries, DFA, POSIX) are listed
only when the corresponding support is compiled in. The list includes the -8
option and the "ebcdic" and "ebcdic-nl" arguments of -C, which the option
scanner in main() accepts. */

static void
usage(void)
{
printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
printf("Input and output default to stdin and stdout.\n");
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif
printf("\nOptions:\n");
#ifdef SUPPORT_PCRE8
printf(" -8 use the 8-bit library\n");
#endif
#ifdef SUPPORT_PCRE16
printf(" -16 use the 16-bit library\n");
#endif
#ifdef SUPPORT_PCRE32
printf(" -32 use the 32-bit library\n");
#endif
printf(" -b show compiled code\n");
printf(" -C show PCRE compile-time options and exit\n");
printf(" -C arg show a specific compile-time option and exit\n");
printf(" with its value if numeric (else 0). The arg can be:\n");
printf(" linksize internal link size [2, 3, 4]\n");
printf(" pcre8 8 bit library support enabled [0, 1]\n");
printf(" pcre16 16 bit library support enabled [0, 1]\n");
printf(" pcre32 32 bit library support enabled [0, 1]\n");
printf(" utf Unicode Transformation Format supported [0, 1]\n");
printf(" ucp Unicode Properties supported [0, 1]\n");
printf(" jit Just-in-time compiler supported [0, 1]\n");
printf(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY]\n");
printf(" bsr \\R type [ANYCRLF, ANY]\n");
printf(" ebcdic compiled for an EBCDIC environment [0, 1]\n");
printf(" ebcdic-nl code for LF (NL) in an EBCDIC environment, else 0\n");
printf(" -d debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
printf(" -dfa force DFA matching for all subjects\n");
#endif
printf(" -help show usage information\n");
printf(" -i show information about compiled patterns\n"
       " -M find MATCH_LIMIT minimum for each subject\n"
       " -m output memory used information\n"
       " -O set PCRE_NO_AUTO_POSSESS on each pattern\n"
       " -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf(" -p use POSIX interface\n");
#endif
printf(" -q quiet: do not output PCRE version number at start\n");
printf(" -S <n> set stack size to <n> megabytes\n");
printf(" -s force each pattern to be studied at basic level\n"
       " -s+ force each pattern to be studied, using JIT if available\n"
       " -s++ ditto, verifying when JIT was actually used\n"
       " -s+n force each pattern to be studied, using JIT if available,\n"
       " where 1 <= n <= 7 selects JIT options\n"
       " -s++n ditto, verifying when JIT was actually used\n"
       " -t time compilation and execution\n");
printf(" -t <n> time compilation and execution, repeating <n> times\n");
printf(" -tm time execution (matching) only\n");
printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
printf(" -T same as -t, but show total times at the end\n");
printf(" -TM same as -tm, but show total time at the end\n");
}
2967    
2968    
2969    
2970     /*************************************************
2971 nigel 63 * Main Program *
2972     *************************************************/
2973 nigel 43
2974 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
2975     consist of a regular expression, in delimiters and optionally followed by
2976     options, followed by a set of test data, terminated by an empty line. */
2977    
2978     int main(int argc, char **argv)
2979     {
2980     FILE *infile = stdin;
2981 ph10 836 const char *version;
2982 nigel 3 int options = 0;
2983     int study_options = 0;
2984 ph10 386 int default_find_match_limit = FALSE;
2985 ph10 1363 pcre_uint32 default_options = 0;
2986 nigel 3 int op = 1;
2987     int timeit = 0;
2988 nigel 93 int timeitm = 0;
2989 ph10 1357 int showtotaltimes = 0;
2990 nigel 3 int showinfo = 0;
2991 nigel 31 int showstore = 0;
2992 ph10 667 int force_study = -1;
2993     int force_study_options = 0;
2994 nigel 87 int quiet = 0;
2995 nigel 53 int size_offsets = 45;
2996     int size_offsets_max;
2997 nigel 77 int *offsets = NULL;
2998 nigel 3 int debug = 0;
2999 nigel 11 int done = 0;
3000 nigel 77 int all_use_dfa = 0;
3001 ph10 922 int verify_jit = 0;
3002 nigel 77 int yield = 0;
3003 nigel 91 int stack_size;
3004 chpe 1090 pcre_uint8 *dbuffer = NULL;
3005 ph10 1388 pcre_uint8 lockout[24] = { 0 };
3006 chpe 1090 size_t dbuffer_size = 1u << 14;
3007 ph10 1357 clock_t total_compile_time = 0;
3008     clock_t total_study_time = 0;
3009     clock_t total_match_time = 0;
3010 nigel 3
3011 ph10 960 #if !defined NOPOSIX
3012     int posix = 0;
3013     #endif
3014     #if !defined NODFA
3015     int *dfa_workspace = NULL;
3016     #endif
3017    
3018 ph10 667 pcre_jit_stack *jit_stack = NULL;
3019    
3020 ph10 836 /* These vectors store, end-to-end, a list of zero-terminated captured
3021     substring names, each list itself being terminated by an empty name. Assume
3022     that 1024 is plenty long enough for the few names we'll be testing. It is
3023 chpe 1055 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
3024 ph10 881 for the actual memory, to ensure alignment. */
3025 ph10 667
3026 chpe 1055 pcre_uint32 copynames[1024];
3027     pcre_uint32 getnames[1024];
3028 nigel 69
3029 chpe 1055 #ifdef SUPPORT_PCRE32
3030     pcre_uint32 *cn32ptr;
3031     pcre_uint32 *gn32ptr;
3032     #endif
3033    
3034 ph10 881 #ifdef SUPPORT_PCRE16
3035 chpe 1055 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
3036     pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
3037 ph10 836 pcre_uint16 *cn16ptr;
3038     pcre_uint16 *gn16ptr;
3039 ph10 881 #endif
3040 nigel 91
3041 ph10 881 #ifdef SUPPORT_PCRE8
3042 ph10 836 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
3043     pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
3044     pcre_uint8 *cn8ptr;
3045     pcre_uint8 *gn8ptr;
3046 ph10 881 #endif
3047 nigel 91
3048 ph10 836 /* Get buffers from malloc() so that valgrind will check their misuse when
3049 ph10 1122 debugging. They grow automatically when very long lines are read. The 16-
3050 chpe 1055 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
3051 nigel 69
3052 ph10 836 buffer = (pcre_uint8 *)malloc(buffer_size);
3053     pbuffer = (pcre_uint8 *)malloc(buffer_size);
3054 nigel 69
3055 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
3056 nigel 3
3057 nigel 93 outfile = stdout;
3058    
3059     /* The following _setmode() stuff is some Windows magic that tells its runtime
3060     library to translate CRLF into a single LF character. At least, that's what
3061     I've been told: never having used Windows I take this all on trust. Originally
3062     it set 0x8000, but then I was advised that _O_BINARY was better. */
3063    
3064 nigel 75 #if defined(_WIN32) || defined(WIN32)
3065 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
3066     #endif
3067 nigel 75
3068 ph10 836 /* Get the version number: both pcre_version() and pcre16_version() give the
3069     same answer. We just need to ensure that we call one that is available. */
3070    
3071 chpe 1055 #if defined SUPPORT_PCRE8
3072 ph10 836 version = pcre_version();
3073 chpe 1055 #elif defined SUPPORT_PCRE16
3074 ph10 836 version = pcre16_version();
3075 chpe 1055 #elif defined SUPPORT_PCRE32
3076     version = pcre32_version();
3077 ph10 836 #endif
3078    
3079 nigel 3 /* Scan options */
3080    
3081     while (argc > 1 && argv[op][0] == '-')
3082     {
3083 ph10 836 pcre_uint8 *endptr;
3084 ph10 960 char *arg = argv[op];
3085 nigel 53
3086 ph10 922 if (strcmp(arg, "-m") == 0) showstore = 1;
3087     else if (strcmp(arg, "-s") == 0) force_study = 0;
3088 ph10 960
3089 ph10 922 else if (strncmp(arg, "-s+", 3) == 0)
3090 ph10 667 {
3091 ph10 922 arg += 3;
3092     if (*arg == '+') { arg++; verify_jit = TRUE; }
3093 ph10 667 force_study = 1;
3094 ph10 923 if (*arg == 0)
3095 ph10 960 force_study_options = jit_study_bits[6];
3096 ph10 923 else if (*arg >= '1' && *arg <= '7')
3097 ph10 960 force_study_options = jit_study_bits[*arg - '1'];
3098 ph10 923 else goto BAD_ARG;
3099 ph10 691 }
3100 chpe 1097 else if (strcmp(arg, "-8") == 0)
3101     {
3102     #ifdef SUPPORT_PCRE8
3103     pcre_mode = PCRE8_MODE;
3104     #else
3105     printf("** This version of PCRE was built without 8-bit support\n");
3106     exit(1);
3107     #endif
3108     }
3109 ph10 922 else if (strcmp(arg, "-16") == 0)
3110 ph10 836 {
3111     #ifdef SUPPORT_PCRE16
3112 chpe 1055 pcre_mode = PCRE16_MODE;
3113 ph10 836 #else
3114     printf("** This version of PCRE was built without 16-bit support\n");
3115     exit(1);
3116     #endif
3117     }
3118 chpe 1200 else if (strcmp(arg, "-32") == 0)
3119 chpe 1055 {
3120     #ifdef SUPPORT_PCRE32
3121     pcre_mode = PCRE32_MODE;
3122     #else
3123     printf("** This version of PCRE was built without 32-bit support\n");
3124     exit(1);
3125     #endif
3126     }
3127 ph10 922 else if (strcmp(arg, "-q") == 0) quiet = 1;
3128     else if (strcmp(arg, "-b") == 0) debug = 1;
3129     else if (strcmp(arg, "-i") == 0) showinfo = 1;
3130     else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3131     else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
3132 ph10 1398 else if (strcmp(arg, "-O") == 0) default_options |= PCRE_NO_AUTO_POSSESS;
3133 nigel 79 #if !defined NODFA
3134 ph10 922 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
3135 nigel 79 #endif
3136 ph10 922 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
3137 ph10 836 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3138 nigel 65 *endptr == 0))
3139 nigel 53 {
3140     op++;
3141     argc--;
3142     }
3143 ph10 1357 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
3144     strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
3145 nigel 93 {
3146 ph10 1357 int temp;
3147 ph10 922 int both = arg[2] == 0;
3148 ph10 1404 showtotaltimes = arg[1] == 'T';
3149 ph10 836 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
3150 nigel 93 *endptr == 0))
3151     {
3152     timeitm = temp;
3153     op++;
3154     argc--;
3155     }
3156     else timeitm = LOOPREPEAT;
3157     if (both) timeit = timeitm;
3158     }
3159 ph10 922 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
3160 ph10 836 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3161 nigel 91 *endptr == 0))
3162     {
3163 ph10 1254 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
3164 nigel 91 printf("PCRE: -S not supported on this OS\n");
3165     exit(1);
3166     #else
3167     int rc;
3168     struct rlimit rlim;
3169     getrlimit(RLIMIT_STACK, &rlim);
3170     rlim.rlim_cur = stack_size * 1024 * 1024;
3171     rc = setrlimit(RLIMIT_STACK, &rlim);
3172     if (rc != 0)
3173     {
3174     printf("PCRE: setrlimit() failed with error %d\n", rc);
3175     exit(1);
3176     }
3177     op++;
3178     argc--;
3179     #endif
3180     }
3181 nigel 53 #if !defined NOPOSIX
3182 ph10 922 else if (strcmp(arg, "-p") == 0) posix = 1;
3183 nigel 53 #endif
3184 ph10 922 else if (strcmp(arg, "-C") == 0)
3185 nigel 63 {
3186     int rc;
3187 ph10 392 unsigned long int lrc;
3188 ph10 836
3189     if (argc > 2)
3190     {
3191     if (strcmp(argv[op + 1], "linksize") == 0)
3192     {
3193     (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3194     printf("%d\n", rc);
3195     yield = rc;
3196 ph10 1320
3197 ph10 1254 #ifdef __VMS
3198     vms_setsymbol("LINKSIZE",0,yield );
3199     #endif
3200 ph10 836 }
3201 ph10 1033 else if (strcmp(argv[op + 1], "pcre8") == 0)
3202 ph10 836 {
3203     #ifdef SUPPORT_PCRE8
3204     printf("1\n");
3205     yield = 1;
3206     #else
3207     printf("0\n");
3208     yield = 0;
3209     #endif
3210 ph10 1254 #ifdef __VMS
3211     vms_setsymbol("PCRE8",0,yield );
3212     #endif
3213 ph10 836 }
3214 ph10 1033 else if (strcmp(argv[op + 1], "pcre16") == 0)
3215 ph10 836 {
3216     #ifdef SUPPORT_PCRE16
3217     printf("1\n");
3218     yield = 1;
3219     #else
3220     printf("0\n");
3221     yield = 0;
3222     #endif
3223 ph10 1254 #ifdef __VMS
3224     vms_setsymbol("PCRE16",0,yield );
3225     #endif
3226 ph10 836 }
3227 chpe 1055 else if (strcmp(argv[op + 1], "pcre32") == 0)
3228 ph10 836 {
3229 chpe 1055 #ifdef SUPPORT_PCRE32
3230     printf("1\n");
3231     yield = 1;
3232     #else
3233     printf("0\n");
3234     yield = 0;
3235     #endif
3236 ph10 1254 #ifdef __VMS
3237     vms_setsymbol("PCRE32",0,yield );
3238     #endif
3239 chpe 1055 }
3240 ph10 1254 else if (strcmp(argv[op + 1], "utf") == 0)
3241 chpe 1055 {
3242 ph10 836 #ifdef SUPPORT_PCRE8
3243 chpe 1055 if (pcre_mode == PCRE8_MODE)
3244     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3245     #endif
3246     #ifdef SUPPORT_PCRE16
3247     if (pcre_mode == PCRE16_MODE)
3248     (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3249     #endif
3250     #ifdef SUPPORT_PCRE32
3251     if (pcre_mode == PCRE32_MODE)
3252     (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3253     #endif
3254 ph10 836 printf("%d\n", rc);
3255     yield = rc;
3256 ph10 1254 #ifdef __VMS
3257     vms_setsymbol("UTF",0,yield );
3258     #endif
3259 ph10 836 }
3260 ph10 1033 else if (strcmp(argv[op + 1], "ucp") == 0)
3261 ph10 836 {
3262     (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3263     printf("%d\n", rc);
3264     yield = rc;
3265     }
3266 ph10 1033 else if (strcmp(argv[op + 1], "jit") == 0)
3267 ph10 836 {
3268     (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3269     printf("%d\n", rc);
3270     yield = rc;
3271     }
3272 ph10 1033 else if (strcmp(argv[op + 1], "newline") == 0)
3273 ph10 842 {
3274 ph10 838 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3275 ph10 1122 print_newline_config(rc, TRUE);
3276 ph10 842 }
3277 ph10 1450 else if (strcmp(argv[op + 1], "bsr") == 0)
3278     {
3279     (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3280     printf("%s\n", rc? "ANYCRLF" : "ANY");
3281     }
3282 ph10 1033 else if (strcmp(argv[op + 1], "ebcdic") == 0)
3283     {
3284     #ifdef EBCDIC
3285     printf("1\n");
3286 ph10 1122 yield = 1;
3287 ph10 1033 #else
3288 ph10 1122 printf("0\n");
3289     #endif
3290 ph10 1033 }
3291     else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3292     {
3293     #ifdef EBCDIC
3294     printf("0x%02x\n", CHAR_LF);
3295     #else
3296 ph10 1122 printf("0\n");
3297     #endif
3298 ph10 1033 }
3299     else
3300 ph10 1122 {
3301 ph10 1033 printf("Unknown -C option: %s\n", argv[op + 1]);
3302 ph10 1122 }
3303 ph10 836 goto EXIT;
3304     }
3305 ph10 1122
3306 ph10 1033 /* No argument for -C: output all configuration information. */
3307 ph10 836
3308     printf("PCRE version %s\n", version);
3309 nigel 63 printf("Compiled with\n");
3310 ph10 1122
3311 ph10 1030 #ifdef EBCDIC
3312     printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3313 ph10 1122 #endif
3314 ph10 836
3315     /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3316     are set, either both UTFs are supported or both are not supported. */
3317    
3318 chpe 1055 #ifdef SUPPORT_PCRE8
3319     printf(" 8-bit support\n");
3320 nigel 63 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3321 chpe 1055 printf (" %sUTF-8 support\n", rc ? "" : "No ");
3322     #endif
3323     #ifdef SUPPORT_PCRE16
3324     printf(" 16-bit support\n");
3325 ph10 836 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3326 chpe 1055 printf (" %sUTF-16 support\n", rc ? "" : "No ");
3327 ph10 836 #endif
3328 chpe 1055 #ifdef SUPPORT_PCRE32
3329     printf(" 32-bit support\n");
3330     (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3331     printf (" %sUTF-32 support\n", rc ? "" : "No ");
3332     #endif
3333 ph10 836
3334     (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3335 nigel 75 printf(" %sUnicode properties support\n", rc? "" : "No ");
3336 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3337 ph10 674 if (rc)
3338 ph10 890 {
3339     const char *arch;
3340 ph10 908 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3341 ph10 890 printf(" Just-in-time compiler support: %s\n", arch);
3342 ph10 903 }
3343 ph10 674 else
3344     printf(" No just-in-time compiler support\n");
3345 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3346 ph10 1122 print_newline_config(rc, FALSE);
3347 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3348 ph10 231 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3349     "all Unicode newlines");
3350 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3351 nigel 63 printf(" Internal link size = %d\n", rc);
3352 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3353 nigel 63 printf(" POSIX malloc threshold = %d\n", rc);
3354 ph10 1389 (void)PCRE_CONFIG(PCRE_CONFIG_PARENS_LIMIT, &lrc);
3355     printf(" Parentheses nest limit = %ld\n", lrc);
3356 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3357 ph10 376 printf(" Default match limit = %ld\n", lrc);
3358 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3359 ph10 376 printf(" Default recursion depth limit = %ld\n", lrc);
3360 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3361 ph10 895 printf(" Match recursion uses %s", rc? "stack" : "heap");
3362     if (showstore)
3363 ph10 903 {
3364 ph10 901 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3365 ph10 903 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3366 ph10 895 }
3367 ph10 903 printf("\n");
3368 ph10 121 goto EXIT;
3369 nigel 63 }
3370 ph10 922 else if (strcmp(arg, "-help") == 0 ||
3371     strcmp(arg, "--help") == 0)
3372 nigel 93 {
3373     usage();
3374     goto EXIT;
3375     }
3376 nigel 3 else
3377     {
3378 ph10 960 BAD_ARG:
3379 ph10 922 printf("** Unknown or malformed option %s\n", arg);
3380 nigel 93 usage();
3381 nigel 77 yield = 1;
3382     goto EXIT;
3383 nigel 3 }
3384     op++;
3385     argc--;
3386     }
3387    
3388 nigel 53 /* Get the store for the offsets vector, and remember what it was */
3389    
3390     size_offsets_max = size_offsets;
3391 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3392 nigel 53 if (offsets == NULL)
3393     {
3394     printf("** Failed to get %d bytes of memory for offsets vector\n",
3395 ph10 151 (int)(size_offsets_max * sizeof(int)));
3396 nigel 77 yield = 1;
3397     goto EXIT;
3398 nigel 53 }
3399    
3400 nigel 3 /* Sort out the input and output files */
3401    
3402     if (argc > 1)
3403     {
3404 nigel 93 infile = fopen(argv[op], INPUT_MODE);
3405 nigel 3 if (infile == NULL)
3406     {
3407     printf("** Failed to open %s\n", argv[op]);
3408 nigel 77 yield = 1;
3409     goto EXIT;
3410 nigel 3 }
3411     }
3412    
3413     if (argc > 2)
3414     {
3415 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
3416 nigel 3 if (outfile == NULL)
3417     {
3418     printf("** Failed to open %s\n", argv[op+1]);
3419 nigel 77 yield = 1;
3420     goto EXIT;
3421 nigel 3 }
3422     }
3423    
3424     /* Set alternative malloc function */
3425    
3426 ph10 836 #ifdef SUPPORT_PCRE8
3427 nigel 3 pcre_malloc = new_malloc;
3428 nigel 73 pcre_free = new_free;
3429     pcre_stack_malloc = stack_malloc;
3430     pcre_stack_free = stack_free;
3431 ph10 836 #endif
3432 nigel 3
3433 ph10 836 #ifdef SUPPORT_PCRE16
3434     pcre16_malloc = new_malloc;
3435     pcre16_free = new_free;
3436     pcre16_stack_malloc = stack_malloc;
3437     pcre16_stack_free = stack_free;
3438     #endif
3439    
3440 chpe 1055 #ifdef SUPPORT_PCRE32
3441     pcre32_malloc = new_malloc;
3442     pcre32_free = new_free;
3443     pcre32_stack_malloc = stack_malloc;
3444     pcre32_stack_free = stack_free;
3445     #endif
3446    
3447 ph10 1388 /* Heading line unless quiet */
3448 nigel 3
3449 ph10 836 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3450 nigel 3
3451     /* Main loop */
3452    
3453 nigel 11 while (!done)
3454 nigel 3 {
3455     pcre *re = NULL;
3456     pcre_extra *extra = NULL;
3457 nigel 37
3458     #if !defined NOPOSIX /* There are still compilers that require no indent */
3459 ph10 1516 regex_t preg = { NULL, 0, 0} ;
3460 nigel 45 int do_posix = 0;
3461 nigel 37 #endif
3462    
3463 nigel 7 const char *error;
3464 ph10 836 pcre_uint8 *markptr;
3465     pcre_uint8 *p, *pp, *ppp;
3466     pcre_uint8 *to_file = NULL;
3467     const pcre_uint8 *tables = NULL;
3468 zherczeg 847 unsigned long int get_options;
3469 nigel 75 unsigned long int true_size, true_study_size = 0;
3470 ph10 1359 size_t size;
3471 ph10 654 int do_allcaps = 0;
3472 ph10 512 int do_mark = 0;
3473 nigel 3 int do_study = 0;
3474 ph10 654 int no_force_study = 0;
3475 nigel 25 int do_debug = debug;
3476 nigel 35 int do_G = 0;
3477     int do_g = 0;
3478 nigel 25 int do_showinfo = showinfo;
3479 nigel 35 int do_showrest = 0;
3480 ph10 616 int do_showcaprest = 0;
3481 nigel 75 int do_flip = 0;
3482 nigel 93 int erroroffset, len, delimiter, poffset;
3483 ph10 975
3484     #if !defined NODFA
3485 ph10 960 int dfa_matched = 0;
3486 ph10 975 #endif
3487 nigel 3
3488 ph10 836 use_utf = 0;
3489 ph10 211 debug_lengths = 1;
3490 ph10 1454 SET_PCRE_STACK_GUARD(NULL);
3491 nigel 63
3492 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3493 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3494 nigel 63 fflush(outfile);
3495 nigel 3
3496     p = buffer;
3497     while (isspace(*p)) p++;
3498     if (*p == 0) continue;
3499 ph10 1404
3500 ph10 1388 /* Handle option lock-out setting */
3501 ph10 1404
3502 ph10 1388 if (*p == '<' && p[1] == ' ')
3503     {
3504     p += 2;
3505     while (isspace(*p)) p++;
3506     if (strncmp((char *)p, "forbid ", 7) == 0)
3507     {
3508     p += 7;
3509     while (isspace(*p)) p++;
3510 ph10 1404 pp = lockout;
3511 ph10 1388 while (!isspace(*p) && pp < lockout + sizeof(lockout) - 1)
3512     *pp++ = *p++;
3513 ph10 1404 *pp = 0;
3514 ph10 1388 }
3515 ph10 1404 else
3516 ph10 1388 {
3517 ph10 1399 printf("** Unrecognized special command '%s'\n", p);
3518 ph10 1388 yield = 1;
3519 ph10 1404 goto EXIT;
3520     }
3521 ph10 1388 continue;
3522 ph10 1404 }
3523 nigel 3
3524 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
3525 nigel 3
3526 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3527     {
3528 zherczeg 841 pcre_uint32 magic;
3529 ph10 836 pcre_uint8 sbuf[8];
3530 nigel 75 FILE *f;
3531    
3532     p++;
3533 zherczeg 839 if (*p == '!')
3534     {
3535     do_debug = TRUE;
3536     do_showinfo = TRUE;
3537     p++;
3538     }
3539    
3540 nigel 75 pp = p + (int)strlen((char *)p);
3541     while (isspace(pp[-1])) pp--;
3542     *pp = 0;
3543    
3544     f = fopen((char *)p, "rb");
3545     if (f == NULL)
3546     {
3547     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3548     continue;
3549     }
3550     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3551    
3552     true_size =
3553     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3554     true_study_size =
3555     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3556    
3557 zherczeg 852 re = (pcre *)new_malloc(true_size);
3558 ph10 1017 if (re == NULL)
3559     {
3560     printf("** Failed to get %d bytes of memory for pcre object\n",
3561     (int)true_size);
3562     yield = 1;
3563     goto EXIT;
3564     }
3565 nigel 75 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3566    
3567 chpe 1055 magic = REAL_PCRE_MAGIC(re);
3568 nigel 75 if (magic != MAGIC_NUMBER)
3569     {
3570 ph10 836 if (swap_uint32(magic) == MAGIC_NUMBER)
3571 nigel 75 {
3572     do_flip = 1;
3573     }
3574     else
3575     {
3576     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3577 ph10 1017 new_free(re);
3578 nigel 75 fclose(f);
3579     continue;
3580     }
3581     }
3582    
3583 zherczeg 839 /* We hide the byte-invert info for little and big endian tests. */
3584 ph10 612 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3585 zherczeg 839 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3586 nigel 75
3587 ph10 612 /* Now see if there is any following study data. */
3588 nigel 75
3589     if (true_study_size != 0)
3590     {
3591     pcre_study_data *psd;
3592    
3593     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3594     extra->flags = PCRE_EXTRA_STUDY_DATA;
3595    
3596     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3597     extra->study_data = psd;
3598    
3599     if (fread(psd, 1, true_study_size, f) != true_study_size)
3600     {
3601     FAIL_READ:
3602     fprintf(outfile, "Failed to read data from %s\n", p);
3603 ph10 836 if (extra != NULL)
3604     {
3605     PCRE_FREE_STUDY(extra);
3606     }
3607 ph10 1017 new_free(re);
3608 nigel 75 fclose(f);
3609     continue;
3610     }
3611     fprintf(outfile, "Study data loaded from %s\n", p);
3612     do_study = 1; /* To get the data output if requested */
3613     }
3614     else fprintf(outfile, "No study data\n");
3615    
3616 ph10 836 /* Flip the necessary bytes. */
3617     if (do_flip)
3618     {
3619 zherczeg 839 int rc;
3620     PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3621     if (rc == PCRE_ERROR_BADMODE)
3622     {
3623 ph10 1313 pcre_uint32 flags_in_host_byte_order;
3624 zherczeg 1150 if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
3625     flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
3626     else
3627 ph10 1313 flags_in_host_byte_order = swap_uint32(REAL_PCRE_FLAGS(re));
3628 zherczeg 839 /* Simulate the result of the function call below. */
3629     fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3630 chpe 1055 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3631     PCRE_INFO_OPTIONS);
3632     fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3633 zherczeg 1150 "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
3634 ph10 1017 new_free(re);
3635     fclose(f);
3636 zherczeg 839 continue;
3637     }
3638 ph10 836 }
3639    
3640     /* Need to know if UTF-8 for printing data strings. */
3641    
3642 ph10 1017 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3643     {
3644     new_free(re);
3645     fclose(f);
3646     continue;
3647     }
3648 ph10 836 use_utf = (get_options & PCRE_UTF8) != 0;
3649    
3650 nigel 75 fclose(f);
3651     goto SHOW_INFO;
3652     }
3653    
3654     /* In-line pattern (the usual case). Get the delimiter and seek the end of
3655 ph10 836 the pattern; if it isn't complete, read more. */
3656 nigel 75
3657 nigel 3 delimiter = *p++;
3658    
3659 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
3660 nigel 3 {
3661 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3662 nigel 3 goto SKIP_DATA;
3663     }
3664    
3665     pp = p;
3666 ph10 530 poffset = (int)(p - buffer);
3667 nigel 3
3668     for(;;)
3669     {
3670 nigel 29 while (*pp != 0)
3671     {
3672     if (*pp == '\\' && pp[1] != 0) pp++;
3673     else if (*pp == delimiter) break;
3674     pp++;
3675     }
3676 nigel 3 if (*pp != 0) break;
3677 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3678 nigel 3 {
3679     fprintf(outfile, "** Unexpected EOF\n");
3680 nigel 11 done = 1;
3681     goto CONTINUE;
3682 nigel 3 }
3683 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3684 nigel 3 }
3685    
3686 nigel 93 /* The buffer may have moved while being extended; reset the start of data
3687     pointer to the correct relative point in the buffer. */
3688    
3689     p = buffer + poffset;
3690    
3691 nigel 29 /* If the first character after the delimiter is backslash, make
3692     the pattern end with backslash. This is purely to provide a way
3693     of testing for the error message when a pattern ends with backslash. */
3694    
3695     if (pp[1] == '\\') *pp++ = '\\';
3696    
3697 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3698     for callouts. */
3699 nigel 3
3700     *pp++ = 0;
3701 nigel 75 strcpy((char *)pbuffer, (char *)p);
3702 nigel 3
3703 ph10 1388 /* Look for modifiers and options after the final delimiter. */
3704 nigel 3
3705 ph10 1363 options = default_options;
3706 ph10 1022 study_options = force_study_options;
3707 nigel 31 log_store = showstore; /* default from command line */
3708    
3709 nigel 3 while (*pp != 0)
3710     {
3711 ph10 1388 /* Check to see whether this modifier has been locked out for this file.
3712 ph10 1404 This is complicated for the multi-character options that begin with '<'.
3713     If there is no '>' in the lockout string, all multi-character modifiers are
3714     locked out. */
3715    
3716 ph10 1388 if (strchr((char *)lockout, *pp) != NULL)
3717     {
3718     if (*pp == '<' && strchr((char *)lockout, '>') != NULL)
3719     {
3720     int x = check_mc_option(pp+1, outfile, FALSE, "modifier");
3721     if (x == 0) goto SKIP_DATA;
3722 ph10 1404
3723 ph10 1388 for (ppp = lockout; *ppp != 0; ppp++)
3724     {
3725     if (*ppp == '<')
3726     {
3727     int y = check_mc_option(ppp+1, outfile, FALSE, "modifier");
3728     if (y == 0)
3729     {
3730 ph10 1399 printf("** Error in modifier forbid data - giving up.\n");
3731 ph10 1388 yield = 1;
3732 ph10 1404 goto EXIT;
3733 ph10 1388 }
3734 ph10 1404 if (x == y)
3735 ph10 1388 {
3736     ppp = pp;
3737     while (*ppp != '>') ppp++;
3738 ph10 1404 printf("** The %.*s modifier is locked out - giving up.\n",
3739 ph10 1403 (int)(ppp - pp + 1), pp);
3740 ph10 1388 yield = 1;
3741 ph10 1404 goto EXIT;
3742     }
3743 ph10 1388 }
3744 ph10 1404 }
3745 ph10 1388 }
3746 ph10 1404
3747 ph10 1388 /* The single-character modifiers are straightforward. */
3748 ph10 1404
3749 ph10 1388 else
3750     {
3751 ph10 1399 printf("** The /%c modifier is locked out - giving up.\n", *pp);
3752 ph10 1388 yield = 1;
3753 ph10 1404 goto EXIT;
3754     }
3755     }
3756    
3757 ph10 1388 /* The modifier is not locked out; handle it. */
3758 ph10 1404
3759 nigel 3 switch (*pp++)
3760     {
3761 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
3762 nigel 35 case 'g': do_g = 1; break;
3763 nigel 3 case 'i': options |= PCRE_CASELESS; break;
3764     case 'm': options |= PCRE_MULTILINE; break;
3765     case 's': options |= PCRE_DOTALL; break;
3766     case 'x': options |= PCRE_EXTENDED; break;
3767 nigel 25
3768 ph10 616 case '+':
3769 ph10 654 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3770 ph10 616 break;
3771 ph10 654
3772     case '=': do_allcaps = 1; break;
3773 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
3774 nigel 93 case 'B': do_debug = 1; break;
3775 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
3776 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
3777 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3778 nigel 75 case 'F': do_flip = 1; break;
3779 nigel 35 case 'G': do_G = 1; break;
3780 nigel 25 case 'I': do_showinfo = 1; break;
3781 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
3782 ph10 512 case 'K': do_mark = 1; break;
3783 nigel 31 case 'M': log_store = 1; break;
3784 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3785 ph10 1398 case 'O': options |= PCRE_NO_AUTO_POSSESS; break;
3786 nigel 37
3787     #if !defined NOPOSIX
3788 nigel 3 case 'P': do_posix = 1; break;
3789 nigel 37 #endif
3790    
3791 ph10 1454 case 'Q':
3792     switch (*pp)
3793     {
3794 ph10 1459 case '0':
3795 ph10 1454 case '1':
3796     stack_guard_return = *pp++ - '0';
3797 ph10 1459 break;
3798 ph10 1454
3799     default:
3800     fprintf(outfile, "** Missing 0 or 1 after /Q\n");
3801     goto SKIP_DATA;
3802     }
3803     SET_PCRE_STACK_GUARD(stack_guard);
3804     break;
3805    
3806 ph10 654 case 'S':
3807 ph10 1022 do_study = 1;
3808     for (;;)
3809 ph10 612 {
3810 ph10 1022 switch (*pp++)
3811 ph10 667 {
3812 ph10 1022 case 'S':
3813     do_study = 0;
3814     no_force_study = 1;
3815     break;
3816    
3817     case '!':
3818     study_options |= PCRE_STUDY_EXTRA_NEEDED;
3819     break;
3820    
3821     case '+':
3822     if (*pp == '+')
3823 ph10 922 {
3824     verify_jit = TRUE;
3825 ph10 960 pp++;
3826     }
3827 ph10 923 if (*pp >= '1' && *pp <= '7')
3828     study_options |= jit_study_bits[*pp++ - '1'];
3829 ph10 960 else
3830     study_options |= jit_study_bits[6];
3831 ph10 1022 break;
3832    
3833     case '-':
3834     study_options &= ~PCRE_STUDY_ALLJIT;
3835     break;
3836    
3837     default:
3838     pp--;
3839     goto ENDLOOP;
3840 ph10 691 }
3841     }
3842 ph10 1022 ENDLOOP:
3843 ph10 612 break;
3844    
3845 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
3846 ph10 535 case 'W': options |= PCRE_UCP; break;
3847 nigel 3 case 'X': options |= PCRE_EXTRA; break;
3848 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3849 ph10 126 case 'Z': debug_lengths = 0; break;
3850 ph10 836 case '8': options |= PCRE_UTF8; use_utf = 1; break;
3851 ph10 1320 case '9': options |= PCRE_NEVER_UTF; break;
3852 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
3853 ph10 545
3854 ph10 541 case 'T':
3855     switch (*pp++)
3856     {
3857     case '0': tables = tables0; break;
3858     case '1': tables = tables1; break;
3859 ph10 545
3860 ph10 541 case '\r':
3861     case '\n':
3862 ph10 545 case ' ':
3863     case 0:
3864 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
3865 ph10 545 goto SKIP_DATA;
3866    
3867     default:
3868 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3869 ph10 545 goto SKIP_DATA;
3870 ph10 541 }
3871 ph10 545 break;
3872 nigel 25
3873     case 'L':
3874     ppp = pp;
3875 nigel 93 /* The '\r' test here is so that it works on Windows. */
3876     /* The 0 (NUL) test is just in case this is an unterminated line. */
3877     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3878 nigel 25 *ppp = 0;
3879     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3880     {
3881     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3882     goto SKIP_DATA;
3883     }
3884 nigel 93 locale_set = 1;
3885 ph10 836 tables = PCRE_MAKETABLES;
3886 nigel 25 pp = ppp;
3887     break;
3888    
3889 nigel 75 case '>':
3890     to_file = pp;
3891     while (*pp != 0) pp++;
3892     while (isspace(pp[-1])) pp--;
3893     *pp = 0;
3894     break;
3895    
3896 nigel 91 case '<':
3897     {
3898 ph10 1388 int x = check_mc_option(pp, outfile, FALSE, "modifier");
3899     if (x == 0) goto SKIP_DATA;
3900     options |= x;
3901     while (*pp++ != '>');
3902 nigel 91 }
3903     break;
3904    
3905 nigel 77 case '\r': /* So that it works in Windows */
3906     case '\n':
3907     case ' ':
3908     break;
3909 nigel 75
3910 nigel 3 default:
3911 ph10 1388 fprintf(outfile, "** Unknown modifier '%c'\n", pp[-1]);
3912 nigel 3 goto SKIP_DATA;
3913     }
3914     }
3915    
3916 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
3917 nigel 25 timing, showing, or debugging options, nor the ability to pass over
3918 ph10 836 local character tables. Neither does it have 16-bit support. */
3919 nigel 3
3920 nigel