/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1334 - (hide annotations) (download)
Wed May 15 16:53:18 2013 UTC (5 days, 22 hours ago) by ph10
File MIME type: text/plain
File size: 165595 byte(s)
Fix segfault when pcre_dfa_exec() is called with an output vector of length 
less than 2.

1 zherczeg 929 /*************************************************
2 nigel 3 * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 ph10 836 been extended and consequently is now rather, er, *very* untidy in places. The
8     addition of 16-bit support has made it even worse. :-(
9 nigel 63
10 nigel 75 -----------------------------------------------------------------------------
11     Redistribution and use in source and binary forms, with or without
12     modification, are permitted provided that the following conditions are met:
13    
14     * Redistributions of source code must retain the above copyright notice,
15     this list of conditions and the following disclaimer.
16    
17     * Redistributions in binary form must reproduce the above copyright
18     notice, this list of conditions and the following disclaimer in the
19     documentation and/or other materials provided with the distribution.
20    
21     * Neither the name of the University of Cambridge nor the names of its
22     contributors may be used to endorse or promote products derived from
23     this software without specific prior written permission.
24    
25     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35     POSSIBILITY OF SUCH DAMAGE.
36     -----------------------------------------------------------------------------
37     */
38    
39 ph10 1221 /* This program now supports the testing of all of the 8-bit, 16-bit, and
40 ph10 1142 32-bit PCRE libraries in a single program. This is different from the modules
41     such as pcre_compile.c in the library itself, which are compiled separately for
42     each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43     twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44     make use of any of the macros from pcre_internal.h that depend on
45     COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46     SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47     supported library functions. */
48 nigel 75
49 ph10 200 #ifdef HAVE_CONFIG_H
50 ph10 236 #include "config.h"
51 ph10 200 #endif
52 ph10 199
53 nigel 3 #include <ctype.h>
54     #include <stdio.h>
55     #include <string.h>
56     #include <stdlib.h>
57     #include <time.h>
58 nigel 25 #include <locale.h>
59 nigel 75 #include <errno.h>
60 nigel 3
61 ph10 936 /* Both libreadline and libedit are optionally supported. The user-supplied
62 ph10 960 original patch uses readline/readline.h for libedit, but in at least one system
63     it is installed as editline/readline.h, so the configuration code now looks for
64 ph10 936 that first, falling back to readline/readline.h. */
65    
66     #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 ph10 343 #ifdef HAVE_UNISTD_H
68 ph10 287 #include <unistd.h>
69 ph10 343 #endif
70 ph10 936 #if defined(SUPPORT_LIBREADLINE)
71 ph10 287 #include <readline/readline.h>
72     #include <readline/history.h>
73 ph10 936 #else
74     #if defined(HAVE_EDITLINE_READLINE_H)
75     #include <editline/readline.h>
76     #else
77     #include <readline/readline.h>
78 ph10 287 #endif
79 ph10 936 #endif
80     #endif
81 nigel 93
82     /* A number of things vary for Windows builds. Originally, pcretest opened its
83     input and output without "b"; then I was told that "b" was needed in some
84     environments, so it was added for release 5.0 to both the input and output. (It
85     makes no difference on Unix-like systems.) Later I was told that it is wrong
86     for the input on Windows. I've now abstracted the modes into two macros that
87     are set here, to make it easier to fiddle with them, and removed "b" from the
88     input mode under Windows. */
89    
#if defined(WIN32) || defined(_WIN32)

/* ----- Windows: text-mode input, binary-mode output ----- */

#include <io.h>      /* For _setmode() */
#include <fcntl.h>   /* For _O_BINARY */

#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

/* Windows spells these functions with a leading underscore, I'm told, though
some environments appear to define the plain names already; hence each mapping
is made only when the plain name is not yet a macro. */

#if !defined(isatty)
#define isatty _isatty
#endif

#if !defined(fileno)
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#if defined(__BORLANDC__)
#define _setmode(handle, mode) setmode(handle, mode)
#endif

#else

/* ----- Not Windows ----- */

#include <sys/time.h>       /* These two includes are needed */
#include <sys/resource.h>   /* for setrlimit(). */

#if defined(NATIVE_ZOS)
/* z/OS uses non-binary I/O */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "w"
#else
#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"
#endif

#endif  /* Windows / not Windows */
123 nigel 91
/* VMS builds need the system-service definitions and a prototype for
vms_setsymbol(), which is declared here but defined elsewhere. */

#if defined(__VMS)
#include <ssdef.h>
void vms_setsymbol(char *, char *, int);
#endif

/* In this program PRIV() is an identity mapping: PRIV(name) expands to plain
"name", with no prefix added. */

#define PRIV(name) name
131 nigel 93
132 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
133     displaying the results of pcre_study() and we also need to know about the
134     internal macros, structures, and other internal data values; pcretest has
135     "inside information" compared to a program that strictly follows the PCRE API.
136 nigel 37
137 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
138     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
139     appropriately for an application, not for building PCRE. */
140 nigel 77
141 ph10 145 #include "pcre.h"
142 nigel 77 #include "pcre_internal.h"
143    
144 ph10 836 /* The pcre_printint() function, which prints the internal form of a compiled
145     regex, is held in a separate file so that (a) it can be compiled in either
146 chpe 1087 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
147 ph10 836 when that is compiled in debug mode. */
148    
/* One prototype per supported library width. All three take the same
arguments: the compiled pattern, the stream to print to, and a flag that
selects whether lengths are printed. */

#if defined(SUPPORT_PCRE8)
void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif

#if defined(SUPPORT_PCRE16)
void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif

#if defined(SUPPORT_PCRE32)
void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
158 ph10 836
159 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
160 ph10 1046 to keep two copies, we include the source files here, changing the names of the
161 ph10 351 external symbols to prevent clashes. */
162 nigel 77
163 ph10 836 #define PCRE_INCLUDED
164 nigel 85
165     #include "pcre_tables.c"
166 ph10 1046 #include "pcre_ucd.c"
167 nigel 85
168 ph10 498 /* The definition of the macro PRINTABLE, which determines whether to print an
169 nigel 93 output character as-is or as a hex value when showing compiled patterns, is
170 ph10 836 the same as in the printint.src file. We use it here in cases when the locale
171     has not been explicitly changed, so as to get consistent output from systems
172     that differ in their output from isprint() even in the "C" locale. */
173 nigel 93
/* PRINTABLE(c) is a locale-independent "is this character printable" test
with a fixed code range: 64..254 for EBCDIC builds, 32..126 otherwise. */

#if defined(EBCDIC)
#define PRINTABLE(c) (!((c) < 64 || (c) >= 255))
#else
#define PRINTABLE(c) (!((c) < 32 || (c) >= 127))
#endif

/* PRINTOK(c) defers to isprint() once locale_set is non-zero; until then it
uses the fixed PRINTABLE() range. */

#define PRINTOK(c) (!locale_set? PRINTABLE(c) : isprint(c))
181    
182 chpe 1055 /* Posix support is disabled in 16 or 32 bit only mode. */
/* Without the 8-bit library, force NOPOSIX on unless it is already set. */
#if !defined(NOPOSIX) && !defined(SUPPORT_PCRE8)
#define NOPOSIX
#endif
186    
187 nigel 37 /* It is possible to compile this test program without including support for
188     testing the POSIX interface, though this is not available via the standard
189     Makefile. */
190    
191     #if !defined NOPOSIX
192 nigel 3 #include "pcreposix.h"
193 nigel 37 #endif
194 nigel 3
195 ph10 836 /* It is also possible, originally for the benefit of a version that was
196     imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
197     NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
198     automatically cut out the UTF support if PCRE is built without it. */
199 nigel 79
/* No UTF support in the build: define NOUTF unless the user already did. */
#if !defined(SUPPORT_UTF) && !defined(NOUTF)
#define NOUTF
#endif
205 nigel 79
206 chpe 1087 /* To make the code a bit tidier for 8/16/32-bit support, we define macros
207 ph10 836 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
208     only from one place and is handled differently). I couldn't dream up any way of
209     using a single macro to do this in a generic way, because of the many different
210     argument requirements. We know that at least one of SUPPORT_PCRE8,
211     SUPPORT_PCRE16, and SUPPORT_PCRE32 must be set. First define macros for each individual mode; then
212     use these in the definitions of generic macros.
213 ph10 107
214 ph10 836 **** Special note about the PCHARSxxx macros: the address of the string to be
215     printed is always given as two arguments: a base address followed by an offset.
216     The base address is cast to the correct data size for 8, 16 or 32 bit data; the
217     offset is in units of this size. If the string were given as base+offset in one
218     argument, the casting might be incorrectly applied. */
219    
#ifdef SUPPORT_PCRE8

/* 8-bit wrappers. Each macro forwards to the pcre_xxx() function of the same
purpose, casting string arguments to the types the 8-bit library expects. The
macros that begin "lv =", "rc =", "n =", "re =", "count =" or "extra =" assign
the function result to their first argument. */

/* --- Printing and string helpers --- */

#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + offset, len, f)

#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + offset, len, f)

/* Only cn8 is used; cn16 and cn32 exist so that all widths share one
argument list. */

#define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
  p = read_capture_name8(p, cn8, re)

#define STRLEN8(p) ((int)strlen((char *)p))

/* --- Callout and JIT stack assignment --- */

#define SET_PCRE_CALLOUT8(callout) \
  pcre_callout = callout

#define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
  pcre_assign_jit_stack(extra, callback, userdata)

/* --- Compiling --- */

#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  re = pcre_compile((char *)pat, options, error, erroffset, tables)

/* --- Copying substrings into a caller-supplied buffer --- */

#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, namesptr, \
    cbuffer, size) \
  rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
    (char *)namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)

/* --- Matching --- */

#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, \
    options, offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets)

/* --- Releasing library-allocated data --- */

#define PCRE_FREE_STUDY8(extra) \
  pcre_free_study(extra)

#define PCRE_FREE_SUBSTRING8(substring) \
  pcre_free_substring(substring)

#define PCRE_FREE_SUBSTRING_LIST8(listptr) \
  pcre_free_substring_list(listptr)

/* --- Fetching substrings and group numbers --- */

#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, getnamesptr, \
    subsptr) \
  rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
    (char *)getnamesptr, subsptr)

/* NOTE: the "rc" parameter is not used in the expansion; the pattern comes
from a variable called "re" that must be in scope where the macro is used. */

#define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)

#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
  rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)

/* --- Byte order, debug printing, studying --- */

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
  rc = pcre_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT8(re, outfile, debug_lengths) \
  pcre_printint(re, outfile, debug_lengths)

#define PCRE_STUDY8(extra, re, options, error) \
  extra = pcre_study(re, options, error)

/* --- JIT stack allocation and release --- */

#define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
  pcre_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE8(stack) \
  pcre_jit_stack_free(stack)

/* Alias so that the 8-bit function can be named with an explicit width. */

#define pcre8_maketables pcre_maketables

#endif /* SUPPORT_PCRE8 */
301    
302     /* -----------------------------------------------------------*/
303    
#ifdef SUPPORT_PCRE16

/* 16-bit wrappers. The rest of the program holds patterns, study data and
JIT stacks in the 8-bit types (pcre, pcre_extra, pcre_jit_stack), so every
macro casts to the pcre16 types on the way in and back to the 8-bit types on
the way out. String arguments are cast to PCRE_SPTR16. The macros that begin
with an assignment store the function result in their first argument. */

#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)

#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)

/* Only cn16 is used; cn8 and cn32 exist so that all widths share one
argument list. */

#define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
  p = read_capture_name16(p, cn16, re)

#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))

#define SET_PCRE_CALLOUT16(callout) \
  pcre16_callout = (int (*)(pcre16_callout_block *))callout

#define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
  pcre16_assign_jit_stack((pcre16_extra *)extra, \
    (pcre16_jit_callback)callback, userdata)

#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
    tables)

/* The two copy macros halve "size" before passing it on, converting what is
presumably a byte count into a count of 16-bit units (confirm against the
callers). */

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_UCHAR16 *)cbuffer, size/2)

#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
    (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
    workspace, size_workspace)

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
    len, start_offset, options, offsets, size_offsets)

#define PCRE_FREE_STUDY16(extra) \
  pcre16_free_study((pcre16_extra *)extra)

#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)substring)

#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)listptr)

#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)

/* NOTE: the "rc" parameter is not used in the expansion; the pattern comes
from a variable called "re" that must be in scope where the macro is used. It
is cast to pcre16 * like every other pattern argument in this group. */

#define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)

#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SPTR16 *)(void*)subsptr)

#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16 **)(void*)listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
  rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
    tables)

/* No cast here: pcre16_printint() is declared in this file as taking a
pcre * argument. */

#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  pcre16_printint(re, outfile, debug_lengths)

#define PCRE_STUDY16(extra, re, options, error) \
  extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)

#define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
  (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE16(stack) \
  pcre16_jit_stack_free((pcre16_jit_stack *)stack)

#endif /* SUPPORT_PCRE16 */
390    
391 chpe 1055 /* -----------------------------------------------------------*/
392 ph10 836
#ifdef SUPPORT_PCRE32

/* 32-bit wrappers. The rest of the program holds patterns, study data and
JIT stacks in the 8-bit types (pcre, pcre_extra, pcre_jit_stack), so every
macro casts to the pcre32 types on the way in and back to the 8-bit types on
the way out. String arguments are cast to PCRE_SPTR32. The macros that begin
with an assignment store the function result in their first argument. */

/* Unlike the 8-bit and 16-bit versions, the two printing macros also pass
"use_utf", which is taken from the scope where the macro is used. */

#define PCHARS32(lv, p, offset, len, f) \
  lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)

#define PCHARSV32(p, offset, len, f) \
  (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)

/* Only cn32 is used; cn8 and cn16 exist so that all widths share one
argument list. */

#define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
  p = read_capture_name32(p, cn32, re)

#define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))

#define SET_PCRE_CALLOUT32(callout) \
  pcre32_callout = (int (*)(pcre32_callout_block *))callout

#define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
  pcre32_assign_jit_stack((pcre32_extra *)extra, \
    (pcre32_jit_callback)callback, userdata)

#define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
  re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
    tables)

/* The two copy macros convert "size" into a count of 32-bit units. It is
assumed to be a byte count, as the size/2 in the 16-bit macros implies
(confirm against the callers), so the divisor here must be 4. Dividing by 2
would tell the library the buffer holds twice as many units as it really
does. */

#define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
    count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, (size)/4)

#define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_UCHAR32 *)cbuffer, (size)/4)

#define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
    (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
    workspace, size_workspace)

#define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
    len, start_offset, options, offsets, size_offsets)

#define PCRE_FREE_STUDY32(extra) \
  pcre32_free_study((pcre32_extra *)extra)

#define PCRE_FREE_SUBSTRING32(substring) \
  pcre32_free_substring((PCRE_SPTR32)substring)

#define PCRE_FREE_SUBSTRING_LIST32(listptr) \
  pcre32_free_substring_list((PCRE_SPTR32 *)listptr)

#define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
    count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)

/* NOTE: the "rc" parameter is not used in the expansion; the pattern comes
from a variable called "re" that must be in scope where the macro is used. It
is cast to pcre32 * like every other pattern argument in this group. */

#define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
  n = pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)ptr)

#define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_SPTR32 *)(void*)subsptr)

#define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
  rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
    (PCRE_SPTR32 **)(void*)listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
  rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
    tables)

/* No cast here: pcre32_printint() is declared in this file as taking a
pcre * argument. */

#define PCRE_PRINTINT32(re, outfile, debug_lengths) \
  pcre32_printint(re, outfile, debug_lengths)

#define PCRE_STUDY32(extra, re, options, error) \
  extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)

#define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
  (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE32(stack) \
  pcre32_jit_stack_free((pcre32_jit_stack *)stack)

#endif /* SUPPORT_PCRE32 */
479    
480    
481 ph10 1122 /* ----- More than one mode is supported; a runtime test is needed, except for
482 ph10 836 pcre_config(), and the JIT stack functions, when it doesn't matter which
483 ph10 1140 available version is called. ----- */
484 ph10 836
/* Values taken by pcre_mode to select the library width at run time. The
numeric values matter: CHAR_SIZE is computed as 1 << pcre_mode. */

enum {
  PCRE8_MODE  = 0,
  PCRE16_MODE = 1,
  PCRE32_MODE = 2
};
490 ph10 836
491 ph10 1122 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
492     defined (SUPPORT_PCRE32)) >= 2
493 ph10 836
/* 1, 2 or 4 for pcre_mode values 0, 1 and 2 respectively. */
#define CHAR_SIZE (1 << pcre_mode)
495    
496 ph10 1122 /* There doesn't seem to be an easy way of writing these macros that can cope
497     with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
498     cases separately. */
499    
500     /* ----- All three modes supported ----- */
501    
502     #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
503    
/* Run-time dispatch on pcre_mode. Each macro picks the 16-bit or 32-bit
variant when pcre_mode names that width, and uses the 8-bit variant for any
other value. The statement-form macros expand to a complete if/else chain
without a trailing semicolon, so they must be used as a single statement. */

#define PCHARS(lv, p, offset, len, f) \
  if (pcre_mode == PCRE16_MODE) \
    PCHARS16(lv, p, offset, len, f); \
  else if (pcre_mode == PCRE32_MODE) \
    PCHARS32(lv, p, offset, len, f); \
  else \
    PCHARS8(lv, p, offset, len, f)

#define PCHARSV(p, offset, len, f) \
  if (pcre_mode == PCRE16_MODE) \
    PCHARSV16(p, offset, len, f); \
  else if (pcre_mode == PCRE32_MODE) \
    PCHARSV32(p, offset, len, f); \
  else \
    PCHARSV8(p, offset, len, f)

#define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
  if (pcre_mode == PCRE16_MODE) \
    READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
  else if (pcre_mode == PCRE32_MODE) \
    READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
  else \
    READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)

#define SET_PCRE_CALLOUT(callout) \
  if (pcre_mode == PCRE16_MODE) \
    SET_PCRE_CALLOUT16(callout); \
  else if (pcre_mode == PCRE32_MODE) \
    SET_PCRE_CALLOUT32(callout); \
  else \
    SET_PCRE_CALLOUT8(callout)

/* STRLEN is an expression, not a statement, so it can be used as a value. */

#define STRLEN(p) \
  (pcre_mode == PCRE16_MODE ? STRLEN16(p) : \
   pcre_mode == PCRE32_MODE ? STRLEN32(p) : STRLEN8(p))

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  if (pcre_mode == PCRE16_MODE) \
    PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
  else if (pcre_mode == PCRE32_MODE) \
    PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
  else \
    PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
545 ph10 836
/* Run-time dispatch for compiling, substring copying and matching. The
16-bit or 32-bit variant is chosen when pcre_mode names that width; any other
value selects the 8-bit variant. */

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  if (pcre_mode == PCRE16_MODE) \
    PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
  else if (pcre_mode == PCRE32_MODE) \
    PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
  else \
    PCRE_COMPILE8(re, pat, options, error, erroffset, tables)

/* No dispatch is done for pcre_config(): the 8-bit function is always the
one named here. */

#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  if (pcre_mode == PCRE16_MODE) \
    PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size); \
  else if (pcre_mode == PCRE32_MODE) \
    PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size); \
  else \
    PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  if (pcre_mode == PCRE16_MODE) \
    PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
  else if (pcre_mode == PCRE32_MODE) \
    PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
  else \
    PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  if (pcre_mode == PCRE16_MODE) \
    PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace); \
  else if (pcre_mode == PCRE32_MODE) \
    PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace); \
  else \
    PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  if (pcre_mode == PCRE16_MODE) \
    PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets); \
  else if (pcre_mode == PCRE32_MODE) \
    PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets); \
  else \
    PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets)
599    
/* Run-time dispatch for freeing data, fetching substrings and handling the
JIT stack. The 16-bit or 32-bit variant is chosen when pcre_mode names that
width; any other value selects the 8-bit variant. */

#define PCRE_FREE_STUDY(extra) \
  if (pcre_mode == PCRE16_MODE) \
    PCRE_FREE_STUDY16(extra); \
  else if (pcre_mode == PCRE32_MODE) \
    PCRE_FREE_STUDY32(extra); \
  else \
    PCRE_FREE_STUDY8(extra)

#define PCRE_FREE_SUBSTRING(substring) \
  if (pcre_mode == PCRE16_MODE) \
    PCRE_FREE_SUBSTRING16(substring); \
  else if (pcre_mode == PCRE32_MODE) \
    PCRE_FREE_SUBSTRING32(substring); \
  else \
    PCRE_FREE_SUBSTRING8(substring)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  if (pcre_mode == PCRE16_MODE) \
    PCRE_FREE_SUBSTRING_LIST16(listptr); \
  else if (pcre_mode == PCRE32_MODE) \
    PCRE_FREE_SUBSTRING_LIST32(listptr); \
  else \
    PCRE_FREE_SUBSTRING_LIST8(listptr)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  if (pcre_mode == PCRE16_MODE) \
    PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr); \
  else if (pcre_mode == PCRE32_MODE) \
    PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr); \
  else \
    PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr)

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  if (pcre_mode == PCRE16_MODE) \
    PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
  else if (pcre_mode == PCRE32_MODE) \
    PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
  else \
    PCRE_GET_STRINGNUMBER8(n, rc, ptr)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  if (pcre_mode == PCRE16_MODE) \
    PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
  else if (pcre_mode == PCRE32_MODE) \
    PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
  else \
    PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  if (pcre_mode == PCRE16_MODE) \
    PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
  else if (pcre_mode == PCRE32_MODE) \
    PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
  else \
    PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)

/* PCRE_JIT_STACK_ALLOC is an expression whose value is the selected
variant's result. */

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  (pcre_mode == PCRE16_MODE ? \
     PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
   : pcre_mode == PCRE32_MODE ? \
     PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
   : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))

#define PCRE_JIT_STACK_FREE(stack) \
  if (pcre_mode == PCRE16_MODE) \
    PCRE_JIT_STACK_FREE16(stack); \
  else if (pcre_mode == PCRE32_MODE) \
    PCRE_JIT_STACK_FREE32(stack); \
  else \
    PCRE_JIT_STACK_FREE8(stack)
674    
/* Run-time dispatch for table building, byte-order conversion, debug printing
and studying. The 16-bit or 32-bit variant is chosen when pcre_mode names that
width; any other value selects the 8-bit variant. */

/* PCRE_MAKETABLES is an expression; it calls the library functions directly
instead of going through per-width macros. */

#define PCRE_MAKETABLES \
  (pcre_mode == PCRE16_MODE ? pcre16_maketables() : \
   pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  if (pcre_mode == PCRE16_MODE) \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
  else if (pcre_mode == PCRE32_MODE) \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
  else \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  if (pcre_mode == PCRE16_MODE) \
    PCRE_PRINTINT16(re, outfile, debug_lengths); \
  else if (pcre_mode == PCRE32_MODE) \
    PCRE_PRINTINT32(re, outfile, debug_lengths); \
  else \
    PCRE_PRINTINT8(re, outfile, debug_lengths)

#define PCRE_STUDY(extra, re, options, error) \
  if (pcre_mode == PCRE16_MODE) \
    PCRE_STUDY16(extra, re, options, error); \
  else if (pcre_mode == PCRE32_MODE) \
    PCRE_STUDY32(extra, re, options, error); \
  else \
    PCRE_STUDY8(extra, re, options, error)
701    
702 ph10 1122
703 ph10 1140 /* ----- Two out of three modes are supported ----- */
704 ph10 1122
705 ph10 1140 #else
706 ph10 1122
707 ph10 1140 /* We can use some macro trickery to make a single set of definitions work in
708     the three different cases. */
709 ph10 1122
710 ph10 1140 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
711 ph10 1122
/* BITONE is the width that the two-mode macros test for first; BITTWO is the
width used in their "else" branch. The final #else assumes 16-bit plus 8-bit,
which relies on this section being compiled only when exactly two modes are
supported (see the enclosing conditional, not visible here). */
712 ph10 1140 #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
713     #define BITONE 32
714     #define BITTWO 16
715 ph10 1122
716     /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
717    
718     #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
719 ph10 1140 #define BITONE 32
720     #define BITTWO 8
721 ph10 1122
722 ph10 1140 /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
723 ph10 1122
724 ph10 1140 #else
725     #define BITONE 16
726     #define BITTWO 8
727     #endif
728 ph10 1122
/* glue() pastes its two arguments exactly as written. G() adds one level of
macro expansion first, so that an argument such as BITONE is replaced by its
number before the paste: G(PCHARS,BITONE) yields PCHARS32 or PCHARS16, whereas
glue(PCHARS,BITONE) would yield the useless name PCHARSBITONE. */
729 ph10 1140 #define glue(a,b) a##b
730     #define G(a,b) glue(a,b)
731 ph10 1122
732    
733 ph10 1140 /* ----- Common macros for two-mode cases ----- */
734 ph10 1122
/* Each statement-form macro below expands to a bare if / else: when pcre_mode
equals PCRE<BITONE>_MODE the <name><BITONE> variant is invoked, otherwise the
<name><BITTWO> variant. Arguments are forwarded unchanged. The final call has
no trailing semicolon, so every use must be a complete statement ending in
';'. */
735     #define PCHARS(lv, p, offset, len, f) \
736 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
737     G(PCHARS,BITONE)(lv, p, offset, len, f); \
738 ph10 1122 else \
739 ph10 1140 G(PCHARS,BITTWO)(lv, p, offset, len, f)
740 ph10 1122
741     #define PCHARSV(p, offset, len, f) \
742 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
743     G(PCHARSV,BITONE)(p, offset, len, f); \
744 ph10 1122 else \
745 ph10 1140 G(PCHARSV,BITTWO)(p, offset, len, f)
746 ph10 1122
747     #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
748 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
749     G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
750 ph10 1122 else \
751 ph10 1140 G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
752 ph10 1122
753     #define SET_PCRE_CALLOUT(callout) \
754 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
755     G(SET_PCRE_CALLOUT,BITONE)(callout); \
756 ph10 1122 else \
757 ph10 1140 G(SET_PCRE_CALLOUT,BITTWO)(callout)
758 ph10 1122
/* Expression form, fully parenthesised, so it may be used as an operand. */
759 ph10 1140 #define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
760     G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))
761 ph10 1122
762     #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
763 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
764     G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
765 ph10 1122 else \
766 ph10 1140 G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)
767 ph10 1122
768     #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
769 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
770     G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
771 ph10 1122 else \
772 ph10 1140 G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)
773 ph10 1122
/* No run-time test here: PCRE_CONFIG always names the config function of the
BITONE library (pcre32_config or pcre16_config), whatever pcre_mode is.
NOTE(review): presumably intentional because the configuration is common to
all built libraries - confirm. */
774 ph10 1140 #define PCRE_CONFIG G(G(pcre,BITONE),_config)
775 ph10 1122
776     #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
777     namesptr, cbuffer, size) \
778 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
779     G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
780 ph10 1122 namesptr, cbuffer, size); \
781     else \
782 ph10 1140 G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
783 ph10 1122 namesptr, cbuffer, size)
784    
785     #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
786 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
787     G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
788 ph10 1122 else \
789 ph10 1140 G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)
790 ph10 1122
791     #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
792     offsets, size_offsets, workspace, size_workspace) \
793 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
794     G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
795 ph10 1122 offsets, size_offsets, workspace, size_workspace); \
796     else \
797 ph10 1140 G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
798 ph10 1122 offsets, size_offsets, workspace, size_workspace)
799    
800     #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
801     offsets, size_offsets) \
802 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
803     G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
804 ph10 1122 offsets, size_offsets); \
805     else \
806 ph10 1140 G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
807 ph10 1122 offsets, size_offsets)
808    
809     #define PCRE_FREE_STUDY(extra) \
810 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
811     G(PCRE_FREE_STUDY,BITONE)(extra); \
812 ph10 1122 else \
813 ph10 1140 G(PCRE_FREE_STUDY,BITTWO)(extra)
814 ph10 1122
815     #define PCRE_FREE_SUBSTRING(substring) \
816 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
817     G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
818 ph10 1122 else \
819 ph10 1140 G(PCRE_FREE_SUBSTRING,BITTWO)(substring)
820 ph10 1122
821     #define PCRE_FREE_SUBSTRING_LIST(listptr) \
822 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
823     G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
824 ph10 1122 else \
825 ph10 1140 G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr)
826 ph10 1122
827     #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
828     getnamesptr, subsptr) \
829 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
830     G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
831 ph10 1122 getnamesptr, subsptr); \
832     else \
833 ph10 1140 G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
834 ph10 1122 getnamesptr, subsptr)
835    
836     #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
837 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
838     G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
839 ph10 1122 else \
840 ph10 1140 G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr)
841 ph10 1122
842     #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
843 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
844     G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
845 ph10 1122 else \
846 ph10 1140 G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr)
847 ph10 1122
848     #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
849 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
850     G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
851 ph10 1122 else \
852 ph10 1140 G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr)
853 ph10 1122
/* Two-mode form of PCRE_JIT_STACK_ALLOC: an expression that invokes the
BITONE variant of PCRE_JIT_STACK_ALLOC when pcre_mode selects that width, and
the BITTWO variant otherwise. The whole conditional is enclosed in parentheses
(as in the three-mode definition) so that a cast or operator applied to the
macro binds to the selected result and not to the mode test. */

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
     G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
    : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize))
858 ph10 1122
/* Statement form: forwards "stack" to the BITONE variant when pcre_mode
selects that width, otherwise to the BITTWO variant. Expands to a bare
if / else with no trailing semicolon, so use it as a complete statement. */
859     #define PCRE_JIT_STACK_FREE(stack) \
860 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
861     G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
862 ph10 1122 else \
863 ph10 1140 G(PCRE_JIT_STACK_FREE,BITTWO)(stack)
864 ph10 1122
/* Two-mode form of PCRE_MAKETABLES: an object-like macro (used without "()")
that calls the maketables function of the BITONE library when pcre_mode
selects that width, and that of the BITTWO library otherwise. The conditional
is enclosed in parentheses (as in the three-mode definition) so that it
behaves as a single operand wherever the macro is used. */

#define PCRE_MAKETABLES \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
    G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)())
868 ph10 1122
/* Statement-form two-mode dispatchers: each forwards its arguments unchanged
to the BITONE variant when pcre_mode selects that width, otherwise to the
BITTWO variant. They expand to a bare if / else with no trailing semicolon,
so each use must be a complete statement ending in ';'. */
869     #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
870 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
871     G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
872 ph10 1122 else \
873 ph10 1140 G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables)
874 ph10 1122
875     #define PCRE_PRINTINT(re, outfile, debug_lengths) \
876 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
877     G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
878 ph10 1122 else \
879 ph10 1140 G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths)
880 ph10 1122
881     #define PCRE_STUDY(extra, re, options, error) \
882 ph10 1140 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
883     G(PCRE_STUDY,BITONE)(extra, re, options, error); \
884 ph10 1122 else \
885 ph10 1140 G(PCRE_STUDY,BITTWO)(extra, re, options, error)
886 ph10 1122
887 ph10 1140 #endif /* Two out of three modes */
888 ph10 1122
889     /* ----- End of cases where more than one mode is supported ----- */
890    
891    
892 ph10 836 /* ----- Only 8-bit mode is supported ----- */
893    
894     #elif defined SUPPORT_PCRE8
/* With a single library there is nothing to select at run time, so every
generic name is a plain alias for its 8-bit counterpart. PCRE_MAKETABLES
already includes the call parentheses and is used without "()". CHAR_SIZE is
defined only in the single-mode sections visible here; presumably it is the
size of one code unit in bytes - confirm at the use sites. */
895     #define CHAR_SIZE 1
896     #define PCHARS PCHARS8
897     #define PCHARSV PCHARSV8
898     #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
899     #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
900     #define STRLEN STRLEN8
901 zherczeg 852 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
902 ph10 836 #define PCRE_COMPILE PCRE_COMPILE8
903     #define PCRE_CONFIG pcre_config
904     #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
905     #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
906     #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
907     #define PCRE_EXEC PCRE_EXEC8
908     #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
909     #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
910     #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
911     #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
912     #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
913     #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
914     #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
915 zherczeg 852 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
916     #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
917 ph10 836 #define PCRE_MAKETABLES pcre_maketables()
918     #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
919     #define PCRE_PRINTINT PCRE_PRINTINT8
920     #define PCRE_STUDY PCRE_STUDY8
921    
922     /* ----- Only 16-bit mode is supported ----- */
923    
924 chpe 1055 #elif defined SUPPORT_PCRE16
/* Every generic name is a plain alias for its 16-bit counterpart; no
run-time selection is needed. PCRE_MAKETABLES already includes the call
parentheses and is used without "()". CHAR_SIZE is presumably the size of one
code unit in bytes - confirm at the use sites. */
925 ph10 836 #define CHAR_SIZE 2
926     #define PCHARS PCHARS16
927     #define PCHARSV PCHARSV16
928     #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
929     #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
930     #define STRLEN STRLEN16
931 zherczeg 852 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
932 ph10 836 #define PCRE_COMPILE PCRE_COMPILE16
933     #define PCRE_CONFIG pcre16_config
934     #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
935     #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
936     #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
937     #define PCRE_EXEC PCRE_EXEC16
938     #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
939     #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
940     #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
941     #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
942     #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
943     #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
944     #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
945 zherczeg 852 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
946     #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
947 ph10 836 #define PCRE_MAKETABLES pcre16_maketables()
948     #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
949     #define PCRE_PRINTINT PCRE_PRINTINT16
950     #define PCRE_STUDY PCRE_STUDY16
951 chpe 1055
952     /* ----- Only 32-bit mode is supported ----- */
953    
954     #elif defined SUPPORT_PCRE32
/* Every generic name is a plain alias for its 32-bit counterpart; no
run-time selection is needed. PCRE_MAKETABLES already includes the call
parentheses and is used without "()". CHAR_SIZE is presumably the size of one
code unit in bytes - confirm at the use sites. */
955     #define CHAR_SIZE 4
956     #define PCHARS PCHARS32
957     #define PCHARSV PCHARSV32
958     #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
959     #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
960     #define STRLEN STRLEN32
961     #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
962     #define PCRE_COMPILE PCRE_COMPILE32
963     #define PCRE_CONFIG pcre32_config
964     #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
965     #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
966     #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
967     #define PCRE_EXEC PCRE_EXEC32
968     #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
969     #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
970     #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
971     #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
972     #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
973     #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
974     #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
975     #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
976     #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
977     #define PCRE_MAKETABLES pcre32_maketables()
978     #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
979     #define PCRE_PRINTINT PCRE_PRINTINT32
980     #define PCRE_STUDY PCRE_STUDY32
981    
982 ph10 836 #endif
983    
984     /* ----- End of mode-specific function call macros ----- */
985    
986    
987 nigel 85 /* Other parameters */
988    
/* Fallback for systems whose headers do not define CLOCKS_PER_SEC: use
CLK_TCK when that is defined, otherwise assume 100 clock ticks per second. */
989 nigel 3 #ifndef CLOCKS_PER_SEC
990     #ifdef CLK_TCK
991     #define CLOCKS_PER_SEC CLK_TCK
992     #else
993     #define CLOCKS_PER_SEC 100
994     #endif
995     #endif
996    
/* Defined only when DFA support is compiled in (NODFA not set). Presumably the
number of elements in the workspace vector given to the DFA matcher - confirm
at the use site. */
997 ph10 960 #if !defined NODFA
998     #define DFA_WS_DIMENSION 1000
999     #endif
1000    
1001 nigel 93 /* This is the default loop count for timing. */
1002    
1003 nigel 75 #define LOOPREPEAT 500000
1004 nigel 3
1005 nigel 85 /* Static variables */
1006    
/* File-scope state shared by the functions in this program. The variables
that have no explicit initializer start at zero (or NULL) because they have
static storage duration; where they are set and read is outside this section
of the file. */
1007 nigel 3 static FILE *outfile;
1008     static int log_store = 0;
1009 nigel 63 static int callout_count;
1010     static int callout_extra;
1011     static int callout_fail_count;
1012     static int callout_fail_id;
1013 ph10 210 static int debug_lengths;
1014 nigel 63 static int first_callout;
1015 ph10 960 static int jit_was_used;
1016 nigel 93 static int locale_set = 0;
1017 nigel 73 static int show_malloc;
1018 ph10 836 static int use_utf;
1019 nigel 43 static size_t gotten_store;
1020 ph10 836 static size_t first_gotten_store = 0;
1021 ph10 645 static const unsigned char *last_callout_mark = NULL;
1022 nigel 3
1023 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
1024    
/* buffer_size starts at 50000; both pointers start as NULL, so the buffers
must be allocated elsewhere before first use (allocation code not visible in
this section). */
1025     static int buffer_size = 50000;
1026 ph10 836 static pcre_uint8 *buffer = NULL;
1027     static pcre_uint8 *pbuffer = NULL;
1028 nigel 3
1029 ph10 1142 /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
1030 ph10 836
1031     #ifdef COMPILE_PCRE16
1032     #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1033     #endif
1034    
1035 chpe 1055 #ifdef COMPILE_PCRE32
1036     #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1037     #endif
1038    
1039 ph10 1142 /* We need buffers for building 16/32-bit strings, and the tables of operator
1040     lengths that are used for 16/32-bit compiling, in order to swap bytes in a
1041     pattern for saving/reloading testing. Luckily, the data for these tables is
1042     defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
1043     are used in the tables) are adjusted appropriately for the 16/32-bit world.
1044     LINK_SIZE is also used later in this program. */
1045    
/* 16-bit settings. IMM2_SIZE is forced to 1 and LINK_SIZE is mapped
2 -> 1 and 3 or 4 -> 2 (presumably a byte count being converted to a count of
16-bit units - confirm against the library headers); any other incoming value
is rejected at compile time. OP_lengths16 is then built by expanding
OP_LENGTHS with these adjusted values in force. */
1046     #ifdef SUPPORT_PCRE16
1047     #undef IMM2_SIZE
1048     #define IMM2_SIZE 1
1049    
1050 ph10 836 #if LINK_SIZE == 2
1051     #undef LINK_SIZE
1052     #define LINK_SIZE 1
1053     #elif LINK_SIZE == 3 || LINK_SIZE == 4
1054     #undef LINK_SIZE
1055     #define LINK_SIZE 2
1056     #else
1057     #error LINK_SIZE must be either 2, 3, or 4
1058     #endif
1059    
/* The 16-bit string buffer starts empty (size 0, NULL pointer). */
1060 chpe 1055 static int buffer16_size = 0;
1061     static pcre_uint16 *buffer16 = NULL;
1062 ph10 836 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
1063     #endif /* SUPPORT_PCRE16 */
1064    
/* 32-bit settings: IMM2_SIZE and LINK_SIZE are both forced to 1 before
OP_LENGTHS is expanded into OP_lengths32.
NOTE(review): this redefinition is unconditional, so when SUPPORT_PCRE16 is
also defined it replaces the LINK_SIZE chosen for 16-bit mode, and every later
use of LINK_SIZE in the file sees 1 - confirm that this is intended. */
1065 chpe 1055 #ifdef SUPPORT_PCRE32
1066 ph10 1142 #undef IMM2_SIZE
1067     #define IMM2_SIZE 1
1068     #undef LINK_SIZE
1069     #define LINK_SIZE 1
1070    
/* The 32-bit string buffer starts empty (size 0, NULL pointer). */
1071 chpe 1055 static int buffer32_size = 0;
1072     static pcre_uint32 *buffer32 = NULL;
1073     static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
1074     #endif /* SUPPORT_PCRE32 */
1075 ph10 836
1076 ph10 1140 /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
1077     support, it can be changed by an option. If there is no 8-bit support, there
1078     must be 16- or 32-bit support, so default to 16-bit mode when it is available
and to 32-bit mode otherwise. */
1079 chpe 1055
1080     #if defined SUPPORT_PCRE8
1081     static int pcre_mode = PCRE8_MODE;
1082     #elif defined SUPPORT_PCRE16
1083     static int pcre_mode = PCRE16_MODE;
1084     #elif defined SUPPORT_PCRE32
1085     static int pcre_mode = PCRE32_MODE;
1086 ph10 836 #endif
1087    
1088 ph10 923 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
1089    
/* Seven entries, one for each n from 1 to 7 (presumably indexed by n - 1 -
confirm at the use site). Read that way, the binary digits of n choose the
flags: 1 = PCRE_STUDY_JIT_COMPILE, 2 = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
4 = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE. */
1090     static int jit_study_bits[] =
1091 ph10 960 {
1092     PCRE_STUDY_JIT_COMPILE,
1093 ph10 923 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1094     PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1095 ph10 960 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1096     PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1097     PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1098     PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
1099     PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
1100     };
1101 ph10 923
/* Mask with all three JIT study flags set. */
1102 ph10 1022 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
1103     PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
1104    
1105 ph10 598 /* Textual explanations for runtime error codes */
1106 nigel 75
/* 33 entries, for positions 0 to 32. Presumably indexed by the negated
(positive) value of a run-time error code - confirm at the use site. A NULL
entry marks a code that has no generic text here; the per-entry comments say
why (handled specially, or never returned). */
1107 ph10 598 static const char *errtexts[] = {
1108     NULL, /* 0 is no error */
1109     NULL, /* NOMATCH is handled specially */
1110     "NULL argument passed",
1111     "bad option value",
1112     "magic number missing",
1113     "unknown opcode - pattern overwritten?",
1114     "no more memory",
1115 ph10 654 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
1116 ph10 598 "match limit exceeded",
1117     "callout error code",
1118 ph10 836 NULL, /* BADUTF8/16 is handled specially */
1119     NULL, /* BADUTF8/16 offset is handled specially */
1120 ph10 598 NULL, /* PARTIAL is handled specially */
1121     "not used - internal error",
1122     "internal error - pattern overwritten?",
1123     "bad count value",
1124     "item unsupported for DFA matching",
1125     "backreference condition or recursion test not supported for DFA matching",
1126     "match limit not supported for DFA matching",
1127     "workspace size exceeded in DFA matching",
1128 ph10 654 "too much recursion for DFA matching",
1129 ph10 598 "recursion limit exceeded",
1130     "not used - internal error",
1131     "invalid combination of newline options",
1132     "bad offset value",
1133 ph10 836 NULL, /* SHORTUTF8/16 is handled specially */
1134 ph10 676 "nested recursion at the same subject position",
1135 ph10 836 "JIT stack limit reached",
1136 ph10 960 "pattern compiled in wrong mode: 8-bit/16-bit error",
1137     "pattern compiled with other endianness",
1138 ph10 1189 "invalid data in workspace for DFA restart",
1139     "bad JIT option",
1140 ph10 1221 "bad length"
1141 ph10 598 };
1142    
1143 ph10 654
1144 ph10 541 /*************************************************
1145     * Alternate character tables *
1146     *************************************************/
1147 nigel 49
1148 ph10 545 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
1149     using the default tables of the library. However, the T option can be used to
1150     select alternate sets of tables, for different kinds of testing. Note also that
1151 ph10 541 the L (locale) option also adjusts the tables. */
1152    
1153 ph10 545 /* This is the set of tables distributed as default with PCRE. It recognizes
1154 ph10 541 only ASCII characters. */
1155    
/* Layout of tables0 (1088 bytes in all), in order:
     256 bytes  lower-casing table
     256 bytes  case-flipping table
     320 bytes  ten 32-byte character-class bit maps
     256 bytes  character-type flags, one byte per character
*/
1156 ph10 836 static const pcre_uint8 tables0[] = {
1157 ph10 541
1158     /* This table is a lower casing table. */
1159    
1160     0, 1, 2, 3, 4, 5, 6, 7,
1161     8, 9, 10, 11, 12, 13, 14, 15,
1162     16, 17, 18, 19, 20, 21, 22, 23,
1163     24, 25, 26, 27, 28, 29, 30, 31,
1164     32, 33, 34, 35, 36, 37, 38, 39,
1165     40, 41, 42, 43, 44, 45, 46, 47,
1166     48, 49, 50, 51, 52, 53, 54, 55,
1167     56, 57, 58, 59, 60, 61, 62, 63,
1168     64, 97, 98, 99,100,101,102,103,
1169     104,105,106,107,108,109,110,111,
1170     112,113,114,115,116,117,118,119,
1171     120,121,122, 91, 92, 93, 94, 95,
1172     96, 97, 98, 99,100,101,102,103,
1173     104,105,106,107,108,109,110,111,
1174     112,113,114,115,116,117,118,119,
1175     120,121,122,123,124,125,126,127,
1176     128,129,130,131,132,133,134,135,
1177     136,137,138,139,140,141,142,143,
1178     144,145,146,147,148,149,150,151,
1179     152,153,154,155,156,157,158,159,
1180     160,161,162,163,164,165,166,167,
1181     168,169,170,171,172,173,174,175,
1182     176,177,178,179,180,181,182,183,
1183     184,185,186,187,188,189,190,191,
1184     192,193,194,195,196,197,198,199,
1185     200,201,202,203,204,205,206,207,
1186     208,209,210,211,212,213,214,215,
1187     216,217,218,219,220,221,222,223,
1188     224,225,226,227,228,229,230,231,
1189     232,233,234,235,236,237,238,239,
1190     240,241,242,243,244,245,246,247,
1191     248,249,250,251,252,253,254,255,
1192    
1193     /* This table is a case flipping table. */
1194    
1195     0, 1, 2, 3, 4, 5, 6, 7,
1196     8, 9, 10, 11, 12, 13, 14, 15,
1197     16, 17, 18, 19, 20, 21, 22, 23,
1198     24, 25, 26, 27, 28, 29, 30, 31,
1199     32, 33, 34, 35, 36, 37, 38, 39,
1200     40, 41, 42, 43, 44, 45, 46, 47,
1201     48, 49, 50, 51, 52, 53, 54, 55,
1202     56, 57, 58, 59, 60, 61, 62, 63,
1203     64, 97, 98, 99,100,101,102,103,
1204     104,105,106,107,108,109,110,111,
1205     112,113,114,115,116,117,118,119,
1206     120,121,122, 91, 92, 93, 94, 95,
1207     96, 65, 66, 67, 68, 69, 70, 71,
1208     72, 73, 74, 75, 76, 77, 78, 79,
1209     80, 81, 82, 83, 84, 85, 86, 87,
1210     88, 89, 90,123,124,125,126,127,
1211     128,129,130,131,132,133,134,135,
1212     136,137,138,139,140,141,142,143,
1213     144,145,146,147,148,149,150,151,
1214     152,153,154,155,156,157,158,159,
1215     160,161,162,163,164,165,166,167,
1216     168,169,170,171,172,173,174,175,
1217     176,177,178,179,180,181,182,183,
1218     184,185,186,187,188,189,190,191,
1219     192,193,194,195,196,197,198,199,
1220     200,201,202,203,204,205,206,207,
1221     208,209,210,211,212,213,214,215,
1222     216,217,218,219,220,221,222,223,
1223     224,225,226,227,228,229,230,231,
1224     232,233,234,235,236,237,238,239,
1225     240,241,242,243,244,245,246,247,
1226     248,249,250,251,252,253,254,255,
1227    
1228     /* This table contains bit maps for various character classes. Each map is 32
1229     bytes long and the bits run from the least significant end of each byte. The
1230     classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1231     graph, print, punct, and cntrl. Other classes are built from combinations. */
1232    
1233     0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1234     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1235     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1236     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1237    
1238     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1239     0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1240     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1241     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1242    
1243     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1244     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1245     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1246     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1247    
1248     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1249     0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1250     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1251     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1252    
1253     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1254     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1255     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1256     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1257    
1258     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1259     0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1260     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1261     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1262    
1263     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1264     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1265     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1266     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1267    
1268     0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1269     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1270     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1271     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1272    
1273     0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1274     0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1275     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1276     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1277    
1278     0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1279     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1280     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1281     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1282    
1283     /* This table identifies various classes of character by individual bits:
1284     0x01 white space character
1285     0x02 letter
1286     0x04 decimal digit
1287     0x08 hexadecimal digit
1288     0x10 alphanumeric or '_'
1289     0x80 regular expression metacharacter or binary zero
1290     */
1291    
1292     0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1293     0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
1294     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1295     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1296     0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
1297     0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1298     0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1299     0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1300     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1301     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1302     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1303     0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1304     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1305     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1306     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1307     0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1308     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1309     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1310     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1311     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1312     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1313     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1314     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1315     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1316     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1317     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1318     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1319     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1320     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1321     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1322     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1323     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1324    
1325 ph10 1325 /* This is a set of tables that came originally from a Windows user. It seems to
1326 ph10 545 be at least an approximation of ISO 8859. In particular, there are characters
1327 ph10 541 greater than 128 that are marked as spaces, letters, etc. */
1328    
/* tables1 holds 1088 bytes written in decimal. The section markers inside the
initializer assume the same four-part layout as tables0 (256 + 256 + 320 + 256
bytes); the row counts match, but confirm against the library's table
layout. */
1329 ph10 836 static const pcre_uint8 tables1[] = {
/* Bytes 0-255: lower-casing table ('A'-'Z' map to 'a'-'z'; 192-222 except 215
map to 224-254). */
1330 ph10 541 0,1,2,3,4,5,6,7,
1331     8,9,10,11,12,13,14,15,
1332     16,17,18,19,20,21,22,23,
1333     24,25,26,27,28,29,30,31,
1334     32,33,34,35,36,37,38,39,
1335     40,41,42,43,44,45,46,47,
1336     48,49,50,51,52,53,54,55,
1337     56,57,58,59,60,61,62,63,
1338     64,97,98,99,100,101,102,103,
1339     104,105,106,107,108,109,110,111,
1340     112,113,114,115,116,117,118,119,
1341     120,121,122,91,92,93,94,95,
1342     96,97,98,99,100,101,102,103,
1343     104,105,106,107,108,109,110,111,
1344     112,113,114,115,116,117,118,119,
1345     120,121,122,123,124,125,126,127,
1346     128,129,130,131,132,133,134,135,
1347     136,137,138,139,140,141,142,143,
1348     144,145,146,147,148,149,150,151,
1349     152,153,154,155,156,157,158,159,
1350     160,161,162,163,164,165,166,167,
1351     168,169,170,171,172,173,174,175,
1352     176,177,178,179,180,181,182,183,
1353     184,185,186,187,188,189,190,191,
1354     224,225,226,227,228,229,230,231,
1355     232,233,234,235,236,237,238,239,
1356     240,241,242,243,244,245,246,215,
1357     248,249,250,251,252,253,254,223,
1358     224,225,226,227,228,229,230,231,
1359     232,233,234,235,236,237,238,239,
1360     240,241,242,243,244,245,246,247,
1361     248,249,250,251,252,253,254,255,
/* Bytes 256-511: case-flipping table (upper and lower case swapped in both
directions). */
1362     0,1,2,3,4,5,6,7,
1363     8,9,10,11,12,13,14,15,
1364     16,17,18,19,20,21,22,23,
1365     24,25,26,27,28,29,30,31,
1366     32,33,34,35,36,37,38,39,
1367     40,41,42,43,44,45,46,47,
1368     48,49,50,51,52,53,54,55,
1369     56,57,58,59,60,61,62,63,
1370     64,97,98,99,100,101,102,103,
1371     104,105,106,107,108,109,110,111,
1372     112,113,114,115,116,117,118,119,
1373     120,121,122,91,92,93,94,95,
1374     96,65,66,67,68,69,70,71,
1375     72,73,74,75,76,77,78,79,
1376     80,81,82,83,84,85,86,87,
1377     88,89,90,123,124,125,126,127,
1378     128,129,130,131,132,133,134,135,
1379     136,137,138,139,140,141,142,143,
1380     144,145,146,147,148,149,150,151,
1381     152,153,154,155,156,157,158,159,
1382     160,161,162,163,164,165,166,167,
1383     168,169,170,171,172,173,174,175,
1384     176,177,178,179,180,181,182,183,
1385     184,185,186,187,188,189,190,191,
1386     224,225,226,227,228,229,230,231,
1387     232,233,234,235,236,237,238,239,
1388     240,241,242,243,244,245,246,215,
1389     248,249,250,251,252,253,254,223,
1390     192,193,194,195,196,197,198,199,
1391     200,201,202,203,204,205,206,207,
1392     208,209,210,211,212,213,214,247,
1393     216,217,218,219,220,221,222,255,
/* Bytes 512-831: presumably ten 32-byte character-class bit maps in the same
order as in tables0 (four rows per map). */
1394     0,62,0,0,1,0,0,0,
1395     0,0,0,0,0,0,0,0,
1396     32,0,0,0,1,0,0,0,
1397     0,0,0,0,0,0,0,0,
1398     0,0,0,0,0,0,255,3,
1399     126,0,0,0,126,0,0,0,
1400     0,0,0,0,0,0,0,0,
1401     0,0,0,0,0,0,0,0,
1402     0,0,0,0,0,0,255,3,
1403     0,0,0,0,0,0,0,0,
1404     0,0,0,0,0,0,12,2,
1405     0,0,0,0,0,0,0,0,
1406     0,0,0,0,0,0,0,0,
1407     254,255,255,7,0,0,0,0,
1408     0,0,0,0,0,0,0,0,
1409     255,255,127,127,0,0,0,0,
1410     0,0,0,0,0,0,0,0,
1411     0,0,0,0,254,255,255,7,
1412     0,0,0,0,0,4,32,4,
1413     0,0,0,128,255,255,127,255,
1414     0,0,0,0,0,0,255,3,
1415     254,255,255,135,254,255,255,7,
1416     0,0,0,0,0,4,44,6,
1417     255,255,127,255,255,255,127,255,
1418     0,0,0,0,254,255,255,255,
1419     255,255,255,255,255,255,255,127,
1420     0,0,0,0,254,255,255,255,
1421     255,255,255,255,255,255,255,255,
1422     0,2,0,0,255,255,255,255,
1423     255,255,255,255,255,255,255,127,
1424     0,0,0,0,255,255,255,255,
1425     255,255,255,255,255,255,255,255,
1426     0,0,0,0,254,255,0,252,
1427     1,0,0,248,1,0,0,120,
1428     0,0,0,0,254,255,255,255,
1429     0,0,128,0,0,0,128,0,
1430     255,255,255,255,0,0,0,0,
1431     0,0,0,0,0,0,0,128,
1432     255,255,255,255,0,0,0,0,
1433     0,0,0,0,0,0,0,0,
/* Bytes 832-1087: presumably the character-type flags, one byte per character,
using the bit meanings listed in tables0 (values are decimal here: 28 = 0x1c,
26 = 0x1a, 18 = 0x12, 128 = 0x80). */
1434     128,0,0,0,0,0,0,0,
1435     0,1,1,0,1,1,0,0,
1436     0,0,0,0,0,0,0,0,
1437     0,0,0,0,0,0,0,0,
1438     1,0,0,0,128,0,0,0,
1439     128,128,128,128,0,0,128,0,
1440     28,28,28,28,28,28,28,28,
1441     28,28,0,0,0,0,0,128,
1442     0,26,26,26,26,26,26,18,
1443     18,18,18,18,18,18,18,18,
1444     18,18,18,18,18,18,18,18,
1445     18,18,18,128,128,0,128,16,
1446     0,26,26,26,26,26,26,18,
1447     18,18,18,18,18,18,18,18,
1448     18,18,18,18,18,18,18,18,
1449     18,18,18,128,128,0,0,0,
1450     0,0,0,0,0,1,0,0,
1451     0,0,0,0,0,0,0,0,
1452     0,0,0,0,0,0,0,0,
1453     0,0,0,0,0,0,0,0,
1454     1,0,0,0,0,0,0,0,
1455     0,0,18,0,0,0,0,0,
1456     0,0,20,20,0,18,0,0,
1457     0,20,18,0,0,0,0,0,
1458     18,18,18,18,18,18,18,18,
1459     18,18,18,18,18,18,18,18,
1460     18,18,18,18,18,18,18,0,
1461     18,18,18,18,18,18,18,18,
1462     18,18,18,18,18,18,18,18,
1463     18,18,18,18,18,18,18,18,
1464     18,18,18,18,18,18,18,0,
1465     18,18,18,18,18,18,18,18
1466     };
1467    
1468    
1469    
1470 ph10 558
1471     #ifndef HAVE_STRERROR
1472 nigel 49 /*************************************************
1473 ph10 558 * Provide strerror() for non-ANSI libraries *
1474     *************************************************/
1475    
1476     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1477     in their libraries, but can provide the same facility by this simple
1478     alternative function. */
1479    
extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror(): look the error number up in the system message
table, giving a fixed text for any number outside 0 .. sys_nerr-1. */

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
1489     #endif /* HAVE_STRERROR */
1490    
1491    
1492 ph10 1030
1493 ph10 667 /*************************************************
1494 ph10 1030 * Print newline configuration *
1495     *************************************************/
1496    
1497 ph10 1122 /*
1498     Arguments:
1499 ph10 1033 rc the return code from PCRE_CONFIG_NEWLINE
1500 ph10 1122 isc TRUE if called from "-C newline"
1501 ph10 1033 Returns: nothing
1502 ph10 1030 */
1503    
1504     static void
1505 ph10 1033 print_newline_config(int rc, BOOL isc)
1506 ph10 1030 {
1507     const char *s = NULL;
1508 ph10 1033 if (!isc) printf(" Newline sequence is ");
1509 ph10 1030 switch(rc)
1510     {
1511     case CHAR_CR: s = "CR"; break;
1512     case CHAR_LF: s = "LF"; break;
1513     case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1514     case -1: s = "ANY"; break;
1515     case -2: s = "ANYCRLF"; break;
1516 ph10 1122
1517 ph10 1030 default:
1518 ph10 1122 printf("a non-standard value: 0x%04x\n", rc);
1519 ph10 1030 return;
1520 ph10 1122 }
1521 ph10 1030
1522     printf("%s\n", s);
1523     }
1524    
1525    
1526    
1527     /*************************************************
1528 ph10 667 * JIT memory callback *
1529     *************************************************/
1530 ph10 558
1531 ph10 667 static pcre_jit_stack* jit_callback(void *arg)
1532     {
1533 ph10 926 jit_was_used = TRUE;
1534 ph10 667 return (pcre_jit_stack *)arg;
1535     }
1536 ph10 558
1537 ph10 667
1538 chpe 1055 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1539 ph10 558 /*************************************************
1540 ph10 836 * Convert UTF-8 string to value *
1541     *************************************************/
1542    
1543     /* This function takes one or more bytes that represents a UTF-8 character,
1544     and returns the value of the character.
1545    
1546     Argument:
1547     utf8bytes a pointer to the byte vector
1548     vptr a pointer to an int to receive the value
1549    
1550     Returns: > 0 => the number of bytes consumed
1551     -6 to 0 => malformed UTF-8 character at offset = (-return)
1552     */
1553    
1554     static int
1555 chpe 1086 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1556 ph10 836 {
1557 chpe 1086 pcre_uint32 c = *utf8bytes++;
1558     pcre_uint32 d = c;
1559 ph10 836 int i, j, s;
1560    
1561     for (i = -1; i < 6; i++) /* i is number of additional bytes */
1562     {
1563     if ((d & 0x80) == 0) break;
1564     d <<= 1;
1565     }
1566    
1567     if (i == -1) { *vptr = c; return 1; } /* ascii character */
1568     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1569    
1570     /* i now has a value in the range 1-5 */
1571    
1572     s = 6*i;
1573     d = (c & utf8_table3[i]) << s;
1574    
1575     for (j = 0; j < i; j++)
1576     {
1577     c = *utf8bytes++;
1578     if ((c & 0xc0) != 0x80) return -(j+1);
1579     s -= 6;
1580     d |= (c & 0x3f) << s;
1581     }
1582    
1583     /* Check that encoding was the correct unique one */
1584    
1585     for (j = 0; j < utf8_table1_size; j++)
1586 ph10 1122 if (d <= (pcre_uint32)utf8_table1[j]) break;
1587 ph10 836 if (j != i) return -(i+1);
1588    
1589     /* Valid value */
1590    
1591     *vptr = d;
1592     return i+1;
1593     }
1594     #endif /* NOUTF || SUPPORT_PCRE16 */
1595    
1596    
1597    
1598 ph10 1140 #if defined SUPPORT_PCRE8 && !defined NOUTF
1599 ph10 836 /*************************************************
1600     * Convert character value to UTF-8 *
1601     *************************************************/
1602    
1603     /* This function takes an integer value in the range 0 - 0x7fffffff
1604     and encodes it as a UTF-8 character in 1 to 6 bytes.
1605    
1606     Arguments:
1607     cvalue the character value
1608     utf8bytes pointer to buffer for result - at least 6 bytes long
1609    
1610     Returns: number of bytes placed in the buffer, or -1 if cvalue > 0x7fffffff
1611     */
1612    
1613     static int
1614 chpe 1086 ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1615 ph10 836 {
1616     register int i, j;
1617 chpe 1086 if (cvalue > 0x7fffffffu)
1618     return -1;
1619 ph10 836 for (i = 0; i < utf8_table1_size; i++)
1620 ph10 1122 if (cvalue <= (pcre_uint32)utf8_table1[i]) break;
1621 ph10 836 utf8bytes += i;
1622     for (j = i; j > 0; j--)
1623     {
1624     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1625     cvalue >>= 6;
1626     }
1627     *utf8bytes = utf8_table2[i] | cvalue;
1628     return i + 1;
1629     }
1630 ph10 842 #endif
1631 ph10 836
1632    
1633     #ifdef SUPPORT_PCRE16
1634     /*************************************************
1635     * Convert a string to 16-bit *
1636     *************************************************/
1637    
1638     /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1639     8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1640     double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1641     in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1642     result is always left in buffer16.
1643    
1644     Note that this function does not object to surrogate values. This is
1645     deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1646     for the purpose of testing that they are correctly faulted.
1647    
1648 ph10 842 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1649 ph10 836 in UTF-8 so that values greater than 255 can be handled.
1650    
1651     Arguments:
1652     data TRUE if converting a data line; FALSE for a regex
1653     p points to a byte string
1654     utf true if UTF-8 (to be converted to UTF-16)
1655     len number of bytes in the string (excluding trailing zero)
1656    
1657     Returns: number of 16-bit data items used (excluding trailing zero)
1658     OR -1 if a UTF-8 string is malformed
1659     OR -2 if a value > 0x10ffff is encountered
1660 ph10 842 OR -3 if a value > 0xffff is encountered when not in UTF mode
1661 ph10 836 */
1662    
1663     static int
1664     to16(int data, pcre_uint8 *p, int utf, int len)
1665     {
1666     pcre_uint16 *pp;
1667    
1668     if (buffer16_size < 2*len + 2)
1669     {
1670     if (buffer16 != NULL) free(buffer16);
1671     buffer16_size = 2*len + 2;
1672     buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1673     if (buffer16 == NULL)
1674     {
1675     fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1676     exit(1);
1677     }
1678     }
1679    
1680     pp = buffer16;
1681    
1682     if (!utf && !data)
1683     {
1684     while (len-- > 0) *pp++ = *p++;
1685     }
1686    
1687     else
1688     {
1689 chpe 1086 pcre_uint32 c = 0;
1690 ph10 836 while (len > 0)
1691     {
1692     int chlen = utf82ord(p, &c);
1693     if (chlen <= 0) return -1;
1694     if (c > 0x10ffff) return -2;
1695     p += chlen;
1696     len -= chlen;
1697     if (c < 0x10000) *pp++ = c; else
1698     {
1699     if (!utf) return -3;
1700     c -= 0x10000;
1701     *pp++ = 0xD800 | (c >> 10);
1702     *pp++ = 0xDC00 | (c & 0x3ff);
1703     }
1704     }
1705     }
1706    
1707     *pp = 0;
1708     return pp - buffer16;
1709     }
1710     #endif
1711    
1712 chpe 1055 #ifdef SUPPORT_PCRE32
1713     /*************************************************
1714     * Convert a string to 32-bit *
1715     *************************************************/
1716 ph10 836
1717 chpe 1055 /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1718     8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1719     times, because every character occupies at least one byte in UTF-8 and exactly
1720     one 32-bit unit (four bytes) in UTF-32, whatever its value. The
1721     result is always left in buffer32.
1722    
1723     Note that this function does not object to surrogate values. This is
1724     deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1725     for the purpose of testing that they are correctly faulted.
1726    
1727     Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1728     in UTF-8 so that values greater than 255 can be handled.
1729    
1730     Arguments:
1731     data TRUE if converting a data line; FALSE for a regex
1732     p points to a byte string
1733     utf true if UTF-8 (to be converted to UTF-32)
1734     len number of bytes in the string (excluding trailing zero)
1735    
1736     Returns: number of 32-bit data items used (excluding trailing zero)
1737     OR -1 if a UTF-8 string is malformed
1738     OR -2 if a value > 0x10ffff is encountered
1739     OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1740     */
1741    
1742     static int
1743     to32(int data, pcre_uint8 *p, int utf, int len)
1744     {
1745     pcre_uint32 *pp;
1746    
1747     if (buffer32_size < 4*len + 4)
1748     {
1749     if (buffer32 != NULL) free(buffer32);
1750     buffer32_size = 4*len + 4;
1751     buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1752     if (buffer32 == NULL)
1753     {
1754     fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1755     exit(1);
1756     }
1757     }
1758    
1759     pp = buffer32;
1760    
1761     if (!utf && !data)
1762     {
1763     while (len-- > 0) *pp++ = *p++;
1764     }
1765    
1766     else
1767     {
1768 chpe 1086 pcre_uint32 c = 0;
1769 chpe 1055 while (len > 0)
1770     {
1771     int chlen = utf82ord(p, &c);
1772     if (chlen <= 0) return -1;
1773     if (utf)
1774     {
1775     if (c > 0x10ffff) return -2;
1776     if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1777     }
1778    
1779     p += chlen;
1780     len -= chlen;
1781     *pp++ = c;
1782     }
1783     }
1784    
1785     *pp = 0;
1786     return pp - buffer32;
1787     }
1788 chpe 1117
1789     /* Check that a 32-bit character string is valid UTF-32.
1790    
1791     Arguments:
1792     string points to the string
1793     length length of string, or -1 if the string is zero-terminated
1794    
1795     Returns: TRUE if the string is a valid UTF-32 string
1796     FALSE otherwise
1797     */
1798    
1799 ph10 1261 #ifdef NEVER /* Not used */
1800 chpe 1117 #ifdef SUPPORT_UTF
1801     static BOOL
1802     valid_utf32(pcre_uint32 *string, int length)
1803     {
1804     register pcre_uint32 *p;
1805     register pcre_uint32 c;
1806    
1807     for (p = string; length-- > 0; p++)
1808     {
1809     c = *p;
1810 ph10 1261 if (c > 0x10ffffu) return FALSE; /* Too big */
1811     if ((c & 0xfffff800u) == 0xd800u) return FALSE; /* Surrogate */
1812 chpe 1117 }
1813    
1814     return TRUE;
1815     }
1816     #endif /* SUPPORT_UTF */
1817 ph10 1202 #endif /* NEVER */
1818 ph10 1261 #endif /* SUPPORT_PCRE32 */
1819 ph10 1202
1820    
1821 ph10 836 /*************************************************
1822 nigel 91 * Read or extend an input line *
1823     *************************************************/
1824    
1825     /* Input lines are read into buffer, but both patterns and data lines can be
1826     continued over multiple input lines. In addition, if the buffer fills up, we
1827     want to automatically expand it so as to be able to handle extremely large
1828     lines that are needed for certain stress tests. When the input buffer is
1829     expanded, the other two buffers must also be expanded likewise, and the
1830     contents of pbuffer, which are a copy of the input for callouts, must be
1831     preserved (for when expansion happens for a data line). This is not the most
1832     optimal way of handling this, but hey, this is just a test program!
1833    
1834     Arguments:
1835     f the file to read
1836     start where in buffer to start (this *must* be within buffer)
1837 ph10 287 prompt for stdin or readline()
1838 nigel 91
1839     Returns: pointer to the start of new data
1840     could be a copy of start, or could be moved
1841     NULL if no data read and EOF reached
1842     */
1843    
1844 ph10 836 static pcre_uint8 *
1845     extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1846 nigel 91 {
1847 ph10 836 pcre_uint8 *here = start;
1848 nigel 91
1849     for (;;)
1850     {
1851 ph10 904 size_t rlen = (size_t)(buffer_size - (here - buffer));
1852 nigel 93
1853 nigel 91 if (rlen > 1000)
1854     {
1855     int dlen;
1856 ph10 289
1857 ph10 936 /* If libreadline or libedit support is required, use readline() to read a
1858     line if the input is a terminal. Note that readline() removes the trailing
1859     newline, so we must put it back again, to be compatible with fgets(). */
1860 ph10 289
1861 ph10 936 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1862 ph10 287 if (isatty(fileno(f)))
1863     {
1864 ph10 289 size_t len;
1865 ph10 287 char *s = readline(prompt);
1866     if (s == NULL) return (here == start)? NULL : start;
1867     len = strlen(s);
1868 ph10 289 if (len > 0) add_history(s);
1869 ph10 287 if (len > rlen - 1) len = rlen - 1;
1870     memcpy(here, s, len);
1871     here[len] = '\n';
1872 ph10 289 here[len+1] = 0;
1873     free(s);
1874 ph10 287 }
1875 ph10 289 else
1876     #endif
1877    
1878 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
1879 ph10 289
1880 ph10 287 {
1881 ph10 516 if (f == stdin) printf("%s", prompt);
1882 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
1883     return (here == start)? NULL : start;
1884 ph10 289 }
1885    
1886 nigel 91 dlen = (int)strlen((char *)here);
1887     if (dlen > 0 && here[dlen - 1] == '\n') return start;
1888     here += dlen;
1889     }
1890    
1891     else
1892     {
1893     int new_buffer_size = 2*buffer_size;
1894 ph10 836 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1895     pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1896 nigel 91
1897 chpe 1090 if (new_buffer == NULL || new_pbuffer == NULL)
1898 nigel 91 {
1899     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1900     exit(1);
1901     }
1902    
1903     memcpy(new_buffer, buffer, buffer_size);
1904     memcpy(new_pbuffer, pbuffer, buffer_size);
1905    
1906     buffer_size = new_buffer_size;
1907    
1908     start = new_buffer + (start - buffer);
1909     here = new_buffer + (here - buffer);
1910    
1911     free(buffer);
1912     free(pbuffer);
1913    
1914     buffer = new_buffer;
1915     pbuffer = new_pbuffer;
1916     }
1917     }
1918    
1919     return NULL; /* Control never gets here */
1920     }
1921    
1922    
1923    
1924     /*************************************************
1925 nigel 63 * Read number from string *
1926     *************************************************/
1927    
1928     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1929     around with conditional compilation, just do the job by hand. It is only used
1930 nigel 93 for unpicking arguments, so just keep it simple.
1931 nigel 63
1932     Arguments:
1933     str string to be converted
1934     endptr where to put the end pointer
1935    
1936     Returns: the value, as an int
1937     */
1938    
1939     static int
1940 ph10 836 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1941 nigel 63 {
1942     int result = 0;
1943     while(*str != 0 && isspace(*str)) str++;
1944     while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1945     *endptr = str;
1946     return(result);
1947     }
1948    
1949    
1950    
1951 nigel 49 /*************************************************
1952 ph10 836 * Print one character *
1953 nigel 49 *************************************************/
1954    
1955 ph10 836 /* Print a single character either literally, or as a hex escape. */
1956 nigel 49
1957 chpe 1079 static int pchar(pcre_uint32 c, FILE *f)
1958 nigel 49 {
1959 chpe 1126 int n = 0;
1960 ph10 836 if (PRINTOK(c))
1961     {
1962     if (f != NULL) fprintf(f, "%c", c);
1963     return 1;
1964     }
1965 nigel 49
1966 ph10 836 if (c < 0x100)
1967 nigel 49 {
1968 ph10 836 if (use_utf)
1969     {
1970     if (f != NULL) fprintf(f, "\\x{%02x}", c);
1971     return 6;
1972     }
1973     else
1974     {
1975     if (f != NULL) fprintf(f, "\\x%02x", c);
1976     return 4;
1977     }
1978 nigel 49 }
1979    
1980 chpe 1085 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1981     return n >= 0 ? n : 0;
1982 ph10 836 }
1983 nigel 49
1984    
1985    
1986 ph10 836 #ifdef SUPPORT_PCRE8
1987     /*************************************************
1988     * Print 8-bit character string *
1989     *************************************************/
1990 nigel 49
1991 ph10 836 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1992     If handed a NULL file, just counts chars without printing. */
1993 nigel 49
1994 ph10 836 static int pchars(pcre_uint8 *p, int length, FILE *f)
1995     {
1996 chpe 1086 pcre_uint32 c = 0;
1997 ph10 836 int yield = 0;
1998 nigel 49
1999 ph10 836 if (length < 0)
2000     length = strlen((char *)p);
2001 nigel 49
2002 ph10 836 while (length-- > 0)
2003     {
2004     #if !defined NOUTF
2005     if (use_utf)
2006     {
2007     int rc = utf82ord(p, &c);
2008     if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
2009     {
2010     length -= rc - 1;
2011     p += rc;
2012     yield += pchar(c, f);
2013     continue;
2014     }
2015     }
2016     #endif
2017     c = *p++;
2018     yield += pchar(c, f);
2019     }
2020    
2021     return yield;
2022 nigel 49 }
2023 nigel 79 #endif
2024 nigel 49
2025    
2026 nigel 79
2027 ph10 836 #ifdef SUPPORT_PCRE16
2028 nigel 63 /*************************************************
2029 ph10 836 * Find length of 0-terminated 16-bit string *
2030 nigel 85 *************************************************/
2031    
2032 ph10 836 static int strlen16(PCRE_SPTR16 p)
2033 nigel 85 {
2034 ph10 836 int len = 0;
2035     while (*p++ != 0) len++;
2036     return len;
2037 nigel 85 }
2038 ph10 836 #endif /* SUPPORT_PCRE16 */
2039 nigel 85
2040    
2041 chpe 1055
2042     #ifdef SUPPORT_PCRE32
2043     /*************************************************
2044     * Find length of 0-terminated 32-bit string *
2045     *************************************************/
2046    
2047     static int strlen32(PCRE_SPTR32 p)
2048     {
2049     int len = 0;
2050     while (*p++ != 0) len++;
2051     return len;
2052     }
2053     #endif /* SUPPORT_PCRE32 */
2054    
2055    
2056    
2057 ph10 836 #ifdef SUPPORT_PCRE16
2058 nigel 85 /*************************************************
2059 ph10 836 * Print 16-bit character string *
2060 nigel 63 *************************************************/
2061 nigel 49
2062 ph10 836 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2063     If handed a NULL file, just counts chars without printing. */
2064 nigel 49
2065 ph10 836 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
2066 nigel 3 {
2067 nigel 63 int yield = 0;
2068 nigel 3
2069 ph10 836 if (length < 0)
2070     length = strlen16(p);
2071    
2072 nigel 63 while (length-- > 0)
2073 nigel 3 {
2074 chpe 1079 pcre_uint32 c = *p++ & 0xffff;
2075 ph10 836 #if !defined NOUTF
2076     if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2077 nigel 63 {
2078 ph10 836 int d = *p & 0xffff;
2079 chpe 1263 if (d >= 0xDC00 && d <= 0xDFFF)
2080 nigel 63 {
2081 ph10 836 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2082     length--;
2083     p++;
2084 nigel 63 }
2085     }
2086 nigel 79 #endif
2087 ph10 836 yield += pchar(c, f);
2088     }
2089 nigel 3
2090 ph10 836 return yield;
2091     }
2092     #endif /* SUPPORT_PCRE16 */
2093 nigel 63
2094 ph10 836
2095    
2096 chpe 1055 #ifdef SUPPORT_PCRE32
2097     /*************************************************
2098     * Print 32-bit character string *
2099     *************************************************/
2100    
2101     /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2102     If handed a NULL file, just counts chars without printing. */
2103    
2104 chpe 1117 static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2105 chpe 1055 {
2106     int yield = 0;
2107    
2108 ph10 1202 (void)(utf); /* Avoid compiler warning */
2109    
2110 chpe 1055 if (length < 0)
2111     length = strlen32(p);
2112    
2113     while (length-- > 0)
2114     {
2115 ph10 1122 pcre_uint32 c = *p++;
2116 chpe 1055 yield += pchar(c, f);
2117     }
2118    
2119     return yield;
2120     }
2121     #endif /* SUPPORT_PCRE32 */
2122    
2123    
2124    
2125 ph10 836 #ifdef SUPPORT_PCRE8
2126     /*************************************************
2127     * Read a capture name (8-bit) and check it *
2128     *************************************************/
2129    
2130     static pcre_uint8 *
2131     read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
2132     {
2133     pcre_uint8 *npp = *pp;
2134     while (isalnum(*p)) *npp++ = *p++;
2135     *npp++ = 0;
2136     *npp = 0;
2137     if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
2138     {
2139     fprintf(outfile, "no parentheses with name \"");
2140     PCHARSV(*pp, 0, -1, outfile);
2141     fprintf(outfile, "\"\n");
2142 nigel 63 }
2143 nigel 3
2144 ph10 836 *pp = npp;
2145     return p;
2146 nigel 63 }
2147 ph10 836 #endif /* SUPPORT_PCRE8 */
2148 nigel 23
2149 nigel 3
2150 nigel 23
2151 ph10 836 #ifdef SUPPORT_PCRE16
2152 nigel 63 /*************************************************
2153 ph10 836 * Read a capture name (16-bit) and check it *
2154     *************************************************/
2155    
2156     /* Note that the text being read is 8-bit. */
2157    
2158     static pcre_uint8 *
2159     read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
2160     {
2161     pcre_uint16 *npp = *pp;
2162     while (isalnum(*p)) *npp++ = *p++;
2163     *npp++ = 0;
2164     *npp = 0;
2165 zherczeg 852 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
2166 ph10 836 {
2167     fprintf(outfile, "no parentheses with name \"");
2168     PCHARSV(*pp, 0, -1, outfile);
2169     fprintf(outfile, "\"\n");
2170     }
2171     *pp = npp;
2172     return p;
2173     }
2174     #endif /* SUPPORT_PCRE16 */
2175    
2176    
2177    
2178 chpe 1055 #ifdef SUPPORT_PCRE32
2179 ph10 836 /*************************************************
2180 chpe 1055 * Read a capture name (32-bit) and check it *
2181     *************************************************/
2182    
2183     /* Note that the text being read is 8-bit. */
2184    
2185     static pcre_uint8 *
2186     read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2187     {
2188     pcre_uint32 *npp = *pp;
2189     while (isalnum(*p)) *npp++ = *p++;
2190     *npp++ = 0;
2191     *npp = 0;
2192     if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2193     {
2194     fprintf(outfile, "no parentheses with name \"");
2195     PCHARSV(*pp, 0, -1, outfile);
2196     fprintf(outfile, "\"\n");
2197     }
2198     *pp = npp;
2199     return p;
2200     }
2201     #endif /* SUPPORT_PCRE32 */
2202    
2203    
2204    
2205     /*************************************************
2206 nigel 63 * Callout function *
2207     *************************************************/
2208 nigel 3
2209 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
2210     the match. Yield zero unless more callouts than the fail count, or the callout
2211     data is not zero. */
2212 nigel 3
/* Callout handler. It shows the subject with markers for the start of the
match and the current position, followed by the next pattern item, and
optionally the captured substrings and the latest mark. The yield is the
integer that cb->callout_data points to if that is non-zero; otherwise it is 1
when this callout's number is callout_fail_id and the count of such callouts
has reached callout_fail_count, and 0 in all other cases. */

static int callout(pcre_callout_block *cb)
{
/* f is the output file for the first callout of a match, or when extra detail
is wanted; otherwise it is NULL, which suppresses the re-printing of the
subject below. */

FILE *f = (first_callout | callout_extra)? outfile : NULL;
int i, pre_start, post_start, subject_length;

/* With extra detail, list each capturing pair up to capture_top; a negative
first offset marks a pair that is unset. */

if (callout_extra)
  {
  fprintf(f, "Callout %d: last capture = %d\n",
    cb->callout_number, cb->capture_last);

  for (i = 0; i < cb->capture_top * 2; i += 2)
    {
    if (cb->offset_vector[i] < 0)
      fprintf(f, "%2d: <unset>\n", i/2);
    else
      {
      fprintf(f, "%2d: ", i/2);
      PCHARSV(cb->subject, cb->offset_vector[i],
        cb->offset_vector[i+1] - cb->offset_vector[i], f);
      fprintf(f, "\n");
      }
    }
  }

/* Re-print the subject in canonical form, the first time or if giving full
details. On subsequent calls in the same match, we use pchars just to find the
printed lengths of the substrings. */

if (f != NULL) fprintf(f, "--->");

/* NOTE(review): PCHARS is defined elsewhere; from its use here it appears to
store the printed width in its first argument - confirm against the macro.
pre_start covers the text before the match start, and post_start the text from
the match start up to the current position. */

PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
PCHARS(post_start, cb->subject, cb->start_match,
  cb->current_position - cb->start_match, f);

/* The whole subject is measured with a NULL file, so nothing is output. */

PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);

PCHARSV(cb->subject, cb->current_position,
  cb->subject_length - cb->current_position, f);

if (f != NULL) fprintf(f, "\n");

/* Always print appropriate indicators, with callout number if not already
shown. For automatic callouts, show the pattern offset. */

if (cb->callout_number == 255)
  {
  fprintf(outfile, "%+3d ", cb->pattern_position);
  if (cb->pattern_position > 99) fprintf(outfile, "\n ");
  }
else
  {
  if (callout_extra) fprintf(outfile, " ");
  else fprintf(outfile, "%3d ", cb->callout_number);
  }

/* Put one circumflex under the start of the match and, if the current
position is beyond that, another under the last character before it. */

for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
fprintf(outfile, "^");

if (post_start > 0)
  {
  for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
  fprintf(outfile, "^");
  }

/* Pad beyond the end of the subject, then show the next pattern item, taken
from the copy of the input in pbuffer. At least one character is shown, even
when next_item_length is zero. */

for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
  fprintf(outfile, " ");

fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
  pbuffer + cb->pattern_position);

fprintf(outfile, "\n");
first_callout = 0;

/* Show the mark only when its pointer differs from the one seen at the
previous callout. */

if (cb->mark != last_callout_mark)
  {
  if (cb->mark == NULL)
    fprintf(outfile, "Latest Mark: <unset>\n");
  else
    {
    fprintf(outfile, "Latest Mark: ");
    PCHARSV(cb->mark, 0, -1, outfile);
    putc('\n', outfile);
    }
  last_callout_mark = cb->mark;
  }

/* Non-zero callout data is reported and becomes the yield. */

if (cb->callout_data != NULL)
  {
  int callout_data = *((int *)(cb->callout_data));
  if (callout_data != 0)
    {
    fprintf(outfile, "Callout data = %d\n", callout_data);
    return callout_data;
    }
  }

/* Callouts with the chosen number are counted; the yield becomes 1 once the
count reaches callout_fail_count. */

return (cb->callout_number != callout_fail_id)? 0 :
       (++callout_count >= callout_fail_count)? 1 : 0;
}
2312    
2313    
2314 nigel 63 /*************************************************
2315 nigel 73 * Local malloc functions *
2316 nigel 63 *************************************************/
2317 nigel 3
2318 ph10 667 /* Alternative malloc function, to test functionality and save the size of a
2319 ph10 836 compiled re, which is the first store request that pcre_compile() makes. The
2320     show_malloc variable is set only during matching. */
2321 nigel 3
2322     static void *new_malloc(size_t size)
2323     {
2324 nigel 73 void *block = malloc(size);
2325 nigel 43 gotten_store = size;
2326 ph10 836 if (first_gotten_store == 0) first_gotten_store = size;
2327 nigel 73 if (show_malloc)
2328 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2329 nigel 73 return block;
2330 nigel 3 }
2331    
2332 nigel 73 static void new_free(void *block)
2333     {
2334     if (show_malloc)
2335     fprintf(outfile, "free %p\n", block);
2336     free(block);
2337     }
2338 nigel 3
2339 nigel 73 /* For recursion malloc/free, to test stacking calls */
2340    
2341     static void *stack_malloc(size_t size)
2342     {
2343     void *block = malloc(size);
2344     if (show_malloc)
2345 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2346 nigel 73 return block;
2347     }
2348    
2349     static void stack_free(void *block)
2350     {
2351     if (show_malloc)
2352     fprintf(outfile, "stack_free %p\n", block);
2353     free(block);
2354     }
2355    
2356    
2357 nigel 63 /*************************************************
2358     * Call pcre_fullinfo() *
2359     *************************************************/
2360 nigel 43
2361 ph10 836 /* Get one piece of information from the pcre_fullinfo() function. When only
2362 chpe 1055 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2363 ph10 836 value, but the code is defensive.
2364 nigel 43
2365 ph10 836 Arguments:
2366     re compiled regex
2367     study study data
2368     option PCRE_INFO_xxx option
2369     ptr where to put the data
2370    
2371     Returns: 0 when OK, < 0 on error
2372     */
2373    
2374     static int
2375     new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2376 nigel 43 {
2377     int rc;
2378 ph10 836
2379 chpe 1055 if (pcre_mode == PCRE32_MODE)
2380     #ifdef SUPPORT_PCRE32
2381     rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2382     #else
2383     rc = PCRE_ERROR_BADMODE;
2384     #endif
2385     else if (pcre_mode == PCRE16_MODE)
2386 ph10 836 #ifdef SUPPORT_PCRE16
2387 zherczeg 852 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2388 ph10 836 #else
2389     rc = PCRE_ERROR_BADMODE;
2390     #endif
2391     else
2392     #ifdef SUPPORT_PCRE8
2393     rc = pcre_fullinfo(re, study, option, ptr);
2394     #else
2395     rc = PCRE_ERROR_BADMODE;
2396     #endif
2397    
2398 ph10 1313 if (rc < 0 && rc != PCRE_ERROR_UNSET)
2399 ph10 836 {
2400     fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2401 chpe 1055 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2402 ph10 836 if (rc == PCRE_ERROR_BADMODE)
2403 chpe 1055 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2404     "%d-bit mode\n", 8 * CHAR_SIZE,
2405     8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2406 ph10 836 }
2407    
2408     return rc;
2409 nigel 43 }
2410    
2411    
2412    
2413 nigel 63 /*************************************************
2414 ph10 836 * Swap byte functions *
2415 nigel 75 *************************************************/
2416    
2417 ph10 836 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2418     value, respectively.
2419    
2420     Arguments:
2421     value any number
2422    
2423     Returns: the byte swapped value
2424     */
2425    
2426     static pcre_uint32
2427     swap_uint32(pcre_uint32 value)
2428 nigel 75 {
2429     return ((value & 0x000000ff) << 24) |
2430     ((value & 0x0000ff00) << 8) |
2431     ((value & 0x00ff0000) >> 8) |
2432 ph10 836 (value >> 24);
2433 nigel 75 }
2434    
2435 ph10 836 static pcre_uint16
2436     swap_uint16(pcre_uint16 value)
2437     {
2438     return (value >> 8) | (value << 8);
2439     }
2440 nigel 75
2441    
2442    
2443     /*************************************************
2444 ph10 836 * Flip bytes in a compiled pattern *
2445     *************************************************/
2446    
/* This function is called if the 'F' option was present on a pattern that is
to be written to a file. It marks the pattern with REVERSED_MAGIC_NUMBER and
flips the bytes of all the integer fields in the regex data block and, when an
extra block is supplied, in the study block. In 16-bit mode it additionally
walks the name table and the compiled code, flipping every 16-bit unit except
the 32-byte character bit maps of classes, which are skipped. This is to make
it possible to test PCRE's ability to reload byte-flipped patterns, e.g. those
compiled on a different architecture.

Arguments:
  ere       the compiled pattern (8-bit or 16-bit library)
  extra     the extra block that holds the study data, or NULL

Returns:    nothing; the blocks are modified in place
*/

#if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
static void
regexflip8_or_16(pcre *ere, pcre_extra *extra)
{
real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
#ifdef SUPPORT_PCRE16
int op;

/* These are computed from the header while it is still in native byte order;
the fields they depend on are flipped just below. */

pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
int length = re->name_count * re->name_entry_size;
#ifdef SUPPORT_UTF
BOOL utf = (re->options & PCRE_UTF16) != 0;
BOOL utf16_char = FALSE;
#endif /* SUPPORT_UTF */
#endif /* SUPPORT_PCRE16 */

/* Always flip the bytes in the main data block and study blocks. */

re->magic_number = REVERSED_MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
re->flags = swap_uint32(re->flags);
re->limit_match = swap_uint32(re->limit_match);
re->limit_recursion = swap_uint32(re->limit_recursion);
re->first_char = swap_uint16(re->first_char);
re->req_char = swap_uint16(re->req_char);
re->max_lookbehind = swap_uint16(re->max_lookbehind);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);
re->ref_count = swap_uint16(re->ref_count);

if (extra != NULL)
  {
  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
  rsd->size = swap_uint32(rsd->size);
  rsd->flags = swap_uint32(rsd->flags);
  rsd->minlength = swap_uint32(rsd->minlength);
  }

/* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
in the name table, if present, and then in the pattern itself. */

#ifdef SUPPORT_PCRE16
if (pcre_mode != PCRE16_MODE) return;

/* Each pass first flips the "length" units left over from the previous item
(initially the whole name table), then reads and flips the next opcode and
works out how many operand units follow it. */

while(TRUE)
  {
  /* Swap previous characters. */
  while (length-- > 0)
    {
    *ptr = swap_uint16(*ptr);
    ptr++;
    }
#ifdef SUPPORT_UTF
  if (utf16_char)
    {
    /* The unit at ptr[-1] is the character operand that the loop above has
    just byte-swapped, so it must be swapped back before it can be tested for
    being a high surrogate. Testing the flipped value would miss every real
    surrogate and misfire on units whose low byte is 0xd8-0xdb. */

    if ((swap_uint16(ptr[-1]) & 0xfc00) == 0xd800)
      {
      /* We know that there is only one extra character in UTF-16. */
      *ptr = swap_uint16(*ptr);
      ptr++;
      }
    }
  utf16_char = FALSE;
#endif /* SUPPORT_UTF */

  /* Get next opcode. */

  length = 0;
  op = *ptr;
  *ptr++ = swap_uint16(op);

  switch (op)
    {
    case OP_END:
    return;

#ifdef SUPPORT_UTF
    /* These opcodes end with a character operand, which in UTF-16 mode may
    be followed by the second half of a surrogate pair. */

    case OP_CHAR:
    case OP_CHARI:
    case OP_NOT:
    case OP_NOTI:
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    if (utf) utf16_char = TRUE;
#endif
    /* Fall through. */

    default:
    length = OP_lengths16[op] - 1;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* Skip the character bit map. */
    ptr += 32/sizeof(pcre_uint16);
    length = 0;
    break;

    case OP_XCLASS:
    /* The item's total length is read before it is flipped; what is left
    after the opcode, the link and the flags unit is flipped on the next
    pass. LINK_SIZE can be 1 or 2 in 16 bit mode. */
    if (LINK_SIZE > 1)
      length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
        - (1 + LINK_SIZE + 1));
    else
      length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));

    /* Reverse the size of the XCLASS instance. */
    *ptr = swap_uint16(*ptr);
    ptr++;
    if (LINK_SIZE > 1)
      {
      *ptr = swap_uint16(*ptr);
      ptr++;
      }

    /* The next unit holds the XCLASS flags; op is reused to hold them. */
    op = *ptr;
    *ptr = swap_uint16(op);
    ptr++;
    if ((op & XCL_MAP) != 0)
      {
      /* Skip the character bit map. */
      ptr += 32/sizeof(pcre_uint16);
      length -= 32/sizeof(pcre_uint16);
      }
    break;
    }
  }
/* Control should never reach here in 16 bit mode. */
#endif /* SUPPORT_PCRE16 */
}
#endif /* SUPPORT_PCRE[8|16] */
2638 ph10 836
2639    
2640    
#if defined SUPPORT_PCRE32

/* Byte-flip a pattern compiled by the 32-bit library: the integer fields of
the regex data block, the study block when an extra block is supplied, and
then every 32-bit unit of the name table and the compiled code except the
32-byte character bit maps of classes, which are skipped.

Arguments:
  ere       the compiled pattern (32-bit library)
  extra     the extra block that holds the study data, or NULL

Returns:    nothing; the blocks are modified in place
*/

static void
regexflip_32(pcre *ere, pcre_extra *extra)
{
real_pcre32 *re = (real_pcre32 *)ere;

/* Taken from the header while it is still in native byte order. */

pcre_uint32 *cursor = (pcre_uint32 *)re + re->name_table_offset;
int pending = re->name_count * re->name_entry_size;
int opcode;

/* Always flip the bytes in the main data block and study blocks. */

re->magic_number = REVERSED_MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
re->flags = swap_uint32(re->flags);
re->limit_match = swap_uint32(re->limit_match);
re->limit_recursion = swap_uint32(re->limit_recursion);
re->first_char = swap_uint32(re->first_char);
re->req_char = swap_uint32(re->req_char);
re->max_lookbehind = swap_uint16(re->max_lookbehind);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);
re->ref_count = swap_uint16(re->ref_count);

if (extra != NULL)
  {
  pcre_study_data *study = (pcre_study_data *)(extra->study_data);
  study->size = swap_uint32(study->size);
  study->flags = swap_uint32(study->flags);
  study->minlength = swap_uint32(study->minlength);
  }

/* In 32-bit mode we must swap bytes in the name table, if present, and then
in the pattern itself. Each pass flips the units still owed from the previous
item, then flips the next opcode and works out how many units it owes. */

for (;;)
  {
  for (; pending > 0; pending--)
    {
    *cursor = swap_uint32(*cursor);
    cursor++;
    }

  /* Fetch the opcode in native order before overwriting it. */

  opcode = *cursor;
  *cursor = swap_uint32(opcode);
  cursor++;
  pending = 0;

  if (opcode == OP_END) return;

  if (opcode == OP_CLASS || opcode == OP_NCLASS)
    {
    /* Only a character bit map follows, and that is not flipped. */
    cursor += 32/sizeof(pcre_uint32);
    }
  else if (opcode == OP_XCLASS)
    {
    int xclass_flags;

    /* LINK_SIZE can only be 1 in 32-bit mode. Read the item size before it
    is reversed. */
    pending = (int)((unsigned int)(cursor[0]) - (1 + LINK_SIZE + 1));
    *cursor = swap_uint32(*cursor);
    cursor++;

    xclass_flags = *cursor;
    *cursor = swap_uint32(xclass_flags);
    cursor++;

    if ((xclass_flags & XCL_MAP) != 0)
      {
      /* Step over the optional bit map and do not count it as owed. */
      cursor += 32/sizeof(pcre_uint32);
      pending -= 32/sizeof(pcre_uint32);
      }
    }
  else
    {
    pending = OP_lengths32[opcode] - 1;
    }
  }
/* Control should never reach here in 32 bit mode. */
}

#endif /* SUPPORT_PCRE32 */
2734    
2735    
2736    
2737     static void
2738     regexflip(pcre *ere, pcre_extra *extra)
2739     {
2740     #if defined SUPPORT_PCRE32
2741     if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2742     regexflip_32(ere, extra);
2743     #endif
2744     #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2745     if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2746     regexflip8_or_16(ere, extra);
2747     #endif
2748     }
2749    
2750    
2751    
2752 ph10 836 /*************************************************
2753 nigel 87 * Check match or recursion limit *
2754     *************************************************/
2755    
2756     static int
2757 ph10 836 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2758 nigel 87 int start_offset, int options, int *use_offsets, int use_size_offsets,
2759     int flag, unsigned long int *limit, int errnumber, const char *msg)
2760     {
2761     int count;
2762     int min = 0;
2763     int mid = 64;
2764     int max = -1;
2765    
2766     extra->flags |= flag;
2767    
2768     for (;;)
2769     {
2770     *limit = mid;
2771    
2772 ph10 836 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2773 nigel 87 use_offsets, use_size_offsets);
2774    
2775     if (count == errnumber)
2776     {
2777     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2778     min = mid;
2779     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2780     }
2781    
2782     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2783     count == PCRE_ERROR_PARTIAL)
2784     {
2785     if (mid == min + 1)
2786     {
2787     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2788     break;
2789     }
2790     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2791     max = mid;
2792     mid = (min + mid)/2;
2793     }
2794     else break; /* Some other error */
2795     }
2796    
2797     extra->flags &= ~flag;
2798     return count;
2799     }
2800    
2801    
2802    
2803     /*************************************************
2804 ph10 227 * Case-independent strncmp() function *
2805     *************************************************/
2806    
2807     /*
2808     Arguments:
2809     s first string
2810     t second string
2811     n number of characters to compare
2812    
2813     Returns: < 0, = 0, or > 0, according to the comparison
2814     */
2815    
2816     static int
2817 ph10 836 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2818 ph10 227 {
2819     while (n--)
2820     {
2821     int c = tolower(*s++) - tolower(*t++);
2822     if (c) return c;
2823     }
2824     return 0;
2825     }
2826    
2827    
2828    
2829     /*************************************************
2830 nigel 91 * Check newline indicator *
2831     *************************************************/
2832    
2833 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2834     a message and return 0 if there is no match.
2835 nigel 91
2836     Arguments:
2837     p points after the leading '<'
2838     f file for error message
2839    
2840     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2841     */
2842    
2843     static int
2844 ph10 836 check_newline(pcre_uint8 *p, FILE *f)
2845 nigel 91 {
2846 ph10 836 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2847     if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2848     if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2849     if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2850     if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2851     if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2852     if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2853 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
2854     return 0;
2855     }
2856    
2857    
2858    
2859     /*************************************************
2860 nigel 93 * Usage function *
2861     *************************************************/
2862    
/* Write the command-line help text to stdout. Options that depend on how
pcretest was built (-8, -16, -32, -dfa, -p and the readline note) are listed
only when the corresponding support is compiled in. */

static void
usage(void)
{
printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
printf("Input and output default to stdin and stdout.\n");
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif
printf("\nOptions:\n");
/* -8 is accepted by the option scanner, so it is listed alongside -16 and
-32. */
#ifdef SUPPORT_PCRE8
printf(" -8 use the 8-bit library\n");
#endif
#ifdef SUPPORT_PCRE16
printf(" -16 use the 16-bit library\n");
#endif
#ifdef SUPPORT_PCRE32
printf(" -32 use the 32-bit library\n");
#endif
printf(" -b show compiled code\n");
printf(" -C show PCRE compile-time options and exit\n");
printf(" -C arg show a specific compile-time option\n");
printf(" and exit with its value. The arg can be:\n");
printf(" linksize internal link size [2, 3, 4]\n");
printf(" pcre8 8 bit library support enabled [0, 1]\n");
printf(" pcre16 16 bit library support enabled [0, 1]\n");
printf(" pcre32 32 bit library support enabled [0, 1]\n");
printf(" utf Unicode Transformation Format supported [0, 1]\n");
printf(" ucp Unicode Properties supported [0, 1]\n");
printf(" jit Just-in-time compiler supported [0, 1]\n");
printf(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
/* The option scanner also recognises these two -C arguments. */
printf(" ebcdic compiled for an EBCDIC environment [0, 1]\n");
printf(" ebcdic-nl code value of LF if compiled for EBCDIC, else 0\n");
printf(" -d debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
printf(" -dfa force DFA matching for all subjects\n");
#endif
printf(" -help show usage information\n");
printf(" -i show information about compiled patterns\n"
       " -M find MATCH_LIMIT minimum for each subject\n"
       " -m output memory used information\n"
       " -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf(" -p use POSIX interface\n");
#endif
printf(" -q quiet: do not output PCRE version number at start\n");
printf(" -S <n> set stack size to <n> megabytes\n");
printf(" -s force each pattern to be studied at basic level\n"
       " -s+ force each pattern to be studied, using JIT if available\n"
       " -s++ ditto, verifying when JIT was actually used\n"
       " -s+n force each pattern to be studied, using JIT if available,\n"
       " where 1 <= n <= 7 selects JIT options\n"
       " -s++n ditto, verifying when JIT was actually used\n"
       " -t time compilation and execution\n");
printf(" -t <n> time compilation and execution, repeating <n> times\n");
printf(" -tm time execution (matching) only\n");
printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
}
2917    
2918    
2919    
2920     /*************************************************
2921 nigel 63 * Main Program *
2922     *************************************************/
2923 nigel 43
2924 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
2925     consist of a regular expression, in delimiters and optionally followed by
2926     options, followed by a set of test data, terminated by an empty line. */
2927    
2928     int main(int argc, char **argv)
2929     {
2930     FILE *infile = stdin;
2931 ph10 836 const char *version;
2932 nigel 3 int options = 0;
2933     int study_options = 0;
2934 ph10 386 int default_find_match_limit = FALSE;
2935 nigel 3 int op = 1;
2936     int timeit = 0;
2937 nigel 93 int timeitm = 0;
2938 nigel 3 int showinfo = 0;
2939 nigel 31 int showstore = 0;
2940 ph10 667 int force_study = -1;
2941     int force_study_options = 0;
2942 nigel 87 int quiet = 0;
2943 nigel 53 int size_offsets = 45;
2944     int size_offsets_max;
2945 nigel 77 int *offsets = NULL;
2946 nigel 3 int debug = 0;
2947 nigel 11 int done = 0;
2948 nigel 77 int all_use_dfa = 0;
2949 ph10 922 int verify_jit = 0;
2950 nigel 77 int yield = 0;
2951 nigel 91 int stack_size;
2952 chpe 1090 pcre_uint8 *dbuffer = NULL;
2953     size_t dbuffer_size = 1u << 14;
2954 nigel 3
2955 ph10 960 #if !defined NOPOSIX
2956     int posix = 0;
2957     #endif
2958     #if !defined NODFA
2959     int *dfa_workspace = NULL;
2960     #endif
2961    
2962 ph10 667 pcre_jit_stack *jit_stack = NULL;
2963    
2964 ph10 836 /* These vectors store, end-to-end, a list of zero-terminated captured
2965     substring names, each list itself being terminated by an empty name. Assume
2966     that 1024 is plenty long enough for the few names we'll be testing. It is
2967 chpe 1055 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2968 ph10 881 for the actual memory, to ensure alignment. */
2969 ph10 667
2970 chpe 1055 pcre_uint32 copynames[1024];
2971     pcre_uint32 getnames[1024];
2972 nigel 69
2973 chpe 1055 #ifdef SUPPORT_PCRE32
2974     pcre_uint32 *cn32ptr;
2975     pcre_uint32 *gn32ptr;
2976     #endif
2977    
2978 ph10 881 #ifdef SUPPORT_PCRE16
2979 chpe 1055 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2980     pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2981 ph10 836 pcre_uint16 *cn16ptr;
2982     pcre_uint16 *gn16ptr;
2983 ph10 881 #endif
2984 nigel 91
2985 ph10 881 #ifdef SUPPORT_PCRE8
2986 ph10 836 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2987     pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2988     pcre_uint8 *cn8ptr;
2989     pcre_uint8 *gn8ptr;
2990 ph10 881 #endif
2991 nigel 91
2992 ph10 836 /* Get buffers from malloc() so that valgrind will check their misuse when
2993 ph10 1122 debugging. They grow automatically when very long lines are read. The 16-
2994 chpe 1055 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2995 nigel 69
2996 ph10 836 buffer = (pcre_uint8 *)malloc(buffer_size);
2997     pbuffer = (pcre_uint8 *)malloc(buffer_size);
2998 nigel 69
2999 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
3000 nigel 3
3001 nigel 93 outfile = stdout;
3002    
3003     /* The following _setmode() stuff is some Windows magic that tells its runtime
3004     library to translate CRLF into a single LF character. At least, that's what
3005     I've been told: never having used Windows I take this all on trust. Originally
3006     it set 0x8000, but then I was advised that _O_BINARY was better. */
3007    
3008 nigel 75 #if defined(_WIN32) || defined(WIN32)
3009 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
3010     #endif
3011 nigel 75
3012 ph10 836 /* Get the version number: both pcre_version() and pcre16_version() give the
3013     same answer. We just need to ensure that we call one that is available. */
3014    
3015 chpe 1055 #if defined SUPPORT_PCRE8
3016 ph10 836 version = pcre_version();
3017 chpe 1055 #elif defined SUPPORT_PCRE16
3018 ph10 836 version = pcre16_version();
3019 chpe 1055 #elif defined SUPPORT_PCRE32
3020     version = pcre32_version();
3021 ph10 836 #endif
3022    
3023 nigel 3 /* Scan options */
3024    
3025     while (argc > 1 && argv[op][0] == '-')
3026     {
3027 ph10 836 pcre_uint8 *endptr;
3028 ph10 960 char *arg = argv[op];
3029 nigel 53
3030 ph10 922 if (strcmp(arg, "-m") == 0) showstore = 1;
3031     else if (strcmp(arg, "-s") == 0) force_study = 0;
3032 ph10 960
3033 ph10 922 else if (strncmp(arg, "-s+", 3) == 0)
3034 ph10 667 {
3035 ph10 922 arg += 3;
3036     if (*arg == '+') { arg++; verify_jit = TRUE; }
3037 ph10 667 force_study = 1;
3038 ph10 923 if (*arg == 0)
3039 ph10 960 force_study_options = jit_study_bits[6];
3040 ph10 923 else if (*arg >= '1' && *arg <= '7')
3041 ph10 960 force_study_options = jit_study_bits[*arg - '1'];
3042 ph10 923 else goto BAD_ARG;
3043 ph10 691 }
3044 chpe 1097 else if (strcmp(arg, "-8") == 0)
3045     {
3046     #ifdef SUPPORT_PCRE8
3047     pcre_mode = PCRE8_MODE;
3048     #else
3049     printf("** This version of PCRE was built without 8-bit support\n");
3050     exit(1);
3051     #endif
3052     }
3053 ph10 922 else if (strcmp(arg, "-16") == 0)
3054 ph10 836 {
3055     #ifdef SUPPORT_PCRE16
3056 chpe 1055 pcre_mode = PCRE16_MODE;
3057 ph10 836 #else
3058     printf("** This version of PCRE was built without 16-bit support\n");
3059     exit(1);
3060     #endif
3061     }
3062 chpe 1200 else if (strcmp(arg, "-32") == 0)
3063 chpe 1055 {
3064     #ifdef SUPPORT_PCRE32
3065     pcre_mode = PCRE32_MODE;
3066     #else
3067     printf("** This version of PCRE was built without 32-bit support\n");
3068     exit(1);
3069     #endif
3070     }
3071 ph10 922 else if (strcmp(arg, "-q") == 0) quiet = 1;
3072     else if (strcmp(arg, "-b") == 0) debug = 1;
3073     else if (strcmp(arg, "-i") == 0) showinfo = 1;
3074     else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3075     else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
3076 nigel 79 #if !defined NODFA
3077 ph10 922 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
3078 nigel 79 #endif
3079 ph10 922 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
3080 ph10 836 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3081 nigel 65 *endptr == 0))
3082 nigel 53 {
3083     op++;
3084     argc--;
3085     }
3086 ph10 922 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
3087 nigel 93 {
3088 ph10 922 int both = arg[2] == 0;
3089 nigel 93 int temp;
3090 ph10 836 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
3091 nigel 93 *endptr == 0))
3092     {
3093     timeitm = temp;
3094     op++;
3095     argc--;
3096     }
3097     else timeitm = LOOPREPEAT;
3098     if (both) timeit = timeitm;
3099     }
3100 ph10 922 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
3101 ph10 836 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3102 nigel 91 *endptr == 0))
3103     {
3104 ph10 1254 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
3105 nigel 91 printf("PCRE: -S not supported on this OS\n");
3106     exit(1);
3107     #else
3108     int rc;
3109     struct rlimit rlim;
3110     getrlimit(RLIMIT_STACK, &rlim);
3111     rlim.rlim_cur = stack_size * 1024 * 1024;
3112     rc = setrlimit(RLIMIT_STACK, &rlim);
3113     if (rc != 0)
3114     {
3115     printf("PCRE: setrlimit() failed with error %d\n", rc);
3116     exit(1);
3117     }
3118     op++;
3119     argc--;
3120     #endif
3121     }
3122 nigel 53 #if !defined NOPOSIX
3123 ph10 922 else if (strcmp(arg, "-p") == 0) posix = 1;
3124 nigel 53 #endif
3125 ph10 922 else if (strcmp(arg, "-C") == 0)
3126 nigel 63 {
3127     int rc;
3128 ph10 392 unsigned long int lrc;
3129 ph10 836
3130     if (argc > 2)
3131     {
3132     if (strcmp(argv[op + 1], "linksize") == 0)
3133     {
3134     (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3135     printf("%d\n", rc);
3136     yield = rc;
3137 ph10 1320
3138 ph10 1254 #ifdef __VMS
3139     vms_setsymbol("LINKSIZE",0,yield );
3140     #endif
3141 ph10 836 }
3142 ph10 1033 else if (strcmp(argv[op + 1], "pcre8") == 0)
3143 ph10 836 {
3144     #ifdef SUPPORT_PCRE8
3145     printf("1\n");
3146     yield = 1;
3147     #else
3148     printf("0\n");
3149     yield = 0;
3150     #endif
3151 ph10 1254 #ifdef __VMS
3152     vms_setsymbol("PCRE8",0,yield );
3153     #endif
3154 ph10 836 }
3155 ph10 1033 else if (strcmp(argv[op + 1], "pcre16") == 0)
3156 ph10 836 {
3157     #ifdef SUPPORT_PCRE16
3158     printf("1\n");
3159     yield = 1;
3160     #else
3161     printf("0\n");
3162     yield = 0;
3163     #endif
3164 ph10 1254 #ifdef __VMS
3165     vms_setsymbol("PCRE16",0,yield );
3166     #endif
3167 ph10 836 }
3168 chpe 1055 else if (strcmp(argv[op + 1], "pcre32") == 0)
3169 ph10 836 {
3170 chpe 1055 #ifdef SUPPORT_PCRE32
3171     printf("1\n");
3172     yield = 1;
3173     #else
3174     printf("0\n");
3175     yield = 0;
3176     #endif
3177 ph10 1254 #ifdef __VMS
3178     vms_setsymbol("PCRE32",0,yield );
3179     #endif
3180 chpe 1055 }
3181 ph10 1254 else if (strcmp(argv[op + 1], "utf") == 0)
3182 chpe 1055 {
3183 ph10 836 #ifdef SUPPORT_PCRE8
3184 chpe 1055 if (pcre_mode == PCRE8_MODE)
3185     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3186     #endif
3187     #ifdef SUPPORT_PCRE16
3188     if (pcre_mode == PCRE16_MODE)
3189     (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3190     #endif
3191     #ifdef SUPPORT_PCRE32
3192     if (pcre_mode == PCRE32_MODE)
3193     (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3194     #endif
3195 ph10 836 printf("%d\n", rc);
3196     yield = rc;
3197 ph10 1254 #ifdef __VMS
3198     vms_setsymbol("UTF",0,yield );
3199     #endif
3200 ph10 836 }
3201 ph10 1033 else if (strcmp(argv[op + 1], "ucp") == 0)
3202 ph10 836 {
3203     (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3204     printf("%d\n", rc);
3205     yield = rc;
3206     }
3207 ph10 1033 else if (strcmp(argv[op + 1], "jit") == 0)
3208 ph10 836 {
3209     (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3210     printf("%d\n", rc);
3211     yield = rc;
3212     }
3213 ph10 1033 else if (strcmp(argv[op + 1], "newline") == 0)
3214 ph10 842 {
3215 ph10 838 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3216 ph10 1122 print_newline_config(rc, TRUE);
3217 ph10 842 }
3218 ph10 1033 else if (strcmp(argv[op + 1], "ebcdic") == 0)
3219     {
3220     #ifdef EBCDIC
3221     printf("1\n");
3222 ph10 1122 yield = 1;
3223 ph10 1033 #else
3224 ph10 1122 printf("0\n");
3225     #endif
3226 ph10 1033 }
3227     else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3228     {
3229     #ifdef EBCDIC
3230     printf("0x%02x\n", CHAR_LF);
3231     #else
3232 ph10 1122 printf("0\n");
3233     #endif
3234 ph10 1033 }
3235     else
3236 ph10 1122 {
3237 ph10 1033 printf("Unknown -C option: %s\n", argv[op + 1]);
3238 ph10 1122 }
3239 ph10 836 goto EXIT;
3240     }
3241 ph10 1122
3242 ph10 1033 /* No argument for -C: output all configuration information. */
3243 ph10 836
3244     printf("PCRE version %s\n", version);
3245 nigel 63 printf("Compiled with\n");
3246 ph10 1122
3247 ph10 1030 #ifdef EBCDIC
3248     printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3249 ph10 1122 #endif
3250 ph10 836
3251     /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3252     are set, either both UTFs are supported or both are not supported. */
3253    
3254 chpe 1055 #ifdef SUPPORT_PCRE8
3255     printf(" 8-bit support\n");
3256 nigel 63 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3257 chpe 1055 printf (" %sUTF-8 support\n", rc ? "" : "No ");
3258     #endif
3259     #ifdef SUPPORT_PCRE16
3260     printf(" 16-bit support\n");
3261 ph10 836 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3262 chpe 1055 printf (" %sUTF-16 support\n", rc ? "" : "No ");
3263 ph10 836 #endif
3264 chpe 1055 #ifdef SUPPORT_PCRE32
3265     printf(" 32-bit support\n");
3266     (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3267     printf (" %sUTF-32 support\n", rc ? "" : "No ");
3268     #endif
3269 ph10 836
3270     (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3271 nigel 75 printf(" %sUnicode properties support\n", rc? "" : "No ");
3272 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3273 ph10 674 if (rc)
3274 ph10 890 {
3275     const char *arch;
3276 ph10 908 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3277 ph10 890 printf(" Just-in-time compiler support: %s\n", arch);
3278 ph10 903 }
3279 ph10 674 else
3280     printf(" No just-in-time compiler support\n");
3281 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3282 ph10 1122 print_newline_config(rc, FALSE);
3283 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3284 ph10 231 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3285     "all Unicode newlines");
3286 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3287 nigel 63 printf(" Internal link size = %d\n", rc);
3288 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3289 nigel 63 printf(" POSIX malloc threshold = %d\n", rc);
3290 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3291 ph10 376 printf(" Default match limit = %ld\n", lrc);
3292 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3293 ph10 376 printf(" Default recursion depth limit = %ld\n", lrc);
3294 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3295 ph10 895 printf(" Match recursion uses %s", rc? "stack" : "heap");
3296     if (showstore)
3297 ph10 903 {
3298 ph10 901 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3299 ph10 903 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3300 ph10 895 }
3301 ph10 903 printf("\n");
3302 ph10 121 goto EXIT;
3303 nigel 63 }
3304 ph10 922 else if (strcmp(arg, "-help") == 0 ||
3305     strcmp(arg, "--help") == 0)
3306 nigel 93 {
3307     usage();
3308     goto EXIT;
3309     }
3310 nigel 3 else
3311     {
3312 ph10 960 BAD_ARG:
3313 ph10 922 printf("** Unknown or malformed option %s\n", arg);
3314 nigel 93 usage();
3315 nigel 77 yield = 1;
3316     goto EXIT;
3317 nigel 3 }
3318     op++;
3319     argc--;
3320     }
3321    
3322 nigel 53 /* Get the store for the offsets vector, and remember what it was */
3323    
3324     size_offsets_max = size_offsets;
3325 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3326 nigel 53 if (offsets == NULL)
3327     {
3328     printf("** Failed to get %d bytes of memory for offsets vector\n",
3329 ph10 151 (int)(size_offsets_max * sizeof(int)));
3330 nigel 77 yield = 1;
3331     goto EXIT;
3332 nigel 53 }
3333    
3334 nigel 3 /* Sort out the input and output files */
3335    
3336     if (argc > 1)
3337     {
3338 nigel 93 infile = fopen(argv[op], INPUT_MODE);
3339 nigel 3 if (infile == NULL)
3340     {
3341     printf("** Failed to open %s\n", argv[op]);
3342 nigel 77 yield = 1;
3343     goto EXIT;
3344 nigel 3 }
3345     }
3346    
3347     if (argc > 2)
3348     {
3349 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
3350 nigel 3 if (outfile == NULL)
3351     {
3352     printf("** Failed to open %s\n", argv[op+1]);
3353 nigel 77 yield = 1;
3354     goto EXIT;
3355 nigel 3 }
3356     }
3357    
3358     /* Set alternative malloc function */
3359    
3360 ph10 836 #ifdef SUPPORT_PCRE8
3361 nigel 3 pcre_malloc = new_malloc;
3362 nigel 73 pcre_free = new_free;
3363     pcre_stack_malloc = stack_malloc;
3364     pcre_stack_free = stack_free;
3365 ph10 836 #endif
3366 nigel 3
3367 ph10 836 #ifdef SUPPORT_PCRE16
3368     pcre16_malloc = new_malloc;
3369     pcre16_free = new_free;
3370     pcre16_stack_malloc = stack_malloc;
3371     pcre16_stack_free = stack_free;
3372     #endif
3373    
3374 chpe 1055 #ifdef SUPPORT_PCRE32
3375     pcre32_malloc = new_malloc;
3376     pcre32_free = new_free;
3377     pcre32_stack_malloc = stack_malloc;
3378     pcre32_stack_free = stack_free;
3379     #endif
3380    
3381 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
3382 nigel 3
3383 ph10 836 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3384 nigel 3
3385     /* Main loop */
3386    
3387 nigel 11 while (!done)
3388 nigel 3 {
3389     pcre *re = NULL;
3390     pcre_extra *extra = NULL;
3391 nigel 37
3392     #if !defined NOPOSIX /* There are still compilers that require no indent */
3393 nigel 3 regex_t preg;
3394 nigel 45 int do_posix = 0;
3395 nigel 37 #endif
3396    
3397 nigel 7 const char *error;
3398 ph10 836 pcre_uint8 *markptr;
3399     pcre_uint8 *p, *pp, *ppp;
3400     pcre_uint8 *to_file = NULL;
3401     const pcre_uint8 *tables = NULL;
3402 zherczeg 847 unsigned long int get_options;
3403 nigel 75 unsigned long int true_size, true_study_size = 0;
3404     size_t size, regex_gotten_store;
3405 ph10 654 int do_allcaps = 0;
3406 ph10 512 int do_mark = 0;
3407 nigel 3 int do_study = 0;
3408 ph10 654 int no_force_study = 0;
3409 nigel 25 int do_debug = debug;
3410 nigel 35 int do_G = 0;
3411     int do_g = 0;
3412 nigel 25 int do_showinfo = showinfo;
3413 nigel 35 int do_showrest = 0;
3414 ph10 616 int do_showcaprest = 0;
3415 nigel 75 int do_flip = 0;
3416 nigel 93 int erroroffset, len, delimiter, poffset;
3417 ph10 975
3418     #if !defined NODFA
3419 ph10 960 int dfa_matched = 0;
3420 ph10 975 #endif
3421 nigel 3
3422 ph10 836 use_utf = 0;
3423 ph10 211 debug_lengths = 1;
3424 nigel 63
3425 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3426 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3427 nigel 63 fflush(outfile);
3428 nigel 3
3429     p = buffer;
3430     while (isspace(*p)) p++;
3431     if (*p == 0) continue;
3432    
3433 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
3434 nigel 3
3435 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3436     {
3437 zherczeg 841 pcre_uint32 magic;
3438 ph10 836 pcre_uint8 sbuf[8];
3439 nigel 75 FILE *f;
3440    
3441     p++;
3442 zherczeg 839 if (*p == '!')
3443     {
3444     do_debug = TRUE;
3445     do_showinfo = TRUE;
3446     p++;
3447     }
3448    
3449 nigel 75 pp = p + (int)strlen((char *)p);
3450     while (isspace(pp[-1])) pp--;
3451     *pp = 0;
3452    
3453     f = fopen((char *)p, "rb");
3454     if (f == NULL)
3455     {
3456     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3457     continue;
3458     }
3459    
3460 zherczeg 839 first_gotten_store = 0;
3461 nigel 75 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3462    
3463     true_size =
3464     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3465     true_study_size =
3466     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3467    
3468 zherczeg 852 re = (pcre *)new_malloc(true_size);
3469 ph10 1017 if (re == NULL)
3470     {
3471     printf("** Failed to get %d bytes of memory for pcre object\n",
3472     (int)true_size);
3473     yield = 1;
3474     goto EXIT;
3475     }
3476 ph10 836 regex_gotten_store = first_gotten_store;
3477 nigel 75
3478     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3479    
3480 chpe 1055 magic = REAL_PCRE_MAGIC(re);
3481 nigel 75 if (magic != MAGIC_NUMBER)
3482     {
3483 ph10 836 if (swap_uint32(magic) == MAGIC_NUMBER)
3484 nigel 75 {
3485     do_flip = 1;
3486     }
3487     else
3488     {
3489     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3490 ph10 1017 new_free(re);
3491 nigel 75 fclose(f);
3492     continue;
3493     }
3494     }
3495    
3496 zherczeg 839 /* We hide the byte-invert info for little and big endian tests. */
3497 ph10 612 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3498 zherczeg 839 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3499 nigel 75
3500 ph10 612 /* Now see if there is any following study data. */
3501 nigel 75
3502     if (true_study_size != 0)
3503     {
3504     pcre_study_data *psd;
3505    
3506     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3507     extra->flags = PCRE_EXTRA_STUDY_DATA;
3508    
3509     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3510     extra->study_data = psd;
3511    
3512     if (fread(psd, 1, true_study_size, f) != true_study_size)
3513     {
3514     FAIL_READ:
3515     fprintf(outfile, "Failed to read data from %s\n", p);
3516 ph10 836 if (extra != NULL)
3517     {
3518     PCRE_FREE_STUDY(extra);
3519     }
3520 ph10 1017 new_free(re);
3521 nigel 75 fclose(f);
3522     continue;
3523     }
3524     fprintf(outfile, "Study data loaded from %s\n", p);
3525     do_study = 1; /* To get the data output if requested */
3526     }
3527     else fprintf(outfile, "No study data\n");
3528    
3529 ph10 836 /* Flip the necessary bytes. */
3530     if (do_flip)
3531     {
3532 zherczeg 839 int rc;
3533     PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3534     if (rc == PCRE_ERROR_BADMODE)
3535     {
3536 ph10 1313 pcre_uint32 flags_in_host_byte_order;
3537 zherczeg 1150 if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
3538     flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
3539     else
3540 ph10 1313 flags_in_host_byte_order = swap_uint32(REAL_PCRE_FLAGS(re));
3541 zherczeg 839 /* Simulate the result of the function call below. */
3542     fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3543 chpe 1055 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3544     PCRE_INFO_OPTIONS);
3545     fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3546 zherczeg 1150 "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
3547 ph10 1017 new_free(re);
3548     fclose(f);
3549 zherczeg 839 continue;
3550     }
3551 ph10 836 }
3552    
3553     /* Need to know if UTF-8 for printing data strings. */
3554    
3555 ph10 1017 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3556     {
3557     new_free(re);
3558     fclose(f);
3559     continue;
3560     }
3561 ph10 836 use_utf = (get_options & PCRE_UTF8) != 0;
3562    
3563 nigel 75 fclose(f);
3564     goto SHOW_INFO;
3565     }
3566    
3567     /* In-line pattern (the usual case). Get the delimiter and seek the end of
3568 ph10 836 the pattern; if it isn't complete, read more. */
3569 nigel 75
3570 nigel 3 delimiter = *p++;
3571    
3572 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
3573 nigel 3 {
3574 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3575 nigel 3 goto SKIP_DATA;
3576     }
3577    
3578     pp = p;
3579 ph10 530 poffset = (int)(p - buffer);
3580 nigel 3
3581     for(;;)
3582     {
3583 nigel 29 while (*pp != 0)
3584     {
3585     if (*pp == '\\' && pp[1] != 0) pp++;
3586     else if (*pp == delimiter) break;
3587     pp++;
3588     }
3589 nigel 3 if (*pp != 0) break;
3590 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3591 nigel 3 {
3592     fprintf(outfile, "** Unexpected EOF\n");
3593 nigel 11 done = 1;
3594     goto CONTINUE;
3595 nigel 3 }
3596 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3597 nigel 3 }
3598    
3599 nigel 93 /* The buffer may have moved while being extended; reset the start of data
3600     pointer to the correct relative point in the buffer. */
3601    
3602     p = buffer + poffset;
3603    
3604 nigel 29 /* If the first character after the delimiter is backslash, make
3605     the pattern end with backslash. This is purely to provide a way
3606     of testing for the error message when a pattern ends with backslash. */
3607    
3608     if (pp[1] == '\\') *pp++ = '\\';
3609    
3610 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3611     for callouts. */
3612 nigel 3
3613     *pp++ = 0;
3614 nigel 75 strcpy((char *)pbuffer, (char *)p);
3615 nigel 3
3616     /* Look for options after final delimiter */
3617    
3618     options = 0;
3619 ph10 1022 study_options = force_study_options;
3620 nigel 31 log_store = showstore; /* default from command line */
3621    
3622 nigel 3 while (*pp != 0)
3623     {
3624     switch (*pp++)
3625     {
3626 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
3627 nigel 35 case 'g': do_g = 1; break;
3628 nigel 3 case 'i': options |= PCRE_CASELESS; break;
3629     case 'm': options |= PCRE_MULTILINE; break;
3630     case 's': options |= PCRE_DOTALL; break;
3631     case 'x': options |= PCRE_EXTENDED; break;
3632 nigel 25
3633 ph10 616 case '+':
3634 ph10 654 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3635 ph10 616 break;
3636 ph10 654
3637     case '=': do_allcaps = 1; break;
3638 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
3639 nigel 93 case 'B': do_debug = 1; break;
3640 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
3641 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
3642 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3643 nigel 75 case 'F': do_flip = 1; break;
3644 nigel 35 case 'G': do_G = 1; break;
3645 nigel 25 case 'I': do_showinfo = 1; break;
3646 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
3647 ph10 512 case 'K': do_mark = 1; break;
3648 nigel 31 case 'M': log_store = 1; break;
3649 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3650 nigel 37
3651     #if !defined NOPOSIX
3652 nigel 3 case 'P': do_posix = 1; break;
3653 nigel 37 #endif
3654    
3655 ph10 654 case 'S':
3656 ph10 1022 do_study = 1;
3657     for (;;)
3658 ph10 612 {
3659 ph10 1022 switch (*pp++)
3660 ph10 667 {
3661 ph10 1022 case 'S':
3662     do_study = 0;
3663     no_force_study = 1;
3664     break;
3665    
3666     case '!':
3667     study_options |= PCRE_STUDY_EXTRA_NEEDED;
3668     break;
3669    
3670     case '+':
3671     if (*pp == '+')
3672 ph10 922 {
3673     verify_jit = TRUE;
3674 ph10 960 pp++;
3675     }
3676 ph10 923 if (*pp >= '1' && *pp <= '7')
3677     study_options |= jit_study_bits[*pp++ - '1'];
3678 ph10 960 else
3679     study_options |= jit_study_bits[6];
3680 ph10 1022 break;
3681    
3682     case '-':
3683     study_options &= ~PCRE_STUDY_ALLJIT;
3684     break;
3685    
3686     default:
3687     pp--;
3688     goto ENDLOOP;
3689 ph10 691 }
3690     }
3691 ph10 1022 ENDLOOP:
3692 ph10 612 break;
3693    
3694 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
3695 ph10 535 case 'W': options |= PCRE_UCP; break;
3696 nigel 3 case 'X': options |= PCRE_EXTRA; break;
3697 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3698 ph10 126 case 'Z': debug_lengths = 0; break;
3699 ph10 836 case '8': options |= PCRE_UTF8; use_utf = 1; break;
3700 ph10 1320 case '9': options |= PCRE_NEVER_UTF; break;
3701 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
3702 ph10 545
3703 ph10 541 case 'T':
3704     switch (*pp++)
3705     {
3706     case '0': tables = tables0; break;
3707     case '1': tables = tables1; break;
3708 ph10 545
3709 ph10 541 case '\r':
3710     case '\n':
3711 ph10 545 case ' ':
3712     case 0:
3713 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
3714 ph10 545 goto SKIP_DATA;
3715    
3716     default:
3717 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3718 ph10 545 goto SKIP_DATA;
3719 ph10 541 }
3720 ph10 545 break;
3721 nigel 25
3722     case 'L':
3723     ppp = pp;
3724 nigel 93 /* The '\r' test here is so that it works on Windows. */
3725     /* The 0 (NUL) test is just in case this is an unterminated line. */
3726     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3727 nigel 25 *ppp = 0;
3728     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3729     {
3730     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3731     goto SKIP_DATA;
3732     }
3733 nigel 93 locale_set = 1;
3734 ph10 836 tables = PCRE_MAKETABLES;
3735 nigel 25 pp = ppp;
3736     break;
3737    
3738 nigel 75 case '>':
3739     to_file = pp;
3740     while (*pp != 0) pp++;
3741     while (isspace(pp[-1])) pp--;
3742     *pp = 0;
3743     break;
3744    
3745 nigel 91 case '<':
3746     {
3747 ph10 836 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
3748 ph10 336 {
3749     options |= PCRE_JAVASCRIPT_COMPAT;
3750 ph10 345 pp += 3;
3751 ph10 336 }
3752     else
3753 ph10 345 {
3754 ph10 336 int x = check_newline(pp, outfile);
3755     if (x == 0) goto SKIP_DATA;
3756     options |= x;
3757     while (*pp++ != '>');
3758 ph10 345 }
3759 nigel 91 }
3760     break;
3761    
3762 nigel 77 case '\r': /* So that it works in Windows */
3763     case '\n':
3764     case ' ':
3765     break;
3766 nigel 75
3767 nigel 3 default:
3768     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
3769     goto SKIP_DATA;
3770     }
3771     }
3772    
3773 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
3774 nigel 25 timing, showing, or debugging options, nor the ability to pass over
3775 ph10 836 local character tables. Neither does it have 16-bit support. */
3776 nigel 3
3777 nigel 37 #if !defined NOPOSIX
3778 nigel 3 if (posix || do_posix)
3779     {
3780     int rc;
3781     int cflags = 0;
3782 nigel 75
3783 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3784     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3785 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3786 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3787     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3788 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3789 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3790 nigel 87
3791 ph10 836 first_gotten_store = 0;
3792 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
3793    
3794     /* Compilation failed; go back for another re, skipping to blank line
3795     if non-interactive. */
3796    
3797     if (rc != 0)
3798     {
3799 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3800 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3801     goto SKIP_DATA;
3802     }
3803     }
3804    
3805     /* Handle compiling via the native interface */
3806    
3807     else
3808 nigel 37 #endif /* !defined NOPOSIX */
3809    
3810 nigel 3 {
3811 chpe 1055 /* In 16- or 32-bit mode, convert the input. */
3812 ph10 836
3813     #ifdef SUPPORT_PCRE16
3814 chpe 1055 if (pcre_mode == PCRE16_MODE)
3815 ph10 836 {
3816     switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3817     {
3818     case -1:
3819     fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3820     "converted to UTF-16\n");
3821     goto SKIP_DATA;
3822    
3823     case -2:
3824     fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3825     "cannot be converted to UTF-16\n");
3826     goto SKIP_DATA;
3827 ph10 842
3828 ph10 836 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3829     fprintf(outfile, "**Failed: character value greater than 0xffff "
3830     "cannot be converted to 16-bit in non-UTF mode\n");
3831 ph10 842 goto SKIP_DATA;
3832 ph10 836
3833     default:
3834     break;
3835     }
3836     p = (pcre_uint8 *)buffer16;
3837     }
3838     #endif
3839    
3840 chpe 1055 #ifdef SUPPORT_PCRE32
3841     if (pcre_mode == PCRE32_MODE)
3842     {
3843     switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3844     {
3845     case -1:
3846     fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3847     "converted to UTF-32\n");
3848     goto SKIP_DATA;
3849    
3850     case -2:
3851     fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3852     "cannot be converted to UTF-32\n");
3853     goto SKIP_DATA;
3854    
3855     case -3:
3856     fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3857     goto SKIP_DATA;
3858    
3859     default:
3860     break;
3861     }
3862     p = (pcre_uint8 *)buffer32;
3863     }
3864     #endif
3865    
3866 ph10 836 /* Compile many times when timing */
3867    
3868 nigel 93 if (timeit > 0)
3869 nigel 3 {
3870     register int i;
3871     clock_t time_taken;
3872     clock_t start_time = clock();
3873 nigel 93 for (i = 0; i < timeit; i++)
3874 nigel 3 {
3875 ph10 836 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3876 nigel 3 if (re != NULL) free(re);
3877     }
3878     time_taken = clock() - start_time;
3879 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
3880     (((double)time_taken * 1000.0) / (double)timeit) /
3881 nigel 63 (double)CLOCKS_PER_SEC);
3882 nigel 3 }
3883    
3884 ph10 836 first_gotten_store = 0;
3885     PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3886 nigel 3
3887     /* Compilation failed; go back for another re, skipping to blank line
3888     if non-interactive. */
3889    
3890     if (re == NULL)
3891     {
3892     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3893     SKIP_DATA:
3894     if (infile != stdin)
3895     {
3896     for (;;)
3897     {
3898 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
3899 nigel 11 {
3900     done = 1;
3901     goto CONTINUE;
3902     }
3903 nigel 3 len = (int)strlen((char *)buffer);
3904     while (len > 0 && isspace(buffer[len-1])) len--;
3905     if (len == 0) break;
3906     }
3907     fprintf(outfile, "\n");
3908     }
3909 nigel 25 goto CONTINUE;
3910 nigel 3 }
3911 ph10 416
3912     /* Compilation succeeded. It is now possible to set the UTF-8 option from
3913     within the regex; check for this so that we know how to process the data
3914 ph10 412 lines. */
3915 ph10 416
3916 ph10 836 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3917     goto SKIP_DATA;
3918     if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3919 nigel 3
3920 ph10 836 /* Extract the size for possible writing before possibly flipping it,
3921     and remember the store that was got. */
3922 nigel 3
3923 chpe 1055 true_size = REAL_PCRE_SIZE(re);
3924 ph10 836 regex_gotten_store = first_gotten_store;
3925    
3926     /* Output code size information if requested */
3927    
3928 nigel 63 if (log_store)
3929 chpe 1055 {
3930     int name_count, name_entry_size, real_pcre_size;
3931    
3932     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3933     new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3934 chpe 1126 real_pcre_size = 0;
3935 chpe 1055 #ifdef SUPPORT_PCRE8
3936     if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3937     real_pcre_size = sizeof(real_pcre);
3938     #endif
3939     #ifdef SUPPORT_PCRE16
3940     if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
3941     real_pcre_size = sizeof(real_pcre16);
3942     #endif
3943     #ifdef SUPPORT_PCRE32
3944     if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
3945     real_pcre_size = sizeof(real_pcre32);
3946     #endif