/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1459 - (hide annotations) (download)
Tue Mar 4 10:45:15 2014 UTC (6 months, 4 weeks ago) by ph10
File MIME type: text/plain
File size: 171603 byte(s)
Preparations for next release.

1 zherczeg 929 /*************************************************
2 nigel 3 * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 ph10 836 been extended and consequently is now rather, er, *very* untidy in places. The
8     addition of 16-bit support has made it even worse. :-(
9 nigel 63
10 nigel 75 -----------------------------------------------------------------------------
11     Redistribution and use in source and binary forms, with or without
12     modification, are permitted provided that the following conditions are met:
13    
14     * Redistributions of source code must retain the above copyright notice,
15     this list of conditions and the following disclaimer.
16    
17     * Redistributions in binary form must reproduce the above copyright
18     notice, this list of conditions and the following disclaimer in the
19     documentation and/or other materials provided with the distribution.
20    
21     * Neither the name of the University of Cambridge nor the names of its
22     contributors may be used to endorse or promote products derived from
23     this software without specific prior written permission.
24    
25     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35     POSSIBILITY OF SUCH DAMAGE.
36     -----------------------------------------------------------------------------
37     */
38    
39 ph10 1221 /* This program now supports the testing of all of the 8-bit, 16-bit, and
40 ph10 1142 32-bit PCRE libraries in a single program. This is different from the modules
41     such as pcre_compile.c in the library itself, which are compiled separately for
42     each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43     twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44     make use of any of the macros from pcre_internal.h that depend on
45     COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46     SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47     supported library functions. */
48 nigel 75
49 ph10 200 #ifdef HAVE_CONFIG_H
50 ph10 236 #include "config.h"
51 ph10 200 #endif
52 ph10 199
53 nigel 3 #include <ctype.h>
54     #include <stdio.h>
55     #include <string.h>
56     #include <stdlib.h>
57     #include <time.h>
58 nigel 25 #include <locale.h>
59 nigel 75 #include <errno.h>
60 nigel 3
61 ph10 936 /* Both libreadline and libedit are optionally supported. The user-supplied
62 ph10 960 original patch uses readline/readline.h for libedit, but in at least one system
63     it is installed as editline/readline.h, so the configuration code now looks for
64 ph10 936 that first, falling back to readline/readline.h. */
65    
66     #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 ph10 343 #ifdef HAVE_UNISTD_H
68 ph10 287 #include <unistd.h>
69 ph10 343 #endif
70 ph10 936 #if defined(SUPPORT_LIBREADLINE)
71 ph10 287 #include <readline/readline.h>
72     #include <readline/history.h>
73 ph10 936 #else
74     #if defined(HAVE_EDITLINE_READLINE_H)
75     #include <editline/readline.h>
76     #else
77     #include <readline/readline.h>
78 ph10 287 #endif
79 ph10 936 #endif
80     #endif
81 nigel 93
82     /* A number of things vary for Windows builds. Originally, pcretest opened its
83     input and output without "b"; then I was told that "b" was needed in some
84     environments, so it was added for release 5.0 to both the input and output. (It
85     makes no difference on Unix-like systems.) Later I was told that it is wrong
86     for the input on Windows. I've now abstracted the modes into two macros that
87     are set here, to make it easier to fiddle with them, and removed "b" from the
88     input mode under Windows. */
89    
90     #if defined(_WIN32) || defined(WIN32)
91     #include <io.h> /* For _setmode() */
92     #include <fcntl.h> /* For _O_BINARY */
93     #define INPUT_MODE "r"
94     #define OUTPUT_MODE "wb"
95    
96 ph10 411 #ifndef isatty
97     #define isatty _isatty /* This is what Windows calls them, I'm told, */
98     #endif /* though in some environments they seem to */
99     /* be already defined, hence the #ifndefs. */
100     #ifndef fileno
101 ph10 343 #define fileno _fileno
102 ph10 411 #endif
103 ph10 343
104 ph10 580 /* A user sent this fix for Borland Builder 5 under Windows. */
105    
106     #ifdef __BORLANDC__
107     #define _setmode(handle, mode) setmode(handle, mode)
108     #endif
109    
110     /* Not Windows */
111    
112 nigel 93 #else
113     #include <sys/time.h> /* These two includes are needed */
114     #include <sys/resource.h> /* for setrlimit(). */
115 ph10 1027 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
116     #define INPUT_MODE "r"
117     #define OUTPUT_MODE "w"
118     #else
119 nigel 93 #define INPUT_MODE "rb"
120     #define OUTPUT_MODE "wb"
121 nigel 91 #endif
122 ph10 1027 #endif
123 nigel 91
124 ph10 1254 #ifdef __VMS
125     #include <ssdef.h>
126     void vms_setsymbol( char *, char *, int );
127     #endif
128    
129    
130 zherczeg 905 #define PRIV(name) name
131 nigel 93
132 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
133     displaying the results of pcre_study() and we also need to know about the
134     internal macros, structures, and other internal data values; pcretest has
135     "inside information" compared to a program that strictly follows the PCRE API.
136 nigel 37
137 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
138     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
139     appropriately for an application, not for building PCRE. */
140 nigel 77
141 ph10 145 #include "pcre.h"
142 nigel 77 #include "pcre_internal.h"
143    
144 ph10 836 /* The pcre_printint() function, which prints the internal form of a compiled
145     regex, is held in a separate file so that (a) it can be compiled in either
146 chpe 1087 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
147 ph10 836 when that is compiled in debug mode. */
148    
149     #ifdef SUPPORT_PCRE8
150     void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
151     #endif
152     #ifdef SUPPORT_PCRE16
153     void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
154     #endif
155 chpe 1055 #ifdef SUPPORT_PCRE32
156     void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
157     #endif
158 ph10 836
159 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
160 ph10 1046 to keep two copies, we include the source files here, changing the names of the
161 ph10 351 external symbols to prevent clashes. */
162 nigel 77
163 ph10 836 #define PCRE_INCLUDED
164 nigel 85
165     #include "pcre_tables.c"
166 ph10 1046 #include "pcre_ucd.c"
167 nigel 85
168 ph10 498 /* The definition of the macro PRINTABLE, which determines whether to print an
169 nigel 93 output character as-is or as a hex value when showing compiled patterns, is
170 ph10 836 the same as in the printint.src file. We use it here in cases when the locale
171     has not been explicitly changed, so as to get consistent output from systems
172     that differ in their output from isprint() even in the "C" locale. */
173 nigel 93
/* PRINTABLE(c) is a locale-independent test for a printable character value:
64..254 when EBCDIC is defined, 32..126 otherwise. The argument is evaluated
more than once, so it must not have side effects. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK(c) consults isprint() when locale_set is non-zero and PRINTABLE()
otherwise. isprint() is defined only for arguments representable as an
unsigned char (or EOF), so a value outside 0..255 is never handed to it;
such a value is classified by PRINTABLE(), which rejects it. The result for
values in 0..255 is unchanged. As with PRINTABLE(), the argument is evaluated
more than once. */

#define PRINTOK(c) \
  ((locale_set && (unsigned int)(c) < 256u)? isprint(c) : PRINTABLE(c))
181    
182 chpe 1055 /* Posix support is disabled in 16 or 32 bit only mode. */
183     #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
184 ph10 836 #define NOPOSIX
185     #endif
186    
187 nigel 37 /* It is possible to compile this test program without including support for
188     testing the POSIX interface, though this is not available via the standard
189     Makefile. */
190    
191     #if !defined NOPOSIX
192 nigel 3 #include "pcreposix.h"
193 nigel 37 #endif
194 nigel 3
195 ph10 836 /* It is also possible, originally for the benefit of a version that was
196     imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
197     NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
198     automatically cut out the UTF support if PCRE is built without it. */
199 nigel 79
200 ph10 836 #ifndef SUPPORT_UTF
201     #ifndef NOUTF
202     #define NOUTF
203 ph10 107 #endif
204     #endif
205 nigel 79
206 chpe 1087 /* To make the code a bit tidier for 8/16/32-bit support, we define macros
207 ph10 836 for all the pcre[16|32]_xxx functions (except pcre16_fullinfo, which is called
208     only from one place and is handled differently). I couldn't dream up any way of
209     using a single macro to do this in a generic way, because of the many different
210     argument requirements. We know that at least one of SUPPORT_PCRE8,
211     SUPPORT_PCRE16, and SUPPORT_PCRE32 must be set. First define macros for each individual mode; then
212     use these in the definitions of generic macros.
213 ph10 107
214 ph10 836 **** Special note about the PCHARSxxx macros: the address of the string to be
215     printed is always given as two arguments: a base address followed by an offset.
216     The base address is cast to the correct data size for 8, 16, or 32 bit data; the
217     offset is in units of this size. If the string were given as base+offset in one
218     argument, the casting might be incorrectly applied. */
219    
220     #ifdef SUPPORT_PCRE8
221    
/* Wrappers for the 8-bit library. Each macro forwards to a pcre_xxx()
function or to a pcretest helper, adding the (char *) or (pcre_uint8 *) casts
that the 8-bit interface needs. Where the first macro argument is an lvalue
(lv, p, re, rc, n, count, extra) the result of the call is assigned to it.
None of the expansions ends with a semicolon; the caller supplies it. */

/* --- Printing and string helpers --- */

#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + offset, len, f)

#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + offset, len, f)

/* Only cn8 is used; cn16 and cn32 are accepted so that the 8-, 16- and 32-bit
variants take the same argument list. */

#define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
  p = read_capture_name8(p, cn8, re)

#define STRLEN8(p) ((int)strlen((char *)p))

/* --- Library hooks --- */

#define SET_PCRE_CALLOUT8(callout) pcre_callout = callout

#define SET_PCRE_STACK_GUARD8(stack_guard) pcre_stack_guard = stack_guard

/* --- Compiling, studying, and inspecting a pattern --- */

#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  re = pcre_compile((char *)pat, options, error, erroffset, tables)

#define PCRE_STUDY8(extra, re, options, error) \
  extra = pcre_study(re, options, error)

#define PCRE_FREE_STUDY8(extra) pcre_free_study(extra)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
  rc = pcre_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT8(re, outfile, debug_lengths) \
  pcre_printint(re, outfile, debug_lengths)

/* --- Matching --- */

#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets)

#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace)

/* --- Extracting captured substrings --- */

#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
    (char *)namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)

#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
    (char *)getnamesptr, subsptr)

/* NOTE(review): the expansion refers to `re`, which is not a macro parameter,
so it is taken from the scope in which the macro is used; the `rc` parameter
is not referenced. */

#define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)

#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
  rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)

#define PCRE_FREE_SUBSTRING8(substring) pcre_free_substring(substring)

#define PCRE_FREE_SUBSTRING_LIST8(listptr) pcre_free_substring_list(listptr)

/* --- JIT stack handling --- */

#define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
  pcre_assign_jit_stack(extra, callback, userdata)

#define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
  pcre_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE8(stack) pcre_jit_stack_free(stack)

/* The 8-bit function has no "8" in its name; provide an alias that has. */

#define pcre8_maketables pcre_maketables
302    
303 ph10 836 #endif /* SUPPORT_PCRE8 */
304    
305     /* -----------------------------------------------------------*/
306    
307     #ifdef SUPPORT_PCRE16
308    
/* Wrappers for the 16-bit library. Each macro forwards to a pcre16_xxx()
function or to a pcretest helper, casting the arguments to the 16-bit types
(pcre16 *, pcre16_extra *, PCRE_SPTR16, ...) and casting results back to the
8-bit types where a value is returned. Where the first macro argument is an
lvalue the result of the call is assigned to it. None of the expansions ends
with a semicolon; the caller supplies it. */

/* --- Printing and string helpers --- */

#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)

#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)

/* Only cn16 is used; cn8 and cn32 are accepted so that the 8-, 16- and 32-bit
variants take the same argument list. */

#define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
  p = read_capture_name16(p, cn16, re)

#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))

/* --- Library hooks --- */

#define SET_PCRE_CALLOUT16(callout) \
  pcre16_callout = (int (*)(pcre16_callout_block *))callout

#define SET_PCRE_STACK_GUARD16(stack_guard) \
  pcre16_stack_guard = (int (*)(void))stack_guard

/* --- Compiling, studying, and inspecting a pattern --- */

#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, \
    erroffset, tables)

#define PCRE_STUDY16(extra, re, options, error) \
  extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)

#define PCRE_FREE_STUDY16(extra) \
  pcre16_free_study((pcre16_extra *)extra)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
  rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, \
    (pcre16_extra *)extra, tables)

#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  pcre16_printint(re, outfile, debug_lengths)

/* --- Matching --- */

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, \
    (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets)

#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
    (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
    workspace, size_workspace)

/* --- Extracting captured substrings --- */

/* In the two copy macros the size argument is halved before it is passed on;
presumably it is a byte count and the library wants a count of 16-bit units
(to be confirmed against the callers). */

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, \
    offsets, count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_UCHAR16 *)cbuffer, size/2)

#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, \
    offsets, count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)

/* NOTE(review): the expansion refers to `re`, which is not a macro parameter,
so it is taken from the scope in which the macro is used; the `rc` parameter
is not referenced. Unlike the sibling macros, `re` is passed without a
(pcre16 *) cast. */

#define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
  n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)

#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SPTR16 *)(void*)subsptr)

#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16 **)(void*)listptr)

#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)substring)

#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)listptr)

/* --- JIT stack handling --- */

#define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
  pcre16_assign_jit_stack((pcre16_extra *)extra, \
    (pcre16_jit_callback)callback, userdata)

#define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
  (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE16(stack) \
  pcre16_jit_stack_free((pcre16_jit_stack *)stack)
394    
395 ph10 836 #endif /* SUPPORT_PCRE16 */
396    
397 chpe 1055 /* -----------------------------------------------------------*/
398 ph10 836
399 chpe 1055 #ifdef SUPPORT_PCRE32
400    
/* Wrappers for the 32-bit library (printing helpers, hooks, JIT stack
assignment and compilation). Arguments are cast to the 32-bit types and the
compiled pattern is cast back to (pcre *). None of the expansions ends with a
semicolon; the caller supplies it. */

/* --- Printing and string helpers --- */

/* Both PCHARS macros pass `use_utf`, which is not a macro parameter and is
therefore taken from the scope in which the macro is used. */

#define PCHARS32(lv, p, offset, len, f) \
  lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)

#define PCHARSV32(p, offset, len, f) \
  (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)

/* Only cn32 is used; cn8 and cn16 are accepted so that the 8-, 16- and 32-bit
variants take the same argument list. */

#define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
  p = read_capture_name32(p, cn32, re)

#define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))

/* --- Library hooks --- */

#define SET_PCRE_CALLOUT32(callout) \
  pcre32_callout = (int (*)(pcre32_callout_block *))callout

#define SET_PCRE_STACK_GUARD32(stack_guard) \
  pcre32_stack_guard = (int (*)(void))stack_guard

/* --- JIT stack assignment and compilation --- */

#define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
  pcre32_assign_jit_stack((pcre32_extra *)extra, \
    (pcre32_jit_callback)callback, userdata)

#define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
  re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, \
    erroffset, tables)
425    
/* Copy a named or numbered captured substring into cbuffer using the 32-bit
library, assigning the library's return value to rc.

The size argument is the capacity of cbuffer in bytes: the 8-bit versions of
these macros pass it on unchanged and the 16-bit versions pass size/2. The
buffer is handed to the 32-bit library as an array of PCRE_UCHAR32, so its
capacity in code units is size/4. (Passing size/2 here, as was done
previously, tells the library that the buffer is twice as large as it is.) */

#define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
    count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, (size)/4)

#define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_UCHAR32 *)cbuffer, (size)/4)
434    
/* Wrappers for the 32-bit library (matching, substring extraction, studying,
debugging output and JIT stack allocation). Arguments are cast to the 32-bit
types and returned pointers are cast back to the 8-bit types. Where the first
macro argument is an lvalue the result of the call is assigned to it. None of
the expansions ends with a semicolon; the caller supplies it. */

/* --- Matching --- */

#define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, \
    (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets)

#define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
    (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
    workspace, size_workspace)

/* --- Extracting captured substrings --- */

#define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, \
    offsets, count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)

/* NOTE(review): the expansion refers to `re`, which is not a macro parameter,
so it is taken from the scope in which the macro is used; the `rc` parameter
is not referenced. Unlike the sibling macros, `re` is passed without a
(pcre32 *) cast. */

#define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
  n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr)

#define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_SPTR32 *)(void*)subsptr)

#define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
  rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
    (PCRE_SPTR32 **)(void*)listptr)

#define PCRE_FREE_SUBSTRING32(substring) \
  pcre32_free_substring((PCRE_SPTR32)substring)

#define PCRE_FREE_SUBSTRING_LIST32(listptr) \
  pcre32_free_substring_list((PCRE_SPTR32 *)listptr)

/* --- Studying and inspecting a pattern --- */

#define PCRE_STUDY32(extra, re, options, error) \
  extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)

#define PCRE_FREE_STUDY32(extra) \
  pcre32_free_study((pcre32_extra *)extra)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
  rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, \
    (pcre32_extra *)extra, tables)

#define PCRE_PRINTINT32(re, outfile, debug_lengths) \
  pcre32_printint(re, outfile, debug_lengths)

/* --- JIT stack handling --- */

#define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
  (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE32(stack) \
  pcre32_jit_stack_free((pcre32_jit_stack *)stack)
486    
487     #endif /* SUPPORT_PCRE32 */
488    
489    
490 ph10 1122 /* ----- More than one mode is supported; a runtime test is needed, except for
491 ph10 836 pcre_config(), and the JIT stack functions, when it doesn't matter which
492 ph10 1140 available version is called. ----- */
493 ph10 836
/* Identifiers for the library that is being tested. The numeric values
matter: CHAR_SIZE is computed as (1 << pcre_mode), so the values 0, 1 and 2
are spelled out rather than left implicit. */

enum {
  PCRE8_MODE  = 0,
  PCRE16_MODE = 1,
  PCRE32_MODE = 2
};
499 ph10 836
500 ph10 1122 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
501     defined (SUPPORT_PCRE32)) >= 2
502 ph10 836
503 chpe 1055 #define CHAR_SIZE (1 << pcre_mode)
504    
505 ph10 1122 /* There doesn't seem to be an easy way of writing these macros that can cope
506     with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
507     cases separately. */
508    
509     /* ----- All three modes supported ----- */
510    
511     #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
512    
513 ph10 836 #define PCHARS(lv, p, offset, len, f) \
514 chpe 1055 if (pcre_mode == PCRE32_MODE) \
515     PCHARS32(lv, p, offset, len, f); \
516     else if (pcre_mode == PCRE16_MODE) \
517 ph10 836 PCHARS16(lv, p, offset, len, f); \
518     else \
519     PCHARS8(lv, p, offset, len, f)
520    
521     #define PCHARSV(p, offset, len, f) \
522 chpe 1055 if (pcre_mode == PCRE32_MODE) \
523     PCHARSV32(p, offset, len, f); \
524     else if (pcre_mode == PCRE16_MODE) \
525 ph10 836 PCHARSV16(p, offset, len, f); \
526     else \
527     PCHARSV8(p, offset, len, f)
528    
529 chpe 1055 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
530     if (pcre_mode == PCRE32_MODE) \
531     READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
532     else if (pcre_mode == PCRE16_MODE) \
533     READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
534 ph10 836 else \
535 chpe 1055 READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
536 ph10 836
537     #define SET_PCRE_CALLOUT(callout) \
538 chpe 1055 if (pcre_mode == PCRE32_MODE) \
539     SET_PCRE_CALLOUT32(callout); \
540     else if (pcre_mode == PCRE16_MODE) \
541 ph10 836 SET_PCRE_CALLOUT16(callout); \
542     else \
543     SET_PCRE_CALLOUT8(callout)
544    
545 ph10 1454 #define SET_PCRE_STACK_GUARD(stack_guard) \
546     if (pcre_mode == PCRE32_MODE) \
547     SET_PCRE_STACK_GUARD32(stack_guard); \
548     else if (pcre_mode == PCRE16_MODE) \
549     SET_PCRE_STACK_GUARD16(stack_guard); \
550     else \
551     SET_PCRE_STACK_GUARD8(stack_guard)
552    
553 chpe 1055 #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
554 ph10 836
555 zherczeg 852 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
556 chpe 1055 if (pcre_mode == PCRE32_MODE) \
557     PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
558     else if (pcre_mode == PCRE16_MODE) \
559 zherczeg 852 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
560     else \
561     PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
562 ph10 836
563     #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
564 chpe 1055 if (pcre_mode == PCRE32_MODE) \
565     PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
566     else if (pcre_mode == PCRE16_MODE) \
567 ph10 836 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
568     else \
569     PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
570    
571     #define PCRE_CONFIG pcre_config
572    
573     #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
574     namesptr, cbuffer, size) \
575 chpe 1055 if (pcre_mode == PCRE32_MODE) \
576     PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
577     namesptr, cbuffer, size); \
578     else if (pcre_mode == PCRE16_MODE) \
579 ph10 836 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
580     namesptr, cbuffer, size); \
581     else \
582     PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
583     namesptr, cbuffer, size)
584    
585     #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
586 chpe 1055 if (pcre_mode == PCRE32_MODE) \
587     PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
588     else if (pcre_mode == PCRE16_MODE) \
589 ph10 836 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
590     else \
591     PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
592    
593     #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
594     offsets, size_offsets, workspace, size_workspace) \
595 chpe 1055 if (pcre_mode == PCRE32_MODE) \
596     PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
597     offsets, size_offsets, workspace, size_workspace); \
598     else if (pcre_mode == PCRE16_MODE) \
599 ph10 836 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
600     offsets, size_offsets, workspace, size_workspace); \
601     else \
602     PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
603     offsets, size_offsets, workspace, size_workspace)
604    
605     #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
606     offsets, size_offsets) \
607 chpe 1055 if (pcre_mode == PCRE32_MODE) \
608     PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
609     offsets, size_offsets); \
610     else if (pcre_mode == PCRE16_MODE) \
611 ph10 836 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
612     offsets, size_offsets); \
613     else \
614     PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
615     offsets, size_offsets)
616    
617     #define PCRE_FREE_STUDY(extra) \
618 chpe 1055 if (pcre_mode == PCRE32_MODE) \
619     PCRE_FREE_STUDY32(extra); \
620     else if (pcre_mode == PCRE16_MODE) \
621 ph10 836 PCRE_FREE_STUDY16(extra); \
622     else \
623     PCRE_FREE_STUDY8(extra)
624    
625     #define PCRE_FREE_SUBSTRING(substring) \
626 chpe 1055 if (pcre_mode == PCRE32_MODE) \
627     PCRE_FREE_SUBSTRING32(substring); \
628     else if (pcre_mode == PCRE16_MODE) \
629 ph10 836 PCRE_FREE_SUBSTRING16(substring); \
630     else \
631     PCRE_FREE_SUBSTRING8(substring)
632    
633     #define PCRE_FREE_SUBSTRING_LIST(listptr) \
634 chpe 1055 if (pcre_mode == PCRE32_MODE) \
635     PCRE_FREE_SUBSTRING_LIST32(listptr); \
636     else if (pcre_mode == PCRE16_MODE) \
637 ph10 836 PCRE_FREE_SUBSTRING_LIST16(listptr); \
638     else \
639     PCRE_FREE_SUBSTRING_LIST8(listptr)
640    
641     #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
642     getnamesptr, subsptr) \
643 chpe 1055 if (pcre_mode == PCRE32_MODE) \
644     PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
645     getnamesptr, subsptr); \
646     else if (pcre_mode == PCRE16_MODE) \
647 ph10 836 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
648     getnamesptr, subsptr); \
649     else \
650     PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
651     getnamesptr, subsptr)
652    
653     #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
654 chpe 1055 if (pcre_mode == PCRE32_MODE) \
655     PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
656     else if (pcre_mode == PCRE16_MODE) \
657 ph10 836 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
658     else \
659     PCRE_GET_STRINGNUMBER8(n, rc, ptr)
660    
661     #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
662 chpe 1055 if (pcre_mode == PCRE32_MODE) \
663     PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
664     else if (pcre_mode == PCRE16_MODE) \
665 ph10 836 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
666     else \
667     PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
668    
669     #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
670 chpe 1055 if (pcre_mode == PCRE32_MODE) \
671     PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
672     else if (pcre_mode == PCRE16_MODE) \
673 ph10 836 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
674     else \
675     PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
676    
677 zherczeg 852 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
678 chpe 1055 (pcre_mode == PCRE32_MODE ? \
679     PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
680     : pcre_mode == PCRE16_MODE ? \
681     PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
682     : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
683 ph10 836
684 zherczeg 852 #define PCRE_JIT_STACK_FREE(stack) \
685 chpe 1055 if (pcre_mode == PCRE32_MODE) \
686     PCRE_JIT_STACK_FREE32(stack); \
687     else if (pcre_mode == PCRE16_MODE) \
688 zherczeg 852 PCRE_JIT_STACK_FREE16(stack); \
689     else \
690     PCRE_JIT_STACK_FREE8(stack)
691    
692 ph10 836 #define PCRE_MAKETABLES \
693 chpe 1055 (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
694 ph10 836
/* Three-mode dispatch: expand to the 32-bit, 16-bit or 8-bit form of
PCRE_PATTERN_TO_HOST_BYTE_ORDER according to pcre_mode. No final semicolon is
included; the caller supplies it. */

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
  else \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
702 ph10 836
/* Three-mode dispatch: expand to the 32-bit, 16-bit or 8-bit form of
PCRE_PRINTINT according to pcre_mode. No final semicolon is included; the
caller supplies it. */

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_PRINTINT32(re, outfile, debug_lengths); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_PRINTINT16(re, outfile, debug_lengths); \
  else \
    PCRE_PRINTINT8(re, outfile, debug_lengths)
710    
/* Three-mode dispatch: expand to the 32-bit, 16-bit or 8-bit form of
PCRE_STUDY according to pcre_mode. No final semicolon is included; the caller
supplies it. */

#define PCRE_STUDY(extra, re, options, error) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_STUDY32(extra, re, options, error); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_STUDY16(extra, re, options, error); \
  else \
    PCRE_STUDY8(extra, re, options, error)
718    
719 ph10 1122
720 ph10 1140 /* ----- Two out of three modes are supported ----- */
721 ph10 1122
722 ph10 1140 #else
723 ph10 1122
724 ph10 1140 /* We can use some macro trickery to make a single set of definitions work in
725     the three different cases. */
726 ph10 1122
727 ph10 1140 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
728 ph10 1122
/* BITONE and BITTWO name the two supported code unit widths; BITONE is always
the wider one. They are pasted into identifiers by the two-mode macros. */

#if defined SUPPORT_PCRE32 && defined SUPPORT_PCRE16
  /* 32-bit and 16-bit, no 8-bit */
  #define BITONE 32
  #define BITTWO 16

#elif defined SUPPORT_PCRE32 && defined SUPPORT_PCRE8
  /* 32-bit and 8-bit, no 16-bit */
  #define BITONE 32
  #define BITTWO 8

#else
  /* 16-bit and 8-bit, no 32-bit */
  #define BITONE 16
  #define BITTWO 8
#endif
745 ph10 1122
/* Token pasting in two steps: G() lets its arguments be macro-expanded first
(so that BITONE becomes 16 or 32, for example) and only then hands them to
glue(), which joins them with ##. */

#define glue(x,y) x##y
#define G(x,y) glue(x,y)
748 ph10 1122
749    
750 ph10 1140 /* ----- Common macros for two-mode cases ----- */
751 ph10 1122
/* Two-mode dispatch: use the BITONE form of PCHARS when pcre_mode selects
that width, otherwise the BITTWO form. No final semicolon is included; the
caller supplies it. */

#define PCHARS(lv, p, offset, len, f) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCHARS,BITONE)(lv, p, offset, len, f); \
  else \
    G(PCHARS,BITTWO)(lv, p, offset, len, f)
757 ph10 1122
/* Two-mode dispatch: use the BITONE form of PCHARSV when pcre_mode selects
that width, otherwise the BITTWO form. No final semicolon is included; the
caller supplies it. */

#define PCHARSV(p, offset, len, f) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCHARSV,BITONE)(p, offset, len, f); \
  else \
    G(PCHARSV,BITTWO)(p, offset, len, f)
763 ph10 1122
/* Two-mode dispatch: use the BITONE form of READ_CAPTURE_NAME when pcre_mode
selects that width, otherwise the BITTWO form. All three name-buffer arguments
are always passed on. No final semicolon is included. */

#define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
  else \
    G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
769 ph10 1122
/* Two-mode dispatch: use the BITONE form of SET_PCRE_CALLOUT when pcre_mode
selects that width, otherwise the BITTWO form. No final semicolon is included;
the caller supplies it. */

#define SET_PCRE_CALLOUT(callout) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(SET_PCRE_CALLOUT,BITONE)(callout); \
  else \
    G(SET_PCRE_CALLOUT,BITTWO)(callout)
775 ph10 1122
/* Two-mode dispatch: use the BITONE form of SET_PCRE_STACK_GUARD when
pcre_mode selects that width, otherwise the BITTWO form. No final semicolon is
included; the caller supplies it. */

#define SET_PCRE_STACK_GUARD(stack_guard) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(SET_PCRE_STACK_GUARD,BITONE)(stack_guard); \
  else \
    G(SET_PCRE_STACK_GUARD,BITTWO)(stack_guard)
781    
/* Two-mode dispatch, in expression form: yields the value of the BITONE form
of STRLEN when pcre_mode selects that width, otherwise the BITTWO form. The
whole conditional is parenthesized. */

#define STRLEN(p) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
    G(STRLEN,BITONE)(p) : \
    G(STRLEN,BITTWO)(p))
784 ph10 1122
/* Two-mode dispatch: use the BITONE form of PCRE_ASSIGN_JIT_STACK when
pcre_mode selects that width, otherwise the BITTWO form. No final semicolon is
included; the caller supplies it. */

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
  else \
    G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)
790 ph10 1122
/* Two-mode dispatch: use the BITONE form of PCRE_COMPILE when pcre_mode
selects that width, otherwise the BITTWO form. No final semicolon is included;
the caller supplies it. */

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
  else \
    G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)
796 ph10 1122
/* Not a run-time dispatch: this always names the BITONE library's config
function (pcre16_config or pcre32_config), whatever pcre_mode is. */

#define PCRE_CONFIG \
  G(G(pcre,BITONE),_config)
798 ph10 1122
/* Two-mode dispatch: use the BITONE form of PCRE_COPY_NAMED_SUBSTRING when
pcre_mode selects that width, otherwise the BITTWO form. No final semicolon is
included; the caller supplies it. */

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size); \
  else \
    G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size)
807    
/* Two-mode dispatch: use the BITONE form of PCRE_COPY_SUBSTRING when
pcre_mode selects that width, otherwise the BITTWO form. No final semicolon is
included; the caller supplies it. */

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
  else \
    G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)
813 ph10 1122
/* Two-mode dispatch: use the BITONE form of PCRE_DFA_EXEC when pcre_mode
selects that width, otherwise the BITTWO form. No final semicolon is included;
the caller supplies it. */

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace); \
  else \
    G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace)
822    
/* Two-mode dispatch: use the BITONE form of PCRE_EXEC when pcre_mode selects
that width, otherwise the BITTWO form. No final semicolon is included; the
caller supplies it. */

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets); \
  else \
    G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets)
831    
/* Two-mode dispatch: use the BITONE form of PCRE_FREE_STUDY when pcre_mode
selects that width, otherwise the BITTWO form. No final semicolon is included;
the caller supplies it. */

#define PCRE_FREE_STUDY(extra) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_FREE_STUDY,BITONE)(extra); \
  else \
    G(PCRE_FREE_STUDY,BITTWO)(extra)
837 ph10 1122
/* Two-mode dispatch: use the BITONE form of PCRE_FREE_SUBSTRING when
pcre_mode selects that width, otherwise the BITTWO form. No final semicolon is
included; the caller supplies it. */

#define PCRE_FREE_SUBSTRING(substring) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
  else \
    G(PCRE_FREE_SUBSTRING,BITTWO)(substring)
843 ph10 1122
/* Two-mode dispatch: use the BITONE form of PCRE_FREE_SUBSTRING_LIST when
pcre_mode selects that width, otherwise the BITTWO form. No final semicolon is
included; the caller supplies it. */

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
  else \
    G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr)
849 ph10 1122
/* Two-mode dispatch: use the BITONE form of PCRE_GET_NAMED_SUBSTRING when
pcre_mode selects that width, otherwise the BITTWO form. No final semicolon is
included; the caller supplies it. */

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr); \
  else \
    G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr)
858    
/* Two-mode dispatch: use the BITONE form of PCRE_GET_STRINGNUMBER when
pcre_mode selects that width, otherwise the BITTWO form. No final semicolon is
included; the caller supplies it. */

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
  else \
    G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr)
864 ph10 1122
/* Two-mode dispatch: use the BITONE form of PCRE_GET_SUBSTRING when pcre_mode
selects that width, otherwise the BITTWO form. No final semicolon is included;
the caller supplies it. */

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
  else \
    G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr)
870 ph10 1122
/* Two-mode dispatch: use the BITONE form of PCRE_GET_SUBSTRING_LIST when
pcre_mode selects that width, otherwise the BITTWO form. No final semicolon is
included; the caller supplies it. */

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
  else \
    G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr)
876 ph10 1122
/* Two-mode dispatch, in expression form: yields the value of the BITONE form
of PCRE_JIT_STACK_ALLOC when pcre_mode selects that width, otherwise the
BITTWO form. The whole conditional is enclosed in parentheses so that a cast
or operator written next to the macro applies to the selected result and not
just to the pcre_mode comparison. */

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
     G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
    : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize))
881 ph10 1122
/* Two-mode dispatch: use the BITONE form of PCRE_JIT_STACK_FREE when
pcre_mode selects that width, otherwise the BITTWO form. No final semicolon is
included; the caller supplies it. */

#define PCRE_JIT_STACK_FREE(stack) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
  else \
    G(PCRE_JIT_STACK_FREE,BITTWO)(stack)
887 ph10 1122
/* Two-mode dispatch, in expression form: calls the BITONE library's
_maketables function when pcre_mode selects that width, otherwise the BITTWO
one, and yields the result. The whole conditional is enclosed in parentheses
so that a cast or operator written next to the macro applies to the selected
result and not just to the pcre_mode comparison. */

#define PCRE_MAKETABLES \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
     G(G(pcre,BITONE),_maketables)() \
    : G(G(pcre,BITTWO),_maketables)())
891 ph10 1122
/* Two-mode dispatch: use the BITONE form of PCRE_PATTERN_TO_HOST_BYTE_ORDER
when pcre_mode selects that width, otherwise the BITTWO form. No final
semicolon is included; the caller supplies it. */

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
  else \
    G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables)
897 ph10 1122
/* Two-mode dispatch: use the BITONE form of PCRE_PRINTINT when pcre_mode
selects that width, otherwise the BITTWO form. No final semicolon is included;
the caller supplies it. */

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
  else \
    G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths)
903 ph10 1122
/* Two-mode dispatch: use the BITONE form of PCRE_STUDY when pcre_mode selects
that width, otherwise the BITTWO form. No final semicolon is included; the
caller supplies it. */

#define PCRE_STUDY(extra, re, options, error) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_STUDY,BITONE)(extra, re, options, error); \
  else \
    G(PCRE_STUDY,BITTWO)(extra, re, options, error)
909 ph10 1122
910 ph10 1140 #endif /* Two out of three modes */
911 ph10 1122
912     /* ----- End of cases where more than one mode is supported ----- */
913    
914    
915 ph10 836 /* ----- Only 8-bit mode is supported ----- */
916    
917     #elif defined SUPPORT_PCRE8
/* With a single mode there is nothing to select at run time: each generic
name is a plain alias for its 8-bit counterpart. */

#define CHAR_SIZE                        1
#define PCHARS                           PCHARS8
#define PCHARSV                          PCHARSV8
#define READ_CAPTURE_NAME                READ_CAPTURE_NAME8
#define SET_PCRE_CALLOUT                 SET_PCRE_CALLOUT8
#define SET_PCRE_STACK_GUARD             SET_PCRE_STACK_GUARD8
#define STRLEN                           STRLEN8
#define PCRE_ASSIGN_JIT_STACK            PCRE_ASSIGN_JIT_STACK8
#define PCRE_COMPILE                     PCRE_COMPILE8
#define PCRE_CONFIG                      pcre_config
#define PCRE_COPY_NAMED_SUBSTRING        PCRE_COPY_NAMED_SUBSTRING8
#define PCRE_COPY_SUBSTRING              PCRE_COPY_SUBSTRING8
#define PCRE_DFA_EXEC                    PCRE_DFA_EXEC8
#define PCRE_EXEC                        PCRE_EXEC8
#define PCRE_FREE_STUDY                  PCRE_FREE_STUDY8
#define PCRE_FREE_SUBSTRING              PCRE_FREE_SUBSTRING8
#define PCRE_FREE_SUBSTRING_LIST         PCRE_FREE_SUBSTRING_LIST8
#define PCRE_GET_NAMED_SUBSTRING         PCRE_GET_NAMED_SUBSTRING8
#define PCRE_GET_STRINGNUMBER            PCRE_GET_STRINGNUMBER8
#define PCRE_GET_SUBSTRING               PCRE_GET_SUBSTRING8
#define PCRE_GET_SUBSTRING_LIST          PCRE_GET_SUBSTRING_LIST8
#define PCRE_JIT_STACK_ALLOC             PCRE_JIT_STACK_ALLOC8
#define PCRE_JIT_STACK_FREE              PCRE_JIT_STACK_FREE8
#define PCRE_MAKETABLES                  pcre_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER  PCRE_PATTERN_TO_HOST_BYTE_ORDER8
#define PCRE_PRINTINT                    PCRE_PRINTINT8
#define PCRE_STUDY                       PCRE_STUDY8
945    
946     /* ----- Only 16-bit mode is supported ----- */
947    
948 chpe 1055 #elif defined SUPPORT_PCRE16
/* With a single mode there is nothing to select at run time: each generic
name is a plain alias for its 16-bit counterpart. */

#define CHAR_SIZE                        2
#define PCHARS                           PCHARS16
#define PCHARSV                          PCHARSV16
#define READ_CAPTURE_NAME                READ_CAPTURE_NAME16
#define SET_PCRE_CALLOUT                 SET_PCRE_CALLOUT16
#define SET_PCRE_STACK_GUARD             SET_PCRE_STACK_GUARD16
#define STRLEN                           STRLEN16
#define PCRE_ASSIGN_JIT_STACK            PCRE_ASSIGN_JIT_STACK16
#define PCRE_COMPILE                     PCRE_COMPILE16
#define PCRE_CONFIG                      pcre16_config
#define PCRE_COPY_NAMED_SUBSTRING        PCRE_COPY_NAMED_SUBSTRING16
#define PCRE_COPY_SUBSTRING              PCRE_COPY_SUBSTRING16
#define PCRE_DFA_EXEC                    PCRE_DFA_EXEC16
#define PCRE_EXEC                        PCRE_EXEC16
#define PCRE_FREE_STUDY                  PCRE_FREE_STUDY16
#define PCRE_FREE_SUBSTRING              PCRE_FREE_SUBSTRING16
#define PCRE_FREE_SUBSTRING_LIST         PCRE_FREE_SUBSTRING_LIST16
#define PCRE_GET_NAMED_SUBSTRING         PCRE_GET_NAMED_SUBSTRING16
#define PCRE_GET_STRINGNUMBER            PCRE_GET_STRINGNUMBER16
#define PCRE_GET_SUBSTRING               PCRE_GET_SUBSTRING16
#define PCRE_GET_SUBSTRING_LIST          PCRE_GET_SUBSTRING_LIST16
#define PCRE_JIT_STACK_ALLOC             PCRE_JIT_STACK_ALLOC16
#define PCRE_JIT_STACK_FREE              PCRE_JIT_STACK_FREE16
#define PCRE_MAKETABLES                  pcre16_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER  PCRE_PATTERN_TO_HOST_BYTE_ORDER16
#define PCRE_PRINTINT                    PCRE_PRINTINT16
#define PCRE_STUDY                       PCRE_STUDY16
976 chpe 1055
977     /* ----- Only 32-bit mode is supported ----- */
978    
979     #elif defined SUPPORT_PCRE32
/* With a single mode there is nothing to select at run time: each generic
name is a plain alias for its 32-bit counterpart. */

#define CHAR_SIZE                        4
#define PCHARS                           PCHARS32
#define PCHARSV                          PCHARSV32
#define READ_CAPTURE_NAME                READ_CAPTURE_NAME32
#define SET_PCRE_CALLOUT                 SET_PCRE_CALLOUT32
#define SET_PCRE_STACK_GUARD             SET_PCRE_STACK_GUARD32
#define STRLEN                           STRLEN32
#define PCRE_ASSIGN_JIT_STACK            PCRE_ASSIGN_JIT_STACK32
#define PCRE_COMPILE                     PCRE_COMPILE32
#define PCRE_CONFIG                      pcre32_config
#define PCRE_COPY_NAMED_SUBSTRING        PCRE_COPY_NAMED_SUBSTRING32
#define PCRE_COPY_SUBSTRING              PCRE_COPY_SUBSTRING32
#define PCRE_DFA_EXEC                    PCRE_DFA_EXEC32
#define PCRE_EXEC                        PCRE_EXEC32
#define PCRE_FREE_STUDY                  PCRE_FREE_STUDY32
#define PCRE_FREE_SUBSTRING              PCRE_FREE_SUBSTRING32
#define PCRE_FREE_SUBSTRING_LIST         PCRE_FREE_SUBSTRING_LIST32
#define PCRE_GET_NAMED_SUBSTRING         PCRE_GET_NAMED_SUBSTRING32
#define PCRE_GET_STRINGNUMBER            PCRE_GET_STRINGNUMBER32
#define PCRE_GET_SUBSTRING               PCRE_GET_SUBSTRING32
#define PCRE_GET_SUBSTRING_LIST          PCRE_GET_SUBSTRING_LIST32
#define PCRE_JIT_STACK_ALLOC             PCRE_JIT_STACK_ALLOC32
#define PCRE_JIT_STACK_FREE              PCRE_JIT_STACK_FREE32
#define PCRE_MAKETABLES                  pcre32_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER  PCRE_PATTERN_TO_HOST_BYTE_ORDER32
#define PCRE_PRINTINT                    PCRE_PRINTINT32
#define PCRE_STUDY                       PCRE_STUDY32
1007    
1008 ph10 836 #endif
1009    
1010     /* ----- End of mode-specific function call macros ----- */
1011    
1012    
1013 nigel 85 /* Other parameters */
1014    
/* Make sure CLOCKS_PER_SEC exists: if the headers did not define it, take
CLK_TCK when that is available and otherwise fall back to 100. */

#if !defined CLOCKS_PER_SEC
#  if defined CLK_TCK
#    define CLOCKS_PER_SEC CLK_TCK
#  else
#    define CLOCKS_PER_SEC 100
#  endif
#endif
1022    
/* Only defined when NODFA is not set. Presumably the dimension of the
workspace used for DFA matching (confirm where it is used). */

#ifndef NODFA
#define DFA_WS_DIMENSION 1000
#endif

/* Default number of repetitions of the loop used for timing. */

#define LOOPREPEAT 500000
1030 nigel 3
1031 nigel 85 /* Static variables */
1032    
/* File-scope state. Every object here has static storage duration, so the
explicit zero/NULL initializers below only spell out the values the objects
would start with anyway. */

static FILE *outfile = NULL;
static int log_store = 0;
static int callout_count = 0;
static int callout_extra = 0;
static int callout_fail_count = 0;
static int callout_fail_id = 0;
static int debug_lengths = 0;
static int first_callout = 0;
static int jit_was_used = 0;
static int locale_set = 0;
static int show_malloc = 0;
static int stack_guard_return = 0;
static int use_utf = 0;
static const unsigned char *last_callout_mark = NULL;
1047 nigel 3
1048 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
1049    
1050     static int buffer_size = 50000;
1051 ph10 836 static pcre_uint8 *buffer = NULL;
1052     static pcre_uint8 *pbuffer = NULL;
1053 nigel 3
1054 ph10 1142 /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
1055 ph10 836
1056     #ifdef COMPILE_PCRE16
1057     #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1058     #endif
1059    
1060 chpe 1055 #ifdef COMPILE_PCRE32
1061     #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1062     #endif
1063    
1064 ph10 1142 /* We need buffers for building 16/32-bit strings, and the tables of operator
1065     lengths that are used for 16/32-bit compiling, in order to swap bytes in a
1066     pattern for saving/reloading testing. Luckily, the data for these tables is
1067     defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
1068     are used in the tables) are adjusted appropriately for the 16/32-bit world.
1069     LINK_SIZE is also used later in this program. */
1070    
1071     #ifdef SUPPORT_PCRE16
1072     #undef IMM2_SIZE
1073     #define IMM2_SIZE 1
1074    
1075 ph10 836 #if LINK_SIZE == 2
1076     #undef LINK_SIZE
1077     #define LINK_SIZE 1
1078     #elif LINK_SIZE == 3 || LINK_SIZE == 4
1079     #undef LINK_SIZE
1080     #define LINK_SIZE 2
1081     #else
1082     #error LINK_SIZE must be either 2, 3, or 4
1083     #endif
1084    
1085 chpe 1055 static int buffer16_size = 0;
1086     static pcre_uint16 *buffer16 = NULL;
1087 ph10 836 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
1088     #endif /* SUPPORT_PCRE16 */
1089    
1090 chpe 1055 #ifdef SUPPORT_PCRE32
1091 ph10 1142 #undef IMM2_SIZE
1092     #define IMM2_SIZE 1
1093     #undef LINK_SIZE
1094     #define LINK_SIZE 1
1095    
1096 chpe 1055 static int buffer32_size = 0;
1097     static pcre_uint32 *buffer32 = NULL;
1098     static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
1099     #endif /* SUPPORT_PCRE32 */
1100 ph10 836
1101 ph10 1140 /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
1102     support, it can be changed by an option. If there is no 8-bit support, there
1103     must be 16- or 32-bit support, so default to 16-bit if available, else 32-bit. */
1104 chpe 1055
1105     #if defined SUPPORT_PCRE8
1106     static int pcre_mode = PCRE8_MODE;
1107     #elif defined SUPPORT_PCRE16
1108     static int pcre_mode = PCRE16_MODE;
1109     #elif defined SUPPORT_PCRE32
1110     static int pcre_mode = PCRE32_MODE;
1111 ph10 836 #endif
1112    
1113 ph10 923 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
1114    
1115     static int jit_study_bits[] =
1116 ph10 960 {
1117     PCRE_STUDY_JIT_COMPILE,
1118 ph10 923 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1119     PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1120 ph10 960 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1121     PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1122     PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1123     PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
1124     PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
1125     };
1126 ph10 923
/* Mask with all three JIT study option bits set. */

#define PCRE_STUDY_ALLJIT \
  (PCRE_STUDY_JIT_COMPILE | \
   PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | \
   PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
1129    
1130 ph10 598 /* Textual explanations for runtime error codes */
1131 nigel 75
/* Message table with 33 entries (indices 0 to 32). A NULL entry means there
is no text for that slot, for the reason given beside it. NOTE(review):
presumably indexed by the negated PCRE error code - confirm at the point of
use. */

static const char *errtexts[] = {
  /*  0 */ NULL,  /* 0 is no error */
  /*  1 */ NULL,  /* NOMATCH is handled specially */
  /*  2 */ "NULL argument passed",
  /*  3 */ "bad option value",
  /*  4 */ "magic number missing",
  /*  5 */ "unknown opcode - pattern overwritten?",
  /*  6 */ "no more memory",
  /*  7 */ NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  /*  8 */ "match limit exceeded",
  /*  9 */ "callout error code",
  /* 10 */ NULL,  /* BADUTF8/16 is handled specially */
  /* 11 */ NULL,  /* BADUTF8/16 offset is handled specially */
  /* 12 */ NULL,  /* PARTIAL is handled specially */
  /* 13 */ "not used - internal error",
  /* 14 */ "internal error - pattern overwritten?",
  /* 15 */ "bad count value",
  /* 16 */ "item unsupported for DFA matching",
  /* 17 */ "backreference condition or recursion test not supported for DFA matching",
  /* 18 */ "match limit not supported for DFA matching",
  /* 19 */ "workspace size exceeded in DFA matching",
  /* 20 */ "too much recursion for DFA matching",
  /* 21 */ "recursion limit exceeded",
  /* 22 */ "not used - internal error",
  /* 23 */ "invalid combination of newline options",
  /* 24 */ "bad offset value",
  /* 25 */ NULL,  /* SHORTUTF8/16 is handled specially */
  /* 26 */ "nested recursion at the same subject position",
  /* 27 */ "JIT stack limit reached",
  /* 28 */ "pattern compiled in wrong mode: 8-bit/16-bit error",
  /* 29 */ "pattern compiled with other endianness",
  /* 30 */ "invalid data in workspace for DFA restart",
  /* 31 */ "bad JIT option",
  /* 32 */ "bad length"
};
1167    
1168 ph10 654
1169 ph10 541 /*************************************************
1170     * Alternate character tables *
1171     *************************************************/
1172 nigel 49
1173 ph10 545 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
1174     using the default tables of the library. However, the T option can be used to
1175     select alternate sets of tables, for different kinds of testing. Note also that
1176 ph10 541 the L (locale) option also adjusts the tables. */
1177    
1178 ph10 545 /* This is the set of tables distributed as default with PCRE. It recognizes
1179 ph10 541 only ASCII characters. */
1180    
1181 ph10 836 static const pcre_uint8 tables0[] = {
1182 ph10 541
1183     /* This table is a lower casing table. */
1184    
1185     0, 1, 2, 3, 4, 5, 6, 7,
1186     8, 9, 10, 11, 12, 13, 14, 15,
1187     16, 17, 18, 19, 20, 21, 22, 23,
1188     24, 25, 26, 27, 28, 29, 30, 31,
1189     32, 33, 34, 35, 36, 37, 38, 39,
1190     40, 41, 42, 43, 44, 45, 46, 47,
1191     48, 49, 50, 51, 52, 53, 54, 55,
1192     56, 57, 58, 59, 60, 61, 62, 63,
1193     64, 97, 98, 99,100,101,102,103,
1194     104,105,106,107,108,109,110,111,
1195     112,113,114,115,116,117,118,119,
1196     120,121,122, 91, 92, 93, 94, 95,
1197     96, 97, 98, 99,100,101,102,103,
1198     104,105,106,107,108,109,110,111,
1199     112,113,114,115,116,117,118,119,
1200     120,121,122,123,124,125,126,127,
1201     128,129,130,131,132,133,134,135,
1202     136,137,138,139,140,141,142,143,
1203     144,145,146,147,148,149,150,151,
1204     152,153,154,155,156,157,158,159,
1205     160,161,162,163,164,165,166,167,
1206     168,169,170,171,172,173,174,175,
1207     176,177,178,179,180,181,182,183,
1208     184,185,186,187,188,189,190,191,
1209     192,193,194,195,196,197,198,199,
1210     200,201,202,203,204,205,206,207,
1211     208,209,210,211,212,213,214,215,
1212     216,217,218,219,220,221,222,223,
1213     224,225,226,227,228,229,230,231,
1214     232,233,234,235,236,237,238,239,
1215     240,241,242,243,244,245,246,247,
1216     248,249,250,251,252,253,254,255,
1217    
1218     /* This table is a case flipping table. */
1219    
1220     0, 1, 2, 3, 4, 5, 6, 7,
1221     8, 9, 10, 11, 12, 13, 14, 15,
1222     16, 17, 18, 19, 20, 21, 22, 23,
1223     24, 25, 26, 27, 28, 29, 30, 31,
1224     32, 33, 34, 35, 36, 37, 38, 39,
1225     40, 41, 42, 43, 44, 45, 46, 47,
1226     48, 49, 50, 51, 52, 53, 54, 55,
1227     56, 57, 58, 59, 60, 61, 62, 63,
1228     64, 97, 98, 99,100,101,102,103,
1229     104,105,106,107,108,109,110,111,
1230     112,113,114,115,116,117,118,119,
1231     120,121,122, 91, 92, 93, 94, 95,
1232     96, 65, 66, 67, 68, 69, 70, 71,
1233     72, 73, 74, 75, 76, 77, 78, 79,
1234     80, 81, 82, 83, 84, 85, 86, 87,
1235     88, 89, 90,123,124,125,126,127,
1236     128,129,130,131,132,133,134,135,
1237     136,137,138,139,140,141,142,143,
1238     144,145,146,147,148,149,150,151,
1239     152,153,154,155,156,157,158,159,
1240     160,161,162,163,164,165,166,167,
1241     168,169,170,171,172,173,174,175,
1242     176,177,178,179,180,181,182,183,
1243     184,185,186,187,188,189,190,191,
1244     192,193,194,195,196,197,198,199,
1245     200,201,202,203,204,205,206,207,
1246     208,209,210,211,212,213,214,215,
1247     216,217,218,219,220,221,222,223,
1248     224,225,226,227,228,229,230,231,
1249     232,233,234,235,236,237,238,239,
1250     240,241,242,243,244,245,246,247,
1251     248,249,250,251,252,253,254,255,
1252    
1253     /* This table contains bit maps for various character classes. Each map is 32
1254     bytes long and the bits run from the least significant end of each byte. The
1255     classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1256     graph, print, punct, and cntrl. Other classes are built from combinations. */
1257    
1258     0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1259     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1260     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1261     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1262    
1263     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1264     0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1265     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1266     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1267    
1268     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1269     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1270     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1271     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1272    
1273     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1274     0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1275     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1276     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1277    
1278     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1279     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1280     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1281     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1282    
1283     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1284     0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1285     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1286     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1287    
1288     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1289     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1290     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1291     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1292    
1293     0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1294     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1295     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1296     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1297    
1298     0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1299     0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1300     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1301     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1302    
1303     0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1304     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1305     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1306     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1307    
1308     /* This table identifies various classes of character by individual bits:
1309     0x01 white space character
1310     0x02 letter
1311     0x04 decimal digit
1312     0x08 hexadecimal digit
1313     0x10 alphanumeric or '_'
1314     0x80 regular expression metacharacter or binary zero
1315     */
1316    
1317     0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1318 ph10 1405 0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
1319 ph10 541 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1320     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1321     0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
1322     0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1323     0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1324     0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1325     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1326     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1327     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1328     0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1329     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1330     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1331     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1332     0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1333     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1334     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1335     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1336     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1337     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1338     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1339     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1340     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1341     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1342     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1343     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1344     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1345     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1346     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1347     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1348     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1349    
1350 ph10 1405 /* This is a set of tables that came originally from a Windows user. It seems
1351     to be at least an approximation of ISO 8859. In particular, there are
1352     characters greater than 128 that are marked as spaces, letters, etc. */
1353 ph10 541
1354 ph10 836 static const pcre_uint8 tables1[] = {
/* Layout: 256 + 256 + 320 + 256 bytes, the same sizes and order as the four
parts of tables0. */

/* Part 1: lower casing table (256 entries). 'A'-'Z' map to 'a'-'z'; in the
upper half, 192-222 except 215 map to the code 32 higher. */
1355 ph10 541 0,1,2,3,4,5,6,7,
1356     8,9,10,11,12,13,14,15,
1357     16,17,18,19,20,21,22,23,
1358     24,25,26,27,28,29,30,31,
1359     32,33,34,35,36,37,38,39,
1360     40,41,42,43,44,45,46,47,
1361     48,49,50,51,52,53,54,55,
1362     56,57,58,59,60,61,62,63,
1363     64,97,98,99,100,101,102,103,
1364     104,105,106,107,108,109,110,111,
1365     112,113,114,115,116,117,118,119,
1366     120,121,122,91,92,93,94,95,
1367     96,97,98,99,100,101,102,103,
1368     104,105,106,107,108,109,110,111,
1369     112,113,114,115,116,117,118,119,
1370     120,121,122,123,124,125,126,127,
1371     128,129,130,131,132,133,134,135,
1372     136,137,138,139,140,141,142,143,
1373     144,145,146,147,148,149,150,151,
1374     152,153,154,155,156,157,158,159,
1375     160,161,162,163,164,165,166,167,
1376     168,169,170,171,172,173,174,175,
1377     176,177,178,179,180,181,182,183,
1378     184,185,186,187,188,189,190,191,
1379     224,225,226,227,228,229,230,231,
1380     232,233,234,235,236,237,238,239,
1381     240,241,242,243,244,245,246,215,
1382     248,249,250,251,252,253,254,223,
1383     224,225,226,227,228,229,230,231,
1384     232,233,234,235,236,237,238,239,
1385     240,241,242,243,244,245,246,247,
1386     248,249,250,251,252,253,254,255,
/* Part 2: case flipping table (256 entries). As well as swapping 'A'-'Z' with
'a'-'z', it swaps 192-222 with 224-254; 215, 223, 247 and 255 map to
themselves. */
1387     0,1,2,3,4,5,6,7,
1388     8,9,10,11,12,13,14,15,
1389     16,17,18,19,20,21,22,23,
1390     24,25,26,27,28,29,30,31,
1391     32,33,34,35,36,37,38,39,
1392     40,41,42,43,44,45,46,47,
1393     48,49,50,51,52,53,54,55,
1394     56,57,58,59,60,61,62,63,
1395     64,97,98,99,100,101,102,103,
1396     104,105,106,107,108,109,110,111,
1397     112,113,114,115,116,117,118,119,
1398     120,121,122,91,92,93,94,95,
1399     96,65,66,67,68,69,70,71,
1400     72,73,74,75,76,77,78,79,
1401     80,81,82,83,84,85,86,87,
1402     88,89,90,123,124,125,126,127,
1403     128,129,130,131,132,133,134,135,
1404     136,137,138,139,140,141,142,143,
1405     144,145,146,147,148,149,150,151,
1406     152,153,154,155,156,157,158,159,
1407     160,161,162,163,164,165,166,167,
1408     168,169,170,171,172,173,174,175,
1409     176,177,178,179,180,181,182,183,
1410     184,185,186,187,188,189,190,191,
1411     224,225,226,227,228,229,230,231,
1412     232,233,234,235,236,237,238,239,
1413     240,241,242,243,244,245,246,215,
1414     248,249,250,251,252,253,254,223,
1415     192,193,194,195,196,197,198,199,
1416     200,201,202,203,204,205,206,207,
1417     208,209,210,211,212,213,214,247,
1418     216,217,218,219,220,221,222,255,
/* Part 3: character class bit maps, ten maps of 32 bytes each (four source
lines per map), written in decimal here. NOTE(review): presumably the same ten
classes, in the same order, as the bit maps in tables0 - confirm. */
1419     0,62,0,0,1,0,0,0,
1420     0,0,0,0,0,0,0,0,
1421     32,0,0,0,1,0,0,0,
1422     0,0,0,0,0,0,0,0,
1423     0,0,0,0,0,0,255,3,
1424     126,0,0,0,126,0,0,0,
1425     0,0,0,0,0,0,0,0,
1426     0,0,0,0,0,0,0,0,
1427     0,0,0,0,0,0,255,3,
1428     0,0,0,0,0,0,0,0,
1429     0,0,0,0,0,0,12,2,
1430     0,0,0,0,0,0,0,0,
1431     0,0,0,0,0,0,0,0,
1432     254,255,255,7,0,0,0,0,
1433     0,0,0,0,0,0,0,0,
1434     255,255,127,127,0,0,0,0,
1435     0,0,0,0,0,0,0,0,
1436     0,0,0,0,254,255,255,7,
1437     0,0,0,0,0,4,32,4,
1438     0,0,0,128,255,255,127,255,
1439     0,0,0,0,0,0,255,3,
1440     254,255,255,135,254,255,255,7,
1441     0,0,0,0,0,4,44,6,
1442     255,255,127,255,255,255,127,255,
1443     0,0,0,0,254,255,255,255,
1444     255,255,255,255,255,255,255,127,
1445     0,0,0,0,254,255,255,255,
1446     255,255,255,255,255,255,255,255,
1447     0,2,0,0,255,255,255,255,
1448     255,255,255,255,255,255,255,127,
1449     0,0,0,0,255,255,255,255,
1450     255,255,255,255,255,255,255,255,
1451     0,0,0,0,254,255,0,252,
1452     1,0,0,248,1,0,0,120,
1453     0,0,0,0,254,255,255,255,
1454     0,0,128,0,0,0,128,0,
1455     255,255,255,255,0,0,0,0,
1456     0,0,0,0,0,0,0,128,
1457     255,255,255,255,0,0,0,0,
1458     0,0,0,0,0,0,0,0,
/* Part 4: one flag byte per character code (256 entries), written in decimal.
NOTE(review): presumably the same flag bits as documented for tables0 (1 space,
2 letter, 4 digit, 8 hex digit, 16 alphanumeric or '_', 128 metacharacter or
binary zero) - e.g. '0'-'9' are 28 here and 0x1c there. */
1459     128,0,0,0,0,0,0,0,
1460     0,1,1,0,1,1,0,0,
1461     0,0,0,0,0,0,0,0,
1462     0,0,0,0,0,0,0,0,
1463     1,0,0,0,128,0,0,0,
1464     128,128,128,128,0,0,128,0,
1465     28,28,28,28,28,28,28,28,
1466     28,28,0,0,0,0,0,128,
1467     0,26,26,26,26,26,26,18,
1468     18,18,18,18,18,18,18,18,
1469     18,18,18,18,18,18,18,18,
1470     18,18,18,128,128,0,128,16,
1471     0,26,26,26,26,26,26,18,
1472     18,18,18,18,18,18,18,18,
1473     18,18,18,18,18,18,18,18,
1474     18,18,18,128,128,0,0,0,
1475     0,0,0,0,0,1,0,0,
1476     0,0,0,0,0,0,0,0,
1477     0,0,0,0,0,0,0,0,
1478     0,0,0,0,0,0,0,0,
1479     1,0,0,0,0,0,0,0,
1480     0,0,18,0,0,0,0,0,
1481     0,0,20,20,0,18,0,0,
1482     0,20,18,0,0,0,0,0,
1483     18,18,18,18,18,18,18,18,
1484     18,18,18,18,18,18,18,18,
1485     18,18,18,18,18,18,18,0,
1486     18,18,18,18,18,18,18,18,
1487     18,18,18,18,18,18,18,18,
1488     18,18,18,18,18,18,18,18,
1489     18,18,18,18,18,18,18,0,
1490     18,18,18,18,18,18,18,18
1491     };
1492    
1493    
1494    
1495 ph10 558
1496     #ifndef HAVE_STRERROR
1497 nigel 49 /*************************************************
1498 ph10 558 * Provide strerror() for non-ANSI libraries *
1499     *************************************************/
1500    
1501     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1502     in their libraries, but can provide the same facility by this simple
1503     alternative function. */
1504    
1505     extern int sys_nerr;
1506     extern char *sys_errlist[];
1507    
1508     char *
1509     strerror(int n)
1510     {
1511     if (n < 0 || n >= sys_nerr) return "unknown error number";
1512     return sys_errlist[n];
1513     }
1514     #endif /* HAVE_STRERROR */
1515    
1516    
1517 ph10 1030
1518 ph10 667 /*************************************************
1519 ph10 1030 * Print newline configuration *
1520     *************************************************/
1521    
1522 ph10 1122 /*
1523     Arguments:
1524 ph10 1033 rc the return code from PCRE_CONFIG_NEWLINE
1525 ph10 1122 isc TRUE if called from "-C newline"
1526 ph10 1033 Returns: nothing
1527 ph10 1030 */
1528    
1529     static void
1530 ph10 1033 print_newline_config(int rc, BOOL isc)
1531 ph10 1030 {
1532     const char *s = NULL;
1533 ph10 1033 if (!isc) printf(" Newline sequence is ");
1534 ph10 1030 switch(rc)
1535     {
1536     case CHAR_CR: s = "CR"; break;
1537     case CHAR_LF: s = "LF"; break;
1538     case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1539     case -1: s = "ANY"; break;
1540     case -2: s = "ANYCRLF"; break;
1541 ph10 1122
1542 ph10 1030 default:
1543 ph10 1122 printf("a non-standard value: 0x%04x\n", rc);
1544 ph10 1030 return;
1545 ph10 1122 }
1546 ph10 1030
1547     printf("%s\n", s);
1548     }
1549    
1550    
1551    
1552     /*************************************************
1553 ph10 667 * JIT memory callback *
1554     *************************************************/
1555 ph10 558
1556 ph10 667 static pcre_jit_stack* jit_callback(void *arg)
1557     {
1558 ph10 926 jit_was_used = TRUE;
1559 ph10 667 return (pcre_jit_stack *)arg;
1560     }
1561 ph10 558
1562 ph10 667
1563 chpe 1055 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1564 ph10 558 /*************************************************
1565 ph10 836 * Convert UTF-8 string to value *
1566     *************************************************/
1567    
1568     /* This function takes one or more bytes that represents a UTF-8 character,
1569     and returns the value of the character.
1570    
1571     Argument:
1572     utf8bytes a pointer to the byte vector
1573     vptr a pointer to an int to receive the value
1574    
1575     Returns: > 0 => the number of bytes consumed
1576     -6 to 0 => malformed UTF-8 character at offset = (-return)
1577     */
1578    
1579     static int
1580 chpe 1086 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1581 ph10 836 {
1582 chpe 1086 pcre_uint32 c = *utf8bytes++;
1583     pcre_uint32 d = c;
1584 ph10 836 int i, j, s;
1585    
1586     for (i = -1; i < 6; i++) /* i is number of additional bytes */
1587     {
1588     if ((d & 0x80) == 0) break;
1589     d <<= 1;
1590     }
1591    
1592     if (i == -1) { *vptr = c; return 1; } /* ascii character */
1593     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1594    
1595     /* i now has a value in the range 1-5 */
1596    
1597     s = 6*i;
1598     d = (c & utf8_table3[i]) << s;
1599    
1600     for (j = 0; j < i; j++)
1601     {
1602     c = *utf8bytes++;
1603     if ((c & 0xc0) != 0x80) return -(j+1);
1604     s -= 6;
1605     d |= (c & 0x3f) << s;
1606     }
1607    
1608     /* Check that encoding was the correct unique one */
1609    
1610     for (j = 0; j < utf8_table1_size; j++)
1611 ph10 1122 if (d <= (pcre_uint32)utf8_table1[j]) break;
1612 ph10 836 if (j != i) return -(i+1);
1613    
1614     /* Valid value */
1615    
1616     *vptr = d;
1617     return i+1;
1618     }
1619     #endif /* NOUTF || SUPPORT_PCRE16 */
1620    
1621    
1622    
1623 ph10 1140 #if defined SUPPORT_PCRE8 && !defined NOUTF
1624 ph10 836 /*************************************************
1625     * Convert character value to UTF-8 *
1626     *************************************************/
1627    
/* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 1 to 6 bytes.

Arguments:
  cvalue     the character value
  utf8bytes  pointer to buffer for result - at least 6 bytes long

Returns:     number of bytes placed in the buffer
             OR -1 if cvalue is greater than 0x7fffffff
*/
1637    
1638     static int
1639 chpe 1086 ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1640 ph10 836 {
1641     register int i, j;
1642 chpe 1086 if (cvalue > 0x7fffffffu)
1643     return -1;
1644 ph10 836 for (i = 0; i < utf8_table1_size; i++)
1645 ph10 1122 if (cvalue <= (pcre_uint32)utf8_table1[i]) break;
1646 ph10 836 utf8bytes += i;
1647     for (j = i; j > 0; j--)
1648     {
1649     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1650     cvalue >>= 6;
1651     }
1652     *utf8bytes = utf8_table2[i] | cvalue;
1653     return i + 1;
1654     }
1655 ph10 842 #endif
1656 ph10 836
1657    
1658     #ifdef SUPPORT_PCRE16
1659     /*************************************************
1660     * Convert a string to 16-bit *
1661     *************************************************/
1662    
/* In non-UTF mode, the space needed for a 16-bit string is exactly double the
8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
double, because values up to 0xffff use 1 to 3 bytes in UTF-8 and exactly 2
bytes in UTF-16, while higher values use 4 bytes in UTF-8 and 4 bytes (a
surrogate pair) in UTF-16. The result is always left in buffer16.
1668    
1669     Note that this function does not object to surrogate values. This is
1670     deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1671     for the purpose of testing that they are correctly faulted.
1672    
1673 ph10 842 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1674 ph10 836 in UTF-8 so that values greater than 255 can be handled.
1675    
1676     Arguments:
1677     data TRUE if converting a data line; FALSE for a regex
1678     p points to a byte string
1679     utf true if UTF-8 (to be converted to UTF-16)
1680     len number of bytes in the string (excluding trailing zero)
1681    
1682     Returns: number of 16-bit data items used (excluding trailing zero)
1683     OR -1 if a UTF-8 string is malformed
1684     OR -2 if a value > 0x10ffff is encountered
1685 ph10 842 OR -3 if a value > 0xffff is encountered when not in UTF mode
1686 ph10 836 */
1687    
1688     static int
1689     to16(int data, pcre_uint8 *p, int utf, int len)
1690     {
1691     pcre_uint16 *pp;
1692    
1693     if (buffer16_size < 2*len + 2)
1694     {
1695     if (buffer16 != NULL) free(buffer16);
1696     buffer16_size = 2*len + 2;
1697     buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1698     if (buffer16 == NULL)
1699     {
1700     fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1701     exit(1);
1702     }
1703     }
1704    
1705     pp = buffer16;
1706    
1707     if (!utf && !data)
1708     {
1709     while (len-- > 0) *pp++ = *p++;
1710     }
1711    
1712     else
1713     {
1714 chpe 1086 pcre_uint32 c = 0;
1715 ph10 836 while (len > 0)
1716     {
1717     int chlen = utf82ord(p, &c);
1718     if (chlen <= 0) return -1;
1719     if (c > 0x10ffff) return -2;
1720     p += chlen;
1721     len -= chlen;
1722     if (c < 0x10000) *pp++ = c; else
1723     {
1724     if (!utf) return -3;
1725     c -= 0x10000;
1726     *pp++ = 0xD800 | (c >> 10);
1727     *pp++ = 0xDC00 | (c & 0x3ff);
1728     }
1729     }
1730     }
1731    
1732     *pp = 0;
1733     return pp - buffer16;
1734     }
1735     #endif
1736    
1737 chpe 1055 #ifdef SUPPORT_PCRE32
1738     /*************************************************
1739     * Convert a string to 32-bit *
1740     *************************************************/
1741 ph10 836
/* In non-UTF mode, the space needed for a 32-bit string is exactly four times
the 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than
four times, because every character occupies at least one byte in UTF-8 and
exactly four bytes in UTF-32. The result is always left in buffer32.
1747    
1748     Note that this function does not object to surrogate values. This is
1749     deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1750     for the purpose of testing that they are correctly faulted.
1751    
1752     Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1753     in UTF-8 so that values greater than 255 can be handled.
1754    
1755     Arguments:
1756     data TRUE if converting a data line; FALSE for a regex
1757     p points to a byte string
1758     utf true if UTF-8 (to be converted to UTF-32)
1759     len number of bytes in the string (excluding trailing zero)
1760    
1761     Returns: number of 32-bit data items used (excluding trailing zero)
1762     OR -1 if a UTF-8 string is malformed
1763     OR -2 if a value > 0x10ffff is encountered
1764     OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1765     */
1766    
1767     static int
1768     to32(int data, pcre_uint8 *p, int utf, int len)
1769     {
1770     pcre_uint32 *pp;
1771    
1772     if (buffer32_size < 4*len + 4)
1773     {
1774     if (buffer32 != NULL) free(buffer32);
1775     buffer32_size = 4*len + 4;
1776     buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1777     if (buffer32 == NULL)
1778     {
1779     fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1780     exit(1);
1781     }
1782     }
1783    
1784     pp = buffer32;
1785    
1786     if (!utf && !data)
1787     {
1788     while (len-- > 0) *pp++ = *p++;
1789     }
1790    
1791     else
1792     {
1793 chpe 1086 pcre_uint32 c = 0;
1794 chpe 1055 while (len > 0)
1795     {
1796     int chlen = utf82ord(p, &c);
1797     if (chlen <= 0) return -1;
1798     if (utf)
1799     {
1800     if (c > 0x10ffff) return -2;
1801     if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1802     }
1803    
1804     p += chlen;
1805     len -= chlen;
1806     *pp++ = c;
1807     }
1808     }
1809    
1810     *pp = 0;
1811     return pp - buffer32;
1812     }
1813 chpe 1117
1814     /* Check that a 32-bit character string is valid UTF-32.
1815    
1816     Arguments:
1817     string points to the string
1818     length length of string, or -1 if the string is zero-terminated
1819    
1820     Returns: TRUE if the string is a valid UTF-32 string
1821     FALSE otherwise
1822     */
1823    
1824 ph10 1261 #ifdef NEVER /* Not used */
1825 chpe 1117 #ifdef SUPPORT_UTF
1826     static BOOL
1827     valid_utf32(pcre_uint32 *string, int length)
1828     {
1829     register pcre_uint32 *p;
1830     register pcre_uint32 c;
1831    
1832     for (p = string; length-- > 0; p++)
1833     {
1834     c = *p;
1835 ph10 1261 if (c > 0x10ffffu) return FALSE; /* Too big */
1836     if ((c & 0xfffff800u) == 0xd800u) return FALSE; /* Surrogate */
1837 chpe 1117 }
1838    
1839     return TRUE;
1840     }
1841     #endif /* SUPPORT_UTF */
1842 ph10 1202 #endif /* NEVER */
1843 ph10 1261 #endif /* SUPPORT_PCRE32 */
1844 ph10 1202
1845    
1846 ph10 836 /*************************************************
1847 nigel 91 * Read or extend an input line *
1848     *************************************************/
1849    
1850     /* Input lines are read into buffer, but both patterns and data lines can be
1851     continued over multiple input lines. In addition, if the buffer fills up, we
1852     want to automatically expand it so as to be able to handle extremely large
1853     lines that are needed for certain stress tests. When the input buffer is
1854     expanded, the other two buffers must also be expanded likewise, and the
1855     contents of pbuffer, which are a copy of the input for callouts, must be
1856     preserved (for when expansion happens for a data line). This is not the most
1857     optimal way of handling this, but hey, this is just a test program!
1858    
1859     Arguments:
1860     f the file to read
1861     start where in buffer to start (this *must* be within buffer)
1862 ph10 287 prompt for stdin or readline()
1863 nigel 91
1864     Returns: pointer to the start of new data
1865     could be a copy of start, or could be moved
1866     NULL if no data read and EOF reached
1867     */
1868    
1869 ph10 836 static pcre_uint8 *
1870     extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1871 nigel 91 {
1872 ph10 836 pcre_uint8 *here = start;
1873 nigel 91
1874     for (;;)
1875     {
1876 ph10 904 size_t rlen = (size_t)(buffer_size - (here - buffer));
1877 nigel 93
1878 nigel 91 if (rlen > 1000)
1879     {
1880     int dlen;
1881 ph10 289
1882 ph10 936 /* If libreadline or libedit support is required, use readline() to read a
1883     line if the input is a terminal. Note that readline() removes the trailing
1884     newline, so we must put it back again, to be compatible with fgets(). */
1885 ph10 289
1886 ph10 936 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1887 ph10 287 if (isatty(fileno(f)))
1888     {
1889 ph10 289 size_t len;
1890 ph10 287 char *s = readline(prompt);
1891     if (s == NULL) return (here == start)? NULL : start;
1892     len = strlen(s);
1893 ph10 289 if (len > 0) add_history(s);
1894 ph10 287 if (len > rlen - 1) len = rlen - 1;
1895     memcpy(here, s, len);
1896     here[len] = '\n';
1897 ph10 289 here[len+1] = 0;
1898     free(s);
1899 ph10 287 }
1900 ph10 289 else
1901     #endif
1902    
1903 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
1904 ph10 289
1905 ph10 287 {
1906 ph10 516 if (f == stdin) printf("%s", prompt);
1907 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
1908     return (here == start)? NULL : start;
1909 ph10 289 }
1910    
1911 nigel 91 dlen = (int)strlen((char *)here);
1912     if (dlen > 0 && here[dlen - 1] == '\n') return start;
1913     here += dlen;
1914     }
1915    
1916     else
1917     {
1918     int new_buffer_size = 2*buffer_size;
1919 ph10 836 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1920     pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1921 nigel 91
1922 chpe 1090 if (new_buffer == NULL || new_pbuffer == NULL)
1923 nigel 91 {
1924     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1925     exit(1);
1926     }
1927    
1928     memcpy(new_buffer, buffer, buffer_size);
1929     memcpy(new_pbuffer, pbuffer, buffer_size);
1930    
1931     buffer_size = new_buffer_size;
1932    
1933     start = new_buffer + (start - buffer);
1934     here = new_buffer + (here - buffer);
1935    
1936     free(buffer);
1937     free(pbuffer);
1938    
1939     buffer = new_buffer;
1940     pbuffer = new_pbuffer;
1941     }
1942     }
1943    
1944 ph10 1346 /* Control never gets here */
1945 nigel 91 }
1946    
1947    
1948    
1949     /*************************************************
1950 nigel 63 * Read number from string *
1951     *************************************************/
1952    
1953     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1954     around with conditional compilation, just do the job by hand. It is only used
1955 nigel 93 for unpicking arguments, so just keep it simple.
1956 nigel 63
1957     Arguments:
1958     str string to be converted
1959     endptr where to put the end pointer
1960    
Returns:    the converted value, as an int (no overflow check is made)
1962     */
1963    
1964     static int
1965 ph10 836 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1966 nigel 63 {
1967     int result = 0;
1968     while(*str != 0 && isspace(*str)) str++;
1969     while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1970     *endptr = str;
1971     return(result);
1972     }
1973    
1974    
1975    
1976 nigel 49 /*************************************************
1977 ph10 836 * Print one character *
1978 nigel 49 *************************************************/
1979    
1980 ph10 836 /* Print a single character either literally, or as a hex escape. */
1981 nigel 49
1982 chpe 1079 static int pchar(pcre_uint32 c, FILE *f)
1983 nigel 49 {
1984 chpe 1126 int n = 0;
1985 ph10 836 if (PRINTOK(c))
1986     {
1987     if (f != NULL) fprintf(f, "%c", c);
1988     return 1;
1989     }
1990 nigel 49
1991 ph10 836 if (c < 0x100)
1992 nigel 49 {
1993 ph10 836 if (use_utf)
1994     {
1995     if (f != NULL) fprintf(f, "\\x{%02x}", c);
1996     return 6;
1997     }
1998     else
1999     {
2000     if (f != NULL) fprintf(f, "\\x%02x", c);
2001     return 4;
2002     }
2003 nigel 49 }
2004    
2005 chpe 1085 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2006     return n >= 0 ? n : 0;
2007 ph10 836 }
2008 nigel 49
2009    
2010    
2011 ph10 836 #ifdef SUPPORT_PCRE8
2012     /*************************************************
2013     * Print 8-bit character string *
2014     *************************************************/
2015 nigel 49
2016 ph10 836 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2017     If handed a NULL file, just counts chars without printing. */
2018 nigel 49
2019 ph10 836 static int pchars(pcre_uint8 *p, int length, FILE *f)
2020     {
2021 chpe 1086 pcre_uint32 c = 0;
2022 ph10 836 int yield = 0;
2023 nigel 49
2024 ph10 836 if (length < 0)
2025     length = strlen((char *)p);
2026 nigel 49
2027 ph10 836 while (length-- > 0)
2028     {
2029     #if !defined NOUTF
2030     if (use_utf)
2031     {
2032     int rc = utf82ord(p, &c);
2033     if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
2034     {
2035     length -= rc - 1;
2036     p += rc;
2037     yield += pchar(c, f);
2038     continue;
2039     }
2040     }
2041     #endif
2042     c = *p++;
2043     yield += pchar(c, f);
2044     }
2045    
2046     return yield;
2047 nigel 49 }
2048 nigel 79 #endif
2049 nigel 49
2050    
2051 nigel 79
2052 ph10 836 #ifdef SUPPORT_PCRE16
2053 nigel 63 /*************************************************
2054 ph10 836 * Find length of 0-terminated 16-bit string *
2055 nigel 85 *************************************************/
2056    
2057 ph10 836 static int strlen16(PCRE_SPTR16 p)
2058 nigel 85 {
2059 ph10 1345 PCRE_SPTR16 pp = p;
2060     while (*pp != 0) pp++;
2061     return (int)(pp - p);
2062 nigel 85 }
2063 ph10 836 #endif /* SUPPORT_PCRE16 */
2064 nigel 85
2065    
2066 chpe 1055
2067     #ifdef SUPPORT_PCRE32
2068     /*************************************************
2069     * Find length of 0-terminated 32-bit string *
2070     *************************************************/
2071    
2072     static int strlen32(PCRE_SPTR32 p)
2073     {
2074 ph10 1345 PCRE_SPTR32 pp = p;
2075     while (*pp != 0) pp++;
2076     return (int)(pp - p);
2077 chpe 1055 }
2078     #endif /* SUPPORT_PCRE32 */
2079    
2080    
2081    
2082 ph10 836 #ifdef SUPPORT_PCRE16
2083 nigel 85 /*************************************************
2084 ph10 836 * Print 16-bit character string *
2085 nigel 63 *************************************************/
2086 nigel 49
2087 ph10 836 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2088     If handed a NULL file, just counts chars without printing. */
2089 nigel 49
2090 ph10 836 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
2091 nigel 3 {
2092 nigel 63 int yield = 0;
2093 nigel 3
2094 ph10 836 if (length < 0)
2095     length = strlen16(p);
2096    
2097 nigel 63 while (length-- > 0)
2098 nigel 3 {
2099 chpe 1079 pcre_uint32 c = *p++ & 0xffff;
2100 ph10 836 #if !defined NOUTF
2101     if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2102 nigel 63 {
2103 ph10 836 int d = *p & 0xffff;
2104 chpe 1263 if (d >= 0xDC00 && d <= 0xDFFF)
2105 nigel 63 {
2106 ph10 836 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2107     length--;
2108     p++;
2109 nigel 63 }
2110     }
2111 nigel 79 #endif
2112 ph10 836 yield += pchar(c, f);
2113     }
2114 nigel 3
2115 ph10 836 return yield;
2116     }
2117     #endif /* SUPPORT_PCRE16 */
2118 nigel 63
2119 ph10 836
2120    
2121 chpe 1055 #ifdef SUPPORT_PCRE32
2122     /*************************************************
2123     * Print 32-bit character string *
2124     *************************************************/
2125    
2126     /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2127     If handed a NULL file, just counts chars without printing. */
2128    
2129 chpe 1117 static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2130 chpe 1055 {
2131     int yield = 0;
2132    
2133 ph10 1202 (void)(utf); /* Avoid compiler warning */
2134    
2135 chpe 1055 if (length < 0)
2136     length = strlen32(p);
2137    
2138     while (length-- > 0)
2139     {
2140 ph10 1122 pcre_uint32 c = *p++;
2141 chpe 1055 yield += pchar(c, f);
2142     }
2143    
2144     return yield;
2145     }
2146     #endif /* SUPPORT_PCRE32 */
2147    
2148    
2149    
2150 ph10 836 #ifdef SUPPORT_PCRE8
2151     /*************************************************
2152     * Read a capture name (8-bit) and check it *
2153     *************************************************/
2154    
2155     static pcre_uint8 *
2156     read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
2157     {
2158     pcre_uint8 *npp = *pp;
2159     while (isalnum(*p)) *npp++ = *p++;
2160     *npp++ = 0;
2161     *npp = 0;
2162     if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
2163     {
2164     fprintf(outfile, "no parentheses with name \"");
2165     PCHARSV(*pp, 0, -1, outfile);
2166     fprintf(outfile, "\"\n");
2167 nigel 63 }
2168 nigel 3
2169 ph10 836 *pp = npp;
2170     return p;
2171 nigel 63 }
2172 ph10 836 #endif /* SUPPORT_PCRE8 */
2173 nigel 23
2174 nigel 3
2175 nigel 23
2176 ph10 836 #ifdef SUPPORT_PCRE16
2177 nigel 63 /*************************************************
2178 ph10 836 * Read a capture name (16-bit) and check it *
2179     *************************************************/
2180    
2181     /* Note that the text being read is 8-bit. */
2182    
2183     static pcre_uint8 *
2184     read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
2185     {
2186     pcre_uint16 *npp = *pp;
2187     while (isalnum(*p)) *npp++ = *p++;
2188     *npp++ = 0;
2189     *npp = 0;
2190 zherczeg 852 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
2191 ph10 836 {
2192     fprintf(outfile, "no parentheses with name \"");
2193     PCHARSV(*pp, 0, -1, outfile);
2194     fprintf(outfile, "\"\n");
2195     }
2196     *pp = npp;
2197     return p;
2198     }
2199     #endif /* SUPPORT_PCRE16 */
2200    
2201    
2202    
2203 chpe 1055 #ifdef SUPPORT_PCRE32
2204 ph10 836 /*************************************************
2205 chpe 1055 * Read a capture name (32-bit) and check it *
2206     *************************************************/
2207    
2208     /* Note that the text being read is 8-bit. */
2209    
2210     static pcre_uint8 *
2211     read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2212     {
2213     pcre_uint32 *npp = *pp;
2214     while (isalnum(*p)) *npp++ = *p++;
2215     *npp++ = 0;
2216     *npp = 0;
2217     if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2218     {
2219     fprintf(outfile, "no parentheses with name \"");
2220     PCHARSV(*pp, 0, -1, outfile);
2221     fprintf(outfile, "\"\n");
2222     }
2223     *pp = npp;
2224     return p;
2225     }
2226     #endif /* SUPPORT_PCRE32 */
2227    
2228    
2229    
2230     /*************************************************
2231 ph10 1454 * Stack guard function *
2232     *************************************************/
2233    
2234     /* Called from PCRE when set in pcre_stack_guard. We give an error (non-zero)
2235     return when a count overflows. */
2236    
2237     static int stack_guard(void)
2238     {
2239     return stack_guard_return;
2240     }
2241    
2242     /*************************************************
2243 nigel 63 * Callout function *
2244     *************************************************/
2245 nigel 3
2246 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
2247     the match. Yield zero unless more callouts than the fail count, or the callout
2248     data is not zero. */
2249 nigel 3
2250 nigel 63 static int callout(pcre_callout_block *cb)
2251     {
2252     FILE *f = (first_callout | callout_extra)? outfile : NULL;
2253 nigel 75 int i, pre_start, post_start, subject_length;
2254 nigel 3
2255 nigel 63 if (callout_extra)
2256     {
2257     fprintf(f, "Callout %d: last capture = %d\n",
2258     cb->callout_number, cb->capture_last);
2259 nigel 3
2260 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
2261     {
2262     if (cb->offset_vector[i] < 0)
2263     fprintf(f, "%2d: <unset>\n", i/2);
2264     else
2265     {
2266     fprintf(f, "%2d: ", i/2);
2267 ph10 836 PCHARSV(cb->subject, cb->offset_vector[i],
2268 nigel 63 cb->offset_vector[i+1] - cb->offset_vector[i], f);
2269     fprintf(f, "\n");
2270     }
2271     }
2272     }
2273 nigel 3
2274 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
2275     datails. On subsequent calls in the same match, we use pchars just to find the
2276     printed lengths of the substrings. */
2277 nigel 3
2278 nigel 63 if (f != NULL) fprintf(f, "--->");
2279 nigel 3
2280 ph10 836 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2281     PCHARS(post_start, cb->subject, cb->start_match,
2282 nigel 63 cb->current_position - cb->start_match, f);
2283 nigel 3
2284 ph10 836 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2285 nigel 75
2286 ph10 836 PCHARSV(cb->subject, cb->current_position,
2287 nigel 63 cb->subject_length - cb->current_position, f);
2288 nigel 3
2289 nigel 63 if (f != NULL) fprintf(f, "\n");
2290 nigel 9
2291 nigel 63 /* Always print appropriate indicators, with callout number if not already
2292 nigel 75 shown. For automatic callouts, show the pattern offset. */
2293 nigel 3
2294 nigel 75 if (cb->callout_number == 255)
2295     {
2296     fprintf(outfile, "%+3d ", cb->pattern_position);
2297     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2298     }
2299     else
2300     {
2301     if (callout_extra) fprintf(outfile, " ");
2302     else fprintf(outfile, "%3d ", cb->callout_number);
2303     }
2304 nigel 3
2305 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2306     fprintf(outfile, "^");
2307 nigel 3
2308 nigel 63 if (post_start > 0)
2309     {
2310     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2311     fprintf(outfile, "^");
2312 nigel 3 }
2313    
2314 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2315     fprintf(outfile, " ");
2316    
2317     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2318     pbuffer + cb->pattern_position);
2319    
2320 nigel 63 fprintf(outfile, "\n");
2321     first_callout = 0;
2322 nigel 3
2323 ph10 654 if (cb->mark != last_callout_mark)
2324 ph10 645 {
2325 ph10 836 if (cb->mark == NULL)
2326     fprintf(outfile, "Latest Mark: <unset>\n");
2327     else
2328     {
2329     fprintf(outfile, "Latest Mark: ");
2330     PCHARSV(cb->mark, 0, -1, outfile);
2331     putc('\n', outfile);
2332     }
2333 ph10 654 last_callout_mark = cb->mark;
2334     }
2335 ph10 645
2336 nigel 71 if (cb->callout_data != NULL)
2337 nigel 49 {
2338 nigel 71 int callout_data = *((int *)(cb->callout_data));
2339     if (callout_data != 0)
2340     {
2341     fprintf(outfile, "Callout data = %d\n", callout_data);
2342     return callout_data;
2343     }
2344 nigel 63 }
2345 nigel 49
2346 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
2347     (++callout_count >= callout_fail_count)? 1 : 0;
2348 nigel 3 }
2349    
2350    
2351 nigel 63 /*************************************************
2352 nigel 73 * Local malloc functions *
2353 nigel 63 *************************************************/
2354 nigel 3
2355 ph10 667 /* Alternative malloc function, to test functionality and save the size of a
2356 ph10 836 compiled re, which is the first store request that pcre_compile() makes. The
2357     show_malloc variable is set only during matching. */
2358 nigel 3
2359     static void *new_malloc(size_t size)
2360     {
2361 nigel 73 void *block = malloc(size);
2362     if (show_malloc)
2363 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2364 nigel 73 return block;
2365 nigel 3 }
2366    
2367 nigel 73 static void new_free(void *block)
2368     {
2369     if (show_malloc)
2370     fprintf(outfile, "free %p\n", block);
2371     free(block);
2372     }
2373 nigel 3
2374 nigel 73 /* For recursion malloc/free, to test stacking calls */
2375    
2376     static void *stack_malloc(size_t size)
2377     {
2378     void *block = malloc(size);
2379     if (show_malloc)
2380 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2381 nigel 73 return block;
2382     }
2383    
2384     static void stack_free(void *block)
2385     {
2386     if (show_malloc)
2387     fprintf(outfile, "stack_free %p\n", block);
2388     free(block);
2389     }
2390    
2391    
2392 nigel 63 /*************************************************
2393     * Call pcre_fullinfo() *
2394     *************************************************/
2395 nigel 43
2396 ph10 836 /* Get one piece of information from the pcre_fullinfo() function. When only
2397 chpe 1055 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2398 ph10 836 value, but the code is defensive.
2399 nigel 43
2400 ph10 836 Arguments:
2401     re compiled regex
2402     study study data
2403     option PCRE_INFO_xxx option
2404     ptr where to put the data
2405    
2406     Returns: 0 when OK, < 0 on error
2407     */
2408    
2409     static int
2410     new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2411 nigel 43 {
2412     int rc;
2413 ph10 836
2414 chpe 1055 if (pcre_mode == PCRE32_MODE)
2415     #ifdef SUPPORT_PCRE32
2416     rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2417     #else
2418     rc = PCRE_ERROR_BADMODE;
2419     #endif
2420     else if (pcre_mode == PCRE16_MODE)
2421 ph10 836 #ifdef SUPPORT_PCRE16
2422 zherczeg 852 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2423 ph10 836 #else
2424     rc = PCRE_ERROR_BADMODE;
2425     #endif
2426     else
2427     #ifdef SUPPORT_PCRE8
2428     rc = pcre_fullinfo(re, study, option, ptr);
2429     #else
2430     rc = PCRE_ERROR_BADMODE;
2431     #endif
2432    
2433 ph10 1313 if (rc < 0 && rc != PCRE_ERROR_UNSET)
2434 ph10 836 {
2435     fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2436 chpe 1055 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2437 ph10 836 if (rc == PCRE_ERROR_BADMODE)
2438 chpe 1055 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2439     "%d-bit mode\n", 8 * CHAR_SIZE,
2440     8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2441 ph10 836 }
2442    
2443     return rc;
2444 nigel 43 }
2445    
2446    
2447    
2448 nigel 63 /*************************************************
2449 ph10 836 * Swap byte functions *
2450 nigel 75 *************************************************/
2451    
2452 ph10 836 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2453     value, respectively.
2454    
2455     Arguments:
2456     value any number
2457    
2458     Returns: the byte swapped value
2459     */
2460    
2461     static pcre_uint32
2462     swap_uint32(pcre_uint32 value)
2463 nigel 75 {
2464     return ((value & 0x000000ff) << 24) |
2465     ((value & 0x0000ff00) << 8) |
2466     ((value & 0x00ff0000) >> 8) |
2467 ph10 836 (value >> 24);
2468 nigel 75 }
2469    
2470 ph10 836 static pcre_uint16
2471     swap_uint16(pcre_uint16 value)
2472     {
2473     return (value >> 8) | (value << 8);
2474     }
2475 nigel 75
2476    
2477    
2478     /*************************************************
2479 ph10 836 * Flip bytes in a compiled pattern *
2480     *************************************************/
2481    
2482     /* This function is called if the 'F' option was present on a pattern that is
2483     to be written to a file. We flip the bytes of all the integer fields in the
2484     regex data block and the study block. In 16-bit mode this also flips relevant
2485     bytes in the pattern itself. This is to make it possible to test PCRE's
2486     ability to reload byte-flipped patterns, e.g. those compiled on a different
2487     architecture. */
2488    
#if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16

/* Byte-flip a compiled pattern that uses the 8-bit/16-bit header layout. The
integer fields of the header and, if supplied, of the study block are always
flipped. When 16-bit support is compiled in and pcre_mode is PCRE16_MODE, the
name table and the code units of the compiled code are flipped as well; the
character bit maps of class items are skipped, not flipped.

Arguments:
  ere       the compiled pattern
  extra     the pcre_extra block whose study_data is to be flipped, or NULL

Returns:    nothing
*/

static void
regexflip8_or_16(pcre *ere, pcre_extra *extra)
{
real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
#ifdef SUPPORT_PCRE16
int op;

/* These are taken from the header now, while its fields still hold their
native values; the header is flipped below. */

pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
int length = re->name_count * re->name_entry_size;
#ifdef SUPPORT_UTF
BOOL utf = (re->options & PCRE_UTF16) != 0;
BOOL utf16_char = FALSE;
#endif /* SUPPORT_UTF */
#endif /* SUPPORT_PCRE16 */

/* Always flip the bytes in the main data block and study blocks. */

re->magic_number = REVERSED_MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
re->flags = swap_uint32(re->flags);
re->limit_match = swap_uint32(re->limit_match);
re->limit_recursion = swap_uint32(re->limit_recursion);
re->first_char = swap_uint16(re->first_char);
re->req_char = swap_uint16(re->req_char);
re->max_lookbehind = swap_uint16(re->max_lookbehind);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);
re->ref_count = swap_uint16(re->ref_count);

if (extra != NULL)
  {
  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
  rsd->size = swap_uint32(rsd->size);
  rsd->flags = swap_uint32(rsd->flags);
  rsd->minlength = swap_uint32(rsd->minlength);
  }

/* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
in the name table, if present, and then in the pattern itself. */

#ifdef SUPPORT_PCRE16
if (pcre_mode != PCRE16_MODE) return;

while(TRUE)
  {
  /* Swap previous characters. On the first pass this is the name table;
  afterwards it is the data that follows the opcode just processed. */

  while (length-- > 0)
    {
    *ptr = swap_uint16(*ptr);
    ptr++;
    }

#ifdef SUPPORT_UTF
  if (utf16_char)
    {
    /* The unit at ptr[-1] has already been flipped by the loop above, so it
    must be flipped back before it can be tested for being a high
    surrogate. */

    if ((swap_uint16(ptr[-1]) & 0xfc00) == 0xd800)
      {
      /* We know that there is only one extra character in UTF-16. */
      *ptr = swap_uint16(*ptr);
      ptr++;
      }
    }
  utf16_char = FALSE;
#endif /* SUPPORT_UTF */

  /* Get next opcode. */

  length = 0;
  op = *ptr;
  *ptr++ = swap_uint16(op);

  switch (op)
    {
    case OP_END:
    return;

#ifdef SUPPORT_UTF
    case OP_CHAR:
    case OP_CHARI:
    case OP_NOT:
    case OP_NOTI:
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    /* These items end with a character, which in UTF-16 mode may occupy a
    second code unit that the table length does not include. */
    if (utf) utf16_char = TRUE;
#endif
    /* Fall through. */

    default:
    length = OP_lengths16[op] - 1;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* Skip the character bit map. */
    ptr += 32/sizeof(pcre_uint16);
    length = 0;
    break;

    case OP_XCLASS:
    /* LINK_SIZE can be 1 or 2 in 16 bit mode. The item's size is read here,
    before it is flipped. */
    if (LINK_SIZE > 1)
      length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
        - (1 + LINK_SIZE + 1));
    else
      length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));

    /* Reverse the size of the XCLASS instance. */
    *ptr = swap_uint16(*ptr);
    ptr++;
    if (LINK_SIZE > 1)
      {
      *ptr = swap_uint16(*ptr);
      ptr++;
      }

    /* The next unit holds the XCLASS flag bits. */
    op = *ptr;
    *ptr = swap_uint16(op);
    ptr++;
    if ((op & XCL_MAP) != 0)
      {
      /* Skip the character bit map. */
      ptr += 32/sizeof(pcre_uint16);
      length -= 32/sizeof(pcre_uint16);
      }
    break;
    }
  }
/* Control should never reach here in 16 bit mode. */
#endif /* SUPPORT_PCRE16 */
}
#endif /* SUPPORT_PCRE[8|16] */
2673 ph10 836
2674    
2675    
#if defined SUPPORT_PCRE32

/* Byte-flip a compiled pattern that uses the 32-bit header layout: the
integer fields of the header, the study block if one is supplied, the name
table, and the code units of the compiled code. The character bit maps of
class items are skipped, not flipped.

Arguments:
  ere       the compiled pattern
  extra     the pcre_extra block whose study_data is to be flipped, or NULL

Returns:    nothing
*/

static void
regexflip_32(pcre *ere, pcre_extra *extra)
{
real_pcre32 *re = (real_pcre32 *)ere;

/* Taken from the header before its fields are flipped below. */

pcre_uint32 *code = (pcre_uint32 *)re + re->name_table_offset;
int pending = re->name_count * re->name_entry_size;
int opcode;

/* Always flip the bytes in the main data block and study blocks. */

re->magic_number = REVERSED_MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
re->flags = swap_uint32(re->flags);
re->limit_match = swap_uint32(re->limit_match);
re->limit_recursion = swap_uint32(re->limit_recursion);
re->first_char = swap_uint32(re->first_char);
re->req_char = swap_uint32(re->req_char);
re->max_lookbehind = swap_uint16(re->max_lookbehind);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);
re->ref_count = swap_uint16(re->ref_count);

if (extra != NULL)
  {
  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
  rsd->size = swap_uint32(rsd->size);
  rsd->flags = swap_uint32(rsd->flags);
  rsd->minlength = swap_uint32(rsd->minlength);
  }

/* In 32-bit mode we must swap bytes in the name table, if present, and then in
the pattern itself. */

for (;;)
  {
  /* Flip whatever is outstanding: the name table on the first pass, then the
  data belonging to the opcode handled on the previous pass. */

  for (; pending > 0; pending--)
    {
    *code = swap_uint32(*code);
    code++;
    }

  /* Pick up the next opcode in native form and flip it in place. */

  opcode = *code;
  *code++ = swap_uint32(opcode);

  if (opcode == OP_END) return;

  switch (opcode)
    {
    case OP_CLASS:
    case OP_NCLASS:
    /* Skip the character bit map. */
    code += 32/sizeof(pcre_uint32);
    pending = 0;
    break;

    case OP_XCLASS:
    /* LINK_SIZE can only be 1 in 32-bit mode. The item's size is read
    before it is flipped. */
    pending = (int)((unsigned int)(code[0]) - (1 + LINK_SIZE + 1));

    /* Reverse the size of the XCLASS instance. */
    *code = swap_uint32(*code);
    code++;

    /* The next unit holds the XCLASS flag bits. */
    opcode = *code;
    *code = swap_uint32(opcode);
    code++;
    if ((opcode & XCL_MAP) != 0)
      {
      /* Skip the character bit map. */
      code += 32/sizeof(pcre_uint32);
      pending -= 32/sizeof(pcre_uint32);
      }
    break;

    default:
    pending = OP_lengths32[opcode] - 1;
    break;
    }
  }
/* Control should never reach here in 32 bit mode. */
}

#endif /* SUPPORT_PCRE32 */
2769    
2770    
2771    
2772     static void
2773     regexflip(pcre *ere, pcre_extra *extra)
2774     {
2775     #if defined SUPPORT_PCRE32
2776     if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2777     regexflip_32(ere, extra);
2778     #endif
2779     #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2780     if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2781     regexflip8_or_16(ere, extra);
2782     #endif
2783     }
2784    
2785    
2786    
2787 ph10 836 /*************************************************
2788 nigel 87 * Check match or recursion limit *
2789     *************************************************/
2790    
2791     static int
2792 ph10 836 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2793 nigel 87 int start_offset, int options, int *use_offsets, int use_size_offsets,
2794     int flag, unsigned long int *limit, int errnumber, const char *msg)
2795     {
2796     int count;
2797     int min = 0;
2798     int mid = 64;
2799     int max = -1;
2800    
2801     extra->flags |= flag;
2802    
2803     for (;;)
2804     {
2805     *limit = mid;
2806    
2807 ph10 836 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2808 nigel 87 use_offsets, use_size_offsets);
2809    
2810     if (count == errnumber)
2811     {
2812     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2813     min = mid;
2814     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2815     }
2816    
2817     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2818     count == PCRE_ERROR_PARTIAL)
2819     {
2820     if (mid == min + 1)
2821     {
2822     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2823     break;
2824     }
2825     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2826     max = mid;
2827     mid = (min + mid)/2;
2828     }
2829     else break; /* Some other error */
2830     }
2831    
2832     extra->flags &= ~flag;
2833     return count;
2834     }
2835    
2836    
2837    
2838     /*************************************************
2839 ph10 227 * Case-independent strncmp() function *
2840     *************************************************/
2841    
2842     /*
2843     Arguments:
2844     s first string
2845     t second string
2846     n number of characters to compare
2847    
2848     Returns: < 0, = 0, or > 0, according to the comparison
2849     */
2850    
2851     static int
2852 ph10 836 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2853 ph10 227 {
2854     while (n--)
2855     {
2856     int c = tolower(*s++) - tolower(*t++);
2857     if (c) return c;
2858     }
2859     return 0;
2860     }
2861    
2862    
2863    
2864     /*************************************************
2865 ph10 1388 * Check multicharacter option *
2866 nigel 91 *************************************************/
2867    
2868 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2869     a message and return 0 if there is no match.
2870 nigel 91
2871     Arguments:
2872     p points after the leading '<'
2873     f file for error message
2874 ph10 1404 nl TRUE to check only for newline settings
2875     stype "modifier" or "escape sequence"
2876 nigel 91
2877     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2878     */
2879    
2880     static int
2881 ph10 1388 check_mc_option(pcre_uint8 *p, FILE *f, BOOL nl, const char *stype)
2882 nigel 91 {
2883 ph10 836 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2884     if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2885     if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2886     if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2887     if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2888     if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2889     if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2890 ph10 1388
2891     if (!nl)
2892     {
2893     if (strncmpic(p, (pcre_uint8 *)"JS>", 3) == 0) return PCRE_JAVASCRIPT_COMPAT;
2894 ph10 1404 }
2895 ph10 1388
2896     fprintf(f, "Unknown %s at: <%s\n", stype, p);
2897 nigel 91 return 0;
2898     }
2899    
2900    
2901    
2902     /*************************************************
2903 nigel 93 * Usage function *
2904     *************************************************/
2905    
/* Write a summary of the command line options to the standard output. Lines
for options that depend on how pcretest was built (library widths, DFA, POSIX)
are included only when the corresponding support is compiled in. The list
includes -8 and the "ebcdic" and "ebcdic-nl" arguments of -C, all of which the
option scanner accepts. */

static void
usage(void)
{
printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
printf("Input and output default to stdin and stdout.\n");
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif
printf("\nOptions:\n");
#ifdef SUPPORT_PCRE8
printf(" -8 use the 8-bit library\n");
#endif
#ifdef SUPPORT_PCRE16
printf(" -16 use the 16-bit library\n");
#endif
#ifdef SUPPORT_PCRE32
printf(" -32 use the 32-bit library\n");
#endif
printf(" -b show compiled code\n");
printf(" -C show PCRE compile-time options and exit\n");
printf(" -C arg show a specific compile-time option and exit\n");
printf(" with its value if numeric (else 0). The arg can be:\n");
printf(" linksize internal link size [2, 3, 4]\n");
printf(" pcre8 8 bit library support enabled [0, 1]\n");
printf(" pcre16 16 bit library support enabled [0, 1]\n");
printf(" pcre32 32 bit library support enabled [0, 1]\n");
printf(" utf Unicode Transformation Format supported [0, 1]\n");
printf(" ucp Unicode Properties supported [0, 1]\n");
printf(" jit Just-in-time compiler supported [0, 1]\n");
printf(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY]\n");
printf(" bsr \\R type [ANYCRLF, ANY]\n");
printf(" ebcdic EBCDIC code support [0, 1]\n");
printf(" ebcdic-nl code value of LF if EBCDIC, else 0\n");
printf(" -d debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
printf(" -dfa force DFA matching for all subjects\n");
#endif
printf(" -help show usage information\n");
printf(" -i show information about compiled patterns\n"
       " -M find MATCH_LIMIT minimum for each subject\n"
       " -m output memory used information\n"
       " -O set PCRE_NO_AUTO_POSSESS on each pattern\n"
       " -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf(" -p use POSIX interface\n");
#endif
printf(" -q quiet: do not output PCRE version number at start\n");
printf(" -S <n> set stack size to <n> megabytes\n");
printf(" -s force each pattern to be studied at basic level\n"
       " -s+ force each pattern to be studied, using JIT if available\n"
       " -s++ ditto, verifying when JIT was actually used\n"
       " -s+n force each pattern to be studied, using JIT if available,\n"
       " where 1 <= n <= 7 selects JIT options\n"
       " -s++n ditto, verifying when JIT was actually used\n"
       " -t time compilation and execution\n");
printf(" -t <n> time compilation and execution, repeating <n> times\n");
printf(" -tm time execution (matching) only\n");
printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
printf(" -T same as -t, but show total times at the end\n");
printf(" -TM same as -tm, but show total time at the end\n");
}
2964    
2965    
2966    
2967     /*************************************************
2968 nigel 63 * Main Program *
2969     *************************************************/
2970 nigel 43
2971 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
2972     consist of a regular expression, in delimiters and optionally followed by
2973     options, followed by a set of test data, terminated by an empty line. */
2974    
2975     int main(int argc, char **argv)
2976     {
2977     FILE *infile = stdin;
2978 ph10 836 const char *version;
2979 nigel 3 int options = 0;
2980     int study_options = 0;
2981 ph10 386 int default_find_match_limit = FALSE;
2982 ph10 1363 pcre_uint32 default_options = 0;
2983 nigel 3 int op = 1;
2984     int timeit = 0;
2985 nigel 93 int timeitm = 0;
2986 ph10 1357 int showtotaltimes = 0;
2987 nigel 3 int showinfo = 0;
2988 nigel 31 int showstore = 0;
2989 ph10 667 int force_study = -1;
2990     int force_study_options = 0;
2991 nigel 87 int quiet = 0;
2992 nigel 53 int size_offsets = 45;
2993     int size_offsets_max;
2994 nigel 77 int *offsets = NULL;
2995 nigel 3 int debug = 0;
2996 nigel 11 int done = 0;
2997 nigel 77 int all_use_dfa = 0;
2998 ph10 922 int verify_jit = 0;
2999 nigel 77 int yield = 0;
3000 nigel 91 int stack_size;
3001 chpe 1090 pcre_uint8 *dbuffer = NULL;
3002 ph10 1388 pcre_uint8 lockout[24] = { 0 };
3003 chpe 1090 size_t dbuffer_size = 1u << 14;
3004 ph10 1357 clock_t total_compile_time = 0;
3005     clock_t total_study_time = 0;
3006     clock_t total_match_time = 0;
3007 nigel 3
3008 ph10 960 #if !defined NOPOSIX
3009     int posix = 0;
3010     #endif
3011     #if !defined NODFA
3012     int *dfa_workspace = NULL;
3013     #endif
3014    
3015 ph10 667 pcre_jit_stack *jit_stack = NULL;
3016    
3017 ph10 836 /* These vectors store, end-to-end, a list of zero-terminated captured
3018     substring names, each list itself being terminated by an empty name. Assume
3019     that 1024 is plenty long enough for the few names we'll be testing. It is
3020 chpe 1055 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
3021 ph10 881 for the actual memory, to ensure alignment. */
3022 ph10 667
3023 chpe 1055 pcre_uint32 copynames[1024];
3024     pcre_uint32 getnames[1024];
3025 nigel 69
3026 chpe 1055 #ifdef SUPPORT_PCRE32
3027     pcre_uint32 *cn32ptr;
3028     pcre_uint32 *gn32ptr;
3029     #endif
3030    
3031 ph10 881 #ifdef SUPPORT_PCRE16
3032 chpe 1055 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
3033     pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
3034 ph10 836 pcre_uint16 *cn16ptr;
3035     pcre_uint16 *gn16ptr;
3036 ph10 881 #endif
3037 nigel 91
3038 ph10 881 #ifdef SUPPORT_PCRE8
3039 ph10 836 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
3040     pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
3041     pcre_uint8 *cn8ptr;
3042     pcre_uint8 *gn8ptr;
3043 ph10 881 #endif
3044 nigel 91
3045 ph10 836 /* Get buffers from malloc() so that valgrind will check their misuse when
3046 ph10 1122 debugging. They grow automatically when very long lines are read. The 16-
3047 chpe 1055 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
3048 nigel 69
3049 ph10 836 buffer = (pcre_uint8 *)malloc(buffer_size);
3050     pbuffer = (pcre_uint8 *)malloc(buffer_size);
3051 nigel 69
3052 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
3053 nigel 3
3054 nigel 93 outfile = stdout;
3055    
3056     /* The following _setmode() stuff is some Windows magic that tells its runtime
3057     library to translate CRLF into a single LF character. At least, that's what
3058     I've been told: never having used Windows I take this all on trust. Originally
3059     it set 0x8000, but then I was advised that _O_BINARY was better. */
3060    
3061 nigel 75 #if defined(_WIN32) || defined(WIN32)
3062 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
3063     #endif
3064 nigel 75
3065 ph10 836 /* Get the version number: both pcre_version() and pcre16_version() give the
3066     same answer. We just need to ensure that we call one that is available. */
3067    
3068 chpe 1055 #if defined SUPPORT_PCRE8
3069 ph10 836 version = pcre_version();
3070 chpe 1055 #elif defined SUPPORT_PCRE16
3071 ph10 836 version = pcre16_version();
3072 chpe 1055 #elif defined SUPPORT_PCRE32
3073     version = pcre32_version();
3074 ph10 836 #endif
3075    
3076 nigel 3 /* Scan options */
3077    
3078     while (argc > 1 && argv[op][0] == '-')
3079     {
3080 ph10 836 pcre_uint8 *endptr;
3081 ph10 960 char *arg = argv[op];
3082 nigel 53
3083 ph10 922 if (strcmp(arg, "-m") == 0) showstore = 1;
3084     else if (strcmp(arg, "-s") == 0) force_study = 0;
3085 ph10 960
3086 ph10 922 else if (strncmp(arg, "-s+", 3) == 0)
3087 ph10 667 {
3088 ph10 922 arg += 3;
3089     if (*arg == '+') { arg++; verify_jit = TRUE; }
3090 ph10 667 force_study = 1;
3091 ph10 923 if (*arg == 0)
3092 ph10 960 force_study_options = jit_study_bits[6];
3093 ph10 923 else if (*arg >= '1' && *arg <= '7')
3094 ph10 960 force_study_options = jit_study_bits[*arg - '1'];
3095 ph10 923 else goto BAD_ARG;
3096 ph10 691 }
3097 chpe 1097 else if (strcmp(arg, "-8") == 0)
3098     {
3099     #ifdef SUPPORT_PCRE8
3100     pcre_mode = PCRE8_MODE;
3101     #else
3102     printf("** This version of PCRE was built without 8-bit support\n");
3103     exit(1);
3104     #endif
3105     }
3106 ph10 922 else if (strcmp(arg, "-16") == 0)
3107 ph10 836 {
3108     #ifdef SUPPORT_PCRE16
3109 chpe 1055 pcre_mode = PCRE16_MODE;
3110 ph10 836 #else
3111     printf("** This version of PCRE was built without 16-bit support\n");
3112     exit(1);
3113     #endif
3114     }
3115 chpe 1200 else if (strcmp(arg, "-32") == 0)
3116 chpe 1055 {
3117     #ifdef SUPPORT_PCRE32
3118     pcre_mode = PCRE32_MODE;
3119     #else
3120     printf("** This version of PCRE was built without 32-bit support\n");
3121     exit(1);
3122     #endif
3123     }
3124 ph10 922 else if (strcmp(arg, "-q") == 0) quiet = 1;
3125     else if (strcmp(arg, "-b") == 0) debug = 1;
3126     else if (strcmp(arg, "-i") == 0) showinfo = 1;
3127     else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3128     else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
3129 ph10 1398 else if (strcmp(arg, "-O") == 0) default_options |= PCRE_NO_AUTO_POSSESS;
3130 nigel 79 #if !defined NODFA
3131 ph10 922 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
3132 nigel 79 #endif
3133 ph10 922 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
3134 ph10 836 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3135 nigel 65 *endptr == 0))
3136 nigel 53 {
3137     op++;
3138     argc--;
3139     }
3140 ph10 1357 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
3141     strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
3142 nigel 93 {
3143 ph10 1357 int temp;
3144 ph10 922 int both = arg[2] == 0;
3145 ph10 1404 showtotaltimes = arg[1] == 'T';
3146 ph10 836 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
3147 nigel 93 *endptr == 0))
3148     {
3149     timeitm = temp;
3150     op++;
3151     argc--;
3152     }
3153     else timeitm = LOOPREPEAT;
3154     if (both) timeit = timeitm;
3155     }
3156 ph10 922 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
3157 ph10 836 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3158 nigel 91 *endptr == 0))
3159     {
3160 ph10 1254 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
3161 nigel 91 printf("PCRE: -S not supported on this OS\n");
3162     exit(1);
3163     #else
3164     int rc;
3165     struct rlimit rlim;
3166     getrlimit(RLIMIT_STACK, &rlim);
3167     rlim.rlim_cur = stack_size * 1024 * 1024;
3168     rc = setrlimit(RLIMIT_STACK, &rlim);
3169     if (rc != 0)
3170     {
3171     printf("PCRE: setrlimit() failed with error %d\n", rc);
3172     exit(1);
3173     }
3174     op++;
3175     argc--;
3176     #endif
3177     }
3178 nigel 53 #if !defined NOPOSIX
3179 ph10 922 else if (strcmp(arg, "-p") == 0) posix = 1;
3180 nigel 53 #endif
3181 ph10 922 else if (strcmp(arg, "-C") == 0)
3182 nigel 63 {
3183     int rc;
3184 ph10 392 unsigned long int lrc;
3185 ph10 836
3186     if (argc > 2)
3187     {
3188     if (strcmp(argv[op + 1], "linksize") == 0)
3189     {
3190     (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3191     printf("%d\n", rc);
3192     yield = rc;
3193 ph10 1320
3194 ph10 1254 #ifdef __VMS
3195     vms_setsymbol("LINKSIZE",0,yield );
3196     #endif
3197 ph10 836 }
3198 ph10 1033 else if (strcmp(argv[op + 1], "pcre8") == 0)
3199 ph10 836 {
3200     #ifdef SUPPORT_PCRE8
3201     printf("1\n");
3202     yield = 1;
3203     #else
3204     printf("0\n");
3205     yield = 0;
3206     #endif
3207 ph10 1254 #ifdef __VMS
3208     vms_setsymbol("PCRE8",0,yield );
3209     #endif
3210 ph10 836 }
3211 ph10 1033 else if (strcmp(argv[op + 1], "pcre16") == 0)
3212 ph10 836 {
3213     #ifdef SUPPORT_PCRE16
3214     printf("1\n");
3215     yield = 1;
3216     #else
3217     printf("0\n");
3218     yield = 0;
3219     #endif
3220 ph10 1254 #ifdef __VMS
3221     vms_setsymbol("PCRE16",0,yield );
3222     #endif
3223 ph10 836 }
3224 chpe 1055 else if (strcmp(argv[op + 1], "pcre32") == 0)
3225 ph10 836 {
3226 chpe 1055 #ifdef SUPPORT_PCRE32
3227     printf("1\n");
3228     yield = 1;
3229     #else
3230     printf("0\n");
3231     yield = 0;
3232     #endif
3233 ph10 1254 #ifdef __VMS
3234     vms_setsymbol("PCRE32",0,yield );
3235     #endif
3236 chpe 1055 }
3237 ph10 1254 else if (strcmp(argv[op + 1], "utf") == 0)
3238 chpe 1055 {
3239 ph10 836 #ifdef SUPPORT_PCRE8
3240 chpe 1055 if (pcre_mode == PCRE8_MODE)
3241     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3242     #endif
3243     #ifdef SUPPORT_PCRE16
3244     if (pcre_mode == PCRE16_MODE)
3245     (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3246     #endif
3247     #ifdef SUPPORT_PCRE32
3248     if (pcre_mode == PCRE32_MODE)
3249     (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3250     #endif
3251 ph10 836 printf("%d\n", rc);
3252     yield = rc;
3253 ph10 1254 #ifdef __VMS
3254     vms_setsymbol("UTF",0,yield );
3255     #endif
3256 ph10 836 }
3257 ph10 1033 else if (strcmp(argv[op + 1], "ucp") == 0)
3258 ph10 836 {
3259     (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3260     printf("%d\n", rc);
3261     yield = rc;
3262     }
3263 ph10 1033 else if (strcmp(argv[op + 1], "jit") == 0)
3264 ph10 836 {
3265     (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3266     printf("%d\n", rc);
3267     yield = rc;
3268     }
3269 ph10 1033 else if (strcmp(argv[op + 1], "newline") == 0)
3270 ph10 842 {
3271 ph10 838 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3272 ph10 1122 print_newline_config(rc, TRUE);
3273 ph10 842 }
3274 ph10 1450 else if (strcmp(argv[op + 1], "bsr") == 0)
3275     {
3276     (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3277     printf("%s\n", rc? "ANYCRLF" : "ANY");
3278     }
3279 ph10 1033 else if (strcmp(argv[op + 1], "ebcdic") == 0)
3280     {
3281     #ifdef EBCDIC
3282     printf("1\n");
3283 ph10 1122 yield = 1;
3284 ph10 1033 #else
3285 ph10 1122 printf("0\n");
3286     #endif
3287 ph10 1033 }
3288     else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3289     {
3290     #ifdef EBCDIC
3291     printf("0x%02x\n", CHAR_LF);
3292     #else
3293 ph10 1122 printf("0\n");
3294     #endif
3295 ph10 1033 }
3296     else
3297 ph10 1122 {
3298 ph10 1033 printf("Unknown -C option: %s\n", argv[op + 1]);
3299 ph10 1122 }
3300 ph10 836 goto EXIT;
3301     }
3302 ph10 1122
3303 ph10 1033 /* No argument for -C: output all configuration information. */
3304 ph10 836
3305     printf("PCRE version %s\n", version);
3306 nigel 63 printf("Compiled with\n");
3307 ph10 1122
3308 ph10 1030 #ifdef EBCDIC
3309     printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3310 ph10 1122 #endif
3311 ph10 836
3312     /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3313     are set, either both UTFs are supported or both are not supported. */
3314    
3315 chpe 1055 #ifdef SUPPORT_PCRE8
3316     printf(" 8-bit support\n");
3317 nigel 63 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3318 chpe 1055 printf (" %sUTF-8 support\n", rc ? "" : "No ");
3319     #endif
3320     #ifdef SUPPORT_PCRE16
3321     printf(" 16-bit support\n");
3322 ph10 836 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3323 chpe 1055 printf (" %sUTF-16 support\n", rc ? "" : "No ");
3324 ph10 836 #endif
3325 chpe 1055 #ifdef SUPPORT_PCRE32
3326     printf(" 32-bit support\n");
3327     (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3328     printf (" %sUTF-32 support\n", rc ? "" : "No ");
3329     #endif
3330 ph10 836
3331     (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3332 nigel 75 printf(" %sUnicode properties support\n", rc? "" : "No ");
3333 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3334 ph10 674 if (rc)
3335 ph10 890 {
3336     const char *arch;
3337 ph10 908 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3338 ph10 890 printf(" Just-in-time compiler support: %s\n", arch);
3339 ph10 903 }
3340 ph10 674 else
3341     printf(" No just-in-time compiler support\n");
3342 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3343 ph10 1122 print_newline_config(rc, FALSE);
3344 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3345 ph10 231 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3346     "all Unicode newlines");
3347 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3348 nigel 63 printf(" Internal link size = %d\n", rc);
3349 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3350 nigel 63 printf(" POSIX malloc threshold = %d\n", rc);
3351 ph10 1389 (void)PCRE_CONFIG(PCRE_CONFIG_PARENS_LIMIT, &lrc);
3352     printf(" Parentheses nest limit = %ld\n", lrc);
3353 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3354 ph10 376 printf(" Default match limit = %ld\n", lrc);
3355 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3356 ph10 376 printf(" Default recursion depth limit = %ld\n", lrc);
3357 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3358 ph10 895 printf(" Match recursion uses %s", rc? "stack" : "heap");
3359     if (showstore)
3360 ph10 903 {
3361 ph10 901 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3362 ph10 903 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3363 ph10 895 }
3364 ph10 903 printf("\n");
3365 ph10 121 goto EXIT;
3366 nigel 63 }
3367 ph10 922 else if (strcmp(arg, "-help") == 0 ||
3368     strcmp(arg, "--help") == 0)
3369 nigel 93 {
3370     usage();
3371     goto EXIT;
3372     }
3373 nigel 3 else
3374     {
3375 ph10 960 BAD_ARG:
3376 ph10 922 printf("** Unknown or malformed option %s\n", arg);
3377 nigel 93 usage();
3378 nigel 77 yield = 1;
3379     goto EXIT;
3380 nigel 3 }
3381     op++;
3382     argc--;
3383     }
3384    
3385 nigel 53 /* Get the store for the offsets vector, and remember what it was */
3386    
3387     size_offsets_max = size_offsets;
3388 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3389 nigel 53 if (offsets == NULL)
3390     {
3391     printf("** Failed to get %d bytes of memory for offsets vector\n",
3392 ph10 151 (int)(size_offsets_max * sizeof(int)));
3393 nigel 77 yield = 1;
3394     goto EXIT;
3395 nigel 53 }
3396    
3397 nigel 3 /* Sort out the input and output files */
3398    
3399     if (argc > 1)
3400     {
3401 nigel 93 infile = fopen(argv[op], INPUT_MODE);
3402 nigel 3 if (infile == NULL)
3403     {
3404     printf("** Failed to open %s\n", argv[op]);
3405 nigel 77 yield = 1;
3406     goto EXIT;
3407 nigel 3 }
3408     }
3409    
3410     if (argc > 2)
3411     {
3412 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
3413 nigel 3 if (outfile == NULL)
3414     {
3415     printf("** Failed to open %s\n", argv[op+1]);
3416 nigel 77 yield = 1;
3417     goto EXIT;
3418 nigel 3 }
3419     }
3420    
3421     /* Set alternative malloc function */
3422    
3423 ph10 836 #ifdef SUPPORT_PCRE8
3424 nigel 3 pcre_malloc = new_malloc;
3425 nigel 73 pcre_free = new_free;
3426     pcre_stack_malloc = stack_malloc;
3427     pcre_stack_free = stack_free;
3428 ph10 836 #endif
3429 nigel 3
3430 ph10 836 #ifdef SUPPORT_PCRE16
3431     pcre16_malloc = new_malloc;
3432     pcre16_free = new_free;
3433     pcre16_stack_malloc = stack_malloc;
3434     pcre16_stack_free = stack_free;
3435     #endif
3436    
3437 chpe 1055 #ifdef SUPPORT_PCRE32
3438     pcre32_malloc = new_malloc;
3439     pcre32_free = new_free;
3440     pcre32_stack_malloc = stack_malloc;
3441     pcre32_stack_free = stack_free;
3442     #endif
3443    
3444 ph10 1388 /* Heading line unless quiet */
3445 nigel 3
3446 ph10 836 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3447 nigel 3
3448     /* Main loop */
3449    
3450 nigel 11 while (!done)
3451 nigel 3 {
3452     pcre *re = NULL;
3453     pcre_extra *extra = NULL;
3454 nigel 37
3455     #if !defined NOPOSIX /* There are still compilers that require no indent */
3456 nigel 3 regex_t preg;
3457 nigel 45 int do_posix = 0;
3458 nigel 37 #endif
3459    
3460 nigel 7 const char *error;
3461 ph10 836 pcre_uint8 *markptr;
3462     pcre_uint8 *p, *pp, *ppp;
3463     pcre_uint8 *to_file = NULL;
3464     const pcre_uint8 *tables = NULL;
3465 zherczeg 847 unsigned long int get_options;
3466 nigel 75 unsigned long int true_size, true_study_size = 0;
3467 ph10 1359 size_t size;
3468 ph10 654 int do_allcaps = 0;
3469 ph10 512 int do_mark = 0;
3470 nigel 3 int do_study = 0;
3471 ph10 654 int no_force_study = 0;
3472 nigel 25 int do_debug = debug;
3473 nigel 35 int do_G = 0;
3474     int do_g = 0;
3475 nigel 25 int do_showinfo = showinfo;
3476 nigel 35 int do_showrest = 0;
3477 ph10 616 int do_showcaprest = 0;
3478 nigel 75 int do_flip = 0;
3479 nigel 93 int erroroffset, len, delimiter, poffset;
3480 ph10 975
3481     #if !defined NODFA
3482 ph10 960 int dfa_matched = 0;
3483 ph10 975 #endif
3484 nigel 3
3485 ph10 836 use_utf = 0;
3486 ph10 211 debug_lengths = 1;
3487 ph10 1454 SET_PCRE_STACK_GUARD(NULL);
3488 nigel 63
3489 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3490 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3491 nigel 63 fflush(outfile);
3492 nigel 3
3493     p = buffer;
3494     while (isspace(*p)) p++;
3495     if (*p == 0) continue;
3496 ph10 1404
3497 ph10 1388 /* Handle option lock-out setting */
3498 ph10 1404
3499 ph10 1388 if (*p == '<' && p[1] == ' ')
3500     {
3501     p += 2;
3502     while (isspace(*p)) p++;
3503     if (strncmp((char *)p, "forbid ", 7) == 0)
3504     {
3505     p += 7;
3506     while (isspace(*p)) p++;
3507 ph10 1404 pp = lockout;
3508 ph10 1388 while (!isspace(*p) && pp < lockout + sizeof(lockout) - 1)
3509     *pp++ = *p++;
3510 ph10 1404 *pp = 0;
3511 ph10 1388 }
3512 ph10 1404 else
3513 ph10 1388 {
3514 ph10 1399 printf("** Unrecognized special command '%s'\n", p);
3515 ph10 1388 yield = 1;
3516 ph10 1404 goto EXIT;
3517     }
3518 ph10 1388 continue;
3519 ph10 1404 }
3520 nigel 3
3521 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
3522 nigel 3
3523 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3524     {
3525 zherczeg 841 pcre_uint32 magic;
3526 ph10 836 pcre_uint8 sbuf[8];
3527 nigel 75 FILE *f;
3528    
3529     p++;
3530 zherczeg 839 if (*p == '!')
3531     {
3532     do_debug = TRUE;
3533     do_showinfo = TRUE;
3534     p++;
3535     }
3536    
3537 nigel 75 pp = p + (int)strlen((char *)p);
3538     while (isspace(pp[-1])) pp--;
3539     *pp = 0;
3540    
3541     f = fopen((char *)p, "rb");
3542     if (f == NULL)
3543     {
3544     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3545     continue;
3546     }
3547     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3548    
3549     true_size =
3550     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3551     true_study_size =
3552     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3553    
3554 zherczeg 852 re = (pcre *)new_malloc(true_size);
3555 ph10 1017 if (re == NULL)
3556     {
3557     printf("** Failed to get %d bytes of memory for pcre object\n",
3558     (int)true_size);
3559     yield = 1;
3560     goto EXIT;
3561     }
3562 nigel 75 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3563    
3564 chpe 1055 magic = REAL_PCRE_MAGIC(re);
3565 nigel 75 if (magic != MAGIC_NUMBER)
3566     {
3567 ph10 836 if (swap_uint32(magic) == MAGIC_NUMBER)
3568 nigel 75 {
3569     do_flip = 1;
3570     }
3571     else
3572     {
3573     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3574 ph10 1017 new_free(re);
3575 nigel 75 fclose(f);
3576     continue;
3577     }
3578     }
3579    
3580 zherczeg 839 /* We hide the byte-invert info for little and big endian tests. */
3581 ph10 612 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3582 zherczeg 839 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3583 nigel 75
3584 ph10 612 /* Now see if there is any following study data. */
3585 nigel 75
3586     if (true_study_size != 0)
3587     {
3588     pcre_study_data *psd;
3589    
3590     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3591     extra->flags = PCRE_EXTRA_STUDY_DATA;
3592    
3593     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3594     extra->study_data = psd;
3595    
3596     if (fread(psd, 1, true_study_size, f) != true_study_size)
3597     {
3598     FAIL_READ:
3599     fprintf(outfile, "Failed to read data from %s\n", p);
3600 ph10 836 if (extra != NULL)
3601     {
3602     PCRE_FREE_STUDY(extra);
3603     }
3604 ph10 1017 new_free(re);
3605 nigel 75 fclose(f);
3606     continue;
3607     }
3608     fprintf(outfile, "Study data loaded from %s\n", p);
3609     do_study = 1; /* To get the data output if requested */
3610     }
3611     else fprintf(outfile, "No study data\n");
3612    
3613 ph10 836 /* Flip the necessary bytes. */
3614     if (do_flip)
3615     {
3616 zherczeg 839 int rc;
3617     PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3618     if (rc == PCRE_ERROR_BADMODE)
3619     {
3620 ph10 1313 pcre_uint32 flags_in_host_byte_order;
3621 zherczeg 1150 if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
3622     flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
3623     else
3624 ph10 1313 flags_in_host_byte_order = swap_uint32(REAL_PCRE_FLAGS(re));
3625 zherczeg 839 /* Simulate the result of the function call below. */
3626     fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3627 chpe 1055 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3628     PCRE_INFO_OPTIONS);
3629     fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3630 zherczeg 1150 "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
3631 ph10 1017 new_free(re);
3632     fclose(f);
3633 zherczeg 839 continue;
3634     }
3635 ph10 836 }
3636    
3637     /* Need to know if UTF-8 for printing data strings. */
3638    
3639 ph10 1017 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3640     {
3641     new_free(re);
3642     fclose(f);
3643     continue;
3644     }
3645 ph10 836 use_utf = (get_options & PCRE_UTF8) != 0;
3646    
3647 nigel 75 fclose(f);
3648     goto SHOW_INFO;
3649     }
3650    
3651     /* In-line pattern (the usual case). Get the delimiter and seek the end of
3652 ph10 836 the pattern; if it isn't complete, read more. */
3653 nigel 75
3654 nigel 3 delimiter = *p++;
3655    
3656 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
3657 nigel 3 {
3658 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3659 nigel 3 goto SKIP_DATA;
3660     }
3661    
3662     pp = p;
3663 ph10 530 poffset = (int)(p - buffer);
3664 nigel 3
3665     for(;;)
3666     {
3667 nigel 29 while (*pp != 0)
3668     {
3669     if (*pp == '\\' && pp[1] != 0) pp++;
3670     else if (*pp == delimiter) break;
3671     pp++;
3672     }
3673 nigel 3 if (*pp != 0) break;
3674 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3675 nigel 3 {
3676     fprintf(outfile, "** Unexpected EOF\n");
3677 nigel 11 done = 1;
3678     goto CONTINUE;
3679 nigel 3 }
3680 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3681 nigel 3 }
3682    
3683 nigel 93 /* The buffer may have moved while being extended; reset the start of data
3684     pointer to the correct relative point in the buffer. */
3685    
3686     p = buffer + poffset;
3687    
3688 nigel 29 /* If the first character after the delimiter is backslash, make
3689     the pattern end with backslash. This is purely to provide a way
3690     of testing for the error message when a pattern ends with backslash. */
3691    
3692     if (pp[1] == '\\') *pp++ = '\\';
3693    
3694 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3695     for callouts. */
3696 nigel 3
3697     *pp++ = 0;
3698 nigel 75 strcpy((char *)pbuffer, (char *)p);
3699 nigel 3
3700 ph10 1388 /* Look for modifiers and options after the final delimiter. */
3701 nigel 3
3702 ph10 1363 options = default_options;
3703 ph10 1022 study_options = force_study_options;
3704 nigel 31 log_store = showstore; /* default from command line */
3705    
3706 nigel 3 while (*pp != 0)
3707     {
3708 ph10 1388 /* Check to see whether this modifier has been locked out for this file.
3709 ph10 1404 This is complicated for the multi-character options that begin with '<'.
3710     If there is no '>' in the lockout string, all multi-character modifiers are
3711     locked out. */
3712    
3713 ph10 1388 if (strchr((char *)lockout, *pp) != NULL)
3714     {
3715     if (*pp == '<' && strchr((char *)lockout, '>') != NULL)
3716     {
3717     int x = check_mc_option(pp+1, outfile, FALSE, "modifier");
3718     if (x == 0) goto SKIP_DATA;
3719 ph10 1404
3720 ph10 1388 for (ppp = lockout; *ppp != 0; ppp++)
3721     {
3722     if (*ppp == '<')
3723     {
3724     int y = check_mc_option(ppp+1, outfile, FALSE, "modifier");
3725     if (y == 0)
3726     {
3727 ph10 1399 printf("** Error in modifier forbid data - giving up.\n");
3728 ph10 1388 yield = 1;
3729 ph10 1404 goto EXIT;
3730 ph10 1388 }
3731 ph10 1404 if (x == y)
3732 ph10 1388 {
3733     ppp = pp;
3734     while (*ppp != '>') ppp++;
3735 ph10 1404 printf("** The %.*s modifier is locked out - giving up.\n",
3736 ph10 1403 (int)(ppp - pp + 1), pp);
3737 ph10 1388 yield = 1;
3738 ph10 1404 goto EXIT;
3739     }
3740 ph10 1388 }
3741 ph10 1404 }
3742 ph10 1388 }
3743 ph10 1404
3744 ph10 1388 /* The single-character modifiers are straightforward. */
3745 ph10 1404
3746 ph10 1388 else
3747     {
3748 ph10 1399 printf("** The /%c modifier is locked out - giving up.\n", *pp);
3749 ph10 1388 yield = 1;
3750 ph10 1404 goto EXIT;
3751     }
3752     }
3753    
3754 ph10 1388 /* The modifier is not locked out; handle it. */
3755 ph10 1404
3756 nigel 3 switch (*pp++)
3757     {
3758 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
3759 nigel 35 case 'g': do_g = 1; break;
3760 nigel 3 case 'i': options |= PCRE_CASELESS; break;
3761     case 'm': options |= PCRE_MULTILINE; break;
3762     case 's': options |= PCRE_DOTALL; break;
3763     case 'x': options |= PCRE_EXTENDED; break;
3764 nigel 25
3765 ph10 616 case '+':
3766 ph10 654 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3767 ph10 616 break;
3768 ph10 654
3769     case '=': do_allcaps = 1; break;
3770 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
3771 nigel 93 case 'B': do_debug = 1; break;
3772 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
3773 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
3774 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3775 nigel 75 case 'F': do_flip = 1; break;
3776 nigel 35 case 'G': do_G = 1; break;
3777 nigel 25 case 'I': do_showinfo = 1; break;
3778 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
3779 ph10 512 case 'K': do_mark = 1; break;
3780 nigel 31 case 'M': log_store = 1; break;
3781 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3782 ph10 1398 case 'O': options |= PCRE_NO_AUTO_POSSESS; break;
3783 nigel 37
3784     #if !defined NOPOSIX
3785 nigel 3 case 'P': do_posix = 1; break;
3786 nigel 37 #endif
3787    
3788 ph10 1454 case 'Q':
3789     switch (*pp)
3790     {
3791 ph10 1459 case '0':
3792 ph10 1454 case '1':
3793     stack_guard_return = *pp++ - '0';
3794 ph10 1459 break;
3795 ph10 1454
3796     default:
3797     fprintf(outfile, "** Missing 0 or 1 after /Q\n");
3798     goto SKIP_DATA;
3799     }
3800     SET_PCRE_STACK_GUARD(stack_guard);
3801     break;
3802    
3803 ph10 654 case 'S':
3804 ph10 1022 do_study = 1;
3805     for (;;)
3806 ph10 612 {
3807 ph10 1022 switch (*pp++)
3808 ph10 667 {
3809 ph10 1022 case 'S':
3810     do_study = 0;
3811     no_force_study = 1;
3812     break;
3813    
3814     case '!':
3815     study_options |= PCRE_STUDY_EXTRA_NEEDED;
3816     break;
3817    
3818     case '+':
3819     if (*pp == '+')
3820 ph10 922 {
3821     verify_jit = TRUE;
3822 ph10 960 pp++;
3823     }
3824 ph10 923 if (*pp >= '1' && *pp <= '7')
3825     study_options |= jit_study_bits[*pp++ - '1'];
3826 ph10 960 else
3827     study_options |= jit_study_bits[6];
3828 ph10 1022 break;
3829    
3830     case '-':
3831     study_options &= ~PCRE_STUDY_ALLJIT;
3832     break;
3833    
3834     default:
3835     pp--;
3836     goto ENDLOOP;
3837 ph10 691 }
3838     }
3839 ph10 1022 ENDLOOP:
3840 ph10 612 break;
3841    
3842 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
3843 ph10 535 case 'W': options |= PCRE_UCP; break;
3844 nigel 3 case 'X': options |= PCRE_EXTRA; break;
3845 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3846 ph10 126 case 'Z': debug_lengths = 0; break;
3847 ph10 836 case '8': options |= PCRE_UTF8; use_utf = 1; break;
3848 ph10 1320 case '9': options |= PCRE_NEVER_UTF; break;
3849 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
3850 ph10 545
3851 ph10 541 case 'T':
3852     switch (*pp++)
3853     {
3854     case '0': tables = tables0; break;
3855     case '1': tables = tables1; break;
3856 ph10 545
3857 ph10 541 case '\r':
3858     case '\n':
3859 ph10 545 case ' ':
3860     case 0:
3861 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
3862 ph10 545 goto SKIP_DATA;
3863    
3864     default:
3865 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3866 ph10 545 goto SKIP_DATA;
3867 ph10 541 }
3868 ph10 545 break;
3869 nigel 25
3870     case 'L':
3871     ppp = pp;
3872 nigel 93 /* The '\r' test here is so that it works on Windows. */
3873     /* The test for a zero byte is just in case this is an unterminated line. */
3874     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3875 nigel 25 *ppp = 0;
3876     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3877     {
3878     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3879     goto SKIP_DATA;
3880     }
3881 nigel 93 locale_set = 1;
3882 ph10 836 tables = PCRE_MAKETABLES;
3883 nigel 25 pp = ppp;
3884     break;
3885    
3886 nigel 75 case '>':
3887     to_file = pp;
3888     while (*pp != 0) pp++;
3889     while (isspace(pp[-1])) pp--;
3890     *pp = 0;
3891     break;
3892    
3893 nigel 91 case '<':
3894     {
3895 ph10 1388 int x = check_mc_option(pp, outfile, FALSE, "modifier");
3896     if (x == 0) goto SKIP_DATA;
3897     options |= x;
3898     while (*pp++ != '>');
3899 nigel 91 }
3900     break;
3901    
3902 nigel 77 case '\r': /* So that it works in Windows */
3903     case '\n':
3904     case ' ':
3905     break;
3906 nigel 75
3907 nigel 3 default:
3908 ph10 1388 fprintf(outfile, "** Unknown modifier '%c'\n", pp[-1]);
3909 nigel 3 goto SKIP_DATA;
3910     }
3911     }
3912    
3913 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
3914 nigel 25 timing, showing, or debugging options, nor the ability to pass over
3915 ph10 836 local character tables. Neither does it have 16-bit support. */
3916 nigel 3
3917 nigel 37 #if !defined NOPOSIX
3918 nigel 3 if (posix || do_posix)
3919     {
3920     int rc;
3921     int cflags = 0;
3922 nigel 75
3923 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3924     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3925 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3926 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3927     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3928 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3929 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3930 nigel 87
3931 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
3932    
3933     /* Compilation failed; go back for another re, skipping to blank line
3934     if non-interactive. */
3935    
3936     if (rc != 0)
3937     {
3938 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3939 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3940     goto SKIP_DATA;
3941     }
3942     }
3943    
3944     /* Handle compiling via the native interface */
3945    
3946     else
3947 nigel 37 #endif /* !defined NOPOSIX */
3948    
3949 nigel 3 {
3950 chpe 1055 /* In 16- or 32-bit mode, convert the input. */
3951 ph10 836
3952     #ifdef SUPPORT_PCRE16
3953 chpe 1055 if (pcre_mode == PCRE16_MODE)
3954 ph10 836 {
3955     switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3956     {
3957     case -1:
3958     fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3959     "converted to UTF-16\n");
3960     goto SKIP_DATA;
3961    
3962     case -2:
3963     fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3964     "cannot be converted to UTF-16\n");
3965     goto SKIP_DATA;
3966 ph10 842
3967 ph10 836 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3968     fprintf(outfile, "**Failed: character value greater than 0xffff "
3969     "cannot be converted to 16-bit in non-UTF mode\n");
3970 ph10 842 goto SKIP_DATA;
3971 ph10 836
3972     default:
3973     break;
3974     }
3975     p = (pcre_uint8 *)buffer16;
3976     }
3977     #endif
3978    
3979 chpe 1055 #ifdef SUPPORT_PCRE32
3980     if (pcre_mode == PCRE32_MODE)
3981     {
3982     switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3983     {
3984     case -1:
3985     fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3986     "converted to UTF-32\n");
3987     goto SKIP_DATA;
3988    
3989     case -2:
3990     fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3991     "cannot be converted to UTF-32\n");
3992     goto SKIP_DATA;
3993    
3994     case -3:
3995     fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3996     goto SKIP_DATA;
3997